jky594176 committed on
Commit
db4168a
1 Parent(s): f35c801
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"<TITLE_START>": 50257, "<TITLE_END>": 50258, "<INSTR_START>": 50259, "<NEXT_INSTR>": 50260, "<INSTR_END>": 50261, "<INGR_START>": 50262, "<NEXT_INGR>": 50263, "<INGR_END>": 50264, "<RECIPE_START>": 50265, "<RECIPE_END>": 50266, "<INPUT_START>": 50267, "<INPUT_END>": 50268, "<NEXT_INPUT>": 50269}
merges.txt ADDED
The diff for this file is too large to render. See raw diff
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "additional_special_tokens": ["<TITLE_START>", "<TITLE_END>", "<INSTR_START>", "<NEXT_INSTR>", "<INSTR_END>", "<INGR_START>", "<NEXT_INGR>", "<INGR_END>", "<RECIPE_START>", "<RECIPE_END>", "<INPUT_START>", "<INPUT_END>", "<NEXT_INPUT>"]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "tokenizer_file": "/root/.cache/huggingface/transformers/16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0", "name_or_path": "./gdrive/MyDrive/COMP0087/350k_GPT2"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff