Asaf Yehoodai committed on
Commit
61a305f
1 Parent(s): 5af886b

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<ENT0>": 65001, "</ENT0>": 65002, "<ENT1>": 65003, "</ENT1>": 65004, "<ENT2>": 65005, "</ENT2>": 65006, "<ENT3>": 65007, "</ENT3>": 65008, "<ENT4>": 65009, "</ENT4>": 65010, "<ENT5>": 65011, "</ENT5>": 65012, "<ENT6>": 65013, "</ENT6>": 65014, "<ENT7>": 65015, "</ENT7>": 65016, "<ENT8>": 65017, "</ENT8>": 65018, "<ENT9>": 65019, "</ENT9>": 65020, "<ENT10>": 65021, "</ENT10>": 65022, "<ENT11>": 65023, "</ENT11>": 65024, "<ENT12>": 65025, "</ENT12>": 65026, "<ENT13>": 65027, "</ENT13>": 65028, "<ENT14>": 65029, "</ENT14>": 65030, "<ENT15>": 65031, "</ENT15>": 65032, "<ENT16>": 65033, "</ENT16>": 65034, "<ENT17>": 65035, "</ENT17>": 65036, "<ENT18>": 65037, "</ENT18>": 65038, "<ENT19>": 65039, "</ENT19>": 65040, "<MALE>": 65041, "</MALE>": 65042, "<FEMALE>": 65043, "</FEMALE>": 65044}
source.spm ADDED
Binary file (802 kB). View file
 
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
Binary file (826 kB). View file
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"source_lang": "eng", "target_lang": "spa", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "/cs/labs/oabend/asaf305612939/co-ref-project/mt-output/paper_models/es/en_es_coref_exp", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff