Asaf Yehoodai
committed on
Commit
•
61a305f
1
Parent(s):
5af886b
add tokenizer
Browse files
- added_tokens.json +1 -0
- source.spm +0 -0
- special_tokens_map.json +1 -0
- target.spm +0 -0
- tokenizer_config.json +1 -0
- vocab.json +0 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<ENT0>": 65001, "</ENT0>": 65002, "<ENT1>": 65003, "</ENT1>": 65004, "<ENT2>": 65005, "</ENT2>": 65006, "<ENT3>": 65007, "</ENT3>": 65008, "<ENT4>": 65009, "</ENT4>": 65010, "<ENT5>": 65011, "</ENT5>": 65012, "<ENT6>": 65013, "</ENT6>": 65014, "<ENT7>": 65015, "</ENT7>": 65016, "<ENT8>": 65017, "</ENT8>": 65018, "<ENT9>": 65019, "</ENT9>": 65020, "<ENT10>": 65021, "</ENT10>": 65022, "<ENT11>": 65023, "</ENT11>": 65024, "<ENT12>": 65025, "</ENT12>": 65026, "<ENT13>": 65027, "</ENT13>": 65028, "<ENT14>": 65029, "</ENT14>": 65030, "<ENT15>": 65031, "</ENT15>": 65032, "<ENT16>": 65033, "</ENT16>": 65034, "<ENT17>": 65035, "</ENT17>": 65036, "<ENT18>": 65037, "</ENT18>": 65038, "<ENT19>": 65039, "</ENT19>": 65040, "<MALE>": 65041, "</MALE>": 65042, "<FEMALE>": 65043, "</FEMALE>": 65044}
|
source.spm
ADDED
Binary file (802 kB). View file
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
target.spm
ADDED
Binary file (826 kB). View file
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"source_lang": "eng", "target_lang": "spa", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "/cs/labs/oabend/asaf305612939/co-ref-project/mt-output/paper_models/es/en_es_coref_exp", "tokenizer_class": "MarianTokenizer"}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|