arrafmousa
/

SimAPI_tokenizer

arrafmousa committed on Aug 17, 2023

Commit

bf7c19d

1 Parent(s): c1d31ef

Upload tokenizer

Files changed (6) hide show

added_tokens.json ADDED Viewed

+{
+  "": 67028,
+  "EtOH": 67030,
+  "[0]": 67037,
+  "[1]": 67065,
+  "[TAB][TAB]min": 67038,
+  "[TAB][TAB]minimal": 67031,
+  "[TAB]if": 67056,
+  "[TAB]optimized": 67032,
+  "[TAB]portions": 67064,
+  "[TAB]tmp": 67042,
+  "[[": 67060,
+  "[[reactor": 67054,
+  "[]": 67052,
+  "append": 67062,
+  "components": 67036,
+  "components]": 67035,
+  "compounds": 67048,
+  "described": 67034,
+  "desired": 67050,
+  "elements": 67043,
+  "exact": 67058,
+  "grams": 67046,
+  "have[0]": 67039,
+  "inf": 67047,
+  "mL": 67033,
+  "minimal": 67053,
+  "molar": 67044,
+  "need": 67045,
+  "needed": 67061,
+  "needed[1]": 67049,
+  "needed]": 67059,
+  "optimized": 67055,
+  "pcp": 67029,
+  "portion": 67051,
+  "portions": 67041,
+  "reactors": 67066,
+  "threshold": 67040,
+  "tmp": 67063,
+  "zip": 67057
+}

source.spm ADDED Viewed

Binary file (790 kB). View file

special_tokens_map.json ADDED Viewed

+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

target.spm ADDED Viewed

Binary file (814 kB). View file

tokenizer_config.json ADDED Viewed

+{
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "en",
+  "sp_model_kwargs": {},
+  "target_lang": "nl",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff