AlekseyKorshuk
committed on
Commit
·
09488e2
1
Parent(s):
86ac22d
add tokenizer
Browse files
- added_tokens.json +1 -0
- merges.txt +0 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +0 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<bos>": 40478, "<eos>": 40479, "<speaker1>": 40480, "<speaker2>": 40481, "<pad>": 40482}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<bos>", "eos_token": "<eos>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<speaker1>", "<speaker2>"]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "<unk>", "special_tokens_map_file": null, "name_or_path": "/var/folders/c9/nlyjrmvd2m741ghn3g9l8v540000gn/T/tmpzwxhh1ys", "tokenizer_class": "OpenAIGPTTokenizer"}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|