thanks to NovelAI ❤
- special_tokens_map.json +1 -0
- tokenizer.model +3 -0
- tokenizer_config.json +1 -0
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
{"eos_token": "<|endoftext|>", "unk_token": "<|unknown|>", "pad_token": "<|pad|>", "bos_token": "<|startoftext|>", "additional_special_tokens": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\u2581\u2581", "\u2581\u2581\u2581\u2581", "\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "\u2003", "\u2002", "\u2042", "\u2500", "***", "----", "\u2581\"", "\u2581Author", "\u2581Title", "\u2581Tags", "\u2581Genre", "\u2581Style", "\u2581Knowledge", "\u2581Summary", "\u2581Rating", "Type", "Characters", "Glossary", "<|spmspace|>", "<|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|mtvocab|>", "<|mtvenglish|>", "<|mtvjapanese|>", "<|mtsentence|>", "<|mtsjapanese|>", "<|mtsenglish|>", "<|mtsentenceend|>", "<|mtvocabend|>", "<|mtend|>", "<|mask|>", "<|masksingle|>", "<|maskshort|>", "<|maskmedium|>", "<|masklong|>", "<|maskparagraph|>", "<|maskend|>", "<|fill|>", "<|fillend|>", "<|rubycover|>", "<|rubystart|>", "<|rubyend|>", "<|reserved0|>", "<|reserved1|>", "<|reserved2|>", "<|reserved3|>", "<|reserved4|>", "<|reserved5|>", "<|reserved6|>", "<|reserved7|>", "<|reserved8|>", "<|reserved9|>", "<|reserved10|>"]}
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:578fa0ed4d6dbee435f21d7f7a741506d09cdd93cce241008abf725407cbdb41
size 1033724
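The actual tokenizer.model blob is stored via Git LFS; the three lines above are only the pointer file (spec version, object SHA-256, and size in bytes, about 1 MB), not the SentencePiece model itself. A minimal sketch of fetching the real file with huggingface_hub, assuming the files live in a Hub repository (the repo id below is a placeholder):

    from huggingface_hub import hf_hub_download

    # Placeholder repo id; substitute the repository this commit belongs to.
    path = hf_hub_download(repo_id="org/tokenizer-repo", filename="tokenizer.model")
    print(path)  # local cache path to the ~1 MB SentencePiece model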
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
{"eos_token": "<|endoftext|>", "unk_token": "<|unknown|>", "pad_token": "<|pad|>", "bos_token": "<|startoftext|>", "additional_special_tokens": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\u2581\u2581", "\u2581\u2581\u2581\u2581", "\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "\u2003", "\u2002", "\u2042", "\u2500", "***", "----", "\u2581\"", "\u2581Author", "\u2581Title", "\u2581Tags", "\u2581Genre", "\u2581Style", "\u2581Knowledge", "\u2581Summary", "\u2581Rating", "Type", "Characters", "Glossary", "<|spmspace|>", "<|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|><|spmspace|>", "<|mtvocab|>", "<|mtvenglish|>", "<|mtvjapanese|>", "<|mtsentence|>", "<|mtsjapanese|>", "<|mtsenglish|>", "<|mtsentenceend|>", "<|mtvocabend|>", "<|mtend|>", "<|mask|>", "<|masksingle|>", "<|maskshort|>", "<|maskmedium|>", "<|masklong|>", "<|maskparagraph|>", "<|maskend|>", "<|fill|>", "<|fillend|>", "<|rubycover|>", "<|rubystart|>", "<|rubyend|>", "<|reserved0|>", "<|reserved1|>", "<|reserved2|>", "<|reserved3|>", "<|reserved4|>", "<|reserved5|>", "<|reserved6|>", "<|reserved7|>", "<|reserved8|>", "<|reserved9|>", "<|reserved10|>"], "truncation": false, "model_max_length": 8192, "add_bos_token": true, "add_eos_token": false, "tokenizer_class": "LlamaTokenizer", "clean_up_tokenization_spaces": false, "sp_model_kwargs": {}}
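For reference, a minimal loading sketch based on the configuration above (tokenizer_class "LlamaTokenizer", add_bos_token true, add_eos_token false, model_max_length 8192); the repo id is again a placeholder, and a local directory containing the three added files works the same way:

    from transformers import LlamaTokenizer

    # Placeholder repo id; the slow LlamaTokenizer needs the sentencepiece package.
    tok = LlamaTokenizer.from_pretrained("org/tokenizer-repo")

    print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)
    # expected: <|startoftext|> <|endoftext|> <|pad|> <|unknown|>

    # add_bos_token=true / add_eos_token=false: encoding prepends BOS, appends no EOS.
    ids = tok("Hello world").input_ids
    print(ids[0] == tok.bos_token_id)  # expected: True

    # Entries in additional_special_tokens should each map to a single token id.
    print(len(tok("<|mask|>", add_special_tokens=False).input_ids))  # expected: 1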