twc-bart-pretrain / tokenizer_config.json
MorrisPark's picture
tokenizer
a3a94b2
raw
history blame contribute delete
605 Bytes
{"bos_token": "[BOS]", "eos_token": "[EOS]", "cls_token": "[BOS]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "mask_token": "[MASK]", "additional_special_tokens": ["#@주소#", "#@이모티콘#", "#@이름#", "#@URL#", "#@소속#", "#@기타#", "#@전번#", "#@계정#", "#@url#", "#@번호#", "#@금육#", "#@신원#", "#@장소#", "#@시스템#사진#", "#@시스템#동영상#", "#@시스템#기타#", "#@시스템#검색#", "#@시스템#지도#", "#@시스템#삭제#", "#@시스템#파일#", "#@시스템#송금#", "#@시스템#"], "tokenizer_class": "PreTrainedTokenizerFast"}