{
  "tokenizer_class": "Video2ReactionTextTokenizer",
  "tokenizer_config": {
    "special_tokens": {
      "<s>": 0,
      "</s>": 1,
      "<unk>": 2,
      "<pad>": 3,
      "<mask>": 4
    },
    "tokenizer_parameters": {
      "tokenizer_model": "BPE",
      "add_prefix_space": false,
      "trim_offsets": true,
      "model_max_length": 512,
      "tokenization_method": "ByteLevel",
      "tokenizer_version": "1.0.0"
    },
    "training_parameters": {
      "vocab_size": 30000,
      "min_frequency": 2,
      "special_tokens_count": 5,
      "max_length_single_sentence": 512,
      "max_length_pair": 512,
      "pad_token_id": 3,
      "unk_token_id": 2,
      "bos_token_id": 0,
      "eos_token_id": 1
    },
    "model_information": {
      "author": "Szilárd",
      "description": "Custom tokenizer for Video2ReactionText model",
      "license": "MIT",
      "version": "1.0.0"
    }
  }
}