Video2ReactionText / tokenizer_config.json
szili2011's picture
Create tokenizer_config.json
f8bc109 verified
raw
history blame
912 Bytes
{
  "tokenizer_class": "Video2ReactionTextTokenizer",
  "tokenizer_config": {
    "special_tokens": {
      "<s>": 0,
      "</s>": 1,
      "<unk>": 2,
      "<pad>": 3,
      "<mask>": 4
    },
    "tokenizer_parameters": {
      "tokenizer_model": "BPE",
      "add_prefix_space": false,
      "trim_offsets": true,
      "model_max_length": 512,
      "tokenization_method": "ByteLevel",
      "tokenizer_version": "1.0.0"
    },
    "training_parameters": {
      "vocab_size": 30000,
      "min_frequency": 2,
      "special_tokens_count": 5,
      "max_length_single_sentence": 512,
      "max_length_pair": 512,
      "pad_token_id": 3,
      "unk_token_id": 2,
      "bos_token_id": 0,
      "eos_token_id": 1
    },
    "model_information": {
      "author": "Szilárd",
      "description": "Custom tokenizer for Video2ReactionText model",
      "license": "MIT",
      "version": "1.0.0"
    }
  }
}