screeve-pos-lemmatizer / tokenizer_config.json
Nargizi's picture
update model_max_length
0842bbe
{
"additional_special_tokens": [
"Cj",
"V",
"Num",
"Pron",
"N",
"A",
"Adv",
"Other",
"Interj",
"Pp",
"<POS>"
],
"clean_up_tokenization_spaces": true,
"cls_token": "<START>",
"do_lower_case": true,
"mask_token": "[MASK]",
"model_input_names": [
"input_ids",
"attention_mask",
"decoder_input_ids"
],
"model_max_length": 1024,
"pad_token": "<PAD>",
"sep_token": "<END>",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "<UNK>"
}