bert-base-arabertv2-finetuned-squad / tokenizer_config.json
RaghadDS's picture
Model save
070354d verified
raw
history blame contribute delete
No virus
6.79 kB
{
"added_tokens_decoder": {
"0": {
"content": "+ا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"1": {
"content": "+ك",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"2": {
"content": "ب+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"3": {
"content": "+هم",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"4": {
"content": "+ات",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"5": {
"content": "+ي",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"6": {
"content": "ل+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"7": {
"content": "+هما",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"8": {
"content": "+نا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"9": {
"content": "+ن",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"10": {
"content": "+ها",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"11": {
"content": "+كما",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"12": {
"content": "+ة",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"13": {
"content": "ف+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"14": {
"content": "+كم",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"15": {
"content": "+كن",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"16": {
"content": "+ت",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"17": {
"content": "[بريد]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"18": {
"content": "[مستخدم]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"19": {
"content": "لل+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"20": {
"content": "ال+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"21": {
"content": "[رابط]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"22": {
"content": "س+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"23": {
"content": "+ان",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"24": {
"content": "+وا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"25": {
"content": "+ه",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"26": {
"content": "+ون",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"27": {
"content": "+هن",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"28": {
"content": "+ين",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"29": {
"content": "و+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"30": {
"content": "ك+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": true,
"special": true
},
"31": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"33": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"34": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"35": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"clean_up_tokenization_spaces": true,
"cls_token": "[CLS]",
"do_basic_tokenize": true,
"do_lower_case": false,
"mask_token": "[MASK]",
"max_len": 512,
"model_max_length": 512,
"never_split": [
"+ك",
"+كما",
"ك+",
"+وا",
"+ين",
"و+",
"+كن",
"+ان",
"+هم",
"+ة",
"[بريد]",
"لل+",
"+ي",
"+ت",
"+ن",
"س+",
"ل+",
"[مستخدم]",
"+كم",
"+ا",
"ب+",
"ف+",
"+نا",
"+ها",
"+ون",
"+هما",
"ال+",
"+ه",
"+هن",
"+ات",
"[رابط]"
],
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "[UNK]"
}