astroBERT / tokenizer_config.json
fgrezes's picture
tokenizer max len and NER labels added
fdbc193
{"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": true, "handle_chinese_chars": true, "lowercase": false, "do_basic_tokenize": true, "never_split": null, "special_tokens_map_file": "/proj.adsnlp/jupyter-lab/one/fgrezes/astroBERT-Tasks/Task_1_MLM/data/non_ocr_post_1950_xml_tokenizer/BertTokenizerFast/special_tokens_map.json", "name_or_path": "../astroBERT-Tasks/Finetuning_1_NER/trained-models/NER_astroBERT_all_labeled_data_run01/checkpoint-173000/", "add_special_tokens": true, "tokenizer_class": "BertTokenizer"}