fgrezes committed on
Commit
fdbc193
1 Parent(s): 1565f86

tokenizer max len and NER labels added

Browse files
Files/NER_DEAL_id2label.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"0": "B-Archive", "1": "B-CelestialObject", "2": "B-CelestialObjectRegion", "3": "B-CelestialRegion", "4": "B-Citation", "5": "B-Collaboration", "6": "B-ComputingFacility", "7": "B-Database", "8": "B-Dataset", "9": "B-EntityOfFutureInterest", "10": "B-Event", "11": "B-Fellowship", "12": "B-Formula", "13": "B-Grant", "14": "B-Identifier", "15": "B-Instrument", "16": "B-Location", "17": "B-Mission", "18": "B-Model", "19": "B-ObservationalTechniques", "20": "B-Observatory", "21": "B-Organization", "22": "B-Person", "23": "B-Proposal", "24": "B-Software", "25": "B-Survey", "26": "B-Tag", "27": "B-Telescope", "28": "B-TextGarbage", "29": "B-URL", "30": "B-Wavelength", "31": "I-Archive", "32": "I-CelestialObject", "33": "I-CelestialObjectRegion", "34": "I-CelestialRegion", "35": "I-Citation", "36": "I-Collaboration", "37": "I-ComputingFacility", "38": "I-Database", "39": "I-Dataset", "40": "I-EntityOfFutureInterest", "41": "I-Event", "42": "I-Fellowship", "43": "I-Formula", "44": "I-Grant", "45": "I-Identifier", "46": "I-Instrument", "47": "I-Location", "48": "I-Mission", "49": "I-Model", "50": "I-ObservationalTechniques", "51": "I-Observatory", "52": "I-Organization", "53": "I-Person", "54": "I-Proposal", "55": "I-Software", "56": "I-Survey", "57": "I-Tag", "58": "I-Telescope", "59": "I-TextGarbage", "60": "I-URL", "61": "I-Wavelength", "62": "O"}
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": true, "handle_chinese_chars": true, "lowercase": false, "do_basic_tokenize": true, "never_split": null, "special_tokens_map_file": "/proj.adsnlp/jupyter-lab/one/fgrezes/NLP-playground/data/non_ocr_post_1950_xml_tokenizer/BertTokenizerFast/special_tokens_map.json", "name_or_path": "./exp_run/checkpoint-10350000/", "tokenizer_class": "BertTokenizer"}
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": true, "handle_chinese_chars": true, "lowercase": false, "do_basic_tokenize": true, "never_split": null, "special_tokens_map_file": "/proj.adsnlp/jupyter-lab/one/fgrezes/astroBERT-Tasks/Task_1_MLM/data/non_ocr_post_1950_xml_tokenizer/BertTokenizerFast/special_tokens_map.json", "name_or_path": "../astroBERT-Tasks/Finetuning_1_NER/trained-models/NER_astroBERT_all_labeled_data_run01/checkpoint-173000/", "add_special_tokens": true, "tokenizer_class": "BertTokenizer"}