Spaces:
Running
Running
File size: 3,426 Bytes
b7be871 1215818 b7be871 1215818 b7be871 1215818 b7be871 1215818 b7be871 1215818 b7be871 1215818 b7be871 1215818 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
## Taken from https://github.com/microsoft/presidio/blob/main/docs/samples/python/transformers_recognizer/configuration.py
# Settings for the "StanfordAIMI/stanford-deidentifier-base" NER model.
# NOTE: the constant name is misspelled ("COFIGURATION"). It is kept unchanged
# so existing imports keep working; a correctly spelled alias follows the dict.
STANFORD_COFIGURATION = {
    # Model path used when no other path is supplied.
    "DEFAULT_MODEL_PATH": "StanfordAIMI/stanford-deidentifier-base",
    # Entity names on the Presidio side of the two mappings below.
    "PRESIDIO_SUPPORTED_ENTITIES": [
        "LOCATION",
        "PERSON",
        "ORGANIZATION",
        "AGE",
        "PHONE_NUMBER",
        "EMAIL",
        "DATE_TIME",
        "DEVICE",
        "ZIP",
        "PROFESSION",
        "USERNAME",
        "ID"
    ],
    # "O" is presumably the "outside any entity" tag -- confirm with the consumer.
    "LABELS_TO_IGNORE": ["O"],
    # The "{}" placeholder is left for the consumer to fill in
    # (presumably with the detected entity name -- confirm).
    "DEFAULT_EXPLANATION": "Identified as {} by the StanfordAIMI/stanford-deidentifier-base NER model",
    "SUB_WORD_AGGREGATION": "simple",
    # Dataset label -> Presidio entity name.
    "DATASET_TO_PRESIDIO_MAPPING": {
        "DATE": "DATE_TIME",
        "DOCTOR": "PERSON",
        "PATIENT": "PERSON",
        "HOSPITAL": "LOCATION",
        "MEDICALRECORD": "ID",
        "IDNUM": "ID",
        "ORGANIZATION": "ORGANIZATION",
        "ZIP": "ZIP",
        "PHONE": "PHONE_NUMBER",
        "USERNAME": "USERNAME",
        "STREET": "LOCATION",
        "PROFESSION": "PROFESSION",
        "COUNTRY": "LOCATION",
        "LOCATION-OTHER": "LOCATION",
        "FAX": "PHONE_NUMBER",
        "EMAIL": "EMAIL",
        "STATE": "LOCATION",
        "DEVICE": "DEVICE",
        "ORG": "ORGANIZATION",
        "AGE": "AGE",
    },
    # Model output label -> Presidio entity name.
    "MODEL_TO_PRESIDIO_MAPPING": {
        "PER": "PERSON",
        "PERSON": "PERSON",
        "LOC": "LOCATION",
        "ORG": "ORGANIZATION",
        "AGE": "AGE",
        "PATIENT": "PERSON",
        "HCW": "PERSON",
        "HOSPITAL": "LOCATION",
        "PATORG": "ORGANIZATION",
        "DATE": "DATE_TIME",
        "PHONE": "PHONE_NUMBER",
        "VENDOR": "ORGANIZATION",
    },
    # Chunking parameters; the unit (characters vs. tokens) is not visible
    # here -- TODO confirm against the consumer.
    "CHUNK_OVERLAP_SIZE": 40,
    "CHUNK_SIZE": 600,
    # Presumably scales the score of entities named ID_ENTITY_NAME -- confirm.
    "ID_SCORE_MULTIPLIER": 0.4,
    "ID_ENTITY_NAME": "ID"
}

# Correctly spelled alias for the constant above. Both names refer to the
# same dict object, so a change made through either name is visible via both.
STANFORD_CONFIGURATION = STANFORD_COFIGURATION
# Settings for the "obi/deid_roberta_i2b2" NER model.
BERT_DEID_CONFIGURATION = {
    # Entity names on the Presidio side of the two mappings below.
    "PRESIDIO_SUPPORTED_ENTITIES": [
        "LOCATION", "PERSON", "ORGANIZATION", "AGE",
        "PHONE_NUMBER", "EMAIL", "DATE_TIME", "ZIP",
        "PROFESSION", "USERNAME", "ID",
    ],

    # Model selection and the explanation template ("{}" is left for the
    # consumer to fill in).
    "DEFAULT_MODEL_PATH": "obi/deid_roberta_i2b2",
    "LABELS_TO_IGNORE": ["O"],
    "DEFAULT_EXPLANATION": "Identified as {} by the obi/deid_roberta_i2b2 NER model",
    "SUB_WORD_AGGREGATION": "simple",

    # Dataset label -> Presidio entity name. Several labels map to "O",
    # the value listed in LABELS_TO_IGNORE.
    "DATASET_TO_PRESIDIO_MAPPING": {
        "DATE":           "DATE_TIME",
        "DOCTOR":         "PERSON",
        "PATIENT":        "PERSON",
        "HOSPITAL":       "ORGANIZATION",
        "MEDICALRECORD":  "O",
        "IDNUM":          "O",
        "ORGANIZATION":   "ORGANIZATION",
        "ZIP":            "O",
        "PHONE":          "PHONE_NUMBER",
        # NOTE(review): empty string is neither an ignored label ("O") nor a
        # supported entity -- looks unintentional; confirm the intended value.
        "USERNAME":       "",
        "STREET":         "LOCATION",
        "PROFESSION":     "PROFESSION",
        "COUNTRY":        "LOCATION",
        "LOCATION-OTHER": "LOCATION",
        "FAX":            "PHONE_NUMBER",
        "EMAIL":          "EMAIL",
        "STATE":          "LOCATION",
        "DEVICE":         "O",
        "ORG":            "ORGANIZATION",
        "AGE":            "AGE",
    },

    # Model output label -> Presidio entity name.
    "MODEL_TO_PRESIDIO_MAPPING": {
        "PER":     "PERSON",
        "LOC":     "LOCATION",
        "ORG":     "ORGANIZATION",
        "AGE":     "AGE",
        "ID":      "ID",
        "EMAIL":   "EMAIL",
        "PATIENT": "PERSON",
        "STAFF":   "PERSON",
        "HOSP":    "ORGANIZATION",
        "PATORG":  "ORGANIZATION",
        "DATE":    "DATE_TIME",
        "PHONE":   "PHONE_NUMBER",
    },

    # Chunking parameters; the unit is not visible here -- TODO confirm.
    "CHUNK_OVERLAP_SIZE": 40,
    "CHUNK_SIZE": 600,

    # Presumably scales the score of entities named ID_ENTITY_NAME -- confirm.
    "ID_SCORE_MULTIPLIER": 0.4,
    "ID_ENTITY_NAME": "ID",
}
|