Spaces:
Running
Running
KarishmaShirsath
committed on
Commit
•
46dbc0f
1
Parent(s):
1e7dab8
Upload 2 files
Browse files
Added model selected option
- Final file.ipynb +0 -0
- PiiMaskingService.py +11 -7
Final file.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
PiiMaskingService.py
CHANGED
@@ -9,11 +9,12 @@ from presidio_analyzer.nlp_engine import (
|
|
9 |
NlpEngine,
|
10 |
NlpEngineProvider,
|
11 |
)
|
|
|
12 |
|
13 |
|
14 |
class PiiMaskingService():
|
15 |
|
16 |
-
def analyze(self, text: str):
|
17 |
|
18 |
entitiesToRecognize=['UK_NHS','EMAIL','AU_ABN','CRYPTO','ID','URL',
|
19 |
'AU_MEDICARE','IN_PAN','ORGANIZATION','IN_AADHAAR',
|
@@ -23,10 +24,12 @@ class PiiMaskingService():
|
|
23 |
'US_SSN','AU_TFN','US_PASSPORT','US_ITIN','NRP','AGE','GENERIC_PII'
|
24 |
]
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
30 |
|
31 |
results = analyzer.analyze(text=text, entities=entitiesToRecognize, language='en')
|
32 |
print("analyzer results:")
|
@@ -39,6 +42,7 @@ class PiiMaskingService():
|
|
39 |
self,
|
40 |
text: str,
|
41 |
operator: str,
|
|
|
42 |
# analyze_results: List[RecognizerResult],
|
43 |
):
|
44 |
operator_config = None
|
@@ -48,7 +52,7 @@ class PiiMaskingService():
|
|
48 |
operator_config = {
|
49 |
"type": "mask",
|
50 |
"masking_char": "*",
|
51 |
-
"chars_to_mask":
|
52 |
"from_end": False,
|
53 |
}
|
54 |
elif operator == "encrypt":
|
@@ -61,7 +65,7 @@ class PiiMaskingService():
|
|
61 |
operator = "custom"
|
62 |
|
63 |
|
64 |
-
analyzer_result = self.analyze(text)
|
65 |
|
66 |
engine = AnonymizerEngine()
|
67 |
|
|
|
9 |
NlpEngine,
|
10 |
NlpEngineProvider,
|
11 |
)
|
12 |
+
from presidio_analyzer.nlp_engine import TransformersNlpEngine, NerModelConfiguration
|
13 |
|
14 |
|
15 |
class PiiMaskingService():
|
16 |
|
17 |
+
def analyze(self, text: str, model: str):
|
18 |
|
19 |
entitiesToRecognize=['UK_NHS','EMAIL','AU_ABN','CRYPTO','ID','URL',
|
20 |
'AU_MEDICARE','IN_PAN','ORGANIZATION','IN_AADHAAR',
|
|
|
24 |
'US_SSN','AU_TFN','US_PASSPORT','US_ITIN','NRP','AGE','GENERIC_PII'
|
25 |
]
|
26 |
|
27 |
+
if model == "HuggingFace/obi/deid_roberta_i2b2":
|
28 |
+
nlp_engine, registry= self.create_nlp_engine_with_transformers("obi/deid_roberta_i2b2")
|
29 |
+
elif model == "flair/ner-english-large":
|
30 |
+
nlp_engine, registry= self.create_nlp_engine_with_flair("flair/ner-english-large")
|
31 |
+
|
32 |
+
analyzer = AnalyzerEngine(nlp_engine=nlp_engine, registry=registry)
|
33 |
|
34 |
results = analyzer.analyze(text=text, entities=entitiesToRecognize, language='en')
|
35 |
print("analyzer results:")
|
|
|
42 |
self,
|
43 |
text: str,
|
44 |
operator: str,
|
45 |
+
model: str
|
46 |
# analyze_results: List[RecognizerResult],
|
47 |
):
|
48 |
operator_config = None
|
|
|
52 |
operator_config = {
|
53 |
"type": "mask",
|
54 |
"masking_char": "*",
|
55 |
+
"chars_to_mask": 15,
|
56 |
"from_end": False,
|
57 |
}
|
58 |
elif operator == "encrypt":
|
|
|
65 |
operator = "custom"
|
66 |
|
67 |
|
68 |
+
analyzer_result = self.analyze(text, model)
|
69 |
|
70 |
engine = AnonymizerEngine()
|
71 |
|