# redux / app.py
# Source: Hugging Face file view (raw / history / blame), 931 bytes, no virus flagged.
# Last commit: "Update app.py" by Straive-Kripa (6f448bd, verified).
import re

import gradio as gr
import spacy
# Load the small English spaCy pipeline once, at import time. The model package
# is a separate download from spaCy itself and must already be installed
# (e.g. `python -m spacy download en_core_web_sm`) before this line runs.
nlp = spacy.load("en_core_web_sm")
# Regex rules for pattern-based PII. Each entry maps a rule name to a
# (pattern, placeholder) pair; anonymize() applies them in insertion order.
# Order matters: the e-mail rule must run before the website rule, because the
# website pattern would otherwise consume the domain part of an address.
pii_config = {
    # US-style 10-digit numbers with optional '-', '.' or whitespace separators.
    'phone_number': (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', '[PHONE]'),
    # E-mail addresses (the key name is historical). The TLD class is
    # [A-Za-z]; a '|' inside a character class is a literal pipe, not "or".
    'text_address': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
    # US social security numbers in the canonical ddd-dd-dddd layout.
    'social_security_number': (r'\b\d{3}-\d{2}-\d{4}\b', '[SSN]'),
    # Bare or http(s)-prefixed host names; '-' is allowed so hyphenated
    # domains such as "my-site.com" are redacted as a whole.
    'website': (r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./-]+\.[a-z]{2,}\b', '[WEBSITE]'),
}
# spaCy entity labels that are replaced by a "[LABEL]" placeholder.
_REDACTED_ENTITY_LABELS = frozenset({'PERSON', 'ORG', 'GPE', 'LOC'})


def anonymize(text):
    """Return *text* with pattern-based PII and named entities redacted.

    The function works in two passes:

    1. Every rule in ``pii_config`` is applied with ``re.sub``, replacing
       matches by the rule's placeholder (``[PHONE]``, ``[EMAIL]``, ...).
    2. The result is run through the spaCy pipeline ``nlp`` and each
       PERSON / ORG / GPE / LOC entity is replaced by ``[<LABEL>]``.

    Args:
        text: The input string to anonymize.

    Returns:
        The anonymized string.
    """
    # Pass 1: regex rules, in the order they are declared in pii_config.
    for pattern, placeholder in pii_config.values():
        text = re.sub(pattern, placeholder, text)

    # Record where the placeholders ended up so the NER pass cannot relabel
    # them (an upper-case token such as "PHONE" may be tagged as an entity).
    protected_spans = [
        match.span()
        for _, placeholder in pii_config.values()
        for match in re.finditer(re.escape(placeholder), text)
    ]

    # Pass 2: named entities. Replace by character offset rather than with
    # str.replace, which would also rewrite the entity text wherever it occurs
    # as a substring of other words. Walking right to left keeps the offsets
    # of the not-yet-processed (earlier) entities valid.
    doc = nlp(text)
    for ent in reversed(doc.ents):
        if ent.label_ not in _REDACTED_ENTITY_LABELS:
            continue
        start, end = ent.start_char, ent.end_char
        overlaps_placeholder = any(
            start < span_end and span_start < end
            for span_start, span_end in protected_spans
        )
        if overlaps_placeholder:
            continue
        text = f'{text[:start]}[{ent.label_}]{text[end:]}'
    return text
# Gradio UI: one free-text input, the anonymized text as output.
# `gr` is the conventional alias from `import gradio as gr` at the top of the file.
demo = gr.Interface(
    fn=anonymize,
    inputs=["text"],
    outputs=["text"],
)

# Start the web server only when this file is executed as a script
# (`python app.py`), so importing the module does not launch the UI.
if __name__ == "__main__":
    demo.launch()