"""Gradio demo that masks PII in free text using regexes and spaCy NER."""

import re

import gradio as gr
import spacy

# The small English pipeline must be installed before this script runs:
#   python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

# Maps a PII kind to (regex pattern, placeholder). Patterns are applied in
# insertion order, and the order matters: e-mail addresses are masked before
# the website pattern runs, otherwise the domain part of an address would be
# rewritten to "[WEBSITE]" and the local part left behind.
pii_config = {
    'phone_number': (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', '[PHONE]'),
    # Despite its name this entry matches e-mail addresses; the key is kept
    # unchanged so existing lookups by key keep working.
    'text_address': (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]'),
    'social_security_number': (r'\b\d{3}-\d{2}-\d{4}\b', '[SSN]'),
    'website': (r'\b(?:http://|https://)?(?:www\.)?[a-zA-Z0-9./]+\.[a-z]{2,}\b', '[WEBSITE]'),
}

# spaCy entity labels that are replaced by a "[LABEL]" placeholder.
_ENTITY_LABELS = frozenset({'PERSON', 'ORG', 'GPE', 'LOC'})


def anonymize(text):
    """Return *text* with regex-detected PII and named entities masked.

    The regexes in ``pii_config`` are applied first, then every PERSON, ORG,
    GPE and LOC entity reported by spaCy is replaced by ``[<label>]``.

    Args:
        text: The input string to anonymize.

    Returns:
        The anonymized string. Empty input is returned unchanged.
    """
    if not text:
        return text

    for pattern, placeholder in pii_config.values():
        text = re.sub(pattern, placeholder, text)

    # Collect the distinct entity strings to mask. When the same string is
    # tagged with different labels, the first label seen wins.
    doc = nlp(text)
    labels = {}
    for ent in doc.ents:
        if ent.label_ in _ENTITY_LABELS:
            labels.setdefault(ent.text, ent.label_)
    if not labels:
        return text

    placeholders = {placeholder for _, placeholder in pii_config.values()}

    # Build one alternation and substitute in a single pass:
    #  * placeholders come first so text inside an already inserted
    #    "[PHONE]" etc. is consumed as-is and never rewritten;
    #  * entity strings are tried longest-first so "New York City" wins over
    #    "New York";
    #  * the lookarounds stop an entity from matching inside a larger word
    #    (e.g. "Ann" must not touch "Annual").
    alternatives = [re.escape(placeholder) for placeholder in placeholders]
    alternatives.extend(
        rf'(?<!\w){re.escape(entity)}(?!\w)'
        for entity in sorted(labels, key=len, reverse=True)
    )
    combined = re.compile('|'.join(alternatives))

    def _mask(match):
        found = match.group(0)
        if found in placeholders or found not in labels:
            return found
        return f'[{labels[found]}]'

    return combined.sub(_mask, text)


demo = gr.Interface(
    fn=anonymize,
    inputs=["text"],
    outputs=["text"],
)

if __name__ == "__main__":
    # Only start the web server when run as a script, so importing this
    # module (e.g. to reuse ``anonymize``) does not block on launch().
    demo.launch()