communicaite / services /anonymizer.py
seduerr's picture
init
190f036
raw history blame
No virus
1.48 kB
import spacy
import names
from faker import Faker
# spaCy pipeline loaded once at import time; anonymize() runs it on the input
# text and reads the PERSON and ORG entities it reports.
nlp = spacy.load("en_core_web_sm")
# Faker instance used by anonymize() to generate replacement company names.
fake = Faker()
# Read the list of female first names, one per line, stripping trailing
# whitespace/newlines. anonymize() looks up the upper-cased first name in this
# list to decide which gender of replacement first name to generate.
# NOTE(review): the path is relative to the current working directory, and the
# lookup presumably expects the file to hold upper-case names - confirm.
with open('./src/female_names.txt', 'r') as file:
    female = [line.rstrip() for line in file]
def anonymize(text):
    """Replace person and organisation names in *text* with generated ones.

    The text is run through the module-level spaCy pipeline ``nlp``. Every
    entity labelled ``ORG`` is replaced by one ``fake.company()`` name, and
    every entity labelled ``PERSON`` is replaced token by token: the first
    token becomes a generated first name (female if its upper-cased form is
    in the module-level ``female`` list, otherwise male) and each remaining
    token becomes a generated last name.

    Each entity is handled on its own, so single-word names ("John") and
    names with more than two parts are anonymized as well, and tokens of
    different people are never paired with each other.

    Args:
        text: The input string to anonymize.

    Returns:
        The text with all detected names replaced. Replacement is done with
        ``str.replace``, so every occurrence of a detected name token in the
        text is substituted, not only the span spaCy reported.
    """
    doc = nlp(text)

    # dict.fromkeys de-duplicates while keeping first-seen order, so a name
    # mentioned several times is only processed once.
    organisations = dict.fromkeys(
        entity.text for entity in doc.ents if entity.label_ == 'ORG')
    persons = dict.fromkeys(
        entity.text for entity in doc.ents if entity.label_ == 'PERSON')

    # Anonymize Corporation
    # Organisations go first and are replaced as a whole: if a person token
    # that also occurs inside an organisation name were replaced earlier,
    # the full organisation string would no longer be found in the text.
    for organisation in organisations:
        text = text.replace(organisation, fake.company())

    # Anonymize Name and Surname
    for person in persons:
        # split() without an argument never yields empty tokens; replacing
        # an empty string would insert the fake name between every character.
        tokens = person.split()
        if not tokens:
            continue
        first_name, *surnames = tokens
        gender = 'female' if first_name.upper() in female else 'male'
        text = text.replace(first_name, names.get_first_name(gender=gender))
        for surname in surnames:
            text = text.replace(surname, names.get_last_name())

    # NOTE: str.replace matches substrings, so a short name token can also
    # hit inside a longer unrelated word; this is unchanged from before.
    return text