"""Replace person and organization names in free text with generated ones.

Importing this module loads the spaCy ``en_core_web_sm`` pipeline and reads
``./src/female_names.txt`` (path relative to the current working directory).
"""
import re

import names
import spacy
from faker import Faker

nlp = spacy.load("en_core_web_sm")
fake = Faker()

# One name per line; lookups below upper-case the candidate first, so the
# file presumably stores names in upper case -- TODO confirm against the data.
with open('./src/female_names.txt', 'r') as file:
    female = [current_name.rstrip() for current_name in file]


def _generate_first_name(token):
    """Return a generated first name, female if *token* is in ``female``."""
    gender = 'female' if token.upper() in female else 'male'
    return names.get_first_name(gender=gender)


def _collect_replacements(doc):
    """Map each name found in *doc* to the generated value that replaces it.

    PERSON entities are handled word by word: the first word of an entity is
    mapped to a generated first name and every following word to a generated
    last name, so a surname mentioned on its own elsewhere in the text is
    replaced consistently. ORG entities are mapped as a whole phrase to a
    single generated company name. A key that is already mapped keeps its
    first replacement.
    """
    replacements = {}
    for entity in doc.ents:
        if entity.label_ == 'PERSON':
            for position, token in enumerate(entity.text.split()):
                if token in replacements:
                    continue
                if position == 0:
                    replacements[token] = _generate_first_name(token)
                else:
                    replacements[token] = names.get_last_name()
        elif entity.label_ == 'ORG':
            organization = entity.text
            if organization.strip() and organization not in replacements:
                replacements[organization] = fake.company()
    return replacements


def anonymize(text):
    """Return *text* with detected person and organization names replaced.

    Entities are detected with the module-level spaCy pipeline ``nlp``.
    Replacement values come from ``names`` and ``Faker``, so the output
    differs between calls.

    Args:
        text: The text to anonymize.

    Returns:
        The anonymized text; *text* unchanged when no PERSON or ORG entity
        is found.
    """
    replacements = _collect_replacements(nlp(text))
    if not replacements:
        return text

    # Longest keys first so a multi-word organization wins over a person
    # token it happens to contain.
    alternatives = '|'.join(
        re.escape(original)
        for original in sorted(replacements, key=len, reverse=True)
    )
    # Lookarounds instead of \b: they also work for keys that start or end
    # with a non-word character (e.g. "Inc."), and they stop a short name
    # from matching inside a longer word.
    pattern = re.compile(r'(?<!\w)(?:' + alternatives + r')(?!\w)')

    # Single pass over the original text: a generated name is never itself
    # rewritten by a later replacement. A callable is used so backslashes in
    # generated values are not interpreted as group references.
    return pattern.sub(lambda match: replacements[match.group(0)], text)