Spaces:
Build error
Build error
import re

import names
import spacy
from faker import Faker
# Shared spaCy pipeline and Faker generator, created once at import time and
# reused by every anonymize() call.
nlp = spacy.load("en_core_web_sm")
fake = Faker()

# First names treated as female, one per line. anonymize() upper-cases a name
# before looking it up here, so entries are presumably stored upper-case
# (verify against the data file).
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale.
with open('./src/female_names.txt', 'r', encoding='utf-8') as file:
    female = [current_name.rstrip() for current_name in file]
def anonymize(text):
    """Return *text* with detected person and organisation names replaced.

    The text is run through the module-level spaCy pipeline ``nlp``. Each
    word of a ``PERSON`` entity is mapped to a generated name: the first word
    becomes a first name (female when its upper-cased form is listed in
    ``female``, male otherwise) and every following word becomes a last name.
    Each ``ORG`` entity is mapped, as a whole, to one ``fake.company()``
    value.

    The same original string always maps to the same replacement within one
    call, and every whole-word occurrence of it in *text* is replaced, not
    only the occurrences the pipeline tagged.

    Args:
        text: The string to anonymize.

    Returns:
        The anonymized string; *text* unchanged when no ``PERSON`` or ``ORG``
        entity is found.
    """
    doc = nlp(text)
    replacements = {}

    # Entity boundaries are kept (one word list per entity) so that a lone
    # mention cannot shift the first-name/last-name pairing of other names.
    people = [ent.text.split() for ent in doc.ents if ent.label_ == 'PERSON']

    # Longer names are handled first: a surname that also appears on its own
    # then already has its last-name replacement when the lone mention is
    # reached, instead of being treated as a first name.
    for parts in sorted(people, key=len, reverse=True):
        for position, part in enumerate(parts):
            if part in replacements:
                continue
            if position > 0:
                replacements[part] = names.get_last_name()
            elif part.upper() in female:
                replacements[part] = names.get_first_name(gender='female')
            else:
                replacements[part] = names.get_first_name(gender='male')

    # An organisation is replaced as one unit, not word by word.
    for ent in doc.ents:
        if ent.label_ == 'ORG' and ent.text.strip():
            replacements.setdefault(ent.text, fake.company())

    if not replacements:
        return text

    # One regex pass over the original text: inserted fake names are never
    # rescanned, so one replacement cannot rewrite another. Longest keys come
    # first so a multi-word organisation wins over a shorter key it contains.
    # The lookarounds restrict matches to whole words, so a short name is not
    # replaced inside an unrelated longer word.
    alternatives = sorted(replacements, key=len, reverse=True)
    pattern = re.compile(
        r'(?<!\w)(?:' + '|'.join(map(re.escape, alternatives)) + r')(?!\w)')
    return pattern.sub(lambda match: replacements[match.group(0)], text)