peter szemraj
add additional entities
ebe69d1
raw history blame
No virus
828 Bytes
"""
utils.py - Utility functions for the project.
"""
def postprocess(text: str) -> str:
    """
    postprocess - remove common values in scraped dataset

    Every known entity name (in its exact spelling or fully lowercased) is
    swapped for a placeholder in one left-to-right pass. Names are matched
    as whole words only and the longest name is tried first, so
    "Enron Corporation" becomes "<COMPANY>" rather than
    "<COMPANY> Corporation", and ordinary words that merely contain a short
    key (e.g. "enable" contains "ena") are left alone.

    Args:
        text (str): the text to postprocess

    Returns:
        str: the text with the known entity names replaced by placeholders
    """
    # Function-scope import: the module has no top-level import block.
    import re

    replacements = {
        "ENA": "<COMPANY>",
        "Enron": "<COMPANY>",
        "Enron Corporation": "<COMPANY>",
        "Sony Pictures Entertainment": "<COMPANY>",
        "Columbia Pictures": "<COMPANY>",
        "Sony": "<COMPANY>",
        "Columbia": "<COMPANY>",
        "Hillary": "Jane",
        "Clinton": "Smith",
        "Amy": "Jane",
        "Sara": "Jane",
        "Harambe": "Jane",
        "Pascal": "<PERSON>",
    }

    # replace common values, also check lowercase; setdefault keeps the
    # exact-case entry should a lowercased key ever collide with another key
    lookup = dict(replacements)
    for key, value in replacements.items():
        lookup.setdefault(key.lower(), value)

    # Regex alternation takes the first alternative that matches, so order
    # the names longest-first to let multi-word names win over their prefixes.
    names = sorted(lookup, key=len, reverse=True)

    # \b is valid on both sides because every key starts and ends with a
    # word character; it stops short keys from firing inside longer words.
    pattern = re.compile(r"\b(?:" + "|".join(re.escape(n) for n in names) + r")\b")

    # A single pass means already-inserted placeholders are never re-scanned.
    return pattern.sub(lambda match: lookup[match.group(0)], text)