peter szemraj committed on
Commit
0601dad
1 Parent(s): 601bf30
Files changed (1) hide show
  1. utils.py +31 -0
utils.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils.py - Utility functions for the project.
3
+ """
4
+
5
def postprocess(text: str) -> str:
    """
    postprocess - replace common names found in the scraped dataset with
    neutral placeholders.

    Each key of the replacement table is matched in two spellings: exactly
    as written (e.g. "Enron") and all-lowercase (e.g. "enron"). A match only
    counts when it is a whole word, i.e. it is not directly preceded or
    followed by another letter. This keeps ordinary words that merely
    contain a key ("enable", "arena", "dreamy") intact, while still
    replacing keys next to punctuation, digits or underscores
    ("Enron's", "x@enron.com", "enron_corp"). Keys fused with other letters
    (e.g. "EnronOnline") are intentionally left unchanged.

    Args:
        text (str): the text to postprocess

    Returns:
        str: the text with every whole-word occurrence of a known name
            replaced by its placeholder
    """
    # Function-scope import keeps this block self-contained; the module has
    # no import section of its own.
    import re

    replacements = {
        "ENA": "<COMPANY>",
        "Enron": "<COMPANY>",
        "Sony": "<COMPANY>",
        "Columbia": "<COMPANY>",
        "Hilary": "John",
        "Clinton": "Smith",
        "Amy": "Jane",
        "Pascal": "<PERSON>",
    }

    # Every spelling we accept (as written + lowercase) -> its placeholder.
    lookup = {}
    for key, value in replacements.items():
        lookup[key] = value
        lookup[key.lower()] = value

    # `[^\W\d_]` matches any Unicode letter, so the lookarounds reject a
    # candidate that is glued to a letter on either side. One combined
    # pattern also means the text is scanned once, and a placeholder that
    # was just inserted can never be re-matched by a later key.
    alternatives = "|".join(
        re.escape(spelling) for spelling in sorted(lookup, key=len, reverse=True)
    )
    pattern = re.compile(rf"(?<![^\W\d_])(?:{alternatives})(?![^\W\d_])")

    return pattern.sub(lambda match: lookup[match.group(0)], text)
31
+