mskov committed on
Commit
77c21f6
1 Parent(s): a7827a1

Update replace_explitives.py

Browse files
Files changed (1) hide show
  1. replace_explitives.py +19 -7
replace_explitives.py CHANGED
@@ -1,28 +1,40 @@
1
  import regex as re
 
2
 
3
-
 
 
 
4
 
5
  def sub_explitives(textfile, selection):
6
 
7
  replacetext = "person"
8
-
 
 
 
 
 
9
  # text = word_tokenize(textfile)
10
  # print(text)
11
  # sentences = sent_tokenize(textfile)
12
 
13
  if selection == "B-Word":
14
- target_word = r"\bbitch\b"
15
  elif selection == "N-Word":
16
- target_word = r"\bnigga\b"
17
  elif selection == "All Explitives":
18
- target_word = r"\bshit\b"
19
  else:
20
- target_word = None
21
 
22
  print("selection:", selection, "target_word:", target_word)
23
 
24
  if target_word:
25
  print("target word was found, ", target_word)
26
  print(textfile)
27
- textfile = re.sub(target_word, replacetext, textfile, flags=re.IGNORECASE)
 
 
 
28
  return textfile
 
1
  import regex as re
2
+ import nltk
3
 
4
def load_words_from_file(file_path):
    """Read a word list from a UTF-8 text file, one entry per line.

    Every line is stripped of surrounding whitespace. Lines that are empty
    after stripping are skipped, so a blank line in the file never becomes an
    empty-string entry (an empty alternative in a regex built from this list
    would match where no word is present).

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of the non-empty, stripped lines, in file order.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file object directly rather than materialising
        # readlines() first.
        stripped_lines = (line.strip() for line in f)
        return [word for word in stripped_lines if word]
8
 
9
  def sub_explitives(textfile, selection):
10
 
11
  replacetext = "person"
12
+
13
+ # Load target words from text files
14
+ b_word_list = load_words_from_file("b_word.txt")
15
+ n_word_list = load_words_from_file("n_word.txt")
16
+ expletives_list = load_words_from_file("expletives.txt")
17
+
18
  # text = word_tokenize(textfile)
19
  # print(text)
20
  # sentences = sent_tokenize(textfile)
21
 
22
  if selection == "B-Word":
23
+ target_word = b_word_list
24
  elif selection == "N-Word":
25
+ target_word = n_word_list
26
  elif selection == "All Explitives":
27
+ target_word = expletives_list
28
  else:
29
+ target_word = []
30
 
31
  print("selection:", selection, "target_word:", target_word)
32
 
33
  if target_word:
34
  print("target word was found, ", target_word)
35
  print(textfile)
36
+ pattern = r"\b" + re.escape(target_word) + r"\b" # Create a regex pattern for each word
37
+ # textfile = re.sub(target_word, replacetext, textfile, flags=re.IGNORECASE)
38
+ textfile = re.sub(pattern, replacetext, textfile, flags=re.IGNORECASE)
39
+
40
  return textfile