Spaces:
Runtime error
Runtime error
File size: 1,400 Bytes
a2817bd 77c21f6 5b1bf51 77c21f6 5b1bf51 18a25c3 5b1bf51 77c21f6 5b1bf51 62b1c9a 77c21f6 62b1c9a 77c21f6 62b1c9a 77c21f6 5b1bf51 77c21f6 5b1bf51 9aec1b9 5b1bf51 a7827a1 9aec1b9 2904831 9aec1b9 77c21f6 9aec1b9 5b1bf51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import regex as re
import nltk
def load_words_from_file(file_path):
    """Read a word list from a UTF-8 text file, one entry per line.

    Leading and trailing whitespace is stripped from every line. Lines that
    are empty after stripping are skipped: an empty entry is never a real
    word, and when wrapped in a word-boundary regex it would match at every
    word boundary of the text being filtered.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of the non-blank, stripped lines, in file order.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file object directly; no need to materialize all lines.
        stripped_lines = (line.strip() for line in f)
        return [word for word in stripped_lines if word]
def sub_explitives(textfile, selection):
    """Replace the words of the selected category in a text with "person".

    Args:
        textfile: The text to filter, given as a string (it is split on
            newlines and searched directly; it is not a file path).
        selection: Category to filter: "B-Word", "N-Word" or
            "All Explitives". Any other value selects no words.

    Returns:
        The text with every whole-word, case-insensitive occurrence of a
        word from the selected list replaced by "person". The text is
        returned unchanged when the selection is not recognized or the
        selected word list is empty.
    """
    replacetext = "person"

    # Only read the list that was actually selected, so a missing or broken
    # file belonging to another category cannot make this call fail.
    word_files = {
        "B-Word": "b_word.txt",
        "N-Word": "n_word.txt",
        "All Explitives": "expletives.txt",
    }
    # Non-string selections (e.g. None) simply select nothing.
    word_file = word_files.get(selection) if isinstance(selection, str) else None
    target_word = load_words_from_file(word_file) if word_file else []
    print("selection:", selection, "target_word:", target_word)

    if not target_word:
        return textfile

    print("target word was found, ", target_word)
    print(textfile)

    # Build each pattern once instead of once per line. Empty entries are
    # skipped: r"\b\b" would match at every word boundary in the text.
    patterns = [
        re.compile(r"\b" + re.escape(word) + r"\b", flags=re.IGNORECASE)
        for word in target_word
        if word
    ]

    lines = textfile.split('\n')
    for i, line in enumerate(lines):
        # Apply the words in list order, each on the result of the previous.
        for pattern in patterns:
            line = pattern.sub(replacetext, line)
        lines[i] = line
    return '\n'.join(lines)