# The third-party "regex" package is used as a drop-in replacement for the
# standard-library "re" module; only re.escape, re.sub and re.IGNORECASE are needed here.
import regex as re

def load_words_from_file(file_path):
    """Return a list of stripped, non-empty words, one per line of the file."""
    with open(file_path, "r", encoding="utf-8") as f:
        # Skip blank lines; an empty word would otherwise produce a pattern
        # that matches at every word boundary.
        words = [line.strip() for line in f if line.strip()]
    return words

def sub_explitives(textfile, selection):
    """Replace the selected category of target words in textfile with a neutral placeholder."""
    replacetext = "person"

    # Load the target word lists from their text files.
    b_word_list = load_words_from_file("b_word.txt")
    n_word_list = load_words_from_file("n_word.txt")
    expletives_list = load_words_from_file("expletives.txt")

    if selection == "B-Word":
        target_word = b_word_list
    elif selection == "N-Word":
        target_word = n_word_list
    elif selection == "All Explitives":
        target_word = expletives_list
    else:
        target_word = []
        
    print("selection:", selection, "target_word:", target_word)
    lines = textfile.split('\n')

    if target_word:
        for i, line in enumerate(lines):
            for word in target_word:
                # Match whole words only, case-insensitively.
                pattern = r"\b" + re.escape(word) + r"\b"
                line = re.sub(pattern, replacetext, line, flags=re.IGNORECASE)
            lines[i] = line
    textfile = '\n'.join(lines)
    return textfile
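
# Minimal usage sketch, assuming the three word-list files referenced above
# (b_word.txt, n_word.txt, expletives.txt) exist in the working directory.
# The selection string mirrors the "All Explitives" branch handled in
# sub_explitives(); the sample text is purely illustrative.
if __name__ == "__main__":
    sample = "First line of sample text.\nSecond line of sample text."
    cleaned = sub_explitives(sample, "All Explitives")
    print(cleaned)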