File size: 1,885 Bytes
32a03a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os


class wording_choice_aggregator():
    """
    A class used for retrieving frequencies based on wording in a message.

    Both public methods split the message on runs of whitespace, so repeated
    spaces, tabs and newlines never produce empty "words", and both return
    0.0 for a message that contains no words at all.
    """

    def get_frequency_of_capatalised_words(self, text):
        """
        Return the fraction of words in ``text`` that are written in upper case.

        A word counts when ``str.isupper()`` is true for it, i.e. it contains
        at least one cased character and all of its cased characters are
        upper case (``"HELLO"`` counts, ``"Hello"`` does not).

        :param text: the message to analyse
        :return: number of upper-case words divided by the total number of
                 words, or 0.0 when the message contains no words
        """
        # split() without an argument drops empty tokens, which split(" ")
        # would otherwise count as words and so deflate the frequency.
        words = text.split()
        if not words:
            return 0.0

        number_of_capatalised_words = sum(1 for word in words if word.isupper())
        return number_of_capatalised_words / len(words)

    def get_frequency_of_violent_or_curse_words(self, text, violent_words_datasets_location):
        """
        Return the fraction of words in ``text`` that appear in the word datasets.

        Matching is exact and case-sensitive: a word of the message counts only
        when it is identical to a dataset entry.

        :param text: the message to analyse
        :param violent_words_datasets_location: folder holding the word list
               files, resolved relative to the current working directory
               (an absolute path is used as-is)
        :return: number of matching words divided by the total number of
                 words, or 0.0 when the message contains no words
        :raises OSError: if the dataset folder or one of its files cannot be read
        """
        # Load first so a missing or unreadable dataset folder is reported
        # even when the message itself is empty.
        violent_or_curse_words = self._load_violent_or_curse_words(
            violent_words_datasets_location)

        words = text.split()
        if not words:
            return 0.0

        number_of_swear_words = sum(
            1 for word in words if word in violent_or_curse_words)
        return number_of_swear_words / len(words)

    @staticmethod
    def _load_violent_or_curse_words(violent_words_datasets_location):
        """
        Collect every non-blank entry from all files in the dataset folder.

        Each line of each file is one entry; surrounding whitespace and any
        commas are removed from it.

        :param violent_words_datasets_location: folder holding the word list files
        :return: a set with all entries found
        """
        dataset_folder = os.path.join(os.getcwd(), violent_words_datasets_location)

        entries = set()
        for filename in os.listdir(dataset_folder):
            path = os.path.join(dataset_folder, filename)
            # listdir also returns sub-directories, which cannot be opened.
            if not os.path.isfile(path):
                continue

            with open(path, 'r') as file:
                for line in file:
                    entry = line.strip().replace(",", "")
                    # A blank line must not become an entry that matches
                    # "nothing".
                    if entry:
                        entries.add(entry)

        return entries