File size: 2,105 Bytes
1ba6bc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import stopwords

def flatten(l):
    """
    Flatten one level of nesting.

    Args:
        l: Iterable of iterables.

    Returns:
        list: Every item of every inner iterable, in iteration order.
    """
    return [item for sublist in l for item in sublist]

# Maps the first two characters of a POS tag to the part-of-speech letter
# handed to SentiWordNet lookups (noun, verb, adjective, adverb).
tagsswn = dict(NN="n", VB="v", JJ="a", RB="r")

def get_sentiment(aval, stopwords):
    """
    Compute the SentiWordNet sentiment totals of a text.

    The text is split into sentences and tokens and POS-tagged. Tokens whose
    lowercase form is in ``stopwords`` are dropped. Each remaining token whose
    tag prefix is present in ``tagsswn`` contributes the positive and negative
    scores of its first SentiWordNet synset.

    Args:
        aval (str): Text to analyse.
        stopwords: Collection of words to ignore; tokens are lowercased
            before the membership test.

    Returns:
        tuple: (positive total, negative total). Each total is 0 when no
        token produced a score.
    """
    tokenized = [nltk.word_tokenize(sentence) for sentence in nltk.sent_tokenize(aval)]

    positives, negatives = [], []
    for tagged_sentence in nltk.pos_tag_sents(tokenized):
        for token, tag in tagged_sentence:
            lowered = token.lower()
            if lowered in stopwords:
                continue

            # Only the first two characters of the tag select the category.
            category = tagsswn.get(tag[:2], None)
            if not category:
                continue

            # Only the first synset returned for the word is scored.
            first = next(iter(swn.senti_synsets(lowered, category)), None)
            if first is None:
                continue

            positives.append(first.pos_score())
            negatives.append(first.neg_score())

    total_pos = np.sum(positives) if positives else 0
    total_neg = np.sum(negatives) if negatives else 0
    return total_pos, total_neg

def classify_sentiment(aval, stopwords):
    """
    Label a text as positive or negative from its sentiment totals.

    Args:
        aval (str): Text to classify.
        stopwords: Collection of words to ignore, forwarded to
            ``get_sentiment``.

    Returns:
        str: "positive" when the positive total is strictly greater than the
        negative total, "negative" otherwise (ties included).
    """
    positive_total, negative_total = get_sentiment(aval, stopwords)
    if positive_total > negative_total:
        return "positive"
    return "negative"


def wordnet_pipeline(df, column):
    """
    Classify every entry of ``df[column]`` as "positive" or "negative".

    Downloads the NLTK resources used by the pipeline on each call, builds
    the English stopword set once, and classifies each entry with
    ``classify_sentiment``.

    Args:
        df: Object indexable by ``column`` that yields an iterable of texts
            (presumably a pandas DataFrame; confirm against the caller).
        column: Key of the column holding the texts.

    Returns:
        list: One "positive"/"negative" label per entry, in iteration order.
    """
    # Recent NLTK releases (3.9+) load 'punkt_tab' and
    # 'averaged_perceptron_tagger_eng' instead of the legacy 'punkt' and
    # 'averaged_perceptron_tagger' resources. Both generations are requested
    # so tokenising and tagging work on either side of that change.
    for resource in (
        'sentiwordnet',
        'wordnet',
        'stopwords',
        'punkt',
        'punkt_tab',
        'averaged_perceptron_tagger',
        'averaged_perceptron_tagger_eng',
    ):
        nltk.download(resource)

    stpwrds = set(stopwords.words("english"))

    return [classify_sentiment(review, stpwrds) for review in df[column]]