IMDB_Reviews / wordnet.py
danielcd99's picture
added symbolic model
1ba6bc3
raw
history blame
No virus
2.11 kB
import numpy as np
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import stopwords
def flatten(l):
    """Concatenate the items of every sub-iterable of *l* into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat


# Lookup from the first two characters of a POS tag to the single-letter
# part-of-speech code handed to SentiWordNet when querying synsets.
tagsswn = dict(NN="n", VB="v", JJ="a", RB="r")
def get_sentiment(aval, stopwords):
    """
    Compute the SentiWordNet sentiment totals of a text.

    The text is split into sentences, tokenized and POS-tagged. Tokens whose
    lowercase form is in ``stopwords`` are ignored, as are tokens whose tag
    prefix has no entry in ``tagsswn`` and tokens without any SentiWordNet
    synset. Each remaining token contributes the scores of the first synset
    returned for it.

    Args:
        aval (str): Text to analyse.
        stopwords: Container of lowercase words to skip (membership is
            tested with ``in``).

    Returns:
        tuple: ``(positive_total, negative_total)``. Each total is the
        ``np.sum`` of the collected scores, or ``0`` when no token was scored.
    """
    tokenized = [nltk.word_tokenize(sentence) for sentence in nltk.sent_tokenize(aval)]
    tagged_tokens = flatten(nltk.pos_tag_sents(tokenized))

    positives = []
    negatives = []
    for token, tag in tagged_tokens:
        if token.lower() in stopwords:
            continue

        # Only the first two characters of the tag select the SentiWordNet POS.
        wn_pos = tagsswn.get(tag[:2])
        if not wn_pos:
            continue

        candidates = list(swn.senti_synsets(token.lower(), wn_pos))
        if candidates:
            # Use the first synset returned as the word's representative sense.
            first_sense = candidates[0]
            positives.append(first_sense.pos_score())
            negatives.append(first_sense.neg_score())

    total_pos = np.sum(positives) if positives else 0
    total_neg = np.sum(negatives) if negatives else 0
    return total_pos, total_neg
def classify_sentiment(aval, stopwords):
    """
    Label a text as positive or negative from its sentiment totals.

    Args:
        aval (str): Text to classify.
        stopwords: Container of lowercase words to ignore; forwarded
            unchanged to ``get_sentiment``.

    Returns:
        str: "positive" when the positive total is strictly greater than the
        negative total, otherwise "negative" (ties are labelled negative).
    """
    positive_total, negative_total = get_sentiment(aval, stopwords)
    if positive_total > negative_total:
        return "positive"
    return "negative"
def wordnet_pipeline(df, column):
    """
    Classify every text in ``df[column]`` with the SentiWordNet model.

    The NLTK resources the pipeline relies on are fetched first, then each
    entry of the column is passed to ``classify_sentiment`` together with the
    English stop-word set.

    Args:
        df: Table-like object indexable by ``column``; presumably a pandas
            DataFrame (confirm against the caller).
        column: Key of the column holding the review texts.

    Returns:
        list: One "positive"/"negative" label per entry of ``df[column]``,
        in iteration order.
    """
    resources = (
        'sentiwordnet',
        'wordnet',
        'stopwords',
        'punkt',
        'averaged_perceptron_tagger',
        # NLTK 3.9+ loads these two instead of 'punkt' and
        # 'averaged_perceptron_tagger'; without them tokenizing and tagging
        # raise LookupError on a fresh install.
        'punkt_tab',
        'averaged_perceptron_tagger_eng',
    )
    for resource in resources:
        nltk.download(resource)

    stpwrds = set(stopwords.words("english"))
    return [classify_sentiment(review, stpwrds) for review in df[column]]