import numpy as np
import nltk
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import stopwords


def flatten(l):  # noqa: E741 - parameter name kept from the original lambda
    """Flatten one level of nesting: an iterable of iterables becomes a list."""
    return [item for sublist in l for item in sublist]


# Maps the first two characters of a tag produced by the NLTK POS tagger
# (e.g. "NNS" -> "NN") to the part-of-speech letter used for SentiWordNet
# lookups: noun, verb, adjective, adverb. Tags with any other prefix are
# not scored.
tagsswn = {
    "NN": "n",
    "VB": "v",
    "JJ": "a",
    "RB": "r",
}

# Resources fetched by wordnet_pipeline(). The first five are the ones this
# module has always requested; the last two are the names under which
# NLTK >= 3.9 looks up the sentence tokenizer and the English POS tagger,
# so requesting both sets keeps old and new NLTK releases working.
_NLTK_RESOURCES = (
    "sentiwordnet",
    "wordnet",
    "stopwords",
    "punkt",
    "averaged_perceptron_tagger",
    "punkt_tab",
    "averaged_perceptron_tagger_eng",
)


def get_sentiment(aval, stopwords):
    """Compute the SentiWordNet sentiment score of a text.

    The text is split into sentences, tokenized and POS-tagged. Every token
    that is not a stopword, whose tag maps to an entry of ``tagsswn`` and
    that has at least one SentiWordNet synset contributes the positive and
    negative scores of its *first* synset.

    Args:
        aval (str): Text to analyse.
        stopwords: Container of lowercase words to ignore; membership is
            tested with ``in``. Note that inside this function the name
            shadows the module-level ``nltk.corpus.stopwords`` import.

    Returns:
        tuple: ``(positive_sum, negative_sum)``. Each element is the
        ``np.sum`` of the collected scores, or the integer ``0`` when no
        token was scored.
    """
    tokenized_sentences = [
        nltk.word_tokenize(sentence) for sentence in nltk.sent_tokenize(aval)
    ]
    tagged_words = flatten(nltk.pos_tag_sents(tokenized_sentences))

    pos_scores = []
    neg_scores = []
    for word, tag in tagged_words:
        token = word.lower()
        if token in stopwords:
            continue

        swn_pos = tagsswn.get(tag[:2])
        if swn_pos is None:
            # Part of speech not covered by the tag mapping.
            continue

        # Only the first sense is used; take it without materializing
        # every synset of the word.
        first_sense = next(iter(swn.senti_synsets(token, swn_pos)), None)
        if first_sense is None:
            continue

        pos_scores.append(first_sense.pos_score())
        neg_scores.append(first_sense.neg_score())

    sump = np.sum(pos_scores) if pos_scores else 0
    sumn = np.sum(neg_scores) if neg_scores else 0
    return sump, sumn


def classify_sentiment(aval, stopwords):
    """Classify a text as positive or negative from its sentiment score.

    Args:
        aval (str): Text to classify.
        stopwords: Container of lowercase words to ignore, forwarded to
            ``get_sentiment``.

    Returns:
        str: ``"positive"`` if the positive score is strictly greater than
        the negative score, ``"negative"`` otherwise (ties, including texts
        with no scored token, are therefore ``"negative"``).
    """
    pos_score, neg_score = get_sentiment(aval, stopwords)
    return "positive" if pos_score > neg_score else "negative"


def wordnet_pipeline(df, column):
    """Classify every entry of ``df[column]`` with ``classify_sentiment``.

    Requests the NLTK resources listed in ``_NLTK_RESOURCES`` on every call,
    builds the English stopword set once, and classifies each review.

    Args:
        df: Object indexable by ``column`` that yields an iterable of review
            texts (presumably a pandas DataFrame; confirm against callers).
        column: Key of the column holding the review texts.

    Returns:
        list: One ``"positive"`` / ``"negative"`` label per review, in
        iteration order.
    """
    for resource in _NLTK_RESOURCES:
        nltk.download(resource)

    stpwrds = set(stopwords.words("english"))
    return [classify_sentiment(review, stpwrds) for review in df[column]]