import string
from collections import Counter
from datetime import datetime, timezone, timedelta
from pathlib import Path

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

from utils import *

datetime_format = "%d/%m/%Y %H:%M:%S"
ist_offset = timedelta(hours=5, minutes=30)


def now():
    """Return the current time in IST (UTC+05:30) as a formatted string."""
    utc_time = datetime.now(timezone.utc)
    ist_time = utc_time.astimezone(timezone(ist_offset))
    return ist_time.strftime(datetime_format)


class SentimentAnalyser:
    def __init__(self):
        # Fetch the NLTK resources needed by word_tokenize, stopwords and
        # WordNetLemmatizer; download() is a no-op if they are already present.
        nltk.download('punkt', quiet=True)
        nltk.download('stopwords', quiet=True)
        nltk.download('wordnet', quiet=True)
        self.emotions = Path("utils/emotions.txt").resolve()

    def sentiment(self, text):
        lower_case = text.lower()
        cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation))

        # word_tokenize splits off contractions and punctuation more
        # accurately than a plain split()
        tokenized_words = word_tokenize(cleaned_text, "english")

        # Removing stop words; build the set once instead of re-reading the
        # stopwords corpus on every iteration
        stop_words = set(stopwords.words("english"))
        final_words = [word for word in tokenized_words if word not in stop_words]

        # Lemmatization - reduce each word to its dictionary base form
        # (e.g. 'cats' -> 'cat'); one shared lemmatizer instance is enough
        lemmatizer = WordNetLemmatizer()
        lemma_words = [lemmatizer.lemmatize(word) for word in final_words]

        # Match the lemmas against the word:emotion pairs in emotions.txt,
        # where each line looks like: 'victimized': 'cheated',
        emotion_list = []
        with open(self.emotions) as f:
            for line in f:
                clear_line = line.replace(",", "").replace("'", "").replace(" ", "").strip()
                if ":" not in clear_line:
                    continue  # skip blank or malformed lines
                word, emotion = clear_line.split(":", 1)
                if word in lemma_words:
                    emotion_list.append(emotion)

        print(f"[{now()}] Emotion List:", emotion_list)
        if not emotion_list:
            print(f"[{now()}] No emotion could be extracted.")
            return None

        emotions_count = Counter(emotion_list)
        print(f"[{now()}] Emotions Count:", emotions_count)
        common = emotions_count.most_common(1)
        print(f"[{now()}] Common Emotions:", common)
        emotion, _count = common[0]
        print(f"[{now()}] Emotion:", emotion)
        return emotion
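

# Usage sketch (illustrative, not part of the original module): assumes
# utils/emotions.txt exists alongside this file and contains lines in the
# 'word': 'emotion', format parsed above. The sample sentence is made up
# purely for demonstration.
if __name__ == "__main__":
    analyser = SentimentAnalyser()
    result = analyser.sentiment("I feel cheated and a little sad, but hopeful.")
    print(f"[{now()}] Detected emotion:", result)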