"""Keyword-based sentiment/emotion analysis helpers built on NLTK."""
import os
import string
from collections import Counter
from datetime import datetime, timedelta, timezone
from pathlib import Path

import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

from utils import *
datetime_format = "%d/%m/%Y %H:%M:%S"

# IST is UTC+05:30; build the tzinfo once instead of on every now() call.
ist_offset = timedelta(hours=5, minutes=30)
ist_timezone = timezone(ist_offset)


def now():
    """Return the current time in IST as a "dd/mm/YYYY HH:MM:SS" string.

    datetime.now(tz) yields an aware datetime directly in the target zone,
    so no intermediate UTC value or astimezone() conversion is needed.
    """
    return datetime.now(ist_timezone).strftime(datetime_format)
class SentimentAnalyser:
    """Keyword-based emotion extractor backed by NLTK preprocessing.

    Input text is lowercased, stripped of punctuation, tokenized, filtered
    of English stop words, and lemmatized; surviving words are matched
    against the word:emotion pairs in utils/emotions.txt and the most
    frequent matched emotion is returned.
    """

    def __init__(self):
        # Corpora required by word_tokenize, stopwords and WordNetLemmatizer.
        nltk.download('punkt')
        nltk.download('stopwords')
        nltk.download('wordnet')
        # Absolute path to the word:emotion lexicon read by sentiment().
        self.emotions = Path("utils/emotions.txt").resolve()

    def sentiment(self, text):
        """Return the dominant emotion found in *text*, or None if none match.

        Prints each intermediate stage (matched emotions, counts, winner)
        with an IST timestamp, mirroring the module's logging style.
        """
        lower_case = text.lower()
        # Remove all punctuation in a single C-level pass.
        cleaned_text = lower_case.translate(str.maketrans('', '', string.punctuation))
        tokenized_words = word_tokenize(cleaned_text, "english")

        # Hoisted: stopwords.words() re-reads the corpus on every call, so
        # invoking it once per token was accidentally O(tokens * corpus).
        stop_words = set(stopwords.words("english"))
        final_words = [word for word in tokenized_words if word not in stop_words]

        # One lemmatizer for the whole text (previously rebuilt per word).
        # A set gives O(1) membership tests against the lexicon below.
        lemmatizer = WordNetLemmatizer()
        lemma_words = {lemmatizer.lemmatize(word) for word in final_words}

        emotion_list = []
        with open(self.emotions) as f:
            for line in f:
                clear_line = line.replace("\n", "").replace(",", "").replace("'", "").replace(" ", "").strip()
                # Skip blank or malformed lexicon lines instead of raising
                # ValueError from the 2-way unpack below.
                if ":" not in clear_line:
                    continue
                word, emotion = clear_line.split(":")
                if word in lemma_words:
                    emotion_list.append(emotion)
        print(f"[{now()}] Emotion List:", emotion_list)

        if not emotion_list:
            print(f"[{now()}] No emotion could be extracted.")
            return None

        emotions_count = Counter(emotion_list)
        print(f"[{now()}] Emotions Count:", emotions_count)
        common = emotions_count.most_common(1)
        print(f"[{now()}] Common Emotions:", common)
        sentiment, _ = common[0]
        print(f"[{now()}] Emotion:", sentiment)
        return sentiment