# Sentiment-analysis helpers: lexicon loading, sentiment counting, and chart data.
import json
import os
import preproc, date_parser
from collections import Counter
import pandas as pd
SCRIPT_DIR = os.path.dirname(__file__)
def sentiment_verbs():
    """Return the verb -> sentiment lexicon loaded from sentiment/verbs.json.

    The parsed JSON is cached on the function object after the first call,
    so repeated sentiment lookups do not re-read and re-parse the file.
    NOTE: the returned dict is shared — callers must not mutate it.
    """
    cache = getattr(sentiment_verbs, '_cache', None)
    if cache is None:
        with open(f'{SCRIPT_DIR}/sentiment/verbs.json', 'r', encoding='utf-8') as file:
            cache = json.load(file)
        sentiment_verbs._cache = cache
    return cache
def sentiment_nouns():
    """Return the noun -> sentiment lexicon loaded from sentiment/emo_clean.json.

    The parsed JSON is cached on the function object after the first call,
    so repeated sentiment lookups do not re-read and re-parse the file.
    NOTE: the returned dict is shared — callers must not mutate it.
    """
    cache = getattr(sentiment_nouns, '_cache', None)
    if cache is None:
        with open(f'{SCRIPT_DIR}/sentiment/emo_clean.json', 'r', encoding='utf-8') as file:
            cache = json.load(file)
        sentiment_nouns._cache = cache
    return cache
def get_sentiment_from_verbs(lemmas):
    """Count sentiment labels contributed by verb lemmas.

    Each lemma found in the verb lexicon contributes its sentiment label(s),
    weighted by how many times the lemma occurs in *lemmas*.
    Returns an (possibly empty) collections.Counter of label -> count.
    """
    lexicon = sentiment_verbs()
    occurrences = Counter(lemmas)
    tally = Counter()
    # Dict-view intersection gives the lemmas that carry a verb sentiment.
    for lemma in occurrences.keys() & lexicon.keys():
        # Lexicon entries are (something, sentiment-label) pairs.
        for _, label in lexicon[lemma]:
            tally[label] += occurrences[lemma]
    return tally
def get_sentiment_from_nouns(lemmas):
    """Count sentiment labels contributed by noun lemmas.

    Each lemma found in the noun lexicon contributes its sentiment label(s),
    weighted by how many times the lemma occurs in *lemmas*.
    Returns an (possibly empty) collections.Counter of label -> count.
    """
    lexicon = sentiment_nouns()
    occurrences = Counter(lemmas)
    tally = Counter()
    # Dict-view intersection gives the lemmas that carry a noun sentiment.
    for lemma in occurrences.keys() & lexicon.keys():
        # Lexicon entries are plain sentiment labels.
        for label in lexicon[lemma]:
            tally[label] += occurrences[lemma]
    return tally
def get_overall_sentiment(tokens):
    """Combine verb- and noun-based sentiment counts for *tokens*.

    Lemmatizes the tokens once via preproc, then sums the two Counters
    (Counter addition merges counts key-wise).
    """
    all_lemmas = preproc.get_all_lemmas(tokens)
    return get_sentiment_from_verbs(all_lemmas) + get_sentiment_from_nouns(all_lemmas)
def get_sentiment_index(sentiments):
    """Net sentiment score: 'positive' count minus 'negative' count.

    *sentiments* is a Counter-like mapping; missing keys must default to 0
    (as Counter does).
    """
    positive = sentiments['positive']
    negative = sentiments['negative']
    return positive - negative
def get_most_sentiment(sentiment_index):
    """Return the dominant sentiment label over a sequence of numeric indices.

    Each index is classified by sign: > 0 -> 'positive', < 0 -> 'negative',
    0 -> 'neutral'. The most frequent label wins; ties go to the label that
    first reached the top count (Counter.most_common insertion order).

    Fix: an empty sequence now returns 'neutral' instead of raising
    IndexError on most_common(1)[0].
    """
    labels = Counter()
    for index in sentiment_index:
        if index > 0:
            labels['positive'] += 1
        elif index < 0:
            labels['negative'] += 1
        else:
            labels['neutral'] += 1
    if not labels:
        return 'neutral'
    return labels.most_common(1)[0][0]
def data_for_sentiment_chart(df):
    """Build the two-column frame used by the sentiment chart.

    Works on a copy of *df* (the caller's frame is untouched), derives a
    normalized date per row from its date_start/date_stop columns, and
    returns only the ['n_date', 'sent_index'] columns.
    """
    frame = df.copy()

    def _normalized_date(row):
        # Collapse the row's start/stop dates into one chartable value.
        return date_parser.normalize_dates(row.date_start, row.date_stop)

    frame['n_date'] = frame.apply(_normalized_date, axis=1)
    return frame[['n_date', 'sent_index']]