Spaces:

senyukhin
/

annotator_demo

Sleeping

App Files Files Community

annotator_demo / sentiment_parser.py

senyukhin

Upload 11 files

eea4f3c about 1 year ago

raw

history blame contribute delete

2.11 kB

	import json
	import os
	import preproc, date_parser
	from collections import Counter
	import pandas as pd
	import streamlit as st

	# Absolute directory that contains this module; the sentiment lexicon files
	# are resolved relative to it. abspath() guards against a relative
	# ``__file__`` (e.g. a script started by bare filename on older Pythons),
	# where dirname() alone would yield '' and the f-string paths built from it
	# would point at the filesystem root ('/sentiment/...').
	SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

	def sentiment_verbs():
	    """Read the verb sentiment lexicon shipped next to this module.

	    Returns:
	        The parsed content of ``sentiment/verbs.json`` (UTF-8), located
	        under ``SCRIPT_DIR``.
	    """
	    lexicon_path = f'{SCRIPT_DIR}/sentiment/verbs.json'
	    with open(lexicon_path, 'r', encoding='utf-8') as handle:
	        lexicon = json.load(handle)
	    return lexicon


	def sentiment_nouns():
	    """Read the noun sentiment lexicon shipped next to this module.

	    Returns:
	        The parsed content of ``sentiment/emo_clean.json`` (UTF-8), located
	        under ``SCRIPT_DIR``.
	    """
	    lexicon_path = f'{SCRIPT_DIR}/sentiment/emo_clean.json'
	    with open(lexicon_path, 'r', encoding='utf-8') as handle:
	        lexicon = json.load(handle)
	    return lexicon

	# Sentiment lexicons, read from disk once when the module is imported.
	# VERBS is indexed by lemma; get_sentiment_from_verbs() unpacks each entry
	# as two-item records and uses the second item as the sentiment label.
	VERBS = sentiment_verbs()
	# NOUNS is indexed by lemma; get_sentiment_from_nouns() treats each entry
	# as an iterable of sentiment labels.
	NOUNS = sentiment_nouns()


	def get_sentiment_from_verbs(lemmas):
	    """Tally the sentiment labels of all lemmas that are keys of ``VERBS``.

	    Each matching lemma contributes the second item of every record in its
	    ``VERBS`` entry, once per occurrence of the lemma in *lemmas*.

	    Args:
	        lemmas: Collection of hashable lemmas. It is iterated twice, so a
	            re-iterable sequence is expected.

	    Returns:
	        A ``Counter`` mapping sentiment label to its total count; empty
	        when no lemma is found in the lexicon.
	    """
	    known = set(lemmas) & set(VERBS.keys())
	    occurrences = Counter(lemmas)
	    totals = Counter()
	    for lemma in known:
	        # Every label of this lemma is weighted by how often the lemma occurs.
	        weight = occurrences[lemma]
	        for _, label in VERBS[lemma]:
	            totals[label] += weight
	    return totals

	def get_sentiment_from_nouns(lemmas):
	    """Tally the sentiment labels of all lemmas that are keys of ``NOUNS``.

	    Each matching lemma contributes every label listed in its ``NOUNS``
	    entry, once per occurrence of the lemma in *lemmas*.

	    Args:
	        lemmas: Collection of hashable lemmas. It is iterated twice, so a
	            re-iterable sequence is expected.

	    Returns:
	        A ``Counter`` mapping sentiment label to its total count; empty
	        when no lemma is found in the lexicon.
	    """
	    known = set(lemmas) & set(NOUNS.keys())
	    occurrences = Counter(lemmas)
	    totals = Counter()
	    for lemma in known:
	        # Every label of this lemma is weighted by how often the lemma occurs.
	        weight = occurrences[lemma]
	        for label in NOUNS[lemma]:
	            totals[label] += weight
	    return totals

	def get_overall_sentiment(tokens):
	    """Combine verb- and noun-based sentiment counts for *tokens*.

	    The tokens are first turned into lemmas with
	    ``preproc.get_all_lemmas``; both lexicon lookups then run on that
	    same lemma collection.

	    Returns:
	        The sum of the two ``Counter`` results (``Counter`` addition keeps
	        only labels whose combined count is positive).
	    """
	    all_lemmas = preproc.get_all_lemmas(tokens)
	    return (
	        get_sentiment_from_verbs(all_lemmas)
	        + get_sentiment_from_nouns(all_lemmas)
	    )


	def get_sentiment_index(sentiments):
	    """Return the ``'positive'`` count minus the ``'negative'`` count.

	    Args:
	        sentiments: Mapping indexed by sentiment label. A ``Counter``
	            yields 0 for a missing label; a plain ``dict`` lacking either
	            key raises ``KeyError``.
	    """
	    positive = sentiments['positive']
	    negative = sentiments['negative']
	    return positive - negative


	def get_most_sentiment(sentiment_index):
	    """Return the most frequent polarity among a series of sentiment indices.

	    Each index is classified as ``'positive'`` (> 0), ``'negative'`` (< 0)
	    or ``'neutral'`` (anything else, including 0), and the label that
	    occurs most often is returned. On a tie, the label that appeared first
	    in the input wins.

	    Args:
	        sentiment_index: Iterable of numeric sentiment indices.

	    Returns:
	        ``'positive'``, ``'negative'`` or ``'neutral'``. An empty input
	        yields ``'neutral'`` instead of raising ``IndexError``.
	    """
	    labels = Counter()
	    for index in sentiment_index:
	        if index > 0:
	            labels['positive'] += 1
	        elif index < 0:
	            labels['negative'] += 1
	        else:
	            labels['neutral'] += 1
	    if not labels:
	        # Nothing to vote on: most_common(1) would be [] and [0][0] would fail.
	        return 'neutral'
	    return labels.most_common(1)[0][0]

	def data_for_sentiment_chart(df):
	    """Build the two-column frame used for the sentiment chart.

	    Works on a copy, so the caller's frame is not modified. A new
	    ``n_date`` column is computed row by row from
	    ``date_parser.normalize_dates(row.date_start, row.date_stop)``.

	    Args:
	        df: DataFrame with ``date_start``, ``date_stop`` and ``sent_index``
	            columns.

	    Returns:
	        A DataFrame holding only the ``n_date`` and ``sent_index`` columns.
	    """
	    def _normalized_date(row):
	        return date_parser.normalize_dates(row.date_start, row.date_stop)

	    chart_df = df.copy()
	    chart_df['n_date'] = chart_df.apply(_normalized_date, axis=1)
	    return chart_df[['n_date', 'sent_index']]