Spaces:

EnzoBustos
/

IC-2022-Classificacao-de-Dados-Financeiros

Runtime error

App Files Files Community

IC-2022-Classificacao-de-Dados-Financeiros / app.py

EnzoBustos

Update app.py

0a9468c almost 2 years ago

raw history blame

No virus

6.76 kB

	from transformers import pipeline
	import torch
	import streamlit as st
	from textblob import TextBlob
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
	import os
	import re
	import pandas as pd

	def translate_text_blob(text):
	    """Translate ``text`` from Portuguese to English with TextBlob.

	    Args:
	        text: Source text; it is handed to TextBlob with ``from_lang="pt"``.

	    Returns:
	        The translation (``to="en"``) converted to a plain ``str``.
	    """
	    translated = TextBlob(text).translate(from_lang="pt", to="en")
	    return str(translated)

	def sentiment_vader(text):
	    """Label the overall sentiment of ``text`` using VADER's compound score.

	    The compound score is mapped to a label with fixed thresholds:
	    ``>= 0.05`` is positive, ``<= -0.05`` is negative, and anything in
	    between is neutral.

	    Args:
	        text: Text to score (the caller passes the English translation).

	    Returns:
	        One of ``"POSITIVE"``, ``"NEGATIVE"`` or ``"NEUTRAL"``.
	    """
	    # Only the compound score drives the label; the per-class neg/neu/pos
	    # scores were previously copied into locals that were never used.
	    compound = SentimentIntensityAnalyzer().polarity_scores(text)["compound"]

	    if compound >= 0.05:
	        return "POSITIVE"
	    if compound <= -0.05:
	        return "NEGATIVE"
	    return "NEUTRAL"

	def classify_by_company(text):
	    """Infer a sector label from company names mentioned in ``text``.

	    Every regular file in the ``Companies`` directory next to this module is
	    read as a list of company names, one per line. The file name without its
	    extension is used as the label. Files are visited in sorted name order
	    and the first one with at least one name occurring in ``text`` wins.

	    Matching is a plain, case-sensitive substring test, so company names are
	    never interpreted as regular-expression syntax.
	    NOTE(review): the caller upper-cases the text before calling this, so
	    the name files are presumably upper-case too -- confirm.

	    Args:
	        text: Text to search for company names.

	    Returns:
	        ``"<label> - Infered by company name in text"`` for the first
	        matching file, or ``""`` when no company name is found.

	    Raises:
	        FileNotFoundError: If the ``Companies`` directory does not exist.
	    """
	    companies_dir = os.path.join(
	        os.path.dirname(os.path.realpath(__file__)), "Companies"
	    )

	    # Sorted so the winning file is deterministic when several could match.
	    for filename in sorted(os.listdir(companies_dir)):
	        file_path = os.path.join(companies_dir, filename)
	        if not os.path.isfile(file_path):
	            continue

	        with open(file_path, "r", encoding="utf-8") as f:
	            # strip() rather than dropping the last character: the final
	            # line may have no trailing newline and must not lose a letter.
	            names = [line.strip() for line in f]

	        # Blank lines are skipped: an empty name occurs in every text.
	        if any(name and name in text for name in names):
	            label = os.path.splitext(filename)[0]
	            return label + " - Infered by company name in text"

	    return ""

	def load_models(parameters_list):
	    """Translate a text, score its sentiment and classify its sector.

	    Args:
	        parameters_list: Sequence laid out as ``[translation choice,
	            sentiment choice, zero-shot choice, text]``. The three choices
	            are keys of the lookup tables defined below.

	    Returns:
	        Tuple ``(translation, sentiment label, classification)``. The
	        classification string ends with a note saying whether it came from
	        a company-name match or from the selected zero-shot model.
	    """
	    translator_choice = parameters_list[0]
	    sentiment_choice = parameters_list[1]
	    zeroshot_choice = parameters_list[2]
	    source_text = parameters_list[3]

	    # Translation PT to EN: UI choice -> model id given to the pipeline.
	    # NOTE(review): the mBART id is named "...en-pt..." although it is used
	    # for a pt -> en task here -- confirm this is the intended checkpoint.
	    translators = {
	        "TextBlob": "TextBlob",
	        "M2M100": "facebook/m2m100_418M",
	        "OPUS": "Helsinki-NLP/opus-mt-mul-en",
	        "T5": "unicamp-dl/translation-pt-en-t5",
	        "mBART": "Narrativa/mbart-large-50-finetuned-opus-en-pt-translation",
	    }

	    # Sentiment analysis: UI choice -> model id.
	    sentiment_models = {
	        "VADER": "VADER",
	        "FinBERT": "ProsusAI/finbert",
	        "DistilBERT": "distilbert-base-uncased-finetuned-sst-2-english",
	        "BERT": "nlptown/bert-base-multilingual-uncased-sentiment",
	    }

	    # Zero-shot classification: UI choice -> model id.
	    zeroshot_models = {
	        "RoBERTa": "joeddav/xlm-roberta-large-xnli",
	        "mDeBERTa": "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli",
	        "DistilroBERTa": "cross-encoder/nli-distilroberta-base",
	    }

	    # Sector names offered to the zero-shot classifier.
	    sector_labels = [
	        "Industrial Goods",
	        "Communications",
	        "Cyclic Consumption",
	        "Non-cyclical Consumption",
	        "Financial",
	        "Basic Materials",
	        #"Others",
	        "Oil, Gas and Biofuels",
	        "Health",
	        #"Initial Sector",
	        "Information Technology",
	        "Public utility",
	    ]

	    # Pipelines take a device index: 0 when CUDA is available, -1 otherwise.
	    device = -1 if not torch.cuda.is_available() else 0

	    # Step 1: translation.
	    if translator_choice != "TextBlob":
	        model_id = translators[translator_choice]
	        translator = pipeline(
	            "translation_pt_to_en",
	            model=model_id,
	            tokenizer=model_id,
	            device=device,
	        )
	        english_text = translator(source_text)[0]["translation_text"]
	    else:
	        english_text = translate_text_blob(source_text)

	    # Step 2: sentiment of the translated text.
	    if sentiment_choice != "VADER":
	        model_id = sentiment_models[sentiment_choice]
	        scorer = pipeline(
	            "sentiment-analysis",
	            model=model_id,
	            tokenizer=model_id,
	            device=device,
	        )
	        sentiment_label = scorer(english_text)[0]["label"].upper()
	    else:
	        sentiment_label = sentiment_vader(english_text)

	    # Step 3: sector. A company-name hit on the upper-cased source text
	    # takes precedence; the zero-shot model is only loaded without one.
	    sector = classify_by_company(source_text.upper())
	    if not sector:
	        model_id = zeroshot_models[zeroshot_choice]
	        classifier = pipeline(
	            "zero-shot-classification",
	            model=model_id,
	            tokenizer=model_id,
	            device=device,
	        )
	        best_label = classifier(english_text, sector_labels)["labels"][0]
	        sector = f"{best_label} - Infered by {zeroshot_choice}"

	    return english_text, sentiment_label, sector

	# ---- Streamlit page script -------------------------------------------------
	# NOTE(review): in this copy every line sits at the same indentation level, so
	# the bodies of the `with` / `if` statements below are not nested. The
	# original nesting must be restored before this script can run -- confirm it
	# against the original file.

	# Dataset source: a Google Sheets document fetched as CSV through the
	# gviz/tq endpoint (tqx=out:csv) for the sheet named in `sheet_name`.
	sheet_id = "1pg13sroB-pIEXtIL5UrRowKdXWRPWnH7"
	sheet_name = "Sheet1"
	url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"

	# Downloaded at module level, i.e. whenever this script is executed.
	df = pd.read_csv(url)

	# Page layout: the containers and the two-column rows are created up front
	# and filled in by the `with` statements further down.
	header = st.container()
	model = st.container()
	model_1, model_2 = st.columns(2)
	dataset = st.container()
	analysis = st.container()
	# NOTE(review): analysis_2 is not used anywhere in the visible code.
	analysis_1, analysis_2 = st.columns(2)

	# Header section: page title plus a lorem-ipsum placeholder paragraph.
	with header:
	st.title("IC 2022 Classificação de Dados Financeiros")
	st.write("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent sapien tortor, suscipit quis ornare ut, laoreet vitae nisi. Mauris quis consectetur risus, non blandit mauris. Sed ut odio tempor, ullamcorper leo eu, mollis eros.")

	# Model section: model pickers, the input text and the submit button.
	with model:

	st.header("Modelo para Tradução e Classificação!")

	# The option tuples use the same names as the keys of the lookup tables
	# inside load_models.
	with model_1:
	translation_pt_to_en = st.selectbox('Qual modelo você deseja usar para tradução?', ('TextBlob', 'M2M100', 'OPUS', 'T5', 'mBART'))
	sentiment_analysis = st.selectbox('Qual modelo você deseja usar para análise de sentimento?', ('VADER', 'FinBERT', 'DistilBERT', 'BERT'))
	zero_shot_classification = st.selectbox('Qual modelo você deseja usar para classificação?', ('RoBERTa', 'mDeBERTa', 'DistilroBERTa'))

	# Free-text input, pre-filled with an example sentence.
	text = st.text_input(label="Coloque seu texto sobre mercado financeiro em português!", value=r"As ações da Raia Drogasil subiram em 98% desde o último bimestre, segundo as avaliações da revista!")

	submit = st.button('Gerar análises!')

	# Results: computed only when the button was pressed.
	with model_2:
	if submit:
	with st.spinner('Wait for it...'):
	# Order matters: load_models reads the choices and the text by index.
	parameters = [translation_pt_to_en, sentiment_analysis, zero_shot_classification, text]
	outputs = load_models(parameters)

	# outputs is (translation, sentiment, classification).
	st.write("Translation..................................................................: \n {} \n \n".format(outputs[0]))
	st.write("Sentiment...................................................................: \n {} \n \n".format(outputs[1]))
	st.write("Classification...............................................................: \n {} \n \n".format(outputs[2]))

	# Dataset section: shows the downloaded sheet and the column descriptions.
	with dataset:
	st.header("Dados utilizados no projeto!")
	st.write("Os dados blablablabla")
	st.dataframe(df)
	st.subheader("Descrição das colunas:")
	# NOTE(review): the same description line is written five times -- looks
	# like placeholder text for the remaining columns; confirm and replace.
	st.write("\t- Texts: Coluna que mostra os textos financeiros")
	st.write("\t- Texts: Coluna que mostra os textos financeiros")
	st.write("\t- Texts: Coluna que mostra os textos financeiros")
	st.write("\t- Texts: Coluna que mostra os textos financeiros")
	st.write("\t- Texts: Coluna que mostra os textos financeiros")

	# Analysis section: only the heading exists so far.
	with analysis:
	st.header("Visualização dos dados utilizados através de WordClouds")

	# Placeholder: nothing is rendered in the first analysis column yet.
	with analysis_1:
	pass