# Importamos las librerías necesarias
import os
from pathlib import Path
from matplotlib import pyplot as plt
import pandas as pd
from spacy.lang.es.stop_words import STOP_WORDS as es_stopwords
from wordcloud import WordCloud


# Dataset location and project configuration.
# The data lives in a "Dataset" folder next to the current working directory.
path = Path.cwd().parent / "Dataset"


# Funciones usadas para plotear los datos
def plots_world_cloud(df, title, figsize=(10, 10)):
    """Draw and show a word cloud built from a collection of texts.

    Args:
        df: Iterable of strings (for example a pandas Series of texts). The
            items are joined with single spaces to form the corpus.
        title: Title placed above the plot.
        figsize: Figure size forwarded to ``plt.figure``.

    The cloud uses a white background and receives the imported Spanish
    stop-word set as ``stopwords``. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    corpus = " ".join(df)
    plt.figure(figsize=figsize)
    cloud_builder = WordCloud(background_color="white", stopwords=es_stopwords)
    cloud_image = cloud_builder.generate(corpus)
    plt.imshow(cloud_image, interpolation="bilinear")
    # Title and axis visibility are independent settings on the current axes.
    plt.title(title)
    plt.axis("off")
    plt.show()


# Load the train and test splits and prepare them for exploration.
df_train = pd.read_csv(path / "train.csv")
df_test = pd.read_csv(path / "test.csv")

# Add the character length of each text and a tag naming the source split.
df_train = df_train.assign(len=df_train["text"].apply(len), tag="train")
df_test = df_test.assign(len=df_test["text"].apply(len), tag="test")

# NOTE: from here on ``df_train`` holds the train rows followed by the test
# rows; the ``tag`` column tells them apart. Index labels are not reset.
df_train = pd.concat([df_train, df_test])


# Preview the first five rows of the combined frame.
df_train.head(n=5)


# One-hot encode the three sentiment columns and show the first row as 0/1
# integers. NOTE(review): the captured output lists label variants such as
# `postive` and `neutral?`, so the raw labels look not fully normalized.
(
    pd.get_dummies(
        df_train[["target_sentiment", "companies_sentiment", "consumers_sentiment"]],
        columns=["target_sentiment", "companies_sentiment", "consumers_sentiment"],
    )
    .iloc[0]
    .astype(int)
)

target_sentiment_negative        1
target_sentiment_neutral         0
target_sentiment_positive        0
target_sentiment_postive         0
companies_sentiment_negative     1
companies_sentiment_neutral      0
companies_sentiment_positive     0
companies_sentiment_positive     0
consumers_sentiment_negative     1
consumers_sentiment_neutral      0
consumers_sentiment_neutral?     0
consumers_sentiment_positive     0
consumers_sentiment_positive     0
Name: 0, dtype: int32


# Bar chart of how many rows carry each target_sentiment label.
df_train["target_sentiment"].value_counts().plot.bar()

<Axes: xlabel='target_sentiment'>


# Distribution of text length, one box per target_sentiment label.
df_train.boxplot("len", by="target_sentiment")

<Axes: title={'center': 'len'}, xlabel='target_sentiment'>


# Summary statistics of text length for each target_sentiment label.
df_train.groupby("target_sentiment")["len"].describe()


# Bar chart of how many rows carry each companies_sentiment label.
df_train["companies_sentiment"].value_counts().plot.bar()

<Axes: xlabel='companies_sentiment'>


# Summary statistics of text length for each companies_sentiment label.
df_train.groupby("companies_sentiment")["len"].describe()


# Distribution of text length, one box per companies_sentiment label.
df_train.boxplot("len", by="companies_sentiment")

<Axes: title={'center': 'len'}, xlabel='companies_sentiment'>


# Text-length summary per companies_sentiment label (same expression as the
# earlier describe cell for this column, run again after the box plot).
df_train.groupby("companies_sentiment")["len"].describe()


# Bar chart of how many rows carry each consumers_sentiment label.
df_train["consumers_sentiment"].value_counts().plot.bar()

<Axes: xlabel='consumers_sentiment'>


# Distribution of text length, one box per consumers_sentiment label.
df_train.boxplot("len", by="consumers_sentiment")

<Axes: title={'center': 'len'}, xlabel='consumers_sentiment'>


# Word cloud over every text in the combined frame.
plots_world_cloud(df_train["text"], "Word Cloud")

# One word cloud per (sentiment column, label) pair, in the original order.
# Titles are kept exactly as they were written for each plot.
cloud_specs = (
    ("target_sentiment", "positive", "Word Cloud - target(Positive)"),
    ("target_sentiment", "negative", "Word Cloud - target(Negative)"),
    ("target_sentiment", "neutral", "Word Cloud - target(Neutral)"),
    ("companies_sentiment", "positive", "Word Cloud - companie(positive)"),
    ("companies_sentiment", "negative", "Word Cloud - companie(negative)"),
    ("companies_sentiment", "neutral", "Word Cloud - companie(neutro)"),
    ("consumers_sentiment", "positive", "Word Cloud - consumers(positive)"),
    ("consumers_sentiment", "negative", "Word Cloud - consumers(negative)"),
    ("consumers_sentiment", "neutral", "Word Cloud - consumers(neutral)"),
)
for column, label, cloud_title in cloud_specs:
    plots_world_cloud(df_train[df_train[column] == label]["text"], cloud_title)

	count	mean	std	min	25%	50%	75%	max
target_sentiment
negative	356.0	72.870787	17.779404	22.0	63.75	74.0	82.0	159.0
neutral	69.0	61.797101	21.466513	17.0	46.00	60.0	77.0	109.0
positive	480.0	71.352083	17.458779	22.0	60.00	70.0	82.0	155.0

	count	mean	std	min	25%	50%	75%	max
companies_sentiment
negative	301.0	72.269103	18.411699	22.0	61.0	71.0	83.00	159.0
neutral	430.0	70.395349	18.389644	17.0	59.0	71.5	81.00	137.0
positive	174.0	71.448276	16.873044	22.0	60.0	72.0	82.75	120.0

	count	mean	std	min	25%	50%	75%	max
companies_sentiment
negative	301.0	72.269103	18.411699	22.0	61.0	71.0	83.0	159.0
neutral	430.0	70.395349	18.389644	17.0	59.0	71.5	81.0	137.0
positive	173.0	71.427746	16.919843	22.0	60.0	72.0	83.0	120.0
positive	1.0	75.000000	NaN	75.0	75.0	75.0	75.0	75.0

FinancIA¶

Variable objetivo.¶

Análisis exploratorio¶

target_sentiment¶

companies_sentiment¶

consumers_sentiment¶

Conclusiones¶

Word Cloud¶

Target Sentiment¶

Companies Sentiment¶

Consumers Sentiment¶

Conclusiones¶

	id	text	target	target_sentiment	companies_sentiment	consumers_sentiment	len	tag
0	69737	Renfe afronta mañana un nuevo día de paros par...	Renfe	negative	negative	negative	71	train
1	71795	Presupuesto populista con cimientos frágiles	Presupuesto populista	negative	negative	negative	44	train
2	77984	Biden no cree que la OPEP+ vaya a ayudar con l...	OPEP+	negative	negative	negative	69	train
3	82648	La deuda de las familias cae en 25.000 millone...	deuda de las familias	positive	positive	positive	91	train
4	97337	Bestinver: no hay "momento más inoportuno" par...	Bestinver	negative	negative	negative	57	train