update analyse and graph functions
Browse files
- app.py +5 -5
- st_pages/analyse.py +17 -53
- utils/graph_functions.py +14 -7
app.py
CHANGED
@@ -19,8 +19,8 @@ init_session_state()
|
|
19 |
with st.sidebar:
|
20 |
st.write("<br>" * 4, unsafe_allow_html=True)
|
21 |
selected_task = on_hover_tabs(
|
22 |
-
tabName=["Home Page", "Analyse Sentiment", "Dashboard", "
|
23 |
-
iconName=["home", "engineering", "equalizer", "
|
24 |
styles={
|
25 |
"navtab": {"background-color": "#fff"},
|
26 |
"tabOptionsStyle": {
|
@@ -46,9 +46,9 @@ elif selected_task == "Dashboard":
|
|
46 |
with st.spinner("Loading Dashboard..."):
|
47 |
dashboard()
|
48 |
|
49 |
-
elif selected_task == "Data":
|
50 |
-
|
51 |
-
|
52 |
|
53 |
elif selected_task == "About Us":
|
54 |
about_us_page(CONTRIBUTORS)
|
|
|
19 |
with st.sidebar:
|
20 |
st.write("<br>" * 4, unsafe_allow_html=True)
|
21 |
selected_task = on_hover_tabs(
|
22 |
+
tabName=["Home Page", "Analyse Sentiment", "Dashboard", "About Us"],
|
23 |
+
iconName=["home", "engineering", "equalizer", "contact_support"],
|
24 |
styles={
|
25 |
"navtab": {"background-color": "#fff"},
|
26 |
"tabOptionsStyle": {
|
|
|
46 |
with st.spinner("Loading Dashboard..."):
|
47 |
dashboard()
|
48 |
|
49 |
+
# elif selected_task == "Data":
|
50 |
+
# load_header("Manually Labelled Dataset")
|
51 |
+
# dataset_page()
|
52 |
|
53 |
elif selected_task == "About Us":
|
54 |
about_us_page(CONTRIBUTORS)
|
st_pages/analyse.py
CHANGED
@@ -1,71 +1,33 @@
|
|
1 |
import pandas as pd
|
2 |
import streamlit as st
|
3 |
-
from utils.
|
4 |
-
from utils.utils import (
|
5 |
-
load_header,
|
6 |
-
is_valid_twitter_url,
|
7 |
-
combine_author_and_comments_df,
|
8 |
-
)
|
9 |
-
|
10 |
|
11 |
def analyse_page():
|
12 |
-
load_header("Analizar
|
13 |
|
14 |
-
cols = st.columns([5, 1
|
15 |
|
16 |
with cols[0]:
|
17 |
-
|
18 |
-
"Paste your link here:",
|
19 |
-
placeholder="https://x.com/Google/status/1790555395041472948",
|
20 |
-
).strip()
|
21 |
|
22 |
with cols[1]:
|
23 |
st.write("<br>", unsafe_allow_html=True)
|
24 |
-
submitted = st.button("
|
25 |
-
|
26 |
-
if submitted and not is_valid_twitter_url(twitter_url):
|
27 |
-
st.toast("⚠️ Invalid URL")
|
28 |
-
|
29 |
-
if submitted and is_valid_twitter_url(twitter_url):
|
30 |
-
if "master_df" not in st.session_state:
|
31 |
-
st.session_state["master_df"] = None
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
|
37 |
-
|
38 |
-
df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
|
39 |
|
40 |
-
|
41 |
-
st.
|
|
|
42 |
|
43 |
elif not submitted:
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
master_df = combine_author_and_comments_df(df_author, df_comments)
|
49 |
-
st.session_state["master_df"] = master_df
|
50 |
-
|
51 |
-
with st.expander("Sample Data", expanded=True):
|
52 |
-
if (
|
53 |
-
"master_df" in st.session_state
|
54 |
-
and st.session_state["master_df"] is not None
|
55 |
-
):
|
56 |
-
st.dataframe(
|
57 |
-
st.session_state["master_df"], height=450, use_container_width=True
|
58 |
-
)
|
59 |
-
|
60 |
-
with cols[2]:
|
61 |
-
st.write("<br>", unsafe_allow_html=True)
|
62 |
-
st.download_button(
|
63 |
-
label="Download CSV",
|
64 |
-
data=st.session_state["master_df"].to_csv(index=False).encode("utf-8"),
|
65 |
-
file_name="output.csv",
|
66 |
-
use_container_width=True,
|
67 |
-
)
|
68 |
-
|
69 |
|
70 |
if __name__ == "__main__":
|
71 |
st.set_page_config(
|
@@ -73,3 +35,5 @@ if __name__ == "__main__":
|
|
73 |
)
|
74 |
with st.spinner("Loading Dashboard..."):
|
75 |
analyse_page()
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import streamlit as st
|
3 |
+
from utils.utils import load_header
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
def analyse_page():
|
6 |
+
load_header("Analizar CSV")
|
7 |
|
8 |
+
cols = st.columns([5, 1])
|
9 |
|
10 |
with cols[0]:
|
11 |
+
uploaded_file = st.file_uploader("Agrega tu archivo CSV aquí", type=["csv"])
|
|
|
|
|
|
|
12 |
|
13 |
with cols[1]:
|
14 |
st.write("<br>", unsafe_allow_html=True)
|
15 |
+
submitted = st.button("Procesar", use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
if submitted and uploaded_file is not None:
|
18 |
+
with st.spinner("Leyendo archivo..."):
|
19 |
+
df = pd.read_csv(uploaded_file)
|
20 |
|
21 |
+
st.session_state["master_df"] = df
|
|
|
22 |
|
23 |
+
# Muestra las primeras 5 filas del DataFrame
|
24 |
+
with st.expander("Vista previa datos", expanded=True):
|
25 |
+
st.dataframe(df.head(), height=450, use_container_width=True)
|
26 |
|
27 |
elif not submitted:
|
28 |
+
if "master_df" in st.session_state and st.session_state["master_df"] is not None:
|
29 |
+
with st.expander("Sample Data", expanded=True):
|
30 |
+
st.dataframe(st.session_state["master_df"].head(), height=450, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
if __name__ == "__main__":
|
33 |
st.set_page_config(
|
|
|
35 |
)
|
36 |
with st.spinner("Loading Dashboard..."):
|
37 |
analyse_page()
|
38 |
+
|
39 |
+
|
utils/graph_functions.py
CHANGED
@@ -45,7 +45,14 @@ def load_data(df):
|
|
45 |
|
46 |
@st.cache_data
|
47 |
def process_texts(texts):
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
tokenized_texts = texts.apply(word_tokenize)
|
50 |
tokenized_texts = tokenized_texts.apply(
|
51 |
lambda x: [word.lower() for word in x if word.lower() not in stop_words]
|
@@ -90,7 +97,7 @@ def display_word_cloud(dataframe):
|
|
90 |
|
91 |
|
92 |
def most_common_trigrams(df, pdf=False):
|
93 |
-
stop_words = set(stopwords.words("
|
94 |
|
95 |
colors = ["#ff2b2b", "#83c9ff", "#0068c9"]
|
96 |
fig = make_subplots(rows=1, cols=3)
|
@@ -155,7 +162,7 @@ def display_target_count(df):
|
|
155 |
2,
|
156 |
)
|
157 |
fig.update_layout(
|
158 |
-
title_text="
|
159 |
title_y=1,
|
160 |
title_font=dict(color="#808495", size=15),
|
161 |
autosize=True,
|
@@ -192,15 +199,15 @@ def sentiment_over_date(df):
|
|
192 |
fig.update_yaxes(showgrid=False)
|
193 |
fig.update_layout(
|
194 |
title={
|
195 |
-
"text": "
|
196 |
"x": 0.2,
|
197 |
"y": 1,
|
198 |
"xanchor": "center",
|
199 |
"yanchor": "top",
|
200 |
"font": {"size": 15, "color": "#808495", "family": "Arial"},
|
201 |
},
|
202 |
-
xaxis_title="
|
203 |
-
yaxis_title="
|
204 |
hovermode="x",
|
205 |
showlegend=True,
|
206 |
autosize=False,
|
@@ -331,4 +338,4 @@ def metrics_bar(tweet_data, df):
|
|
331 |
# st.info(f"##### **Overall Sentiment**: :{TEXT_COLOR[tweet_data['overall_sentiment'].lower()]}[**{tweet_data['overall_sentiment']}**]")
|
332 |
pos.metric(label=":green[Positive]", value=tweet_data["positive"])
|
333 |
neu.metric(label=":gray[Neutral]", value=tweet_data["neutral"])
|
334 |
-
neg.metric(label=":red[Negative]", value=tweet_data["negative"])
|
|
|
45 |
|
46 |
@st.cache_data
|
47 |
def process_texts(texts):
|
48 |
+
custom_stopwords = set([
|
49 |
+
'ser', 'haber', 'hacer', 'tener', 'poder', 'ir', 'q', 'si', 'solo', 'saber', 'decir',
|
50 |
+
'dar', 'querer', 'ver', 'así', 'sos', 'maje', 'dejar', 'si', 'solo', 'si', 'op', 'vos',
|
51 |
+
'cada', 'mismo', 'usted', 'mas', 'pues', 'andar', 'ahora', 'claro', 'nunca', 'quedar', 'pasar',
|
52 |
+
'venir', 'poner', 'dio', 'señora', 'señor', 'ahí', 'asi', 'vez', 'jajaja'
|
53 |
+
])
|
54 |
+
stop_words = set(stopwords.words("spanish"))
|
55 |
+
stop_words.update(custom_stopwords)
|
56 |
tokenized_texts = texts.apply(word_tokenize)
|
57 |
tokenized_texts = tokenized_texts.apply(
|
58 |
lambda x: [word.lower() for word in x if word.lower() not in stop_words]
|
|
|
97 |
|
98 |
|
99 |
def most_common_trigrams(df, pdf=False):
|
100 |
+
stop_words = set(stopwords.words("spanish")) # noqa: F841
|
101 |
|
102 |
colors = ["#ff2b2b", "#83c9ff", "#0068c9"]
|
103 |
fig = make_subplots(rows=1, cols=3)
|
|
|
162 |
2,
|
163 |
)
|
164 |
fig.update_layout(
|
165 |
+
title_text="Análisis de Sentimientos",
|
166 |
title_y=1,
|
167 |
title_font=dict(color="#808495", size=15),
|
168 |
autosize=True,
|
|
|
199 |
fig.update_yaxes(showgrid=False)
|
200 |
fig.update_layout(
|
201 |
title={
|
202 |
+
"text": "Sentimiento a través del tiempo",
|
203 |
"x": 0.2,
|
204 |
"y": 1,
|
205 |
"xanchor": "center",
|
206 |
"yanchor": "top",
|
207 |
"font": {"size": 15, "color": "#808495", "family": "Arial"},
|
208 |
},
|
209 |
+
xaxis_title="Fecha",
|
210 |
+
yaxis_title="Conteo",
|
211 |
hovermode="x",
|
212 |
showlegend=True,
|
213 |
autosize=False,
|
|
|
338 |
# st.info(f"##### **Overall Sentiment**: :{TEXT_COLOR[tweet_data['overall_sentiment'].lower()]}[**{tweet_data['overall_sentiment']}**]")
|
339 |
pos.metric(label=":green[Positive]", value=tweet_data["positive"])
|
340 |
neu.metric(label=":gray[Neutral]", value=tweet_data["neutral"])
|
341 |
+
neg.metric(label=":red[Negative]", value=tweet_data["negative"])
|