arpy8 committed on
Commit
f3b949b
·
1 Parent(s): 2ae57e9

update analyse and graph functions

Browse files
Files changed (3) hide show
  1. app.py +5 -5
  2. st_pages/analyse.py +17 -53
  3. utils/graph_functions.py +14 -7
app.py CHANGED
@@ -19,8 +19,8 @@ init_session_state()
19
  with st.sidebar:
20
  st.write("<br>" * 4, unsafe_allow_html=True)
21
  selected_task = on_hover_tabs(
22
- tabName=["Home Page", "Analyse Sentiment", "Dashboard", "Data", "About Us"],
23
- iconName=["home", "engineering", "equalizer", "analytics", "contact_support"],
24
  styles={
25
  "navtab": {"background-color": "#fff"},
26
  "tabOptionsStyle": {
@@ -46,9 +46,9 @@ elif selected_task == "Dashboard":
46
  with st.spinner("Loading Dashboard..."):
47
  dashboard()
48
 
49
- elif selected_task == "Data":
50
- load_header("Manually Labelled Dataset")
51
- dataset_page()
52
 
53
  elif selected_task == "About Us":
54
  about_us_page(CONTRIBUTORS)
 
19
  with st.sidebar:
20
  st.write("<br>" * 4, unsafe_allow_html=True)
21
  selected_task = on_hover_tabs(
22
+ tabName=["Home Page", "Analyse Sentiment", "Dashboard", "About Us"],
23
+ iconName=["home", "engineering", "equalizer", "contact_support"],
24
  styles={
25
  "navtab": {"background-color": "#fff"},
26
  "tabOptionsStyle": {
 
46
  with st.spinner("Loading Dashboard..."):
47
  dashboard()
48
 
49
+ # elif selected_task == "Data":
50
+ # load_header("Manually Labelled Dataset")
51
+ # dataset_page()
52
 
53
  elif selected_task == "About Us":
54
  about_us_page(CONTRIBUTORS)
st_pages/analyse.py CHANGED
@@ -1,71 +1,33 @@
1
  import pandas as pd
2
  import streamlit as st
3
- from utils.scraper import fetch_main_tweet_dataframe, fetch_comments_dataframe
4
- from utils.utils import (
5
- load_header,
6
- is_valid_twitter_url,
7
- combine_author_and_comments_df,
8
- )
9
-
10
 
11
  def analyse_page():
12
- load_header("Analizar Tweet")
13
 
14
- cols = st.columns([5, 1, 1])
15
 
16
  with cols[0]:
17
- twitter_url = st.text_input(
18
- "Paste your link here:",
19
- placeholder="https://x.com/Google/status/1790555395041472948",
20
- ).strip()
21
 
22
  with cols[1]:
23
  st.write("<br>", unsafe_allow_html=True)
24
- submitted = st.button("Submit", use_container_width=True)
25
-
26
- if submitted and not is_valid_twitter_url(twitter_url):
27
- st.toast("⚠️ Invalid URL")
28
-
29
- if submitted and is_valid_twitter_url(twitter_url):
30
- if "master_df" not in st.session_state:
31
- st.session_state["master_df"] = None
32
 
33
- with st.spinner("Scraping data..."):
34
- df_author = fetch_main_tweet_dataframe(twitter_url)
35
- df_comments = fetch_comments_dataframe(twitter_url)
36
 
37
- df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
38
- df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
39
 
40
- master_df = combine_author_and_comments_df(df_author, df_comments)
41
- st.session_state["master_df"] = master_df
 
42
 
43
  elif not submitted:
44
- with st.spinner("Loading data..."):
45
- df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
46
- df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
47
-
48
- master_df = combine_author_and_comments_df(df_author, df_comments)
49
- st.session_state["master_df"] = master_df
50
-
51
- with st.expander("Sample Data", expanded=True):
52
- if (
53
- "master_df" in st.session_state
54
- and st.session_state["master_df"] is not None
55
- ):
56
- st.dataframe(
57
- st.session_state["master_df"], height=450, use_container_width=True
58
- )
59
-
60
- with cols[2]:
61
- st.write("<br>", unsafe_allow_html=True)
62
- st.download_button(
63
- label="Download CSV",
64
- data=st.session_state["master_df"].to_csv(index=False).encode("utf-8"),
65
- file_name="output.csv",
66
- use_container_width=True,
67
- )
68
-
69
 
70
  if __name__ == "__main__":
71
  st.set_page_config(
@@ -73,3 +35,5 @@ if __name__ == "__main__":
73
  )
74
  with st.spinner("Loading Dashboard..."):
75
  analyse_page()
 
 
 
1
  import pandas as pd
2
  import streamlit as st
3
+ from utils.utils import load_header
 
 
 
 
 
 
4
 
5
  def analyse_page():
6
+ load_header("Analizar CSV")
7
 
8
+ cols = st.columns([5, 1])
9
 
10
  with cols[0]:
11
+ uploaded_file = st.file_uploader("Agrega tu archivo CSV aquí", type=["csv"])
 
 
 
12
 
13
  with cols[1]:
14
  st.write("<br>", unsafe_allow_html=True)
15
+ submitted = st.button("Procesar", use_container_width=True)
 
 
 
 
 
 
 
16
 
17
+ if submitted and uploaded_file is not None:
18
+ with st.spinner("Leyendo archivo..."):
19
+ df = pd.read_csv(uploaded_file)
20
 
21
+ st.session_state["master_df"] = df
 
22
 
23
+ # Muestra las primeras 5 filas del DataFrame
24
+ with st.expander("Vista previa datos", expanded=True):
25
+ st.dataframe(df.head(), height=450, use_container_width=True)
26
 
27
  elif not submitted:
28
+ if "master_df" in st.session_state and st.session_state["master_df"] is not None:
29
+ with st.expander("Sample Data", expanded=True):
30
+ st.dataframe(st.session_state["master_df"].head(), height=450, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  if __name__ == "__main__":
33
  st.set_page_config(
 
35
  )
36
  with st.spinner("Loading Dashboard..."):
37
  analyse_page()
38
+
39
+
utils/graph_functions.py CHANGED
@@ -45,7 +45,14 @@ def load_data(df):
45
 
46
  @st.cache_data
47
  def process_texts(texts):
48
- stop_words = set(stopwords.words("english"))
 
 
 
 
 
 
 
49
  tokenized_texts = texts.apply(word_tokenize)
50
  tokenized_texts = tokenized_texts.apply(
51
  lambda x: [word.lower() for word in x if word.lower() not in stop_words]
@@ -90,7 +97,7 @@ def display_word_cloud(dataframe):
90
 
91
 
92
  def most_common_trigrams(df, pdf=False):
93
- stop_words = set(stopwords.words("english")) # noqa: F841
94
 
95
  colors = ["#ff2b2b", "#83c9ff", "#0068c9"]
96
  fig = make_subplots(rows=1, cols=3)
@@ -155,7 +162,7 @@ def display_target_count(df):
155
  2,
156
  )
157
  fig.update_layout(
158
- title_text="Sentiment Distribution",
159
  title_y=1,
160
  title_font=dict(color="#808495", size=15),
161
  autosize=True,
@@ -192,15 +199,15 @@ def sentiment_over_date(df):
192
  fig.update_yaxes(showgrid=False)
193
  fig.update_layout(
194
  title={
195
- "text": "Sentiment Over Time",
196
  "x": 0.2,
197
  "y": 1,
198
  "xanchor": "center",
199
  "yanchor": "top",
200
  "font": {"size": 15, "color": "#808495", "family": "Arial"},
201
  },
202
- xaxis_title="Date",
203
- yaxis_title="Sentiment Count",
204
  hovermode="x",
205
  showlegend=True,
206
  autosize=False,
@@ -331,4 +338,4 @@ def metrics_bar(tweet_data, df):
331
  # st.info(f"##### **Overall Sentiment**: :{TEXT_COLOR[tweet_data['overall_sentiment'].lower()]}[**{tweet_data['overall_sentiment']}**]")
332
  pos.metric(label=":green[Positive]", value=tweet_data["positive"])
333
  neu.metric(label=":gray[Neutral]", value=tweet_data["neutral"])
334
- neg.metric(label=":red[Negative]", value=tweet_data["negative"])
 
45
 
46
  @st.cache_data
47
  def process_texts(texts):
48
+ custom_stopwords = set([
49
+ 'ser', 'haber', 'hacer', 'tener', 'poder', 'ir', 'q', 'si', 'solo', 'saber', 'decir',
50
+ 'dar', 'querer', 'ver', 'así', 'sos', 'maje', 'dejar', 'si', 'solo', 'si', 'op', 'vos',
51
+ 'cada', 'mismo', 'usted', 'mas', 'pues', 'andar', 'ahora', 'claro', 'nunca', 'quedar', 'pasar',
52
+ 'venir', 'poner', 'dio', 'señora', 'señor', 'ahí', 'asi', 'vez', 'jajaja'
53
+ ])
54
+ stop_words = set(stopwords.words("spanish"))
55
+ stop_words.update(custom_stopwords)
56
  tokenized_texts = texts.apply(word_tokenize)
57
  tokenized_texts = tokenized_texts.apply(
58
  lambda x: [word.lower() for word in x if word.lower() not in stop_words]
 
97
 
98
 
99
  def most_common_trigrams(df, pdf=False):
100
+ stop_words = set(stopwords.words("spanish")) # noqa: F841
101
 
102
  colors = ["#ff2b2b", "#83c9ff", "#0068c9"]
103
  fig = make_subplots(rows=1, cols=3)
 
162
  2,
163
  )
164
  fig.update_layout(
165
+ title_text="Análisis de Sentimientos",
166
  title_y=1,
167
  title_font=dict(color="#808495", size=15),
168
  autosize=True,
 
199
  fig.update_yaxes(showgrid=False)
200
  fig.update_layout(
201
  title={
202
+ "text": "Sentimiento a través del tiempo",
203
  "x": 0.2,
204
  "y": 1,
205
  "xanchor": "center",
206
  "yanchor": "top",
207
  "font": {"size": 15, "color": "#808495", "family": "Arial"},
208
  },
209
+ xaxis_title="Fecha",
210
+ yaxis_title="Conteo",
211
  hovermode="x",
212
  showlegend=True,
213
  autosize=False,
 
338
  # st.info(f"##### **Overall Sentiment**: :{TEXT_COLOR[tweet_data['overall_sentiment'].lower()]}[**{tweet_data['overall_sentiment']}**]")
339
  pos.metric(label=":green[Positive]", value=tweet_data["positive"])
340
  neu.metric(label=":gray[Neutral]", value=tweet_data["neutral"])
341
+ neg.metric(label=":red[Negative]", value=tweet_data["negative"])