Spaces:

arpy8
/

Omdena-IREX-Sentiment-Analysis

Running

App Files Community

arpy8 committed on Jul 19, 2024

Commit

f3b949b

1 Parent(s): 2ae57e9

update analyse and graph functions

Browse files

Files changed (3) hide show

app.py +5 -5
st_pages/analyse.py +17 -53
utils/graph_functions.py +14 -7

app.py CHANGED Viewed

@@ -19,8 +19,8 @@ init_session_state()
 with st.sidebar:
     st.write("<br>" * 4, unsafe_allow_html=True)
     selected_task = on_hover_tabs(
-        tabName=["Home Page", "Analyse Sentiment", "Dashboard", "Data", "About Us"],
-        iconName=["home", "engineering", "equalizer", "analytics", "contact_support"],
         styles={
             "navtab": {"background-color": "#fff"},
             "tabOptionsStyle": {
@@ -46,9 +46,9 @@ elif selected_task == "Dashboard":
         with st.spinner("Loading Dashboard..."):
             dashboard()
-elif selected_task == "Data":
-    load_header("Manually Labelled Dataset")
-    dataset_page()
 elif selected_task == "About Us":
     about_us_page(CONTRIBUTORS)

 with st.sidebar:
     st.write("<br>" * 4, unsafe_allow_html=True)
     selected_task = on_hover_tabs(
+        tabName=["Home Page", "Analyse Sentiment", "Dashboard", "About Us"],
+        iconName=["home", "engineering", "equalizer", "contact_support"],
         styles={
             "navtab": {"background-color": "#fff"},
             "tabOptionsStyle": {
         with st.spinner("Loading Dashboard..."):
             dashboard()
+# elif selected_task == "Data":
+#     load_header("Manually Labelled Dataset")
+#     dataset_page()
 elif selected_task == "About Us":
     about_us_page(CONTRIBUTORS)

st_pages/analyse.py CHANGED Viewed

@@ -1,71 +1,33 @@
 import pandas as pd
 import streamlit as st
-from utils.scraper import fetch_main_tweet_dataframe, fetch_comments_dataframe
-from utils.utils import (
-    load_header,
-    is_valid_twitter_url,
-    combine_author_and_comments_df,
-)
 def analyse_page():
-    load_header("Analizar Tweet")
-    cols = st.columns([5, 1, 1])
     with cols[0]:
-        twitter_url = st.text_input(
-            "Paste your link here:",
-            placeholder="https://x.com/Google/status/1790555395041472948",
-        ).strip()
     with cols[1]:
         st.write("<br>", unsafe_allow_html=True)
-        submitted = st.button("Submit", use_container_width=True)
-    if submitted and not is_valid_twitter_url(twitter_url):
-        st.toast("⚠️ Invalid URL")
-    if submitted and is_valid_twitter_url(twitter_url):
-        if "master_df" not in st.session_state:
-            st.session_state["master_df"] = None
-        with st.spinner("Scraping data..."):
-            df_author = fetch_main_tweet_dataframe(twitter_url)
-            df_comments = fetch_comments_dataframe(twitter_url)
-            df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
-            df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
-            master_df = combine_author_and_comments_df(df_author, df_comments)
-            st.session_state["master_df"] = master_df
     elif not submitted:
-        with st.spinner("Loading data..."):
-            df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
-            df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
-            master_df = combine_author_and_comments_df(df_author, df_comments)
-            st.session_state["master_df"] = master_df
-        with st.expander("Sample Data", expanded=True):
-            if (
-                "master_df" in st.session_state
-                and st.session_state["master_df"] is not None
-            ):
-                st.dataframe(
-                    st.session_state["master_df"], height=450, use_container_width=True
-                )
-        with cols[2]:
-            st.write("<br>", unsafe_allow_html=True)
-            st.download_button(
-                label="Download CSV",
-                data=st.session_state["master_df"].to_csv(index=False).encode("utf-8"),
-                file_name="output.csv",
-                use_container_width=True,
-            )
 if __name__ == "__main__":
     st.set_page_config(
@@ -73,3 +35,5 @@ if __name__ == "__main__":
     )
     with st.spinner("Loading Dashboard..."):
         analyse_page()

 import pandas as pd
 import streamlit as st
+from utils.utils import load_header
 def analyse_page():
+    load_header("Analizar CSV")
+    cols = st.columns([5, 1])
     with cols[0]:
+        uploaded_file = st.file_uploader("Agrega tu archivo CSV aquí", type=["csv"])
     with cols[1]:
         st.write("<br>", unsafe_allow_html=True)
+        submitted = st.button("Procesar", use_container_width=True)
+    if submitted and uploaded_file is not None:
+        with st.spinner("Leyendo archivo..."):
+            df = pd.read_csv(uploaded_file)
+            st.session_state["master_df"] = df
+            # Muestra las primeras 5 filas del DataFrame
+            with st.expander("Vista previa datos", expanded=True):
+                st.dataframe(df.head(), height=450, use_container_width=True)
     elif not submitted:
+        if "master_df" in st.session_state and st.session_state["master_df"] is not None:
+            with st.expander("Sample Data", expanded=True):
+                st.dataframe(st.session_state["master_df"].head(), height=450, use_container_width=True)
 if __name__ == "__main__":
     st.set_page_config(
     )
     with st.spinner("Loading Dashboard..."):
         analyse_page()

utils/graph_functions.py CHANGED Viewed

@@ -45,7 +45,14 @@ def load_data(df):
 @st.cache_data
 def process_texts(texts):
-    stop_words = set(stopwords.words("english"))
     tokenized_texts = texts.apply(word_tokenize)
     tokenized_texts = tokenized_texts.apply(
         lambda x: [word.lower() for word in x if word.lower() not in stop_words]
@@ -90,7 +97,7 @@ def display_word_cloud(dataframe):
 def most_common_trigrams(df, pdf=False):
-    stop_words = set(stopwords.words("english"))  # noqa: F841
     colors = ["#ff2b2b", "#83c9ff", "#0068c9"]
     fig = make_subplots(rows=1, cols=3)
@@ -155,7 +162,7 @@ def display_target_count(df):
         2,
     )
     fig.update_layout(
-        title_text="Sentiment Distribution",
         title_y=1,
         title_font=dict(color="#808495", size=15),
         autosize=True,
@@ -192,15 +199,15 @@ def sentiment_over_date(df):
     fig.update_yaxes(showgrid=False)
     fig.update_layout(
         title={
-            "text": "Sentiment Over Time",
             "x": 0.2,
             "y": 1,
             "xanchor": "center",
             "yanchor": "top",
             "font": {"size": 15, "color": "#808495", "family": "Arial"},
         },
-        xaxis_title="Date",
-        yaxis_title="Sentiment Count",
         hovermode="x",
         showlegend=True,
         autosize=False,
@@ -331,4 +338,4 @@ def metrics_bar(tweet_data, df):
             # st.info(f"##### **Overall Sentiment**: :{TEXT_COLOR[tweet_data['overall_sentiment'].lower()]}[**{tweet_data['overall_sentiment']}**]")
             pos.metric(label=":green[Positive]", value=tweet_data["positive"])
             neu.metric(label=":gray[Neutral]", value=tweet_data["neutral"])
-            neg.metric(label=":red[Negative]", value=tweet_data["negative"])

 @st.cache_data
 def process_texts(texts):
+    custom_stopwords = set([
+    'ser', 'haber', 'hacer', 'tener', 'poder', 'ir', 'q', 'si', 'solo', 'saber', 'decir',
+    'dar', 'querer', 'ver', 'así', 'sos', 'maje', 'dejar', 'si', 'solo', 'si', 'op', 'vos',
+    'cada', 'mismo', 'usted', 'mas', 'pues', 'andar', 'ahora', 'claro', 'nunca', 'quedar', 'pasar',
+    'venir', 'poner', 'dio', 'señora', 'señor', 'ahí', 'asi', 'vez', 'jajaja'
+    ])
+    stop_words = set(stopwords.words("spanish"))
+    stop_words.update(custom_stopwords)
     tokenized_texts = texts.apply(word_tokenize)
     tokenized_texts = tokenized_texts.apply(
         lambda x: [word.lower() for word in x if word.lower() not in stop_words]
 def most_common_trigrams(df, pdf=False):
+    stop_words = set(stopwords.words("spanish"))  # noqa: F841
     colors = ["#ff2b2b", "#83c9ff", "#0068c9"]
     fig = make_subplots(rows=1, cols=3)
         2,
     )
     fig.update_layout(
+        title_text="Análisis de Sentimientos",
         title_y=1,
         title_font=dict(color="#808495", size=15),
         autosize=True,
     fig.update_yaxes(showgrid=False)
     fig.update_layout(
         title={
+            "text": "Sentimiento a través del tiempo",
             "x": 0.2,
             "y": 1,
             "xanchor": "center",
             "yanchor": "top",
             "font": {"size": 15, "color": "#808495", "family": "Arial"},
         },
+        xaxis_title="Fecha",
+        yaxis_title="Conteo",
         hovermode="x",
         showlegend=True,
         autosize=False,
             # st.info(f"##### **Overall Sentiment**: :{TEXT_COLOR[tweet_data['overall_sentiment'].lower()]}[**{tweet_data['overall_sentiment']}**]")
             pos.metric(label=":green[Positive]", value=tweet_data["positive"])
             neu.metric(label=":gray[Neutral]", value=tweet_data["neutral"])
+            neg.metric(label=":red[Negative]", value=tweet_data["negative"])