update app
- .gitattributes +1 -0
- app.py +19 -16
- assets/dataset/manually_labelled/_test_df_450_manual_final.csv +0 -0
- assets/dataset/manually_labelled/_train_df_2100_manual_final.csv +0 -0
- assets/dataset/manually_labelled/_validation_df_450_manual_final.csv +0 -0
- assets/dataset/manually_labelled/output_raw.csv +0 -0
- assets/html/about.html +3 -9
- assets/html/report.html +3 -0
- st_pages/about.py +1 -1
- st_pages/analyse.py +18 -9
- st_pages/dashboard.py +16 -8
- st_pages/dataset.py +44 -0
- utils/constants.py +4 -3
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.html filter=lfs diff=lfs merge=lfs -text
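Note: the new `*.html` rule routes HTML assets through Git LFS, which is why `assets/html/about.html` below now appears as an LFS pointer and the new `assets/html/report.html` is added as one. A rule like this is typically generated with `git lfs track "*.html"`.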
app.py
CHANGED
@@ -1,13 +1,13 @@
-import pandas as pd
 import streamlit as st
 from st_on_hover_tabs import on_hover_tabs
 
 from st_pages.home import home_page
 from st_pages.analyse import analyse_page
+from st_pages.dataset import dataset_page
 from st_pages.dashboard import dashboard
 from st_pages.about import about_us_page
-from utils.constants import PAGE_FAVICON, CONTRIBUTORS
-from utils.utils import load_css, init_session_state
+from utils.constants import PAGE_FAVICON, CONTRIBUTORS, DEFAULT_CHOICE
+from utils.utils import load_css, init_session_state, load_header
 
 st.set_page_config(
     page_title="Sentiment Analysis Tool", page_icon=PAGE_FAVICON, layout="wide"
@@ -19,33 +19,36 @@ init_session_state()
 with st.sidebar:
     st.write("<br>" * 4, unsafe_allow_html=True)
     selected_task = on_hover_tabs(
-        tabName=["Home Page", "Analyse Sentiment", "Dashboard", "About Us"],
-        iconName=["home", "engineering", "
+        tabName=["Home Page", "Analyse Sentiment", "Dashboard", "Data", "About Us"],
+        iconName=["home", "engineering", "equalizer", "analytics", "contact_support"],
         styles={
             "navtab": {"background-color": "#fff"},
             "tabOptionsStyle": {
                 ":hover :hover": {"color": "#170034", "cursor": "pointer"}
             },
         },
-        default_choice=
+        default_choice=DEFAULT_CHOICE,
     )
 
 if selected_task == "Home Page":
-
+    with st.spinner("Loading Home Page..."):
+        home_page()
 
 elif selected_task == "Analyse Sentiment":
-
+    with st.spinner("Loading Analyse Page..."):
+        analyse_page()
 
 elif selected_task == "Dashboard":
     if "master_df" in st.session_state and st.session_state["master_df"] is None:
-
-
-
-
-
-
-
-
+        load_header("Sentiment Analysis Dashboard")
+        st.info("Please analyze a tweet first.")
+    else:
+        with st.spinner("Loading Dashboard..."):
+            dashboard()
+
+elif selected_task == "Data":
+    load_header("Manually Labelled Dataset")
+    dataset_page()
 
 elif selected_task == "About Us":
     about_us_page(CONTRIBUTORS)
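The `load_header` and `init_session_state` helpers imported from `utils/utils.py` are not part of this diff. A minimal sketch of what they are assumed to do, inferred only from how `app.py` and `st_pages/dashboard.py` call them (the function names are real, the bodies are hypothetical):

# Hypothetical sketch -- utils/utils.py is not shown in this commit.
import streamlit as st


def init_session_state() -> None:
    # Assumed to pre-create the session keys that app.py and dashboard.py check.
    for key in ("master_df", "tweet_data"):
        if key not in st.session_state:
            st.session_state[key] = None


def load_header(title: str) -> None:
    # Assumed to render a simple page header for the given title.
    st.markdown(f"## {title}")

Pre-creating both keys is what lets the Dashboard branch above distinguish "nothing analysed yet" (`master_df` is None) from a populated session.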
assets/dataset/manually_labelled/_test_df_450_manual_final.csv
ADDED
The diff for this file is too large to render. See raw diff.

assets/dataset/manually_labelled/_train_df_2100_manual_final.csv
ADDED
The diff for this file is too large to render. See raw diff.

assets/dataset/manually_labelled/_validation_df_450_manual_final.csv
ADDED
The diff for this file is too large to render. See raw diff.

assets/dataset/manually_labelled/output_raw.csv
ADDED
The diff for this file is too large to render. See raw diff.
assets/html/about.html
CHANGED
@@ -1,9 +1,3 @@
-
-
-
-<tbody class="table">
-<th width="20%" colspan="3">Team Members</th>
-<tr width="20%">{team}</tr>
-</tbody>
-</table>
-</div>
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6fdd84a069b0f6cd27c32dc190a8dcc9a60644edd9eb87db92193619209c892
+size 200
assets/html/report.html
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cb4288084f42117df155d8cc4ff449c341fd3e2bd81bdbad8707a217cfba88b
+size 12001413
st_pages/about.py
CHANGED
@@ -4,7 +4,7 @@ from utils.constants import CONTRIBUTORS
 
 
 def about_us_page(contributors):
-    load_header("Meet
+    load_header("Meet Our Team")
 
     data = contributors
     full_string = ""
st_pages/analyse.py
CHANGED
@@ -1,3 +1,4 @@
+import pandas as pd
 import streamlit as st
 from utils.scraper import fetch_main_tweet_dataframe, fetch_comments_dataframe
 from utils.utils import (
@@ -33,20 +34,28 @@ def analyse_page():
         df_author = fetch_main_tweet_dataframe(twitter_url)
         df_comments = fetch_comments_dataframe(twitter_url)
 
-
-
+        df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
+        df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
 
         master_df = combine_author_and_comments_df(df_author, df_comments)
         st.session_state["master_df"] = master_df
 
-
-
-
+    elif not submitted:
+        with st.spinner("Loading data..."):
+            df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
+            df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
 
-
-
-
-        )
+            master_df = combine_author_and_comments_df(df_author, df_comments)
+            st.session_state["master_df"] = master_df
+
+    with st.expander("Sample Data", expanded=True):
+        if (
+            "master_df" in st.session_state
+            and st.session_state["master_df"] is not None
+        ):
+            st.dataframe(
+                st.session_state["master_df"], height=450, use_container_width=True
+            )
 
     with cols[2]:
         st.write("<br>", unsafe_allow_html=True)
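`combine_author_and_comments_df` also lives in `utils/utils.py` and is not shown in this commit. Judging from its call sites here (an author DataFrame plus a comments DataFrame in, one `master_df` out), it presumably stacks the two frames; a rough, hypothetical sketch under that assumption (the `source` column is invented for illustration):

# Hypothetical sketch -- the real helper is in utils/utils.py, outside this diff.
import pandas as pd


def combine_author_and_comments_df(
    df_author: pd.DataFrame, df_comments: pd.DataFrame
) -> pd.DataFrame:
    # Tag each row's origin, then stack author and comment rows into one frame.
    df_author = df_author.assign(source="author")
    df_comments = df_comments.assign(source="comment")
    return pd.concat([df_author, df_comments], ignore_index=True)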
st_pages/dashboard.py
CHANGED
@@ -1,11 +1,19 @@
-import pandas as pd
 import streamlit as st
 from utils.generate_pdf import construct_pdf
 from utils.utils import load_header, add_columns_for_graphs
-from utils.graph_functions import
+from utils.graph_functions import (
+    metrics_bar,
+    most_common_trigrams,
+    display_word_cloud,
+    crear_grafico_dispersion,
+    display_target_count,
+    sentiment_over_date,
+    stacked_bar_fig,
+)
 
 
 def dashboard():
+    # st.session_state["master_df"] = pd.read_csv("output.csv")
     master_df = st.session_state["master_df"]
 
     if st.session_state["tweet_data"] is None:
@@ -101,13 +109,13 @@ if __name__ == "__main__":
     )
     init_session_state()
 
-    df = pd.read_csv("assets/dataset/master.csv")
-    st.session_state["master_df"] = pd.read_csv(
-        "assets/dataset/temp_output_combined.csv"
-    )
+    # df = pd.read_csv("assets/dataset/master.csv")
+    # st.session_state["master_df"] = pd.read_csv(
+    #     "assets/dataset/temp_output_combined.csv"
+    # )
 
-    df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
-    df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
+    # df_author = pd.read_csv("assets/dataset/temp_output_author.csv")
+    # df_comments = pd.read_csv("assets/dataset/temp_output_comments.csv")
 
     # with st.spinner("Analyzing Tweets..."):
     # master_df, tweet_data = construct_master_df(df_author, df_comments)
st_pages/dataset.py
ADDED
@@ -0,0 +1,44 @@
+import pandas as pd
+import streamlit as st
+import streamlit.components.v1 as components
+
+
+def dataset_page():
+    df = pd.read_csv("assets/dataset/manually_labelled/output_raw.csv", index_col=0)
+
+    df["comment_id"] = df["comment_id"].astype("str")
+    df["post_id"] = df["post_id"].astype("str")
+
+    left, _, right = st.columns([2, 2, 1])
+
+    with left:
+        st.caption(
+            "Our team developed manually labelled datasets for the purpose of training our model. The dataset contains 3000 tweets. The dataset is divided into three subsets: train, test, and validation."
+        )
+
+    with right:
+        st.write("<br>", unsafe_allow_html=True)
+        with open(
+            "assets/dataset/manually_labelled/omdena-IREX-manually-labelled-dataset.zip",
+            "rb",
+        ) as fp:
+            st.download_button(
+                label="Download Datasets",
+                data=fp,
+                file_name="omdena-IREX-manually-labelled-dataset.zip",
+                mime="application/zip",
+                use_container_width=True,
+            )
+
+    with st.expander("Dataset Description"):
+        components.html(
+            open("assets/html/report.html", "r", encoding="utf8").read(), height=17450
+        )
+
+    with st.expander("Manually Labelled Dataset (Merged)", expanded=True):
+        st.dataframe(df, height=300)
+
+
+if __name__ == "__main__":
+    st.set_page_config(page_title="Sentiment Analysis", layout="wide")
+    dataset_page()
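Because of the `if __name__ == "__main__":` guard at the bottom, the new page can also be previewed on its own with `streamlit run st_pages/dataset.py` (assuming the LFS-tracked assets have been pulled), in addition to being reached through the new "Data" tab wired up in app.py.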
utils/constants.py
CHANGED
@@ -1,3 +1,7 @@
+from sklearn.feature_extraction import text as sklearn_text
+
+DEFAULT_CHOICE = 0
+
 PAGE_BANNER = "assets/img/banner.png"
 PAGE_FAVICON = "assets/img/favicon.png"
 PAGE_BACKGROUND = "assets/img/background.jpg"
@@ -32,9 +36,6 @@ CONTRIBUTORS = [
 
 TEXT_COLOR = {"positive": "green", "neutral": "gray", "negative": "red"}
 
-
-from sklearn.feature_extraction import text as sklearn_text
-
 spanish_stop_words = sklearn_text.ENGLISH_STOP_WORDS.union(
     set(
         [