Spaces:

evanderin
/

P2M2_FTDS-RMT17_Evan_Derin_Ihsanudin

Runtime error

App Files Files Community

evanderin committed on Feb 11, 2023

Commit

662db6f

•

1 Parent(s): 86c8774

Upload 5 files

Browse files

Files changed (6) hide show

.gitattributes +1 -0
app.py +29 -0
eda.py +67 -0
eda_preprocessing.csv +3 -0
prediction.py +115 -0
requirements.txt +10 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 best_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 best_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+eda_preprocessing.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import streamlit as st
+import eda
+import prediction
+# Set Config dan icon
+st.set_page_config(
+        page_title='Churn Prediction',
+        layout='wide',
+        )
+# Hide Streamlit Style
+hide_streamlit_style = """
+            <style>
+            #MainMenu {visibility: hidden;}
+            footer {visibility: hidden;}
+            </style>
+            """
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+# Membuat navigasi
+st.sidebar.markdown("# Evan Derin Ihsanudin - RMT-FTDS-17")
+navigation = st.sidebar.selectbox('Pilih Halaman (Tweet Prediction/EDA): ', ('Tweet Prediction','Exploratory Data Analysis'))
+st.sidebar.image("https://imgur.com/MmPULSL.png", use_column_width=True)
+# Run modul dengan if else
+if navigation == 'Tweet Prediction' :
+    prediction.run()
+else :
+    eda.run()

eda.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+def run() :
+    # Membuat Title
+    st.markdown("<h1 style='text-align: center;'>Exploratory Data Analysis</h1>", unsafe_allow_html=True)
+    st.write('Berikut adalah EDA dan Workcloud dari Setiap Kategori Tweet')
+    # Import DF
+    df_eda = pd.read_csv('eda_preprocessing.csv')
+    # Membuat Sub Header
+    st.subheader('**Persebaran Kategori Tweet**')
+    # Membuat visualisasi Distribusi Tweet
+    fig, ax =plt.subplots(1,2,figsize=(15,6))
+    sns.countplot(x='cyberbullying_type', data=df_eda, palette="winter", ax=ax[0])
+    ax[0].set_xlabel("cyberbullying_type", fontsize= 12)
+    ax[0].set_ylabel("# of Tweet", fontsize= 12)
+    fig.suptitle('Tweet Type Distribution', fontsize=18, fontweight='bold')
+    ax[0].set_ylim(0,10000)
+    ax[0].tick_params(axis='x', rotation=90)
+    plt.xlabel("cyberbullying_type", fontsize= 12)
+    plt.ylabel("# of Tweet", fontsize= 12)
+    for p in ax[0].patches:
+        ax[0].annotate("%.0f"%(p.get_height()), (p.get_x() + p.get_width() / 2,
+                        p.get_height()+205), ha='center', va='center',fontsize = 11)
+    df_eda['cyberbullying_type'].value_counts().plot(kind='pie',autopct='%1.1f%%', textprops = {"fontsize":12})
+    ax[1].set_ylabel("% of Tweet", fontsize= 12)
+    st.pyplot(fig)
+    # Membuat Sub Header
+    st.subheader('**All Tweet**')
+    st.image('https://imgur.com/quc6ru7.png')
+    # Membuat Sub Header
+    st.subheader('**Age Tweet**')
+    st.image('https://imgur.com/WB2tdlJ.png')
+    # Membuat Sub Header
+    st.subheader('**Gender Tweet**')
+    st.image('https://imgur.com/Pd9G2k9.png')
+    # Membuat Sub Header
+    st.subheader('**Religion Tweet**')
+    st.image('https://imgur.com/GE8Sj39.png')
+    # Membuat Sub Header
+    st.subheader('**Other Cyberbullying Tweet**')
+    st.image('https://imgur.com/sr6MYGO.png')
+    # Membuat Sub Header
+    st.subheader('**Not Cyberbullying Tweet**')
+    st.image('https://imgur.com/iWyNSVH.png')
+if __name__ == '__main__':
+    run()

eda_preprocessing.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:881116368469ea2fb6e6f294dcc2aeafd835e3d8feeb7d56bbe1f3e540b523a3
+size 16791030

prediction.py ADDED Viewed

	@@ -0,0 +1,115 @@

+# Library Streamlit
+import streamlit as st
+# Library Load Model
+import pandas as pd
+import numpy as np
+from tensorflow.keras.models import load_model
+# Library Pre-Processing
+from nltk.stem import WordNetLemmatizer
+import nltk
+import re
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+def run() :
+    # Load Model
+    model_lstm = load_model('best_model')
+    # Membuat Title
+    st.markdown("<h1 style='text-align: center;'>Cyberbullying Tweet Prediction</h1>", unsafe_allow_html=True)
+    # Menambahkan Deskripsi Form
+    st.write('Page ini berisi model untuk memprediksi jenis Cyberbullying pada tweet')
+    with st.form(key= 'form_tweet'):
+        st.markdown('### **Tweet**')
+        tweet_text = st.text_input('',value= '')
+        submitted = st.form_submit_button('Predict')
+    # Additional Stopwords
+    additional_stopwords = ['rt', 'mkr', 'didn', 'bc', 'n', 'm',
+                    'im', 'll', 'y', 've', 'u', 'ur', 'don',
+                    'p', 't', 's', 'aren', 'kp', 'o', 'kat',
+                    'de', 're', 'amp', 'will', 'wa', 'e', 'like', 'andre', 'na', 're', 'lil', 'd', 'na', 'pete', 'annie', 'nikki', 'lmao', 'miley', 'wan', 'gon']
+    # Setting stopwords english
+    stpwds_eng = list(set(stopwords.words('english')))
+    for i in additional_stopwords:
+        stpwds_eng.append(i)
+    # Membuat Fungsi Pre-Processing Text
+    cleaning_pattern = "@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"
+    lemmatizer = WordNetLemmatizer()
+    def text_proses(teks):
+        # Mengubah Teks ke Lowercase
+        teks = teks.lower()
+        # Menghilangkan Link
+        teks = re.sub(cleaning_pattern, ' ', teks)
+        # Menghilangkan Mention
+        teks = re.sub("@[A-Za-z0-9_]+", " ", teks)
+        # Menghilangkan Hashtag
+        teks = re.sub("#[A-Za-z0-9_]+", " ", teks)
+        # Menghilangkan \n
+        teks = re.sub(r"\\n", " ",teks)
+        # Menghilangkan kata dibawah 3 char
+        teks = re.sub(r'\b\w{1,3}\b', " ",teks)
+        # Menghilangkan Whitespace
+        teks = teks.strip()
+        # Menghilangkan yang Bukan Huruf seperti Emoji, Gamma dll
+        teks = re.sub("[^A-Za-z\s']", " ", teks)
+        # Menghilangkan double space
+        teks = re.sub("\s\s+" , " ", teks)
+        # Melakukan Tokenisasi
+        tokens = word_tokenize(teks)
+        # Menghilangkan Stopwords
+        teks = ' '.join([word for word in tokens if word not in stpwds_eng])
+        # Melakukan Lemmatizer
+        teks = lemmatizer.lemmatize(teks)
+        return teks
+    # Membuat Dataframe
+    data_inf = {
+    'tweet_text' : tweet_text
+    }
+    data_inf = pd.DataFrame([data_inf])
+    if submitted :
+        # Preprocessing Data Inference
+        data_inf['tweet_processed'] = data_inf['tweet_text'].apply(lambda x: text_proses(x))
+        # Prediksi jenis tweet
+        y_inf_pred = np.argmax(model_lstm.predict(data_inf['tweet_processed']), axis=-1)
+        # Membuat fungsi untuk return result prediksi
+        if y_inf_pred[0] == 0:
+            result = 'age'
+        elif y_inf_pred[0] == 1:
+            result = 'ethnicity'
+        elif y_inf_pred[0] == 2:
+            result = 'gender'
+        elif y_inf_pred[0] == 3:
+            result = 'not_cyberbullying'
+        elif y_inf_pred[0] == 4:
+            result = 'other_cyberbullying'
+        else:
+            result = 'religion'
+        st.write('# Cyberbullying Prediction : ', result)
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+streamlit
+pandas
+seaborn
+matplotlib
+tensorflow == 2.11.0
+scikit-learn == 1.0.2
+numpy
+plotly
+# re is part of the Python standard library; it is not installable from PyPI
+nltk == 3.7