Upload 9 files

Files changed (10) hide show

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model_lstm_3.keras filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

+import streamlit as st
+import pandas as pd
+# import joblib as jb
+import eda
+import prediction
+#header
+"""
+# Graded Challenge 7
+**Name    : Frederick Kurniawan Putra**
+**Batch   : HCK016**
+This is model deployment of Hate Speech Sentiment Prediction.
+"""
+PAGES = {
+    "Eda": eda,
+    "Prediction": prediction
+}
+st.sidebar.title('Navigation')
+selection = st.sidebar.radio("Go to", list(PAGES.keys()))
+page = PAGES[selection]
+page.app()

eda.py ADDED Viewed

+import streamlit as st
+from PIL import Image
+def app():
+    st.title('EDA')
+    # EDA 1
+    st.write('1. Percentage of Hate Speech and Non Hate Speech')
+    image = Image.open('eda_1.png')
+    st.image(image)
+    st.write('Here we can see that our dataset has 85.58% Non Hate Speech, while hate speech represents 14.42% data. This means that the data is imbalanced.')
+    # EDA 2
+    st.write('2. Most Frequent Unigram in dataset')
+    image = Image.open('eda_2.png')
+    st.image(image)
+    st.write("In this data we can see that Most frequent Unigrams that might represent mockery and racism are white, black, slut, afro, faggot. This indicates that racism slur is a indication of hate speech.")
+    # EDA 3
+    st.write('3. Most Frequent Bigram in dataset')
+    image = Image.open('eda_3.png')
+    st.image(image)
+    st.write("Here we can see that Most Frequent Bigrams in hate speech content also dominated with racial slur such as afro american, ching chong, non whites, while also followed by mockery such as shithole countries and fucking retard.")
+    # EDA 4
+    st.write('4. Top County that become topic of target in Content')
+    image = Image.open('eda_4.png')
+    st.image(image)
+    st.write("Here we can see that country that become a topic or target for hate speeech are africa, america, europt or london. We can safely assume based on bigram analysis that the context is refering to african american.")

eda_1.png ADDED Viewed

eda_2.png ADDED Viewed

eda_3.png ADDED Viewed

eda_4.png ADDED Viewed

model_lstm_3.keras ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb97107b62ea08755bafef7adb9f9931eb5a5f121deb9cd9e3c34a4a3a1ebb7
+size 32582869

prediction.py ADDED Viewed

+import streamlit as st
+import pandas as pd
+import pickle
+import tensorflow as tf
+from tensorflow.keras.layers import Dense, Concatenate, Input, Dropout
+from tensorflow.keras.models import load_model, Sequential, Model
+def user_input():
+    txt = st.text_area('Text to analyze', '''
+    It was the best of times, it was the worst of times, it was
+    the age of wisdom, it was the age of foolishness, it was
+    the epoch of belief, it was the epoch of incredulity, it
+    was the season of Light, it was the season of Darkness, it
+    was the spring of hope, it was the winter of despair, (
+    ''')
+    data = {
+        'Content': txt
+    }
+    features = pd.DataFrame(data, index=[0])
+    return features
+def app():
+    st.title('Hate Speech Sentiment Analysis')
+    # Getting user input
+    input_df = user_input()
+    # load model
+    model_1 = load_model('model_lstm_3.keras')
+    # Predict Score
+    if st.button('Analyze Now'):
+        predict_proba = model_1.predict(input_df)
+        predictions = tf.where(predict_proba >= 0.5, 1, 0)
+        if predictions == 1:
+            st.write("Analysis: Hate Speech")
+        else:
+            st.write("Analysis: Non-Hate Speech")
+    else:
+        st.write('Analysis:')
+app()

requirements.txt ADDED Viewed

+scikit-learn==1.5.0
+pandas
+matplotlib
+joblib
+streamlit==0.71.0
+tensorflow==2.15.0