UdonUltra commited on
Commit
c884264
1 Parent(s): fbbb6be

Upload 9 files

Browse files
Files changed (10) hide show
  1. .gitattributes +1 -0
  2. app.py +27 -0
  3. eda.py +29 -0
  4. eda_1.png +0 -0
  5. eda_2.png +0 -0
  6. eda_3.png +0 -0
  7. eda_4.png +0 -0
  8. model_lstm_3.keras +3 -0
  9. prediction.py +47 -0
  10. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_lstm_3.keras filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ # import joblib as jb
4
+ import eda
5
+ import prediction
6
+
7
+
8
+ #header
9
+ """
10
+ # Graded Challenge 7
11
+ **Name : Frederick Kurniawan Putra**
12
+
13
+ **Batch : HCK016**
14
+
15
+ This is model deployment of Hate Speech Sentiment Prediction.
16
+ """
17
+
18
+
19
+
20
+ PAGES = {
21
+ "Eda": eda,
22
+ "Prediction": prediction
23
+ }
24
+ st.sidebar.title('Navigation')
25
+ selection = st.sidebar.radio("Go to", list(PAGES.keys()))
26
+ page = PAGES[selection]
27
+ page.app()
eda.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+
4
+ def app():
5
+ st.title('EDA')
6
+
7
+ # EDA 1
8
+ st.write('1. Percentage of Hate Speech and Non Hate Speech')
9
+ image = Image.open('eda_1.png')
10
+ st.image(image)
11
+ st.write('Here we can see that our dataset has 85.58% Non Hate Speech, while hate speech represents 14.42% data. This means that the data is imbalanced.')
12
+
13
+ # EDA 2
14
+ st.write('2. Most Frequent Unigram in dataset')
15
+ image = Image.open('eda_2.png')
16
+ st.image(image)
17
+ st.write("In this data we can see that Most frequent Unigrams that might represent mockery and racism are white, black, slut, afro, faggot. This indicates that racism slur is a indication of hate speech.")
18
+
19
+ # EDA 3
20
+ st.write('3. Most Frequent Bigram in dataset')
21
+ image = Image.open('eda_3.png')
22
+ st.image(image)
23
+ st.write("Here we can see that Most Frequent Bigrams in hate speech content also dominated with racial slur such as afro american, ching chong, non whites, while also followed by mockery such as shithole countries and fucking retard.")
24
+
25
+ # EDA 4
26
+ st.write('4. Top County that become topic of target in Content')
27
+ image = Image.open('eda_4.png')
28
+ st.image(image)
29
+ st.write("Here we can see that country that become a topic or target for hate speeech are africa, america, europt or london. We can safely assume based on bigram analysis that the context is refering to african american.")
eda_1.png ADDED
eda_2.png ADDED
eda_3.png ADDED
eda_4.png ADDED
model_lstm_3.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb97107b62ea08755bafef7adb9f9931eb5a5f121deb9cd9e3c34a4a3a1ebb7
3
+ size 32582869
prediction.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import pickle
4
+ import tensorflow as tf
5
+ from tensorflow.keras.layers import Dense, Concatenate, Input, Dropout
6
+ from tensorflow.keras.models import load_model, Sequential, Model
7
+
8
+
9
+ def user_input():
10
+
11
+ txt = st.text_area('Text to analyze', '''
12
+ It was the best of times, it was the worst of times, it was
13
+ the age of wisdom, it was the age of foolishness, it was
14
+ the epoch of belief, it was the epoch of incredulity, it
15
+ was the season of Light, it was the season of Darkness, it
16
+ was the spring of hope, it was the winter of despair, (
17
+ ''')
18
+
19
+
20
+ data = {
21
+ 'Content': txt
22
+
23
+ }
24
+ features = pd.DataFrame(data, index=[0])
25
+ return features
26
+
27
+ def app():
28
+ st.title('Hate Speech Sentiment Analysis')
29
+ # Getting user input
30
+ input_df = user_input()
31
+
32
+ # load model
33
+ model_1 = load_model('model_lstm_3.keras')
34
+
35
+ # Predict Score
36
+ if st.button('Analyze Now'):
37
+ predict_proba = model_1.predict(input_df)
38
+ predictions = tf.where(predict_proba >= 0.5, 1, 0)
39
+ if predictions == 1:
40
+ st.write("Analysis: Hate Speech")
41
+ else:
42
+ st.write("Analysis: Non-Hate Speech")
43
+ else:
44
+ st.write('Analysis:')
45
+
46
+
47
+ app()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ scikit-learn==1.5.0
2
+ pandas
3
+ matplotlib
4
+ joblib
5
+ streamlit==0.71.0
6
+ tensorflow==2.15.0