mukhlishr committed on
Commit
54d8c28
1 Parent(s): 5f964f4

last milestone project bootcamp

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ lstm1_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit entry point: lets the user switch between the EDA and prediction pages."""
import streamlit as st
import eda
import prediction

# Sidebar selector; the chosen label decides which page module is rendered.
navigation = st.sidebar.selectbox('page : ', ('EDA', 'Sentiment Prediction'))

# Any selection other than 'EDA' falls through to the prediction page.
selected_page = eda if navigation == 'EDA' else prediction
selected_page.run()
eda.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""EDA page of the sentiment app: previews the review dataset and plots it."""
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
from wordcloud import WordCloud


# Runs once when this module is imported, i.e. before run() issues any
# other Streamlit call.
st.set_page_config(
    page_title = 'Sentiment Analysis',
    layout = 'wide',
    initial_sidebar_state='expanded'
)


def _rating_to_sentiment(x):
    """Map a rating to a sentiment label.

    Returns 'positive' for 3 < x <= 5, 'neutral' for x == 3, 'negative'
    for 1 <= x < 3, and None for anything outside those ranges.
    """
    if x <= 5 and x > 3:
        return 'positive'
    if x == 3:
        return 'neutral'
    if x < 3 and x >= 1:
        return 'negative'
    return None


def _render_countplot(df, column):
    """Draw a seaborn count plot of ``df[column]`` and render it in the page."""
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.countplot(x=column, data=df, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)


def _show_wordcloud(data, title=None):
    """Build a word cloud from an iterable of strings and render it in the page.

    Args:
        data: iterable of text snippets; they are joined with single spaces.
        title: optional figure title.
    """
    wordcloud = WordCloud(
        background_color='black',
        max_words=200,
        max_font_size=45,
        scale=1,
        random_state=1
    ).generate(" ".join(data))

    # A fresh figure per call; the original reused pyplot figure number 1.
    fig, ax = plt.subplots(figsize=(15, 15))
    ax.axis('off')
    if title:
        fig.suptitle(title, fontsize=20)
        fig.subplots_adjust(top=2.3)

    # The original called st.plt.imshow / st.plt.show, but streamlit exposes
    # no `plt` attribute; draw with matplotlib and hand the figure to st.pyplot.
    ax.imshow(wordcloud)
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the EDA page: header image, data preview, count plots, word cloud."""
    # title
    st.title( 'Sentiment Prediction')

    # sub header
    st.subheader('Positive, Neutral, Negative')

    # insert image
    image = Image.open('sensi.jpg')
    st.image(image, caption='image from AltextSoft, education purpose only')

    # description
    st.write('Exploratory Data from dataset')

    # show data frame
    st.write('The first 10 Data')
    df = pd.read_csv('https://raw.githubusercontent.com/mukhlishr/rasyidi/main/tripadvisor_hotel_reviews.csv')
    st.dataframe(df.head(10))

    # Bar plot of the target column
    st.write('###### Rating ')
    _render_countplot(df, 'Rating')

    # Create column 'sentimen'
    df['sentimen'] = df['Rating'].apply(_rating_to_sentiment)

    # Bar plot of the derived sentiment
    st.write('###### Sentiment of Review ')
    _render_countplot(df, 'sentimen')

    # Word cloud over all reviews; missing reviews are dropped so that
    # the join only ever sees strings.
    st.write('###### Word Cloud ')
    _show_wordcloud(df['Review'].dropna().astype(str).values)


if __name__ == '__main__':
    run()
lstm1_model/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b7ac22092976fb14c5dc3d4fa9cbff17981090cff4e21c7e7439fa25666bde0
3
+ size 55
lstm1_model/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84bca81839322d43a3dabdf82a402aec863fb83721b806192b9f525c0db08270
3
+ size 34241
lstm1_model/saved_model.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e190da7671489d29d2f86b59e2ff6f13694cb4584c8cf5a56abcf71489580830
3
+ size 5377771
lstm1_model/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a8f262655b9256c7021c6f3059b4f0b0647f923f235baf7f60350a80e8765a4
3
+ size 54617086
lstm1_model/variables/variables.index ADDED
Binary file (4.21 kB). View file
 
prediction.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Prediction page of the sentiment app: cleans a review and classifies it."""
import functools
import streamlit as st
import pandas as pd
import numpy as np
import nltk
import tensorflow as tf
from nltk.corpus import stopwords
import re
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from keras.models import load_model


# Load the model saved in TensorFlow SavedModel format.
new_model = tf.keras.models.load_model('lstm1_model')

# Domain words added on top of NLTK's English stopword list.
_EXTRA_STOPWORDS = (
    'hotel', 'room', 'rooms', 'good', 'day', 'resort', 'night', 'restaurant',
    'people', 'time', "n't", 'got', 'staff', 'stay', 'location', 'service',
    'stayed', 'beach', 'breakfast', 'clean', 'food', 'place', 'pool', 'like',
    'really', 'bed', 'area', 'bar', 'small', 'walk', 'little', 'bathroom',
    'trip', 'floor', 'minute', 'water', 'lot', 'great', 'nice', 'went',
    'thing', 'problem', 'want', 'drink', 'way', 'get', 'go', 'say',
)

# Message shown for each predicted class index; any other index is
# reported as negative, as in the original branching.
_FEEDBACK_BY_CLASS = {
    2: '## Dude, your guest gave Positive feedback',
    1: '## Dude, your guest gave Neutral feedback',
}
_NEGATIVE_FEEDBACK = '## Attention, your guest gave Negative feedback'


@functools.lru_cache(maxsize=1)
def _load_stopwords():
    """Download the NLTK resources once and return the merged stopword set."""
    nltk.download('stopwords', quiet=True)
    nltk.download('punkt', quiet=True)
    # NOTE(review): newer NLTK releases appear to look up 'punkt_tab' for
    # word_tokenize; requesting it as well is harmless on older versions.
    nltk.download('punkt_tab', quiet=True)
    nltk.download('wordnet', quiet=True)
    return frozenset(stopwords.words("english")) | frozenset(_EXTRA_STOPWORDS)


def text_proses(teks, stopword_set):
    """Normalise one review string and drop stopwords.

    Args:
        teks: raw review text.
        stopword_set: collection of tokens to remove after tokenisation.

    Returns:
        The cleaned text as space-separated tokens.
    """
    # Lowercase the text
    teks = teks.lower()

    # Remove mentions
    teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)

    # Remove hashtags
    teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)

    # Remove a literal backslash followed by 'n' (an escaped newline)
    teks = re.sub(r"\\n", " ", teks)

    # Remove surrounding whitespace
    teks = teks.strip()

    # Remove links (patterns kept exactly as in the original code)
    teks = re.sub(r"http\S+", " ", teks)
    teks = re.sub(r"www.\S+", " ", teks)

    # Remove everything that is not a letter, whitespace or apostrophe
    # (emoji, math symbols, digits, ...)
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)

    # Tokenise
    tokens = word_tokenize(teks)

    # Remove stopwords
    return ' '.join(word for word in tokens if word not in stopword_set)


def lemmatize_text(text):
    """Lemmatize every whitespace-separated word of ``text`` as a verb."""
    lemmatizer = WordNetLemmatizer()
    return ' '.join(lemmatizer.lemmatize(word, 'v') for word in text.split())


def run():
    """Render the review form, show the cleaned text and, on submit, the prediction."""
    with st.form(key='Review & Rating'):
        Review = st.text_input('Review your stay', value='')
        Rating = st.selectbox('Rate us', (1,2,3,4,5), index=3, help='1 = very bad , 5 = very nice')
        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    data_inf = pd.DataFrame([{
        'Review': Review,
        'Rating': Rating,
    }])

    stopword_set = _load_stopwords()

    # Apply the full preprocessing pipeline: cleaning, then lemmatization.
    data_inf['text_processed'] = data_inf['Review'].apply(
        lambda teks: text_proses(teks, stopword_set)
    )
    data_inf['text_processed'] = data_inf['text_processed'].apply(lemmatize_text)

    inf = data_inf['text_processed']
    st.dataframe(inf)

    if not submitted:
        return

    if not Review.strip():
        st.warning('Please write a review before asking for a prediction.')
        return

    # The model output has one column per class (0, 1, 2); take the most
    # probable class of the single submitted review. The original compared
    # `Series.any()` (a bool) with 2, so the positive branch never fired.
    y_pred = new_model.predict(inf)
    predicted_class = int(np.argmax(y_pred, axis=1)[0])

    st.write(_FEEDBACK_BY_CLASS.get(predicted_class, _NEGATIVE_FEEDBACK))


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # list of all libraries required by the app
2
+ streamlit
3
+ tensorflow
4
+ pandas
5
+ seaborn
6
+ matplotlib
7
+ numpy
8
+ scikit-learn==1.2.1
9
+ plotly
10
+ nltk
11
+ keras
12
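+ wordcloud  # imported by eda.py; replaces 're', a standard-library module that needs no pip entry
13
+ Pillow  # provides the PIL import used by eda.py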
sensi.jpg ADDED