Spaces:
Runtime error
Runtime error
last milestone project bootcamp
Browse files- .gitattributes +1 -0
- app.py +11 -0
- eda.py +83 -0
- lstm1_model/fingerprint.pb +3 -0
- lstm1_model/keras_metadata.pb +3 -0
- lstm1_model/saved_model.pb +3 -0
- lstm1_model/variables/variables.data-00000-of-00001 +3 -0
- lstm1_model/variables/variables.index +0 -0
- prediction.py +166 -0
- requirements.txt +12 -0
- sensi.jpg +0 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
lstm1_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar selector that decides which page of the app is rendered.
PAGE_NAMES = ('EDA', 'Sentiment Prediction')
navigation = st.sidebar.selectbox('page : ', PAGE_NAMES)

# Anything other than 'EDA' falls through to the prediction page.
render_page = eda.run if navigation == 'EDA' else prediction.run
render_page()
|
eda.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
from wordcloud import WordCloud
|
8 |
+
|
9 |
+
|
10 |
+
# Page-level Streamlit settings, applied when this module is first imported.
_PAGE_SETTINGS = {
    'page_title': 'Sentiment Analysis',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_SETTINGS)
|
15 |
+
|
16 |
+
def run():
    """Render the exploratory-data-analysis page.

    Shows the header image, the first ten rows of the review dataset, a
    count plot of the ratings, a count plot of the sentiment derived from
    the ratings, and a word cloud built from every review text.
    """
    # Title
    st.title('Sentiment Prediction')

    # Sub header
    st.subheader('Positive, Neutral, Negative')

    # Header image
    image = Image.open('sensi.jpg')
    st.image(image, caption='image from AltextSoft, education purpose only')

    # Description
    st.write('Exploratory Data from dataset')

    # Show the data frame
    st.write('The first 10 Data')
    df = pd.read_csv('https://raw.githubusercontent.com/mukhlishr/rasyidi/main/tripadvisor_hotel_reviews.csv')
    st.dataframe(df.head(10))

    # Count plot of the target column
    st.write('###### Rating ')
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.countplot(x='Rating', data=df, ax=ax)
    st.pyplot(fig)
    # Streamlit reruns this function on every interaction; close the figure
    # so matplotlib does not accumulate open figures.
    plt.close(fig)

    def Sentimen(x):
        """Map a rating to a sentiment label.

        Ratings above 3 and up to 5 are 'positive', exactly 3 is 'neutral',
        and 1 up to (but not including) 3 is 'negative'. Any other value
        yields None.
        """
        if x <= 5 and x > 3:
            return 'positive'
        if x == 3:
            return 'neutral'
        if x < 3 and x >= 1:
            return 'negative'
        return None

    # Create column 'sentimen'
    df['sentimen'] = df['Rating'].apply(Sentimen)

    # Count plot of the derived sentiment
    st.write('###### Sentiment of Review ')
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.countplot(x='sentimen', data=df, ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    # Word cloud
    st.write('###### Word Cloud ')

    def show_wordcloud(data, title=None):
        """Draw a word cloud of the joined texts in `data` on the page.

        `title`, when given, is placed above the image.
        """
        wordcloud = WordCloud(
            background_color='black',
            max_words=200,
            max_font_size=45,
            scale=1,
            random_state=1
        ).generate(" ".join(data))

        # Always create a fresh figure: reusing figure number 1 would draw
        # on top of an earlier plot that is still open.
        fig, ax = plt.subplots(figsize=(15, 15))
        ax.axis('off')
        if title:
            fig.suptitle(title, fontsize=20)
            fig.subplots_adjust(top=2.3)

        ax.imshow(wordcloud)
        # Streamlit has no `st.plt`; figures are rendered with st.pyplot.
        st.pyplot(fig)
        plt.close(fig)

    # Word cloud over all reviews
    show_wordcloud(df['Review'].values)
|
80 |
+
|
81 |
+
|
82 |
+
# Render the EDA page when this file is executed directly as a script.
if __name__ == "__main__":
    run()
|
lstm1_model/fingerprint.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b7ac22092976fb14c5dc3d4fa9cbff17981090cff4e21c7e7439fa25666bde0
|
3 |
+
size 55
|
lstm1_model/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84bca81839322d43a3dabdf82a402aec863fb83721b806192b9f525c0db08270
|
3 |
+
size 34241
|
lstm1_model/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e190da7671489d29d2f86b59e2ff6f13694cb4584c8cf5a56abcf71489580830
|
3 |
+
size 5377771
|
lstm1_model/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a8f262655b9256c7021c6f3059b4f0b0647f923f235baf7f60350a80e8765a4
|
3 |
+
size 54617086
|
lstm1_model/variables/variables.index
ADDED
Binary file (4.21 kB). View file
|
|
prediction.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import nltk
|
5 |
+
import tensorflow as tf
|
6 |
+
from nltk.corpus import stopwords
|
7 |
+
import re
|
8 |
+
from nltk.tokenize import word_tokenize
|
9 |
+
from nltk.stem import WordNetLemmatizer
|
10 |
+
from keras.models import load_model
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
# Load the model saved in TensorFlow SavedModel format.
|
16 |
+
new_model = tf.keras.models.load_model('lstm1_model')
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
# Domain-specific words removed in addition to NLTK's English stop words.
_EXTRA_STOPWORDS = (
    'hotel', 'room', 'rooms', 'good', 'day', 'resort', 'night', 'restaurant',
    'people', 'time', "n't", 'got', 'staff', 'stay', 'location', 'service',
    'stayed', 'beach', 'breakfast', 'clean', 'food', 'place', 'pool', 'like',
    'really', 'bed', 'area', 'bar', 'small', 'walk', 'little', 'bathroom',
    'trip', 'floor', 'minute', 'water', 'lot', 'great', 'nice', 'went',
    'thing', 'problem', 'want', 'drink', 'way', 'get', 'go', 'say',
)


def _clean_text(teks, stop_words):
    """Normalize one review: lowercase, strip noise, tokenize, drop stop words."""
    # Lowercase the text
    teks = teks.lower()

    # Remove mentions
    teks = re.sub("@[A-Za-z0-9_]+", " ", teks)

    # Remove hashtags
    teks = re.sub("#[A-Za-z0-9_]+", " ", teks)

    # Remove literal backslash-n sequences
    teks = re.sub(r"\\n", " ", teks)

    # Remove surrounding whitespace
    teks = teks.strip()

    # Remove links (patterns kept exactly as written so the cleaning rules
    # themselves do not change)
    teks = re.sub(r"http\S+", " ", teks)
    teks = re.sub(r"www.\S+", " ", teks)

    # Remove everything that is not a letter, whitespace or apostrophe
    # (emoji, digits, math symbols, ...). Raw string avoids the invalid
    # escape-sequence warning for "\s".
    teks = re.sub(r"[^A-Za-z\s']", " ", teks)

    # Tokenize
    tokens = word_tokenize(teks)

    # Remove stop words
    return ' '.join(word for word in tokens if word not in stop_words)


def _lemmatize_text(text, lemmatizer):
    """Lemmatize every whitespace-separated word of `text` as a verb."""
    return ' '.join(lemmatizer.lemmatize(word, 'v') for word in text.split())


def run():
    """Render the sentiment-prediction page.

    Collects a review and a rating through a form, preprocesses the review
    (cleaning, stop-word removal, lemmatization), shows the processed text
    and, once the form is submitted, writes the sentiment predicted by the
    module-level `new_model`.
    """
    with st.form(key='Review & Rating'):
        Review = st.text_input('Review your stay', value='')
        Rating = st.selectbox('Rate us', (1, 2, 3, 4, 5), index=3, help='1 = very bad , 5 = very nice')
        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    data_inf = pd.DataFrame([{
        'Review': Review,
        'Rating': Rating,
    }])

    # NLTK resources needed for stop words, tokenization and lemmatization.
    nltk.download('stopwords')
    nltk.download('punkt')
    # Newer NLTK releases load the tokenizer tables from 'punkt_tab'.
    nltk.download('punkt_tab')
    nltk.download('wordnet')

    # Merge NLTK's English stop words with the custom ones; a set gives
    # constant-time membership tests during filtering.
    stop_words = set(stopwords.words("english")) | set(_EXTRA_STOPWORDS)

    # Apply every preprocessing step to the review text.
    data_inf['text_processed'] = data_inf['Review'].apply(
        lambda teks: _clean_text(teks, stop_words)
    )

    # One lemmatizer instance is enough for all words.
    lemmatizer = WordNetLemmatizer()
    data_inf['text_processed'] = data_inf['text_processed'].apply(
        lambda text: _lemmatize_text(text, lemmatizer)
    )

    inf = data_inf['text_processed']

    st.dataframe(inf)

    if submitted:
        # The model returns one score per class (columns 0, 1, 2); the
        # highest-scoring column is the predicted class.
        y_pred = np.asarray(new_model.predict(inf))
        predicted_class = int(np.argmax(y_pred, axis=1)[0])

        if predicted_class == 2:
            st.write('## Dude, your guest gave Positive feedback')
        elif predicted_class == 1:
            st.write('## Dude, your guest gave Neutral feedback')
        else:
            st.write('## Attention, your guest gave Negative feedback')
163 |
+
|
164 |
+
|
165 |
+
# Render the prediction page when this file is executed directly as a script.
if __name__ == "__main__":
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# daftar library yang dibutuhkan semua
|
2 |
+
streamlit
|
3 |
+
tensorflow
|
4 |
+
pandas
|
5 |
+
seaborn
|
6 |
+
matplotlib
|
7 |
+
numpy
|
8 |
+
scikit-learn==1.2.1
|
9 |
+
plotly
|
10 |
+
nltk
|
11 |
+
keras
|
12 |
+
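wordcloud  # replaces the invalid "re" entry: re ships with Python and is not pip-installable; eda.py imports wordcloud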
|
sensi.jpg
ADDED
![]() |