p2-m2-rmt018 / prediction.py
mukhlishr's picture
Update prediction.py
fbf816c
raw
history blame contribute delete
No virus
3.56 kB
import streamlit as st
import pandas as pd
import numpy as np
import nltk
import tensorflow as tf
from nltk.corpus import stopwords
import re
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from keras.models import load_model
# Load the trained LSTM classifier from TensorFlow SavedModel format
# (the 'lstm1_model' directory). Loaded once at module import so every
# call to run() reuses the same in-memory model.
new_model = tf.keras.models.load_model('lstm1_model')
def run():
    """Render the review/rating form, preprocess the submitted review text,
    and display the sentiment class predicted by the loaded LSTM model.

    Side effects: draws Streamlit widgets; downloads NLTK resources
    (stopwords, punkt, wordnet) on first use.
    """
    with st.form(key='Review & Rating'):
        Review = st.text_input('Review your stay', value='')
        Rating = st.selectbox('Rate us', (1, 2, 3, 4, 5), index=3,
                              help='1 = very bad , 5 = very nice')
        st.markdown('---')
        submitted = st.form_submit_button('Predict')

    # Collect the form values into a one-row frame and echo it back.
    data_inf = pd.DataFrame([{'Review': Review, 'Rating': Rating}])
    st.dataframe(data_inf)

    # Make sure the NLTK resources used below are available.
    nltk.download('stopwords')
    nltk.download('punkt')
    nltk.download('wordnet')

    # English stopwords extended with domain-specific high-frequency words
    # that carry no sentiment signal in hotel reviews.
    stw_en = stopwords.words("english")
    new_stw = ['hotel', 'room', 'rooms', 'good', 'day', 'resort', 'night',
               'restaurant', 'people', 'time', "n't", 'got', 'staff', 'stay',
               'location', 'service', 'stayed', 'beach', 'breakfast', 'clean',
               'food', 'place', 'pool', 'like', 'really', 'bed', 'area',
               'bar', 'small', 'walk', 'little', 'bathroom', 'trip', 'floor',
               'minute', 'water', 'lot', 'great', 'nice', 'went', 'thing',
               'problem', 'want', 'drink', 'way', 'get', 'go', 'say']
    stw_en = list(set(stw_en + new_stw))

    def text_proses(teks):
        """Lower-case, strip mentions/hashtags/links/non-letters, tokenize,
        and drop stopwords. Returns the cleaned text as one string."""
        teks = teks.lower()
        teks = re.sub(r"@[A-Za-z0-9_]+", " ", teks)   # mentions
        teks = re.sub(r"#[A-Za-z0-9_]+", " ", teks)   # hashtags
        teks = re.sub(r"\\n", " ", teks)              # literal "\n" sequences
        teks = teks.strip()
        teks = re.sub(r"http\S+", " ", teks)          # links
        teks = re.sub(r"www\.\S+", " ", teks)         # '.' escaped (was 'www.')
        # Drop anything that is not a letter, whitespace, or apostrophe
        # (emoji, digits, math symbols, ...). Raw string avoids the
        # invalid-escape DeprecationWarning of the original "[^A-Za-z\s']".
        teks = re.sub(r"[^A-Za-z\s']", " ", teks)
        tokens = word_tokenize(teks)
        return ' '.join(word for word in tokens if word not in stw_en)

    def lemmatize_text(text):
        """Lemmatize each whitespace-separated word as a verb ('v')."""
        # Hoisted: the original built a new WordNetLemmatizer per word.
        lemmatizer = WordNetLemmatizer()
        return ' '.join(lemmatizer.lemmatize(word, 'v')
                        for word in text.split())

    # Apply the full preprocessing pipeline and show the result
    # (the original's bare `data_inf` expressions were no-ops in a script).
    data_inf['text_processed'] = (data_inf['Review']
                                  .apply(text_proses)
                                  .apply(lemmatize_text))
    st.dataframe(data_inf)

    if submitted:
        # One row of class probabilities; argmax picks the most likely
        # class directly. (The original thresholded at 0.5 with np.where
        # before idxmax, which can produce an all-zero row and mislabel
        # low-confidence predictions.)
        y_pred = new_model.predict(data_inf['text_processed'])
        label = int(np.argmax(y_pred, axis=1)[0])
        # Bug fix: the original compared `Series.any()` — a boolean — with
        # 2 and 1, so the Positive branch could never fire, and the
        # if/if-else chain could print two messages for one prediction.
        if label == 2:
            st.write('## Dude, your guest gave Positive feedback')
        elif label == 1:
            st.write('## Dude, your guest gave Neutral feedback')
        else:
            st.write('## Attention, your guest gave Negative feedback')
# Script entry point: render the app when executed directly
# (e.g. via `streamlit run`), not when imported as a module.
if __name__ == '__main__':
    run()