# NOTE(review): removed non-Python extraction residue here (file-size line,
# git-blame hashes, and a flattened line-number gutter) that broke parsing.
# Library Load Model
import pandas as pd
import numpy as np
import pickle
import streamlit as st
# Library Pre-Processing
import nltk
import re
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
# Location of the saved Keras model (SavedModel directory).
model_path = 'model'
# Load the trained review-rating classification model once at startup.
model = tf.keras.models.load_model(model_path)

# --- Stopword setup ------------------------------------------------------
# English stopwords shipped with NLTK (stopwords is already imported at the
# top of the file; the original re-imported it redundantly here).
stop_words_en = stopwords.words("english")
print('Stopwords from NLTK')
print(len(stop_words_en), stop_words_en)
print('')

# Extra domain-specific stopwords to filter out of the reviews.
new_stop_words = ['aye', 'mine', 'have']

# WordNet-based lemmatizer used by review_preprocessing().
lemmatizer = WordNetLemmatizer()

# Merge the NLTK and custom stopword lists, de-duplicated.
stop_words_en = list(set(stop_words_en + new_stop_words))
print('Out Final Stopwords')
print(len(stop_words_en), stop_words_en)
# Create A Function for review Preprocessing
def review_preprocessing(review):
    """Clean and normalize a raw review string for model input.

    Steps: lowercase, remove mentions/hashtags/newlines/URLs, drop
    non-letter characters, tokenize, remove stopwords, lemmatize, then
    re-join the tokens into a single space-separated string.

    Parameters:
        review (str): Raw review text.

    Returns:
        str: The preprocessed review.
    """
    # Case folding
    review = review.lower()
    # Mention removal (@user)
    review = re.sub(r"@[A-Za-z0-9_]+", " ", review)
    # Hashtag removal (#tag)
    review = re.sub(r"#[A-Za-z0-9_]+", " ", review)
    # Newline removal: r"\n" matches a real newline character; the original
    # r"\\n" only matched a literal backslash followed by the letter 'n'.
    review = re.sub(r"\n", " ", review)
    # URL removal; the dot in "www." is escaped so it does not match any
    # character (the original "www.\S+" treated '.' as a wildcard).
    review = re.sub(r"http\S+", " ", review)
    review = re.sub(r"www\.\S+", " ", review)
    # Non-letter removal (emoticons, symbols such as μ, $, 兀, etc.),
    # keeping whitespace and apostrophes.
    review = re.sub(r"[^A-Za-z\s']", " ", review)
    # Mis-encoded character cleanup. The original used classes like
    # ["'ï'"], which also stripped apostrophes — contradicting the rule
    # above that deliberately keeps them.
    review = re.sub(r"[ï¿½ý]", " ", review)
    # Leading/trailing whitespace removal
    review = review.strip()
    # Tokenization
    tokens = word_tokenize(review)
    # Stopword removal (module-level stop_words_en)
    tokens = [word for word in tokens if word not in stop_words_en]
    # Lemmatization (module-level lemmatizer)
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    # Re-join tokens into one cleaned string
    return ' '.join(tokens)
def preprocess_text(text):
    """Preprocess text by cleaning, removing stopwords, and lemmatizing.

    Thin convenience wrapper around :func:`review_preprocessing`.

    Parameters:
        text (str): The input text to be preprocessed.

    Returns:
        str: The preprocessed text.
    """
    return review_preprocessing(text)
def run():
    """Render the Streamlit page and predict a rating class for a review."""
    # Page title / header
    st.title("DETECTION RATING BASED ON MCDONALD'S CUSTOMER REVIEW")
    st.subheader('Detecting Reviews')
    st.markdown('---')

    # Review submission form
    with st.form(key='review'):
        st.write("## Customers' Review")
        text = st.text_input("Enter The Review:")
        submitted = st.form_submit_button('Predict')

    # Perform prediction once the form is submitted
    if submitted:
        # Single-row inference frame holding the cleaned review text.
        df_inf = pd.DataFrame([{'preprocessing_review': text}])
        df_inf['preprocessing_review'] = df_inf['preprocessing_review'].apply(review_preprocessing)
        y_pred_inf = model.predict(df_inf['preprocessing_review'])
        # BUG FIX: take argmax over the model's class probabilities.
        # The original applied np.argmax to the input text column, so the
        # model's prediction was never actually used.
        y_pred_inf = np.argmax(y_pred_inf, axis=-1)

        # Display the prediction result (0=negative, 1=neutral, else positive)
        if y_pred_inf == 0:
            st.subheader("Prediction: Negative Comment with Rating 1 Star - 2 Stars")
        elif y_pred_inf == 1:
            st.subheader("Prediction: Neutral Comment with Rating 3 Stars")
        else:
            st.subheader("Prediction: Positive Comment with Rating 5 Stars")

        # Display the extracted text
        st.subheader("Extracted Text:")
        st.write(text)
# Script entry point: launch the Streamlit app when executed directly.
# (A stray trailing '|' extraction artifact was removed from the call line.)
if __name__ == '__main__':
    run()