GC_7 / app.py
Gansol's picture
Update app.py
62372e7 verified
raw
history blame contribute delete
No virus
3.72 kB
# Library Load Model
import pandas as pd
import numpy as np
import pickle
import streamlit as st
# Library Pre-Processing
import nltk
import re
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
# Download the NLTK data packages this script relies on.
for _nltk_resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_nltk_resource)
# Load the trained model from the local 'model' path
model_path = 'model'
model = tf.keras.models.load_model(model_path)

# Lemmatizer instance shared by the preprocessing code
lemmatizer = WordNetLemmatizer()

# Stopwords: start from the English list shipped with NLTK
stop_words_en = stopwords.words("english")
print('Stopwords from NLTK')
print(len(stop_words_en), stop_words_en)
print('')

# Extra stopwords, merged into the NLTK list and de-duplicated
new_stop_words = ['aye', 'mine', 'have']
stop_words_en = list(set(stop_words_en + new_stop_words))
print('Out Final Stopwords')
print(len(stop_words_en), stop_words_en)
# Create a function for review preprocessing
def review_preprocessing(review):
    '''
    Clean a raw review and return it as a single space-joined string of lemmas.

    Steps, in order: lowercase; remove @mentions and #hashtags; remove literal
    backslash-n sequences; strip surrounding whitespace; remove URLs; replace
    every character that is not an ASCII letter or whitespace with a space;
    tokenize; drop stopwords; lemmatize.

    Parameters:
    review (str): The raw review text.

    Returns:
    str: The preprocessed review.
    '''
    # Case folding
    review = review.lower()
    # Mention removal
    review = re.sub(r"@[A-Za-z0-9_]+", " ", review)
    # Hashtag removal
    review = re.sub(r"#[A-Za-z0-9_]+", " ", review)
    # Removal of literal "\n" text (a backslash followed by "n"). Real newline
    # characters are ordinary whitespace and are left for the tokenizer.
    review = re.sub(r"\\n", " ", review)
    # Leading/trailing whitespace removal
    review = review.strip()
    # URL removal (the dot after "www" is escaped so that ordinary words which
    # merely start with "www" are not swallowed)
    review = re.sub(r"http\S+", " ", review)
    review = re.sub(r"www\.\S+", " ", review)
    # Non-letter removal (emoticons, symbols, digits, punctuation, apostrophes,
    # accented characters such as ï ¿ ½ ý). One raw-string pattern replaces the
    # former chain of substitutions and yields the same result.
    review = re.sub(r"[^A-Za-z\s]", " ", review)
    # Tokenization
    tokens = word_tokenize(review)
    # Stopword removal; a set gives constant-time membership checks
    stop_words = set(stop_words_en)
    tokens = [word for word in tokens if word not in stop_words]
    # Lemmatization
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    # Combine tokens back into a single string
    return ' '.join(tokens)
def preprocess_text(text):
    '''
    Preprocess text by delegating to review_preprocessing.

    Parameters:
    text (str): The input text to be preprocessed.

    Returns:
    str: The preprocessed text, exactly as returned by review_preprocessing.
    '''
    return review_preprocessing(text)
def run():
    '''
    Render the Streamlit page: a review form and, once the form is submitted,
    the predicted rating class for the entered review.
    '''
    # Create the title
    st.title("DETECTION RATING BASED ON MCDONALD'S CUSTOMER REVIEW")
    st.subheader('Detecting Reviews')
    st.markdown('---')

    # Build the form
    with st.form(key='review'):
        st.write("## Customers' Review")
        # Review text input
        text = st.text_input("Enter The Review:")
        submitted = st.form_submit_button('Predict')

    # Perform prediction
    if submitted:
        df_inf = pd.DataFrame([{'preprocessing_review': text}])
        df_inf['preprocessing_review'] = df_inf['preprocessing_review'].apply(review_preprocessing)

        # The class index must come from the model output, not from the input
        # text column (the previous code took argmax over the text and thereby
        # discarded the prediction).
        # NOTE(review): assumes the model returns one score per class for each
        # input row - confirm against the trained model.
        y_pred_scores = model.predict(df_inf['preprocessing_review'])
        y_pred_inf = int(np.ravel(np.argmax(y_pred_scores, axis=-1))[0])

        # Display the prediction result
        if y_pred_inf == 0:
            st.subheader("Prediction: Negative Comment with Rating 1 Star - 2 Stars")
        elif y_pred_inf == 1:
            st.subheader("Prediction: Neutral Comment with Rating 3 Stars")
        else:
            st.subheader("Prediction: Positive Comment with Rating 5 Stars")

        # Display the text that was submitted
        st.subheader("Extracted Text:")
        st.write(text)
# Script entry point: render the app when this file is executed directly.
if __name__ == "__main__":
    run()