File size: 3,720 Bytes
f5b8913
 
 
 
 
 
 
 
 
 
 
 
62372e7
 
 
f5b8913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c3965c
f5b8913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Library Load Model
import pandas as pd
import numpy as np
import pickle
import streamlit as st
# Library Pre-Processing
import nltk
import re
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK data packages requested by this script at start-up.
# NOTE(review): newer NLTK releases reportedly look up 'punkt_tab' for
# word_tokenize -- confirm against the installed NLTK version.
for _nltk_resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_nltk_resource)

# Relative path handed to Keras when loading the saved model.
model_path = 'model'

# Load model
model = tf.keras.models.load_model(model_path)

# Lemmatizer instance reused by the preprocessing function.
lemmatizer = WordNetLemmatizer()

# Stopwords: start from the NLTK English list and report it.
from nltk.corpus import stopwords
stop_words_en = stopwords.words("english")

print('Stopwords from NLTK')
print(len(stop_words_en), stop_words_en)
print('')

# Extra stopwords added on top of the NLTK list.
new_stop_words = ['aye', 'mine', 'have']

# Merge both lists and drop duplicates (element order is not preserved).
stop_words_en = list(set(stop_words_en + new_stop_words))
print('Out Final Stopwords')
print(len(stop_words_en), stop_words_en)

# Create A Function for review Preprocessing

def review_preprocessing(review):
    """Clean one raw review and return it as a space-joined token string.

    Steps, in order: lowercase; blank out @mentions and #hashtags; blank out
    literal backslash-n sequences; strip outer whitespace; blank out URLs;
    blank out every character that is not an ASCII letter or whitespace;
    tokenize; drop stopwords; lemmatize; join the tokens with single spaces.

    Parameters:
        review (str): The raw review text.

    Returns:
        str: The cleaned review.
    """
    # Case folding
    review = review.lower()

    # Mention removal
    review = re.sub(r"@[A-Za-z0-9_]+", " ", review)

    # Hashtag removal
    review = re.sub(r"#[A-Za-z0-9_]+", " ", review)

    # This pattern matches a literal backslash followed by "n" (as found in
    # escaped text), not an actual newline character.
    review = re.sub(r"\\n", " ", review)

    # Leading/trailing whitespace removal
    review = review.strip()

    # URL removal
    review = re.sub(r"http\S+", " ", review)
    # NOTE(review): the dot is unescaped, so this also matches "www" followed
    # by any character. Kept byte-identical so the cleaned text does not
    # change -- confirm against the preprocessing used at training time.
    review = re.sub(r"www.\S+", " ", review)

    # Non-letter removal (emoticons, symbols, digits, punctuation). The
    # original spared the apostrophe in a first pass and then removed it in
    # follow-up passes; a single raw-string pattern yields the same text and
    # avoids the invalid "\s" escape of a non-raw string literal.
    review = re.sub(r"[^A-Za-z\s]", " ", review)

    # Tokenization
    tokens = word_tokenize(review)

    # Stopword removal followed by lemmatization; a set gives constant-time
    # membership checks instead of scanning the stopword list per token.
    stopword_set = set(stop_words_en)
    tokens = [
        lemmatizer.lemmatize(word)
        for word in tokens
        if word not in stopword_set
    ]

    # Combining tokens
    return ' '.join(tokens)

def preprocess_text(text):
    """
    Preprocess text by delegating to ``review_preprocessing``.

    Parameters:
        text (str): The input text to be preprocessed.

    Returns:
        str: The preprocessed text, exactly as ``review_preprocessing``
        returns it.
    """
    return review_preprocessing(text)

def run():
    """Render the review page and display the model's rating prediction.

    Draws the title and a one-field form. When the form is submitted with
    non-blank text, the text is cleaned with ``review_preprocessing``, passed
    to the module-level ``model``, and the index of the highest score in the
    model output selects the message shown. The raw input is echoed below
    the result.
    """
    # Page title
    st.title("DETECTION RATING BASED ON MCDONALD'S CUSTOMER REVIEW")
    st.subheader('Detecting Reviews')
    st.markdown('---')

    # Input form
    with st.form(key='review'):
        st.write("## Customers' Review")
        # Review text input
        text = st.text_input("Enter The Review:")
        submitted = st.form_submit_button('Predict')

        # Perform prediction
        if submitted:
            if not (text or "").strip():
                # Nothing to classify; avoid sending blank input to the model.
                st.warning("Please enter a review before predicting.")
            else:
                df_inf = pd.DataFrame([{'preprocessing_review': text}])
                df_inf['preprocessing_review'] = (
                    df_inf['preprocessing_review'].apply(review_preprocessing)
                )

                y_pred_inf = model.predict(df_inf['preprocessing_review'])
                # Take the argmax of the model output. The original took it
                # over the text column, which discarded the prediction.
                # assumes the output is (n_samples, n_classes) -- TODO confirm
                predicted_class = int(
                    np.ravel(np.argmax(y_pred_inf, axis=-1))[0]
                )

                # Display the prediction result
                if predicted_class == 0:
                    st.subheader("Prediction: Negative Comment with Rating 1 Star - 2 Stars")
                elif predicted_class == 1:
                    st.subheader("Prediction: Neutral Comment with Rating 3 Stars")
                else:
                    # NOTE(review): the label mentions only 5 stars; confirm
                    # whether this class also covers 4-star reviews.
                    st.subheader("Prediction: Positive Comment with Rating 5 Stars")

                # Display the text exactly as the user entered it
                st.subheader("Extracted Text:")
                st.write(text)

# Build the page only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()