import streamlit as st
import os
import tensorflow as tf
from transformers import AutoTokenizer, TFBertModel
from tensorflow.keras.layers import Input, Dense
import numpy as np
import re
import emoji
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')

lmtzr = WordNetLemmatizer()
stop_words = stopwords.words("english")
max_len = 35

def clean_text(text):
    # Lowercase so that all later matching is case-insensitive
    text = text.lower()

    # Remove URLs
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)

    # Drop the '#' symbol but keep the hashtag word itself
    text = re.sub(r"#", "", text)

    # Remove @mentions
    text = re.sub(r"@\S+", "", text)

    # Turn emojis into their textual description (e.g. ':red_heart:')
    text = emoji.demojize(text)

    # Map common emoticons to sentiment-bearing tokens.
    # The variants are lowercase because the text was lowercased above.
    emoticons = dict()
    emoticons['EMOT_SMILEY'] = [':-)', ':)', '(:', '(-:', ';p', ':-d', ':d']
    emoticons['EMOT_LAUGH'] = ['x-d', 'xd']
    emoticons['EMOT_LOVE'] = ['<3', ':*']
    emoticons['EMOT_CRY'] = [':,(', ':\'(', ':"(', ':((']
    emoticons['EMOT_WINK'] = [';-)', ';)', ';-d', ';d', '(;', '(-;']
    emoticons['EMOT_FROWN'] = [':-(', ':(']
    for label, emot in emoticons.items():
        for word in text.split():
            if word in emot:
                text = text.replace(word, label)

    # Lemmatize every token as a verb ('was' -> 'be', 'lost' -> 'lose', ...)
    text = ' '.join([lmtzr.lemmatize(word, 'v') for word in text.split()])
    return text
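
# Illustrative example of the cleaning step (assumed behaviour, not part of the app):
# a tweet like "Loving @united #flight :) https://t.co/xyz" is lowercased, the URL and
# mention are stripped, the '#' is dropped, ':)' becomes 'EMOT_SMILEY' and the verbs
# are lemmatized, yielding roughly "love flight EMOT_SMILEY".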


st.title('Welcome to my Twitter airline sentiment analysis!', anchor='center')

airline_tweet = st.text_input(
    'Enter your English airline tweet here, press the prediction button and wait '
    'for the model to predict the sentiment of your review:',
    '@AmericanAirline My flight was awful, the flight was late and you lost my luggage!')

# The checkpoint must match the architecture built below: a 'distilbert-base-uncased'
# checkpoint does not fit TFBertModel, so 'bert-base-uncased' is used for both the
# tokenizer and the backbone (num_labels is not a tokenizer argument and was dropped).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Clean the tweet before tokenizing; clean_text was defined above but never called,
# which is assumed to be an oversight.
encoded_input = tokenizer(
    text=clean_text(airline_tweet),
    add_special_tokens=True,
    max_length=max_len,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=False)
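
# encoded_input now holds two TF tensors of shape (1, max_len):
# 'input_ids' (padded/truncated token ids) and 'attention_mask'
# (1 for real tokens, 0 for padding), which are fed to the model below.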

# Backbone: must be the same architecture the classification head was trained on.
# As above, 'bert-base-uncased' is used so the checkpoint matches TFBertModel;
# num_labels is omitted because the 2-class head is built by hand below.
bert = TFBertModel.from_pretrained('bert-base-uncased')

input_ids = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')
input_mask = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')

# These dict keys must match the keys passed to model.predict() later
bert_inputs = {'input_ids': input_ids, 'input_mask': input_mask}

# Classification head: max-pool the token embeddings, then a small MLP
embeddings = bert.bert(input_ids, attention_mask=input_mask)[0]
out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
out = Dense(512, activation='relu')(out)
out = tf.keras.layers.Dropout(0.1)(out)

y = Dense(2, activation='softmax')(out)

model = tf.keras.Model(inputs=bert_inputs, outputs=y)

# Load the fine-tuned weights for the whole model (backbone + head)
model.load_weights('sentiment_weights.h5')
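
# Note: Streamlit re-runs this whole script on every interaction, so the tokenizer
# and model above are rebuilt each time. If your Streamlit version provides
# st.cache_resource (an assumption), a minimal sketch to avoid that is:
#
#     @st.cache_resource
#     def load_sentiment_model():
#         ...  # build the Keras model as above and load 'sentiment_weights.h5'
#         return model
#
#     model = load_sentiment_model()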


if st.button('Predict sentiment'):
    # model.predict returns softmax probabilities of shape (1, 2);
    # argmax picks the most likely class (0 = negative, 1 = positive)
    prediction = model.predict({'input_ids': encoded_input['input_ids'],
                                'input_mask': encoded_input['attention_mask']})
    encoded_dict = {0: 'negative', 1: 'positive'}
    sentiment = encoded_dict[np.argmax(prediction)]
    st.write(f'Sentiment predicted: {sentiment}')
    if sentiment == 'negative':
        st.write("I'm sorry you had a bad experience with our company :( Please accept our apologies.")
    else:
        st.write('Glad your flight was good! Hope to see you soon :)')