File size: 3,448 Bytes
f4e514f
2f6e3a1
1e322be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0d355a
b3e4112
2847f6e
1e322be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2847f6e
3ba964c
 
2847f6e
3ba964c
 
8bdae34
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
import os
import tensorflow as tf
from transformers import AutoTokenizer, TFBertModel
from tensorflow.keras.layers import Input, Dense
import numpy as np
import re
import emoji
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

# Fetch the NLTK corpora that the stop-word list and the WordNet lemmatizer
# read from.
for _corpus in ('stopwords', 'wordnet'):
    nltk.download(_corpus)

# Sequence length handed to the tokenizer and to the Keras input layers.
max_len = 35
# English stop-word list from NLTK.
stop_words = stopwords.words("english")
# Shared lemmatizer instance used by clean_text().
lmtzr = WordNetLemmatizer()

def _build_emoticon_lookup():
    """Return a dict mapping each lower-cased emoticon to its EMOT_* label."""
    groups = {
        'EMOT_SMILEY': [':-)', ':)', '(:', '(-:', ';p', ':-d', ':d'],
        'EMOT_LAUGH': [':-D', ':D', 'X-D', 'XD', 'xD'],
        # ':\\*' (colon, backslash, asterisk) is what the previous table
        # actually contained; it is kept next to the intended ':*'.
        'EMOT_LOVE': ['<3', ':*', ':\\*'],
        'EMOT_CRY': [':,(', ":'(", ':"(', ':(('],
        'EMOT_WINK': [';-)', ';)', ';-D', ';D', '(;', '(-;'],
        'EMOT_FROWN': [':-(', ':('],
    }
    lookup = {}
    for label, variants in groups.items():
        for variant in variants:
            # Keys are lower-cased because clean_text lower-cases its input
            # before the lookup. setdefault lets the first group win when two
            # groups collide after lower-casing (':d' stays EMOT_SMILEY).
            lookup.setdefault(variant.lower(), label)
    return lookup


_EMOTICON_LABELS = _build_emoticon_lookup()
# Match only the URL itself (up to the next whitespace), not the rest of the
# line that follows it.
_URL_RE = re.compile(r'https?://\S+')
_MENTION_RE = re.compile(r'@\S+')


def clean_text(text: str) -> str:
    """Normalize a tweet before it is fed to the sentiment model.

    Steps, in order: lower-case the text, strip URLs, drop '#' characters
    (the hashtag word itself is kept), remove @mentions, convert emojis to
    their textual form with ``emoji.demojize``, replace known emoticons with
    ``EMOT_*`` placeholder labels, and lemmatize every token as a verb with
    the module-level WordNet lemmatizer.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned text, with tokens separated by single spaces.
    """
    text = text.lower()
    text = _URL_RE.sub('', text)
    text = text.replace('#', '')
    text = _MENTION_RE.sub('', text)
    text = emoji.demojize(text)

    # Replace emoticons token by token: a whole-string str.replace would also
    # rewrite matching substrings inside unrelated tokens (for example ':d'
    # inside a demojized ':dog:').
    tokens = [_EMOTICON_LABELS.get(token, token) for token in text.split()]

    # Lemmatization
    return ' '.join(lmtzr.lemmatize(token, 'v') for token in tokens)



# Streamlit re-executes this script on every widget interaction, so the
# heavyweight objects (tokenizer, BERT backbone, trained weights) are built
# once and reused instead of being reloaded for each submitted tweet.
# Newer Streamlit releases expose `cache_resource`, older ones
# `experimental_singleton`; if neither exists the loaders simply run uncached,
# which matches the previous behavior.
_cache_resource = (
    getattr(st, 'cache_resource', None)
    or getattr(st, 'experimental_singleton', None)
    or (lambda func: func)
)


@_cache_resource
def _load_tokenizer():
    """Return the tokenizer for the 'distilbert-base-uncased' checkpoint."""
    return AutoTokenizer.from_pretrained('distilbert-base-uncased', num_labels=2)


@_cache_resource
def _load_model():
    """Build the classifier on top of the BERT backbone and load its weights.

    The layers are created in the same order as before so that
    'sentiment_weights.h5' still lines up with the architecture.
    """
    # NOTE(review): a DistilBERT checkpoint is loaded through the BERT class
    # here. It is left untouched because the layer layout has to match the
    # saved weights -- confirm against the training script.
    bert = TFBertModel.from_pretrained('distilbert-base-uncased', num_labels=2)

    input_ids = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')
    input_mask = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')
    # The dict keys below are the names predict() must be fed with.
    bert_inputs = {'input_ids': input_ids, 'input_mask': input_mask}

    # Index 0 of the backbone output is the last hidden states.
    embeddings = bert.bert(input_ids, attention_mask=input_mask)[0]
    out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
    out = Dense(512, activation='relu')(out)
    out = tf.keras.layers.Dropout(0.1)(out)

    # Last layer: 2 units because there are 2 categories to predict, softmax
    # because we want probabilities.
    y = Dense(2, activation='softmax')(out)
    classifier = tf.keras.Model(inputs=bert_inputs, outputs=y)
    classifier.load_weights('sentiment_weights.h5')
    return classifier


st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
airline_tweet = st.text_input('Enter your english airline tweet here, press enter, and wait for the model to predict the sentiment of your review:', '@AmericanAirline My flight was great! :)')
tokenizer = _load_tokenizer()

# NOTE(review): the tweet is tokenized exactly as typed; clean_text() is
# defined in this file but never applied here -- confirm whether the weights
# were trained on cleaned text.
encoded_input = tokenizer(
    text=airline_tweet,
    add_special_tokens=True,
    max_length=max_len,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=False)

model = _load_model()
prediction = model.predict({'input_ids': encoded_input['input_ids'], 'input_mask': encoded_input['attention_mask']})
encoded_dict = {0: 'negative', 1: 'positive'}

# Resolve the winning class once instead of re-running argmax per use.
predicted_class = int(np.argmax(prediction))

if predicted_class == 0:
    st.write(f'Sentiment predicted : {encoded_dict[predicted_class]}')
    st.write("I'm sorry you had a bad experience with our company :( , please accept our apologies")
else:
    st.write(f'Sentiment predicted : {encoded_dict[predicted_class]}\n')
    st.write('Glad your flight was good ! Hope to see you soon :)')