"""Streamlit app that predicts the sentiment of an English airline tweet."""

import os
import re

import emoji
import nltk
import numpy as np
import streamlit as st
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from tensorflow.keras.layers import Input, Dense
from transformers import AutoTokenizer, TFBertModel

# The stop-word list and the WordNet lemmatizer below need these corpora.
nltk.download('stopwords')
nltk.download('wordnet')

lmtzr = WordNetLemmatizer()
stop_words = stopwords.words("english")
# Fixed sequence length: the tokenizer pads/truncates every tweet to this size.
max_len = 35

# Emoticons grouped by the placeholder label that replaces them.
_EMOTICON_GROUPS = {
    'EMOT_SMILEY': [':-)', ':)', '(:', '(-:', ';p', ':-d', ':d'],
    'EMOT_LAUGH': [':-D', ':D', 'X-D', 'XD', 'xD'],
    'EMOT_LOVE': ['<3', ':*'],
    'EMOT_CRY': [':,(', ":'(", ':"(', ':(('],
    'EMOT_WINK': [';-)', ';)', ';-D', ';D', '(;', '(-;'],
    'EMOT_FROWN': [':-(', ':('],
}


def _build_emoticon_lookup(groups):
    """Return a ``{lower-cased emoticon: label}`` mapping.

    Keys are lower-cased because ``clean_text`` lower-cases its input before
    matching. When two groups collide after lower-casing (e.g. ':d'), the
    group listed first wins.
    """
    lookup = {}
    for label, emoticons in groups.items():
        for emoticon in emoticons:
            lookup.setdefault(emoticon.lower(), label)
    return lookup


_EMOTICON_LABELS = _build_emoticon_lookup(_EMOTICON_GROUPS)


def clean_text(text):
    """Normalize a tweet: lower-case, strip noise, label emoticons, lemmatize.

    Steps, in order:
      1. lower-case the text;
      2. remove URLs (only the URL itself, not the rest of the line);
      3. drop '#' characters (the hashtag word is kept);
      4. remove @mentions;
      5. convert emojis to their textual ``:name:`` form;
      6. replace whitespace-delimited emoticons with ``EMOT_*`` labels;
      7. lemmatize every token as a verb.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned text, with tokens joined by single spaces.
    """
    text = text.lower()
    # Stop at the first whitespace so the words following a URL survive.
    text = re.sub(r"https?://\S+", "", text)
    text = text.replace("#", "")
    text = re.sub(r"@\S+", "", text)
    text = emoji.demojize(text)

    # Replace whole tokens only: a substring replace on the full string could
    # corrupt unrelated tokens (e.g. ':d' inside a demojized ':dog_face:').
    tokens = [_EMOTICON_LABELS.get(token, token) for token in text.split()]

    return ' '.join(lmtzr.lemmatize(token, 'v') for token in tokens)


st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
airline_tweet = st.text_input(
    'Enter your english airline tweet here, press the prediction button and wait for the model to predict the sentiment of your review:',
    '@AmericanAirline My flight was awful, the flight was late and you lost my luggage!',
)

# NOTE(review): clean_text() is defined above but never applied to
# airline_tweet before tokenization. If the weights were trained on cleaned
# text, the tweet should be cleaned here too - confirm against training code.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased', num_labels=2)
encoded_input = tokenizer(
    text=airline_tweet,
    add_special_tokens=True,
    max_length=max_len,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=False,
)

# NOTE(review): a DistilBERT checkpoint name is passed to TFBertModel. This is
# left untouched because the saved weights presumably match this exact
# architecture - confirm before changing the model class or checkpoint.
bert = TFBertModel.from_pretrained('distilbert-base-uncased', num_labels=2)
# ---------------------------------------------------------------------------
# Classification head on top of the pretrained encoder.
# Layers are created in the same order as before so the saved weights still
# line up with the rebuilt graph.
# ---------------------------------------------------------------------------
input_ids = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')
input_mask = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')
bert_inputs = dict(input_ids=input_ids, input_mask=input_mask)

# Index 0 of the encoder output is the last hidden states (one vector per token).
encoder_outputs = bert.bert(input_ids, attention_mask=input_mask)
embeddings = encoder_outputs[0]

# Max-pool over the token axis, then a small dense head with light dropout.
pooled = tf.keras.layers.GlobalMaxPool1D()(embeddings)
hidden = Dense(512, activation='relu')(pooled)
hidden = tf.keras.layers.Dropout(0.1)(hidden)

# Two output units (negative / positive); softmax yields class probabilities.
class_probabilities = Dense(2, activation='softmax')(hidden)

model = tf.keras.Model(inputs=bert_inputs, outputs=class_probabilities)
model.load_weights('sentiment_weights.h5')

if st.button('Predict sentiment'):
    model_inputs = {
        'input_ids': encoded_input['input_ids'],
        'input_mask': encoded_input['attention_mask'],
    }
    prediction = model.predict(model_inputs)

    # Map the winning class index to its human-readable label.
    encoded_dict = {0: 'negative', 1: 'positive'}
    predicted_class = np.argmax(prediction)
    st.write(f'Sentiment predicted : {encoded_dict[predicted_class]}')

    if predicted_class == 0:
        st.write("I'm sorry you had a bad experience with our company :( , please accept our apologies")
    else:
        st.write('Glad your flight was good ! Hope to see you soon :)')