Spaces:

PierreCugnet
/

airline-sentiment-analysis

Sleeping

App Files Files Community

airline-sentiment-analysis / app.py

PIERRE CUGNET

feat(py): add a button to prevent automatic inference

1409c20 over 2 years ago

raw

history blame contribute delete

No virus

3.57 kB

	import streamlit as st
	import os
	import tensorflow as tf
	from transformers import AutoTokenizer, TFBertModel
	from tensorflow.keras.layers import Input, Dense
	import numpy as np
	import re
	import emoji
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem.wordnet import WordNetLemmatizer

	# Fetch the NLTK corpora needed below: 'stopwords' backs `stop_words`,
	# 'wordnet' backs the lemmatizer.
	for corpus_name in ('stopwords', 'wordnet'):
	    nltk.download(corpus_name)

	# Fixed sequence length: used as the tokenizer's max_length and as the shape
	# of the Keras input layers further down in this file.
	max_len = 35

	# English stop-word list (not referenced by the rest of the visible code).
	stop_words = stopwords.words("english")

	# Shared WordNet lemmatizer used by clean_text().
	lmtzr = WordNetLemmatizer()

	def clean_text(text):
	    """Normalise a raw tweet before it is fed to the sentiment model.

	    Steps, in order: lower-case the text, strip URLs, drop the '#' of
	    hashtags (the word itself is kept), remove @mentions, turn emojis into
	    their textual ':name:' form, replace known emoticons with EMOT_* labels
	    and finally lemmatize every token as a verb.

	    Args:
	        text: the raw tweet as a string.

	    Returns:
	        The cleaned tweet, tokens separated by single spaces.
	    """
	    # Emoticon variants grouped by the label that replaces them.
	    emoticons = {
	        'EMOT_SMILEY': [':-)', ':)', '(:', '(-:', ';p', ':-d', ':d'],
	        'EMOT_LAUGH': [':-D', ':D', 'X-D', 'XD', 'xD'],
	        'EMOT_LOVE': ['<3', ':*'],
	        'EMOT_CRY': [':,(', ":'(", ':"(', ':(('],
	        'EMOT_WINK': [';-)', ';)', ';-D', ';D', '(;', '(-;'],
	        'EMOT_FROWN': [':-(', ':('],
	    }

	    # The text is lower-cased before matching, so variants are indexed in
	    # lower case too; otherwise upper-case entries such as 'XD' or ';D' could
	    # never match. setdefault keeps the first label listed for a variant, so
	    # ':d' / ':-d' still resolve to EMOT_SMILEY as before.
	    emoticon_labels = {}
	    for label, variants in emoticons.items():
	        for variant in variants:
	            emoticon_labels.setdefault(variant.lower(), label)

	    # Put text into lower case
	    text = text.lower()

	    # Remove URLs: everything from the scheme up to the next whitespace.
	    text = re.sub(r'https?://\S+', '', text)

	    # Remove the hashtag marker but keep the tagged word
	    text = text.replace('#', '')

	    # Remove mentions
	    text = re.sub(r'@\S+', '', text)

	    # Convert emojis to their ':name:' textual form
	    text = emoji.demojize(text)

	    # Replace emoticons token by token, so that an emoticon that happens to
	    # be a substring of another token does not corrupt that token.
	    words = [emoticon_labels.get(word, word) for word in text.split()]

	    # Lemmatization (every token treated as a verb)
	    return ' '.join(lmtzr.lemmatize(word, 'v') for word in words)



	st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
	airline_tweet = st.text_input('Enter your english airline tweet here, press the prediction button and wait for the model to predict the sentiment of your review:', '@AmericanAirline My flight was awful, the flight was late and you lost my luggage!')

	# Streamlit re-executes this whole script on every widget interaction, so the
	# tokenizer and the model must be cached or they are rebuilt (and the weights
	# re-read from disk) on every rerun. Use whichever resource cache this
	# Streamlit version provides and fall back to no caching on very old releases.
	_cache_resource = (
	    getattr(st, 'cache_resource', None)
	    or getattr(st, 'experimental_singleton', None)
	    or (lambda func: func)
	)


	@_cache_resource
	def _load_tokenizer():
	    """Return the tokenizer of the 'distilbert-base-uncased' checkpoint."""
	    return AutoTokenizer.from_pretrained('distilbert-base-uncased', num_labels=2)


	@_cache_resource
	def _load_model():
	    """Build the classifier and load its weights from 'sentiment_weights.h5'.

	    Architecture: BERT encoder -> global max pooling over the sequence ->
	    Dense(512, relu) -> Dropout(0.1) -> Dense(2, softmax).

	    Returns:
	        A tf.keras.Model taking a dict with the keys 'input_ids' and
	        'input_mask' and returning the probabilities of the two classes.
	    """
	    # NOTE(review): a DistilBERT checkpoint is loaded through the BERT class.
	    # Left untouched because 'sentiment_weights.h5' was presumably saved from
	    # this exact architecture -- confirm before switching model classes.
	    bert = TFBertModel.from_pretrained('distilbert-base-uncased', num_labels=2)

	    input_ids = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')
	    input_mask = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')

	    # Index 0 is the last hidden state of the encoder.
	    embeddings = bert.bert(input_ids, attention_mask=input_mask)[0]
	    pooled = tf.keras.layers.GlobalMaxPool1D()(embeddings)
	    hidden = Dense(512, activation='relu')(pooled)
	    hidden = tf.keras.layers.Dropout(0.1)(hidden)

	    # Two output units with softmax: one probability per sentiment class.
	    probabilities = Dense(2, activation='softmax')(hidden)

	    model = tf.keras.Model(
	        inputs={'input_ids': input_ids, 'input_mask': input_mask},
	        outputs=probabilities,
	    )
	    model.load_weights('sentiment_weights.h5')
	    return model


	if st.button('Predict sentiment'):
	    if not airline_tweet.strip():
	        # Nothing to classify: do not run the model on an empty input.
	        st.warning('Please enter a tweet before requesting a prediction.')
	    else:
	        # NOTE(review): clean_text() is defined in this file but is not applied
	        # to the tweet here -- confirm whether the model was trained on
	        # cleaned text before changing this.
	        encoded_input = _load_tokenizer()(
	            text=airline_tweet,
	            add_special_tokens=True,
	            max_length=max_len,
	            truncation=True,
	            padding='max_length',
	            return_tensors='tf',
	            return_token_type_ids=False,
	            return_attention_mask=True,
	            verbose=False)

	        # Tokenizer and model are only loaded once a prediction is requested.
	        prediction = _load_model().predict({'input_ids': encoded_input['input_ids'], 'input_mask': encoded_input['attention_mask']})

	        encoded_dict = {0: 'negative', 1: 'positive'}
	        predicted_class = int(np.argmax(prediction))

	        st.write(f'Sentiment predicted : {encoded_dict[predicted_class]}')
	        if predicted_class == 0:
	            st.write('I\'m sorry you had a bad experience with our company :( , please accept our apologies')
	        else:
	            st.write('Glad your flight was good ! Hope to see you soon :)')