# PIERRE CUGNET
# feat(py): add weights and app
# 1e322be
# raw
# history blame
# No virus
# 3.08 kB
import streamlit as st
import os
import tensorflow as tf
from transformers import AutoTokenizer, TFBertModel
from tensorflow.keras.layers import Input, Dense
import numpy as np
import re
import emoji
import nltk
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
# Fetch the NLTK corpora needed by the stop-word list and the lemmatizer.
for _corpus in ('stopwords', 'wordnet'):
    nltk.download(_corpus)

# Shared WordNet lemmatizer; clean_text() uses it to lemmatize each token.
lmtzr = WordNetLemmatizer()

# NLTK's English stop-word list.
stop_words = stopwords.words("english")

# Sequence length used for tokenizer padding/truncation and for the shape of
# the model's Input layers.
max_len = 35
# Emoticon variants grouped by the placeholder label that replaces them.
_EMOTICONS_BY_LABEL = {
    'EMOT_SMILEY': (':-)', ':)', '(:', '(-:', ';p', ':-d', ':d'),
    'EMOT_LAUGH': (':-D', ':D', 'X-D', 'XD', 'xD'),
    # ':*' is written without a backslash: in a normal (non-raw) string
    # ':\*' is a three-character literal that never equals the emoticon.
    'EMOT_LOVE': ('<3', ':*'),
    'EMOT_CRY': (':,(', ":'(", ':"(', ':(('),
    'EMOT_WINK': (';-)', ';)', ';-D', ';D', '(;', '(-;'),
    'EMOT_FROWN': (':-(', ':('),
}

# Lower-cased emoticon -> label. clean_text() lower-cases its input before the
# lookup, so the keys have to be lower-case as well; otherwise variants such
# as 'XD' or ';D' can never match. setdefault() keeps the first label listed
# when two variants collide after lower-casing (':d' stays EMOT_SMILEY).
_EMOTICON_LABELS = {}
for _label, _variants in _EMOTICONS_BY_LABEL.items():
    for _variant in _variants:
        _EMOTICON_LABELS.setdefault(_variant.lower(), _label)

# Match only the URL token itself; a greedy '.*' would also swallow every
# word that follows the URL on the same line.
_URL_RE = re.compile(r'https?://\S+')
_MENTION_RE = re.compile(r'@\S+')


def clean_text(text):
    """Normalize a tweet for sentiment analysis.

    Steps, in order: lower-case the text, drop URLs, drop '#' characters
    (the hashtag word is kept), drop @mentions, turn emojis into their text
    names, replace known emoticons with EMOT_* placeholder labels, and
    lemmatize every token as a verb.

    Args:
        text: The raw tweet as a string.

    Returns:
        The cleaned tweet, with tokens separated by single spaces.
    """
    text = text.lower()
    text = _URL_RE.sub('', text)
    text = text.replace('#', '')
    text = _MENTION_RE.sub('', text)

    # Emojis become ':name:' tokens.
    text = emoji.demojize(text)

    # Replace emoticons token by token so that an emoticon appearing inside a
    # longer token (e.g. ':)' inside ':))') is not partially rewritten.
    tokens = [_EMOTICON_LABELS.get(token, token) for token in text.split()]

    # Lemmatization (every token is treated as a verb).
    return ' '.join(lmtzr.lemmatize(token, 'v') for token in tokens)
# --- UI: page title and the tweet to classify --------------------------------
st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
airline_tweet = st.text_input(
    'Enter your english airline tweet here:',
    '@AmericanAirline My flight was great! :)',
)

# --- Tokenization -------------------------------------------------------------
# NOTE(review): the tweet is tokenized exactly as typed; clean_text() is not
# applied here. Confirm this matches how the training data was prepared.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased', num_labels=2)
tokenizer_options = dict(
    add_special_tokens=True,
    max_length=max_len,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=False,
)
encoded_input = tokenizer(text=airline_tweet, **tokenizer_options)

# --- Classifier graph ---------------------------------------------------------
# NOTE(review): a 'distilbert-base-uncased' checkpoint name is loaded through
# TFBertModel, and the fine-tuned weights are loaded from the .h5 file below.
# The model class and the order in which layers are created are deliberately
# left as they were; presumably the saved weights depend on both - confirm
# before changing either.
encoder = TFBertModel.from_pretrained('distilbert-base-uncased', num_labels=2)
ids_in = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')
mask_in = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')

# Index 0 of the encoder output is the last hidden states.
hidden_states = encoder.bert(ids_in, attention_mask=mask_in)[0]
pooled = tf.keras.layers.GlobalMaxPool1D()(hidden_states)
features = Dense(512, activation='relu')(pooled)
features = tf.keras.layers.Dropout(0.1)(features)

# Final layer: 2 units because there are 2 categories to predict, softmax
# because probabilities are wanted.
class_probs = Dense(2, activation='softmax')(features)

# The model is keyed by 'input_ids' / 'input_mask'; the same keys are used
# when feeding predict() below.
model = tf.keras.Model(
    inputs={'input_ids': ids_in, 'input_mask': mask_in},
    outputs=class_probs,
)
model.load_weights('sentiment_weights.h5')

# --- Inference and display ----------------------------------------------------
model_feed = {
    'input_ids': encoded_input['input_ids'],
    'input_mask': encoded_input['attention_mask'],
}
prediction = model.predict(model_feed)

# Map the index of the highest score to its sentiment name.
encoded_dict = {0: 'negative', 1: 'positive'}
sentiment = encoded_dict[np.argmax(prediction)]
st.write(f'The sentence is {sentiment}')