feat(py): add weights and app

Browse files

Files changed (10) hide show

.idea/.gitignore +8 -0
.idea/airline-sentiment-analysis.iml +8 -0
.idea/inspectionProfiles/Project_Default.xml +15 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
app.py +85 -2
requirements.txt +6 -0
sentiment_weights.h5 +3 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/

.idea/airline-sentiment-analysis.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,15 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="2">
+            <item index="0" class="java.lang.String" itemvalue="python-dotenv" />
+            <item index="1" class="java.lang.String" itemvalue="google.cloud" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/airline-sentiment-analysis.iml" filepath="$PROJECT_DIR$/.idea/airline-sentiment-analysis.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

app.py CHANGED Viewed

@@ -1,7 +1,90 @@
 import streamlit as st
 import os
 st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
-airline_tweet = st.text_input('Enter your english airline tweet here:', '@AmericanAirline My flight was great!')
-st.write('The sentence is', airline_tweet)

 import streamlit as st
 import os
+import tensorflow as tf
+from transformers import AutoTokenizer, TFBertModel
+from tensorflow.keras.layers import Input, Dense
+import numpy as np
+import re
+import emoji
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem.wordnet import WordNetLemmatizer
+nltk.download('stopwords')  # import-time side effect: fetch the NLTK stopword corpus read by stopwords.words() below
+nltk.download('wordnet')  # import-time side effect: fetch WordNet data; presumably required by WordNetLemmatizer - confirm
+lmtzr = WordNetLemmatizer()  # shared lemmatizer instance used by clean_text()
+stop_words = stopwords.words("english")  # English stopword list; NOTE(review): not referenced anywhere in the visible code
+max_len = 35  # fixed sequence length: passed to the tokenizer as max_length and used as the shape of both Keras Input layers
+def clean_text(text):
+    """Normalize a tweet before it is fed to the sentiment model.
+
+    Steps, in order: lower-case the text; strip URLs, '#' signs and
+    @mentions; convert emojis to their textual ``:name:`` form; replace
+    known emoticons with ``EMOT_*`` labels; lemmatize every token as a verb.
+
+    Args:
+        text: Raw tweet text (str).
+
+    Returns:
+        The cleaned text, with tokens joined by single spaces.
+
+    NOTE(review): this version fixes several matching bugs (see inline
+    comments). If the shipped weights were trained on text cleaned by the
+    previous version, confirm the changes do not shift model inputs.
+    """
+    # Put text into lower case
+    text = text.lower()
+    # Remove URLs. Only the URL token itself is dropped; the previous
+    # pattern used a greedy `.*` and erased the rest of the line as well.
+    text = re.sub(r'https?://\S+', '', text)
+    # Remove the hashtag sign but keep the tagged word
+    text = re.sub(r"#", "", text)
+    # Remove mentions
+    text = re.sub(r"@\S+", "", text)
+    # Turn emojis into their textual description
+    text = emoji.demojize(text)
+    emoticons = {
+        'EMOT_SMILEY': [':-)', ':)', '(:', '(-:', ';p', ':-d', ':d'],
+        'EMOT_LAUGH': [':-D', ':D', 'X-D', 'XD', 'xD'],
+        # ':*' was previously written ':\*', an invalid escape that kept
+        # the backslash and therefore never matched.
+        'EMOT_LOVE': ['<3', ':*'],
+        'EMOT_CRY': [':,(', ":'(", ':"(', ':(('],
+        'EMOT_WINK': [';-)', ';)', ';-D', ';D', '(;', '(-;'],
+        'EMOT_FROWN': [':-(', ':('],
+    }
+    # The text is already lower-cased, so index the table by lower-cased
+    # emoticon; otherwise entries such as 'XD' or ';-D' can never match.
+    # setdefault keeps the first label on clashes, so ':d' stays
+    # EMOT_SMILEY exactly as before.
+    emoticon_labels = {}
+    for label, emots in emoticons.items():
+        for emot in emots:
+            emoticon_labels.setdefault(emot.lower(), label)
+    # Replace whole tokens only; a global str.replace would also rewrite
+    # emoticon-like substrings embedded in other tokens.
+    tokens = [emoticon_labels.get(word, word) for word in text.split()]
+    # Lemmatization (every token is treated as a verb)
+    return ' '.join(lmtzr.lemmatize(word, 'v') for word in tokens)
 st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')  # Streamlit page: read one tweet, run the classifier built below, print the predicted label
+airline_tweet = st.text_input('Enter your english airline tweet here:', '@AmericanAirline My flight was great! :)')  # second argument is the pre-filled default text
+tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased', num_labels=2)  # NOTE(review): num_labels looks irrelevant for a tokenizer - confirm and drop
+encoded_input = tokenizer(  # NOTE(review): the raw tweet is tokenized; clean_text() is defined above but never called in the visible code
+        text=airline_tweet,
+        add_special_tokens=True,
+        max_length=max_len,  # combined with truncation and padding='max_length' so the encoded length matches the Input shape (max_len,)
+        truncation=True,
+        padding='max_length',
+        return_tensors='tf',
+        return_token_type_ids=False,
+        return_attention_mask=True,
+        verbose=False)
+bert = TFBertModel.from_pretrained('distilbert-base-uncased', num_labels=2)  # NOTE(review): a BERT model class is loaded from a DistilBERT checkpoint name - confirm this matches how sentiment_weights.h5 was produced
+input_ids = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')  # symbolic input for the token ids
+input_mask = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')  # symbolic input for the attention mask
+bert_inputs = {'input_ids': input_ids, 'input_mask': input_mask}  # the same dict keys are used in the model.predict() call below
+embeddings = bert.bert(input_ids, attention_mask=input_mask)[0] # element 0 of the encoder output is the last hidden state
+out = tf.keras.layers.GlobalMaxPool1D()(embeddings)  # pool the per-token embeddings into a single vector
+out = Dense(512, activation='relu')(out)
+out = tf.keras.layers.Dropout(0.1)(out)
+# out = Dense(512, activation='relu')(out)
+# Last layer
+y = Dense(2, activation = 'softmax')(out) # 2 units because there are 2 categories to predict; softmax to obtain probabilities
+# y = Dense(1, activation = 'sigmoid')(out)
+model = tf.keras.Model(inputs=bert_inputs, outputs=y)  # NOTE(review): built at module level, so the whole model is reconstructed on every script run - consider caching
+model.load_weights('sentiment_weights.h5')  # relative path, resolved against the current working directory
+prediction = model.predict({'input_ids' : encoded_input['input_ids'],'input_mask' : encoded_input['attention_mask']})  # the tokenizer's 'attention_mask' is fed under the model's 'input_mask' key
+encoded_dict = {0: 'negative', 1: 'positive'}  # class index -> human-readable label
+st.write(f'The sentence is {encoded_dict[np.argmax(prediction)]}', )  # argmax over the two softmax outputs selects the predicted class

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+tensorflow
+keras
+transformers
+nltk
+numpy
+emoji

sentiment_weights.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89d3e89e8ac0bc6d7c690b4ba14475eab7fe8b1714f8e1d36880509990635273
+size 439786000