PIERRE CUGNET
committed on
Commit
•
1e322be
1
Parent(s):
b3e4112
feat(py): add weights and app
Browse files- .idea/.gitignore +8 -0
- .idea/airline-sentiment-analysis.iml +8 -0
- .idea/inspectionProfiles/Project_Default.xml +15 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- app.py +85 -2
- requirements.txt +6 -0
- sentiment_weights.h5 +3 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default ignored files
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
4 |
+
# Datasource local storage ignored files
|
5 |
+
/dataSources/
|
6 |
+
/dataSources.local.xml
|
7 |
+
# Editor-based HTTP Client requests
|
8 |
+
/httpRequests/
|
.idea/airline-sentiment-analysis.iml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$" />
|
5 |
+
<orderEntry type="inheritedJdk" />
|
6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
7 |
+
</component>
|
8 |
+
</module>
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<profile version="1.0">
|
3 |
+
<option name="myName" value="Project Default" />
|
4 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
5 |
+
<option name="ignoredPackages">
|
6 |
+
<value>
|
7 |
+
<list size="2">
|
8 |
+
<item index="0" class="java.lang.String" itemvalue="python-dotenv" />
|
9 |
+
<item index="1" class="java.lang.String" itemvalue="google.cloud" />
|
10 |
+
</list>
|
11 |
+
</value>
|
12 |
+
</option>
|
13 |
+
</inspection_tool>
|
14 |
+
</profile>
|
15 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
.idea/misc.xml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
|
4 |
+
</project>
|
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/airline-sentiment-analysis.iml" filepath="$PROJECT_DIR$/.idea/airline-sentiment-analysis.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
.idea/vcs.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="VcsDirectoryMappings">
|
4 |
+
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
5 |
+
</component>
|
6 |
+
</project>
|
app.py
CHANGED
@@ -1,7 +1,90 @@
|
|
1 |
import streamlit as st
|
2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
|
5 |
-
airline_tweet = st.text_input('Enter your english airline tweet here:', '@AmericanAirline My flight was great!')
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
+
import tensorflow as tf
|
4 |
+
from transformers import AutoTokenizer, TFBertModel
|
5 |
+
from tensorflow.keras.layers import Input, Dense
|
6 |
+
import numpy as np
|
7 |
+
import re
|
8 |
+
import emoji
|
9 |
+
import nltk
|
10 |
+
from nltk.corpus import stopwords
|
11 |
+
from nltk.stem.wordnet import WordNetLemmatizer
|
12 |
+
|
13 |
+
# Fetch the NLTK corpora this module reads below; both calls run on every
# import of the script.
for _corpus in ('stopwords', 'wordnet'):
    nltk.download(_corpus)

# Length passed as max_length to the tokenizer call and used as the shape of
# the Keras Input layers further down.
max_len = 35

# English stop-word list (not referenced by the code visible in this file).
stop_words = stopwords.words("english")

# Shared lemmatizer instance used by clean_text().
lmtzr = WordNetLemmatizer()
|
19 |
+
|
20 |
+
# Emoticon groups recognised by clean_text(), keyed by the placeholder label
# that replaces them.
_EMOTICON_GROUPS = {
    'EMOT_SMILEY': [':-)', ':)', '(:', '(-:', ';p', ':-d', ':d'],
    'EMOT_LAUGH': [':-D', ':D', 'X-D', 'XD', 'xD'],
    'EMOT_LOVE': ['<3', ':*'],
    'EMOT_CRY': [':,(', ":'(", ':"(', ':(('],
    'EMOT_WINK': [';-)', ';)', ';-D', ';D', '(;', '(-;'],
    'EMOT_FROWN': [':-(', ':('],
}

# Flat emoticon -> label lookup so each token costs one dictionary probe.
_EMOTICON_LABELS = {
    emoticon: label
    for label, group in _EMOTICON_GROUPS.items()
    for emoticon in group
}

# Only the URL itself is removed; the text that follows it is kept.
_URL_RE = re.compile(r'https?://\S+', re.IGNORECASE)
_MENTION_RE = re.compile(r'@\S+')


def _normalise_token(token):
    """Return the emoticon label for *token*, or the token lower-cased."""
    lowered = token.lower()
    # Try the token as typed first so that ':D' (laugh) is not folded into
    # ':d' (smiley); fall back to the lower-cased form for e.g. ';P'.
    return _EMOTICON_LABELS.get(token) or _EMOTICON_LABELS.get(lowered, lowered)


def clean_text(text):
    """Normalise a tweet before it is fed to the model.

    Steps, in order:

    1. Remove URLs, ``#`` characters and ``@mentions``.
    2. Replace whole-token emoticons with ``EMOT_*`` labels and lower-case
       every other token.
    3. Convert emoji to their textual names with ``emoji.demojize``.
    4. Lemmatize each token as a verb with the module-level ``lmtzr``.

    Args:
        text: The raw tweet as a string.

    Returns:
        The cleaned tweet, with tokens separated by single spaces.
    """
    # Remove URLs, hashtag markers and mentions.
    text = _URL_RE.sub('', text)
    text = text.replace('#', '')
    text = _MENTION_RE.sub('', text)

    # Emoticons are matched on whole tokens *before* lower-casing, so the
    # upper-case variants are recognised and no substring of another token
    # (for instance a demojized emoji name) is rewritten by accident.
    text = ' '.join(_normalise_token(token) for token in text.split())

    # Handle emojis.
    text = emoji.demojize(text)

    # Lemmatization
    return ' '.join(lmtzr.lemmatize(word, 'v') for word in text.split())
|
50 |
+
|
51 |
+
|
52 |
|
53 |
# --- Streamlit page: read a tweet, classify it, display the predicted label ---
st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
airline_tweet = st.text_input(
    'Enter your english airline tweet here:',
    '@AmericanAirline My flight was great! :)',
)

# NOTE(review): the tweet is tokenised exactly as typed; clean_text() is
# defined in this file but is not applied here — confirm this matches how the
# weights were trained.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased', num_labels=2)
encoded_input = tokenizer(
    text=airline_tweet,
    add_special_tokens=True,
    max_length=max_len,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=False,
)

# NOTE(review): a DistilBERT checkpoint name is handed to TFBertModel —
# confirm this pairing is what sentiment_weights.h5 expects before changing it.
bert = TFBertModel.from_pretrained('distilbert-base-uncased', num_labels=2)

# Symbolic inputs; the dict keys must match the keys passed to predict() below.
input_ids = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')
input_mask = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')
bert_inputs = {'input_ids': input_ids, 'input_mask': input_mask}

# Classification head: encoder output -> max pool -> dense -> dropout -> softmax.
embeddings = bert.bert(input_ids, attention_mask=input_mask)[0]  # index 0: last hidden states
pooled = tf.keras.layers.GlobalMaxPool1D()(embeddings)
hidden = Dense(512, activation='relu')(pooled)
regularised = tf.keras.layers.Dropout(0.1)(hidden)

# Two output units, one per sentiment class; softmax yields probabilities.
y = Dense(2, activation='softmax')(regularised)
model = tf.keras.Model(inputs=bert_inputs, outputs=y)

# Restore the trained weights, then score the single encoded tweet.
model.load_weights('sentiment_weights.h5')
prediction = model.predict({
    'input_ids': encoded_input['input_ids'],
    'input_mask': encoded_input['attention_mask'],
})

# Maps the arg-max class index to its human-readable label.
encoded_dict = {0: 'negative', 1: 'positive'}

st.write(f'The sentence is {encoded_dict[np.argmax(prediction)]}')
|
90 |
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
tensorflow
|
2 |
+
keras
|
3 |
+
re
|
4 |
+
nltk
|
5 |
+
numpy
|
6 |
+
emoji
|
sentiment_weights.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89d3e89e8ac0bc6d7c690b4ba14475eab7fe8b1714f8e1d36880509990635273
|
3 |
+
size 439786000
|