PIERRE CUGNET committed on
Commit
1e322be
1 Parent(s): b3e4112

feat(py): add weights and app

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Datasource local storage ignored files
5
+ /dataSources/
6
+ /dataSources.local.xml
7
+ # Editor-based HTTP Client requests
8
+ /httpRequests/
.idea/airline-sentiment-analysis.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="2">
8
+ <item index="0" class="java.lang.String" itemvalue="python-dotenv" />
9
+ <item index="1" class="java.lang.String" itemvalue="google.cloud" />
10
+ </list>
11
+ </value>
12
+ </option>
13
+ </inspection_tool>
14
+ </profile>
15
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/airline-sentiment-analysis.iml" filepath="$PROJECT_DIR$/.idea/airline-sentiment-analysis.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
app.py CHANGED
@@ -1,7 +1,90 @@
1
  import streamlit as st
2
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
5
- airline_tweet = st.text_input('Enter your english airline tweet here:', '@AmericanAirline My flight was great!')
6
- st.write('The sentence is', airline_tweet)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
 
1
  import streamlit as st
2
  import os
3
+ import tensorflow as tf
4
+ from transformers import AutoTokenizer, TFBertModel
5
+ from tensorflow.keras.layers import Input, Dense
6
+ import numpy as np
7
+ import re
8
+ import emoji
9
+ import nltk
10
+ from nltk.corpus import stopwords
11
+ from nltk.stem.wordnet import WordNetLemmatizer
12
+
13
# Fetch the NLTK corpora that the stop-word list and the lemmatizer rely on.
for _corpus in ('stopwords', 'wordnet'):
    nltk.download(_corpus)

# Module-level settings and helpers shared by the rest of the script.
max_len = 35  # sequence length used for tokenizer padding/truncation and the model inputs
stop_words = stopwords.words("english")
lmtzr = WordNetLemmatizer()
19
+
20
# Emoticon spellings grouped by the placeholder label that replaces them.
# Group order matters: when the lower-cased form of a spelling appears in two
# groups (':d' is listed under smiley and is also the lower-cased ':D' of
# laugh), the earlier group wins.
_EMOTICON_GROUPS = (
    ('EMOT_SMILEY', (':-)', ':)', '(:', '(-:', ';p', ':-d', ':d')),
    ('EMOT_LAUGH', (':-D', ':D', 'X-D', 'XD', 'xD')),
    ('EMOT_LOVE', ('<3', ':*')),
    ('EMOT_CRY', (':,(', ":'(", ':"(', ':((')),
    ('EMOT_WINK', (';-)', ';)', ';-D', ';D', '(;', '(-;')),
    ('EMOT_FROWN', (':-(', ':(')),
)

# Flat lookup keyed by the lower-cased spelling, because clean_text lower-cases
# its input before emoticons are matched.
_EMOTICON_LABELS = {}
for _label, _spellings in _EMOTICON_GROUPS:
    for _spelling in _spellings:
        _EMOTICON_LABELS.setdefault(_spelling.lower(), _label)

# A URL ends at the first whitespace character; the text after it is kept.
_URL_PATTERN = re.compile(r'https?://\S+')
_MENTION_PATTERN = re.compile(r'@\S+')


def clean_text(text):
    """Normalize a tweet into a whitespace-separated string of tokens.

    Steps, in order: lower-case the text, drop URLs, drop the ``#`` character
    (the hashtag word itself is kept), drop ``@mentions``, turn emoji into
    their textual names with ``emoji.demojize``, replace whole-token emoticons
    with an ``EMOT_*`` placeholder, and lemmatize every token as a verb with
    the module-level ``lmtzr``.

    Args:
        text: The raw tweet as a ``str``.

    Returns:
        The cleaned tokens joined by single spaces (an empty string when no
        token is left).
    """
    text = text.lower()
    text = _URL_PATTERN.sub('', text)
    text = text.replace('#', '')
    text = _MENTION_PATTERN.sub('', text)

    # Handling emojis: each emoji becomes its textual name.
    text = emoji.demojize(text)

    # Handling emoticons: only tokens that are exactly an emoticon are
    # replaced, so an emoticon never rewrites part of another token.
    tokens = [_EMOTICON_LABELS.get(token, token) for token in text.split()]

    # Lemmatization
    return ' '.join(lmtzr.lemmatize(token, 'v') for token in tokens)
50
+
51
+
52
 
53
  st.title('Welcome to my twitter airline sentiment analysis !', anchor='center')
54
# Ask for the tweet to classify; the default value serves as an example.
airline_tweet = st.text_input('Enter your english airline tweet here:', '@AmericanAirline My flight was great! :)')
# NOTE(review): clean_text() is defined in this file but the tweet is tokenized
# as typed — confirm whether the saved weights expect cleaned text.

# Tokenizer and encoder are both created from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased', num_labels=2)
# NOTE(review): a DistilBERT checkpoint name is passed to TFBertModel — confirm
# this matches how 'sentiment_weights.h5' was produced before changing it.
bert = TFBertModel.from_pretrained('distilbert-base-uncased', num_labels=2)

# Classification graph: encoder -> max-pool over tokens -> dense -> dropout -> softmax.
input_ids = Input(shape=(max_len,), dtype=tf.int32, name='input_ids')
input_mask = Input(shape=(max_len,), dtype=tf.int32, name='attention_mask')

# Index 0 of the encoder output is the last hidden states.
last_hidden_states = bert.bert(input_ids, attention_mask=input_mask)[0]
pooled = tf.keras.layers.GlobalMaxPool1D()(last_hidden_states)
hidden = Dense(512, activation='relu')(pooled)
hidden = tf.keras.layers.Dropout(0.1)(hidden)

# Two output units, one per sentiment class; softmax yields probabilities.
class_probabilities = Dense(2, activation='softmax')(hidden)

model = tf.keras.Model(
    inputs={'input_ids': input_ids, 'input_mask': input_mask},
    outputs=class_probabilities,
)
model.load_weights('sentiment_weights.h5')

# Encode the tweet to exactly max_len tokens (padded or truncated) as TF tensors.
encoded_input = tokenizer(
    text=airline_tweet,
    add_special_tokens=True,
    max_length=max_len,
    truncation=True,
    padding='max_length',
    return_tensors='tf',
    return_token_type_ids=False,
    return_attention_mask=True,
    verbose=False,
)

# The feed keys must match the keys of the model's input dictionary.
prediction = model.predict({
    'input_ids': encoded_input['input_ids'],
    'input_mask': encoded_input['attention_mask'],
})

# Map the index of the highest-probability class to its label and show it.
sentiment_by_index = {0: 'negative', 1: 'positive'}
predicted_index = np.argmax(prediction)
st.write(f'The sentence is {sentiment_by_index[predicted_index]}')
90
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Third-party packages imported by app.py.
# `re` is part of the Python standard library and must not be listed here;
# `transformers` is imported by app.py and was missing.
tensorflow
keras
transformers
nltk
numpy
emoji
sentiment_weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89d3e89e8ac0bc6d7c690b4ba14475eab7fe8b1714f8e1d36880509990635273
3
+ size 439786000