Andy Lau committed on
Commit
af1cbd8
1 Parent(s): 282fc84

added standard file

Browse files
Files changed (6) hide show
  1. FunctionsModelSA_V1.py +377 -0
  2. app.py +174 -0
  3. figures/ModelSA.png +0 -0
  4. main_app.py +15 -0
  5. requirements.txt +4 -0
  6. utils.py +84 -0
FunctionsModelSA_V1.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import s3fs
2
+ import pandas as pd
3
+ import numpy as np
4
+ from numpy import arange
5
+ from colour import Color
6
+ import plotly.graph_objects as go
7
+ from nltk import tokenize
8
+ from IPython.display import Markdown
9
+ from PIL import ImageColor
10
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
11
+ import nltk
12
+ nltk.download('punkt')
13
+ import email
14
+ import codecs
15
+ import pickle
16
+ import string
17
+ from scipy import spatial
18
+ import re
19
+ import pytorch_lightning as pl
20
+ from bs4 import BeautifulSoup
21
+ import ipywidgets as widgets
22
+ from ipywidgets import FileUpload
23
+ from urlextract import URLExtract
24
+ from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
25
+ import torch.nn as nn
26
+ import torch
27
+ from ipywidgets import interact, Dropdown
28
+ import boto3
29
+ # from sagemaker import get_execution_role
30
+ from scipy import spatial
31
+ from ipyfilechooser import FileChooser
32
+ import streamlit as st
33
+ import utils
34
+
35
+
36
# Shared configuration dictionary for this module.
# - 'BERT_MODEL_NAME' is the checkpoint ToneTagger loads its encoder from.
# - 'LABEL_COLUMNS' names the columns of the tone frame built in convert_text_to_tone().
# - 'rf_labels' is the column list prediction() uses to build the rate-model input row.
# NOTE(review): 'label_opstimistic' is spelled this way in both lists; it is a
# runtime column name, so presumably the stored data/models use the same
# spelling -- verify before renaming.
PARAMS={
    'BATCH_SIZE': 8,
    'MAX_TOKEN_COUNT':100,
    'BERT_MODEL_NAME':'google/bert_uncased_L-2_H-128_A-2' ,
    'N_EPOCHS': 10,
    'n_classes':8,
    # The eight tone labels.
    'LABEL_COLUMNS': ['label_analytical', 'label_casual', 'label_confident', 'label_friendly',
                      'label_joyful', 'label_opstimistic', 'label_respectful',
                      'label_urgent'],
    'TEXTCOL': 'text',
    # Tone labels followed by one-hot industry and campaign-type column names.
    'rf_labels':['label_analytical', 'label_casual', 'label_confident',
                 'label_friendly', 'label_joyful', 'label_opstimistic',
                 'label_respectful', 'label_urgent',
                 'industry_Academic and Education', 'industry_Energy',
                 'industry_Entertainment', 'industry_Finance and Banking',
                 'industry_Healthcare', 'industry_Hospitality', 'industry_Real Estate',
                 'industry_Retail', 'industry_Software and Technology',
                 'campaign_type_Abandoned_Cart', 'campaign_type_Engagement',
                 'campaign_type_Newsletter', 'campaign_type_Product_Announcement',
                 'campaign_type_Promotional', 'campaign_type_Review_Request',
                 'campaign_type_Survey', 'campaign_type_Transactional',
                 'campaign_type_Usage_and_Consumption', 'campaign_type_Webinar']
}
59
+
60
# Confidence-interval table, loaded once at import time from
# s3://emailcampaignmodeldata/ModelSADataSets/CI_RATES.csv
# (previously read from a local file: CI_rates=pd.read_csv('CI_RATES.csv')).
# prediction() selects the row whose 'model' column equals the target and reads
# its '2_5' and '97_5' columns.
CI_rates = utils.get_files_from_aws('emailcampaignmodeldata','ModelSADataSets/CI_RATES.csv')
63
+
64
+ ### create file uploading widget
65
def email_upload():
    """Show a file-upload widget restricted to ``.eml`` files and return it.

    Prints a prompt, renders a ``FileUpload`` widget that accepts multiple
    ``.eml`` files, and returns the widget so the caller can read the uploaded
    content from it later.

    Returns:
        FileUpload: the displayed upload widget.
    """
    # ``display`` is only available as an implicit builtin inside an IPython
    # session; importing it explicitly avoids a NameError when this module is
    # imported from a plain Python process.
    from IPython.display import display

    print("Please upload your email (In EML Format)")
    upload = FileUpload(accept='.eml', multiple=True)
    display(upload)
    return upload
70
+
71
def parse_email(uploaded_file):
    """Extract the ``<body>`` text of the first uploaded e-mail.

    Args:
        uploaded_file: object whose ``value`` attribute maps a file name to a
            dict holding the raw message bytes under ``'content'``
            (presumably the widget returned by ``email_upload`` -- verify
            against the caller).

    Returns:
        str: the text of every ``<body>`` element found in the message's
        ``text/*`` parts, concatenated without a separator.

    Raises:
        IndexError: if nothing has been uploaded.
    """
    uploads = uploaded_file.value
    # Only the first uploaded file is parsed, even when several were selected.
    filename = list(uploads.keys())[0]
    raw_message = codecs.decode(uploads[filename]['content'], encoding="utf-8")
    message = email.message_from_string(raw_message)

    body_texts = []
    for part in message.walk():
        # walk() also yields multipart containers (whose payload is a list of
        # sub-messages) and non-text attachments. The previous check,
        # ``if part.get_content_type():``, was always true, so those were
        # stringified and handed to the HTML parser as well.
        if part.get_content_maintype() != 'text':
            continue
        # NOTE(review): no parser is named, so BeautifulSoup picks whichever is
        # installed; results for plain-text parts can differ between parsers.
        soup = BeautifulSoup(str(part.get_payload()))
        body_texts.extend(tag.text for tag in soup.find_all('body'))
    return "".join(body_texts)
85
+
86
+
87
def text_clean(x,punct=True):
    """Normalise raw e-mail text before tokenisation.

    The text is lower-cased, stripped of non-ASCII characters, and then run
    through a fixed sequence of regex substitutions (links, apostrophe
    suffixes, tokens containing digits, repeated whitespace, isolated symbols,
    ``@``/``#`` fragments, ``=`` signs). Tabs and newlines are removed at the
    end.

    Args:
        x: text to clean.
        punct: when ``True`` every ``string.punctuation`` character is
            replaced by a space as well.

    Returns:
        str: the cleaned text. Runs of spaces may remain because whitespace is
        collapsed before the later substitutions are applied.
    """
    ### Light
    x = x.lower().encode('ascii', 'ignore').decode()  # lowercase, drop non-ASCII
    light_patterns = (
        r'https*\S+',        # remove links
        r'http*\S+',
        r'\'\w+',            # apostrophe suffixes ('t, 's, ...)
        r'\w*\d+\w*',        # any token containing a digit
        r'\s{2,}',           # collapse whitespace runs
        r'\s[^\w\s]\s',      # a lone symbol between whitespace
    )
    for pattern in light_patterns:
        x = re.sub(pattern, ' ', x)

    ### Heavy
    for pattern in (r'@\S', r'#\S+'):
        x = re.sub(pattern, ' ', x)
    x = x.replace('=', ' ')
    if punct == True:
        x = re.sub('[%s]' % re.escape(string.punctuation), ' ', x)
    # remove single letters and numbers surrounded by space
    x = re.sub(r'\s[a-z]\s|\s[0-9]\s', ' ', x)

    # Leftover control characters and mis-encoded sequences are dropped outright.
    for junk in [' Â\x8a','\t','\n','Ã\x83','Â\x92','Â\x93','Â\x8a','Â\x95']:
        x = x.replace(junk, '')

    return x
113
+
114
+ #### BERT MODEL LOAD REQUIREMENTS ####
115
+
116
+
117
class ToneTagger(pl.LightningModule):
    """BERT encoder followed by one linear layer producing per-tone scores.

    The attribute names ``bert`` and ``classifier`` are the keys of the saved
    state dict, so they must not be renamed.
    """

    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
        """Build the network.

        Args:
            n_classes: number of outputs of the linear head.
            n_training_steps: stored on the instance; not used in this class.
            n_warmup_steps: stored on the instance; not used in this class.
        """
        super().__init__()
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps
        self.bert = BertModel.from_pretrained(PARAMS['BERT_MODEL_NAME'], return_dict=True)
        self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
        self.criterion = nn.BCELoss()

    def forward(self, input_ids, attention_mask):
        """Return sigmoid-activated scores, one per class, for each input row."""
        encoded = self.bert(input_ids, attention_mask)
        logits = self.classifier(encoded.pooler_output)
        return torch.sigmoid(logits)
132
+
133
+
134
# LOAD IN PRE TRAINED MODEL WITH WEIGHTS
model = ToneTagger(8)  # load up the model architecture with 8 different tones
# map_location pins the weights to the CPU: bert_tones() feeds the model
# tensors straight from the tokenizer without moving them to another device,
# and without it a checkpoint saved from a GPU fails to load on a CPU-only host.
model.load_state_dict(torch.load("models/SAMODEL", map_location=torch.device("cpu")))  # populate the weights of the model
model.eval()  # inference mode
138
+
139
+
140
+
141
def bert_tones(text_sentences,model):
    """Clean each sentence and predict its tone scores with ``model``.

    Args:
        text_sentences: iterable of sentence strings.
        model: callable taking ``(input_ids, attention_mask)`` and returning a
            tensor of scores.

    Returns:
        tuple: ``(text, predictions)`` -- the sentences cleaned with
        punctuation kept, and one numpy score array per sentence, in input
        order.
    """
    tokenizer = BertTokenizer.from_pretrained('google/bert_uncased_L-2_H-128_A-2')

    display_text = []
    tone_scores = []
    for sentence in text_sentences:
        # Punctuation is kept for the text handed back, stripped for the model.
        display_text.append(text_clean(sentence, False))
        encoded = tokenizer.encode_plus(
            text_clean(sentence),
            add_special_tokens=True,
            max_length=100,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        with torch.no_grad():
            scores = model(encoded['input_ids'], encoded['attention_mask'])
        # One sentence per forward pass, so row 0 is the only row.
        tone_scores.append(np.array(scores.cpu().numpy()[0]))

    return display_text, tone_scores
168
+
169
+
170
def convert_text_to_tone(text,model=model,params=PARAMS):
    """Split ``text`` into sentences and score the tone of each one.

    Args:
        text: raw e-mail text, punctuation included (needed for sentence
            splitting).
        model: tone model passed on to ``bert_tones``.
        params: configuration dict; only ``'LABEL_COLUMNS'`` is read.

    Returns:
        tuple: ``(final, tones)``. ``final`` is a one-row DataFrame with the
        columns ``text`` (cleaned sentences), ``sentiment`` (VADER polarity
        scores of the whole cleaned text) and ``sentencetone`` (per-sentence
        score arrays). ``tones`` is a one-row DataFrame holding the mean score
        per label.
    """
    # VADER sentiment of the whole text (not currently used downstream).
    sentiment_dict = SentimentIntensityAnalyzer().polarity_scores(text_clean(text))

    # Sentence boundaries come from NLTK.
    sentences = tokenize.sent_tokenize(text)
    plain_text, predictions = bert_tones(sentences, model)

    final = pd.DataFrame(
        [[plain_text, sentiment_dict, predictions]],
        columns=['text','sentiment','sentencetone'],
    )
    # Average the per-sentence score vectors into a single vector.
    agg_tones = final['sentencetone'].apply(np.mean, axis=0)
    tones = pd.DataFrame(agg_tones.tolist(), columns=params['LABEL_COLUMNS'])
    return final, tones
189
+
190
+
191
### This will be abstracted away to a more dynamic model
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted model files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Each rate model is loaded once at import time. The path variables are kept
# as module-level names alongside the loaded models.
brf='Rate_Models/bounce_rate_model.sav'
BRM = _load_pickle(brf)
orf='Rate_Models/open_rate_model.sav'
ORM = _load_pickle(orf)
urf='Rate_Models/unsubscribe_rate_model.sav'
URM = _load_pickle(urf)
crf='Rate_Models/click_trough_rate_model.sav'
CRM = _load_pickle(crf)
CV='Rate_Models/Conversion_rate.sav'
ConM = _load_pickle(CV)
CTOR='Rate_Models/Click-To-Open_Rates.sav'
CTORM = _load_pickle(CTOR)
RV='Rate_Models/Revenue_per_email.sav'
RVM = _load_pickle(RV)

# Target name -> fitted model; prediction() looks models up by these keys.
model_dict={'Open_Rate':ORM,
            'Click_Through_Rate': CRM,
            'Unsubscribe_Rate': URM,
            'Bounce_Rate':BRM,
            'Click_To_Open_Rate': CTORM,
            'Conversion_Rate': ConM,
            'Revenue_Per_Email':RVM}
214
+
215
+
216
+
217
def plot_CI(pred,lower,upper,scale_factor=0.5,streamlit=False):
    """Draw the prediction as a point on a number line with its interval shaded.

    Args:
        pred: predicted value of the target variable.
        lower: lower bound of the interval.
        upper: upper bound of the interval.
        scale_factor: the x axis runs from 0 to ``upper + upper*scale_factor``.
        streamlit: render with ``st.plotly_chart`` when true, otherwise call
            ``fig.show()``.
    """
    fig = go.Figure()
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False,
                     zeroline=True, zerolinecolor='black', zerolinewidth=3,
                     showticklabels=False)

    # The prediction itself, drawn on the zero line.
    point = go.Scatter(
        x=[pred], y=[0,0], mode='markers', marker_size=10,line=dict(color="red")
    )
    fig.add_trace(point)
    fig.update_layout(
        height=200,
        plot_bgcolor='white',
        xaxis_range=[0,upper+upper*scale_factor],
        showlegend=False,
    )

    # Labelled vertical lines at both bounds, with the band between them shaded.
    for bound in (lower, upper):
        fig.add_vline(x=bound,annotation_text=f"{bound}",annotation_position="top")
    fig.add_vrect(lower,upper,fillcolor='red',opacity=0.25,annotation_text='95% CI',annotation_position="outside top")
    fig.update_layout(title_text=f'The Predicted Value is {pred}', title_x=0.5)

    if not streamlit:
        fig.show()
    else:
        st.plotly_chart(fig)
246
+
247
def find_max_cat(df,target,industry,campaign):
    """Find the best ``target`` value among rows flagged for an industry and campaign.

    Args:
        df: DataFrame containing ``target`` plus the flag columns ``industry``
            and ``campaign``.
        target: name of the column to maximise.
        industry: name of the industry flag column (rows with value 1 match).
        campaign: name of the campaign flag column (rows with value 1 match).

    Returns:
        tuple: ``(best, rec)`` where ``best`` is the maximum ``target`` rounded
        to 3 decimals and ``rec`` is the slice ``[3:11]`` of the winning row
        (presumably the eight tone columns -- depends on the column order of
        ``df``). ``(0, 0)`` when no row matches.
    """
    matches = df[(df[campaign]==1) & (df[industry]==1)]
    if len(matches) == 0:
        return 0,0
    best_row = df.loc[matches[target].idxmax()]
    return round(matches[target].max(),3), best_row[3:11]
254
+
255
def recommend(tones,recommend_changes,change,target,streamlit=False):
    """Plot the current tones next to the recommended tone changes.

    Args:
        tones: one-row DataFrame of current tone values; its columns label the
            bars.
        recommend_changes: Series of per-tone changes; bars are ordered by
            absolute change.
        change: expected improvement of ``target``, shown rounded in the title.
        target: name of the metric, shown in the title.
        streamlit: render with ``st.plotly_chart`` when true, otherwise call
            ``fig.show()``.
    """
    outline = dict(color='rgba(58, 71, 80, 1.0)', width=3)

    current_bar = go.Bar(
        y=tones.columns,
        x=tones.values[0],
        name='Current Tones',
        orientation='h',
        width=.5,
        marker=dict(color='#00e6b1', line=outline),
    )
    # Only the change bars carry value labels.
    change_bar = go.Bar(
        y=tones.columns,
        x=recommend_changes,
        name='Recommend changes',
        orientation='h',
        text=np.round(recommend_changes,3),
        width=0.3,
        marker=dict(color='#e60f00', line=outline),
    )

    fig = go.Figure()
    fig.add_trace(current_bar)
    fig.add_trace(change_bar)
    fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)

    bar_order = recommend_changes.sort_values(key=abs,ascending=True).index
    fig.update_layout(
        height=700,
        plot_bgcolor='white',
        barmode='stack',
        yaxis={'categoryorder':'array','categoryarray': bar_order},
        title_text=f'The following Changes will yield a {round(change,3)} increase in {target}',
    )

    if not streamlit:
        fig.show()
    else:
        st.plotly_chart(fig)
292
+
293
+
294
+
295
def prediction(tones,campaign_val,industry_val,target):
    """Predict ``target`` for the given tones, campaign type and industry.

    Args:
        tones: one-row DataFrame of tone values.
        campaign_val: name of the campaign flag column to set to 1.
        industry_val: name of the industry flag column to set to 1.
        target: key into ``model_dict`` and value matched against the
            ``'model'`` column of ``CI_rates``.

    Returns:
        tuple: ``(pred, lower, higher, model)`` -- the prediction and its
        interval bounds, each rounded to 3 decimals, plus the model used.
    """
    # Re-index onto the full feature list; features not supplied become 0.
    features = pd.DataFrame(tones, columns=PARAMS['rf_labels']).fillna(0)
    for flag_column in (campaign_val, industry_val):
        features.loc[0, flag_column] = 1

    rate_model = model_dict[target]
    pred = rate_model.predict(features)[0]

    # The stored '2_5' / '97_5' values are added to the prediction as offsets.
    offsets = CI_rates[CI_rates['model']==target]
    lower = pred + offsets['2_5'].values[0]
    higher = pred + offsets['97_5'].values[0]
    return round(pred,3), round(lower,3), round(higher,3), rate_model
305
+
306
def load_data(buckets,key):
    """Load the tone/target table from S3 and prepare it for lookups.

    Duplicate rows are dropped, ``industry`` and ``campaign_type`` are one-hot
    encoded, the ``'Unnamed: 0'`` and ``'body'`` columns are removed, and three
    rate columns are renamed to the underscore form.

    Args:
        buckets: S3 bucket name.
        key: object key of the CSV inside the bucket.

    Returns:
        DataFrame: the prepared table.
    """
    renamed_columns = {
        'Click-To-Open Rates':'Click_To_Open_Rate',
        'Conversion Rate':'Conversion_Rate',
        'Revenue Per email':'Revenue_Per_Email',
    }
    raw = utils.get_files_from_aws(buckets,key)
    encoded = pd.get_dummies(raw.drop_duplicates(), columns=['industry','campaign_type'])
    return encoded.drop(columns=['Unnamed: 0','body']).rename(columns=renamed_columns)
315
+
316
+
317
def plot_table(sorted_setence_tuple,streamlit=True):
    """Render the sentence table at the bottom of the page.

    Args:
        sorted_setence_tuple: sequence of ``(sentence, score, rgb)`` tuples,
            where ``rgb`` is the colour tuple used to fill the sentence cell.
            An empty sequence yields an empty table.
        streamlit: render with ``st.plotly_chart`` when true, otherwise call
            ``fig.show()``.
    """
    # Split the rows into columns in one pass. The previous
    # ``list(zip(*rows))[i]`` form transposed three times and raised
    # IndexError when there were no rows.
    sentences = [row[0] for row in sorted_setence_tuple]
    scores = [row[1] for row in sorted_setence_tuple]
    # str() of a colour tuple such as (72, 240, 201) gives the 'rgb(72, 240, 201)' form.
    rbg_list = ['rgb'+str(row[2]) for row in sorted_setence_tuple]

    fig = go.Figure(data=[go.Table(
        header=dict(values=['<b>Sentences</b>', '<b>Difference from Recommended Tone</b>'],
                    line_color = 'darkslategray',
                    fill_color = '#010405',
                    align = 'center',
                    font=dict(family="Metropolis",color='white', size=16)),
        cells=dict(values=[sentences,  # 1st column
                           scores],    # 2nd column
                   line_color='darkslategray',
                   fill_color=[rbg_list],
                   align=['left','center'],
                   font=dict(family="Arial",size=12)))
    ])

    if streamlit:
        st.plotly_chart(fig)
    else:
        fig.show()
344
+
345
def corrections(best,df,streamlit=False):
    """Rank the sentences by how far their tone is from the best tone and plot them.

    Args:
        best: tone values of the best e-mail for the current categories.
        df: one-row DataFrame whose ``'text'`` and ``'sentencetone'`` cells
            hold the sentences and their tone vectors, in matching order.
        streamlit: forwarded to ``plot_table``.
    """
    # loxz green primary to Loxz light red
    colors=['#48f0c9','#6ef5d6','#94f7e1','#bbfaec','#e6fff9','#ffe7e6','#ffc3bf','#ffa099','#ff7c73','#ff584d']
    last_color = len(colors) - 1

    sentence_order = []
    for text, cur in zip(df['text'][0], df['sentencetone'][0]):
        distance = spatial.distance.cosine(best,cur)  # cosine distance
        new_value = round(distance * 100)  # distance expressed on a 0-100 scale
        # round(distance * 10) reaches 10 once the distance is about 0.95 or
        # more, and cosine distance can be as large as 2, but the palette only
        # has indices 0..9. Clamp to avoid IndexError.
        color_value = min(max(int(round(distance * 10)), 0), last_color)
        rbg = ImageColor.getcolor(f'{colors[color_value]}', "RGB")
        sentence_order.append((text,new_value,rbg))

    # Largest distance first.
    sorted_sentences = sorted(sentence_order,key=lambda x: x[1],reverse=True)
    plot_table(sorted_sentences,streamlit)
363
+
364
def read_file(fc):
    """Read the selected e-mail file and return the text of its ``<body>`` elements.

    Args:
        fc: object whose ``selected`` attribute is the path of the file to
            read (presumably an ``ipyfilechooser.FileChooser`` -- verify
            against the caller).

    Returns:
        str: the text of every ``<body>`` element found in the message's
        ``text/*`` parts, concatenated without a separator.
    """
    with open(fc.selected) as handle:
        raw_message = handle.read()
    message = email.message_from_string(raw_message)

    body_texts = []
    for part in message.walk():
        # walk() also yields multipart containers (whose payload is a list of
        # sub-messages) and non-text attachments. The previous check,
        # ``if part.get_content_type():``, was always true, so those were
        # stringified and handed to the HTML parser as well.
        if part.get_content_maintype() != 'text':
            continue
        # NOTE(review): no parser is named, so BeautifulSoup picks whichever is
        # installed; results for plain-text parts can differ between parsers.
        soup = BeautifulSoup(str(part.get_payload()))
        body_texts.extend(tag.text for tag in soup.find_all('body'))
    return "".join(body_texts)
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ast import arg
2
+ import FunctionsModelSA_V1
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import PIL
6
+ # import re
7
+ # from io import StringIO
8
+ # import boto3
9
+ import time
10
+
11
+ import main_app
12
+ import utils
13
+
14
def table_data():
    """Return the two-column (Field, Data) model information table."""
    # Field / value pairs, in display order.
    rows = [
        ('Data Scientist', 'Jeffrey Ott'),
        ('Dataset', 'Internal + Campaign monitor'),
        ('Algorithm', 'BERT_Uncased_L_2_H_128_A-2, Single Linear Layer Neural Network, Random Forest'),
        ('Framework', 'Pytorch'),
        ('Ensemble', 'Bootstrapping'),
        ('Domain', 'NLP Text Classification'),
        ('Model Size', '16.8 MB'),
    ]
    fields = [name for name, _ in rows]
    values = [value for _, value in rows]
    return pd.DataFrame.from_dict({'Field': fields, 'Data': values})
44
+
45
+
46
+
47
def add_bg_from_url():
    """Inject CSS that gives the ``.stApp`` container a fixed gradient background.

    Despite the name, no URL is involved: the background is a CSS
    ``linear-gradient``. The doubled braces are f-string escapes for the
    literal braces of the CSS rule.
    """
    st.markdown(
        f"""
<style>
.stApp {{
background-image: linear-gradient(#0A3144,#126072,#1C8D99);
background-attachment: fixed;
background-size: cover
}}
</style>
""",
        unsafe_allow_html=True
    )
60
+
61
+ # add_bg_from_url()
62
+
63
# Page title.
st.markdown("# Sentiment Analysis: Email Industry")

# col1, col2, col3 = st.columns([1,1,1])

# with col2:
#     img = PIL.Image.open("figures/ModelCC_solid.png")
#     st.image(img)

# Four equal-width columns, each showing one static headline value.
stats_col1, stats_col2, stats_col3, stats_col4 = st.columns([1,1,1,1])

with stats_col1:
    st.metric(label="Production", value="Production")
with stats_col2:
    st.metric(label="Accuracy", value="85%")

with stats_col3:
    st.metric(label="Speed", value="3.86 ms")

with stats_col4:
    st.metric(label="Industry", value="Email")
83
+
84
+
85
+
86
# Sidebar: model description, model information table and external links.
with st.sidebar:

    with st.expander('Model Description', expanded=False):
        img = PIL.Image.open("figures/ModelSA.png")
        st.image(img)
        # NOTE(review): the text below says "softmax dense layer", while
        # ToneTagger.forward in FunctionsModelSA_V1.py applies a sigmoid --
        # confirm which wording is intended.
        st.markdown('The model seeks to solve the problem of how to set the tone for an email campaign appropriately. This 5th generation model from the Loxz family uses state-of-the-art NLP to determine and predict the optimized sentiment of an email using tokenization techniques. The model will analyze any email text “shape” and help the user understand the tone and how that tone correlates with the metric of interest. We applied a pre-trained tiny BERT model to vectorize the email campaign text body, then a softmax dense layer was added to get the multi-label classifications. Email metrics are provided prior to campaign launch, and the model determines the optimal engagement rate based on several factors, including inputs by the campaign engineer.')

    with st.expander('Model Information', expanded=False):
        # CSS that hides the row-index column of the table rendered below.
        hide_table_row_index = """
<style>
thead tr th:first-child {display:none}
tbody th {display:none}
</style>
"""
        st.markdown(hide_table_row_index, unsafe_allow_html=True)
        st.table(table_data())

    # Each button opens its URL in a new tab or window (see utils.url_button).
    utils.url_button('Model Homepage','https://loxz.com/#/models/SA')
    # url_button('Full Report','https://resources.loxz.com/reports/realtime-ml-character-count-model')
    utils.url_button('Amazon Market Place','https://aws.amazon.com/marketplace')
106
+
107
+
108
# Options offered in the three select boxes. The industry and campaign names
# are later prefixed with 'industry_' / 'campaign_type_' to form column names.
industry_lists = ['Software and Technology', 'Academic and Education',
                  'Entertainment', 'Finance and Banking', 'Hospitality',
                  'Real Estate', 'Retail', 'Energy', 'Healthcare']

campaign_types = ['Webinar', 'Engagement', 'Product_Announcement', 'Promotional',
                  'Newsletter', 'Abandoned_Cart', 'Review_Request', 'Survey',
                  'Transactional', 'Usage_and_Consumption']

target_variables = ['Conversion_Rate','Click_To_Open_Rate','Revenue_Per_Email']


input_text = st.text_area("Please enter your email text here", height=300)


industry = st.selectbox(
    'Please select your industry',
    industry_lists,
    index=6  # defaults to 'Retail'
)

# The label previously repeated 'Please select your industry' (copy-paste
# slip), although this box selects the campaign type.
campaign = st.selectbox(
    'Please select your campaign type',
    campaign_types,
    index=5  # defaults to 'Abandoned_Cart'
)

target = st.selectbox(
    'Please select your target variable',
    target_variables,
    index=1  # defaults to 'Click_To_Open_Rate'
)
140
+
141
+
142
# Run the full pipeline when the button is pressed: load the reference data,
# score the text, predict the target and plot the results.
if st.button('Generate Predictions'):
    start_time = time.time()
    # `input_text is ""` compared object identity with a literal, which is not
    # a reliable emptiness test. Whitespace-only input is rejected too, since
    # it contains no sentence to score.
    if not input_text.strip():
        st.error('Please enter a sentence!')
    else:
        placeholder = st.empty()
        placeholder.text('Loading Data')

        # Starting predictions
        bucket='emailcampaignmodeldata'
        # file_key = 'fullEmailBody/fullemailtextbody_labeled_3rates_8tones_20220524.csv'

        # email_data = utils.get_files_from_aws(bucket,file_key)
        tone_key = 'ModelSADataSets/Tone_and_target.csv'
        tone_data = FunctionsModelSA_V1.load_data(bucket,tone_key)
        test_predictions,tones = FunctionsModelSA_V1.convert_text_to_tone(input_text)

        # st.dataframe(test_predictions)
        # st.dataframe(tones)
        campaign_val='campaign_type_'+campaign
        industry_val='industry_'+ industry
        pred,lower,upper,model = FunctionsModelSA_V1.prediction(tones,campaign_val,industry_val,target)
        best_target,best_target_tones = FunctionsModelSA_V1.find_max_cat(tone_data,target,industry_val,campaign_val)

        FunctionsModelSA_V1.plot_CI(pred,lower,upper,streamlit=True)
        # find_max_cat returns (0, 0) when no reference row matches. The
        # recommendations are shown only when a better reference e-mail exists.
        if((best_target!=0) and (pred<best_target)):
            recommended_changes=(best_target_tones-tones.loc[0])
            change=best_target-pred
            FunctionsModelSA_V1.recommend(tones,recommended_changes,change,target,streamlit=True)
            FunctionsModelSA_V1.corrections(best_target_tones,test_predictions,streamlit=True)

        # Clear the 'Loading Data' message. This stays inside the else branch
        # because the placeholder only exists there.
        placeholder.empty()
        # print(time.time() - start_time)
figures/ModelSA.png ADDED
main_app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
# Runs at import time: switch the page to the wide layout, then inject CSS
# that sets a background and white text on <body>.
st.set_page_config(layout="wide")

st.markdown(
    """
<style>
body {
background-image: linear-gradient(#2e7bcf,#2e7bcf);
color: white;
}
</style>
""",
    unsafe_allow_html=True,
)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ colour
4
+ nltk
utils.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import s3fs
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ from numpy import arange
6
+ from colour import Color
7
+ import plotly.graph_objects as go
8
+ from nltk import tokenize
9
+ from IPython.display import Markdown
10
+ from PIL import ImageColor
11
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
12
+ import nltk
13
+ nltk.download('punkt')
14
+ from io import StringIO
15
+ from scipy import spatial
16
+ import re
17
+ import pytorch_lightning as pl
18
+ from bs4 import BeautifulSoup
19
+ import ipywidgets as widgets
20
+ from ipywidgets import FileUpload
21
+ from urlextract import URLExtract
22
+ from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
23
+ import torch.nn as nn
24
+ import torch
25
+ from ipywidgets import interact, Dropdown
26
+ import boto3
27
+ # from sagemaker import get_execution_role
28
+ from scipy import spatial
29
+ from bokeh.models.widgets import Div
30
+ import streamlit as st
31
+
32
+
33
def get_files_from_aws(bucket,prefix):
    """Download a CSV object from S3 and return it as a DataFrame.

    Credentials are read from the Streamlit secrets ``aws_id`` and ``aws_key``.

    Args:
        bucket (str): bucket name.
        prefix (str): key of the file inside the bucket.

    Returns:
        DataFrame: the parsed CSV (decoded as UTF-8).
    """
    s3_client = boto3.client(
        's3',
        aws_access_key_id = st.secrets["aws_id"],
        aws_secret_access_key = st.secrets["aws_key"],
    )
    response = s3_client.get_object(Bucket=bucket,Key=prefix)
    csv_text = response['Body'].read().decode('utf-8')
    return pd.read_csv(StringIO(csv_text))
51
+
52
def url_button(button_name,url):
    """Render a button that opens ``url`` in a new tab or window when clicked.

    The URL is interpolated unescaped into a JavaScript snippet, so it should
    only ever come from trusted, hard-coded values.
    """
    if not st.button(button_name):
        return
    # An <img> with an empty src fails to load immediately, which fires the
    # onerror handler and runs the window.open call.
    js = f"window.open('{url}')"  # New tab or window
    html = f'<img src onerror="{js}">'
    st.bokeh_chart(Div(text=html))
58
+
59
+
60
+
61
# Configuration dictionary with the same keys and values as PARAMS in
# FunctionsModelSA_V1.py. Nothing else in this module reads it.
# NOTE(review): 'label_opstimistic' is spelled this way on purpose or by
# accident; it is a runtime column name -- verify before renaming.
PARAMS={
    'BATCH_SIZE': 8,
    'MAX_TOKEN_COUNT':100,
    'BERT_MODEL_NAME':'google/bert_uncased_L-2_H-128_A-2' ,
    'N_EPOCHS': 10,
    'n_classes':8,
    # The eight tone labels.
    'LABEL_COLUMNS': ['label_analytical', 'label_casual', 'label_confident', 'label_friendly',
                      'label_joyful', 'label_opstimistic', 'label_respectful',
                      'label_urgent'],
    'TEXTCOL': 'text',
    # Tone labels followed by one-hot industry and campaign-type column names.
    'rf_labels':['label_analytical', 'label_casual', 'label_confident',
                 'label_friendly', 'label_joyful', 'label_opstimistic',
                 'label_respectful', 'label_urgent',
                 'industry_Academic and Education', 'industry_Energy',
                 'industry_Entertainment', 'industry_Finance and Banking',
                 'industry_Healthcare', 'industry_Hospitality', 'industry_Real Estate',
                 'industry_Retail', 'industry_Software and Technology',
                 'campaign_type_Abandoned_Cart', 'campaign_type_Engagement',
                 'campaign_type_Newsletter', 'campaign_type_Product_Announcement',
                 'campaign_type_Promotional', 'campaign_type_Review_Request',
                 'campaign_type_Survey', 'campaign_type_Transactional',
                 'campaign_type_Usage_and_Consumption', 'campaign_type_Webinar']
}
84
+