Spaces:
Runtime error
Runtime error
Duy-Anh Dang
committed on
Commit
·
8fe1e33
1
Parent(s):
41e5416
updated app.py april 2023
Browse files
app.py
CHANGED
@@ -14,6 +14,13 @@ import joblib
|
|
14 |
from bokeh.models.widgets import Div
|
15 |
|
16 |
import email
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
def table_data():
|
@@ -66,6 +73,65 @@ def get_industry_code_dict(training_dataset):
|
|
66 |
zip(training_dataset.industry, training_dataset.industry_code))
|
67 |
return industry_code_dict
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
# extract email body from parse email
|
71 |
def email_body_extractor(email_data):
|
@@ -187,7 +253,7 @@ campaign_types = [
|
|
187 |
'Survey',
|
188 |
'Newsletter',
|
189 |
'Engagement',
|
190 |
-
'
|
191 |
'Review_Request',
|
192 |
'Product_Announcement',
|
193 |
'Abandoned_Cart'
|
@@ -228,10 +294,10 @@ target = st.selectbox(
|
|
228 |
|
229 |
st.markdown("""---""")
|
230 |
|
231 |
-
char_reco_preference = st.selectbox(
|
232 |
-
'Do you want to increase or decrease your character count in the email?',
|
233 |
-
["Increase", "Decrease"],
|
234 |
-
index=1)
|
235 |
|
236 |
|
237 |
def get_files_from_aws(bucket, prefix):
|
@@ -289,9 +355,10 @@ if st.button('Generate Predictions'):
|
|
289 |
# print("Getting Data Time: %s seconds" % (time.time() - start_time))
|
290 |
|
291 |
industry_code_dict = get_industry_code_dict(email_data)
|
292 |
-
|
|
|
293 |
|
294 |
-
email_body, character_cnt, url_cnt =
|
295 |
|
296 |
# Start the prediction
|
297 |
# Need to solve X test issue
|
@@ -314,11 +381,12 @@ if st.button('Generate Predictions'):
|
|
314 |
character_cnt), unsafe_allow_html=True)
|
315 |
# st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
|
316 |
if target == 'conversion_rate':
|
317 |
-
target_vis = '
|
318 |
else:
|
319 |
-
target_vis = '
|
|
|
320 |
st.markdown('#### The model predicts that it achieves a <span style="color:blue">{}</span> of <span style="color:blue">{}</span>%'.format(
|
321 |
-
target_vis, str(round(output_rate*100,
|
322 |
selected_industry_code = industry_code_dict.get(industry)
|
323 |
|
324 |
if target == "click_to_open_rate":
|
@@ -335,16 +403,34 @@ if st.button('Generate Predictions'):
|
|
335 |
df_reco_sort = df_reco.sort_values(by=[selected_variable])
|
336 |
df_reco = df_reco.drop_duplicates(subset=selected_variable)
|
337 |
|
338 |
-
preference = char_reco_preference
|
339 |
-
if preference == "Increase":
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
# decrease character reco
|
344 |
-
if preference == "Decrease":
|
345 |
-
|
346 |
-
|
347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
348 |
|
349 |
if selected_variable == "Open_Rate":
|
350 |
selected_variable = "Click-to-Open_Rate"
|
@@ -357,12 +443,54 @@ if st.button('Generate Predictions'):
|
|
357 |
st.markdown('#### You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(
|
358 |
selected_variable), unsafe_allow_html=True)
|
359 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
for _, row in df_reco_opt_rank.iterrows():
|
361 |
Character_Count = row[1]
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
|
367 |
placeholder.empty()
|
368 |
# print(time.time() - start_time)
|
|
|
14 |
from bokeh.models.widgets import Div
|
15 |
|
16 |
import email
|
17 |
+
#from ipyfilechooser import FileChooser
|
18 |
+
|
19 |
+
#from IPython.display import display
|
20 |
+
from io import BytesIO
|
21 |
+
from bs4 import BeautifulSoup
|
22 |
+
import matplotlib.pyplot as plt
|
23 |
+
import numpy as np
|
24 |
|
25 |
|
26 |
def table_data():
|
|
|
73 |
zip(training_dataset.industry, training_dataset.industry_code))
|
74 |
return industry_code_dict
|
75 |
|
76 |
+
def parse_email(uploaded_file):
    """Read an e-mail file and collect the text of its HTML ``<body>`` elements.

    The file at ``uploaded_file.name`` is opened in text mode, parsed with
    ``email.message_from_string`` and every part yielded by ``walk()`` is
    passed through BeautifulSoup; the text of each ``<body>`` element found
    is appended to the result.

    Args:
        uploaded_file: Object whose ``name`` attribute is a path that can be
            opened for reading.

    Returns:
        list: The ``.text`` of every ``<body>`` element found, in walk order.
    """
    # Use a context manager so the handle is closed even if reading fails;
    # a single read() replaces the former line-by-line concatenation.
    with open(uploaded_file.name, 'r') as efile:
        emailstr = efile.read()

    message = email.message_from_string(emailstr)

    parsed_email = []
    for part in message.walk():
        # NOTE(review): this condition looks like it is always true (the
        # content type falls back to a default) - confirm whether a filter
        # such as "text/html" was intended. Kept as-is to preserve behavior.
        if part.get_content_type():
            body = str(part.get_payload())
            # NOTE(review): no parser is named, so the result depends on
            # which parser BeautifulSoup picks in the deployed environment.
            soup = BeautifulSoup(body)
            parsed_email.extend(
                paragraph.text for paragraph in soup.find_all('body'))
    return parsed_email
|
92 |
+
|
93 |
+
def email_upload():
    """Print an upload prompt, show a file-chooser widget and return it.

    Returns:
        The ``FileChooser`` instance that was displayed.
    """
    print("Please upload your email (In HTML Format)")
    # An earlier variant used FileUpload(accept='.html', multiple=True).
    # NOTE(review): the imports for FileChooser and display appear to be
    # commented out near the top of this file - confirm these names are
    # actually in scope before calling this function.
    chooser = FileChooser()
    display(chooser)
    return chooser
|
101 |
+
|
102 |
+
|
103 |
+
# New - In-Use
|
104 |
+
def email_extractor(email_uploaded, start_marker='Bright Apps LLC',
                    end_marker='To read more'):
    """Extract the e-mail body plus its character and URL counts.

    The parsed e-mail text is cut down to the span that begins at
    ``start_marker`` and ends just before ``end_marker``, markup residue is
    removed, URLs are counted, and the non-whitespace characters of the
    URL-free text (up to the first copyright sign) are counted.

    Args:
        email_uploaded: Passed straight to ``parse_email``.
        start_marker: Text at which the body starts. If it is absent the
            body starts at the beginning of the text.
        end_marker: Text before which the body ends. If it is absent after
            the start position the body runs to the end of the text.

    Returns:
        tuple: ``(email_body, character_cnt, url_cnt)``.
    """
    email_text = ''.join(parse_email(email_uploaded)).strip()

    # str.find returns -1 for a missing marker; using that directly as a
    # slice bound would keep only the last character (start) or silently
    # drop the last character (end), so fall back to the text boundaries.
    start_index = email_text.find(start_marker)
    if start_index == -1:
        start_index = 0
    # Look for the end marker only after the start so an earlier occurrence
    # cannot produce an empty body.
    end_index = email_text.find(end_marker, start_index)
    if end_index == -1:
        end_index = len(email_text)
    email_body = email_text[start_index:end_index].strip()

    # Get rid of non-text elements.
    for token in ('\n', '\t', '\r', '</b>', '<b>', '\xa0'):
        email_body = email_body.replace(token, '')

    # Count the URLs, if any.
    urls = URLExtract().find_urls(email_body)
    url_cnt = len(urls)

    # Remove URLs, drop everything from the copyright sign onwards, then
    # count the characters that are not whitespace.
    body = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', email_body)
    body = body.split('©', 1)[0]
    character_cnt = sum(not ch.isspace() for ch in body)

    return email_body, character_cnt, url_cnt
|
134 |
+
|
135 |
|
136 |
# extract email body from parse email
|
137 |
def email_body_extractor(email_data):
|
|
|
253 |
'Survey',
|
254 |
'Newsletter',
|
255 |
'Engagement',
|
256 |
+
'Usage_and_Consumption',
|
257 |
'Review_Request',
|
258 |
'Product_Announcement',
|
259 |
'Abandoned_Cart'
|
|
|
294 |
|
295 |
st.markdown("""---""")
|
296 |
|
297 |
+
#char_reco_preference = st.selectbox(
|
298 |
+
# 'Do you want to increase or decrease your character count in the email?',
|
299 |
+
# ["Increase", "Decrease"],
|
300 |
+
# index=1)
|
301 |
|
302 |
|
303 |
def get_files_from_aws(bucket, prefix):
|
|
|
355 |
# print("Getting Data Time: %s seconds" % (time.time() - start_time))
|
356 |
|
357 |
industry_code_dict = get_industry_code_dict(email_data)
|
358 |
+
#uploaded_file = FileChooser(uploaded_file)
|
359 |
+
#bytes_data = uploaded_file.getvalue()
|
360 |
|
361 |
+
email_body, character_cnt, url_cnt = email_extractor(uploaded_file)
|
362 |
|
363 |
# Start the prediction
|
364 |
# Need to solve X test issue
|
|
|
381 |
character_cnt), unsafe_allow_html=True)
|
382 |
# st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
|
383 |
if target == 'conversion_rate':
|
384 |
+
target_vis = 'Click_Through_Rate'
|
385 |
else:
|
386 |
+
target_vis = 'Open_Rate'
|
387 |
+
|
388 |
st.markdown('#### The model predicts that it achieves a <span style="color:blue">{}</span> of <span style="color:blue">{}</span>%'.format(
|
389 |
+
target_vis, str(round(output_rate*100, 3))), unsafe_allow_html=True)
|
390 |
selected_industry_code = industry_code_dict.get(industry)
|
391 |
|
392 |
if target == "click_to_open_rate":
|
|
|
403 |
df_reco_sort = df_reco.sort_values(by=[selected_variable])
|
404 |
df_reco = df_reco.drop_duplicates(subset=selected_variable)
|
405 |
|
406 |
+
#preference = char_reco_preference
|
407 |
+
#if preference == "Increase":
|
408 |
+
# df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
|
409 |
+
# df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
|
410 |
+
# df_reco_opt_rank = df_reco_opt.nlargest(3, [selected_variable])
|
411 |
# decrease character reco
|
412 |
+
#if preference == "Decrease":
|
413 |
+
# df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
|
414 |
+
# df_reco["character_cnt"] < character_cnt)]
|
415 |
+
# df_reco_opt_rank = df_reco_opt.nlargest(3, [selected_variable])
|
416 |
+
|
417 |
+
|
418 |
+
# split into two dataframes of higher and lower character_cnt (added apr 2023)
|
419 |
+
char_cnt_uploaded = character_cnt
|
420 |
+
|
421 |
+
df_reco_opt1 = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > char_cnt_uploaded) & (df_reco["character_cnt"] <= (1.5*char_cnt_uploaded))]
|
422 |
+
df_reco_opt2 = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < char_cnt_uploaded) & (df_reco["character_cnt"] >= (char_cnt_uploaded/2))]
|
423 |
+
|
424 |
+
# drop duplicates of character_cnt keeping the row with the highest output_rate
|
425 |
+
df_reco_opt1 = df_reco_opt1.sort_values(by=[selected_variable], ascending=False).drop_duplicates(subset=["character_cnt"])
|
426 |
+
df_reco_opt2 = df_reco_opt2.sort_values(by=[selected_variable], ascending=False).drop_duplicates(subset=["character_cnt"])
|
427 |
+
|
428 |
+
# get top 2 largest in higher and lower dataframe
|
429 |
+
df_reco_opt_rank1 = df_reco_opt1.nlargest(2, [selected_variable])
|
430 |
+
df_reco_opt_rank2 = df_reco_opt2.nlargest(2, [selected_variable])
|
431 |
+
|
432 |
+
df_reco_opt_rank = pd.concat([df_reco_opt_rank1, df_reco_opt_rank2])
|
433 |
+
df_reco_opt_rank = df_reco_opt_rank.nlargest(3,[selected_variable])
|
434 |
|
435 |
if selected_variable == "Open_Rate":
|
436 |
selected_variable = "Click-to-Open_Rate"
|
|
|
443 |
st.markdown('#### You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(
|
444 |
selected_variable), unsafe_allow_html=True)
|
445 |
else:
|
446 |
+
#for _, row in df_reco_opt_rank.iterrows():
|
447 |
+
# Character_Count = row[1]
|
448 |
+
# selected_variable = row[3]
|
449 |
+
# print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
|
450 |
+
# st.markdown('Number of Characters: {}, Target Rate: {}'.format(
|
451 |
+
# int(Character_Count), round(selected_variable*100, 3)))
|
452 |
+
|
453 |
+
chars = []
|
454 |
+
sel_var_values = []
|
455 |
+
|
456 |
for _, row in df_reco_opt_rank.iterrows():
|
457 |
Character_Count = row[1]
|
458 |
+
selected_variable_number = row[3]
|
459 |
+
chars.append(int(Character_Count))
|
460 |
+
sel_var_values.append(round(selected_variable_number, 3)*100)
|
461 |
+
st.write(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(round(selected_variable_number, 3)*100, 3)}", "%")
|
462 |
+
st.write("\n")
|
463 |
+
|
464 |
+
if len(chars) > 1:
|
465 |
+
#fig = plt.figure()
|
466 |
+
#ax = fig.add_axes([0,0,1,1])
|
467 |
+
fig, ax = plt.subplots(figsize=(10,4))
|
468 |
+
bars = ax.barh(np.arange(len(chars)), sel_var_values, height=0.175, color='#0F4D60')
|
469 |
+
|
470 |
+
#ax.bar_label(bars)
|
471 |
+
|
472 |
+
ax.set_yticks(np.arange(len(chars)))
|
473 |
+
ax.set_yticklabels(np.array(chars), fontsize=14)
|
474 |
+
ax.set_title('Character Counts vs. Target Variable Rates', fontsize=18)
|
475 |
+
ax.set_ylabel('Character Counts', fontsize=16)
|
476 |
+
ax.set_xlabel('Target Rates %', fontsize=16)
|
477 |
+
|
478 |
+
for i, bar in enumerate(bars):
|
479 |
+
rounded_value = round(sel_var_values[i], 2)
|
480 |
+
ax.text(bar.get_width() + 0.3, bar.get_y() + bar.get_height()/2, str(rounded_value) + '%', ha='left', va='center', fontsize=12, fontweight='bold')
|
481 |
+
|
482 |
+
ax.margins(0.1,0.05)
|
483 |
+
|
484 |
+
biggest_bar_index = np.argmax(sel_var_values)
|
485 |
+
bars[biggest_bar_index].set_color('#00BF93')
|
486 |
+
|
487 |
+
st.plotly_chart(fig, use_container_width=True)
|
488 |
+
|
489 |
+
st.write("\n")
|
490 |
+
#st.write(np.array(chars))
|
491 |
+
chars_out = dict(zip(chars, sel_var_values))
|
492 |
+
sorted_chars_out = sorted(chars_out.items(), key=lambda x: x[1], reverse=True)
|
493 |
+
|
494 |
|
495 |
placeholder.empty()
|
496 |
# print(time.time() - start_time)
|