|
import base64
import io
import json
import os
import pickle
import smtplib
from collections import OrderedDict
from pathlib import Path

import numpy as np
import pandas as pd
import plotly
import plotly.graph_objects as go
import streamlit as st
import streamlit_authenticator as stauth  # required by load_authenticator below
import yaml
from plotly.subplots import make_subplots
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from streamlit.components.v1 import html
from yaml import SafeLoader

from classes import Channel, Scenario, class_from_dict, class_to_dict, numerize
|
|
|
|
|
color_palette = [ |
|
"#F3F3F0", |
|
"#5E7D7E", |
|
"#2FA1FF", |
|
"#00EDED", |
|
"#00EAE4", |
|
"#304550", |
|
"#EDEBEB", |
|
"#7FBEFD", |
|
"#003059", |
|
"#A2F3F3", |
|
"#E1D6E2", |
|
"#B6B6B6", |
|
] |
|
|
|
|
|
CURRENCY_INDICATOR = "$" |
|
|
|
# Guard: response curves require a tuned model that has populated the
# binned-variable dictionary in session state.
try:
    st.session_state["bin_dict"]["Panel Level 1"] = st.session_state["bin_dict"].get(
        "Panel Level 1", []
    )
except Exception:
    st.error(
        "No tuned model available. Please build and tune a model to generate response curves."
    )
    st.stop()
|
|
|
|
|
def load_authenticator(): |
|
with open("config.yaml") as file: |
|
config = yaml.load(file, Loader=SafeLoader) |
|
st.session_state["config"] = config |
|
authenticator = stauth.Authenticate( |
|
config["credentials"], |
|
config["cookie"]["name"], |
|
config["cookie"]["key"], |
|
config["cookie"]["expiry_days"], |
|
config["preauthorized"], |
|
) |
|
st.session_state["authenticator"] = authenticator |
|
return authenticator |
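
# Example (illustrative, not executed here): gating a page behind the
# YAML-configured login. The login call follows the streamlit-authenticator
# 0.2.x API; adjust for the version pinned in this project.
#
#     authenticator = load_authenticator()
#     name, auth_status, username = authenticator.login("Login", "main")
#     if not auth_status:
#         st.stop()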
|
|
|
|
|
def nav_page(page_name, timeout_secs=3): |
|
nav_script = """ |
|
<script type="text/javascript"> |
|
function attempt_nav_page(page_name, start_time, timeout_secs) { |
|
var links = window.parent.document.getElementsByTagName("a"); |
|
for (var i = 0; i < links.length; i++) { |
|
if (links[i].href.toLowerCase().endsWith("/" + page_name.toLowerCase())) { |
|
links[i].click(); |
|
return; |
|
} |
|
} |
|
            var elapsed = new Date() - start_time;
            if (elapsed < timeout_secs * 1000) {
|
setTimeout(attempt_nav_page, 100, page_name, start_time, timeout_secs); |
|
} else { |
|
alert("Unable to navigate to page '" + page_name + "' after " + timeout_secs + " second(s)."); |
|
} |
|
} |
|
window.addEventListener("load", function() { |
|
attempt_nav_page("%s", new Date(), %d); |
|
}); |
|
</script> |
|
""" % ( |
|
page_name, |
|
timeout_secs, |
|
) |
|
html(nav_script) |
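
# Example (illustrative): after saving a scenario, jump to another Streamlit
# page whose script lives under `pages/` (the page name here is hypothetical):
#
#     nav_page("Scenario_Planner")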
|
|
path = os.path.dirname(__file__)

with open(f"{path}/mastercard_logo.png", "rb") as file_:
    contents = file_.read()

data_url = base64.b64encode(contents).decode("utf-8")
|
|
|
|
|
DATA_PATH = "./data" |
|
|
|
IMAGES_PATH = "./data/images_224_224" |
|
|
|
|
|
if "bin_dict" not in st.session_state: |
|
|
|
with open("data_import.pkl", "rb") as f: |
|
data = pickle.load(f) |
|
|
|
st.session_state["bin_dict"] = data["bin_dict"] |
|
|
|
|
|
|
|
|
|
|
|
is_panel = False

if is_panel:
    # Sanitize the first "Panel Level 1" column name with the same rules used
    # by the modeling pipeline (lowercase, punctuation collapsed to underscores).
    panel_col = [
        col.lower()
        .replace(".", "_")
        .replace("@", "_")
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
        for col in st.session_state["bin_dict"]["Panel Level 1"]
    ][0]
|
|
|
|
|
date_col = "Date" |
|
|
|
|
|
|
|
def load_local_css(file_name):
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


path1 = os.path.dirname(__file__)

DATA_PATH1 = "./data"

IMAGES_PATH1 = "./data/images_224_224"

# `set_header` below references `data_url1` for a second logo, but this module
# never defined it (only `path1` was left behind). Reusing the first logo is an
# explicit assumption made to avoid a NameError; point this at a second image
# file if one is available.
data_url1 = data_url
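
# Example (illustrative): typical page setup, assuming a `styles.css` file
# ships alongside this module (the file name is hypothetical):
#
#     load_local_css("styles.css")
#     set_header()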
|
|
|
|
|
def set_header():
    return st.markdown(
        f"""<div class='main-header'>
        <!-- <h1></h1> -->
        <div>
            <img class='blend-logo' src="data:image;base64,{data_url1}" alt="Logo">
        </div>
        <img class='blend-logo' src="data:image;base64,{data_url}" alt="Logo">
    </div>""",
        unsafe_allow_html=True,
    )

|
def s_curve(x, K, b, a, x0):
    """Generalized logistic (S-shaped) response curve.

    K  : saturation ceiling (maximum attainable response)
    b  : vertical offset scale
    a  : growth rate (steepness)
    x0 : inflection point on the (scaled) spend axis
    """
    return K / (1 + b * np.exp(-a * (x - x0)))
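
# Illustrative sketch (not executed by the app): recovering known s_curve
# parameters from noisy synthetic data with the same scipy call used in
# `initialize_data` below. All numbers are made up purely to show the shapes.
#
#     x = np.linspace(0, 10, 50)
#     rng = np.random.default_rng(0)
#     y = s_curve(x, K=100, b=5, a=1.2, x0=4) + rng.normal(0, 2, 50)
#     params, _ = curve_fit(s_curve, x, y, p0=(y.max(), 1, 1, x.mean()),
#                           maxfev=int(1e5))
#     # params should land near (100, 5, 1.2, 4)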
|
|
|
|
|
def overview_test_data_prep_panel(X, df, spends_X, date_col, panel_col, target_col):
    """Create the Excel workbook consumed by `initialize_data` (panel models).

    X        : test-set predictions with per-variable contribution columns
    df       : originally uploaded media data containing the raw variables
    spends_X : spends for the dates present in X
    """
|
|
|
|
|
    # Map each channel bucket to the raw variable names that feed it. Some raw
    # names keep the punctuation of the source data (e.g. "fb:_...").
    channels = {
        "paid_search": ["paid_search_impressions", "paid_search_clicks"],
        "fb_level_achieved_tier_1": [
            "fb_level_achieved_tier_1_impressions",
            "fb:_level_achieved_-_tier_1_clicks",
        ],
        "fb_level_achieved_tier_2": [
            "fb:_level_achieved_tier_2_impressions",
            "fb_level_achieved_tier_2_clicks",
        ],
        "ga_app": ["ga_app_impressions", "ga_app_clicks"],
        "digital_tactic_others": [
            "digital_tactic_others_impressions",
            "digital_tactic_others_clicks",
        ],
        "kwai": ["kwai_impressions", "kwai_clicks"],
        "programmatic": ["programmatic_impressions", "programmatic_clicks"],
    }
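
    # For example, the raw column "kwai_clicks" rolls up to the "kwai" bucket,
    # so its spends and contributions are reported at the channel level.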
|
channel_list = list(channels.keys()) |
|
|
|
|
|
|
|
variables = {} |
|
channel_and_variables = {} |
|
new_variables = {} |
|
new_channels_and_variables = {} |
|
|
|
for transformed_var in [ |
|
col |
|
for col in X.drop( |
|
columns=[date_col, panel_col, target_col, "pred", "panel_effect"] |
|
).columns |
|
if "_contr" not in col |
|
]: |
|
if len([col for col in df.columns if col in transformed_var]) == 1: |
|
raw_var = [col for col in df.columns if col in transformed_var][0] |
|
|
|
|
|
|
|
variables[transformed_var] = raw_var |
|
|
|
|
|
|
|
channels_list = [ |
|
channel for channel, raw_vars in channels.items() if raw_var in raw_vars |
|
] |
|
            if channels_list:
                channel_and_variables[raw_var] = channels_list[0]
            else:
                # Raw variable does not belong to any media channel bucket.
                channel_and_variables[raw_var] = None
        else:
            new_variables[transformed_var] = transformed_var
            new_channels_and_variables[transformed_var] = "base"
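
    # Illustrative matching example (hypothetical names): a transformed column
    # "paid_search_impressions_adstock_lag_1" contains exactly one raw column
    # name ("paid_search_impressions"), so it maps back to that raw variable
    # and, via `channels`, to the "paid_search" bucket; anything unmatched
    # falls through to "base".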
|
|
|
|
|
raw_X = pd.merge( |
|
X[[date_col, panel_col]], |
|
df[[date_col, panel_col] + list(variables.values())], |
|
how="left", |
|
on=[date_col, panel_col], |
|
) |
|
assert len(raw_X) == len(X) |
|
|
|
raw_X_cols = [] |
|
for i in raw_X.columns: |
|
if i in channel_and_variables.keys(): |
|
raw_X_cols.append(channel_and_variables[i]) |
|
else: |
|
raw_X_cols.append(i) |
|
raw_X.columns = raw_X_cols |
|
|
|
|
|
contr_X = X[ |
|
[date_col, panel_col, "panel_effect"] |
|
+ [col for col in X.columns if "_contr" in col and "sum_" not in col] |
|
].copy() |
|
    # NOTE: `new_variables` is re-bound here from the dict above to a list of
    # flag/trend/seasonality contribution columns, which get folded into const.
    new_variables = [
        col
        for col in contr_X.columns
        if "_flag" in col.lower() or "trend" in col.lower() or "sine" in col.lower()
    ]
|
if len(new_variables) > 0: |
|
contr_X["const"] = contr_X[["panel_effect"] + new_variables].sum(axis=1) |
|
contr_X.drop(columns=["panel_effect"], inplace=True) |
|
contr_X.drop(columns=new_variables, inplace=True) |
|
else: |
|
contr_X.rename(columns={"panel_effect": "const"}, inplace=True) |
|
|
|
new_contr_X_cols = [] |
|
for col in contr_X.columns: |
|
col_clean = col.replace("_contr", "") |
|
new_contr_X_cols.append(col_clean) |
|
contr_X.columns = new_contr_X_cols |
|
|
|
contr_X_cols = [] |
|
for i in contr_X.columns: |
|
if i in variables.keys(): |
|
contr_X_cols.append(channel_and_variables[variables[i]]) |
|
else: |
|
contr_X_cols.append(i) |
|
contr_X.columns = contr_X_cols |
|
|
|
|
|
spends_X.columns = [col.replace("_cost", "") for col in spends_X.columns] |
|
|
|
raw_X.rename(columns={"date": "Date"}, inplace=True) |
|
contr_X.rename(columns={"date": "Date"}, inplace=True) |
|
spends_X.rename(columns={"date": "Week"}, inplace=True) |
|
|
|
|
|
|
|
file_name = "data_test_overview_panel@#" + target_col + ".xlsx" |
|
|
|
folder_path = os.path.join(st.session_state["project_path"], "metrics_level_data") |
|
|
|
if not os.path.exists(folder_path): |
|
os.mkdir(folder_path) |
|
|
|
file_path = os.path.join(folder_path, file_name) |
|
|
|
|
|
|
|
|
|
|
|
|
|
with pd.ExcelWriter(file_path) as writer: |
|
raw_X.to_excel(writer, sheet_name="RAW DATA MMM", index=False) |
|
contr_X.to_excel(writer, sheet_name="CONTRIBUTION MMM", index=False) |
|
spends_X.to_excel(writer, sheet_name="SPEND INPUT", index=False) |
|
|
|
|
|
original_json_file_path = os.path.join( |
|
st.session_state["project_path"], "rcs_data_original.json" |
|
) |
|
original_pickle_file_path = os.path.join( |
|
st.session_state["project_path"], "scenario_data_original.pkl" |
|
) |
|
|
|
|
|
    # Remove stale cached artifacts so they are rebuilt from the new data.
    if os.path.exists(original_json_file_path):
        os.remove(original_json_file_path)
    if os.path.exists(original_pickle_file_path):
        os.remove(original_pickle_file_path)
|
|
|
|
|
def overview_test_data_prep_nonpanel(X, df, spends_X, date_col, target_col):
    """Create the Excel workbook consumed by `initialize_data` (non-panel models)."""
|
|
|
with open(os.path.join(st.session_state["project_path"], "channel_groups.pkl"), "rb") as f: |
|
channels = pickle.load(f) |
|
|
|
channel_list = list(channels.keys()) |
|
|
|
|
|
|
|
variables = {} |
|
channel_and_variables = {} |
|
new_variables = {} |
|
new_channels_and_variables = {} |
|
|
|
cols_to_del = list( |
|
set([date_col, target_col, "pred"]).intersection((set(X.columns))) |
|
) |
|
|
|
|
|
    # Exogenous variables were sanitized with the same rules at modeling time;
    # any matching columns are excluded from channel mapping below.
    all_exog_vars = st.session_state["bin_dict"]["Exogenous"]
    all_exog_vars = [
        var.lower()
        .replace(".", "_")
        .replace("@", "_")
        .replace(" ", "_")
        .replace("-", "")
        .replace(":", "")
        .replace("__", "_")
        for var in all_exog_vars
    ]
    exog_cols = []
    if len(all_exog_vars) > 0:
        for col in X.columns:
            if len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0:
                exog_cols.append(col)
    cols_to_del = cols_to_del + exog_cols
|
for transformed_var in [ |
|
col for col in X.drop(columns=cols_to_del).columns if "_contr" not in col |
|
]: |
|
        if len([col for col in df.columns if col in transformed_var]) == 1:
|
raw_var = [col for col in df.columns if col in transformed_var][0] |
|
variables[transformed_var] = raw_var |
|
channel_and_variables[raw_var] = [ |
|
channel for channel, raw_vars in channels.items() if raw_var in raw_vars |
|
][0] |
|
else: |
|
new_variables[transformed_var] = transformed_var |
|
new_channels_and_variables[transformed_var] = "base" |
|
|
|
|
|
raw_X = pd.merge( |
|
X[[date_col]], |
|
df[[date_col] + list(variables.values())], |
|
how="left", |
|
on=[date_col], |
|
) |
|
assert len(raw_X) == len(X) |
|
|
|
raw_X_cols = [] |
|
for i in raw_X.columns: |
|
if i in channel_and_variables.keys(): |
|
raw_X_cols.append(channel_and_variables[i]) |
|
else: |
|
raw_X_cols.append(i) |
|
raw_X.columns = raw_X_cols |
|
|
|
|
|
contr_X = X[ |
|
[date_col] + [col for col in X.columns if "_contr" in col and "sum_" not in col] |
|
].copy() |
|
|
|
new_variables = [ |
|
col |
|
for col in contr_X.columns |
|
if "_flag" in col.lower() or "trend" in col.lower() or "sine" in col.lower() |
|
] |
|
    if len(new_variables) > 0:
        contr_X["const_contr"] = contr_X[["const_contr"] + new_variables].sum(axis=1)
|
contr_X.drop(columns=new_variables, inplace=True) |
|
|
|
new_contr_X_cols = [] |
|
for col in contr_X.columns: |
|
col_clean = col.replace("_contr", "") |
|
new_contr_X_cols.append(col_clean) |
|
contr_X.columns = new_contr_X_cols |
|
|
|
contr_X_cols = [] |
|
for i in contr_X.columns: |
|
if i in variables.keys(): |
|
contr_X_cols.append(channel_and_variables[variables[i]]) |
|
else: |
|
contr_X_cols.append(i) |
|
contr_X.columns = contr_X_cols |
|
|
    spends_X_col_map = {
        col: bucket
        for col in spends_X.columns
        for bucket in channels.keys()
        if col in channels[bucket]
    }
|
spends_X.rename(columns=spends_X_col_map, inplace=True) |
|
|
|
raw_X.rename(columns={"date": "Date"}, inplace=True) |
|
contr_X.rename(columns={"date": "Date"}, inplace=True) |
|
spends_X.rename(columns={"date": "Week"}, inplace=True) |
|
|
|
|
|
file_name = "data_test_overview_panel@#" + target_col + ".xlsx" |
|
folder_path = os.path.join(st.session_state["project_path"], "metrics_level_data") |
|
|
|
if not os.path.exists(folder_path): |
|
os.mkdir(folder_path) |
|
|
|
file_path = os.path.join(folder_path, file_name) |
|
with pd.ExcelWriter(file_path) as writer: |
|
raw_X.to_excel(writer, sheet_name="RAW DATA MMM", index=False) |
|
contr_X.to_excel(writer, sheet_name="CONTRIBUTION MMM", index=False) |
|
spends_X.to_excel(writer, sheet_name="SPEND INPUT", index=False) |
|
|
|
|
|
original_json_file_path = os.path.join( |
|
st.session_state["project_path"], "rcs_data_original.json" |
|
) |
|
original_pickle_file_path = os.path.join( |
|
st.session_state["project_path"], "scenario_data_original.pkl" |
|
) |
|
|
|
|
|
    # Remove stale cached artifacts so they are rebuilt from the new data.
    if os.path.exists(original_json_file_path):
        os.remove(original_json_file_path)
    if os.path.exists(original_pickle_file_path):
        os.remove(original_pickle_file_path)
|
|
|
|
|
def initialize_data(target_col, is_panel, panel_col):
    """Load the prepared workbook, fit an S-curve per channel, and seed the
    session-state objects used by the scenario planner."""
    file_name = Path("metrics_level_data") / Path(
        "data_test_overview_panel@#" + target_col + ".xlsx"
    )

    file_path = os.path.join(st.session_state["project_path"], file_name)
    if os.path.exists(file_path):
        excel = pd.read_excel(file_path, sheet_name=None)
    else:
        # Fall back to the working directory; sheet_name=None is required so
        # all sheets load as a dict, matching the branch above.
        excel = pd.read_excel(
            "data_test_overview_panel@#" + target_col + ".xlsx", sheet_name=None
        )
|
|
|
raw_df = excel["RAW DATA MMM"] |
|
spend_df = excel["SPEND INPUT"] |
|
contri_df = excel["CONTRIBUTION MMM"] |
|
|
|
|
|
|
|
exclude_columns = [ |
|
"Date", |
|
"Week", |
|
"Region", |
|
"Controls_Grammarly_Index_SeasonalAVG", |
|
"Controls_Quillbot_Index", |
|
"Daily_Positive_Outliers", |
|
"External_RemoteClass_Index", |
|
"Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802", |
|
"Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206", |
|
"Intervals ON 20201005-20201019", |
|
"Promotion_PercentOff", |
|
"Promotion_TimeBased", |
|
"Seasonality_Indicator_Chirstmas", |
|
"Seasonality_Indicator_NewYears_Days", |
|
"Seasonality_Indicator_Thanksgiving", |
|
"Trend 20200302 / 20200803", |
|
date_col, |
|
] |
|
if is_panel: |
|
exclude_columns = exclude_columns + [panel_col] |
|
|
|
|
|
raw_df[date_col] = pd.to_datetime(raw_df[date_col]) |
|
raw_df_aggregations = {c: "sum" for c in raw_df.columns if c not in exclude_columns} |
|
raw_df = raw_df.groupby(date_col).agg(raw_df_aggregations).reset_index() |
|
|
|
contri_df[date_col] = pd.to_datetime(contri_df[date_col]) |
|
contri_df_aggregations = { |
|
c: "sum" for c in contri_df.columns if c not in exclude_columns |
|
} |
|
contri_df = contri_df.groupby(date_col).agg(contri_df_aggregations).reset_index() |
|
|
|
input_df = raw_df.sort_values(by=[date_col]) |
|
|
|
output_df = contri_df.sort_values(by=[date_col]) |
|
|
|
spend_df["Week"] = pd.to_datetime( |
|
spend_df["Week"], format="%Y-%m-%d", errors="coerce" |
|
) |
|
spend_df_aggregations = { |
|
c: "sum" for c in spend_df.columns if c not in exclude_columns |
|
} |
|
spend_df = spend_df.groupby("Week").agg(spend_df_aggregations).reset_index() |
|
|
|
|
|
|
|
channel_list = [col for col in input_df.columns if col not in exclude_columns] |
|
|
|
response_curves = {} |
|
mapes = {} |
|
rmses = {} |
|
upper_limits = {} |
|
powers = {} |
|
r2 = {} |
|
conv_rates = {} |
|
output_cols = [] |
|
channels = {} |
|
sales = None |
|
dates = input_df.Date.values |
|
actual_output_dic = {} |
|
actual_input_dic = {} |
|
|
|
|
|
|
|
infeasible_channels = [ |
|
c |
|
for c in contri_df.select_dtypes(include=["float", "int"]).columns |
|
if contri_df[c].sum() <= 0 |
|
] |
|
|
|
|
|
channel_list = list(set(channel_list) - set(infeasible_channels)) |
|
|
|
    # Drop channels explicitly excluded from planning plus any stray unnamed
    # Excel column.
    channel_list = [
        col for col in channel_list if col != "ga_app" and col != "Unnamed: 4"
    ]
|
|
|
for inp_col in channel_list: |
|
|
spends = input_df[inp_col].values |
|
|
|
x = spends.copy() |
|
|
|
upper_limits[inp_col] = 2 * x.max() |
|
|
|
|
|
|
|
|
|
        out_col = inp_col
|
if is_panel: |
|
output_df1 = ( |
|
output_df.groupby([date_col]).agg({out_col: "sum"}).reset_index() |
|
) |
|
y = output_df1[out_col].values.copy() |
|
else: |
|
y = output_df[out_col].values.copy() |
|
|
|
actual_output_dic[inp_col] = y.copy() |
|
actual_input_dic[inp_col] = x.copy() |
|
|
|
output_cols.append(out_col) |
|
|
|
|
|
        # Scale spends down so curve_fit works on comparable magnitudes;
        # `power` is stored per channel to undo the scaling later.
        power = np.ceil(np.log(x.max()) / np.log(10)) - 3
        if power >= 0:
            x = x / 10**power
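
        # Worked example (illustrative): if x.max() is 5_000_000, then
        # ceil(log10(5e6)) = 7, so power = 4 and spends are divided by 10**4,
        # putting the fitted x-axis in units of ten thousand.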
|
|
|
x = x.astype("float64") |
|
y = y.astype("float64") |
|
|
|
|
|
|
|
|
|
|
|
        if y.max() <= 0.01:
            # Degenerate, near-zero series: clamp the saturation ceiling near
            # zero (the original code had identical bounds for both x cases).
            bounds = ((0, 0, 0, 0), (3 * 0.01, 1000, 1, 0.01))
        else:
            bounds = ((0, 0, 0, 0), (3 * y.max(), 1000, 1, x.max()))
|
|
|
params, _ = curve_fit( |
|
s_curve, |
|
x, |
|
y, |
|
p0=(2 * y.max(), 0.01, 1e-5, x.max()), |
|
bounds=bounds, |
|
maxfev=int(1e5), |
|
) |
|
mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean() |
|
rmse = np.sqrt(((y - s_curve(x, *params)) ** 2).mean()) |
|
r2_ = r2_score(y, s_curve(x, *params)) |
|
|
|
response_curves[inp_col] = { |
|
"K": params[0], |
|
"b": params[1], |
|
"a": params[2], |
|
"x0": params[3], |
|
} |
|
mapes[inp_col] = mape |
|
rmses[inp_col] = rmse |
|
r2[inp_col] = r2_ |
|
powers[inp_col] = power |
|
|
        # Average conversion rate between weekly spend and the raw input
        # metric; the clip avoids division by zero.
        conv = (
            spend_df.set_index("Week")[inp_col]
            / input_df.set_index("Date")[inp_col].clip(lower=1)
        ).reset_index()
        conv.rename(columns={"index": "Week"}, inplace=True)
        conv["year"] = conv.Week.dt.year
        conv_rates[inp_col] = list(conv.drop("Week", axis=1).mean().to_dict().values())[0]
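
        # Illustrative: if weekly spend is $500 while the raw metric that week
        # is 10,000 impressions, the conversion rate is 0.05 dollars per unit.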
|
|
|
|
|
|
|
|
|
channel = Channel( |
|
name=inp_col, |
|
dates=dates, |
|
spends=spends, |
|
|
|
conversion_rate=conv_rates[inp_col], |
|
response_curve_type="s-curve", |
|
response_curve_params={ |
|
"K": params[0], |
|
"b": params[1], |
|
"a": params[2], |
|
"x0": params[3], |
|
}, |
|
bounds=np.array([-10, 10]), |
|
) |
|
channels[inp_col] = channel |
|
if sales is None: |
|
sales = channel.actual_sales |
|
else: |
|
sales += channel.actual_sales |
|
|
|
|
|
    # Everything not attributed to a media channel, plus a correction term that
    # reconciles summed channel sales with the total modeled output.
    other_contributions = (
        output_df.drop([*output_cols], axis=1).sum(axis=1, numeric_only=True).values
    )
    correction = output_df.drop(["Date"], axis=1).sum(axis=1).values - (
        sales + other_contributions
    )
|
|
|
scenario_test_df = pd.DataFrame( |
|
columns=["other_contributions", "correction", "sales"] |
|
) |
|
scenario_test_df["other_contributions"] = other_contributions |
|
scenario_test_df["correction"] = correction |
|
scenario_test_df["sales"] = sales |
|
|
|
|
|
|
|
scenario = Scenario( |
|
name="default", |
|
channels=channels, |
|
constant=other_contributions, |
|
correction=correction, |
|
) |
|
|
|
st.session_state["initialized"] = True |
|
st.session_state["actual_df"] = input_df |
|
st.session_state["raw_df"] = raw_df |
|
st.session_state["contri_df"] = output_df |
|
default_scenario_dict = class_to_dict(scenario) |
|
st.session_state["default_scenario_dict"] = default_scenario_dict |
|
st.session_state["scenario"] = scenario |
|
st.session_state["channels_list"] = channel_list |
|
st.session_state["optimization_channels"] = { |
|
channel_name: False for channel_name in channel_list |
|
} |
|
st.session_state["rcs"] = response_curves |
|
|
st.session_state["powers"] = powers |
|
st.session_state["actual_contribution_df"] = pd.DataFrame(actual_output_dic) |
|
st.session_state["actual_input_df"] = pd.DataFrame(actual_input_dic) |
|
|
|
for channel in channels.values(): |
|
st.session_state[channel.name] = numerize( |
|
channel.actual_total_spends * channel.conversion_rate, 1 |
|
) |
|
|
|
st.session_state["xlsx_buffer"] = io.BytesIO() |
|
|
|
if Path("../saved_scenarios.pkl").exists(): |
|
with open("../saved_scenarios.pkl", "rb") as f: |
|
st.session_state["saved_scenarios"] = pickle.load(f) |
|
else: |
|
st.session_state["saved_scenarios"] = OrderedDict() |
|
|
|
st.session_state["total_spends_change"] = 0 |
|
st.session_state["optimization_channels"] = { |
|
channel_name: False for channel_name in channel_list |
|
} |
|
st.session_state["disable_download_button"] = True |
|
|
def create_channel_summary(scenario): |
|
summary_columns = [] |
|
|
|
actual_spends_rows = [] |
|
|
|
actual_sales_rows = [] |
|
|
|
actual_roi_rows = [] |
|
|
|
    for channel in scenario.channels.values():
        name_mod = channel.name.replace("_", " ")
        if name_mod.lower().endswith(" imp"):
            name_mod = name_mod.replace("Imp", " Impressions")

        summary_columns.append(name_mod)
|
|
|
actual_spends_rows.append( |
|
format_numbers(float(channel.actual_total_spends * channel.conversion_rate)) |
|
) |
|
|
|
actual_sales_rows.append(format_numbers((float(channel.actual_total_sales)))) |
|
|
|
actual_roi_rows.append( |
|
decimal_formater( |
|
format_numbers( |
|
(channel.actual_total_sales) |
|
/ (channel.actual_total_spends * channel.conversion_rate), |
|
include_indicator=False, |
|
n_decimals=4, |
|
), |
|
n_decimals=4, |
|
) |
|
) |
|
|
|
actual_summary_df = pd.DataFrame( |
|
[ |
|
summary_columns, |
|
actual_spends_rows, |
|
actual_sales_rows, |
|
actual_roi_rows, |
|
] |
|
).T |
|
|
|
actual_summary_df.columns = ["Channel", "Spends", "Prospects", "ROI"] |
|
|
|
actual_summary_df["Prospects"] = actual_summary_df["Prospects"].map( |
|
lambda x: str(x)[1:] |
|
) |
|
|
|
return actual_summary_df |
|
|
def create_contribution_pie(_scenario, target_col): |
|
colors = plotly.colors.qualitative.Plotly |
|
colors_map = { |
|
col: colors[i % len(colors)] |
|
for i, col in enumerate(st.session_state["channels_list"]) |
|
} |
|
|
|
spends_values = [ |
|
round( |
|
_scenario.channels[channel_name].actual_total_spends |
|
* _scenario.channels[channel_name].conversion_rate, |
|
1, |
|
) |
|
for channel_name in st.session_state["channels_list"] |
|
] |
|
    spends_values.append(0)  # "Non Media" carries no spend; keeps slice counts aligned
|
|
|
revenue_values = [ |
|
_scenario.channels[channel_name].actual_total_sales |
|
for channel_name in st.session_state["channels_list"] |
|
] |
|
revenue_values.append( |
|
_scenario.correction.sum() + _scenario.constant.sum() |
|
) |
|
|
|
total_contribution_fig = make_subplots( |
|
rows=1, |
|
cols=2, |
|
subplot_titles=["Spends", target_col], |
|
specs=[[{"type": "pie"}, {"type": "pie"}]], |
|
) |
|
|
|
total_contribution_fig.add_trace( |
|
go.Pie( |
|
labels=[ |
|
channel_name_formating(channel_name) |
|
for channel_name in st.session_state["channels_list"] |
|
] |
|
+ ["Non Media"], |
|
values=spends_values, |
|
marker=dict( |
|
colors=[ |
|
colors_map[channel_name] |
|
for channel_name in st.session_state["channels_list"] |
|
] |
|
+ ["#F0F0F0"] |
|
), |
|
hole=0.3, |
|
), |
|
row=1, |
|
col=1, |
|
) |
|
|
|
total_contribution_fig.add_trace( |
|
go.Pie( |
|
labels=[ |
|
channel_name_formating(channel_name) |
|
for channel_name in st.session_state["channels_list"] |
|
] |
|
+ ["Non Media"], |
|
values=revenue_values, |
|
marker=dict( |
|
colors=[ |
|
colors_map[channel_name] |
|
for channel_name in st.session_state["channels_list"] |
|
] |
|
+ ["#F0F0F0"] |
|
), |
|
hole=0.3, |
|
), |
|
row=1, |
|
col=2, |
|
) |
|
|
|
total_contribution_fig.update_traces( |
|
textposition="inside", texttemplate="%{percent:.1%}" |
|
) |
|
total_contribution_fig.update_layout( |
|
uniformtext_minsize=12, |
|
title="Channel contribution", |
|
uniformtext_mode="hide", |
|
) |
|
return total_contribution_fig |
|
|
def create_contribuion_stacked_plot(_scenario, target_col):  # (sic) name kept for compatibility
|
color_palette = plotly.colors.qualitative.Plotly |
|
|
|
weekly_contribution_fig = make_subplots( |
|
rows=1, |
|
cols=2, |
|
subplot_titles=["Spends", target_col], |
|
specs=[[{"type": "bar"}, {"type": "bar"}]], |
|
) |
|
|
|
raw_df = st.session_state["raw_df"] |
|
df = raw_df.sort_values(by="Date") |
|
x = df.Date |
|
weekly_spends_data = [] |
|
weekly_sales_data = [] |
|
|
|
for i, channel_name in enumerate(st.session_state["channels_list"]): |
|
color = color_palette[i % len(color_palette)] |
|
|
|
weekly_spends_data.append( |
|
go.Bar( |
|
x=x, |
|
y=_scenario.channels[channel_name].actual_spends |
|
* _scenario.channels[channel_name].conversion_rate, |
|
name=channel_name_formating(channel_name), |
|
hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}", |
|
legendgroup=channel_name, |
|
marker_color=color, |
|
) |
|
) |
|
|
|
weekly_sales_data.append( |
|
go.Bar( |
|
x=x, |
|
y=_scenario.channels[channel_name].actual_sales, |
|
name=channel_name_formating(channel_name), |
|
hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}", |
|
legendgroup=channel_name, |
|
showlegend=False, |
|
marker_color=color, |
|
) |
|
) |
|
|
|
for _d in weekly_spends_data: |
|
weekly_contribution_fig.add_trace(_d, row=1, col=1) |
|
for _d in weekly_sales_data: |
|
weekly_contribution_fig.add_trace(_d, row=1, col=2) |
|
|
|
weekly_contribution_fig.add_trace( |
|
go.Bar( |
|
x=x, |
|
y=_scenario.constant + _scenario.correction, |
|
name="Non Media", |
|
hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}", |
|
marker_color=color_palette[-1], |
|
), |
|
row=1, |
|
col=2, |
|
) |
|
|
|
weekly_contribution_fig.update_layout( |
|
barmode="stack", |
|
title="Channel contribution by week", |
|
xaxis_title="Date", |
|
) |
|
weekly_contribution_fig.update_xaxes(showgrid=False) |
|
weekly_contribution_fig.update_yaxes(showgrid=False) |
|
return weekly_contribution_fig |
|
|
|
|
|
def create_channel_spends_sales_plot(channel, target_col): |
|
if channel is not None: |
|
x = channel.dates |
|
_spends = channel.actual_spends * channel.conversion_rate |
|
_sales = channel.actual_sales |
|
channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]]) |
|
channel_sales_spends_fig.add_trace( |
|
go.Bar( |
|
x=x, |
|
y=_sales, |
|
                marker_color=color_palette[3],
|
name=target_col, |
|
hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}", |
|
), |
|
secondary_y=False, |
|
) |
|
|
|
channel_sales_spends_fig.add_trace( |
|
go.Scatter( |
|
x=x, |
|
y=_spends, |
|
                line=dict(color=color_palette[2]),
|
name="Spends", |
|
hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}", |
|
), |
|
secondary_y=True, |
|
) |
|
|
|
channel_sales_spends_fig.update_layout( |
|
xaxis_title="Date", |
|
yaxis_title=target_col, |
|
yaxis2_title="Spends ($)", |
|
title="Weekly Channel Spends and " + target_col, |
|
) |
|
channel_sales_spends_fig.update_xaxes(showgrid=False) |
|
channel_sales_spends_fig.update_yaxes(showgrid=False) |
|
else: |
|
raw_df = st.session_state["raw_df"] |
|
df = raw_df.sort_values(by="Date") |
|
x = df.Date |
|
scenario = class_from_dict(st.session_state["default_scenario_dict"]) |
|
_sales = scenario.constant + scenario.correction |
|
channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]]) |
|
channel_sales_spends_fig.add_trace( |
|
go.Bar( |
|
x=x, |
|
y=_sales, |
|
                marker_color=color_palette[0],
|
name="Revenue", |
|
hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}", |
|
), |
|
secondary_y=False, |
|
) |
|
|
|
channel_sales_spends_fig.update_layout( |
|
xaxis_title="Date", |
|
yaxis_title="Revenue", |
|
yaxis2_title="Spends ($)", |
|
title="Channel spends and Revenue week-wise", |
|
) |
|
channel_sales_spends_fig.update_xaxes(showgrid=False) |
|
channel_sales_spends_fig.update_yaxes(showgrid=False) |
|
|
|
return channel_sales_spends_fig |
|
|
|
|
|
def format_numbers(value, n_decimals=1, include_indicator=True): |
|
if include_indicator: |
|
return f"{CURRENCY_INDICATOR} {numerize(value,n_decimals)}" |
|
else: |
|
return f"{numerize(value,n_decimals)}" |
|
|
|
|
|
def decimal_formater(num_string, n_decimals=1):
    """Pad a numeric string with trailing zeros to at least `n_decimals` decimals."""
|
parts = num_string.split(".") |
|
if len(parts) == 1: |
|
return num_string + "." + "0" * n_decimals |
|
else: |
|
to_be_padded = n_decimals - len(parts[-1]) |
|
if to_be_padded > 0: |
|
return num_string + "0" * to_be_padded |
|
else: |
|
return num_string |
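
# Examples (these follow directly from the padding logic above):
#
#     decimal_formater("1.2", n_decimals=4)   # "1.2000"
#     decimal_formater("3", n_decimals=2)     # "3.00"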
|
|
|
|
|
def channel_name_formating(channel_name): |
|
name_mod = channel_name.replace("_", " ") |
|
if name_mod.lower().endswith(" imp"): |
|
name_mod = name_mod.replace("Imp", "Spend") |
|
elif name_mod.lower().endswith(" clicks"): |
|
name_mod = name_mod.replace("Clicks", "Spend") |
|
return name_mod |
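
# Examples: "paid_search_Imp" -> "paid search Spend";
# "programmatic_Clicks" -> "programmatic Spend".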
|
|
|
|
|
def send_email(email, message):
    # Credentials were previously hard-coded here; reading them from the
    # environment (set SMTP_USER / SMTP_PASSWORD) avoids committing secrets.
    smtp_user = os.environ.get("SMTP_USER")
    smtp_password = os.environ.get("SMTP_PASSWORD")
    s = smtplib.SMTP("smtp.gmail.com", 587)
    s.starttls()
    s.login(smtp_user, smtp_password)
    s.sendmail(smtp_user, email, message)
    s.quit()
|