Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from sklearn.preprocessing import MinMaxScaler | |
import pickle | |
import os | |
from utilities_with_panel import load_local_css, set_header | |
import yaml | |
from yaml import SafeLoader | |
import sqlite3 | |
from datetime import timedelta | |
from utilities import ( | |
set_header, | |
load_local_css, | |
update_db, | |
project_selection, | |
retrieve_pkl_object, | |
) | |
from utilities_with_panel import ( | |
overview_test_data_prep_panel, | |
overview_test_data_prep_nonpanel, | |
initialize_data_cmp, | |
create_channel_summary, | |
create_contribution_pie, | |
create_contribuion_stacked_plot, | |
create_channel_spends_sales_plot, | |
format_numbers, | |
channel_name_formating, | |
) | |
from log_application import log_message | |
import sys, traceback | |
from post_gres_cred import db_cred | |
st.set_page_config(layout="wide") | |
load_local_css("styles.css") | |
set_header() | |
schema = db_cred["schema"] | |
if "username" not in st.session_state: | |
st.session_state["username"] = None | |
if "project_name" not in st.session_state: | |
st.session_state["project_name"] = None | |
if "project_dct" not in st.session_state: | |
project_selection() | |
st.stop() | |
tuned_model = retrieve_pkl_object( | |
st.session_state["project_number"], "Model_Tuning", "tuned_model", schema | |
) | |
if tuned_model is None: | |
st.error("Please save a tuned model") | |
st.stop() | |
if ( | |
"session_state_saved" in st.session_state["project_dct"]["model_tuning"].keys() | |
and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != [] | |
): | |
for key in ["used_response_metrics", "media_data", "bin_dict"]: | |
if key not in st.session_state: | |
st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ | |
"session_state_saved" | |
][key] | |
## DEFINE ALL FUNCTIONS | |
def get_random_effects(media_data, panel_col, mdf): | |
random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) | |
for i, market in enumerate(media_data[panel_col].unique()): | |
print(i, end="\r") | |
intercept = mdf.random_effects[market].values[0] | |
random_eff_df.loc[i, "random_effect"] = intercept | |
random_eff_df.loc[i, panel_col] = market | |
return random_eff_df | |
def process_train_and_test(train, test, features, panel_col, target_col): | |
X1 = train[features] | |
ss = MinMaxScaler() | |
X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) | |
X1[panel_col] = train[panel_col] | |
X1[target_col] = train[target_col] | |
if test is not None: | |
X2 = test[features] | |
X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) | |
X2[panel_col] = test[panel_col] | |
X2[target_col] = test[target_col] | |
return X1, X2 | |
return X1 | |
def mdf_predict(X_df, mdf, random_eff_df): | |
X = X_df.copy() | |
X = pd.merge( | |
X, | |
random_eff_df[[panel_col, "random_effect"]], | |
on=panel_col, | |
how="left", | |
) | |
X["pred_fixed_effect"] = mdf.predict(X) | |
X["pred"] = X["pred_fixed_effect"] + X["random_effect"] | |
X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True) | |
return X | |
try: | |
if "username" in st.session_state and st.session_state["username"] is not None: | |
# conn = sqlite3.connect( | |
# r"DB/User.db", check_same_thread=False | |
# ) # connection with sql db | |
# c = conn.cursor() | |
tuned_model = retrieve_pkl_object( | |
st.session_state["project_number"], "Model_Tuning", "tuned_model", schema | |
) | |
if tuned_model is None: | |
st.error("Please save a tuned model") | |
st.stop() | |
if ( | |
"session_state_saved" | |
in st.session_state["project_dct"]["model_tuning"].keys() | |
and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] | |
!= [] | |
): | |
for key in [ | |
"used_response_metrics", | |
"is_tuned_model", | |
"media_data", | |
"X_test_spends", | |
"spends_data", | |
]: | |
st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ | |
"session_state_saved" | |
][key] | |
elif ( | |
"session_state_saved" | |
in st.session_state["project_dct"]["model_build"].keys() | |
and st.session_state["project_dct"]["model_build"]["session_state_saved"] | |
!= [] | |
): | |
for key in [ | |
"used_response_metrics", | |
"date", | |
"saved_model_names", | |
"media_data", | |
"X_test_spends", | |
]: | |
st.session_state[key] = st.session_state["project_dct"]["model_build"][ | |
"session_state_saved" | |
][key] | |
else: | |
st.error("Please tune a model first") | |
st.session_state["bin_dict"] = st.session_state["project_dct"]["model_build"][ | |
"session_state_saved" | |
]["bin_dict"] | |
st.session_state["media_data"].columns = [ | |
c.lower() for c in st.session_state["media_data"].columns | |
] | |
# with open( | |
# os.path.join(st.session_state["project_path"], "data_import.pkl"), | |
# "rb", | |
# ) as f: | |
# data = pickle.load(f) | |
# # Accessing the loaded objects | |
# st.session_state["orig_media_data"] = data["final_df"] | |
st.session_state["orig_media_data"] = st.session_state["project_dct"][ | |
"data_import" | |
][ | |
"imputed_tool_df" | |
].copy() # db | |
st.session_state["channels"] = st.session_state["project_dct"]["data_import"][ | |
"group_dict" | |
].copy() | |
# target='Revenue' | |
# set the panel column | |
panel_col = "panel" | |
is_panel = ( | |
True if st.session_state["media_data"][panel_col].nunique() > 1 else False | |
) | |
date_col = "date" | |
cols1 = st.columns([2, 1]) | |
with cols1[0]: | |
st.markdown(f"**Welcome {st.session_state['username']}**") | |
with cols1[1]: | |
st.markdown(f"**Current Project: {st.session_state['project_name']}**") | |
st.title("AI Model Media Performance") | |
def remove_response_metric(name): | |
# Convert the name to a lowercase string and remove any leading or trailing spaces | |
name_str = str(name).lower().strip() | |
# Check if the name starts with "response metric" or "response_metric" | |
if name_str.startswith("response metric"): | |
return name[len("response metric") :].replace("_", " ").strip().title() | |
elif name_str.startswith("response_metric"): | |
return name[len("response_metric") :].replace("_", " ").strip().title() | |
else: | |
return name | |
sel_target_col = st.selectbox( | |
"Select the response metric", | |
st.session_state["used_response_metrics"], | |
format_func=remove_response_metric, | |
) | |
sel_target_col_frmttd = sel_target_col.replace("_", " ").replace("-", " ") | |
sel_target_col_frmttd = sel_target_col_frmttd.title() | |
target_col = ( | |
sel_target_col.lower() | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
) | |
target = sel_target_col | |
# Contribution | |
if is_panel: | |
# read tuned mixedLM model | |
if st.session_state["is_tuned_model"][target_col] == True: | |
model_dict = retrieve_pkl_object( | |
st.session_state["project_number"], | |
"Model_Tuning", | |
"tuned_model", | |
schema, | |
) # db | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [ | |
m.split("__")[0] | |
for m in saved_models | |
if m.split("__")[1] == target_col | |
] | |
sel_model = required_saved_models[ | |
0 | |
] # only 1 tuned model available per resp metric | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model = sel_model_dict["Model_object"] | |
X_train = sel_model_dict["X_train_tuned"] | |
X_test = sel_model_dict["X_test_tuned"] | |
best_feature_set = sel_model_dict["feature_set"] | |
# Calculate contributions | |
st.session_state["orig_media_data"].columns = [ | |
col.lower() | |
.replace(".", "_") | |
.replace("@", "_") | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
for col in st.session_state["orig_media_data"].columns | |
] | |
media_data = st.session_state["media_data"] | |
contri_df = pd.DataFrame() | |
y = [] | |
y_pred = [] | |
random_eff_df = get_random_effects(media_data, panel_col, model) | |
random_eff_df["fixed_effect"] = model.fe_params["Intercept"] | |
random_eff_df["panel_effect"] = ( | |
random_eff_df["random_effect"] + random_eff_df["fixed_effect"] | |
) | |
coef_df = pd.DataFrame(model.fe_params) | |
coef_df.reset_index(inplace=True) | |
coef_df.columns = ["feature", "coef"] | |
x_train_contribution = X_train.copy() | |
x_test_contribution = X_test.copy() | |
# preprocessing not needed since X_train is already preprocessed | |
# X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col) | |
# x_train_contribution[best_feature_set] = X1[best_feature_set] | |
# x_test_contribution[best_feature_set] = X2[best_feature_set] | |
x_train_contribution = mdf_predict( | |
x_train_contribution, model, random_eff_df | |
) | |
x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df) | |
x_train_contribution = pd.merge( | |
x_train_contribution, | |
random_eff_df[[panel_col, "panel_effect"]], | |
on=panel_col, | |
how="left", | |
) | |
x_test_contribution = pd.merge( | |
x_test_contribution, | |
random_eff_df[[panel_col, "panel_effect"]], | |
on=panel_col, | |
how="left", | |
) | |
for i in range(len(coef_df))[1:]: | |
coef = coef_df.loc[i, "coef"] | |
col = coef_df.loc[i, "feature"] | |
if col.lower() != "intercept": | |
x_train_contribution[str(col) + "_contr"] = ( | |
coef * x_train_contribution[col] | |
) | |
x_test_contribution[str(col) + "_contr"] = ( | |
coef * x_train_contribution[col] | |
) | |
tuning_cols = [ | |
c | |
for c in x_train_contribution.filter(regex="contr").columns | |
if c | |
in [ | |
"day_of_week_contr", | |
"Trend_contr", | |
"sine_wave_contr", | |
"cosine_wave_contr", | |
] | |
] | |
flag_cols = [ | |
c | |
for c in x_train_contribution.filter(regex="contr").columns | |
if "_flag" in c | |
] | |
# add exogenous contribution to base | |
all_exog_vars = st.session_state["bin_dict"]["Exogenous"] | |
all_exog_vars = [ | |
var.lower() | |
.replace(".", "_") | |
.replace("@", "_") | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
for var in all_exog_vars | |
] | |
exog_cols = [] | |
if len(all_exog_vars) > 0: | |
for col in x_train_contribution.filter(regex="contr").columns: | |
if ( | |
len([exog_var for exog_var in all_exog_vars if exog_var in col]) | |
> 0 | |
): | |
exog_cols.append(col) | |
base_cols = ["panel_effect"] + flag_cols + tuning_cols + exog_cols | |
x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum( | |
axis=1 | |
) | |
x_train_contribution.drop(columns=base_cols, inplace=True) | |
x_test_contribution["base_contr"] = x_test_contribution[base_cols].sum( | |
axis=1 | |
) | |
x_test_contribution.drop(columns=base_cols, inplace=True) | |
overall_contributions = pd.concat( | |
[x_train_contribution, x_test_contribution] | |
).reset_index(drop=True) | |
overview_test_data_prep_panel( | |
overall_contributions, | |
st.session_state["orig_media_data"], | |
st.session_state["spends_data"], | |
date_col, | |
panel_col, | |
target_col, | |
) | |
else: # NON PANEL | |
if st.session_state["is_tuned_model"][target_col] == True: # Sprint4 | |
# with open( | |
# os.path.join(st.session_state["project_path"], "tuned_model.pkl"), | |
# "rb", | |
# ) as file: | |
# model_dict = pickle.load(file) | |
model_dict = retrieve_pkl_object( | |
st.session_state["project_number"], | |
"Model_Tuning", | |
"tuned_model", | |
schema, | |
) # db | |
saved_models = list(model_dict.keys()) | |
required_saved_models = [ | |
m.split("__")[0] | |
for m in saved_models | |
if m.split("__")[1] == target_col | |
] | |
sel_model = required_saved_models[ | |
0 | |
] # only 1 tuned model available per resp metric | |
sel_model_dict = model_dict[sel_model + "__" + target_col] | |
model = sel_model_dict["Model_object"] | |
X_train = sel_model_dict["X_train_tuned"] | |
X_test = sel_model_dict["X_test_tuned"] | |
best_feature_set = sel_model_dict["feature_set"] | |
x_train_contribution = X_train.copy() | |
x_test_contribution = X_test.copy() | |
x_train_contribution["pred"] = model.predict( | |
x_train_contribution[best_feature_set] | |
) | |
x_test_contribution["pred"] = model.predict( | |
x_test_contribution[best_feature_set] | |
) | |
coef_df = pd.DataFrame(model.params) | |
coef_df.reset_index(inplace=True) | |
coef_df.columns = ["feature", "coef"] | |
# st.write(coef_df) | |
for i in range(len(coef_df)): | |
coef = coef_df.loc[i, "coef"] | |
col = coef_df.loc[i, "feature"] | |
if col != "const": | |
x_train_contribution[str(col) + "_contr"] = ( | |
coef * x_train_contribution[col] | |
) | |
x_test_contribution[str(col) + "_contr"] = ( | |
coef * x_test_contribution[col] | |
) | |
else: | |
x_train_contribution["const"] = coef | |
x_test_contribution["const"] = coef | |
tuning_cols = [ | |
c | |
for c in x_train_contribution.filter(regex="contr").columns | |
if c | |
in [ | |
"day_of_week_contr", | |
"Trend_contr", | |
"sine_wave_contr", | |
"cosine_wave_contr", | |
] | |
] | |
flag_cols = [ | |
c | |
for c in x_train_contribution.filter(regex="contr").columns | |
if "_flag" in c | |
] | |
# add exogenous contribution to base | |
all_exog_vars = st.session_state["bin_dict"]["Exogenous"] | |
all_exog_vars = [ | |
var.lower() | |
.replace(".", "_") | |
.replace("@", "_") | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
for var in all_exog_vars | |
] | |
exog_cols = [] | |
if len(all_exog_vars) > 0: | |
for col in x_train_contribution.filter(regex="contr").columns: | |
if ( | |
len([exog_var for exog_var in all_exog_vars if exog_var in col]) | |
> 0 | |
): | |
exog_cols.append(col) | |
base_cols = ["const"] + flag_cols + tuning_cols + exog_cols | |
# st.write(base_cols) | |
x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum( | |
axis=1 | |
) | |
x_train_contribution.drop(columns=base_cols, inplace=True) | |
x_test_contribution["base_contr"] = x_test_contribution[base_cols].sum( | |
axis=1 | |
) | |
x_test_contribution.drop(columns=base_cols, inplace=True) | |
# x_test_contribution.to_csv("Test/test_contr.csv", index=False) | |
overall_contributions = pd.concat( | |
[x_train_contribution, x_test_contribution] | |
).reset_index(drop=True) | |
# overall_contributions.to_csv("Test/overall_contributions.csv", index=False) | |
overview_test_data_prep_nonpanel( | |
overall_contributions, | |
st.session_state["orig_media_data"].copy(), | |
st.session_state["spends_data"].copy(), | |
date_col, | |
target_col, | |
) | |
# for k, v in st.session_sta | |
# te.items(): | |
# if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'): | |
# st.session_state[k] = v | |
# authenticator = st.session_state.get('authenticator') | |
# if authenticator is None: | |
# authenticator = load_authenticator() | |
# name, authentication_status, username = authenticator.login('Login', 'main') | |
# auth_status = st.session_state['authentication_status'] | |
# if auth_status: | |
# authenticator.logout('Logout', 'main') | |
# is_state_initiaized = st.session_state.get('initialized',False) | |
# if not is_state_initiaized: | |
min_date = X_train[date_col].min().date() | |
max_date = X_test[date_col].max().date() | |
if "media_performance" not in st.session_state["project_dct"]: | |
st.session_state["project_dct"]["media_performance"] = { | |
"start_date": None, | |
"end_date": None, | |
} | |
start_default = st.session_state["project_dct"]["media_performance"].get( | |
"start_date", None | |
) | |
start_default = start_default if start_default is not None else min_date | |
start_default = start_default if start_default > min_date else min_date | |
start_default = start_default if start_default < max_date else min_date | |
end_default = st.session_state["project_dct"]["media_performance"].get( | |
"end_date", None | |
) | |
end_default = end_default if end_default is not None else max_date | |
end_default = end_default if end_default > min_date else max_date | |
end_default = end_default if end_default < max_date else max_date | |
st.write("Select a timeline for analysis") | |
date_columns = st.columns(2) | |
with date_columns[0]: | |
start_date = st.date_input( | |
"Select Start Date", | |
start_default, | |
min_value=min_date, | |
max_value=max_date, | |
) | |
if (start_date < min_date) or (start_date > max_date): | |
st.error("Please select dates in the range of the dates in the data") | |
st.stop() | |
end_default = ( | |
end_default if end_default > start_date + timedelta(days=1) else max_date | |
) | |
with date_columns[1]: | |
end_default = ( | |
end_default | |
if pd.Timestamp(end_default) >= pd.Timestamp(start_date) | |
else start_date | |
) | |
end_date = st.date_input( | |
"Select End Date", | |
end_default, | |
min_value=start_date + timedelta(days=1), | |
max_value=max_date, | |
) | |
if ( | |
(start_date < min_date) | |
or (end_date < min_date) | |
or (start_date > max_date) | |
or (end_date > max_date) | |
): | |
st.error("Please select dates in the range of the dates in the data") | |
st.stop() | |
if end_date < start_date + timedelta(days=1): | |
st.error("Please select end date after start date") | |
st.stop() | |
st.session_state["project_dct"]["media_performance"]["start_date"] = start_date | |
st.session_state["project_dct"]["media_performance"]["end_date"] = end_date | |
st.header("Overview of Previous Media Spend") | |
initialize_data_cmp(target_col, is_panel, panel_col, start_date, end_date) | |
scenario = st.session_state["scenario"] | |
raw_df = st.session_state["raw_df"] | |
columns = st.columns(2) | |
with columns[0]: | |
st.metric( | |
label="Media Spend", | |
value=format_numbers(float(scenario.actual_total_spends)), | |
) | |
###print(f"##################### {scenario.actual_total_sales} ##################") | |
with columns[1]: | |
st.metric( | |
label=sel_target_col_frmttd, | |
value=format_numbers( | |
float(scenario.actual_total_sales), include_indicator=False | |
), | |
) | |
actual_summary_df = create_channel_summary(scenario, sel_target_col_frmttd) | |
actual_summary_df["Channel"] = actual_summary_df["Channel"].apply( | |
channel_name_formating | |
) | |
columns = st.columns((3, 1)) | |
with columns[0]: | |
with st.expander("Channel wise overview"): | |
st.markdown( | |
actual_summary_df.style.set_table_styles( | |
[ | |
{ | |
"selector": "th", | |
"props": [("background-color", "#f6dcc7")], | |
}, | |
{ | |
"selector": "tr:nth-child(even)", | |
"props": [("background-color", "#f6dcc7")], | |
}, | |
] | |
).to_html(), | |
unsafe_allow_html=True, | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
############################## | |
st.plotly_chart( | |
create_contribution_pie(scenario, sel_target_col_frmttd), | |
use_container_width=True, | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
################################3 | |
st.plotly_chart( | |
create_contribuion_stacked_plot(scenario, sel_target_col_frmttd), | |
use_container_width=True, | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
####################################### | |
selected_channel_name = st.selectbox( | |
"Channel", | |
st.session_state["channels_list"] + ["non media"], | |
format_func=channel_name_formating, | |
) | |
selected_channel = scenario.channels.get(selected_channel_name, None) | |
st.plotly_chart( | |
create_channel_spends_sales_plot(selected_channel, sel_target_col_frmttd), | |
use_container_width=True, | |
) | |
st.markdown("<hr>", unsafe_allow_html=True) | |
if st.button("Save this session", use_container_width=True): | |
project_dct_pkl = pickle.dumps(st.session_state["project_dct"]) | |
update_db( | |
st.session_state["project_number"], | |
"Current_Media_Performance", | |
"project_dct", | |
project_dct_pkl, | |
schema, | |
# resp_mtrc=None, | |
) # db | |
st.success("Session Saved!") | |
# Remove "response_metric_" from the start and "_total" from the end | |
if str(target_col).startswith("response_metric_"): | |
target_col = target_col.replace("response_metric_", "", 1) | |
# Remove the last 6 characters (length of "_total") | |
if str(target_col).endswith("_total"): | |
target_col = target_col[:-6] | |
if ( | |
st.session_state["project_dct"]["current_media_performance"][ | |
"model_outputs" | |
][target_col] | |
is not None | |
): | |
if ( | |
len( | |
st.session_state["project_dct"]["current_media_performance"][ | |
"model_outputs" | |
][target_col]["contribution_data"] | |
) | |
> 0 | |
): | |
st.download_button( | |
label="Download Contribution File", | |
data=st.session_state["project_dct"]["current_media_performance"][ | |
"model_outputs" | |
][target_col]["contribution_data"].to_csv(), | |
file_name="contributions.csv", | |
key="dwnld_contr", | |
) | |
except: | |
exc_type, exc_value, exc_traceback = sys.exc_info() | |
error_message = "".join( | |
traceback.format_exception(exc_type, exc_value, exc_traceback) | |
) | |
log_message("error", f"Error: {error_message}", "Current Media Performance") | |
st.warning("An error occured, please try again", icon="⚠️") | |