import streamlit as st import pandas as pd from sklearn.preprocessing import MinMaxScaler import pickle import os from utilities_with_panel import load_local_css, set_header import yaml from yaml import SafeLoader import sqlite3 from datetime import timedelta from utilities import ( set_header, load_local_css, update_db, project_selection, retrieve_pkl_object, ) from utilities_with_panel import ( overview_test_data_prep_panel, overview_test_data_prep_nonpanel, initialize_data_cmp, create_channel_summary, create_contribution_pie, create_contribuion_stacked_plot, create_channel_spends_sales_plot, format_numbers, channel_name_formating, ) from log_application import log_message import sys, traceback from post_gres_cred import db_cred st.set_page_config(layout="wide") load_local_css("styles.css") set_header() schema = db_cred["schema"] if "username" not in st.session_state: st.session_state["username"] = None if "project_name" not in st.session_state: st.session_state["project_name"] = None if "project_dct" not in st.session_state: project_selection() st.stop() tuned_model = retrieve_pkl_object( st.session_state["project_number"], "Model_Tuning", "tuned_model", schema ) if tuned_model is None: st.error("Please save a tuned model") st.stop() if ( "session_state_saved" in st.session_state["project_dct"]["model_tuning"].keys() and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != [] ): for key in ["used_response_metrics", "media_data", "bin_dict"]: if key not in st.session_state: st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ "session_state_saved" ][key] ## DEFINE ALL FUNCTIONS def get_random_effects(media_data, panel_col, mdf): random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) for i, market in enumerate(media_data[panel_col].unique()): print(i, end="\r") intercept = mdf.random_effects[market].values[0] random_eff_df.loc[i, "random_effect"] = intercept random_eff_df.loc[i, panel_col] = market return random_eff_df def process_train_and_test(train, test, features, panel_col, target_col): X1 = train[features] ss = MinMaxScaler() X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) X1[panel_col] = train[panel_col] X1[target_col] = train[target_col] if test is not None: X2 = test[features] X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) X2[panel_col] = test[panel_col] X2[target_col] = test[target_col] return X1, X2 return X1 def mdf_predict(X_df, mdf, random_eff_df): X = X_df.copy() X = pd.merge( X, random_eff_df[[panel_col, "random_effect"]], on=panel_col, how="left", ) X["pred_fixed_effect"] = mdf.predict(X) X["pred"] = X["pred_fixed_effect"] + X["random_effect"] X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True) return X try: if "username" in st.session_state and st.session_state["username"] is not None: # conn = sqlite3.connect( # r"DB/User.db", check_same_thread=False # ) # connection with sql db # c = conn.cursor() tuned_model = retrieve_pkl_object( st.session_state["project_number"], "Model_Tuning", "tuned_model", schema ) if tuned_model is None: st.error("Please save a tuned model") st.stop() if ( "session_state_saved" in st.session_state["project_dct"]["model_tuning"].keys() and st.session_state["project_dct"]["model_tuning"]["session_state_saved"] != [] ): for key in [ "used_response_metrics", "is_tuned_model", "media_data", "X_test_spends", "spends_data", ]: st.session_state[key] = st.session_state["project_dct"]["model_tuning"][ "session_state_saved" ][key] elif ( "session_state_saved" in st.session_state["project_dct"]["model_build"].keys() and st.session_state["project_dct"]["model_build"]["session_state_saved"] != [] ): for key in [ "used_response_metrics", "date", "saved_model_names", "media_data", "X_test_spends", ]: st.session_state[key] = st.session_state["project_dct"]["model_build"][ "session_state_saved" ][key] else: st.error("Please tune a model first") st.session_state["bin_dict"] = st.session_state["project_dct"]["model_build"][ "session_state_saved" ]["bin_dict"] st.session_state["media_data"].columns = [ c.lower() for c in st.session_state["media_data"].columns ] # with open( # os.path.join(st.session_state["project_path"], "data_import.pkl"), # "rb", # ) as f: # data = pickle.load(f) # # Accessing the loaded objects # st.session_state["orig_media_data"] = data["final_df"] st.session_state["orig_media_data"] = st.session_state["project_dct"][ "data_import" ][ "imputed_tool_df" ].copy() # db st.session_state["channels"] = st.session_state["project_dct"]["data_import"][ "group_dict" ].copy() # target='Revenue' # set the panel column panel_col = "panel" is_panel = ( True if st.session_state["media_data"][panel_col].nunique() > 1 else False ) date_col = "date" cols1 = st.columns([2, 1]) with cols1[0]: st.markdown(f"**Welcome {st.session_state['username']}**") with cols1[1]: st.markdown(f"**Current Project: {st.session_state['project_name']}**") st.title("AI Model Media Performance") def remove_response_metric(name): # Convert the name to a lowercase string and remove any leading or trailing spaces name_str = str(name).lower().strip() # Check if the name starts with "response metric" or "response_metric" if name_str.startswith("response metric"): return name[len("response metric") :].replace("_", " ").strip().title() elif name_str.startswith("response_metric"): return name[len("response_metric") :].replace("_", " ").strip().title() else: return name sel_target_col = st.selectbox( "Select the response metric", st.session_state["used_response_metrics"], format_func=remove_response_metric, ) sel_target_col_frmttd = sel_target_col.replace("_", " ").replace("-", " ") sel_target_col_frmttd = sel_target_col_frmttd.title() target_col = ( sel_target_col.lower() .replace(" ", "_") .replace("-", "") .replace(":", "") .replace("__", "_") ) target = sel_target_col # Contribution if is_panel: # read tuned mixedLM model if st.session_state["is_tuned_model"][target_col] == True: model_dict = retrieve_pkl_object( st.session_state["project_number"], "Model_Tuning", "tuned_model", schema, ) # db saved_models = list(model_dict.keys()) required_saved_models = [ m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col ] sel_model = required_saved_models[ 0 ] # only 1 tuned model available per resp metric sel_model_dict = model_dict[sel_model + "__" + target_col] model = sel_model_dict["Model_object"] X_train = sel_model_dict["X_train_tuned"] X_test = sel_model_dict["X_test_tuned"] best_feature_set = sel_model_dict["feature_set"] # Calculate contributions st.session_state["orig_media_data"].columns = [ col.lower() .replace(".", "_") .replace("@", "_") .replace(" ", "_") .replace("-", "") .replace(":", "") .replace("__", "_") for col in st.session_state["orig_media_data"].columns ] media_data = st.session_state["media_data"] contri_df = pd.DataFrame() y = [] y_pred = [] random_eff_df = get_random_effects(media_data, panel_col, model) random_eff_df["fixed_effect"] = model.fe_params["Intercept"] random_eff_df["panel_effect"] = ( random_eff_df["random_effect"] + random_eff_df["fixed_effect"] ) coef_df = pd.DataFrame(model.fe_params) coef_df.reset_index(inplace=True) coef_df.columns = ["feature", "coef"] x_train_contribution = X_train.copy() x_test_contribution = X_test.copy() # preprocessing not needed since X_train is already preprocessed # X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col) # x_train_contribution[best_feature_set] = X1[best_feature_set] # x_test_contribution[best_feature_set] = X2[best_feature_set] x_train_contribution = mdf_predict( x_train_contribution, model, random_eff_df ) x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df) x_train_contribution = pd.merge( x_train_contribution, random_eff_df[[panel_col, "panel_effect"]], on=panel_col, how="left", ) x_test_contribution = pd.merge( x_test_contribution, random_eff_df[[panel_col, "panel_effect"]], on=panel_col, how="left", ) for i in range(len(coef_df))[1:]: coef = coef_df.loc[i, "coef"] col = coef_df.loc[i, "feature"] if col.lower() != "intercept": x_train_contribution[str(col) + "_contr"] = ( coef * x_train_contribution[col] ) x_test_contribution[str(col) + "_contr"] = ( coef * x_train_contribution[col] ) tuning_cols = [ c for c in x_train_contribution.filter(regex="contr").columns if c in [ "day_of_week_contr", "Trend_contr", "sine_wave_contr", "cosine_wave_contr", ] ] flag_cols = [ c for c in x_train_contribution.filter(regex="contr").columns if "_flag" in c ] # add exogenous contribution to base all_exog_vars = st.session_state["bin_dict"]["Exogenous"] all_exog_vars = [ var.lower() .replace(".", "_") .replace("@", "_") .replace(" ", "_") .replace("-", "") .replace(":", "") .replace("__", "_") for var in all_exog_vars ] exog_cols = [] if len(all_exog_vars) > 0: for col in x_train_contribution.filter(regex="contr").columns: if ( len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0 ): exog_cols.append(col) base_cols = ["panel_effect"] + flag_cols + tuning_cols + exog_cols x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum( axis=1 ) x_train_contribution.drop(columns=base_cols, inplace=True) x_test_contribution["base_contr"] = x_test_contribution[base_cols].sum( axis=1 ) x_test_contribution.drop(columns=base_cols, inplace=True) overall_contributions = pd.concat( [x_train_contribution, x_test_contribution] ).reset_index(drop=True) overview_test_data_prep_panel( overall_contributions, st.session_state["orig_media_data"], st.session_state["spends_data"], date_col, panel_col, target_col, ) else: # NON PANEL if st.session_state["is_tuned_model"][target_col] == True: # Sprint4 # with open( # os.path.join(st.session_state["project_path"], "tuned_model.pkl"), # "rb", # ) as file: # model_dict = pickle.load(file) model_dict = retrieve_pkl_object( st.session_state["project_number"], "Model_Tuning", "tuned_model", schema, ) # db saved_models = list(model_dict.keys()) required_saved_models = [ m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col ] sel_model = required_saved_models[ 0 ] # only 1 tuned model available per resp metric sel_model_dict = model_dict[sel_model + "__" + target_col] model = sel_model_dict["Model_object"] X_train = sel_model_dict["X_train_tuned"] X_test = sel_model_dict["X_test_tuned"] best_feature_set = sel_model_dict["feature_set"] x_train_contribution = X_train.copy() x_test_contribution = X_test.copy() x_train_contribution["pred"] = model.predict( x_train_contribution[best_feature_set] ) x_test_contribution["pred"] = model.predict( x_test_contribution[best_feature_set] ) coef_df = pd.DataFrame(model.params) coef_df.reset_index(inplace=True) coef_df.columns = ["feature", "coef"] # st.write(coef_df) for i in range(len(coef_df)): coef = coef_df.loc[i, "coef"] col = coef_df.loc[i, "feature"] if col != "const": x_train_contribution[str(col) + "_contr"] = ( coef * x_train_contribution[col] ) x_test_contribution[str(col) + "_contr"] = ( coef * x_test_contribution[col] ) else: x_train_contribution["const"] = coef x_test_contribution["const"] = coef tuning_cols = [ c for c in x_train_contribution.filter(regex="contr").columns if c in [ "day_of_week_contr", "Trend_contr", "sine_wave_contr", "cosine_wave_contr", ] ] flag_cols = [ c for c in x_train_contribution.filter(regex="contr").columns if "_flag" in c ] # add exogenous contribution to base all_exog_vars = st.session_state["bin_dict"]["Exogenous"] all_exog_vars = [ var.lower() .replace(".", "_") .replace("@", "_") .replace(" ", "_") .replace("-", "") .replace(":", "") .replace("__", "_") for var in all_exog_vars ] exog_cols = [] if len(all_exog_vars) > 0: for col in x_train_contribution.filter(regex="contr").columns: if ( len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0 ): exog_cols.append(col) base_cols = ["const"] + flag_cols + tuning_cols + exog_cols # st.write(base_cols) x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum( axis=1 ) x_train_contribution.drop(columns=base_cols, inplace=True) x_test_contribution["base_contr"] = x_test_contribution[base_cols].sum( axis=1 ) x_test_contribution.drop(columns=base_cols, inplace=True) # x_test_contribution.to_csv("Test/test_contr.csv", index=False) overall_contributions = pd.concat( [x_train_contribution, x_test_contribution] ).reset_index(drop=True) # overall_contributions.to_csv("Test/overall_contributions.csv", index=False) overview_test_data_prep_nonpanel( overall_contributions, st.session_state["orig_media_data"].copy(), st.session_state["spends_data"].copy(), date_col, target_col, ) # for k, v in st.session_sta # te.items(): # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'): # st.session_state[k] = v # authenticator = st.session_state.get('authenticator') # if authenticator is None: # authenticator = load_authenticator() # name, authentication_status, username = authenticator.login('Login', 'main') # auth_status = st.session_state['authentication_status'] # if auth_status: # authenticator.logout('Logout', 'main') # is_state_initiaized = st.session_state.get('initialized',False) # if not is_state_initiaized: min_date = X_train[date_col].min().date() max_date = X_test[date_col].max().date() if "media_performance" not in st.session_state["project_dct"]: st.session_state["project_dct"]["media_performance"] = { "start_date": None, "end_date": None, } start_default = st.session_state["project_dct"]["media_performance"].get( "start_date", None ) start_default = start_default if start_default is not None else min_date start_default = start_default if start_default > min_date else min_date start_default = start_default if start_default < max_date else min_date end_default = st.session_state["project_dct"]["media_performance"].get( "end_date", None ) end_default = end_default if end_default is not None else max_date end_default = end_default if end_default > min_date else max_date end_default = end_default if end_default < max_date else max_date st.write("Select a timeline for analysis") date_columns = st.columns(2) with date_columns[0]: start_date = st.date_input( "Select Start Date", start_default, min_value=min_date, max_value=max_date, ) if (start_date < min_date) or (start_date > max_date): st.error("Please select dates in the range of the dates in the data") st.stop() end_default = ( end_default if end_default > start_date + timedelta(days=1) else max_date ) with date_columns[1]: end_default = ( end_default if pd.Timestamp(end_default) >= pd.Timestamp(start_date) else start_date ) end_date = st.date_input( "Select End Date", end_default, min_value=start_date + timedelta(days=1), max_value=max_date, ) if ( (start_date < min_date) or (end_date < min_date) or (start_date > max_date) or (end_date > max_date) ): st.error("Please select dates in the range of the dates in the data") st.stop() if end_date < start_date + timedelta(days=1): st.error("Please select end date after start date") st.stop() st.session_state["project_dct"]["media_performance"]["start_date"] = start_date st.session_state["project_dct"]["media_performance"]["end_date"] = end_date st.header("Overview of Previous Media Spend") initialize_data_cmp(target_col, is_panel, panel_col, start_date, end_date) scenario = st.session_state["scenario"] raw_df = st.session_state["raw_df"] columns = st.columns(2) with columns[0]: st.metric( label="Media Spend", value=format_numbers(float(scenario.actual_total_spends)), ) ###print(f"##################### {scenario.actual_total_sales} ##################") with columns[1]: st.metric( label=sel_target_col_frmttd, value=format_numbers( float(scenario.actual_total_sales), include_indicator=False ), ) actual_summary_df = create_channel_summary(scenario, sel_target_col_frmttd) actual_summary_df["Channel"] = actual_summary_df["Channel"].apply( channel_name_formating ) columns = st.columns((3, 1)) with columns[0]: with st.expander("Channel wise overview"): st.markdown( actual_summary_df.style.set_table_styles( [ { "selector": "th", "props": [("background-color", "#f6dcc7")], }, { "selector": "tr:nth-child(even)", "props": [("background-color", "#f6dcc7")], }, ] ).to_html(), unsafe_allow_html=True, ) st.markdown("