import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import streamlit as st
import pandas as pd
import statsmodels.api as sm
# from sklearn.metrics import mean_absolute_percentage_error
import os
import sys
import re
import pickle
import tempfile
import traceback
import sqlite3
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
# from st_aggrid import AgGrid
# from st_aggrid import GridOptionsBuilder, GridUpdateMode
# from st_aggrid import GridOptionsBuilder
from data_prep import plot_actual_vs_predicted
from utilities import (
    set_header,
    load_local_css,
    update_db,
    project_selection,
    retrieve_pkl_object,
)
from post_gres_cred import db_cred
from log_application import log_message
schema = db_cred["schema"]
sys.setrecursionlimit(10**6)

# Create/truncate "temp_stdout.txt", then restore the original stdout.
original_stdout = sys.stdout
sys.stdout = open("temp_stdout.txt", "w")
sys.stdout.close()
sys.stdout = original_stdout

st.set_page_config(layout="wide")
load_local_css("styles.css")
set_header()


## DEFINE ALL FUNCTIONS
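
# Scatter of standardized residuals vs. predicted values; the dashed zero line
# and the +/-2 bands make observations with unusually large errors easy to spot.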
def plot_residual_predicted(actual, predicted, df_):
    df_["Residuals"] = actual - pd.Series(predicted)
    df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_[
        "Residuals"
    ].std()

    # Create a Plotly scatter plot
    fig = px.scatter(
        df_,
        x=predicted,
        y="StdResidual",
        opacity=0.5,
        color_discrete_sequence=["#11B6BD"],
    )

    # Add horizontal lines
    fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
    fig.add_hline(y=2, line_color="red")
    fig.add_hline(y=-2, line_color="red")

    fig.update_xaxes(title="Predicted")
    fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")

    # Set the same width and height for both figures
    fig.update_layout(
        title="Residuals over Predicted Values",
        autosize=False,
        width=600,
        height=400,
    )
    return fig
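
# Histogram (with KDE) of the residuals; for a well-specified model this
# should look roughly symmetric and centered on zero.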
def residual_distribution(actual, predicted):
    Residuals = actual - pd.Series(predicted)

    # Create a Seaborn distribution plot
    sns.set(style="whitegrid")
    plt.figure(figsize=(6, 4))
    sns.histplot(Residuals, kde=True, color="#11B6BD")

    plt.title("Distribution of Residuals")
    plt.xlabel("Residuals")
    plt.ylabel("Probability Density")
    return plt
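
# QQ plot of standardized residuals against theoretical normal quantiles, with
# a 45-degree reference line; departures from the line indicate non-normal
# residuals.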
def qqplot(actual, predicted):
    Residuals = actual - pd.Series(predicted)
    Residuals = pd.Series(Residuals)
    Resid_std = (Residuals - Residuals.mean()) / Residuals.std()

    # Create a QQ plot using Plotly with custom colors
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=sm.ProbPlot(Resid_std).theoretical_quantiles,
            y=sm.ProbPlot(Resid_std).sample_quantiles,
            mode="markers",
            marker=dict(size=5, color="#11B6BD"),
            name="QQ Plot",
        )
    )

    # Add the 45-degree reference line
    diagonal_line = go.Scatter(
        x=[-2, 2],  # Adjust the x values as needed to fit the range of your data
        y=[-2, 2],  # Adjust the y values accordingly
        mode="lines",
        line=dict(color="red"),  # Customize the line color and style
        name=" ",
    )
    fig.add_trace(diagonal_line)

    # Customize the layout
    fig.update_layout(
        title="QQ Plot of Residuals",
        title_x=0.5,
        autosize=False,
        width=600,
        height=400,
        xaxis_title="Theoretical Quantiles",
        yaxis_title="Sample Quantiles",
    )
    return fig
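
# Extracts the estimated random intercept per panel (market) from a fitted
# statsmodels MixedLM result into a lookup DataFrame consumed by mdf_predict.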
def get_random_effects(media_data, panel_col, mdf):
    random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
    for i, market in enumerate(media_data[panel_col].unique()):
        print(i, end="\r")
        intercept = mdf.random_effects[market].values[0]
        random_eff_df.loc[i, "random_effect"] = intercept
        random_eff_df.loc[i, panel_col] = market
    return random_eff_df
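
# Mixed-effects prediction: the model's fixed-effect prediction plus each
# panel's random intercept. Note this relies on the module-level `panel_col`
# defined further below rather than taking it as an argument.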
def mdf_predict(X_df, mdf, random_eff_df):
    X = X_df.copy()
    X = pd.merge(
        X,
        random_eff_df[[panel_col, "random_effect"]],
        on=panel_col,
        how="left",
    )
    X["pred_fixed_effect"] = mdf.predict(X)
    X["pred"] = X["pred_fixed_effect"] + X["random_effect"]
    X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True)
    return X
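
# Builds a per-model metrics table (R-squared, adjusted R-squared, train/test
# WMAPE, summary, fitted object). WMAPE = sum(|actual - forecast|) / sum(|actual|);
# e.g. actuals [100, 0] vs. forecasts [90, 10] give (10 + 10) / 100 = 0.2.
# Relies on the module-level `media_data` and `panel_col` when is_panel is True.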
def metrics_df_panel(model_dict, is_panel):
    def wmape(actual, forecast):
        # Weighted MAPE (WMAPE) eliminates the following shortcomings of MAPE & SMAPE
        ## 1. MAPE becomes extremely high when actual is close to 0
        ## 2. MAPE is more favourable to underforecast than overforecast
        return np.sum(np.abs(actual - forecast)) / np.sum(np.abs(actual))

    metrics_df = pd.DataFrame(
        columns=[
            "Model",
            "R2",
            "ADJR2",
            "Train Mape",
            "Test Mape",
            "Summary",
            "Model_object",
        ]
    )
    i = 0
    for key in model_dict.keys():
        target = key.split("__")[1]
        metrics_df.at[i, "Model"] = target

        y = model_dict[key]["X_train_tuned"][target]
        feature_set = model_dict[key]["feature_set"]

        if is_panel:
            random_df = get_random_effects(
                media_data, panel_col, model_dict[key]["Model_object"]
            )
            pred = mdf_predict(
                model_dict[key]["X_train_tuned"],
                model_dict[key]["Model_object"],
                random_df,
            )["pred"]
        else:
            pred = model_dict[key]["Model_object"].predict(
                model_dict[key]["X_train_tuned"][feature_set]
            )

        ytest = model_dict[key]["X_test_tuned"][target]
        if is_panel:
            predtest = mdf_predict(
                model_dict[key]["X_test_tuned"],
                model_dict[key]["Model_object"],
                random_df,
            )["pred"]
        else:
            predtest = model_dict[key]["Model_object"].predict(
                model_dict[key]["X_test_tuned"][feature_set]
            )

        metrics_df.at[i, "R2"] = r2_score(y, pred)
        metrics_df.at[i, "ADJR2"] = 1 - (1 - metrics_df.loc[i, "R2"]) * (len(y) - 1) / (
            len(y) - len(model_dict[key]["feature_set"]) - 1
        )
        # metrics_df.at[i, "Train Mape"] = mean_absolute_percentage_error(y, pred)
        # metrics_df.at[i, "Test Mape"] = mean_absolute_percentage_error(
        #     ytest, predtest
        # )
        metrics_df.at[i, "Train Mape"] = wmape(y, pred)
        metrics_df.at[i, "Test Mape"] = wmape(ytest, predtest)
        metrics_df.at[i, "Summary"] = model_dict[key]["Model_object"].summary()
        metrics_df.at[i, "Model_object"] = model_dict[key]["Model_object"]
        i += 1

    # Columns filled via .at end up object-dtype, which DataFrame.round ignores,
    # so cast the numeric columns to float before rounding.
    numeric_cols = ["R2", "ADJR2", "Train Mape", "Test Mape"]
    metrics_df[numeric_cols] = metrics_df[numeric_cols].astype(float).round(2)

    metrics_df.rename(
        columns={"R2": "R-squared", "ADJR2": "Adj. R-squared"}, inplace=True
    )
    return metrics_df
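
# Maps a transformed variable name back to its channel group via substring
# match, e.g. with {"tv": ["tv_spend"]}, "tv_spend_adstock_contr" -> "tv";
# unmatched names are returned unchanged.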
def map_channel(transformed_var, channel_dict):
    for key, value_list in channel_dict.items():
        if any(raw_var in transformed_var for raw_var in value_list):
            return key
    return transformed_var  # Return the original value if no match is found
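
# Per-channel percentage contributions for non-panel (OLS) models: multiplies
# each coefficient by its feature column, folds the intercept, flag, trend/
# seasonality and exogenous terms into a single "base" row, and normalizes each
# target's contributions to sum to 100.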
def contributions_nonpanel(model_dict):
    # with open(os.path.join(st.session_state["project_path"], "channel_groups.pkl"), "rb") as f:
    #     channels = pickle.load(f)
    channels = st.session_state["project_dct"]["data_import"]["group_dict"]  # db
    media_data = st.session_state["media_data"]
    contribution_df = pd.DataFrame(columns=["Channel"])

    for key in model_dict.keys():
        best_feature_set = model_dict[key]["feature_set"]
        model = model_dict[key]["Model_object"]
        target = key.split("__")[1]
        X_train = model_dict[key]["X_train_tuned"]
        contri_df = pd.DataFrame()

        coef_df = pd.DataFrame(model.params)
        coef_df.reset_index(inplace=True)
        coef_df.columns = ["feature", "coef"]

        x_train_contribution = X_train.copy()
        x_train_contribution["pred"] = model.predict(X_train[best_feature_set])

        for i in range(len(coef_df)):
            coef = coef_df.loc[i, "coef"]
            col = coef_df.loc[i, "feature"]
            if col != "const":
                x_train_contribution[str(col) + "_contr"] = (
                    coef * x_train_contribution[col]
                )
            else:
                x_train_contribution["const"] = coef

        tuning_cols = [
            c
            for c in x_train_contribution.filter(regex="contr").columns
            if c
            in [
                "day_of_week_contr",
                "Trend_contr",
                "sine_wave_contr",
                "cosine_wave_contr",
            ]
        ]
        flag_cols = [
            c
            for c in x_train_contribution.filter(regex="contr").columns
            if "_flag" in c
        ]

        # add exogenous contribution to base
        all_exog_vars = st.session_state["bin_dict"]["Exogenous"]
        all_exog_vars = [
            var.lower()
            .replace(".", "_")
            .replace("@", "_")
            .replace(" ", "_")
            .replace("-", "")
            .replace(":", "")
            .replace("__", "_")
            for var in all_exog_vars
        ]
        exog_cols = []
        if len(all_exog_vars) > 0:
            for col in x_train_contribution.filter(regex="contr").columns:
                if len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0:
                    exog_cols.append(col)

        base_cols = ["const"] + flag_cols + tuning_cols + exog_cols
        x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(axis=1)
        x_train_contribution.drop(columns=base_cols, inplace=True)

        contri_df = pd.DataFrame(x_train_contribution.filter(regex="contr").sum(axis=0))
        contri_df.reset_index(inplace=True)
        contri_df.columns = ["Channel", target]
        contri_df["Channel"] = contri_df["Channel"].apply(
            lambda x: map_channel(x, channels)
        )
        contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
        contri_df["Channel"] = contri_df["Channel"].replace("base_contr", "base")
        contribution_df = pd.merge(
            contribution_df, contri_df, on="Channel", how="outer"
        )
    return contribution_df
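
# Panel (mixed-effects) counterpart of contributions_nonpanel: the base row
# folds in the fixed intercept plus each panel's random effect ("panel_effect")
# instead of a single constant.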
def contributions_panel(model_dict):
    channels = st.session_state["project_dct"]["data_import"]["group_dict"]  # db
    media_data = st.session_state["media_data"]
    contribution_df = pd.DataFrame(columns=["Channel"])

    for key in model_dict.keys():
        best_feature_set = model_dict[key]["feature_set"]
        model = model_dict[key]["Model_object"]
        target = key.split("__")[1]
        X_train = model_dict[key]["X_train_tuned"]
        contri_df = pd.DataFrame()

        random_eff_df = get_random_effects(media_data, panel_col, model)
        random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
        random_eff_df["panel_effect"] = (
            random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
        )

        coef_df = pd.DataFrame(model.fe_params)
        coef_df.reset_index(inplace=True)
        coef_df.columns = ["feature", "coef"]

        x_train_contribution = X_train.copy()
        x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df)
        x_train_contribution = pd.merge(
            x_train_contribution,
            random_eff_df[[panel_col, "panel_effect"]],
            on=panel_col,
            how="left",
        )

        for i in range(len(coef_df)):
            coef = coef_df.loc[i, "coef"]
            col = coef_df.loc[i, "feature"]
            if col.lower() != "intercept":
                x_train_contribution[str(col) + "_contr"] = (
                    coef * x_train_contribution[col]
                )

        tuning_cols = [
            c
            for c in x_train_contribution.filter(regex="contr").columns
            if c
            in [
                "day_of_week_contr",
                "Trend_contr",
                "sine_wave_contr",
                "cosine_wave_contr",
            ]
        ]
        flag_cols = [
            c
            for c in x_train_contribution.filter(regex="contr").columns
            if "_flag" in c
        ]

        # add exogenous contribution to base
        all_exog_vars = st.session_state["bin_dict"]["Exogenous"]
        all_exog_vars = [
            var.lower()
            .replace(".", "_")
            .replace("@", "_")
            .replace(" ", "_")
            .replace("-", "")
            .replace(":", "")
            .replace("__", "_")
            for var in all_exog_vars
        ]
        exog_cols = []
        if len(all_exog_vars) > 0:
            for col in x_train_contribution.filter(regex="contr").columns:
                if len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0:
                    exog_cols.append(col)

        base_cols = ["panel_effect"] + flag_cols + tuning_cols + exog_cols
        x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(axis=1)
        x_train_contribution.drop(columns=base_cols, inplace=True)

        contri_df = pd.DataFrame(x_train_contribution.filter(regex="contr").sum(axis=0))
        contri_df.reset_index(inplace=True)
        contri_df.columns = ["Channel", target]
        contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
        contri_df["Channel"] = contri_df["Channel"].apply(
            lambda x: map_channel(x, channels)
        )
        contri_df["Channel"] = contri_df["Channel"].replace("base_contr", "base")
        contribution_df = pd.merge(
            contribution_df, contri_df, on="Channel", how="outer"
        )
    return contribution_df
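
# Grouped bar chart comparing per-channel contributions across the selected
# response metrics, with "Base Sales" pinned as the first bar and channels
# sorted by their average contribution.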
def create_grouped_bar_plot(contribution_df, contribution_selections):
    # Extract the 'Channel' names
    channel_names = contribution_df["Channel"].tolist()

    # Dictionary to store all contributions except 'const' and 'base'
    all_contributions = {
        name: [] for name in channel_names if name not in ["const", "base"]
    }

    # Dictionary to store base sales for each selection
    base_sales_dict = {}

    # Accumulate contributions for each channel from each selection
    for selection in contribution_selections:
        contributions = contribution_df[selection].values.astype(float)
        base_sales = 0  # Initialize base sales for the current selection
        for channel_name, contribution in zip(channel_names, contributions):
            if channel_name in all_contributions:
                all_contributions[channel_name].append(contribution)
            elif channel_name == "base":
                # Capture base sales for the current selection
                base_sales = contribution
        # Store base sales for each selection
        base_sales_dict[selection] = base_sales

    # Calculate the average of contributions and sort by this average
    sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
    sorted_channel_names = [name for name, _ in sorted_channels]
    # Add 'Base Sales' at the start
    sorted_channel_names = ["Base Sales"] + sorted_channel_names

    trace_data = []
    max_value = 0  # Track the highest bar for y-axis adjustment

    # Create traces for the grouped bar chart
    for i, selection in enumerate(contribution_selections):
        display_name = sorted_channel_names
        # Start with base sales for the current selection
        display_contribution = [base_sales_dict[selection]] + [
            all_contributions[name][i] for name in sorted_channel_names[1:]
        ]

        # Generate text labels for each bar
        text_values = [
            f"{val}%" for val in np.round(display_contribution, 0).astype(int)
        ]

        # Find the max value for y-axis calculation
        max_contribution = max(display_contribution)
        if max_contribution > max_value:
            max_value = max_contribution

        # Create a bar trace for each selection
        trace = go.Bar(
            x=display_name,
            y=display_contribution,
            name=selection,
            text=text_values,
            textposition="outside",
        )
        trace_data.append(trace)

    # Define layout for the bar chart
    layout = go.Layout(
        title="Metrics Contribution by Channel (Train)",
        xaxis=dict(title="Channel Name"),
        yaxis=dict(
            title="Metrics Contribution", range=[0, max_value * 1.2]
        ),  # Set y-axis 20% higher than the max bar
        barmode="group",
        plot_bgcolor="white",
    )

    # Create the figure with trace data and layout
    fig = go.Figure(data=trace_data, layout=layout)
    return fig
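
# Waterfall version of the same comparison: one relative step per channel,
# starting from base sales, for each selected response metric.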
def preprocess_and_plot(contribution_df, contribution_selections):
    # Extract the 'Channel' names
    channel_names = contribution_df["Channel"].tolist()

    # Dictionary to store all contributions except 'const' and 'base'
    all_contributions = {
        name: [] for name in channel_names if name not in ["const", "base"]
    }

    # Dictionary to store base sales for each selection
    base_sales_dict = {}

    # Accumulate contributions for each channel from each selection
    for selection in contribution_selections:
        contributions = contribution_df[selection].values.astype(float)
        base_sales = 0  # Initialize base sales for the current selection
        for channel_name, contribution in zip(channel_names, contributions):
            if channel_name in all_contributions:
                all_contributions[channel_name].append(contribution)
            elif channel_name == "base":
                # Capture base sales for the current selection
                base_sales = contribution
        # Store base sales for each selection
        base_sales_dict[selection] = base_sales

    # Calculate the average of contributions and sort by this average
    sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
    sorted_channel_names = [name for name, _ in sorted_channels]
    # Add 'Base Sales' at the start
    sorted_channel_names = ["Base Sales"] + sorted_channel_names

    # Initialize a Plotly figure
    fig = go.Figure()

    for i, selection in enumerate(contribution_selections):
        # Channel names for the plot
        display_name = ["Base Sales"] + sorted_channel_names[1:]
        # Start with base sales for the current selection
        display_contribution = [base_sales_dict[selection]]

        # Append contributions for the other channels
        for name in sorted_channel_names[1:]:
            display_contribution.append(all_contributions[name][i])

        # Generate text labels for each bar
        text_values = [
            f"{val}%" for val in np.round(display_contribution, 0).astype(int)
        ]

        # Add a waterfall trace for each selection
        fig.add_trace(
            go.Waterfall(
                orientation="v",
                measure=["relative"] * len(display_contribution),
                x=display_name,
                text=text_values,
                textposition="outside",
                y=display_contribution,
                increasing={"marker": {"color": "green"}},
                decreasing={"marker": {"color": "red"}},
                totals={"marker": {"color": "blue"}},
                name=selection,
            )
        )

    # Update layout of the figure
    fig.update_layout(
        title="Metrics Contribution by Channel (Train)",
        xaxis={"title": "Channel Name"},
        yaxis=dict(title="Metrics Contribution", range=[0, 100 * 1.2]),
    )
    return fig
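
# data_editor callback (the on_change hook below is currently commented out):
# records the edited row index and mirrors the edits back into the
# session-state copy of the table.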
def selection_change():
    edited_rows: dict = st.session_state.project_selection["edited_rows"]
    st.session_state["selected_row_index_gd_table"] = next(iter(edited_rows))
    st.session_state["gd_table"] = st.session_state["gd_table"].assign(selected=False)

    update_dict = {idx: values for idx, values in edited_rows.items()}
    st.session_state["gd_table"].update(
        pd.DataFrame.from_dict(update_dict, orient="index")
    )
if "username" not in st.session_state: | |
st.session_state["username"] = None | |
if "project_name" not in st.session_state: | |
st.session_state["project_name"] = None | |
if "project_dct" not in st.session_state: | |
project_selection() | |
st.stop() | |
try: | |
st.session_state["bin_dict"] = st.session_state["project_dct"]["data_import"][ | |
"category_dict" | |
] # db | |
except Exception as e: | |
st.warning("Save atleast one tuned model to proceed") | |
log_message("warning", "No tuned models available", "AI Model Results") | |
st.stop() | |
if "gd_table" not in st.session_state: | |
st.session_state["gd_table"] = pd.DataFrame() | |
try:
    if "username" in st.session_state and st.session_state["username"] is not None:
        if (
            retrieve_pkl_object(
                st.session_state["project_number"],
                "Model_Tuning",
                "tuned_model",
                schema,
            )
            is None
        ):
            st.error("Please save a tuned model")
            st.stop()

        if (
            "session_state_saved"
            in st.session_state["project_dct"]["model_tuning"].keys()
            and st.session_state["project_dct"]["model_tuning"]["session_state_saved"]
            != []
        ):
            for key in ["used_response_metrics", "media_data", "bin_dict"]:
                if key not in st.session_state:
                    st.session_state[key] = st.session_state["project_dct"][
                        "model_tuning"
                    ]["session_state_saved"][key]

        media_data = st.session_state["media_data"]

        # Set the panel column; the data is panel-level when there is more
        # than one distinct panel value.
        panel_col = "panel"
        is_panel = st.session_state["media_data"][panel_col].nunique() > 1

        date_col = "date"

        transformed_data = st.session_state["project_dct"]["transformations"][
            "final_df"
        ]  # db

        tuned_model_dict = retrieve_pkl_object(
            st.session_state["project_number"], "Model_Tuning", "tuned_model", schema
        )  # db

        feature_set_dct = {
            key.split("__")[1]: key_dict["feature_set"]
            for key, key_dict in tuned_model_dict.items()
        }
        # TODO: fetch the feature set from the saved model instead of rebuilding it here.
if "contribution_df" not in st.session_state: | |
st.session_state["contribution_df"] = None | |
metrics_table = metrics_df_panel(tuned_model_dict, is_panel) | |
cols1 = st.columns([2, 1]) | |
with cols1[0]: | |
st.markdown(f"**Welcome {st.session_state['username']}**") | |
with cols1[1]: | |
st.markdown(f"**Current Project: {st.session_state['project_name']}**") | |
st.title("AI Model Validation") | |
st.header("Contribution Overview") | |
# Get list of response metrics | |
st.session_state["used_response_metrics"] = list( | |
set([model.split("__")[1] for model in tuned_model_dict.keys()]) | |
) | |
options = st.session_state["used_response_metrics"] | |
if len(options) == 0: | |
st.error("Please save and tune a model") | |
st.stop() | |
options = [ | |
opt.lower() | |
.replace(" ", "_") | |
.replace("-", "") | |
.replace(":", "") | |
.replace("__", "_") | |
for opt in options | |
] | |
default_options = ( | |
st.session_state["project_dct"]["saved_model_results"].get( | |
"selected_options" | |
) | |
if st.session_state["project_dct"]["saved_model_results"].get( | |
"selected_options" | |
) | |
is not None | |
else [options[-1]] | |
) | |
for i in default_options: | |
if i not in options: | |
# st.write(i) | |
default_options.remove(i) | |
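
        # Strips a leading "response metric"/"response_metric" prefix and
        # title-cases the remainder for display, e.g.
        # "response_metric_revenue" -> "Revenue".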
        def remove_response_metric(name):
            # Convert the name to a lowercase string and remove any leading or trailing spaces
            name_str = str(name).lower().strip()

            # Check if the name starts with "response metric" or "response_metric"
            if name_str.startswith("response metric"):
                return name[len("response metric") :].replace("_", " ").strip().title()
            elif name_str.startswith("response_metric"):
                return name[len("response_metric") :].replace("_", " ").strip().title()
            else:
                return name

        contribution_selections = st.multiselect(
            "Select the Response Metrics to compare contributions",
            options,
            default=default_options,
            format_func=remove_response_metric,
        )
        if is_panel:
            st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
        else:
            st.session_state["contribution_df"] = contributions_nonpanel(
                tuned_model_dict
            )

        # Display the grouped bar chart in Streamlit
        st.plotly_chart(
            create_grouped_bar_plot(
                st.session_state["contribution_df"], contribution_selections
            ),
            use_container_width=True,
        )

        ############################################ Waterfall Chart ############################################
        st.plotly_chart(
            preprocess_and_plot(
                st.session_state["contribution_df"], contribution_selections
            ),
            use_container_width=True,
        )
        ############################################ Waterfall Chart ############################################
st.header("Analysis of Models Result") | |
gd_table = metrics_table.iloc[:, :-2] | |
target_column = gd_table.at[0, "Model"] # sprint8 | |
st.session_state["gd_table"] = gd_table | |
with st.container(): | |
table = st.data_editor( | |
st.session_state["gd_table"], | |
hide_index=True, | |
# on_change=selection_change, | |
key="project_selection", | |
use_container_width=True, | |
) | |
target_column = st.selectbox( | |
"Select a Model to analyse its results", | |
options=st.session_state.used_response_metrics, | |
placeholder=options[0], | |
) | |
feature_set = feature_set_dct[target_column] | |
model = metrics_table[metrics_table["Model"] == target_column][ | |
"Model_object" | |
].iloc[0] | |
target = metrics_table[metrics_table["Model"] == target_column]["Model"].iloc[0] | |
st.header("Model Summary") | |
st.write(model.summary()) | |
sel_dict = tuned_model_dict[ | |
[k for k in tuned_model_dict.keys() if k.split("__")[1] == target][0] | |
] | |
feature_set = sel_dict["feature_set"] | |
X_train = sel_dict["X_train_tuned"] | |
y_train = X_train[target] | |
if is_panel: | |
random_effects = get_random_effects(media_data, panel_col, model) | |
pred = mdf_predict(X_train, model, random_effects)["pred"] | |
else: | |
pred = model.predict(X_train[feature_set]) | |
X_test = sel_dict["X_test_tuned"] | |
y_test = X_test[target] | |
if is_panel: | |
predtest = mdf_predict(X_test, model, random_effects)["pred"] | |
else: | |
predtest = model.predict(X_test[feature_set]) | |
        metrics_table_train, _, fig_train = plot_actual_vs_predicted(
            X_train[date_col],
            y_train,
            pred,
            model,
            target_column=target,
            flag=None,
            repeat_all_years=False,
            is_panel=is_panel,
        )
        metrics_table_test, _, fig_test = plot_actual_vs_predicted(
            X_test[date_col],
            y_test,
            predtest,
            model,
            target_column=target,
            flag=None,
            repeat_all_years=False,
            is_panel=is_panel,
        )

        metrics_table_train = metrics_table_train.set_index("Metric").transpose()
        metrics_table_train.index = ["Train"]
        metrics_table_test = metrics_table_test.set_index("Metric").transpose()
        metrics_table_test.index = ["Test"]
        metrics_table = np.round(
            pd.concat([metrics_table_train, metrics_table_test]), 2
        )

        st.markdown("Result Overview")
        st.dataframe(np.round(metrics_table, 2), use_container_width=True)

        st.header("Model Accuracy")
        st.subheader("Actual vs Predicted Plot (Train)")
        st.plotly_chart(fig_train, use_container_width=True)
        st.subheader("Actual vs Predicted Plot (Test)")
        st.plotly_chart(fig_test, use_container_width=True)

        st.markdown("## Residual Analysis (Train)")
        columns = st.columns(2)

        Xtrain1 = X_train.copy()
        with columns[0]:
            fig = plot_residual_predicted(y_train, pred, Xtrain1)
            st.plotly_chart(fig)

        with columns[1]:
            st.empty()
            fig = qqplot(y_train, pred)
            st.plotly_chart(fig)

        with columns[0]:
            fig = residual_distribution(y_train, pred)
            st.pyplot(fig)
        if st.button("Save this session", use_container_width=True):
            project_dct_pkl = pickle.dumps(st.session_state["project_dct"])
            update_db(
                st.session_state["project_number"],
                "AI_Model_Results",
                "project_dct",
                project_dct_pkl,
                schema,
                # resp_mtrc=None,
            )  # db
            log_message("info", "Session saved!", "AI Model Results")
            st.success("Session Saved!")
except Exception:
    exc_type, exc_value, exc_traceback = sys.exc_info()
    error_message = "".join(
        traceback.format_exception(exc_type, exc_value, exc_traceback)
    )
    log_message("error", f"Error: {error_message}", "AI Model Results")
    st.warning("An error occurred, please try again", icon="⚠️")