# MediaMixOptimization/pages/6_AI_Model_Validation.py
import pickle
import sys
import traceback

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import statsmodels.api as sm
import streamlit as st
from sklearn.metrics import r2_score

from data_prep import plot_actual_vs_predicted
from log_application import log_message
from post_gres_cred import db_cred
from utilities import (
    load_local_css,
    project_selection,
    retrieve_pkl_object,
    set_header,
    update_db,
)
schema = db_cred["schema"]
sys.setrecursionlimit(10**6)
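# Truncate (or create) temp_stdout.txt so stray prints captured in earlier runs
# are cleared, then restore the original stdout.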
original_stdout = sys.stdout
sys.stdout = open("temp_stdout.txt", "w")
sys.stdout.close()
sys.stdout = original_stdout
st.set_page_config(layout="wide")
load_local_css("styles.css")
set_header()
## DEFINE ALL FUNCTIONS
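# Scatter of standardized residuals vs. predicted values; the +/-2 reference
# lines flag observations more than two standard deviations out.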
def plot_residual_predicted(actual, predicted, df_):
df_["Residuals"] = actual - pd.Series(predicted)
df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_[
"Residuals"
].std()
# Create a Plotly scatter plot
fig = px.scatter(
df_,
x=predicted,
y="StdResidual",
opacity=0.5,
color_discrete_sequence=["#11B6BD"],
)
# Add horizontal lines
fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
fig.add_hline(y=2, line_color="red")
fig.add_hline(y=-2, line_color="red")
fig.update_xaxes(title="Predicted")
fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")
# Set the same width and height for both figures
fig.update_layout(
title="Residuals over Predicted Values",
autosize=False,
width=600,
height=400,
)
return fig
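# Histogram + KDE of the raw residuals (actual - predicted).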
def residual_distribution(actual, predicted):
Residuals = actual - pd.Series(predicted)
# Create a Seaborn distribution plot
sns.set(style="whitegrid")
plt.figure(figsize=(6, 4))
sns.histplot(Residuals, kde=True, color="#11B6BD")
plt.title(" Distribution of Residuals")
plt.xlabel("Residuals")
plt.ylabel("Probability Density")
return plt
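# QQ plot of standardized residuals against theoretical normal quantiles.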
def qqplot(actual, predicted):
Residuals = actual - pd.Series(predicted)
Residuals = pd.Series(Residuals)
Resud_std = (Residuals - Residuals.mean()) / Residuals.std()
# Create a QQ plot using Plotly with custom colors
fig = go.Figure()
fig.add_trace(
go.Scatter(
x=sm.ProbPlot(Resud_std).theoretical_quantiles,
y=sm.ProbPlot(Resud_std).sample_quantiles,
mode="markers",
marker=dict(size=5, color="#11B6BD"),
name="QQ Plot",
)
)
# Add the 45-degree reference line
diagonal_line = go.Scatter(
x=[
-2,
2,
], # Adjust the x values as needed to fit the range of your data
y=[-2, 2], # Adjust the y values accordingly
mode="lines",
line=dict(color="red"), # Customize the line color and style
name=" ",
)
fig.add_trace(diagonal_line)
# Customize the layout
fig.update_layout(
title="QQ Plot of Residuals",
title_x=0.5,
autosize=False,
width=600,
height=400,
xaxis_title="Theoretical Quantiles",
yaxis_title="Sample Quantiles",
)
return fig
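# --- Mixed-effects helpers ---
# For panel models, a prediction is the fixed-effect prediction plus the panel's
# random intercept; the two helpers below extract and apply those intercepts.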
def get_random_effects(media_data, panel_col, mdf):
random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
for i, market in enumerate(media_data[panel_col].unique()):
print(i, end="\r")
intercept = mdf.random_effects[market].values[0]
random_eff_df.loc[i, "random_effect"] = intercept
random_eff_df.loc[i, panel_col] = market
return random_eff_df
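# NOTE: relies on the module-level `panel_col` defined in the page flow below.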
def mdf_predict(X_df, mdf, random_eff_df):
X = X_df.copy()
X = pd.merge(
X,
random_eff_df[[panel_col, "random_effect"]],
on=panel_col,
how="left",
)
X["pred_fixed_effect"] = mdf.predict(X)
X["pred"] = X["pred_fixed_effect"] + X["random_effect"]
X.drop(columns=["pred_fixed_effect", "random_effect"], inplace=True)
return X
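# Build the model-comparison table (R-squared, adjusted R-squared, train/test
# WMAPE) for every saved tuned model, for both panel and non-panel models.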
def metrics_df_panel(model_dict, is_panel):
def wmape(actual, forecast):
        # Weighted MAPE (WMAPE) addresses two shortcomings of MAPE/SMAPE:
        #   1. MAPE blows up when actuals are close to 0
        #   2. MAPE favours under-forecasting over over-forecasting
return np.sum(np.abs(actual - forecast)) / np.sum(np.abs(actual))
metrics_df = pd.DataFrame(
columns=[
"Model",
"R2",
"ADJR2",
"Train Mape",
"Test Mape",
"Summary",
"Model_object",
]
)
i = 0
for key in model_dict.keys():
target = key.split("__")[1]
metrics_df.at[i, "Model"] = target
y = model_dict[key]["X_train_tuned"][target]
feature_set = model_dict[key]["feature_set"]
if is_panel:
random_df = get_random_effects(
media_data, panel_col, model_dict[key]["Model_object"]
)
pred = mdf_predict(
model_dict[key]["X_train_tuned"],
model_dict[key]["Model_object"],
random_df,
)["pred"]
else:
pred = model_dict[key]["Model_object"].predict(
model_dict[key]["X_train_tuned"][feature_set]
)
ytest = model_dict[key]["X_test_tuned"][target]
if is_panel:
predtest = mdf_predict(
model_dict[key]["X_test_tuned"],
model_dict[key]["Model_object"],
random_df,
)["pred"]
else:
predtest = model_dict[key]["Model_object"].predict(
model_dict[key]["X_test_tuned"][feature_set]
)
metrics_df.at[i, "R2"] = r2_score(y, pred)
metrics_df.at[i, "ADJR2"] = 1 - (1 - metrics_df.loc[i, "R2"]) * (len(y) - 1) / (
len(y) - len(model_dict[key]["feature_set"]) - 1
)
metrics_df.at[i, "Train Mape"] = wmape(y, pred)
metrics_df.at[i, "Test Mape"] = wmape(ytest, predtest)
metrics_df.at[i, "Summary"] = model_dict[key]["Model_object"].summary()
metrics_df.at[i, "Model_object"] = model_dict[key]["Model_object"]
i += 1
metrics_df = np.round(metrics_df, 2)
metrics_df.rename(
columns={"R2": "R-squared", "ADJR2": "Adj. R-squared"}, inplace=True
)
return metrics_df
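# Map a transformed variable name back to its raw channel group; falls back to
# the original name when no raw variable matches. For example (hypothetical
# names), "tv_spend_adstock_contr" maps to "TV" if channel_dict is {"TV": ["tv_spend"]}.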
def map_channel(transformed_var, channel_dict):
for key, value_list in channel_dict.items():
if any(raw_var in transformed_var for raw_var in value_list):
return key
return transformed_var # Return the original value if no match is found
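# Decompose each response metric into channel contributions: every feature
# contributes coef * value, and const/flag/tuning/exogenous terms are rolled up
# into a single "base" contribution before normalising to percentages.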
def contributions_nonpanel(model_dict):
channels = st.session_state["project_dct"]["data_import"]["group_dict"] # db
media_data = st.session_state["media_data"]
contribution_df = pd.DataFrame(columns=["Channel"])
for key in model_dict.keys():
best_feature_set = model_dict[key]["feature_set"]
model = model_dict[key]["Model_object"]
target = key.split("__")[1]
X_train = model_dict[key]["X_train_tuned"]
contri_df = pd.DataFrame()
y = []
y_pred = []
coef_df = pd.DataFrame(model.params)
coef_df.reset_index(inplace=True)
coef_df.columns = ["feature", "coef"]
x_train_contribution = X_train.copy()
x_train_contribution["pred"] = model.predict(X_train[best_feature_set])
for i in range(len(coef_df)):
coef = coef_df.loc[i, "coef"]
col = coef_df.loc[i, "feature"]
if col != "const":
x_train_contribution[str(col) + "_contr"] = (
coef * x_train_contribution[col]
)
else:
x_train_contribution["const"] = coef
tuning_cols = [
c
for c in x_train_contribution.filter(regex="contr").columns
if c
in [
"day_of_week_contr",
"Trend_contr",
"sine_wave_contr",
"cosine_wave_contr",
]
]
flag_cols = [
c
for c in x_train_contribution.filter(regex="contr").columns
if "_flag" in c
]
# add exogenous contribution to base
all_exog_vars = st.session_state["bin_dict"]["Exogenous"]
all_exog_vars = [
var.lower()
.replace(".", "_")
.replace("@", "_")
.replace(" ", "_")
.replace("-", "")
.replace(":", "")
.replace("__", "_")
for var in all_exog_vars
]
exog_cols = []
if len(all_exog_vars) > 0:
for col in x_train_contribution.filter(regex="contr").columns:
if len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0:
exog_cols.append(col)
base_cols = ["const"] + flag_cols + tuning_cols + exog_cols
x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(axis=1)
x_train_contribution.drop(columns=base_cols, inplace=True)
contri_df = pd.DataFrame(x_train_contribution.filter(regex="contr").sum(axis=0))
contri_df.reset_index(inplace=True)
contri_df.columns = ["Channel", target]
contri_df["Channel"] = contri_df["Channel"].apply(
lambda x: map_channel(x, channels)
)
contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
contri_df["Channel"].replace("base_contr", "base", inplace=True)
contribution_df = pd.merge(
contribution_df, contri_df, on="Channel", how="outer"
)
return contribution_df
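# Panel variant of the contribution decomposition: the base additionally absorbs
# the panel effect (fixed intercept + per-panel random intercept).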
def contributions_panel(model_dict):
channels = st.session_state["project_dct"]["data_import"]["group_dict"] # db
media_data = st.session_state["media_data"]
contribution_df = pd.DataFrame(columns=["Channel"])
for key in model_dict.keys():
best_feature_set = model_dict[key]["feature_set"]
model = model_dict[key]["Model_object"]
target = key.split("__")[1]
X_train = model_dict[key]["X_train_tuned"]
contri_df = pd.DataFrame()
y = []
y_pred = []
random_eff_df = get_random_effects(media_data, panel_col, model)
random_eff_df["fixed_effect"] = model.fe_params["Intercept"]
random_eff_df["panel_effect"] = (
random_eff_df["random_effect"] + random_eff_df["fixed_effect"]
)
coef_df = pd.DataFrame(model.fe_params)
coef_df.reset_index(inplace=True)
coef_df.columns = ["feature", "coef"]
x_train_contribution = X_train.copy()
x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df)
x_train_contribution = pd.merge(
x_train_contribution,
random_eff_df[[panel_col, "panel_effect"]],
on=panel_col,
how="left",
)
for i in range(len(coef_df)):
coef = coef_df.loc[i, "coef"]
col = coef_df.loc[i, "feature"]
if col.lower() != "intercept":
x_train_contribution[str(col) + "_contr"] = (
coef * x_train_contribution[col]
)
tuning_cols = [
c
for c in x_train_contribution.filter(regex="contr").columns
if c
in [
"day_of_week_contr",
"Trend_contr",
"sine_wave_contr",
"cosine_wave_contr",
]
]
flag_cols = [
c
for c in x_train_contribution.filter(regex="contr").columns
if "_flag" in c
]
# add exogenous contribution to base
all_exog_vars = st.session_state["bin_dict"]["Exogenous"]
all_exog_vars = [
var.lower()
.replace(".", "_")
.replace("@", "_")
.replace(" ", "_")
.replace("-", "")
.replace(":", "")
.replace("__", "_")
for var in all_exog_vars
]
exog_cols = []
if len(all_exog_vars) > 0:
for col in x_train_contribution.filter(regex="contr").columns:
if len([exog_var for exog_var in all_exog_vars if exog_var in col]) > 0:
exog_cols.append(col)
base_cols = ["panel_effect"] + flag_cols + tuning_cols + exog_cols
x_train_contribution["base_contr"] = x_train_contribution[base_cols].sum(axis=1)
x_train_contribution.drop(columns=base_cols, inplace=True)
contri_df = pd.DataFrame(x_train_contribution.filter(regex="contr").sum(axis=0))
contri_df.reset_index(inplace=True)
contri_df.columns = ["Channel", target]
contri_df[target] = 100 * contri_df[target] / contri_df[target].sum()
contri_df["Channel"] = contri_df["Channel"].apply(
lambda x: map_channel(x, channels)
)
contri_df["Channel"].replace("base_contr", "base", inplace=True)
contribution_df = pd.merge(
contribution_df, contri_df, on="Channel", how="outer"
)
# st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
return contribution_df
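# Grouped bar chart of per-channel contributions, with "Base Sales" pinned first
# and channels sorted by their average contribution across the selected metrics.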
def create_grouped_bar_plot(contribution_df, contribution_selections):
# Extract the 'Channel' names
channel_names = contribution_df["Channel"].tolist()
# Dictionary to store all contributions except 'const' and 'base'
all_contributions = {
name: [] for name in channel_names if name not in ["const", "base"]
}
# Dictionary to store base sales for each selection
base_sales_dict = {}
# Accumulate contributions for each channel from each selection
for selection in contribution_selections:
contributions = contribution_df[selection].values.astype(float)
base_sales = 0 # Initialize base sales for the current selection
for channel_name, contribution in zip(channel_names, contributions):
if channel_name in all_contributions:
all_contributions[channel_name].append(contribution)
elif channel_name == "base":
base_sales = (
contribution # Capture base sales for the current selection
)
# Store base sales for each selection
base_sales_dict[selection] = base_sales
# Calculate the average of contributions and sort by this average
sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
sorted_channel_names = [name for name, _ in sorted_channels]
sorted_channel_names = [
"Base Sales"
] + sorted_channel_names # Adding 'Base Sales' at the start
trace_data = []
max_value = 0 # Initialize max_value to find the highest bar for y-axis adjustment
# Create traces for the grouped bar chart
for i, selection in enumerate(contribution_selections):
display_name = sorted_channel_names
display_contribution = [base_sales_dict[selection]] + [
all_contributions[name][i] for name in sorted_channel_names[1:]
] # Start with base sales for the current selection
# Generating text labels for each bar
text_values = [
f"{val}%" for val in np.round(display_contribution, 0).astype(int)
]
# Find the max value for y-axis calculation
max_contribution = max(display_contribution)
if max_contribution > max_value:
max_value = max_contribution
# Create a bar trace for each selection
trace = go.Bar(
x=display_name,
y=display_contribution,
name=selection,
text=text_values,
textposition="outside",
)
trace_data.append(trace)
# Define layout for the bar chart
layout = go.Layout(
title="Metrics Contribution by Channel (Train)",
xaxis=dict(title="Channel Name"),
yaxis=dict(
title="Metrics Contribution", range=[0, max_value * 1.2]
), # Set y-axis 20% higher than the max bar
barmode="group",
plot_bgcolor="white",
)
# Create the figure with trace data and layout
fig = go.Figure(data=trace_data, layout=layout)
return fig
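# Waterfall view of the same contribution data: one relative step per channel,
# starting from base sales, for each selected response metric.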
def preprocess_and_plot(contribution_df, contribution_selections):
# Extract the 'Channel' names
channel_names = contribution_df["Channel"].tolist()
# Dictionary to store all contributions except 'const' and 'base'
all_contributions = {
name: [] for name in channel_names if name not in ["const", "base"]
}
# Dictionary to store base sales for each selection
base_sales_dict = {}
# Accumulate contributions for each channel from each selection
for selection in contribution_selections:
contributions = contribution_df[selection].values.astype(float)
base_sales = 0 # Initialize base sales for the current selection
for channel_name, contribution in zip(channel_names, contributions):
if channel_name in all_contributions:
all_contributions[channel_name].append(contribution)
elif channel_name == "base":
base_sales = (
contribution # Capture base sales for the current selection
)
# Store base sales for each selection
base_sales_dict[selection] = base_sales
# Calculate the average of contributions and sort by this average
sorted_channels = sorted(all_contributions.items(), key=lambda x: -np.mean(x[1]))
sorted_channel_names = [name for name, _ in sorted_channels]
sorted_channel_names = [
"Base Sales"
] + sorted_channel_names # Adding 'Base Sales' at the start
# Initialize a Plotly figure
fig = go.Figure()
for i, selection in enumerate(contribution_selections):
display_name = ["Base Sales"] + sorted_channel_names[
1:
] # Channel names for the plot
display_contribution = [
base_sales_dict[selection]
] # Start with base sales for the current selection
# Append average contributions for other channels
for name in sorted_channel_names[1:]:
display_contribution.append(all_contributions[name][i])
# Generating text labels for each bar
text_values = [
f"{val}%" for val in np.round(display_contribution, 0).astype(int)
]
# Add a waterfall trace for each selection
fig.add_trace(
go.Waterfall(
orientation="v",
measure=["relative"] * len(display_contribution),
x=display_name,
text=text_values,
textposition="outside",
y=display_contribution,
increasing={"marker": {"color": "green"}},
decreasing={"marker": {"color": "red"}},
totals={"marker": {"color": "blue"}},
name=selection,
)
)
# Update layout of the figure
fig.update_layout(
title="Metrics Contribution by Channel (Train)",
xaxis={"title": "Channel Name"},
yaxis=dict(title="Metrics Contribution", range=[0, 100 * 1.2]),
)
return fig
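# Data-editor callback: mirror the edited "selected" flags back into the
# gd_table kept in session state.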
def selection_change():
edited_rows: dict = st.session_state.project_selection["edited_rows"]
st.session_state["selected_row_index_gd_table"] = next(iter(edited_rows))
st.session_state["gd_table"] = st.session_state["gd_table"].assign(selected=False)
update_dict = {idx: values for idx, values in edited_rows.items()}
st.session_state["gd_table"].update(
pd.DataFrame.from_dict(update_dict, orient="index")
)
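## PAGE FLOW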
if "username" not in st.session_state:
st.session_state["username"] = None
if "project_name" not in st.session_state:
st.session_state["project_name"] = None
if "project_dct" not in st.session_state:
project_selection()
st.stop()
try:
st.session_state["bin_dict"] = st.session_state["project_dct"]["data_import"][
"category_dict"
] # db
except Exception as e:
st.warning("Save atleast one tuned model to proceed")
log_message("warning", "No tuned models available", "AI Model Results")
st.stop()
if "gd_table" not in st.session_state:
st.session_state["gd_table"] = pd.DataFrame()
try:
if "username" in st.session_state and st.session_state["username"] is not None:
if (
retrieve_pkl_object(
st.session_state["project_number"],
"Model_Tuning",
"tuned_model",
schema,
)
is None
):
st.error("Please save a tuned model")
st.stop()
if (
"session_state_saved"
in st.session_state["project_dct"]["model_tuning"].keys()
and st.session_state["project_dct"]["model_tuning"]["session_state_saved"]
!= []
):
for key in ["used_response_metrics", "media_data", "bin_dict"]:
if key not in st.session_state:
st.session_state[key] = st.session_state["project_dct"][
"model_tuning"
]["session_state_saved"][key]
# st.session_state["bin_dict"] = st.session_state["project_dct"][
# "model_build"
# ]["session_state_saved"]["bin_dict"]
    media_data = st.session_state["media_data"]

    # Set the panel and date columns
    panel_col = "panel"
    is_panel = st.session_state["media_data"][panel_col].nunique() > 1
    date_col = "date"
transformed_data = st.session_state["project_dct"]["transformations"][
"final_df"
] # db
tuned_model_dict = retrieve_pkl_object(
st.session_state["project_number"], "Model_Tuning", "tuned_model", schema
) # db
feature_set_dct = {
key.split("__")[1]: key_dict["feature_set"]
for key, key_dict in tuned_model_dict.items()
}
# """ the above part should be modified so that we are fetching features set from the saved model"""
if "contribution_df" not in st.session_state:
st.session_state["contribution_df"] = None
metrics_table = metrics_df_panel(tuned_model_dict, is_panel)
cols1 = st.columns([2, 1])
with cols1[0]:
st.markdown(f"**Welcome {st.session_state['username']}**")
with cols1[1]:
st.markdown(f"**Current Project: {st.session_state['project_name']}**")
st.title("AI Model Validation")
st.header("Contribution Overview")
# Get list of response metrics
st.session_state["used_response_metrics"] = list(
set([model.split("__")[1] for model in tuned_model_dict.keys()])
)
options = st.session_state["used_response_metrics"]
if len(options) == 0:
st.error("Please save and tune a model")
st.stop()
options = [
opt.lower()
.replace(" ", "_")
.replace("-", "")
.replace(":", "")
.replace("__", "_")
for opt in options
]
    saved_defaults = st.session_state["project_dct"]["saved_model_results"].get(
        "selected_options"
    )
    default_options = saved_defaults if saved_defaults is not None else [options[-1]]
    # Keep only defaults that are still valid options (avoid mutating the list
    # while iterating over it)
    default_options = [opt for opt in default_options if opt in options]
def remove_response_metric(name):
# Convert the name to a lowercase string and remove any leading or trailing spaces
name_str = str(name).lower().strip()
# Check if the name starts with "response metric" or "response_metric"
if name_str.startswith("response metric"):
return name[len("response metric") :].replace("_", " ").strip().title()
elif name_str.startswith("response_metric"):
return name[len("response_metric") :].replace("_", " ").strip().title()
else:
return name
contribution_selections = st.multiselect(
"Select the Response Metrics to compare contributions",
options,
default=default_options,
format_func=remove_response_metric,
)
trace_data = []
if is_panel:
st.session_state["contribution_df"] = contributions_panel(tuned_model_dict)
else:
st.session_state["contribution_df"] = contributions_nonpanel(
tuned_model_dict
)
# Display the chart in Streamlit
st.plotly_chart(
create_grouped_bar_plot(
st.session_state["contribution_df"], contribution_selections
),
use_container_width=True,
)
############################################ Waterfall Chart ############################################
st.plotly_chart(
preprocess_and_plot(
st.session_state["contribution_df"], contribution_selections
),
use_container_width=True,
)
############################################ Waterfall Chart ############################################
st.header("Analysis of Models Result")
gd_table = metrics_table.iloc[:, :-2]
target_column = gd_table.at[0, "Model"] # sprint8
st.session_state["gd_table"] = gd_table
with st.container():
table = st.data_editor(
st.session_state["gd_table"],
hide_index=True,
# on_change=selection_change,
key="project_selection",
use_container_width=True,
)
target_column = st.selectbox(
"Select a Model to analyse its results",
options=st.session_state.used_response_metrics,
placeholder=options[0],
)
feature_set = feature_set_dct[target_column]
model = metrics_table[metrics_table["Model"] == target_column][
"Model_object"
].iloc[0]
target = metrics_table[metrics_table["Model"] == target_column]["Model"].iloc[0]
st.header("Model Summary")
st.write(model.summary())
sel_dict = tuned_model_dict[
[k for k in tuned_model_dict.keys() if k.split("__")[1] == target][0]
]
feature_set = sel_dict["feature_set"]
X_train = sel_dict["X_train_tuned"]
y_train = X_train[target]
if is_panel:
random_effects = get_random_effects(media_data, panel_col, model)
pred = mdf_predict(X_train, model, random_effects)["pred"]
else:
pred = model.predict(X_train[feature_set])
X_test = sel_dict["X_test_tuned"]
y_test = X_test[target]
if is_panel:
predtest = mdf_predict(X_test, model, random_effects)["pred"]
else:
predtest = model.predict(X_test[feature_set])
metrics_table_train, _, fig_train = plot_actual_vs_predicted(
X_train[date_col],
y_train,
pred,
model,
target_column=target,
flag=None,
repeat_all_years=False,
is_panel=is_panel,
)
metrics_table_test, _, fig_test = plot_actual_vs_predicted(
X_test[date_col],
y_test,
predtest,
model,
target_column=target,
flag=None,
repeat_all_years=False,
is_panel=is_panel,
)
metrics_table_train = metrics_table_train.set_index("Metric").transpose()
metrics_table_train.index = ["Train"]
metrics_table_test = metrics_table_test.set_index("Metric").transpose()
metrics_table_test.index = ["Test"]
metrics_table = np.round(
pd.concat([metrics_table_train, metrics_table_test]), 2
)
st.markdown("Result Overview")
    st.dataframe(metrics_table, use_container_width=True)
st.header("Model Accuracy")
st.subheader("Actual vs Predicted Plot (Train)")
st.plotly_chart(fig_train, use_container_width=True)
st.subheader("Actual vs Predicted Plot (Test)")
st.plotly_chart(fig_test, use_container_width=True)
st.markdown("## Residual Analysis (Train)")
columns = st.columns(2)
Xtrain1 = X_train.copy()
with columns[0]:
fig = plot_residual_predicted(y_train, pred, Xtrain1)
st.plotly_chart(fig)
with columns[1]:
st.empty()
fig = qqplot(y_train, pred)
st.plotly_chart(fig)
with columns[0]:
fig = residual_distribution(y_train, pred)
st.pyplot(fig)
if st.button("Save this session", use_container_width=True):
project_dct_pkl = pickle.dumps(st.session_state["project_dct"])
update_db(
st.session_state["project_number"],
"AI_Model_Results",
"project_dct",
project_dct_pkl,
schema,
# resp_mtrc=None,
) # db
log_message("info", "Session saved!", "AI Model Results")
st.success("Session Saved!")
except:
exc_type, exc_value, exc_traceback = sys.exc_info()
error_message = "".join(
traceback.format_exception(exc_type, exc_value, exc_traceback)
)
log_message("error", f"Error: {error_message}", "AI Model Results")
st.warning("An error occured, please try again", icon="⚠️")