MediaMixOptimization / ppt_utils.py
samkeet's picture
Upload 40 files
00b00eb verified
import pandas as pd
import numpy as np
import pptx
from pptx import Presentation
from pptx.chart.data import CategoryChartData, ChartData
from pptx.enum.chart import XL_CHART_TYPE, XL_LEGEND_POSITION, XL_LABEL_POSITION
from pptx.enum.chart import XL_TICK_LABEL_POSITION
from pptx.util import Inches, Pt
import os
import pickle
from pathlib import Path
from sklearn.metrics import (
mean_absolute_error,
r2_score,
mean_absolute_percentage_error,
)
import streamlit as st
from collections import OrderedDict
from utilities import get_metrics_names, initialize_data, retrieve_pkl_object_without_warning
from io import BytesIO
from pptx.dml.color import RGBColor
from post_gres_cred import db_cred
schema=db_cred['schema']
from constants import (
TITLE_FONT_SIZE,
AXIS_LABEL_FONT_SIZE,
CHART_TITLE_FONT_SIZE,
AXIS_TITLE_FONT_SIZE,
DATA_LABEL_FONT_SIZE,
LEGEND_FONT_SIZE,
PIE_LEGEND_FONT_SIZE
)
def format_response_metric(target):
if target.startswith('response_metric_'):
target = target.replace('response_metric_', '')
target = target.replace("_", " ").title()
return target
def smape(actual, forecast):
# Symmetric Mape (SMAPE) eliminates shortcomings of MAPE :
## 1. MAPE becomes insanely high when actual is close to 0
## 2. MAPE is more favourable to underforecast than overforecast
return (1 / len(actual)) * np.sum(1 * np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast)))
def safe_num_to_per(num):
try:
return "{:.0%}".format(num)
except:
return num
# Function to convert numbers to abbreviated format
def convert_number_to_abbreviation(number):
try:
number = float(number)
if number >= 1000000:
return f'{number / 1000000:.1f} M'
elif number >= 1000:
return f'{number / 1000:.1f} K'
else:
return str(number)
except:
return number
def round_off(x, round_off_decimal=0):
# round off
try:
x = float(x)
if x < 1 and x > 0:
round_off_decimal = int(np.floor(np.abs(np.log10(x)))) + max(round_off_decimal, 1)
x = np.round(x, round_off_decimal)
elif x < 0 and x > -1:
round_off_decimal = int(np.floor(np.abs(np.log10(np.abs(x))))) + max(round_off_decimal, 1)
x = -np.round(x, round_off_decimal)
else:
x = np.round(x, round_off_decimal)
return x
except:
return x
def fill_table_placeholder(table_placeholder, slide, df, column_width=None, table_height=None):
cols = len(df.columns)
rows = len(df)
if table_height is None:
table_height = table_placeholder.height
x, y, cx, cy = table_placeholder.left, table_placeholder.top, table_placeholder.width, table_height
table = slide.shapes.add_table(rows + 1, cols, x, y, cx, cy).table
# Populate the table with data from the DataFrame
for row_idx, row in enumerate(df.values):
for col_idx, value in enumerate(row):
cell = table.cell(row_idx + 1, col_idx)
cell.text = str(value)
for col_idx, value in enumerate(df.columns):
cell = table.cell(0, col_idx)
cell.text = str(value)
if column_width is not None:
for col_idx, column_width in column_width.items():
table.columns[col_idx].width = Inches(column_width)
table_placeholder._element.getparent().remove(table_placeholder._element)
def bar_chart(chart_placeholder, slide, chart_data, titles={}, min_y=None, max_y=None, type='V', legend=True,
label_type=None, xaxis_pos=None):
x, y, cx, cy = chart_placeholder.left, chart_placeholder.top, chart_placeholder.width, chart_placeholder.height
if type == 'V':
graphic_frame = slide.shapes.add_chart(
XL_CHART_TYPE.COLUMN_CLUSTERED, x, y, cx, cy, chart_data
)
if type == 'H':
graphic_frame = slide.shapes.add_chart(
XL_CHART_TYPE.BAR_CLUSTERED, x, y, cx, cy, chart_data
)
chart = graphic_frame.chart
category_axis = chart.category_axis
value_axis = chart.value_axis
# Add chart title
if 'chart_title' in titles.keys():
chart.has_title = True
chart.chart_title.text_frame.text = titles['chart_title']
chart_title = chart.chart_title.text_frame.paragraphs[0].runs[0]
chart_title.font.size = Pt(CHART_TITLE_FONT_SIZE)
# Add axis titles
if 'x_axis' in titles.keys():
category_axis.has_title = True
category_axis.axis_title.text_frame.text = titles['x_axis']
category_title = category_axis.axis_title.text_frame.paragraphs[0].runs[0]
category_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
if 'y_axis' in titles.keys():
value_axis.has_title = True
value_axis.axis_title.text_frame.text = titles['y_axis']
value_title = value_axis.axis_title.text_frame.paragraphs[0].runs[0]
value_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
if xaxis_pos == 'low':
category_axis.tick_label_position = XL_TICK_LABEL_POSITION.LOW
# Customize the chart
if legend:
chart.has_legend = True
chart.legend.position = XL_LEGEND_POSITION.BOTTOM
chart.legend.font.size = Pt(LEGEND_FONT_SIZE)
chart.legend.include_in_layout = False
# Adjust font size for axis labels
category_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
value_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
if min_y is not None:
value_axis.minimum_scale = min_y # Adjust this value as needed
if max_y is not None:
value_axis.maximum_scale = max_y # Adjust this value as needed
plot = chart.plots[0]
plot.has_data_labels = True
data_labels = plot.data_labels
if label_type == 'per':
data_labels.number_format = '0"%"'
elif label_type == '$':
data_labels.number_format = '$[>=1000000]#,##0.0,,"M";$[>=1000]#,##0.0,"K";$#,##0'
elif label_type == '$1':
data_labels.number_format = '$[>=1000000]#,##0,,"M";$[>=1000]#,##0,"K";$#,##0'
elif label_type == 'M':
data_labels.number_format = '#0.0,,"M"'
elif label_type == 'M1':
data_labels.number_format = '#0.00,,"M"'
elif label_type == 'K':
data_labels.number_format = '#0.0,"K"'
data_labels.font.size = Pt(DATA_LABEL_FONT_SIZE)
chart_placeholder._element.getparent().remove(chart_placeholder._element)
def line_chart(chart_placeholder, slide, chart_data, titles={}, min_y=None, max_y=None):
# Add the chart to the slide
x, y, cx, cy = chart_placeholder.left, chart_placeholder.top, chart_placeholder.width, chart_placeholder.height
chart = slide.shapes.add_chart(
XL_CHART_TYPE.LINE, x, y, cx, cy, chart_data
).chart
chart.has_legend = True
chart.legend.position = XL_LEGEND_POSITION.BOTTOM
chart.legend.font.size = Pt(LEGEND_FONT_SIZE)
category_axis = chart.category_axis
value_axis = chart.value_axis
if min_y is not None:
value_axis.minimum_scale = min_y
if max_y is not None:
value_axis.maximum_scale = max_y
if min_y is not None and max_y is not None:
value_axis.major_unit = int((max_y - min_y) / 2)
if 'chart_title' in titles.keys():
chart.has_title = True
chart.chart_title.text_frame.text = titles['chart_title']
chart_title = chart.chart_title.text_frame.paragraphs[0].runs[0]
chart_title.font.size = Pt(CHART_TITLE_FONT_SIZE)
if 'x_axis' in titles.keys():
category_axis.has_title = True
category_axis.axis_title.text_frame.text = titles['x_axis']
category_title = category_axis.axis_title.text_frame.paragraphs[0].runs[0]
category_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
if 'y_axis' in titles.keys():
value_axis.has_title = True
value_axis.axis_title.text_frame.text = titles['y_axis']
value_title = value_axis.axis_title.text_frame.paragraphs[0].runs[0]
value_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
# Adjust font size for axis labels
category_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
value_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
plot = chart.plots[0]
series = plot.series[1]
line = series.format.line
line.color.rgb = RGBColor(141, 47, 0)
chart_placeholder._element.getparent().remove(chart_placeholder._element)
def pie_chart(chart_placeholder, slide, chart_data, title):
# Add the chart to the slide
x, y, cx, cy = chart_placeholder.left, chart_placeholder.top, chart_placeholder.width, chart_placeholder.height
chart = slide.shapes.add_chart(
XL_CHART_TYPE.PIE, x, y, cx, cy, chart_data
).chart
chart.has_legend = True
chart.legend.position = XL_LEGEND_POSITION.RIGHT
chart.legend.include_in_layout = False
chart.legend.font.size = Pt(PIE_LEGEND_FONT_SIZE)
chart.plots[0].has_data_labels = True
data_labels = chart.plots[0].data_labels
data_labels.number_format = '0%'
data_labels.position = XL_LABEL_POSITION.OUTSIDE_END
data_labels.font.size = Pt(DATA_LABEL_FONT_SIZE)
chart.has_title = True
chart.chart_title.text_frame.text = title
chart_title = chart.chart_title.text_frame.paragraphs[0].runs[0]
chart_title.font.size = Pt(CHART_TITLE_FONT_SIZE)
chart_placeholder._element.getparent().remove(chart_placeholder._element)
def title_and_table(slide, title, df, column_width=None, custom_table_height=False):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = title
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
table_placeholder = slide.placeholders[ph_idx[1]]
table_height = None
if custom_table_height:
if len(df) < 4:
table_height = int(np.ceil(table_placeholder.height / 2))
fill_table_placeholder(table_placeholder, slide, df, column_width, table_height)
# try:
# font_size = 18 # default for 3*3
# if cols < 3:
# row_diff = 3 - rows
# font_size = font_size + ((row_diff)*2) # 1 row less -> 2 pt font size increase & vice versa
# else:
# row_diff = 2 - rows
# font_size = font_size + ((row_diff)*2)
# for row in table.rows:
# for cell in row.cells:
# cell.text_frame.paragraphs[0].runs[0].font.size = Pt(font_size)
# except Exception as e :
# print("**"*30)
# print(e)
# else:
# except Exception as e:
# print('table', e)
return slide
def data_import(data, bin_dict):
import_df = pd.DataFrame(columns=['Category', 'Value'])
import_df.at[0, 'Category'] = 'Date Range'
date_start = data['date'].min().date()
date_end = data['date'].max().date()
import_df.at[0, 'Value'] = str(date_start) + ' - ' + str(date_end)
import_df.at[1, 'Category'] = 'Response Metrics'
import_df.at[1, 'Value'] = ', '.join(bin_dict['Response Metrics'])
import_df.at[2, 'Category'] = 'Media Variables'
import_df.at[2, 'Value'] = ', '.join(bin_dict['Media'])
import_df.at[3, 'Category'] = 'Spend Variables'
import_df.at[3, 'Value'] = ', '.join(bin_dict['Spends'])
if bin_dict['Exogenous'] != []:
import_df.at[4, 'Category'] = 'Exogenous Variables'
import_df.at[4, 'Value'] = ', '.join(bin_dict['Exogenous'])
return import_df
def channel_groups_df(channel_groups_dct={}, bin_dict={}):
df = pd.DataFrame(columns=['Channel', 'Media Variables', 'Spend Variables'])
i = 0
for channel, vars in channel_groups_dct.items():
media_vars = ", ".join(list(set(vars).intersection(set(bin_dict["Media"]))))
spend_vars = ", ".join(list(set(vars).intersection(set(bin_dict["Spends"]))))
df.at[i, "Channel"] = channel
df.at[i, 'Media Variables'] = media_vars
df.at[i, 'Spend Variables'] = spend_vars
i += 1
return df
def transformations(transform_dict):
transform_df = pd.DataFrame(columns=['Category', 'Transformation', 'Value'])
i = 0
for category in ['Media', 'Exogenous']:
transformations = f'transformation_{category}'
category_dict = transform_dict[category]
if transformations in category_dict.keys():
for transformation in category_dict[transformations]:
transform_df.at[i, 'Category'] = category
transform_df.at[i, 'Transformation'] = transformation
transform_df.at[i, 'Value'] = str(category_dict[transformation][0]) + ' - ' + str(
category_dict[transformation][1])
i += 1
return transform_df
def model_metrics(model_dict, is_panel):
metrics_df = pd.DataFrame(
columns=[
"Response Metric",
"Model",
"R2",
"ADJR2",
"Train MAPE",
"Test MAPE"
]
)
i = 0
for key in model_dict.keys():
target = key.split("__")[1]
metrics_df.at[i, "Response Metric"] = format_response_metric(target)
metrics_df.at[i, "Model"] = key.split("__")[0]
y = model_dict[key]["X_train_tuned"][target]
feature_set = model_dict[key]["feature_set"]
if is_panel:
random_df = get_random_effects(
media_data, panel_col, model_dict[key]["Model_object"]
)
pred = mdf_predict(
model_dict[key]["X_train_tuned"],
model_dict[key]["Model_object"],
random_df,
)["pred"]
else:
pred = model_dict[key]["Model_object"].predict(model_dict[key]["X_train_tuned"][feature_set])
ytest = model_dict[key]["X_test_tuned"][target]
if is_panel:
predtest = mdf_predict(
model_dict[key]["X_test_tuned"],
model_dict[key]["Model_object"],
random_df,
)["pred"]
else:
predtest = model_dict[key]["Model_object"].predict(model_dict[key]["X_test_tuned"][feature_set])
metrics_df.at[i, "R2"] = np.round(r2_score(y, pred), 2)
adjr2 = 1 - (1 - metrics_df.loc[i, "R2"]) * (
len(y) - 1
) / (len(y) - len(model_dict[key]["feature_set"]) - 1)
metrics_df.at[i, "ADJR2"] = np.round(adjr2, 2)
# y = np.where(np.abs(y) < 0.00001, 0.00001, y)
metrics_df.at[i, "Train MAPE"] = np.round(smape(y, pred), 2)
metrics_df.at[i, "Test MAPE"] = np.round(smape(ytest, predtest), 2)
i += 1
metrics_df = np.round(metrics_df, 2)
return metrics_df
def model_result(slide, model_key, model_dict, model_metrics_df, date_col):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = model_key.split('__')[0]
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
target = model_key.split('__')[1]
metrics_table_placeholder = slide.placeholders[ph_idx[1]]
metrics_df = model_metrics_df[model_metrics_df['Model'] == model_key.split('__')[0]].reset_index(drop=True)
# Accuracy = 1-mape
metrics_df['Accuracy'] = 100 * (1 - metrics_df['Train MAPE'])
metrics_df['Accuracy'] = metrics_df['Accuracy'].apply(lambda x: f'{np.round(x, 0)}%')
## Removing metrics as requested by Ioannis
metrics_df = metrics_df.drop(columns=['R2', 'ADJR2', 'Train MAPE', 'Test MAPE'])
fill_table_placeholder(metrics_table_placeholder, slide, metrics_df)
# coeff_table_placeholder = slide.placeholders[ph_idx[2]]
# coeff_df = pd.DataFrame(model_dict['Model_object'].params)
# coeff_df.reset_index(inplace=True)
# coeff_df.columns = ['Feature', 'Coefficent']
# fill_table_placeholder(coeff_table_placeholder, slide, coeff_df)
chart_placeholder = slide.placeholders[ph_idx[2]]
full_df = pd.concat([model_dict['X_train_tuned'], model_dict['X_test_tuned']])
full_df['Predicted'] = model_dict['Model_object'].predict(full_df[model_dict['feature_set']])
pred_df = full_df[[date_col, target, 'Predicted']]
pred_df.rename(columns={target: 'Actual'}, inplace=True)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = pred_df[date_col]
chart_data.add_series('Actual', pred_df['Actual'])
chart_data.add_series('Predicted', pred_df['Predicted'])
# Set range for y axis
min_y = np.floor(min(pred_df['Actual'].min(), pred_df['Predicted'].min()))
max_y = np.ceil(max(pred_df['Actual'].max(), pred_df['Predicted'].max()))
# Create the chart
line_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Actual VS Predicted',
'x_axis': 'Date',
'y_axis': target.title().replace('_', ' ')
},
min_y=min_y,
max_y=max_y
)
return slide
def metrics_contributions(slide, contributions_excels_dict, panel_col):
# Create data for metrics contributions
all_contribution_df = pd.DataFrame(columns=['Channel'])
target_sum_dict = {}
sort_support_dct = {}
for target in contributions_excels_dict.keys():
contribution_df = contributions_excels_dict[target]['CONTRIBUTION MMM'].copy()
if 'Date' in contribution_df.columns:
contribution_df.drop(columns=['Date'], inplace=True)
if panel_col in contribution_df.columns:
contribution_df.drop(columns=[panel_col], inplace=True)
contribution_df = pd.DataFrame(np.sum(contribution_df, axis=0)).reset_index()
contribution_df.columns = ['Channel', target]
target_sum = contribution_df[target].sum()
target_sum_dict[target] = target_sum
contribution_df[target] = 100 * contribution_df[target] / target_sum
all_contribution_df = pd.merge(all_contribution_df, contribution_df, on='Channel', how='outer')
sorted_target_sum_dict = sorted(target_sum_dict.items(), key=lambda kv: kv[1], reverse=True)
sorted_target_sum_keys = [kv[0] for kv in sorted_target_sum_dict]
if len([metric for metric in sorted_target_sum_keys if metric.lower() == 'revenue']) == 1:
rev_metric = [metric for metric in sorted_target_sum_keys if metric.lower() == 'revenue'][0]
sorted_target_sum_keys.remove(rev_metric)
sorted_target_sum_keys.append(rev_metric)
all_contribution_df = all_contribution_df[['Channel'] + sorted_target_sum_keys]
# for col in all_contribution_df.columns:
# all_contribution_df[col]=all_contribution_df[col].apply(lambda x: round_off(x,1))
# Sort Data by Average contribution of the channels keeping base first <Removed>
# all_contribution_df['avg'] = np.mean(all_contribution_df[list(contributions_excels_dict.keys())],axis=1)
# all_contribution_df['rank'] = all_contribution_df['avg'].rank(ascending=False)
# Sort data by contribution of bottom funnel metric
bottom_funnel_metric = sorted_target_sum_keys[-1]
all_contribution_df['rank'] = all_contribution_df[bottom_funnel_metric].rank(ascending=False)
all_contribution_df.loc[all_contribution_df[all_contribution_df['Channel'] == 'base'].index, 'rank'] = 0
all_contribution_df = all_contribution_df.sort_values(by='rank')
all_contribution_df.drop(columns=['rank'], inplace=True)
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = "Response Metrics Contributions"
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
for target in contributions_excels_dict.keys():
all_contribution_df[target] = all_contribution_df[target].astype(float)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = all_contribution_df['Channel']
for target in sorted_target_sum_keys:
chart_data.add_series(format_response_metric(target), all_contribution_df[target])
chart_placeholder = slide.placeholders[ph_idx[1]]
if isinstance(np.min(all_contribution_df.select_dtypes(exclude=['object', 'datetime'])), float):
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Response Metrics Contributions',
# 'x_axis':'Channels',
'y_axis': 'Contributions'},
min_y=np.floor(np.min(all_contribution_df.select_dtypes(exclude=['object', 'datetime']))),
max_y=np.ceil(np.max(all_contribution_df.select_dtypes(exclude=['object', 'datetime']))),
type='V',
label_type='per'
)
else:
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Response Metrics Contributions',
# 'x_axis':'Channels',
'y_axis': 'Contributions'},
min_y=np.floor(np.min(all_contribution_df.select_dtypes(exclude=['object', 'datetime'])).values[0]),
max_y=np.ceil(np.max(all_contribution_df.select_dtypes(exclude=['object', 'datetime'])).values[0]),
type='V',
label_type='per'
)
return slide
def model_media_performance(slide, target, contributions_excels_dict, date_col='Date', is_panel=False,
panel_col='panel'):
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = "Media Performance - " + target.title().replace("_", " ")
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
# CONTRIBUTION CHART
# Create contribution data
contribution_df = contributions_excels_dict[target]['CONTRIBUTION MMM']
if panel_col in contribution_df.columns:
contribution_df.drop(columns=[panel_col], inplace=True)
# contribution_df.drop(columns=[date_col], inplace=True)
contribution_df = pd.DataFrame(np.sum(contribution_df, axis=0)).reset_index()
contribution_df.columns = ['Channel', format_response_metric(target)]
contribution_df['Channel'] = contribution_df['Channel'].apply(lambda x: x.title())
target_sum = contribution_df[format_response_metric(target)].sum()
contribution_df[format_response_metric(target)] = contribution_df[format_response_metric(target)] / target_sum
contribution_df.sort_values(by=['Channel'], ascending=False, inplace=True)
# for col in contribution_df.columns:
# contribution_df[col] = contribution_df[col].apply(lambda x : round_off(x))
# Create Chart Data
chart_data = ChartData()
chart_data.categories = contribution_df['Channel']
chart_data.add_series('Contribution', contribution_df[format_response_metric(target)])
chart_placeholder = slide.placeholders[ph_idx[2]]
pie_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
title='Contribution')
# SPENDS CHART
initialize_data(panel='aggregated', metrics=target)
scenario = st.session_state["scenario"]
spends_values = {
channel_name: round(
scenario.channels[channel_name].actual_total_spends
* scenario.channels[channel_name].conversion_rate,
1,
)
for channel_name in st.session_state["channels_list"]
}
spends_df = pd.DataFrame(columns=['Channel', 'Media Spend'])
spends_df['Channel'] = list(spends_values.keys())
spends_df['Media Spend'] = list(spends_values.values())
spends_sum = spends_df['Media Spend'].sum()
spends_df['Media Spend'] = spends_df['Media Spend'] / spends_sum
spends_df['Channel'] = spends_df['Channel'].apply(lambda x: x.title())
spends_df.sort_values(by='Channel', ascending=False, inplace=True)
# for col in spends_df.columns:
# spends_df[col] = spends_df[col].apply(lambda x : round_off(x))
# Create Chart Data
spends_chart_data = ChartData()
spends_chart_data = ChartData()
spends_chart_data.categories = spends_df['Channel']
spends_chart_data.add_series('Media Spend', spends_df['Media Spend'])
spends_chart_placeholder = slide.placeholders[ph_idx[1]]
pie_chart(chart_placeholder=spends_chart_placeholder,
slide=slide,
chart_data=spends_chart_data,
title='Media Spend')
# spends_values.append(0)
return contribution_df, spends_df
# def get_saved_scenarios_dict(project_path):
# # Path to the saved scenarios file
# saved_scenarios_dict_path = os.path.join(
# project_path, "saved_scenarios.pkl"
# )
#
# # Load existing scenarios if the file exists
# if os.path.exists(saved_scenarios_dict_path):
# with open(saved_scenarios_dict_path, "rb") as f:
# saved_scenarios_dict = pickle.load(f)
# else:
# saved_scenarios_dict = OrderedDict()
#
# return saved_scenarios_dict
def optimization_summary(slide, scenario, scenario_name):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Optimization Summary' # + ' (Scenario: ' + scenario_name + ')'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
multiplier = 1 / float(scenario['multiplier'])
# st.write(scenario['multiplier'], multiplier)
## Multiplier is an indicator of selected time fram
## Doesn't effect CPA
opt_on = scenario['optimization']
if opt_on.lower() == 'spends':
opt_on = 'Media Spend'
details_ph = slide.placeholders[ph_idx[3]]
details_ph.text = 'Scenario Name: ' + scenario_name + \
'\nResponse Metric: ' + str(scenario['metrics_selected']).replace("_", " ").title() + \
'\nOptimized on: ' + str(opt_on).replace("_", " ").title()
scenario_df = pd.DataFrame(columns=['Category', 'Actual', 'Simulated', 'Change'])
scenario_df.at[0, 'Category'] = 'Media Spend'
scenario_df.at[0, 'Actual'] = scenario['actual_total_spends'] * multiplier
scenario_df.at[0, 'Simulated'] = scenario['modified_total_spends'] * multiplier
scenario_df.at[0, 'Change'] = (scenario['modified_total_spends'] - scenario['actual_total_spends']) * multiplier
scenario_df.at[1, 'Category'] = scenario['metrics_selected'].replace("_", " ").title()
scenario_df.at[1, 'Actual'] = scenario['actual_total_sales'] * multiplier
scenario_df.at[1, 'Simulated'] = (scenario['modified_total_sales']) * multiplier
scenario_df.at[1, 'Change'] = (scenario['modified_total_sales'] - scenario['actual_total_sales']) * multiplier
scenario_df.at[2, 'Category'] = 'CPA'
actual_cpa = scenario['actual_total_spends'] / scenario['actual_total_sales']
modified_cpa = scenario['modified_total_spends'] / scenario['modified_total_sales']
scenario_df.at[2, 'Actual'] = actual_cpa
scenario_df.at[2, 'Simulated'] = modified_cpa
scenario_df.at[2, 'Change'] = modified_cpa - actual_cpa
scenario_df.at[3, 'Category'] = 'ROI'
act_roi = scenario['actual_total_sales'] / scenario['actual_total_spends']
opt_roi = scenario['modified_total_sales'] / scenario['modified_total_spends']
scenario_df.at[3, 'Actual'] = act_roi
scenario_df.at[3, 'Simulated'] = opt_roi
scenario_df.at[3, 'Change'] = opt_roi - act_roi
for col in scenario_df.columns:
scenario_df[col] = scenario_df[col].apply(lambda x: round_off(x, 1))
scenario_df[col] = scenario_df[col].apply(lambda x: convert_number_to_abbreviation(x))
table_placeholder = slide.placeholders[ph_idx[1]]
fill_table_placeholder(table_placeholder, slide, scenario_df)
channel_spends_df = pd.DataFrame(columns=['Channel', 'Actual Spends', 'Optimized Spends'])
for i, channel in enumerate(scenario['channels'].values()):
channel_spends_df.at[i, 'Channel'] = channel['name']
channel_conversion_rate = channel[
"conversion_rate"
]
channel_spends_df.at[i, 'Actual Spends'] = (
channel["actual_total_spends"]
* channel_conversion_rate
) * multiplier
channel_spends_df.at[i, 'Optimized Spends'] = (
channel["modified_total_spends"]
* channel_conversion_rate
) * multiplier
channel_spends_df['Actual Spends'] = channel_spends_df['Actual Spends'].astype('float')
channel_spends_df['Optimized Spends'] = channel_spends_df['Optimized Spends'].astype('float')
for col in channel_spends_df.columns:
channel_spends_df[col] = channel_spends_df[col].apply(lambda x: round_off(x, 0))
# Sort data on Actual Spends
channel_spends_df.sort_values(by='Actual Spends', inplace=True, ascending=False)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = channel_spends_df['Channel']
for col in ['Actual Spends', 'Optimized Spends']:
chart_data.add_series(col, channel_spends_df[col])
chart_placeholder = slide.placeholders[ph_idx[2]]
# Add the chart to the slide
if isinstance(np.max(channel_spends_df.select_dtypes(exclude=['object', 'datetime'])),float):
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise Spends',
# 'x_axis':'Channels',
'y_axis': 'Spends'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.ceil(np.max(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
label_type='$'
)
else:
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise Spends',
# 'x_axis':'Channels',
'y_axis': 'Spends'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.ceil(np.max(channel_spends_df.select_dtypes(exclude=['object', 'datetime'])).values[0]),
label_type='$'
)
def channel_wise_spends(slide, scenario):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Channel Spends and Impact'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
# print(scenario.keys())
multiplier = 1 / float(scenario['multiplier'])
channel_spends_df = pd.DataFrame(columns=['Channel', 'Actual Spends', 'Optimized Spends'])
for i, channel in enumerate(scenario['channels'].values()):
channel_spends_df.at[i, 'Channel'] = channel['name']
channel_conversion_rate = channel["conversion_rate"]
channel_spends_df.at[i, 'Actual Spends'] = (channel[
"actual_total_spends"] * channel_conversion_rate) * multiplier
channel_spends_df.at[i, 'Optimized Spends'] = (channel[
"modified_total_spends"] * channel_conversion_rate) * multiplier
channel_spends_df['Actual Spends'] = channel_spends_df['Actual Spends'].astype('float')
channel_spends_df['Optimized Spends'] = channel_spends_df['Optimized Spends'].astype('float')
actual_sum = channel_spends_df['Actual Spends'].sum()
opt_sum = channel_spends_df['Optimized Spends'].sum()
for col in channel_spends_df.columns:
channel_spends_df[col] = channel_spends_df[col].apply(lambda x: round_off(x, 0))
channel_spends_df['Actual Spends %'] = 100 * (channel_spends_df['Actual Spends'] / actual_sum)
channel_spends_df['Optimized Spends %'] = 100 * (channel_spends_df['Optimized Spends'] / opt_sum)
channel_spends_df['Actual Spends %'] = np.round(channel_spends_df['Actual Spends %'])
channel_spends_df['Optimized Spends %'] = np.round(channel_spends_df['Optimized Spends %'])
# Sort Data based on Actual Spends %
channel_spends_df.sort_values(by='Actual Spends %', inplace=True)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = channel_spends_df['Channel']
for col in ['Actual Spends %', 'Optimized Spends %']:
# for col in ['Actual Spends %']:
chart_data.add_series(col, channel_spends_df[col])
chart_placeholder = slide.placeholders[ph_idx[1]]
# Add the chart to the slide
if isinstance(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']]), float):
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Spend Split %',
# 'x_axis':'Channels',
'y_axis': 'Spend %'},
min_y=0,
max_y=np.ceil(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']])),
type='H',
legend=True,
label_type='per',
xaxis_pos='low'
)
else:
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Spend Split %',
# 'x_axis':'Channels',
'y_axis': 'Spend %'},
min_y=0,
max_y=np.ceil(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']]).values[0]),
type='H',
legend=True,
label_type='per',
xaxis_pos='low'
)
#
# # Create chart data
# chart_data_1 = CategoryChartData()
# chart_data_1.categories = channel_spends_df['Channel']
# # for col in ['Actual Spends %', 'Optimized Spends %']:
# for col in ['Optimized Spends %']:
# chart_data_1.add_series(col, channel_spends_df[col])
# chart_placeholder_1 = slide.placeholders[ph_idx[3]]
#
# # Add the chart to the slide
# bar_chart(chart_placeholder=chart_placeholder_1,
# slide=slide,
# chart_data=chart_data_1,
# titles={'chart_title': 'Optimized Spends Split %',
# # 'x_axis':'Channels',
# 'y_axis': 'Spends %'},
# min_y=0,
# max_y=np.ceil(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']])),
# type='H',
# legend=False,
# label_type='per'
# )
channel_spends_df['Delta %'] = 100 * (channel_spends_df['Optimized Spends'] - channel_spends_df['Actual Spends']) / \
channel_spends_df['Actual Spends']
channel_spends_df['Delta %'] = channel_spends_df['Delta %'].apply(lambda x: round_off(x, 0))
# Create chart data
delta_chart_data = CategoryChartData()
delta_chart_data.categories = channel_spends_df['Channel']
col = 'Delta %'
delta_chart_data.add_series(col, channel_spends_df[col])
delta_chart_placeholder = slide.placeholders[ph_idx[3]]
# Add the chart to the slide
if isinstance(np.min(channel_spends_df['Delta %']), float):
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Spend Delta %',
'y_axis': 'Spend Delta %'},
min_y=np.floor(np.min(channel_spends_df['Delta %'])),
max_y=np.ceil(np.max(channel_spends_df['Delta %'])),
type='H',
legend=False,
label_type='per',
xaxis_pos='low'
)
else:
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Spend Delta %',
'y_axis': 'Spend Delta %'},
min_y=np.floor(np.min(channel_spends_df['Delta %']).values[0]),
max_y=np.ceil(np.max(channel_spends_df['Delta %']).values[0]),
type='H',
legend=False,
label_type='per',
xaxis_pos='low'
)
# Incremental Impact
channel_inc_df = pd.DataFrame(columns=['Channel', 'Increment'])
for i, channel in enumerate(scenario['channels'].values()):
channel_inc_df.at[i, 'Channel'] = channel['name']
act_impact = channel['actual_total_sales']
opt_impact = channel['modified_total_sales']
impact = opt_impact - act_impact
impact = round_off(impact, 0)
impact = impact if abs(impact) > 0.0001 else 0
channel_inc_df.at[i, 'Increment'] = impact
channel_inc_df_1 = pd.merge(channel_spends_df, channel_inc_df, how='left', on='Channel')
# Create chart data
delta_chart_data = CategoryChartData()
delta_chart_data.categories = channel_inc_df_1['Channel']
col = 'Increment'
delta_chart_data.add_series(col, channel_inc_df_1[col])
delta_chart_placeholder = slide.placeholders[ph_idx[2]]
label_req = True
if min(np.abs(channel_inc_df_1[col])) > 100000: # 0.1M
label_type = 'M'
elif min(np.abs(channel_inc_df_1[col])) > 10000 and max(np.abs(channel_inc_df_1[col])) > 1000000:
label_type = 'M1'
elif min(np.abs(channel_inc_df_1[col])) > 100 and max(np.abs(channel_inc_df_1[col])) > 1000:
label_type = 'K'
else:
label_req = False
# Add the chart to the slide
if label_req:
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Incremental Impact',
'y_axis': format_response_metric(scenario['metrics_selected'])},
# min_y=np.floor(np.min(channel_inc_df_1['Delta %'])),
# max_y=np.ceil(np.max(channel_inc_df_1['Delta %'])),
type='H',
label_type=label_type,
legend=False,
xaxis_pos='low'
)
else:
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Increment',
'y_axis': scenario['metrics_selected']},
# min_y=np.floor(np.min(channel_inc_df_1['Delta %'])),
# max_y=np.ceil(np.max(channel_inc_df_1['Delta %'])),
type='H',
legend=False,
xaxis_pos='low'
)
def channel_wise_roi(slide, scenario):
channel_roi_mroi = scenario['channel_roi_mroi']
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Channel ROIs'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
channel_roi_df = pd.DataFrame(columns=['Channel', 'Actual ROI', 'Optimized ROI'])
for i, channel in enumerate(channel_roi_mroi.keys()):
channel_roi_df.at[i, 'Channel'] = channel
channel_roi_df.at[i, 'Actual ROI'] = channel_roi_mroi[channel]['actual_roi']
channel_roi_df.at[i, 'Optimized ROI'] = channel_roi_mroi[channel]['optimized_roi']
channel_roi_df['Actual ROI'] = channel_roi_df['Actual ROI'].astype('float')
channel_roi_df['Optimized ROI'] = channel_roi_df['Optimized ROI'].astype('float')
for col in channel_roi_df.columns:
channel_roi_df[col] = channel_roi_df[col].apply(lambda x: round_off(x, 2))
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = channel_roi_df['Channel']
for col in ['Actual ROI', 'Optimized ROI']:
chart_data.add_series(col, channel_roi_df[col])
chart_placeholder = slide.placeholders[ph_idx[1]]
# Add the chart to the slide
if isinstance(channel_roi_df.select_dtypes(exclude=['object', 'datetime']), float):
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise ROI',
# 'x_axis':'Channels',
'y_axis': 'ROI'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime']))
)
else:
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise ROI',
# 'x_axis':'Channels',
'y_axis': 'ROI'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime'])).values[0]
)
# act_roi = scenario['actual_total_sales']/scenario['actual_total_spends']
# opt_roi = scenario['modified_total_sales']/scenario['modified_total_spends']
#
# act_roi_ph = slide.placeholders[ph_idx[2]]
# act_roi_ph.text = 'Actual ROI: ' + str(round_off(act_roi,2))
# opt_roi_ph = slide.placeholders[ph_idx[3]]
# opt_roi_ph.text = 'Optimized ROI: ' + str(round_off(opt_roi, 2))
## Removing mroi chart as per Ioannis' feedback
# channel_mroi_df = pd.DataFrame(columns=['Channel', 'Actual mROI', 'Optimized mROI'])
# for i, channel in enumerate(channel_roi_mroi.keys()):
# channel_mroi_df.at[i, 'Channel'] = channel
# channel_mroi_df.at[i, 'Actual mROI'] = channel_roi_mroi[channel]['actual_mroi']
# channel_mroi_df.at[i, 'Optimized mROI'] = channel_roi_mroi[channel]['optimized_mroi']
# channel_mroi_df['Actual mROI']=channel_mroi_df['Actual mROI'].astype('float')
# channel_mroi_df['Optimized mROI']=channel_mroi_df['Optimized mROI'].astype('float')
#
# for col in channel_mroi_df.columns:
# channel_mroi_df[col]=channel_mroi_df[col].apply(lambda x: round_off(x))
#
# # Create chart data
# mroi_chart_data = CategoryChartData()
# mroi_chart_data.categories = channel_mroi_df['Channel']
# for col in ['Actual mROI', 'Optimized mROI']:
# mroi_chart_data.add_series(col, channel_mroi_df[col])
#
# mroi_chart_placeholder=slide.placeholders[ph_idx[2]]
#
# # Add the chart to the slide
# bar_chart(chart_placeholder=mroi_chart_placeholder,
# slide=slide,
# chart_data=mroi_chart_data,
# titles={'chart_title':'Channel Wise mROI',
# # 'x_axis':'Channels',
# 'y_axis':'mROI'},
# # min_y=np.floor(np.min(channel_mroi_df.select_dtypes(exclude=['object', 'datetime']))),
# min_y=0,
# max_y=np.ceil(np.max(channel_mroi_df.select_dtypes(exclude=['object', 'datetime'])))
# )
def effictiveness_efficiency(slide, final_data, bin_dct, scenario):
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Effectiveness and Efficiency'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
response_metrics = bin_dct['Response Metrics']
kpi_df = final_data[response_metrics].sum(axis=0).reset_index()
kpi_df.columns = ['Response Metric', 'Effectiveness']
kpi_df['Efficiency'] = kpi_df['Effectiveness'] / scenario['modified_total_spends']
kpi_df['Efficiency'] = kpi_df['Efficiency'].apply(lambda x: round_off(x, 1))
kpi_df.sort_values(by='Effectiveness', inplace=True)
kpi_df['Response Metric'] = kpi_df['Response Metric'].apply(lambda x: format_response_metric(x))
# Create chart data for effectiveness
chart_data = CategoryChartData()
chart_data.categories = kpi_df['Response Metric']
chart_data.add_series('Effectiveness', kpi_df['Effectiveness'])
chart_placeholder = slide.placeholders[ph_idx[1]]
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Effectiveness',
# 'x_axis':'Channels',
# 'y_axis': 'ROI'
},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
# max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime'])),
type='H',
label_type='M'
)
# Create chart data for efficiency
chart_data_1 = CategoryChartData()
chart_data_1.categories = kpi_df['Response Metric']
chart_data_1.add_series('Efficiency', kpi_df['Efficiency'])
chart_placeholder_1 = slide.placeholders[ph_idx[2]]
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder_1,
slide=slide,
chart_data=chart_data_1,
titles={'chart_title': 'Efficiency',
# 'x_axis':'Channels',
# 'y_axis': 'ROI'
},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
# max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime'])),
type='H'
)
definition_ph_1 = slide.placeholders[ph_idx[3]]
definition_ph_1.text = 'Effectiveness is measured as the total sum of the Response Metric'
definition_ph_2 = slide.placeholders[ph_idx[4]]
definition_ph_2.text = 'Efficiency is measured as the ratio of sum of the Response Metric and sum of Media Spend'
def load_pickle(path):
with open(path, "rb") as f:
file_data = pickle.load(f)
return file_data
def read_all_files():
files=[]
# Read data and bin dictionary
if st.session_state["project_dct"]["data_import"]["imputed_tool_df"] is not None:
final_df_loaded = st.session_state["project_dct"]["data_import"]["imputed_tool_df"].copy()
bin_dict_loaded = st.session_state["project_dct"]["data_import"]["category_dict"].copy()
files.append(final_df_loaded)
files.append(bin_dict_loaded)
if "group_dict" in st.session_state["project_dct"]["data_import"].keys():
channels = st.session_state["project_dct"]["data_import"]["group_dict"]
files.append(channels)
if st.session_state["project_dct"]["transformations"]["final_df"] is not None:
transform_dict = st.session_state["project_dct"]["transformations"]
files.append(transform_dict)
if retrieve_pkl_object_without_warning(st.session_state['project_number'], "Model_Tuning", "tuned_model", schema) is not None:
tuned_model_dict = retrieve_pkl_object_without_warning(st.session_state['project_number'], "Model_Tuning",
"tuned_model", schema) # db
files.append(tuned_model_dict)
else:
files.append(None)
else:
files.append(None)
if len(list(st.session_state["project_dct"]["current_media_performance"]["model_outputs"].keys()))>0: # check if there are model outputs for at least one metric
metrics_list = list(st.session_state["project_dct"]["current_media_performance"]["model_outputs"].keys())
contributions_excels_dict = {}
for metrics in metrics_list:
# raw_df = st.session_state["project_dct"]["current_media_performance"]["model_outputs"][metrics]["raw_data"]
# spend_df = st.session_state["project_dct"]["current_media_performance"]["model_outputs"][metrics]["spends_data"]
contribution_df = st.session_state["project_dct"]["current_media_performance"]["model_outputs"][metrics]["contribution_data"]
contributions_excels_dict[metrics] = {'CONTRIBUTION MMM':contribution_df}
files.append(contributions_excels_dict)
# Get Saved Scenarios
if len(list(st.session_state["project_dct"]["saved_scenarios"]["saved_scenarios_dict"].keys()))>0:
files.append(st.session_state["project_dct"]["saved_scenarios"]["saved_scenarios_dict"])
# saved_scenarios_loaded = get_saved_scenarios_dict(project_path)
return files
'''
Template Layout
0 : Title
1 : Data Details Section {no changes required}
2 : Data Import
3 : Data Import - Channel Groups
4 : Model Results {Duplicate for each model}
5 : Metrics Contribution
6 : Media performance {Duplicate for each model}
7 : Media performance Tabular View {Duplicate for each model}
8 : Optimization Section {no changes}
9 : Optimization Summary {Duplicate for each section}
10 : Channel Spends {Duplicate for each model}
11 : Channel Wise ROI {Duplicate for each model}
12 : Efficiency & Efficacy
13 : Appendix
14 : Transformations
15 : Model Summary
16 : Thank You Slide
'''
def create_ppt(project_name, username, panel_col):
# Read saved files
files = read_all_files()
transform_dict, tuned_model_dict, contributions_excels_dict, saved_scenarios_loaded = None, None, None, None
if len(files)>0:
# saved_data = files[0]
data = files[0]
bin_dict = files[1]
channel_groups_dct = files[2]
try:
transform_dict = files[3]
tuned_model_dict = files[4]
contributions_excels_dict = files[5]
saved_scenarios_loaded = files[6]
except Exception as e:
print(e)
else:
return False
is_panel = True if data[panel_col].nunique()>1 else False
template_path = 'ppt/template.pptx'
# ppt_path = os.path.join('ProjectSummary.pptx')
prs = Presentation(template_path)
num_slides = len(prs.slides)
slides = prs.slides
# Title Slide
title_slide_layout = slides[0].slide_layout
title_slide = prs.slides.add_slide(title_slide_layout)
# Add title & project name
placeholders = title_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = title_slide.placeholders[ph_idx[0]]
title_ph.text = 'Media Mix Optimization Summary'
txt_ph = title_slide.placeholders[ph_idx[1]]
txt_ph.text = 'Project Name: ' + project_name + '\nCreated By: ' + username
# Model Details Section
model_section_slide_layout = slides[1].slide_layout
model_section_slide = prs.slides.add_slide(model_section_slide_layout)
## Add title
placeholders = model_section_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = model_section_slide.placeholders[ph_idx[0]]
title_ph.text = 'Model Details'
section_ph = model_section_slide.placeholders[ph_idx[1]]
section_ph.text = 'Section 1'
# Data Import
data_import_slide_layout = slides[2].slide_layout
data_import_slide = prs.slides.add_slide(data_import_slide_layout)
data_import_slide = title_and_table(slide=data_import_slide,
title='Data Import',
df=data_import(data, bin_dict),
column_width={0: 2, 1: 7}
)
# Channel Groups
channel_group_slide_layout = slides[3].slide_layout
channel_group_slide = prs.slides.add_slide(channel_group_slide_layout)
channel_group_slide = title_and_table(slide=channel_group_slide,
title='Channels - Media and Spend',
df=channel_groups_df(channel_groups_dct, bin_dict),
column_width={0: 2, 1: 5, 2: 2}
)
if tuned_model_dict is not None:
model_metrics_df = model_metrics(tuned_model_dict, False)
# Model Results
for model_key, model_dict in tuned_model_dict.items():
model_result_slide_layout = slides[4].slide_layout
model_result_slide = prs.slides.add_slide(model_result_slide_layout)
model_result_slide = model_result(slide=model_result_slide,
model_key=model_key,
model_dict=model_dict,
model_metrics_df=model_metrics_df,
date_col='date')
if contributions_excels_dict is not None:
# Metrics Contributions
metrics_contributions_slide_layout = slides[5].slide_layout
metrics_contributions_slide = prs.slides.add_slide(metrics_contributions_slide_layout)
metrics_contributions_slide = metrics_contributions(slide=metrics_contributions_slide,
contributions_excels_dict=contributions_excels_dict,
panel_col=panel_col
)
# Media Performance
for target in contributions_excels_dict.keys():
# Chart
model_media_perf_slide_layout = slides[6].slide_layout
model_media_perf_slide = prs.slides.add_slide(model_media_perf_slide_layout)
contribution_df, spends_df = model_media_performance(slide=model_media_perf_slide,
target=target,
contributions_excels_dict=contributions_excels_dict
)
# Tabular View
contri_spends_df = pd.merge(spends_df, contribution_df, on='Channel', how='outer')
contri_spends_df.fillna(0, inplace=True)
for col in [c for c in contri_spends_df.columns if c != 'Channel']:
contri_spends_df[col] = contri_spends_df[col].apply(lambda x: safe_num_to_per(x))
media_performance_table_slide_layout = slides[7].slide_layout
media_performance_table_slide = prs.slides.add_slide(media_performance_table_slide_layout)
media_performance_table_slide = title_and_table(slide=media_performance_table_slide,
title='Media and Spends Channels Tabular View',
df=contri_spends_df,
# column_width={0:2, 1:5, 2:2}
)
if saved_scenarios_loaded is not None:
# Optimization Details
opt_section_slide_layout = slides[8].slide_layout
opt_section_slide = prs.slides.add_slide(opt_section_slide_layout)
## Add title
placeholders = opt_section_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = opt_section_slide.placeholders[ph_idx[0]]
title_ph.text = 'Optimizations Details'
section_ph = opt_section_slide.placeholders[ph_idx[1]]
section_ph.text = 'Section 2'
# Optimization
for scenario_name, scenario in saved_scenarios_loaded.items():
opt_summary_slide_layout = slides[9].slide_layout
opt_summary_slide = prs.slides.add_slide(opt_summary_slide_layout)
optimization_summary(opt_summary_slide, scenario, scenario_name)
channel_spends_slide_layout = slides[10].slide_layout
channel_spends_slide = prs.slides.add_slide(channel_spends_slide_layout)
channel_wise_spends(channel_spends_slide, scenario)
channel_roi_slide_layout = slides[11].slide_layout
channel_roi_slide = prs.slides.add_slide(channel_roi_slide_layout)
channel_wise_roi(channel_roi_slide, scenario)
effictiveness_efficiency_slide_layout = slides[12].slide_layout
effictiveness_efficiency_slide = prs.slides.add_slide(effictiveness_efficiency_slide_layout)
effictiveness_efficiency(effictiveness_efficiency_slide,
data,
bin_dict,
scenario)
# Appendix Section
appendix_section_slide_layout = slides[13].slide_layout
appendix_section_slide = prs.slides.add_slide(appendix_section_slide_layout)
if tuned_model_dict is not None:
## Add title
placeholders = appendix_section_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = appendix_section_slide.placeholders[ph_idx[0]]
title_ph.text = 'Appendix'
section_ph = appendix_section_slide.placeholders[ph_idx[1]]
section_ph.text = 'Section 3'
# Add transformations
# if transform_dict is not None:
# # Transformations
# transformation_slide_layout = slides[14].slide_layout
# transformation_slide = prs.slides.add_slide(transformation_slide_layout)
# transformation_slide = title_and_table(slide=transformation_slide,
# title='Transformations',
# df=transformations(transform_dict),
# custom_table_height=True
# )
# Add model summary
# Model Summary
model_metrics_df = model_metrics(tuned_model_dict, False)
model_summary_slide_layout = slides[15].slide_layout
model_summary_slide = prs.slides.add_slide(model_summary_slide_layout)
model_summary_slide = title_and_table(slide=model_summary_slide,
title='Model Summary',
df=model_metrics_df,
custom_table_height=True
)
# Last Slide
last_slide_layout = slides[num_slides - 1].slide_layout
last_slide = prs.slides.add_slide(last_slide_layout)
# Add title
placeholders = last_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = last_slide.placeholders[ph_idx[0]]
title_ph.text = 'Thank You'
# Remove template slides
xml_slides = prs.slides._sldIdLst
slides = list(xml_slides)
for index in range(num_slides):
xml_slides.remove(slides[index])
# prs.save(ppt_path)
# save the output into binary form
binary_output = BytesIO()
prs.save(binary_output)
return binary_output