mcpc_dashboard / marginal_cpc_calculator.py
itheenigma's picture
changed meta country code for UK from GB
0dda556
raw
history blame
12.8 kB
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import pandas as pd
def hello():
print ("world!")
def polynomial_func(x, a,b,c,d):
y = a * x - b * x**2 + c * x**3 + d
return y
def get_country_metrics(country,min_date='2023-07-01',max_date='2023-09-01'):
df_country = pd.read_csv('ecom_cvr_aov_by_month.csv',parse_dates=['DATE'])
df_country['CVR']=df_country['CVR'].apply(lambda x: x.replace("%","")).astype(float)/100
df_country['PC']=df_country['PC'].apply(lambda x: x.replace("%","")).astype(float)/100
if country in df_country.Country.unique():
return df_country[(df_country.Country==country) &
(df_country.DATE>=min_date) &
(df_country.DATE<max_date)].groupby('Country')[['CVR','AOV','PC']].mean().values[0]
else:
return df_country.loc[(df_country.DATE>=min_date) &
(df_country.DATE<max_date),['CVR','AOV','PC']].mean().values
def get_comparison_dates(min_date,max_date,period='previous_period'):
'''
period is either previous period or last year
'''
if period=='previous_period':
delta = (pd.to_datetime(max_date).to_period('M') - pd.to_datetime(min_date).to_period('M')).n
else:
delta = 12
mid,mad=(pd.DataFrame([pd.to_datetime(min_date), pd.to_datetime(max_date)]) - pd.DateOffset(months=delta)).values
return pd.to_datetime(mid[0]).strftime('%Y-%m-%d'),pd.to_datetime(mad[0]).strftime('%Y-%m-%d')
def calculate_max_spend(df,min_date,max_date,
revenue_or_ebitda='revenue',
country='DE',
platform='meta',
sub_channel='',
pprint=False):
# consider only ads with positive click outcomes and sort by cpc
if sub_channel=='':
df_temp = df[(df.clicks>0) & (df.country==country) &
(df.platform==platform) &
(df['date']>min_date) & (df['date']<max_date)
].sort_values('cpc').reset_index()
else:
df_temp = df[(df.clicks>0) & (df.country==country) &
(df.platform==platform) &
(df.sub_channel==sub_channel) &
(df['date']>min_date) & (df['date']<max_date)
].sort_values('cpc').reset_index()
# calculate cumulative spend and cumulative clicks
df_temp['cum_cost']=df_temp['cost'].cumsum()
df_temp['cum_clicks']=df_temp['clicks'].cumsum()
df_temp['cum_cpc']=df_temp['cum_cost']/df_temp['cum_clicks']
# calculate revenue and income
cvr,aov,pc = get_country_metrics(country,min_date,max_date)
df_temp['cum_revenue']=df_temp['cum_clicks']*cvr*aov
# at some stage, the ebitda becomes negative
if revenue_or_ebitda=='revenue':
df_temp['cum_income'] = df_temp['cum_revenue']-df_temp['cum_cost']
else:
df_temp['cum_income'] = df_temp['cum_revenue'] * pc - df_temp['cum_cost']
# calculate marginal cpc and marginal income
df_temp['marginal_cpc'] = (df_temp.cum_cost - df_temp.cum_cost.shift(1))/(df_temp.cum_clicks - df_temp.cum_clicks.shift(1))
df_temp['marginal_income'] = df_temp.cum_income - df_temp.cum_income.shift(1) - df_temp['marginal_cpc']
try:
output_msg=[]
plt.style.use('ggplot')
fig = plt.figure(figsize=(18,6))
params, cv = curve_fit(polynomial_func, df_temp['cum_cost'],df_temp['marginal_income'].fillna(0), p0=(1, 1,1,1))
a,b,c,d = params
# determine quality of the fit
squaredDiffs = np.square(df_temp['marginal_income'].fillna(0) - polynomial_func(df_temp['cum_cost'], a,b,c,d))
squaredDiffsFromMean = np.square(df_temp['marginal_income'].fillna(0) - np.mean(df_temp['marginal_income'].fillna(0)))
rSquared = 1 - np.sum(squaredDiffs) / np.sum(squaredDiffsFromMean)
if pprint==True:
output_msg.append(f"R² of fit for marginal income = {rSquared}")
# print(f"R² of fit for marginal income = {rSquared}")
# inspect the parameters
if pprint==True:
output_msg.append(f"Marginal income equation Y = {a} * x - {b} * x^2 + {c} * x^3 + {d}")
# calculate max costs when it becomes negative ROAS
df_marginal = pd.DataFrame(zip(np.arange(500000),
polynomial_func(np.arange(500000), a,b,c,d)
)).rename(columns={0:'cumulative_cost',1:'marginal_income'})
# join actuals to get the full picture
_ = df_temp[['cost','cpc','cum_cost', 'cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']]
# converting cost to integer in order to join with the marginal dataset
_['cum_cost']=_['cum_cost'].apply(lambda x:np.round(x,0).astype(int))
df_marginal = pd.merge(df_marginal,
_.groupby('cum_cost')[
['cost','cpc','cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']
].max().reset_index().rename(columns={#'cum_cost':'cumulative_cost',
'cpc':'marginal_cpc'}),
how='left',left_on='cumulative_cost',right_on='cum_cost').fillna(method='ffill')
max_spend_threshold = df_marginal.loc[(df_marginal.marginal_income.shift(1)>=0) & (df_marginal.marginal_income.shift(-1)<0),'cumulative_cost'].min()
highest_amount_spent = df_marginal.cum_cost.max()
except:
if pprint==True:
output_msg.append("Something happened during marginal CPC and equation calculations. We couldn't calculate max_spend_threshold and highest_amount_spent...")
max_spend_threshold=np.nan
highest_amount_spent=np.nan
rSquared=np.nan
total_negative_roas_spend=np.nan
if pd.notnull(max_spend_threshold):
if (max_spend_threshold>highest_amount_spent):
marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
if pprint==True:
output_msg.append(f"Highest amount spent during this period is: {highest_amount_spent}")
else:
marginal_cpc,cum_clicks,cum_cpc,cum_revenue = df_marginal.loc[df_marginal.cumulative_cost==max_spend_threshold,
['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
marginal_cpc_HAS,cum_clicks_HAS,cum_cpc_HAS,cum_revenue_HAS = df_marginal.loc[df_marginal.cumulative_cost==highest_amount_spent,
['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
# any spend beyond the max_spend_threshold needs to be saved
total_negative_roas_spend = df_temp.loc[df_temp.cum_cost>max_spend_threshold,['cost']].sum().values[0]
# # so to get the max cost for this tactic or strategy,
if pprint==True:
output_msg.append(f"Max spend threshold is: {max_spend_threshold}")
output_msg.append(f"For this spend, marginal_cpc={marginal_cpc}, cumulative_clicks={cum_clicks}, Average_cpc={cum_cpc}, cumulative_revenue={cum_revenue}")
output_msg.append(f"Amount spent during this period is: {highest_amount_spent}")
output_msg.append(f"For this spend, marginal_cpc={marginal_cpc_HAS}, cumulative_clicks={cum_clicks_HAS}, Average_cpc={cum_cpc_HAS}, cumulative_revenue={cum_revenue_HAS}")
output_msg.append(f"Total amount spent in negative ROAS={total_negative_roas_spend}")
# print (output_msg)
try:
ax1 = fig.add_subplot(121);
plt.scatter(df_temp['cum_cost'],df_temp['cpc']);
plt.axhline(1*cvr*aov,color='black',linestyle='--');plt.axhline(1*cvr*aov*pc,color='blue',linestyle='--');
plt.annotate(f'Any clicks above {np.round(cvr*aov,2)} SEK cpc has negative ROAS \n Above {np.round(cvr*aov*pc,2)} SEK is negative PC',size=12,xy=[0,40], color="black")
plt.xlabel('cumulative cost'); plt.ylabel('cpc')
# ax2.axes.get_xaxis().set_visible(False)
ax1.title.set_text('CPC for at different spend levels');
except:
pass
try:
ax2 = fig.add_subplot(122)
plt.plot(df_temp['cum_cost'],df_temp['marginal_income'], '.', label="data")
plt.plot(df_temp['cum_cost'], polynomial_func(df_temp['cum_cost'], a,b,c,d), '--', color='blue', label="fitted")
plt.xlabel('cumulative cost'); plt.ylabel('marginal income')
ax2.title.set_text('Net income at different spend levels')
except:
pass
else:
marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
total_negative_roas_spend=np.nan
if pprint==True:
print (f"Not enough data for mCPC analysis. Max threshold resulted in {max_spend_threshold}. Highest amount spent during this period is: {highest_amount_spent}")
# return df_marginal
values = (max_spend_threshold,highest_amount_spent,total_negative_roas_spend,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared)
return fig,output_msg,values
def loop_mCPC_countries(df,min_date,max_date,
country_list=['UK', 'DE', 'US', 'NL', 'SE', 'CH', 'BE', 'EU', 'FR', 'AU'],
platform='meta'):
country_flag = ''
df_result = pd.DataFrame()
for i,d in df[(df.index>min_date) &
(df.index<max_date) & (df.platform==platform) &
(df.country.isin(country_list))].groupby(['country','sub_channel'])['cost'].sum().reset_index().iterrows():
if country_flag!=d['country']:
country_flag=d['country']
print (f"Processing {country_flag}...")
# run numbers for prior delta
mid, mad = get_comparison_dates(min_date,max_date,period='previous_period')
mcost = calculate_max_spend(
df,min_date=mid,max_date=mad,
revenue_or_ebitda='revenue',
country=d['country'],
platform=platform,
sub_channel=d['sub_channel'],
pprint=False)
# rerun values for LY this time
mid, mad = get_comparison_dates(min_date,max_date,period='LY')
mcost_ly = calculate_max_spend(
df,min_date=mid,max_date=mad,
revenue_or_ebitda='revenue',
country=d['country'],
platform=platform,
sub_channel=d['sub_channel'],
pprint=False)
# max_spend_threshold,highest_amount_spent,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared = \
# combine the 2 results
df_result = pd.concat([df_result,
pd.Series({'platform':platform,
'country':d['country'],
'sub_channel':d['sub_channel'],
'budget':'',
'max_spend_PP':mcost[0],
'amt_spent_PP':mcost[1],
'rSquared_PP':mcost[6], # can you trust this mcpc analysis for this market?
'max_spend_LY':mcost_ly[0],
'amt_spent_LY':mcost_ly[1],
'rSquared_LY':mcost_ly[6], # can you trust this mcpc analysis for this market?
'cpc_PP':mcost[2],
'cpc_LY':mcost_ly[2],
'cum_clicks_PP':mcost[3],
'cum_clicks_LY':mcost_ly[3],
'cum_cpc_PP':mcost[4],
'cum_cpc_LY':mcost_ly[4],
})],axis=1)
df_result = df_result.T
df_result['diff']=df_result['max_spend_PP']-df_result['amt_spent_PP']
print (f"Total impact due to unrealized revenue and negative ROAS spend between planned and threshold costs is {(df_result['diff'].apply(abs)*-1).sum()} SEK")
return df_result.sort_values('diff',ascending=True).reset_index(drop=True)