import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import pandas as pd
def hello():
    print("world!")
def polynomial_func(x, a, b, c, d):
    # cubic used to model marginal income as a function of cumulative spend
    y = a * x - b * x**2 + c * x**3 + d
    return y
def get_country_metrics(country, min_date='2023-07-01', max_date='2023-09-01'):
    """Return the mean CVR, AOV and PC for a country over [min_date, max_date).

    Falls back to the all-country average when the country is not in the file.
    """
    df_country = pd.read_csv('ecom_cvr_aov_by_month.csv', parse_dates=['DATE'])
    # CVR and PC are stored as percentage strings (e.g. "2.5%"); convert to fractions
    df_country['CVR'] = df_country['CVR'].str.replace('%', '').astype(float) / 100
    df_country['PC'] = df_country['PC'].str.replace('%', '').astype(float) / 100
    if country in df_country.Country.unique():
        return df_country[(df_country.Country == country) &
                          (df_country.DATE >= min_date) &
                          (df_country.DATE < max_date)].groupby('Country')[['CVR', 'AOV', 'PC']].mean().values[0]
    else:
        return df_country.loc[(df_country.DATE >= min_date) &
                              (df_country.DATE < max_date), ['CVR', 'AOV', 'PC']].mean().values
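# Illustrative usage (values depend on ecom_cvr_aov_by_month.csv):
#   cvr, aov, pc = get_country_metrics('DE', '2023-07-01', '2023-09-01')
# CVR and PC come back as fractions (0.02 == 2%); AOV stays in the file's currency.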
def get_comparison_dates(min_date, max_date, period='previous_period'):
    '''
    Shift [min_date, max_date) back in time for comparison.

    period='previous_period' shifts by the length of the window in months;
    any other value (e.g. 'LY') shifts by 12 months for a year-over-year comparison.
    '''
    if period == 'previous_period':
        delta = (pd.to_datetime(max_date).to_period('M') - pd.to_datetime(min_date).to_period('M')).n
    else:
        delta = 12
    mid = pd.to_datetime(min_date) - pd.DateOffset(months=delta)
    mad = pd.to_datetime(max_date) - pd.DateOffset(months=delta)
    return mid.strftime('%Y-%m-%d'), mad.strftime('%Y-%m-%d')
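# Worked example for min_date='2023-07-01', max_date='2023-09-01':
#   period='previous_period' -> 2-month shift  -> ('2023-05-01', '2023-07-01')
#   period='LY'              -> 12-month shift -> ('2022-07-01', '2022-09-01')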
def calculate_max_spend(df, min_date, max_date,
                        revenue_or_ebitda='revenue',
                        country='DE',
                        platform='meta',
                        sub_channel='',
                        pprint=False):
    """Estimate the spend level beyond which marginal income turns negative
    for a given country / platform / sub_channel over (min_date, max_date)."""
    # consider only ads with positive click outcomes and sort by cpc
    if sub_channel == '':
        df_temp = df[(df.clicks > 0) & (df.country == country) &
                     (df.platform == platform) &
                     (df.index > min_date) & (df.index < max_date)
                     ].sort_values('cpc').reset_index()
    else:
        df_temp = df[(df.clicks > 0) & (df.country == country) &
                     (df.platform == platform) &
                     (df.sub_channel == sub_channel) &
                     (df.index > min_date) & (df.index < max_date)
                     ].sort_values('cpc').reset_index()
    # calculate cumulative spend and cumulative clicks
    df_temp['cum_cost'] = df_temp['cost'].cumsum()
    df_temp['cum_clicks'] = df_temp['clicks'].cumsum()
    df_temp['cum_cpc'] = df_temp['cum_cost'] / df_temp['cum_clicks']
    # calculate revenue and income
    cvr, aov, pc = get_country_metrics(country, min_date, max_date)
    df_temp['cum_revenue'] = df_temp['cum_clicks'] * cvr * aov
    # at some stage, the ebitda becomes negative
    if revenue_or_ebitda == 'revenue':
        df_temp['cum_income'] = df_temp['cum_revenue'] - df_temp['cum_cost']
    else:
        df_temp['cum_income'] = df_temp['cum_revenue'] * pc - df_temp['cum_cost']
    # calculate marginal cpc and marginal income
    df_temp['marginal_cpc'] = (df_temp.cum_cost - df_temp.cum_cost.shift(1)) / (df_temp.cum_clicks - df_temp.cum_clicks.shift(1))
    df_temp['marginal_income'] = df_temp.cum_income - df_temp.cum_income.shift(1) - df_temp['marginal_cpc']
    output_msg = []
    try:
        params, cv = curve_fit(polynomial_func, df_temp['cum_cost'], df_temp['marginal_income'].fillna(0), p0=(1, 1, 1, 1))
        a, b, c, d = params
        # determine quality of the fit
        squaredDiffs = np.square(df_temp['marginal_income'].fillna(0) - polynomial_func(df_temp['cum_cost'], a, b, c, d))
        squaredDiffsFromMean = np.square(df_temp['marginal_income'].fillna(0) - np.mean(df_temp['marginal_income'].fillna(0)))
        rSquared = 1 - np.sum(squaredDiffs) / np.sum(squaredDiffsFromMean)
        if pprint:
            output_msg.append(f"R² of fit for marginal income = {rSquared}")
            # inspect the parameters
            output_msg.append(f"Marginal income equation Y = {a} * x - {b} * x^2 + {c} * x^3 + {d}")
        # calculate max costs when it becomes negative ROAS
        df_marginal = pd.DataFrame(zip(np.arange(500000),
                                       polynomial_func(np.arange(500000), a, b, c, d)
                                       )).rename(columns={0: 'cumulative_cost', 1: 'marginal_income'})
        # join actuals to get the full picture
        _ = df_temp[['cost', 'cpc', 'cum_cost', 'cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']].copy()
        # converting cost to integer in order to join with the marginal dataset
        _['cum_cost'] = _['cum_cost'].round(0).astype(int)
        df_marginal = pd.merge(df_marginal,
                               _.groupby('cum_cost')[
                                   ['cost', 'cpc', 'cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']
                               ].max().reset_index().rename(columns={'cpc': 'marginal_cpc'}),
                               how='left', left_on='cumulative_cost', right_on='cum_cost').ffill()
        # threshold = first spend level where the fitted marginal income flips from non-negative to negative
        max_spend_threshold = df_marginal.loc[(df_marginal.marginal_income.shift(1) >= 0) & (df_marginal.marginal_income.shift(-1) < 0), 'cumulative_cost'].min()
        highest_amount_spent = df_marginal.cum_cost.max()
    except Exception:
        if pprint:
            output_msg.append("Something happened during marginal CPC and equation calculations. We couldn't calculate max_spend_threshold and highest_amount_spent...")
        max_spend_threshold = np.nan
        highest_amount_spent = np.nan
        rSquared = np.nan
        total_negative_roas_spend = np.nan
    if pd.notnull(max_spend_threshold):
        if max_spend_threshold > highest_amount_spent:
            # the fitted break-even point lies beyond the actual spend range
            marginal_cpc, cum_clicks, cum_cpc, cum_revenue = np.nan, np.nan, np.nan, np.nan
            total_negative_roas_spend = np.nan
            if pprint:
                output_msg.append(f"Highest amount spent during this period is: {highest_amount_spent}")
        else:
            marginal_cpc, cum_clicks, cum_cpc, cum_revenue = df_marginal.loc[df_marginal.cumulative_cost == max_spend_threshold,
                                                                             ['marginal_cpc', 'cum_clicks', 'cum_cpc', 'cum_revenue']].values[0]
            # any spend beyond the max_spend_threshold needs to be saved
            total_negative_roas_spend = df_temp.loc[df_temp.cum_cost > max_spend_threshold, ['cost']].sum().values[0]
        if pprint:
            output_msg.append(f"Max spend threshold is: {max_spend_threshold}")
            output_msg.append(f"For this spend, marginal_cpc={marginal_cpc}, cumulative_clicks={cum_clicks}, cumulative_cpc={cum_cpc}, cumulative_revenue={cum_revenue}")
            output_msg.append(f"Total amount spent in negative ROAS={total_negative_roas_spend}")
            print(output_msg)
            plt.style.use('ggplot')
            fig = plt.figure(figsize=(18, 6))
            ax1 = fig.add_subplot(121)
            plt.plot(df_temp['cum_cost'], df_temp['marginal_income'], '.', label="data")
            plt.plot(df_temp['cum_cost'], polynomial_func(df_temp['cum_cost'], a, b, c, d), '--', color='blue', label="fitted")
            plt.xlabel('cumulative cost')
            plt.ylabel('marginal income')
            ax1.title.set_text('Net income at different spend levels')
            ax2 = fig.add_subplot(122)
            plt.scatter(df_temp['cum_cost'], df_temp['cpc'])
            plt.axhline(cvr * aov, color='black', linestyle='--')
            plt.axhline(cvr * aov * pc, color='blue', linestyle='--')
            plt.annotate(f'Any clicks above {np.round(cvr*aov, 2)} SEK cpc have negative ROAS \n Above {np.round(cvr*aov*pc, 2)} SEK is negative PC', size=12, xy=[0, 60], color="black")
            plt.xlabel('cumulative cost')
            plt.ylabel('cpc')
            ax2.title.set_text('CPC at different spend levels')
            return fig, output_msg
    else:
        marginal_cpc, cum_clicks, cum_cpc, cum_revenue = np.nan, np.nan, np.nan, np.nan
        total_negative_roas_spend = np.nan
        if pprint:
            print(f"Not enough data for mCPC analysis. Max threshold resulted in {max_spend_threshold}. Highest amount spent during this period is: {highest_amount_spent}")
    return max_spend_threshold, highest_amount_spent, total_negative_roas_spend, marginal_cpc, cum_clicks, cum_cpc, cum_revenue, rSquared
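# Illustrative call (assumes `df_ads` is a date-indexed ad-level DataFrame with
# 'cost', 'clicks', 'cpc', 'country', 'platform' and 'sub_channel' columns).
# With pprint=False the function returns an 8-tuple:
#   (max_spend, amt_spent, neg_roas_spend,
#    mcpc, cum_clicks, cum_cpc, cum_revenue, r_squared) = calculate_max_spend(
#       df_ads, '2023-07-01', '2023-09-01', country='DE', platform='meta')
# With pprint=True and a valid threshold it returns (fig, output_msg) instead.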
def loop_mCPC_countries(df, min_date, max_date,
                        country_list=['UK', 'DE', 'US', 'NL', 'SE', 'CH', 'BE', 'EU', 'FR', 'AU'],
                        platform='meta'):
    country_flag = ''
    df_result = pd.DataFrame()
    for i, d in df[(df.index > min_date) &
                   (df.index < max_date) & (df.platform == platform) &
                   (df.country.isin(country_list))].groupby(['country', 'sub_channel'])['cost'].sum().reset_index().iterrows():
        if country_flag != d['country']:
            country_flag = d['country']
            print(f"Processing {country_flag}...")
        # run numbers for prior delta
        mid, mad = get_comparison_dates(min_date, max_date, period='previous_period')
        mcost = calculate_max_spend(
            df, min_date=mid, max_date=mad,
            revenue_or_ebitda='revenue',
            country=d['country'],
            platform=platform,
            sub_channel=d['sub_channel'],
            pprint=False)
        # rerun values for LY this time
        mid, mad = get_comparison_dates(min_date, max_date, period='LY')
        mcost_ly = calculate_max_spend(
            df, min_date=mid, max_date=mad,
            revenue_or_ebitda='revenue',
            country=d['country'],
            platform=platform,
            sub_channel=d['sub_channel'],
            pprint=False)
        # combine the 2 results; calculate_max_spend returns
        # (max_spend_threshold, highest_amount_spent, total_negative_roas_spend,
        #  marginal_cpc, cum_clicks, cum_cpc, cum_revenue, rSquared)
        df_result = pd.concat([df_result,
                               pd.Series({'platform': platform,
                                          'country': d['country'],
                                          'sub_channel': d['sub_channel'],
                                          'budget': '',
                                          'max_spend_PP': mcost[0],
                                          'amt_spent_PP': mcost[1],
                                          'rSquared_PP': mcost[7],  # can you trust this mcpc analysis for this market?
                                          'max_spend_LY': mcost_ly[0],
                                          'amt_spent_LY': mcost_ly[1],
                                          'rSquared_LY': mcost_ly[7],  # can you trust this mcpc analysis for this market?
                                          'cpc_PP': mcost[3],
                                          'cpc_LY': mcost_ly[3],
                                          'cum_clicks_PP': mcost[4],
                                          'cum_clicks_LY': mcost_ly[4],
                                          'cum_cpc_PP': mcost[5],
                                          'cum_cpc_LY': mcost_ly[5],
                                          })], axis=1)
    df_result = df_result.T
    df_result['diff'] = df_result['max_spend_PP'] - df_result['amt_spent_PP']
    print(f"Total impact due to unrealized revenue and negative ROAS spend between planned and threshold costs is {(df_result['diff'].apply(abs) * -1).sum()} SEK")
    return df_result.sort_values('diff', ascending=True).reset_index(drop=True)
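# Minimal usage sketch. The file name 'ads_performance.csv', its column names and the
# date range are assumptions for illustration; adjust them to the real ad-level export.
if __name__ == "__main__":
    df_ads = pd.read_csv('ads_performance.csv', parse_dates=['date'], index_col='date')
    # expected columns: cost, clicks, cpc, country, platform, sub_channel
    summary = loop_mCPC_countries(df_ads,
                                  min_date='2023-07-01', max_date='2023-09-01',
                                  country_list=['DE', 'SE'], platform='meta')
    print(summary)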