Spaces:

itheenigma
/

mcpc_dashboard

Sleeping

mcpc_dashboard / marginal_cpc_calculator.py

fixed code to get marginal cpc curve to start from origin and moved graphs to show cpc even when breakeven is not reached

7d3e93d over 1 year ago

raw

history blame

13.8 kB

	import numpy as np
	from scipy.optimize import curve_fit
	import matplotlib.pyplot as plt
	import pandas as pd

	def hello():
	print ("world!")

	# def polynomial_func(x, a,b,c,d):
	# y = a * x - b * x*2 + c x**3 + d
	def polynomial_func(x, a,b,c):
	y = a * x - b * x*2 + c x**3
	return y

	def get_country_metrics(country,min_date='2023-07-01',max_date='2023-09-01'):
	df_country = pd.read_csv('ecom_cvr_aov_by_month.csv',parse_dates=['DATE'])
	df_country['CVR']=df_country['CVR'].apply(lambda x: x.replace("%","")).astype(float)/100
	df_country['PC']=df_country['PC'].apply(lambda x: x.replace("%","")).astype(float)/100
	if country in df_country.Country.unique():
	return df_country[(df_country.Country==country) &
	(df_country.DATE>=min_date) &
	(df_country.DATE<max_date)].groupby('Country')[['CVR','AOV','PC']].mean().values[0]
	else:
	return df_country.loc[(df_country.DATE>=min_date) &
	(df_country.DATE<max_date),['CVR','AOV','PC']].mean().values

	def get_comparison_dates(min_date,max_date,period='previous_period'):
	'''
	period is either previous period or last year
	'''
	if period=='previous_period':
	delta = (pd.to_datetime(max_date).to_period('M') - pd.to_datetime(min_date).to_period('M')).n
	else:
	delta = 12
	mid,mad=(pd.DataFrame([pd.to_datetime(min_date), pd.to_datetime(max_date)]) - pd.DateOffset(months=delta)).values
	return pd.to_datetime(mid[0]).strftime('%Y-%m-%d'),pd.to_datetime(mad[0]).strftime('%Y-%m-%d')


	def calculate_max_spend(df,min_date,max_date,
	revenue_or_ebitda='revenue',
	country='DE',
	platform='meta',
	sub_channel='',
	pprint=False):
	# consider only ads with positive click outcomes and sort by cpc
	if sub_channel=='':
	df_temp = df[(df.clicks>0) & (df.country==country) &
	(df.platform==platform) &
	(df['date']>min_date) & (df['date']<max_date)
	].sort_values('cpc').reset_index()
	else:
	df_temp = df[(df.clicks>0) & (df.country==country) &
	(df.platform==platform) &
	(df.sub_channel==sub_channel) &
	(df['date']>min_date) & (df['date']<max_date)
	].sort_values('cpc').reset_index()

	# calculate cumulative spend and cumulative clicks
	df_temp['cum_cost']=df_temp['cost'].cumsum()
	df_temp['cum_clicks']=df_temp['clicks'].cumsum()
	df_temp['cum_cpc']=df_temp['cum_cost']/df_temp['cum_clicks']

	# calculate revenue and income
	cvr,aov,pc = get_country_metrics(country,min_date,max_date)
	df_temp['cum_revenue']=df_temp['cum_clicks']cvraov
	# at some stage, the ebitda becomes negative
	if revenue_or_ebitda=='revenue':
	df_temp['cum_income'] = df_temp['cum_revenue']-df_temp['cum_cost']
	else:
	df_temp['cum_income'] = df_temp['cum_revenue'] * pc - df_temp['cum_cost']

	# calculate marginal cpc and marginal income
	df_temp['marginal_cpc'] = (df_temp.cum_cost - df_temp.cum_cost.shift(1))/(df_temp.cum_clicks - df_temp.cum_clicks.shift(1))
	df_temp['marginal_income'] = df_temp.cum_income - df_temp.cum_income.shift(1) - df_temp['marginal_cpc']

	try:
	output_msg=[]
	plt.style.use('fivethirtyeight')
	fig = plt.figure(figsize=(18,6))

	# params, cv = curve_fit(polynomial_func, df_temp['cum_cost'],df_temp['marginal_income'].fillna(0), p0=(1, 1,1,1))
	# a,b,c,d = params
	params, cv = curve_fit(polynomial_func, df_temp['cum_cost'],df_temp['marginal_income'].fillna(0), p0=(1, 1,1))
	a,b,c = params


	# determine quality of the fit
	# squaredDiffs = np.square(df_temp['marginal_income'].fillna(0) - polynomial_func(df_temp['cum_cost'], a,b,c,d))
	squaredDiffs = np.square(df_temp['marginal_income'].fillna(0) - polynomial_func(df_temp['cum_cost'], a,b,c))
	squaredDiffsFromMean = np.square(df_temp['marginal_income'].fillna(0) - np.mean(df_temp['marginal_income'].fillna(0)))
	rSquared = 1 - np.sum(squaredDiffs) / np.sum(squaredDiffsFromMean)

	if pprint==True:
	output_msg.append(f"R² of fit for marginal income = {rSquared}")
	# print(f"R² of fit for marginal income = {rSquared}")

	# inspect the parameters
	if pprint==True:
	# output_msg.append(f"Marginal income equation Y = {a} * x - {b} * x^2 + {c} * x^3 + {d}")
	output_msg.append(f"Marginal income equation Y = {a} * x - {b} * x^2 + {c} * x^3")

	# calculate max costs when it becomes negative ROAS
	# df_marginal = pd.DataFrame(zip(np.arange(500000),
	# polynomial_func(np.arange(500000), a,b,c,d)
	# )).rename(columns={0:'cumulative_cost',1:'marginal_income'})
	df_marginal = pd.DataFrame(zip(np.arange(500000),
	polynomial_func(np.arange(500000), a,b,c)
	)).rename(columns={0:'cumulative_cost',1:'marginal_income'})
	# join actuals to get the full picture
	_ = df_temp[['cost','cpc','cum_cost', 'cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']]
	# converting cost to integer in order to join with the marginal dataset
	_['cum_cost']=_['cum_cost'].apply(lambda x:np.round(x,0).astype(int))
	df_marginal = pd.merge(df_marginal,
	_.groupby('cum_cost')[
	['cost','cpc','cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']
	].max().reset_index().rename(columns={#'cum_cost':'cumulative_cost',
	'cpc':'marginal_cpc'}),
	how='left',left_on='cumulative_cost',right_on='cum_cost').fillna(method='ffill')
	max_spend_threshold = df_marginal.loc[(df_marginal.marginal_income.shift(1)>=0) & (df_marginal.marginal_income.shift(-1)<0),'cumulative_cost'].min()
	highest_amount_spent = df_marginal.cum_cost.max()
	except:
	if pprint==True:
	output_msg.append("Something happened during marginal CPC and equation calculations. We couldn't calculate max_spend_threshold and highest_amount_spent...")
	max_spend_threshold=np.nan
	highest_amount_spent=np.nan
	rSquared=np.nan
	total_negative_roas_spend=np.nan

	if pd.notnull(max_spend_threshold):
	if (max_spend_threshold>highest_amount_spent):
	marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
	if pprint==True:
	output_msg.append(f"Highest amount spent during this period is: {highest_amount_spent}")
	else:
	marginal_cpc,cum_clicks,cum_cpc,cum_revenue = df_marginal.loc[df_marginal.cumulative_cost==max_spend_threshold,
	['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
	marginal_cpc_HAS,cum_clicks_HAS,cum_cpc_HAS,cum_revenue_HAS = df_marginal.loc[df_marginal.cumulative_cost==highest_amount_spent,
	['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
	# any spend beyond the max_spend_threshold needs to be saved
	total_negative_roas_spend = df_temp.loc[df_temp.cum_cost>max_spend_threshold,['cost']].sum().values[0]
	# # so to get the max cost for this tactic or strategy,
	if pprint==True:
	output_msg.append(f"Max spend threshold is: {max_spend_threshold}")
	output_msg.append(f"For this spend, marginal_cpc={marginal_cpc}, cumulative_clicks={cum_clicks}, Average_cpc={cum_cpc}, cumulative_revenue={cum_revenue}")
	output_msg.append(f"Amount spent during this period is: {highest_amount_spent}")
	output_msg.append(f"For this spend, marginal_cpc={marginal_cpc_HAS}, cumulative_clicks={cum_clicks_HAS}, Average_cpc={cum_cpc_HAS}, cumulative_revenue={cum_revenue_HAS}")
	output_msg.append(f"Total amount spent in negative ROAS={total_negative_roas_spend}")
	# print (output_msg)
	else:
	marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
	total_negative_roas_spend=np.nan
	if pprint==True:
	output_msg.append(f"Not enough data for mCPC analysis. Max threshold resulted in {max_spend_threshold}. Highest amount spent during this period is: {highest_amount_spent}")
	print (f"Not enough data for mCPC analysis. Max threshold resulted in {max_spend_threshold}. Highest amount spent during this period is: {highest_amount_spent}")
	try:
	ax1 = fig.add_subplot(121);
	plt.scatter(df_temp['cum_cost'],df_temp['cpc'],color='black');
	plt.axhline(1cvraov,color='red',linestyle='--');plt.axhline(1cvraov*pc,color='blue',linestyle='--');
	plt.annotate(f'Any clicks above {np.round(cvraov,2)} SEK cpc has negative ROAS \n Above {np.round(cvraov*pc,2)} SEK is negative PC',size=12,xy=[0,40], color="black")
	plt.xlabel('Cumulative ad spend that month (in SEK)'); plt.ylabel('Cost per individual click (in SEK)')
	# ax2.axes.get_xaxis().set_visible(False)
	ax1.title.set_text('CPC for at different spend levels');
	except:
	pass
	try:
	ax2 = fig.add_subplot(122)
	plt.plot(df_temp['cum_cost'],df_temp['marginal_income'], '.', label="data", color='r')
	# plt.plot(df_temp['cum_cost'], polynomial_func(df_temp['cum_cost'], a,b,c,d), '--', color='blue', label="fitted")
	plt.plot(df_temp['cum_cost'], polynomial_func(df_temp['cum_cost'], a,b,c), '--', color='blue', label="fitted")
	plt.xlabel('cumulative cost'); plt.ylabel('marginal income')
	ax2.title.set_text('Net income at different spend levels')
	except:
	pass
	values = (max_spend_threshold,highest_amount_spent,total_negative_roas_spend,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared)
	return fig,output_msg,values
	# return fig,output_msg,values,df_marginal

	def loop_mCPC_countries(df,min_date,max_date,
	country_list=['UK', 'DE', 'US', 'NL', 'SE', 'CH', 'BE', 'EU', 'FR', 'AU'],
	platform='meta'):
	country_flag = ''
	df_result = pd.DataFrame()

	for i,d in df[(df.index>min_date) &
	(df.index<max_date) & (df.platform==platform) &
	(df.country.isin(country_list))].groupby(['country','sub_channel'])['cost'].sum().reset_index().iterrows():
	if country_flag!=d['country']:
	country_flag=d['country']
	print (f"Processing {country_flag}...")

	# run numbers for prior delta
	mid, mad = get_comparison_dates(min_date,max_date,period='previous_period')
	mcost = calculate_max_spend(
	df,min_date=mid,max_date=mad,
	revenue_or_ebitda='revenue',
	country=d['country'],
	platform=platform,
	sub_channel=d['sub_channel'],
	pprint=False)

	# rerun values for LY this time
	mid, mad = get_comparison_dates(min_date,max_date,period='LY')
	mcost_ly = calculate_max_spend(
	df,min_date=mid,max_date=mad,
	revenue_or_ebitda='revenue',
	country=d['country'],
	platform=platform,
	sub_channel=d['sub_channel'],
	pprint=False)
	# max_spend_threshold,highest_amount_spent,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared = \
	# combine the 2 results
	df_result = pd.concat([df_result,
	pd.Series({'platform':platform,
	'country':d['country'],
	'sub_channel':d['sub_channel'],
	'budget':'',
	'max_spend_PP':mcost[0],
	'amt_spent_PP':mcost[1],
	'rSquared_PP':mcost[6], # can you trust this mcpc analysis for this market?
	'max_spend_LY':mcost_ly[0],
	'amt_spent_LY':mcost_ly[1],
	'rSquared_LY':mcost_ly[6], # can you trust this mcpc analysis for this market?
	'cpc_PP':mcost[2],
	'cpc_LY':mcost_ly[2],
	'cum_clicks_PP':mcost[3],
	'cum_clicks_LY':mcost_ly[3],
	'cum_cpc_PP':mcost[4],
	'cum_cpc_LY':mcost_ly[4],
	})],axis=1)
	df_result = df_result.T
	df_result['diff']=df_result['max_spend_PP']-df_result['amt_spent_PP']
	print (f"Total impact due to unrealized revenue and negative ROAS spend between planned and threshold costs is {(df_result['diff'].apply(abs)*-1).sum()} SEK")
	return df_result.sort_values('diff',ascending=True).reset_index(drop=True)