File size: 12,628 Bytes
3bd8465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2791186
3bd8465
 
 
 
 
2791186
3bd8465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2791186
 
3bd8465
 
 
 
 
 
2791186
 
3bd8465
 
 
 
 
 
 
 
 
 
 
 
 
2791186
3bd8465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import pandas as pd

def hello():
    print ("world!")

def polynomial_func(x, a,b,c,d):
    y = a * x - b * x**2 + c * x**3 + d
    return y

def get_country_metrics(country,min_date='2023-07-01',max_date='2023-09-01'):
    df_country = pd.read_csv('ecom_cvr_aov_by_month.csv',parse_dates=['DATE'])
    df_country['CVR']=df_country['CVR'].apply(lambda x: x.replace("%","")).astype(float)/100
    df_country['PC']=df_country['PC'].apply(lambda x: x.replace("%","")).astype(float)/100
    if country in df_country.Country.unique():
        return df_country[(df_country.Country==country) & 
                           (df_country.DATE>=min_date) & 
                           (df_country.DATE<max_date)].groupby('Country')[['CVR','AOV','PC']].mean().values[0]
    else:
        return df_country.loc[(df_country.DATE>=min_date) & 
                               (df_country.DATE<max_date),['CVR','AOV','PC']].mean().values

def get_comparison_dates(min_date,max_date,period='previous_period'):
    '''
    period is either previous period or last year
    '''
    if period=='previous_period':
        delta = (pd.to_datetime(max_date).to_period('M') - pd.to_datetime(min_date).to_period('M')).n
    else:
        delta = 12
    mid,mad=(pd.DataFrame([pd.to_datetime(min_date), pd.to_datetime(max_date)]) - pd.DateOffset(months=delta)).values
    return pd.to_datetime(mid[0]).strftime('%Y-%m-%d'),pd.to_datetime(mad[0]).strftime('%Y-%m-%d')


def calculate_max_spend(df,min_date,max_date,
                        revenue_or_ebitda='revenue',
                        country='DE',
                        platform='meta',
                        sub_channel='',
                        pprint=False):
    # consider only ads with positive click outcomes and sort by cpc
    if sub_channel=='':
        df_temp = df[(df.clicks>0) & (df.country==country) & 
                    (df.platform==platform) & 
                    (df['date']>min_date) & (df['date']<max_date)
                        ].sort_values('cpc').reset_index()
    else:
        df_temp = df[(df.clicks>0) & (df.country==country) & 
                    (df.platform==platform) & 
                    (df.sub_channel==sub_channel) &
                    (df['date']>min_date) & (df['date']<max_date)
                        ].sort_values('cpc').reset_index()

    # calculate cumulative spend and cumulative clicks
    df_temp['cum_cost']=df_temp['cost'].cumsum()
    df_temp['cum_clicks']=df_temp['clicks'].cumsum()
    df_temp['cum_cpc']=df_temp['cum_cost']/df_temp['cum_clicks']

    # calculate revenue and income
    cvr,aov,pc = get_country_metrics(country,min_date,max_date)
    df_temp['cum_revenue']=df_temp['cum_clicks']*cvr*aov
    # at some stage, the ebitda becomes negative
    if revenue_or_ebitda=='revenue':
        df_temp['cum_income'] = df_temp['cum_revenue']-df_temp['cum_cost']
    else:
        df_temp['cum_income'] = df_temp['cum_revenue'] * pc - df_temp['cum_cost']
    
    # calculate marginal cpc and marginal income
    df_temp['marginal_cpc'] = (df_temp.cum_cost - df_temp.cum_cost.shift(1))/(df_temp.cum_clicks - df_temp.cum_clicks.shift(1))
    df_temp['marginal_income'] = df_temp.cum_income - df_temp.cum_income.shift(1) - df_temp['marginal_cpc']
    
    try:
        params, cv = curve_fit(polynomial_func, df_temp['cum_cost'],df_temp['marginal_income'].fillna(0), p0=(1, 1,1,1))
        a,b,c,d = params

        # determine quality of the fit
        squaredDiffs = np.square(df_temp['marginal_income'].fillna(0) - polynomial_func(df_temp['cum_cost'], a,b,c,d))
        squaredDiffsFromMean = np.square(df_temp['marginal_income'].fillna(0) - np.mean(df_temp['marginal_income'].fillna(0)))
        rSquared = 1 - np.sum(squaredDiffs) / np.sum(squaredDiffsFromMean)
        output_msg=[]
        if pprint==True:
            output_msg.append(f"R² of fit for marginal income = {rSquared}")
            # print(f"R² of fit for marginal income = {rSquared}")

        # inspect the parameters
        if pprint==True:
            output_msg.append(f"Marginal income equation Y = {a} * x - {b} * x^2 + {c} * x^3 + {d}")

        # calculate max costs when it becomes negative ROAS
        df_marginal = pd.DataFrame(zip(np.arange(500000), 
                                       polynomial_func(np.arange(500000), a,b,c,d)
                                      )).rename(columns={0:'cumulative_cost',1:'marginal_income'})
        # join actuals to get the full picture
        _ = df_temp[['cost','cpc','cum_cost', 'cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']]
        # converting cost to integer in order to join with the marginal dataset
        _['cum_cost']=_['cum_cost'].apply(lambda x:np.round(x,0).astype(int))
        df_marginal = pd.merge(df_marginal, 
                               _.groupby('cum_cost')[
                                   ['cost','cpc','cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']
                                    ].max().reset_index().rename(columns={#'cum_cost':'cumulative_cost',
                                                                          'cpc':'marginal_cpc'}),
                              how='left',left_on='cumulative_cost',right_on='cum_cost').fillna(method='ffill')
        max_spend_threshold = df_marginal.loc[(df_marginal.marginal_income.shift(1)>=0) & (df_marginal.marginal_income.shift(-1)<0),'cumulative_cost'].min()
        highest_amount_spent = df_marginal.cum_cost.max()
    except:
        if pprint==True:
            output_msg.append("Something happened during marginal CPC and equation calculations. We couldn't calculate max_spend_threshold and highest_amount_spent...")
        max_spend_threshold=np.nan
        highest_amount_spent=np.nan
        rSquared=np.nan
        total_negative_roas_spend=np.nan

    if pd.notnull(max_spend_threshold):
        if (max_spend_threshold>highest_amount_spent):
            marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
            if pprint==True:
                output_msg.append(f"Highest amount spent during this period is: {highest_amount_spent}")
        else:
            marginal_cpc,cum_clicks,cum_cpc,cum_revenue = df_marginal.loc[df_marginal.cumulative_cost==max_spend_threshold,
                                                                      ['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
            marginal_cpc_HAS,cum_clicks_HAS,cum_cpc_HAS,cum_revenue_HAS = df_marginal.loc[df_marginal.cumulative_cost==highest_amount_spent,
                                                                      ['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
            # any spend beyond the max_spend_threshold needs to be saved
            total_negative_roas_spend = df_temp.loc[df_temp.cum_cost>max_spend_threshold,['cost']].sum().values[0]
    #     # so to get the max cost for this tactic or strategy, 
        if pprint==True:
            output_msg.append(f"Max spend threshold is: {max_spend_threshold}") 
            output_msg.append(f"For this spend, marginal_cpc={marginal_cpc}, cumulative_clicks={cum_clicks}, cumulative_cpc={cum_cpc}, cumulative_revenue={cum_revenue}")
            output_msg.append(f"Amount spent during this period is: {highest_amount_spent}") 
            output_msg.append(f"For this spend, marginal_cpc={marginal_cpc_HAS}, cumulative_clicks={cum_clicks_HAS}, cumulative_cpc={cum_cpc_HAS}, cumulative_revenue={cum_revenue_HAS}")
            output_msg.append(f"Total amount spent in negative ROAS={total_negative_roas_spend}")
            print (output_msg)

            plt.style.use('ggplot')
            fig = plt.figure(figsize=(18,6))
            ax1 = fig.add_subplot(121)
            plt.plot(df_temp['cum_cost'],df_temp['marginal_income'], '.', label="data")
            plt.plot(df_temp['cum_cost'], polynomial_func(df_temp['cum_cost'], a,b,c,d), '--', color='blue', label="fitted")
            plt.xlabel('cumulative cost'); plt.ylabel('marginal income')
            ax1.title.set_text('Net income at different spend levels')
            ax2 = fig.add_subplot(122);
            plt.scatter(df_temp['cum_cost'],df_temp['cpc']);
            plt.axhline(1*cvr*aov,color='black',linestyle='--');plt.axhline(1*cvr*aov*pc,color='blue',linestyle='--');
            plt.annotate(f'Any clicks above {np.round(cvr*aov,2)} SEK cpc has negative ROAS \n Above {np.round(cvr*aov*pc,2)} SEK is negative PC',size=12,xy=[0,40], color="black")
            plt.xlabel('cumulative cost'); plt.ylabel('cpc')
            # ax2.axes.get_xaxis().set_visible(False)
            ax2.title.set_text('CPC for at different spend levels');
            return fig,output_msg
    else:
            marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
            total_negative_roas_spend=np.nan
            if pprint==True:
                print (f"Not enough data for mCPC analysis. Max threshold resulted in {max_spend_threshold}. Highest amount spent during this period is: {highest_amount_spent}") 
    #     return df_marginal
    return max_spend_threshold,highest_amount_spent,total_negative_roas_spend,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared

def loop_mCPC_countries(df,min_date,max_date,
                        country_list=['UK', 'DE', 'US', 'NL', 'SE', 'CH', 'BE', 'EU', 'FR', 'AU'],
                        platform='meta'):
    country_flag = ''
    df_result = pd.DataFrame()

    for i,d in df[(df.index>min_date) & 
                  (df.index<max_date) & (df.platform==platform) & 
                  (df.country.isin(country_list))].groupby(['country','sub_channel'])['cost'].sum().reset_index().iterrows():
        if country_flag!=d['country']:
            country_flag=d['country']
            print (f"Processing {country_flag}...")

        # run numbers for prior delta
        mid, mad = get_comparison_dates(min_date,max_date,period='previous_period')
        mcost = calculate_max_spend(
                        df,min_date=mid,max_date=mad,
                        revenue_or_ebitda='revenue',
                        country=d['country'],
                        platform=platform,
                        sub_channel=d['sub_channel'],
                        pprint=False)
        
        # rerun values for LY this time
        mid, mad = get_comparison_dates(min_date,max_date,period='LY')
        mcost_ly = calculate_max_spend(
                        df,min_date=mid,max_date=mad,
                        revenue_or_ebitda='revenue',
                        country=d['country'],
                        platform=platform,
                        sub_channel=d['sub_channel'],
                        pprint=False)
        # max_spend_threshold,highest_amount_spent,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared = \
        # combine the 2 results
        df_result = pd.concat([df_result,
                            pd.Series({'platform':platform,
                                        'country':d['country'], 
                                        'sub_channel':d['sub_channel'],
                                        'budget':'',
                                        'max_spend_PP':mcost[0],
                                        'amt_spent_PP':mcost[1],
                                        'rSquared_PP':mcost[6], # can you trust this mcpc analysis for this market?                                     
                                        'max_spend_LY':mcost_ly[0],
                                        'amt_spent_LY':mcost_ly[1],
                                        'rSquared_LY':mcost_ly[6], # can you trust this mcpc analysis for this market?                                     
                                        'cpc_PP':mcost[2],
                                        'cpc_LY':mcost_ly[2],
                                        'cum_clicks_PP':mcost[3],
                                        'cum_clicks_LY':mcost_ly[3],
                                        'cum_cpc_PP':mcost[4],
                                        'cum_cpc_LY':mcost_ly[4],                                     
                                        })],axis=1)
    df_result = df_result.T
    df_result['diff']=df_result['max_spend_PP']-df_result['amt_spent_PP']
    print (f"Total impact due to unrealized revenue and negative ROAS spend between planned and threshold costs is {(df_result['diff'].apply(abs)*-1).sum()} SEK")
    return df_result.sort_values('diff',ascending=True).reset_index(drop=True)