Spaces:

itheenigma
/

mcpc_dashboard

Sleeping

File size: 12,628 Bytes

import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import pandas as pd

def hello():
    print ("world!")

def polynomial_func(x, a,b,c,d):
    y = a * x - b * x**2 + c * x**3 + d
    return y

def get_country_metrics(country,min_date='2023-07-01',max_date='2023-09-01'):
    df_country = pd.read_csv('ecom_cvr_aov_by_month.csv',parse_dates=['DATE'])
    df_country['CVR']=df_country['CVR'].apply(lambda x: x.replace("%","")).astype(float)/100
    df_country['PC']=df_country['PC'].apply(lambda x: x.replace("%","")).astype(float)/100
    if country in df_country.Country.unique():
        return df_country[(df_country.Country==country) & 
                           (df_country.DATE>=min_date) & 
                           (df_country.DATE<max_date)].groupby('Country')[['CVR','AOV','PC']].mean().values[0]
    else:
        return df_country.loc[(df_country.DATE>=min_date) & 
                               (df_country.DATE<max_date),['CVR','AOV','PC']].mean().values

def get_comparison_dates(min_date,max_date,period='previous_period'):
    '''
    period is either previous period or last year
    '''
    if period=='previous_period':
        delta = (pd.to_datetime(max_date).to_period('M') - pd.to_datetime(min_date).to_period('M')).n
    else:
        delta = 12
    mid,mad=(pd.DataFrame([pd.to_datetime(min_date), pd.to_datetime(max_date)]) - pd.DateOffset(months=delta)).values
    return pd.to_datetime(mid[0]).strftime('%Y-%m-%d'),pd.to_datetime(mad[0]).strftime('%Y-%m-%d')


def calculate_max_spend(df,min_date,max_date,
                        revenue_or_ebitda='revenue',
                        country='DE',
                        platform='meta',
                        sub_channel='',
                        pprint=False):
    # consider only ads with positive click outcomes and sort by cpc
    if sub_channel=='':
        df_temp = df[(df.clicks>0) & (df.country==country) & 
                    (df.platform==platform) & 
                    (df['date']>min_date) & (df['date']<max_date)
                        ].sort_values('cpc').reset_index()
    else:
        df_temp = df[(df.clicks>0) & (df.country==country) & 
                    (df.platform==platform) & 
                    (df.sub_channel==sub_channel) &
                    (df['date']>min_date) & (df['date']<max_date)
                        ].sort_values('cpc').reset_index()

    # calculate cumulative spend and cumulative clicks
    df_temp['cum_cost']=df_temp['cost'].cumsum()
    df_temp['cum_clicks']=df_temp['clicks'].cumsum()
    df_temp['cum_cpc']=df_temp['cum_cost']/df_temp['cum_clicks']

    # calculate revenue and income
    cvr,aov,pc = get_country_metrics(country,min_date,max_date)
    df_temp['cum_revenue']=df_temp['cum_clicks']*cvr*aov
    # at some stage, the ebitda becomes negative
    if revenue_or_ebitda=='revenue':
        df_temp['cum_income'] = df_temp['cum_revenue']-df_temp['cum_cost']
    else:
        df_temp['cum_income'] = df_temp['cum_revenue'] * pc - df_temp['cum_cost']
    
    # calculate marginal cpc and marginal income
    df_temp['marginal_cpc'] = (df_temp.cum_cost - df_temp.cum_cost.shift(1))/(df_temp.cum_clicks - df_temp.cum_clicks.shift(1))
    df_temp['marginal_income'] = df_temp.cum_income - df_temp.cum_income.shift(1) - df_temp['marginal_cpc']
    
    try:
        params, cv = curve_fit(polynomial_func, df_temp['cum_cost'],df_temp['marginal_income'].fillna(0), p0=(1, 1,1,1))
        a,b,c,d = params

        # determine quality of the fit
        squaredDiffs = np.square(df_temp['marginal_income'].fillna(0) - polynomial_func(df_temp['cum_cost'], a,b,c,d))
        squaredDiffsFromMean = np.square(df_temp['marginal_income'].fillna(0) - np.mean(df_temp['marginal_income'].fillna(0)))
        rSquared = 1 - np.sum(squaredDiffs) / np.sum(squaredDiffsFromMean)
        output_msg=[]
        if pprint==True:
            output_msg.append(f"R² of fit for marginal income = {rSquared}")
            # print(f"R² of fit for marginal income = {rSquared}")

        # inspect the parameters
        if pprint==True:
            output_msg.append(f"Marginal income equation Y = {a} * x - {b} * x^2 + {c} * x^3 + {d}")

        # calculate max costs when it becomes negative ROAS
        df_marginal = pd.DataFrame(zip(np.arange(500000), 
                                       polynomial_func(np.arange(500000), a,b,c,d)
                                      )).rename(columns={0:'cumulative_cost',1:'marginal_income'})
        # join actuals to get the full picture
        _ = df_temp[['cost','cpc','cum_cost', 'cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']]
        # converting cost to integer in order to join with the marginal dataset
        _['cum_cost']=_['cum_cost'].apply(lambda x:np.round(x,0).astype(int))
        df_marginal = pd.merge(df_marginal, 
                               _.groupby('cum_cost')[
                                   ['cost','cpc','cum_clicks', 'cum_cpc', 'cum_revenue', 'cum_income']
                                    ].max().reset_index().rename(columns={#'cum_cost':'cumulative_cost',
                                                                          'cpc':'marginal_cpc'}),
                              how='left',left_on='cumulative_cost',right_on='cum_cost').fillna(method='ffill')
        max_spend_threshold = df_marginal.loc[(df_marginal.marginal_income.shift(1)>=0) & (df_marginal.marginal_income.shift(-1)<0),'cumulative_cost'].min()
        highest_amount_spent = df_marginal.cum_cost.max()
    except:
        if pprint==True:
            output_msg.append("Something happened during marginal CPC and equation calculations. We couldn't calculate max_spend_threshold and highest_amount_spent...")
        max_spend_threshold=np.nan
        highest_amount_spent=np.nan
        rSquared=np.nan
        total_negative_roas_spend=np.nan

    if pd.notnull(max_spend_threshold):
        if (max_spend_threshold>highest_amount_spent):
            marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
            if pprint==True:
                output_msg.append(f"Highest amount spent during this period is: {highest_amount_spent}")
        else:
            marginal_cpc,cum_clicks,cum_cpc,cum_revenue = df_marginal.loc[df_marginal.cumulative_cost==max_spend_threshold,
                                                                      ['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
            marginal_cpc_HAS,cum_clicks_HAS,cum_cpc_HAS,cum_revenue_HAS = df_marginal.loc[df_marginal.cumulative_cost==highest_amount_spent,
                                                                      ['marginal_cpc','cum_clicks','cum_cpc','cum_revenue']].values[0]
            # any spend beyond the max_spend_threshold needs to be saved
            total_negative_roas_spend = df_temp.loc[df_temp.cum_cost>max_spend_threshold,['cost']].sum().values[0]
    #     # so to get the max cost for this tactic or strategy, 
        if pprint==True:
            output_msg.append(f"Max spend threshold is: {max_spend_threshold}") 
            output_msg.append(f"For this spend, marginal_cpc={marginal_cpc}, cumulative_clicks={cum_clicks}, cumulative_cpc={cum_cpc}, cumulative_revenue={cum_revenue}")
            output_msg.append(f"Amount spent during this period is: {highest_amount_spent}") 
            output_msg.append(f"For this spend, marginal_cpc={marginal_cpc_HAS}, cumulative_clicks={cum_clicks_HAS}, cumulative_cpc={cum_cpc_HAS}, cumulative_revenue={cum_revenue_HAS}")
            output_msg.append(f"Total amount spent in negative ROAS={total_negative_roas_spend}")
            print (output_msg)

            plt.style.use('ggplot')
            fig = plt.figure(figsize=(18,6))
            ax1 = fig.add_subplot(121)
            plt.plot(df_temp['cum_cost'],df_temp['marginal_income'], '.', label="data")
            plt.plot(df_temp['cum_cost'], polynomial_func(df_temp['cum_cost'], a,b,c,d), '--', color='blue', label="fitted")
            plt.xlabel('cumulative cost'); plt.ylabel('marginal income')
            ax1.title.set_text('Net income at different spend levels')
            ax2 = fig.add_subplot(122);
            plt.scatter(df_temp['cum_cost'],df_temp['cpc']);
            plt.axhline(1*cvr*aov,color='black',linestyle='--');plt.axhline(1*cvr*aov*pc,color='blue',linestyle='--');
            plt.annotate(f'Any clicks above {np.round(cvr*aov,2)} SEK cpc has negative ROAS \n Above {np.round(cvr*aov*pc,2)} SEK is negative PC',size=12,xy=[0,40], color="black")
            plt.xlabel('cumulative cost'); plt.ylabel('cpc')
            # ax2.axes.get_xaxis().set_visible(False)
            ax2.title.set_text('CPC for at different spend levels');
            return fig,output_msg
    else:
            marginal_cpc,cum_clicks,cum_cpc,cum_revenue = np.nan, np.nan, np.nan, np.nan
            total_negative_roas_spend=np.nan
            if pprint==True:
                print (f"Not enough data for mCPC analysis. Max threshold resulted in {max_spend_threshold}. Highest amount spent during this period is: {highest_amount_spent}") 
    #     return df_marginal
    return max_spend_threshold,highest_amount_spent,total_negative_roas_spend,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared

def loop_mCPC_countries(df,min_date,max_date,
                        country_list=['UK', 'DE', 'US', 'NL', 'SE', 'CH', 'BE', 'EU', 'FR', 'AU'],
                        platform='meta'):
    country_flag = ''
    df_result = pd.DataFrame()

    for i,d in df[(df.index>min_date) & 
                  (df.index<max_date) & (df.platform==platform) & 
                  (df.country.isin(country_list))].groupby(['country','sub_channel'])['cost'].sum().reset_index().iterrows():
        if country_flag!=d['country']:
            country_flag=d['country']
            print (f"Processing {country_flag}...")

        # run numbers for prior delta
        mid, mad = get_comparison_dates(min_date,max_date,period='previous_period')
        mcost = calculate_max_spend(
                        df,min_date=mid,max_date=mad,
                        revenue_or_ebitda='revenue',
                        country=d['country'],
                        platform=platform,
                        sub_channel=d['sub_channel'],
                        pprint=False)
        
        # rerun values for LY this time
        mid, mad = get_comparison_dates(min_date,max_date,period='LY')
        mcost_ly = calculate_max_spend(
                        df,min_date=mid,max_date=mad,
                        revenue_or_ebitda='revenue',
                        country=d['country'],
                        platform=platform,
                        sub_channel=d['sub_channel'],
                        pprint=False)
        # max_spend_threshold,highest_amount_spent,marginal_cpc,cum_clicks,cum_cpc,cum_revenue,rSquared = \
        # combine the 2 results
        df_result = pd.concat([df_result,
                            pd.Series({'platform':platform,
                                        'country':d['country'], 
                                        'sub_channel':d['sub_channel'],
                                        'budget':'',
                                        'max_spend_PP':mcost[0],
                                        'amt_spent_PP':mcost[1],
                                        'rSquared_PP':mcost[6], # can you trust this mcpc analysis for this market?                                     
                                        'max_spend_LY':mcost_ly[0],
                                        'amt_spent_LY':mcost_ly[1],
                                        'rSquared_LY':mcost_ly[6], # can you trust this mcpc analysis for this market?                                     
                                        'cpc_PP':mcost[2],
                                        'cpc_LY':mcost_ly[2],
                                        'cum_clicks_PP':mcost[3],
                                        'cum_clicks_LY':mcost_ly[3],
                                        'cum_cpc_PP':mcost[4],
                                        'cum_cpc_LY':mcost_ly[4],                                     
                                        })],axis=1)
    df_result = df_result.T
    df_result['diff']=df_result['max_spend_PP']-df_result['amt_spent_PP']
    print (f"Total impact due to unrealized revenue and negative ROAS spend between planned and threshold costs is {(df_result['diff'].apply(abs)*-1).sum()} SEK")
    return df_result.sort_values('diff',ascending=True).reset_index(drop=True)