Spaces:
Sleeping
Sleeping
File size: 7,317 Bytes
6f4f21f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import pandas as pd
from functools import reduce
from random import randint
from causalml.dataset import make_uplift_classification
class UpliftSimulation:
    """Build a synthetic discount-uplift dataset on top of causalml.

    Each method rewrites ``self.df`` using what the previous one produced,
    so they are expected to be called in this order:

    1. ``simulate_dataset``          -- generate the raw frame
    2. ``apply_discounts_and_clean`` -- add numeric ``discount``, drop
       ``treatment_effect``
    3. ``postprocess_tables``        -- split into themed tables, rename the
       feature columns, merge back on ``UserID``
    4. ``add_monetary_effect``       -- add ``base_price``,
       ``discounted_price`` and ``benefit``
    """

    # Discount rate attached to each treatment-arm label.
    _DISCOUNT_RATES = {
        'control': 0,
        'discount_05': 0.05,
        'discount_10': 0.10,
        'discount_15': 0.15,
    }

    # "Mixed" uplift features appended to the uplift table, in the exact
    # order in which they receive their business names below.
    # NOTE(review): these names look tied to the default constructor
    # arguments -- confirm against the causalml output before changing
    # feature counts.
    _MIXED_UPLIFT_COLUMNS = (
        'x31_increase_mix', 'x22_increase_mix', 'x20_increase_mix',
        'x33_increase_mix', 'x32_increase_mix', 'x27_increase_mix',
        'x21_increase_mix', 'x26_increase_mix',
    )

    # Positional replacement names for the three feature tables.  Each tuple
    # must have exactly as many entries as the table has columns.
    _USER_PROFILE_NAMES = (
        'UserID', 'AgeIndex', 'IncomeIndex', 'PurchaseFrequencyIndex',
        'AccountLifetimeIndex', 'AverageTransactionValueIndex',
        'PreferredPaymentMethodIndex', 'RegionIndex',
    )
    _UPLIFT_NAMES = (
        'UserID', 'EmailDiscountCTRIndex', 'WebDiscountCTRIndex',
        'SocialMediaEngagementIndex', 'DirectMailDiscountResponseIndex',
        'InAppDiscountEngagementIndex', 'FlashSaleParticipationIndex',
        'SeasonalPromoInterestIndex', 'LoyaltyProgramEngagementIndex',
        'ReferralBonusUsageIndex', 'DiscountCodeRedemptionIndex',
        'VIPSaleAccessIndex', 'EarlyAccessOptInIndex',
        'ProductReviewAfterDiscountIndex', 'UpsellConversionIndex',
        'CrossSellInterestIndex', 'BundlePurchaseIndex',
        'SubscriptionUpgradeIndex', 'CustomerFeedbackIndex',
    )
    _IRRELEVANT_NAMES = (
        'UserID', 'BrowserTypeIndex', 'DeviceCategoryIndex',
        'OperatingSystemIndex', 'SessionStartTimeIndex',
        'LanguagePreferenceIndex', 'NewsletterSubscriptionIndex',
        'AccountVerificationStatusIndex', 'AdBlockerPresenceIndex',
    )

    def __init__(self, n=50000, y_name='conversion',
                 treatment_group_keys=None,
                 n_classification_features=15, n_classification_informative=7,
                 n_classification_repeated=0,
                 n_uplift_increase_dict=None,
                 n_uplift_decrease_dict=None,
                 delta_uplift_increase_dict=None,
                 delta_uplift_decrease_dict=None,
                 n_uplift_increase_mix_informative_dict=None,
                 n_uplift_decrease_mix_informative_dict=None,
                 positive_class_proportion=0.05, random_seed=8097):
        """Store the generator settings; no data is produced here.

        Every argument is kept on the instance and later forwarded by
        ``simulate_dataset`` to ``make_uplift_classification`` (``n`` as
        ``n_samples``, ``treatment_group_keys`` as ``treatment_name``, the
        rest under their own names).

        The list/dict arguments default to ``None`` and are replaced by a
        fresh copy of the historical default, so instances never share (and
        cannot corrupt) each other's defaults.  A value passed explicitly is
        stored as given, without copying.
        """
        if treatment_group_keys is None:
            treatment_group_keys = ['control', 'discount_05', 'discount_10', 'discount_15']
        if n_uplift_increase_dict is None:
            n_uplift_increase_dict = {'discount_05': 4, 'discount_10': 3, 'discount_15': 3}
        if n_uplift_decrease_dict is None:
            n_uplift_decrease_dict = {'discount_05': 0, 'discount_10': 0, 'discount_15': 0}
        if delta_uplift_increase_dict is None:
            delta_uplift_increase_dict = {'discount_05': 0.0020, 'discount_10': 0.0045, 'discount_15': 0.008}
        if delta_uplift_decrease_dict is None:
            delta_uplift_decrease_dict = {'discount_05': 0, 'discount_10': 0, 'discount_15': 0}
        if n_uplift_increase_mix_informative_dict is None:
            n_uplift_increase_mix_informative_dict = {'discount_05': 3, 'discount_10': 2, 'discount_15': 3}
        if n_uplift_decrease_mix_informative_dict is None:
            n_uplift_decrease_mix_informative_dict = {'discount_05': 0, 'discount_10': 0, 'discount_15': 0}

        self.n = n
        self.y_name = y_name
        self.treatment_group_keys = treatment_group_keys
        self.n_classification_features = n_classification_features
        self.n_classification_informative = n_classification_informative
        self.n_classification_repeated = n_classification_repeated
        self.n_uplift_increase_dict = n_uplift_increase_dict
        self.n_uplift_decrease_dict = n_uplift_decrease_dict
        self.delta_uplift_increase_dict = delta_uplift_increase_dict
        self.delta_uplift_decrease_dict = delta_uplift_decrease_dict
        self.n_uplift_increase_mix_informative_dict = n_uplift_increase_mix_informative_dict
        self.n_uplift_decrease_mix_informative_dict = n_uplift_decrease_mix_informative_dict
        self.positive_class_proportion = positive_class_proportion
        self.random_seed = random_seed
        # Populated by simulate_dataset().
        self.df = None
        self.X_names = None

    def simulate_dataset(self):
        """Generate the raw dataset and store it on the instance.

        Calls ``make_uplift_classification`` with the stored settings and
        keeps its two return values as ``self.df`` and ``self.X_names``.
        """
        self.df, self.X_names = make_uplift_classification(
            treatment_name=self.treatment_group_keys,
            y_name=self.y_name,
            n_samples=self.n,
            n_classification_features=self.n_classification_features,
            n_classification_informative=self.n_classification_informative,
            n_classification_repeated=self.n_classification_repeated,
            n_uplift_increase_dict=self.n_uplift_increase_dict,
            n_uplift_decrease_dict=self.n_uplift_decrease_dict,
            delta_uplift_increase_dict=self.delta_uplift_increase_dict,
            delta_uplift_decrease_dict=self.delta_uplift_decrease_dict,
            n_uplift_increase_mix_informative_dict=self.n_uplift_increase_mix_informative_dict,
            n_uplift_decrease_mix_informative_dict=self.n_uplift_decrease_mix_informative_dict,
            positive_class_proportion=self.positive_class_proportion,
            random_seed=self.random_seed,
        )

    def apply_discounts_and_clean(self):
        """Add a ``discount`` column and drop ``treatment_effect``.

        ``discount`` is the ``treatment_group_key`` label translated through
        the fixed label -> rate table.  A label that is not in the table is
        kept unchanged (same outcome as the previous ``DataFrame.replace``),
        but the translation no longer depends on pandas' deprecated implicit
        downcasting of replaced object columns.
        """
        rates = self._DISCOUNT_RATES
        self.df['discount'] = [
            rates.get(label, label) for label in self.df['treatment_group_key']
        ]
        self.df = self.df.drop(columns=['treatment_effect'])

    def postprocess_tables(self):
        """Split ``self.df`` into themed tables, rename, and merge them back.

        Columns are grouped by substring of their name (``informative``,
        ``uplift``, ``irrelevant``), the listed mixed-uplift columns are
        appended to the uplift table, and the feature tables get the fixed
        business-style names positionally.  The four tables are kept in
        ``self.dataframes`` and their merge on ``UserID`` becomes the new
        ``self.df``; columns belonging to none of the tables are therefore
        dropped.

        Raises whatever pandas raises when the frame does not have the
        expected layout (a missing mixed-uplift column, or a table whose
        column count differs from its list of replacement names).
        """
        # Add a synthetic UserID for each entry
        self.df['UserID'] = range(len(self.df))

        all_columns = list(self.df.columns)
        informative_cols = [c for c in all_columns if 'informative' in c]
        uplift_cols = [c for c in all_columns if 'uplift' in c]
        irrelevant_cols = [c for c in all_columns if 'irrelevant' in c]
        # Use the configured outcome column rather than a hard-coded name so
        # a non-default y_name keeps working.
        transaction_cols = ['treatment_group_key', self.y_name, 'discount']

        user_profiles = self.df[['UserID'] + informative_cols].copy()
        uplift_data = pd.concat(
            [
                self.df[['UserID'] + uplift_cols],
                self.df[list(self._MIXED_UPLIFT_COLUMNS)],
            ],
            axis=1,
        )
        irrelevant_data = self.df[['UserID'] + irrelevant_cols].copy()
        transaction_data = self.df[['UserID'] + transaction_cols].copy()

        # Positional renames: order of the source columns matters.
        user_profiles.columns = list(self._USER_PROFILE_NAMES)
        uplift_data.columns = list(self._UPLIFT_NAMES)
        irrelevant_data.columns = list(self._IRRELEVANT_NAMES)

        # List of all DataFrames to be merged
        self.dataframes = [user_profiles, uplift_data, irrelevant_data, transaction_data]
        self.df = reduce(
            lambda left, right: pd.merge(left, right, on='UserID'),
            self.dataframes,
        )

    def add_monetary_effect(self):
        """Add ``base_price``, ``discounted_price`` and ``benefit`` columns.

        ``base_price`` is 0 for rows whose outcome column equals 0 and a
        ``randint(1, 100)`` draw otherwise, one draw per such row in row
        order.  The draws come from the module-level ``random`` state, so
        they are not governed by ``self.random_seed``.

        ``discounted_price = base_price * (1 - discount)`` and
        ``benefit = discounted_price - 0.8 * base_price``.

        Only ``self.df`` is read; ``self.X_names`` is not required.
        """
        self.df['base_price'] = [
            0 if outcome == 0 else randint(1, 100)
            for outcome in self.df[self.y_name]
        ]
        self.df['discounted_price'] = self.df['base_price'] * (1 - self.df['discount'])
        self.df['benefit'] = self.df['discounted_price'] - 0.8 * self.df['base_price']