uplift_modeling / data_utils /exploratory_data_analysis.py
howardroark's picture
initial commit
6f4f21f
import pandas as pd
class ExploratoryAnalysis:
    """Per-treatment-group summary statistics over an experiment DataFrame.

    The wrapped DataFrame must contain the grouping column
    ``treatment_group_key`` and the metric columns ``conversion``,
    ``discounted_price`` and ``benefit``; every method aggregates those
    metrics per distinct value of ``treatment_group_key``.
    """

    # Columns aggregated by every summary, and the column that defines groups.
    _METRIC_COLUMNS = ('conversion', 'discounted_price', 'benefit')
    _GROUP_COLUMN = 'treatment_group_key'

    def __init__(self, df):
        """Store the DataFrame to analyse (kept by reference, not copied)."""
        self.df = df

    def _pivot(self, aggfunc):
        """Return the metric columns aggregated per treatment group.

        Args:
            aggfunc: Aggregation passed straight to ``DataFrame.pivot_table``
                (e.g. ``'sum'`` or ``'mean'``).

        Returns:
            DataFrame indexed by treatment group with one column per metric.
        """
        return self.df.pivot_table(
            values=list(self._METRIC_COLUMNS),
            index=self._GROUP_COLUMN,
            aggfunc=aggfunc,
            margins=False,
        )

    def compute_summaries(self):
        """Return ``(sums, means)`` of the metrics per treatment group.

        Returns:
            Tuple of two DataFrames, both indexed by treatment group: the
            per-group sums first, the per-group means second.
        """
        return self._pivot('sum'), self._pivot('mean')

    def compute_mean_benefit_vs_conversion(self):
        """Return per-group means of ``conversion`` and ``benefit`` only.

        Returns:
            DataFrame indexed by treatment group with the columns
            ``conversion`` and ``benefit``, in that order.
        """
        # Only the mean table is needed, so the sum pivot is not computed.
        return self._pivot('mean')[['conversion', 'benefit']]

    def compute_ate(self, control_group='control'):
        """Return each group's mean metrics minus the control group's means.

        This is the plain difference in group means against the control
        group; the control group's own row is therefore all zeros.

        Args:
            control_group: Label in ``treatment_group_key`` identifying the
                baseline group. Defaults to ``'control'``.

        Returns:
            DataFrame indexed by treatment group, one column per metric,
            holding ``group mean - control mean``.

        Raises:
            KeyError: If ``control_group`` is not one of the groups present.
        """
        group_means = self._pivot('mean')
        if control_group not in group_means.index:
            raise KeyError(
                f"control group {control_group!r} not found in "
                f"'{self._GROUP_COLUMN}'; available groups: "
                f"{list(group_means.index)}"
            )
        # Subtracting a Series aligns it on the metric columns, so the
        # control row is broadcast across every group's row.
        return group_means - group_means.loc[control_group]