reshark / dashboard /modules /lead_ids.py
kbberendsen's picture
update docker and data locations
320450f
raw
history blame contribute delete
787 Bytes
import pandas as pd
# Load the full data dump from the parquet file using the pyarrow engine.
parquet_file = 'data/data_dump_ai_assingment.parquet'
df = pd.read_parquet(parquet_file, engine='pyarrow')

# Hold out three campaigns (chosen at random, per the original note) as
# testing examples for the final models. The boolean mask from `isin` is
# used directly to select the matching rows, and the selection is copied
# so it is independent of `df`.
campaign_ids = [8, 123, 256]
df_final_testing = df.loc[df['campaign_id'].isin(campaign_ids)].copy()
def get_unique_lead_ids(df, campaign_id):
    """Return the unique lead ids that belong to one campaign.

    Args:
        df: DataFrame with at least the columns ``campaign_id`` and
            ``lead_id``.
        campaign_id: Value to match against the ``campaign_id`` column.

    Returns:
        A list of the distinct ``lead_id`` values found in the rows of
        ``df`` whose ``campaign_id`` equals ``campaign_id``, in order of
        first appearance. The list is empty when no row matches.

    Raises:
        KeyError: If ``df`` lacks the ``campaign_id`` or ``lead_id`` column.
    """
    # Filter the frame that was passed in, not a module-level global, so
    # the function works for any caller-supplied DataFrame.
    campaign_leads = df.loc[df['campaign_id'] == campaign_id, 'lead_id']
    return list(campaign_leads.unique())
# Unique lead ids of each held-out campaign, with every id converted to
# its string form.
leads_8 = list(map(str, get_unique_lead_ids(df_final_testing, 8)))
leads_123 = list(map(str, get_unique_lead_ids(df_final_testing, 123)))
leads_256 = list(map(str, get_unique_lead_ids(df_final_testing, 256)))