#!/usr/bin/env python # coding: utf-8 # In[13]: import altair as alt import pandas as pd import panel as pn # Load the Panel extension pn.extension('vega') # Load the dataset data = pd.read_csv("/Users/kenzabaddou/Downloads/archive/marketing_campaign.csv", sep=";") # Data cleaning data = data.rename(columns=lambda x: x.strip()) # Remove leading and trailing spaces from column names data = data.dropna(subset=['Income']) mean_income = data['Income'].mean() data['Income'] = data['Income'].fillna(mean_income) data['total_spent'] = data[['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']].sum(axis=1) # Define widgets education_dropdown = alt.binding_select(options=sorted(data['Education'].unique()), name='Education Level:') education_select = alt.selection_single(fields=['Education'], bind=education_dropdown, name='Select') # New widget: marital status dropdown marital_status_dropdown = alt.binding_select(options=sorted(data['Marital_Status'].unique()), name='Marital Status:') marital_status_select = alt.selection_single(fields=['Marital_Status'], bind=marital_status_dropdown, name='Select') # New widget: range slider for number of web visits num_web_visits_slider = alt.binding_range(min=0, max=data['NumWebVisitsMonth'].max(), step=1, name='Web Visits per Month:') num_web_visits_select = alt.selection_single(fields=['NumWebVisitsMonth'], bind=num_web_visits_slider, name='Select') # Add new widgets for product selection products = ['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds'] product_x_dropdown = alt.binding_select(options=products, name='Product X:') product_x_select = alt.selection_single(fields=['x_product'], bind=product_x_dropdown, init={'x_product': 'MntWines'}) product_y_dropdown = alt.binding_select(options=products, name='Product Y:') product_y_select = alt.selection_single(fields=['y_product'], bind=product_y_dropdown, init={'y_product': 'MntMeatProducts'}) # Define views # View 1: Bar chart of customer distribution by education level education_chart = alt.Chart(data).mark_bar().encode( x=alt.X('Education:N', title='Education Level'), y=alt.Y('count():Q', title='Number of Customers'), color='Education:N' ).properties(title='Customer Distribution by Education Level') # Define widgets education_dropdown = alt.binding_select(options=sorted(data['Education'].unique()), name='Education Level:') education_select = alt.selection_single(fields=['Education'], bind=education_dropdown, name='Select') income_slider = alt.binding_range(min=0, max=data['Income'].max(), step=1000, name='Annual Income:') income_select = alt.selection_single(fields=['Income'], bind=income_slider, name='Select') # View 2: Scatter plot of customer annual income and total amount spent on products (with interaction) scatter_chart = alt.Chart(data).mark_circle().encode( x=alt.X('Income:Q', title='Annual Income'), y=alt.Y('total_spent:Q', title='Total Amount Spent on Products'), color='Education:N', tooltip=[ alt.Tooltip('Education'), alt.Tooltip('Marital_Status'), alt.Tooltip('Income', format='$,.0f'), alt.Tooltip('total_spent', format='$,.0f') ] ).properties(title='Customer Annual Income vs Amount Spent on Products').add_selection( education_select, income_select, marital_status_select, num_web_visits_select ).transform_filter( education_select ).transform_filter( income_select ).transform_filter( marital_status_select ).transform_filter( num_web_visits_select ) # Define linked selection brush = alt.selection(type='interval') # View 5: Bar chart - Average Total Spending per Education Level (linked to scatter chart) avg_spending_by_education = alt.Chart(data).mark_bar().encode( x=alt.X('Education:N', title='Education Level'), y=alt.Y('mean(total_spent):Q', title='Average Total Spending'), tooltip=['Education', 'mean(total_spent):Q'] ).properties(title='Average Total Spending per Education Level').add_selection( brush ).transform_filter( education_select ) # View 6: Yearly Total Amount Spent on Products (linked to enrollment chart) yearly_total_spent_chart = alt.Chart(data).mark_line().encode( x=alt.X('year(Dt_Customer):O', title='Year'), y=alt.Y('sum(total_spent):Q', title='Total Amount Spent'), tooltip=['year(Dt_Customer):O', 'sum(total_spent):Q'] ).transform_filter( education_select ) # View 3: Timeline chart of new customer enrollments (with linked highlighting) enrollment_chart = alt.Chart(data).mark_line().encode( x=alt.X('year(Dt_Customer):T', title='Year of Enrollment'), y=alt.Y('count():Q', title='Number of New Enrollments'), color='Marital_Status:N').properties(title='New Customer Enrollments Over Time').add_selection( brush, education_select, income_select, marital_status_select, num_web_visits_select).transform_filter( education_select).transform_filter(income_select).transform_filter(marital_status_select).transform_filter( num_web_visits_select) # New View: Average spending per marital status (with linked highlighting) avg_spending_by_marital_status = alt.Chart(data).mark_bar().encode( x=alt.X('Marital_Status:N', title='Marital Status'), y=alt.Y('mean(total_spent):Q', title='Average Total Spending'), color='Marital_Status:N', tooltip=['Marital_Status', 'mean(total_spent):Q']).properties(title='Average Total Spending per Marital Status').add_selection(brush) import pandas as pd import altair as alt # Convert the 'Dt_Customer' column to datetime format data['Dt_Customer'] = pd.to_datetime(data['Dt_Customer']) # Create 'total_spent' column by summing up spending in different product categories data['total_spent'] = data[['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds']].sum(axis=1) # Calculate the average spending for each combination of Teenhome, Kidhome, and Marital_Status avg_spending = data.groupby(['Teenhome', 'Kidhome', 'Marital_Status'])['total_spent'].mean().reset_index() # Create a hover selection hover = alt.selection_single(on='mouseover', nearest=True, empty='none') # Create a selection for the dropdown marital_status_dropdown = alt.binding_select(options=sorted(data['Marital_Status'].unique()), name='Marital Status:') marital_status_select = alt.selection_single(fields=['Marital_Status'], bind=marital_status_dropdown, init={'Marital_Status': 'Married'}) # Create a grouped bar chart for kids chart_kids = alt.Chart(avg_spending).mark_bar().encode( x=alt.X('Kidhome:O', title='Number of Kids at Home'), y=alt.Y('total_spent:Q', title='Average Spending'), color=alt.condition(hover, 'Kidhome:O', alt.value('lightgray'), legend=alt.Legend(title='Number of Kids at Home')), tooltip=['Teenhome', 'Kidhome', 'total_spent', 'Marital_Status'] ).properties(title='Average Spending by Number of Kids at Home (Filtered by Marital Status)').add_selection( hover ) # Create a grouped bar chart for teens chart_teens = alt.Chart(avg_spending).mark_bar().encode( x=alt.X('Teenhome:O', title='Number of Teens at Home'), y=alt.Y('total_spent:Q', title='Average Spending'), color=alt.condition(hover, 'Teenhome:O', alt.value('lightgray'), legend=alt.Legend(title='Number of Teens at Home')), tooltip=['Teenhome', 'Kidhome', 'total_spent', 'Marital_Status'] ).properties(title='Average Spending by Number of Teens at Home (Filtered by Marital Status)').add_selection( hover ) # Combine the charts concat_chart = alt.hconcat(chart_kids, chart_teens).add_selection( marital_status_select ).transform_filter( marital_status_select ) """scatter_plot = alt.Chart(data).mark_circle().encode( x=alt.X('NumWebVisitsMonth:Q', title='Number of Web Visits per Month'), y=alt.Y('NumWebPurchases:Q', title='Number of Web Purchases'), size=alt.Size('count():Q', scale=alt.Scale(range=[50, 500]), legend=alt.Legend(title='Number of Customers')), color=alt.Color('count():Q', scale=alt.Scale(scheme='viridis'), legend=None), tooltip=['NumWebVisitsMonth', 'NumWebPurchases', 'count()'] ).properties(title='Scatter Plot of Web Visits per Month vs. Web Purchases') scatter_plot.interactive()""" # Define dropdown selection for marital status marital_status_dropdown = alt.binding_select(options=data['Marital_Status'].unique().tolist(), name='Marital Status: ') marital_status_selection = alt.selection_single(fields=['Marital_Status'], bind=marital_status_dropdown, name='Marital_Status', init={'Marital_Status': data['Marital_Status'].iloc[0]}) # Add a scatter plot and filter by marital status scatter_plot_filtered = alt.Chart(data).mark_circle().encode( x=alt.X('NumWebVisitsMonth:Q', title='Number of Web Visits per Month'), y=alt.Y('NumWebPurchases:Q', title='Number of Web Purchases'), size=alt.Size('count():Q', scale=alt.Scale(range=[50, 500]), legend=alt.Legend(title='Number of Customers')), color=alt.Color('count():Q', scale=alt.Scale(scheme='viridis'), legend=None), tooltip=['NumWebVisitsMonth', 'NumWebPurchases', 'count()'] ).properties(title='Scatter Plot of Web Visits per Month vs. Web Purchases Filtered by Marital Status').transform_filter( marital_status_selection ) # Add interactivity and the marital status selection to the scatter plot interactive_scatter_plot_filtered = scatter_plot_filtered.interactive().add_selection(marital_status_selection) interactive_scatter_plot_filtered # Group by 'Dt_Customer' and calculate average spending for new and returning customers new_customers = data[data['NumWebPurchases'] == 0].groupby('Dt_Customer')['total_spent'].mean().reset_index() returning_customers = data[data['NumWebPurchases'] > 0].groupby('Dt_Customer')['total_spent'].mean().reset_index() # new_customers and returning_customers DataFrames # Create line charts for new and returning customers without filtering new_line = alt.Chart(new_customers).mark_line().encode( x=alt.X('Dt_Customer:T', title='Date'), y=alt.Y('total_spent:Q', title='Average Spending'), color=alt.value('blue'), tooltip=['Dt_Customer', 'total_spent'] ).properties(title='New Customers') returning_line = alt.Chart(returning_customers).mark_line().encode( x=alt.X('Dt_Customer:T', title='Date'), y=alt.Y('total_spent:Q', title='Average Spending'), color=alt.value('green'), tooltip=['Dt_Customer', 'total_spent'] ).properties(title='Returning Customers') # Display the charts side by side combined_chart = alt.hconcat(new_line, returning_line) combined_chart # Merge Marital_Status to new_customers and returning_customers DataFrames new_customers = new_customers.merge(data[['Dt_Customer', 'Marital_Status']], on='Dt_Customer', how='left') returning_customers = returning_customers.merge(data[['Dt_Customer', 'Marital_Status']], on='Dt_Customer', how='left') # Filter data by marital status using transform_filter within the charts new_line_filtered = alt.Chart(new_customers).mark_line().encode( x=alt.X('Dt_Customer:T', title='Date'), y=alt.Y('total_spent:Q', title='Average Spending'), color=alt.value('blue'), tooltip=['Dt_Customer', 'total_spent'] ).properties(title='New Customers').transform_filter( marital_status_selection ) returning_line_filtered = alt.Chart(returning_customers).mark_line().encode( x=alt.X('Dt_Customer:T', title='Date'), y=alt.Y('total_spent:Q', title='Average Spending'), color=alt.value('green'), tooltip=['Dt_Customer', 'total_spent'] ).properties(title='Returning Customers').transform_filter( marital_status_selection ) # Add interactivity for panning and zooming interactive_chart_filtered = alt.layer(new_line_filtered, returning_line_filtered).resolve_scale(y='shared').interactive().add_selection(marital_status_selection) # Combine the interactive chart, the legend, and the marital status selection # Create legend legend = alt.Chart(pd.DataFrame({'legend': ['New Customers', 'Returning Customers'], 'color': ['blue', 'green']})).mark_point().encode( x=alt.value(20), y=alt.Y('legend', title=None), color=alt.Color('color', scale=None), tooltip=['legend'] ) # Combine the interactive chart, the legend, and the marital status selection final_chart_filtered = alt.hconcat(interactive_chart_filtered, legend).properties(title='Average Spending of New vs Returning Customers by Marital Status') final_chart_filtered import panel as pn pn.extension('vega') # Define dashboard layout dashboard = pn.Column( pn.Row( pn.Column( education_chart, avg_spending_by_education, avg_spending_by_marital_status, width=350 ), pn.Column( scatter_chart, enrollment_chart, width=700 ), ), pn.Row( concat_chart, width=1000 ), pn.Row( final_chart_filtered, width=1000 ), ) # Display dashboard dashboard.servable() # In[ ]: