File size: 2,191 Bytes
1e4147d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
import numpy as np

# Page heading. The emoji is written as an escape sequence so the source stays
# ASCII; the previous literal was mojibake (UTF-8 bytes decoded with the wrong
# codec) and rendered as garbage in the page title.
st.title("\U0001F9CD Customer Cluster Explorer")  # U+1F9CD: person standing
st.markdown("#### Discover Groups of Customers Based on Their Income and Spending Habits using a Visual Tree", unsafe_allow_html=True)

# Load dataset. A missing CSV is reported inside the page rather than as a raw
# traceback, and the run is halted because nothing below can work without it.
try:
    df = pd.read_csv("Mall_Customers.csv")
except FileNotFoundError:
    st.error("Could not find `Mall_Customers.csv` in the app's working directory.")
    st.stop()

# Prepare data: standardise the two features so that income (k$) and spending
# score (1-100) are on a comparable scale before distances are computed.
X = df[['Annual Income (k$)', 'Spending Score (1-100)']]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Build the hierarchical-clustering tree with Ward linkage, then cut it into
# at most N_CLUSTERS flat groups and store each customer's group on the frame.
N_CLUSTERS = 5
linked = linkage(X_scaled, method='ward')
df['Cluster'] = fcluster(linked, t=N_CLUSTERS, criterion='maxclust')

# Create tabs. The emoji are written as escape sequences so the file stays
# ASCII; the previous literals were mojibake (UTF-8 bytes decoded with the
# wrong codec) and showed up as garbage in the tab labels.
tab1, tab2, tab3 = st.tabs([
    "\U0001F4CB Customer List",           # U+1F4CB: clipboard
    "\U0001F333 Customer Group Tree",     # U+1F333: deciduous tree
    "\U0001F50D Find My Customer Group",  # U+1F50D: magnifying glass
])

with tab1:
    # Preview of the first ten customers together with their assigned cluster.
    # The header emoji is an escape sequence; the previous literal was mojibake.
    st.header("\U0001F4CB Customer Data")  # U+1F4CB: clipboard
    st.write("Here's a quick look at some customers and their income and spending habits.")
    preview_columns = [
        'CustomerID',
        'Gender',
        'Age',
        'Annual Income (k$)',
        'Spending Score (1-100)',
        'Cluster',
    ]
    st.dataframe(df[preview_columns].head(10))

with tab2:
    # Dendrogram of the Ward linkage tree, truncated to its last 12 merged
    # clusters so the leaves stay readable.
    # The header emoji is an escape sequence; the previous literal was mojibake.
    st.header("\U0001F333 How Customers Are Grouped")  # U+1F333: deciduous tree
    st.write("This diagram shows how customers are grouped based on how similar they are in terms of spending and income.")
    fig, ax = plt.subplots(figsize=(10, 4))
    dendrogram(linked, truncate_mode='lastp', p=12, leaf_rotation=45., leaf_font_size=12., ax=ax)
    st.pyplot(fig)
    # Streamlit re-executes this script on every widget interaction; without an
    # explicit close, pyplot keeps every figure created by earlier runs alive.
    plt.close(fig)

with tab3:
    # Interactive lookup: the chosen (income, score) point is assigned the
    # cluster of its nearest existing customer in standardised feature space.
    # Emoji and the en dash are escape sequences; the previous literals were
    # mojibake (UTF-8 bytes decoded with the wrong codec).
    st.header("\U0001F50D Which Group Does a Customer Belong To?")  # U+1F50D: magnifying glass
    st.write("Use the sliders below to try different values and see which customer group they might belong to.")

    income = st.slider("Customer's Annual Income (k$)", 15, 150, 40)
    score = st.slider("Customer's Spending Score (1\u2013100)", 1, 100, 50)

    # Compare with existing customers.
    # The scaler was fitted on a DataFrame, so it is given a one-row DataFrame
    # with the same column names; passing a bare list makes scikit-learn emit
    # a feature-name mismatch warning on every rerun.
    query = pd.DataFrame([[income, score]], columns=X.columns)
    new_point = scaler.transform(query)
    distances = np.linalg.norm(X_scaled - new_point, axis=1)
    closest_idx = distances.argmin()
    # Select the column first, then the row: avoids materialising a
    # mixed-dtype row Series just to read one integer label.
    predicted_cluster = int(df['Cluster'].iloc[closest_idx])

    st.success(f"This customer is likely in **Group {predicted_cluster}**, based on similar spending and income behavior.")