File size: 3,233 Bytes
a7b2523 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import pickle
import pandas as pd
import streamlit as st
from models import KMeans
from models import KMedoids
import matplotlib.pyplot as plt
from models import EnsembleClustering
from sklearn.decomposition import PCA
# Page heading rendered at the top of the app.
st.title("Customer Purchase Behavior")

# Seed the submission flag only when it is absent so that a value already
# stored in the session state is never overwritten by this top-level code.
if "form_submitted" not in st.session_state:
    st.session_state["form_submitted"] = False
@st.cache_data
def read_csv():
    """Read the clustered dataset from ``dataset/clustered_dataset.csv``.

    Returns:
        The file contents as a pandas DataFrame. Other functions in this
        file expect it to contain a ``Cluster`` column.
    """
    return pd.read_csv("dataset/clustered_dataset.csv")
@st.cache_resource
def load_model():
    """Load the pickled clustering model from ``model/clustering.pkl``.

    Returns:
        The object stored in the pickle file.

    Raises:
        OSError: If the model file cannot be opened.
    """
    # SECURITY: unpickling can execute arbitrary code, so this must only ever
    # read a model file that the project itself produced.
    # NOTE(review): the ``models`` imports at the top of this file look unused
    # but are presumably what lets pickle resolve the saved classes - confirm
    # before removing them.
    # The ``with`` block guarantees the file handle is closed even when
    # unpickling fails.
    with open("model/clustering.pkl", 'rb') as model_file:
        return pickle.load(model_file)
@st.cache_data
def predict(_model, features):
    """Return the cluster label the model assigns to ``features``.

    Args:
        _model: Object exposing ``predict``. Streamlit's cache does not hash
            parameters whose names start with an underscore, so the model is
            excluded from the cache key.
        features: Input passed straight to ``_model.predict``; the caller in
            this file supplies a single reshaped ``(1, 2)`` sample.

    Returns:
        The prediction converted with ``.item()``, which requires the
        prediction to hold exactly one element.
    """
    return _model.predict(features).item()
def form_submission():
    """Mark the input form as submitted in the session state.

    Passed as the ``on_click`` callback of the form's submit button.
    """
    st.session_state["form_submitted"] = True
def transform_data():
    """Project the dataset plus the submitted customer onto two PCA axes.

    The ``Cluster`` column is removed, the values entered in the form are
    appended as one extra row, and a two-component PCA is fitted on the
    combined table.

    Returns:
        The PCA coordinates of every row; the submitted customer is the
        last row.
    """
    customers = read_csv().drop(columns=['Cluster'])

    state = st.session_state
    # The "From United Kingdom?" answer is encoded as 0 for "No" and 1 otherwise.
    uk_flag = 0 if state["uk"] == "No" else 1
    # NOTE(review): assumes the remaining CSV columns are ordered revenue,
    # frequency, recency, UK flag - confirm against the dataset.
    new_row = [state["Revenue_given"], state["Frequency"], state["Recency"], uk_flag]

    # Appending at position len(customers) relies on the default 0..n-1 index.
    customers.loc[len(customers)] = new_row

    # NOTE(review): the projection is refitted with the new row included, so
    # it may differ slightly from the one the saved model was trained on -
    # verify this is intended.
    reducer = PCA(n_components=2, random_state=42)
    return reducer.fit_transform(customers)
@st.cache_data
def clusterPointsMean(df):
    """Project the dataset onto two PCA axes and keep each row's cluster.

    Despite the name, no averaging happens here; the per-cluster means are
    computed later by the plotting function.

    Args:
        df: DataFrame containing a ``Cluster`` column; all remaining columns
            are fed to the PCA.

    Returns:
        A DataFrame with columns ``X1`` and ``X2`` (the two PCA coordinates
        of each row) and ``Cluster`` (the label taken from ``df``).
    """
    labels = df['Cluster']
    reducer = PCA(n_components=2, random_state=42)
    projected = reducer.fit_transform(df.drop(columns=['Cluster']))

    # Column slices replace the per-row index loops; the values are the same.
    return pd.DataFrame({
        "X1": projected[:, 0],
        "X2": projected[:, 1],
        "Cluster": labels,
    })
@st.cache_data
def displayPoints(data, feature):
    """Plot both cluster centroids together with the submitted customer.

    Args:
        data: DataFrame with a ``Cluster`` column and exactly two coordinate
            columns; rows labelled 0 and 1 form the two clusters.
        feature: Two-element sequence holding the customer's coordinates.

    The figure is rendered with ``st.pyplot`` and then closed.
    """
    # Each centroid is the column-wise mean of the points in that cluster.
    c1_x, c1_y = data[data['Cluster'] == 0].drop(['Cluster'], axis=1).mean()
    c2_x, c2_y = data[data['Cluster'] == 1].drop(['Cluster'], axis=1).mean()

    fig, ax = plt.subplots()
    try:
        ax.scatter(c1_x, c1_y, color="red", label="Cluster-1", s=150)
        ax.scatter(c2_x, c2_y, color="blue", label="Cluster-2", s=150)
        ax.scatter(feature[0], feature[1], color="green", label="Data point", s=150)
        ax.legend()
        st.pyplot(fig=fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # release it here to avoid accumulating figures across calls.
        plt.close(fig)
def main():
    """Render the input form and, once submitted, the prediction and plot."""
    with st.form(border=False, key="data_form"):
        left, right = st.columns(2)
        left.number_input(label="Revenue Contribution", key="Revenue_given")
        left.number_input(label="Days past since last buy", min_value=0, value=0, key="Recency")
        right.number_input(label="How Frequently bought", min_value=0, value=0, key="Frequency")
        right.selectbox(label="From United Kingdom?", options=["Yes", "No"], index=0, key="uk")
        st.form_submit_button(label="Cluster Customer", type='primary', on_click=form_submission)

    # Nothing more to show until the form has been submitted at least once.
    if not st.session_state["form_submitted"]:
        return

    # The submitted customer is the last row of the projected data.
    projected = transform_data()
    feature = projected[-1]

    model = load_model()
    # The model's label is shifted by one so clusters are displayed as 1 and 2.
    cluster_number = predict(model, feature.reshape(1, 2)) + 1
    st.markdown(f"## The customer falls within Cluster - {cluster_number}")

    points = clusterPointsMean(df=read_csv())
    displayPoints(points, feature)
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()
|