|
import pickle |
|
import pandas as pd |
|
import streamlit as st |
|
from models import KMeans |
|
from models import KMedoids |
|
import matplotlib.pyplot as plt |
|
from models import EnsembleClustering |
|
from sklearn.decomposition import PCA |
|
|
|
# Page heading shown at the top of the app.
st.title("Customer Purchase Behavior")

# Seed the submission flag once per session; main() reads it to decide
# whether the clustering result should be rendered.
if "form_submitted" not in st.session_state:
    st.session_state.form_submitted = False
|
|
|
|
|
@st.cache_data
def read_csv():
    """Load ``dataset/clustered_dataset.csv`` into a DataFrame.

    The result is cached by ``st.cache_data``.  Other functions in this file
    expect the returned frame to contain a "Cluster" column.

    Returns:
        The parsed CSV as a pandas DataFrame.
    """
    return pd.read_csv("dataset/clustered_dataset.csv")
|
|
|
|
|
@st.cache_resource
def load_model():
    """Load the pickled clustering model from ``model/clustering.pkl``.

    The result is cached by ``st.cache_resource``.

    Returns:
        The unpickled model object.
    """
    # NOTE(review): the file-level imports from ``models`` are presumably
    # needed so pickle can resolve the stored classes -- confirm before
    # removing them.
    # SECURITY: pickle.load executes arbitrary code from the file; only load
    # model files that come from a trusted source.
    with open("model/clustering.pkl", "rb") as model_file:
        # The context manager closes the handle even if unpickling fails.
        return pickle.load(model_file)
|
|
|
|
|
@st.cache_data
def predict(_model, features):
    """Return the label the model predicts for ``features`` as a scalar.

    Args:
        _model: Object exposing ``predict(features)``.  The leading
            underscore tells Streamlit's cache not to hash this argument.
        features: Input passed straight through to ``_model.predict``.

    Returns:
        The result of calling ``.item()`` on the model's prediction
        (presumably a one-element array -- confirm against the model class).
    """
    return _model.predict(features).item()
|
|
|
|
|
def form_submission():
    """Submit-button callback: mark the form as submitted for this session."""
    st.session_state.form_submitted = True
|
|
|
|
|
def transform_data():
    """Project the stored dataset plus the submitted customer onto 2 PCA axes.

    Reads "Revenue_given", "Frequency", "Recency" and "uk" from
    ``st.session_state`` and appends them as a new last row of the dataset
    (with its "Cluster" column removed).  The "uk" answer is encoded as 0 for
    "No" and 1 otherwise.

    The PCA is fitted on the stored dataset only, *before* the new row is
    appended.  That way the submitted values cannot shift the projection
    axes, and the customer is placed in the same 2-D space that
    ``clusterPointsMean`` computes for the stored points.

    Returns:
        Array with one row per dataset row plus one, and two columns; the
        last row is the submitted customer.
    """
    df = read_csv().drop(['Cluster'], axis=1)

    # NOTE(review): assumes the remaining CSV columns are ordered
    # revenue, frequency, recency, UK flag -- confirm against the dataset.
    input_data = [
        st.session_state["Revenue_given"],
        st.session_state["Frequency"],
        st.session_state["Recency"],
        0 if st.session_state["uk"] == "No" else 1,
    ]

    # Fit on the reference data alone so an extreme user input cannot
    # distort the principal components.
    pca = PCA(n_components=2, random_state=42)
    pca.fit(df)

    # Append the customer as the final row, then project everything with the
    # already-fitted components.
    df.loc[len(df)] = input_data
    features = pca.transform(df)
    return features
|
|
|
|
|
@st.cache_data
def clusterPointsMean(df):
    """Project the dataset onto two PCA components and re-attach the labels.

    Despite its name, this function does not compute any means; it returns
    one projected point per input row.

    Args:
        df: DataFrame with a "Cluster" column; every other column is fed to
            the PCA.

    Returns:
        DataFrame with columns "X1" and "X2" (first and second PCA
        component) and "Cluster" (the labels taken from ``df``).
    """
    labels = df['Cluster']
    projected = PCA(n_components=2, random_state=42).fit_transform(
        df.drop(['Cluster'], axis=1)
    )

    # Slice the component columns directly instead of indexing row by row.
    return pd.DataFrame({
        "X1": projected[:, 0],
        "X2": projected[:, 1],
        "Cluster": labels,
    })
|
|
|
|
|
@st.cache_data
def displayPoints(data, feature):
    """Plot the mean point of clusters 0 and 1 together with one data point.

    Args:
        data: DataFrame with a "Cluster" column and exactly two coordinate
            columns (the per-cluster mean is unpacked into x and y).
        feature: Indexable holding the x coordinate at position 0 and the y
            coordinate at position 1 of the point to highlight.

    The figure is rendered into the Streamlit page with ``st.pyplot``.
    """
    # Mean coordinates of the rows labelled 0 and 1 respectively.
    c1_x, c1_y = data[data['Cluster'] == 0].drop(['Cluster'], axis=1).mean()
    c2_x, c2_y = data[data['Cluster'] == 1].drop(['Cluster'], axis=1).mean()

    fig, ax = plt.subplots()
    ax.scatter(c1_x, c1_y, color="red", label="Cluster-1", s=150)
    ax.scatter(c2_x, c2_y, color="blue", label="Cluster-2", s=150)
    ax.scatter(feature[0], feature[1], color="green", label="Data point", s=150)
    ax.legend()
    st.pyplot(fig=fig)

    # pyplot keeps every figure alive until it is closed explicitly; release
    # it once rendered so figures do not pile up across calls.
    plt.close(fig)
|
|
|
def main():
    """Render the input form and, once submitted, the clustering result.

    The form collects revenue, recency, frequency and a UK yes/no answer into
    ``st.session_state``.  After the first submission the customer is
    projected, its predicted cluster is shown as a heading (the model's label
    plus one), and a scatter plot of the cluster means and the customer is
    drawn.
    """
    with st.form(key="data_form", border=False):
        left_col, right_col = st.columns(2)

        left_col.number_input(label="Revenue Contribution", key="Revenue_given")
        left_col.number_input(
            label="Days past since last buy", min_value=0, value=0, key="Recency"
        )

        right_col.number_input(
            label="How Frequently bought", min_value=0, value=0, key="Frequency"
        )
        right_col.selectbox(
            label="From United Kingdom?", options=["Yes", "No"], index=0, key="uk"
        )

        st.form_submit_button(
            label="Cluster Customer", type='primary', on_click=form_submission
        )

    # Nothing more to show until the form has been submitted at least once.
    if not st.session_state["form_submitted"]:
        return

    # The submitted customer is the last row of the projected data.
    projected = transform_data()
    customer_point = projected[-1]

    clustering_model = load_model()
    cluster_number = predict(clustering_model, customer_point.reshape(1, 2)) + 1
    st.markdown(f"## The customer falls within Cluster - {cluster_number}")

    cluster_points = clusterPointsMean(df=read_csv())
    displayPoints(cluster_points, customer_point)
|
|
|
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()
|
|