|
from sklearn.preprocessing import StandardScaler |
|
from kmodes.kprototypes import KPrototypes |
|
from kmodes.kprototypes import euclidean_dissim |
|
import streamlit as st |
|
import algos.clustering.kmeans |
|
|
|
def process(data):
    """Cluster a mixed-type DataFrame with K-Prototypes and label its rows.

    Only ``data[0]`` is read here; it is expected to be a pandas DataFrame.
    When that frame has no ``object`` column the call is delegated to
    ``algos.clustering.kmeans.process``. Otherwise the number of clusters is
    chosen through a Streamlit slider (2 to 9), numerical columns are
    standardized, and ``object`` columns are passed to K-Prototypes as
    categorical features.

    Args:
        data: Indexable container whose first element is the DataFrame to
            cluster. The frame is modified in place: a ``"cluster"`` column
            holding the fitted labels is added (or overwritten).

    Returns:
        The same DataFrame object as ``data[0]`` with the ``"cluster"``
        column set, the result of the k-means fallback when there is no
        ``object`` column, or ``None`` when the frame does not contain at
        least one numerical and one categorical feature column.
    """
    df = data[0]

    # Without categorical columns K-Prototypes degenerates to k-means.
    if 'object' not in list(df.dtypes):
        return algos.clustering.kmeans.process(data)

    k = st.slider('Number of Clusters :', 2, 9)

    # The labels are written back into ``df`` below, so a later call on the
    # same frame (e.g. a Streamlit rerun after moving the slider) would
    # otherwise treat the previous "cluster" column as a numerical feature.
    # Work on a copy that leaves that column out.
    feature_names = [name for name in df.columns if name != "cluster"]
    df_scale = df[feature_names].copy()

    numerical_columns = df_scale.select_dtypes('number').columns
    categorical_columns = df_scale.select_dtypes('object').columns
    if len(numerical_columns) == 0 or len(categorical_columns) == 0:
        return None

    # K-Prototypes identifies categorical features by positional index in
    # the matrix it is fitted on, hence positions within ``df_scale``.
    categorical_indexes = [
        df_scale.columns.get_loc(name) for name in categorical_columns
    ]

    # StandardScaler standardizes each feature independently, so a single
    # call over all numerical columns equals fitting one column at a time.
    df_scale[numerical_columns] = StandardScaler().fit_transform(
        df_scale[numerical_columns]
    )

    kproto = KPrototypes(n_clusters=k,
                         num_dissim=euclidean_dissim,
                         random_state=0)
    kproto.fit(df_scale, categorical=categorical_indexes)

    # Attach the labels to the original (unscaled) frame, as callers expect.
    df["cluster"] = kproto.labels_
    return df