AiNext / kmeans.py
AICOE-Datamatics's picture
Initial code
247c8df
raw
history blame
1.74 kB
import pandas as pd
from sklearn.cluster import KMeans
import plotly.express as px
def k_means(dataset, cols, drop_features, sample_data):
    """Build an elbow curve of K-Means inertia for 1 to 10 clusters.

    A ``KMeans`` model (``init='k-means++'``) is fitted on ``sample_data``
    once per cluster count, and the resulting ``inertia_`` values are
    plotted against the cluster counts as a line chart with markers.

    Fitting is best-effort: if a fit raises for a given cluster count, the
    error is printed and that count is left out of the chart, so the
    remaining points can still be plotted.

    Args:
        dataset: Unused by this function; kept for caller compatibility.
        cols: Unused by this function; kept for caller compatibility.
        drop_features: Unused by this function; kept for caller
            compatibility.
        sample_data: Feature data passed directly to ``KMeans.fit``.

    Returns:
        The Plotly Express line figure with ``Clusters`` on the x axis and
        ``Distortions`` (inertia) on the y axis.
    """
    cluster_counts = []
    distortions = []

    for n_clusters in range(1, 11):
        try:
            model = KMeans(n_clusters=n_clusters, init='k-means++').fit(sample_data)
        except Exception as exc:
            # Deliberately broad: one failing cluster count must not abort
            # the whole curve. The count is skipped (not recorded) so the
            # two chart columns stay the same length.
            print(f"KMeans failed for n_clusters={n_clusters}: {exc}")
            continue
        cluster_counts.append(n_clusters)
        distortions.append(model.inertia_)

    df = pd.DataFrame({'Clusters': cluster_counts, 'Distortions': distortions})
    elbow_curve = px.line(df, x='Clusters', y='Distortions').update_traces(
        mode='lines+markers'
    )

    # NOTE: a silhouette-score curve was sketched here previously but was
    # disabled; only the elbow curve is produced and returned.
    return elbow_curve