Spaces:
Build error
Build error
import pandas as pd | |
from sklearn.cluster import KMeans | |
import plotly.express as px | |
def k_means(dataset, cols, drop_features, sample_data):
    """Build an elbow curve of K-Means distortion for 1 to 10 clusters.

    A ``KMeans`` model (``init='k-means++'``) is fitted on ``sample_data``
    once per cluster count and its ``inertia_`` is recorded as the
    distortion for that count.

    Args:
        dataset: Not used by this function; kept so existing callers that
            pass it keep working.
        cols: Not used by this function; kept for the same reason.
        drop_features: Not used by this function; kept for the same reason.
        sample_data: Feature data handed directly to ``KMeans.fit``
            (presumably an all-numeric DataFrame -- confirm against caller).

    Returns:
        The Plotly Express line figure (``lines+markers``) with ``Clusters``
        on the x axis and ``Distortions`` on the y axis. Cluster counts
        whose fit failed are omitted from the figure.
    """
    cluster_counts = []
    distortions = []

    for n_clusters in range(1, 11):
        try:
            model = KMeans(n_clusters=n_clusters, init='k-means++').fit(sample_data)
        except Exception as e:
            # Deliberate best-effort: a failing cluster count (e.g. more
            # clusters than samples) is reported and skipped instead of
            # aborting the whole curve.
            print(f"KMeans failed for n_clusters={n_clusters}: {e}")
            continue
        # Record the count together with its inertia so both columns stay
        # the same length even when some fits were skipped; a mismatch
        # would make the DataFrame constructor below raise.
        cluster_counts.append(n_clusters)
        distortions.append(model.inertia_)

    df = pd.DataFrame({'Clusters': cluster_counts, 'Distortions': distortions})
    elbow_curve = px.line(df, x='Clusters', y='Distortions').update_traces(
        mode='lines+markers'
    )
    # NOTE: a silhouette-score curve was previously sketched here as
    # commented-out code; it was never wired in and has been removed.
    return elbow_curve