wu981526092's picture
update
760a88c
raw
history blame contribute delete
No virus
1.3 kB
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
def calculate_wcss(data, max_clusters=10):
    """Return the KMeans inertia (WCSS) for each cluster count 1..max_clusters.

    Args:
        data: Feature matrix passed straight to ``KMeans.fit``.
        max_clusters: Largest number of clusters to try (inclusive). The
            default of 10 reproduces the original fixed sweep.

    Returns:
        A list where element ``i`` is the ``inertia_`` of the model fitted
        with ``i + 1`` clusters.

    Raises:
        ValueError: If ``max_clusters`` is smaller than 1.
    """
    if max_clusters < 1:
        raise ValueError("max_clusters must be at least 1")

    wcss = []
    for n_clusters in range(1, max_clusters + 1):
        model = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,  # fixed seed keeps the curve reproducible
        )
        model.fit(data)
        wcss.append(model.inertia_)
    return wcss
def calculate_silhouette_scores(data, max_clusters=10):
    """Return the silhouette score for each cluster count 2..max_clusters.

    The sweep starts at 2 because the silhouette score is undefined for a
    single cluster.

    Args:
        data: Feature matrix passed to ``KMeans.fit`` and ``silhouette_score``.
        max_clusters: Largest number of clusters to try (inclusive). The
            default of 10 reproduces the original fixed sweep.

    Returns:
        A list where element ``i`` is the Euclidean silhouette score of the
        model fitted with ``i + 2`` clusters.

    Raises:
        ValueError: If ``max_clusters`` is smaller than 2.
    """
    if max_clusters < 2:
        raise ValueError("max_clusters must be at least 2")

    scores = []
    for n_clusters in range(2, max_clusters + 1):
        model = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,  # fixed seed keeps the scores reproducible
        )
        model.fit(data)
        scores.append(silhouette_score(data, model.labels_, metric='euclidean'))
    return scores
def plot_elbow(wcss):
    """Plot WCSS against the number of clusters (elbow method) and show it.

    Args:
        wcss: Sequence of WCSS values where element ``i`` belongs to
            ``i + 1`` clusters.
    """
    # Derive the x axis from the data so sweeps of any length plot correctly
    # instead of failing on a mismatch with a fixed 1..10 range.
    cluster_counts = range(1, len(wcss) + 1)
    plt.plot(cluster_counts, wcss)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.show()
def get_optimal_clusters_silhouette(scores):
    """Pick the cluster count with the highest silhouette score.

    Args:
        scores: List of silhouette scores where position 0 corresponds to
            2 clusters, position 1 to 3 clusters, and so on.

    Returns:
        The cluster count of the first highest score. The result is also
        printed to stdout.
    """
    first_cluster_count = 2  # the silhouette sweep starts at k=2
    best_score = max(scores)
    best_position = scores.index(best_score)
    optimal_clusters = best_position + first_cluster_count
    print(f"Optimal number of clusters: {optimal_clusters}")
    return optimal_clusters
def fit_kmeans(data, n_clusters):
    """Fit the final KMeans model and label every row of ``data``.

    The labels are written to ``data['cluster']`` in place, and the same
    (mutated) object is returned together with the fitted estimator.

    Args:
        data: Feature table supporting ``drop(columns=...)`` and column
            assignment -- presumably a pandas DataFrame; confirm with callers.
        n_clusters: Number of clusters to fit.

    Returns:
        A tuple ``(kmeans, data)`` of the fitted estimator and the labelled
        table.
    """
    # A 'cluster' column left behind by an earlier call is this function's
    # output, not a feature; exclude it so refitting does not learn from
    # stale labels.
    features = data.drop(columns='cluster', errors='ignore')
    # Use the same explicit hyperparameters as the model-selection helpers so
    # the final model matches the ones that were scored, regardless of the
    # installed scikit-learn's default for n_init.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    )
    data['cluster'] = kmeans.fit_predict(features)
    return kmeans, data