|
from matplotlib import pyplot as plt |
|
from sklearn.cluster import KMeans |
|
from sklearn.metrics import silhouette_score |
|
|
|
|
|
def calculate_wcss(data, max_clusters=10):
    """Collect KMeans inertia (WCSS) for every cluster count from 1 upward.

    One KMeans model is fitted per cluster count ``k = 1 .. max_clusters``
    and its ``inertia_`` attribute is recorded, which is the input the
    elbow plot needs.

    Args:
        data: Feature matrix in any form accepted by ``KMeans.fit``.
        max_clusters: Largest cluster count to try (inclusive). Defaults to
            10, which is the range this function previously hard-coded.
            NOTE: KMeans cannot fit more clusters than there are samples,
            so keep this at or below the number of rows in ``data``.

    Returns:
        list: ``inertia_`` values ordered by k; index ``i`` holds k = i + 1.
    """
    wcss = []
    for n_clusters in range(1, max_clusters + 1):
        # Fixed seed and explicit n_init keep the curve reproducible.
        model = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,
        )
        model.fit(data)
        wcss.append(model.inertia_)
    return wcss
|
|
|
def calculate_silhouette_scores(data, min_clusters=2, max_clusters=10):
    """Compute the silhouette score of a KMeans fit for each cluster count.

    One KMeans model is fitted per cluster count
    ``k = min_clusters .. max_clusters`` and scored with
    ``silhouette_score`` using the Euclidean metric.

    Args:
        data: Feature matrix in any form accepted by ``KMeans.fit`` and
            ``silhouette_score``.
        min_clusters: Smallest cluster count to try (inclusive). Defaults to
            2, the previous hard-coded start.
        max_clusters: Largest cluster count to try (inclusive). Defaults to
            10, the previous hard-coded end.

    Returns:
        list: Silhouette scores ordered by k; index ``i`` holds
        k = i + ``min_clusters``.

    Raises:
        ValueError: If ``min_clusters`` is below 2, since the silhouette
            coefficient needs at least two distinct labels.
    """
    if min_clusters < 2:
        raise ValueError("min_clusters must be at least 2 for silhouette scoring")

    scores = []
    for n_clusters in range(min_clusters, max_clusters + 1):
        model = KMeans(
            n_clusters=n_clusters,
            init='k-means++',
            max_iter=300,
            n_init=10,
            random_state=0,
        )
        model.fit(data)
        scores.append(silhouette_score(data, model.labels_, metric='euclidean'))
    return scores
|
|
|
def plot_elbow(wcss):
    """Draw the elbow curve (WCSS against number of clusters) and show it.

    Args:
        wcss: Sequence of WCSS values where position ``i`` corresponds to
            ``i + 1`` clusters, i.e. the scan started at one cluster.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    # Derive the x-axis from the data so a scan of any length plots
    # correctly; a fixed range(1, 11) only matches exactly ten values.
    cluster_counts = range(1, len(wcss) + 1)
    plt.plot(cluster_counts, wcss)
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.show()
|
|
|
def get_optimal_clusters_silhouette(scores, min_clusters=2):
    """Return the cluster count whose silhouette score is highest.

    Args:
        scores: Iterable of silhouette scores ordered by cluster count,
            where the first entry belongs to ``min_clusters`` clusters.
        min_clusters: Cluster count that the first score corresponds to.
            Defaults to 2, the offset this function previously hard-coded.

    Returns:
        int: The cluster count with the highest score. On ties the smallest
        such cluster count wins.

    Raises:
        ValueError: If ``scores`` is empty.
    """
    # Materialize once so any iterable works, not only objects with .index().
    scores = list(scores)
    if not scores:
        raise ValueError("scores must contain at least one silhouette score")

    # list.index returns the first match, so ties resolve to the smallest k.
    optimal_clusters = scores.index(max(scores)) + min_clusters
    print(f"Optimal number of clusters: {optimal_clusters}")
    return optimal_clusters
|
|
|
def fit_kmeans(data, n_clusters):
    """Fit the final KMeans model and attach its labels to ``data``.

    Args:
        data: Table of features supporting column assignment via
            ``data['cluster'] = ...`` (presumably a pandas DataFrame;
            confirm against callers). It is modified in place: a
            ``'cluster'`` column is added or overwritten.
        n_clusters: Number of clusters to fit.

    Returns:
        tuple: ``(kmeans, data)``, the fitted estimator and the same
        ``data`` object with the ``'cluster'`` column set.
    """
    # A 'cluster' column left by an earlier call holds labels, not a
    # feature. Exclude it so re-running does not cluster on old labels.
    features = data.drop(columns=['cluster']) if 'cluster' in data else data

    # Same hyperparameters as the model-selection scans. n_init in
    # particular is pinned because its default differs between
    # scikit-learn releases.
    kmeans = KMeans(
        n_clusters=n_clusters,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    )
    data['cluster'] = kmeans.fit_predict(features)
    return kmeans, data