Spaces:
Runtime error
Runtime error
Update clustering.py
Browse files — clustering.py (+3 -0)
clustering.py
CHANGED
@@ -614,6 +614,7 @@ def perform_clustering(data, min_eps, max_eps=0.95, n=5, threshold_values=None,
|
|
614 |
most_similar_comments = {}
|
615 |
|
616 |
for distance_threshold in threshold_values:
|
|
|
617 |
clustering = AgglomerativeClustering(
|
618 |
n_clusters=None,
|
619 |
distance_threshold=distance_threshold,
|
@@ -636,6 +637,7 @@ def perform_clustering(data, min_eps, max_eps=0.95, n=5, threshold_values=None,
|
|
636 |
embeddings_matrix, metric="euclidean"
|
637 |
)
|
638 |
ch_score = calinski_harabasz_score(euclidean_distances, labels)
|
|
|
639 |
else:
|
640 |
ch_score = -1 # Valor predeterminado si solo hay un clúster
|
641 |
calinski_harabasz_scores[distance_threshold] = ch_score
|
@@ -643,6 +645,7 @@ def perform_clustering(data, min_eps, max_eps=0.95, n=5, threshold_values=None,
|
|
643 |
# Calcular Silhouette Score
|
644 |
if len(np.unique(labels)) > 1:
|
645 |
sil_score = silhouette_score(embeddings_matrix, labels, metric="cosine")
|
|
|
646 |
else:
|
647 |
sil_score = -1 # Valor predeterminado si solo hay un clúster
|
648 |
silhouette_scores[distance_threshold] = sil_score
|
|
|
614 |
most_similar_comments = {}
|
615 |
|
616 |
for distance_threshold in threshold_values:
|
617 |
+
distance_threshold = round(distance_threshold, 6)
|
618 |
clustering = AgglomerativeClustering(
|
619 |
n_clusters=None,
|
620 |
distance_threshold=distance_threshold,
|
|
|
637 |
embeddings_matrix, metric="euclidean"
|
638 |
)
|
639 |
ch_score = calinski_harabasz_score(euclidean_distances, labels)
|
640 |
+
ch_score = round(ch_score, 2)
|
641 |
else:
|
642 |
ch_score = -1 # Valor predeterminado si solo hay un clúster
|
643 |
calinski_harabasz_scores[distance_threshold] = ch_score
|
|
|
645 |
# Calcular Silhouette Score
|
646 |
if len(np.unique(labels)) > 1:
|
647 |
sil_score = silhouette_score(embeddings_matrix, labels, metric="cosine")
|
648 |
+
sil_score = round(sil_score, 2)
|
649 |
else:
|
650 |
sil_score = -1 # Valor predeterminado si solo hay un clúster
|
651 |
silhouette_scores[distance_threshold] = sil_score
|