Kuautli committed on
Commit
8cc3b3e
·
verified ·
1 Parent(s): 5c12284

Update clustering.py

Browse files
Files changed (1) hide show
  1. clustering.py +3 -0
clustering.py CHANGED
@@ -614,6 +614,7 @@ def perform_clustering(data, min_eps, max_eps=0.95, n=5, threshold_values=None,
614
  most_similar_comments = {}
615
 
616
  for distance_threshold in threshold_values:
 
617
  clustering = AgglomerativeClustering(
618
  n_clusters=None,
619
  distance_threshold=distance_threshold,
@@ -636,6 +637,7 @@ def perform_clustering(data, min_eps, max_eps=0.95, n=5, threshold_values=None,
636
  embeddings_matrix, metric="euclidean"
637
  )
638
  ch_score = calinski_harabasz_score(euclidean_distances, labels)
 
639
  else:
640
  ch_score = -1 # Valor predeterminado si solo hay un clúster
641
  calinski_harabasz_scores[distance_threshold] = ch_score
@@ -643,6 +645,7 @@ def perform_clustering(data, min_eps, max_eps=0.95, n=5, threshold_values=None,
643
  # Calcular Silhouette Score
644
  if len(np.unique(labels)) > 1:
645
  sil_score = silhouette_score(embeddings_matrix, labels, metric="cosine")
 
646
  else:
647
  sil_score = -1 # Valor predeterminado si solo hay un clúster
648
  silhouette_scores[distance_threshold] = sil_score
 
614
  most_similar_comments = {}
615
 
616
  for distance_threshold in threshold_values:
617
+ distance_threshold = round(distance_threshold, 6)
618
  clustering = AgglomerativeClustering(
619
  n_clusters=None,
620
  distance_threshold=distance_threshold,
 
637
  embeddings_matrix, metric="euclidean"
638
  )
639
  ch_score = calinski_harabasz_score(euclidean_distances, labels)
640
+ ch_score = round(ch_score, 2)
641
  else:
642
  ch_score = -1 # Valor predeterminado si solo hay un clúster
643
  calinski_harabasz_scores[distance_threshold] = ch_score
 
645
  # Calcular Silhouette Score
646
  if len(np.unique(labels)) > 1:
647
  sil_score = silhouette_score(embeddings_matrix, labels, metric="cosine")
648
+ sil_score = round(sil_score, 2)
649
  else:
650
  sil_score = -1 # Valor predeterminado si solo hay un clúster
651
  silhouette_scores[distance_threshold] = sil_score