import datasets
import evaluate
from sklearn.metrics import (
    adjusted_mutual_info_score,
    adjusted_rand_score,
    calinski_harabasz_score,
    completeness_score,
    davies_bouldin_score,
    fowlkes_mallows_score,
    homogeneity_score,
    silhouette_score,
)
from sklearn.metrics.cluster import contingency_matrix, pair_confusion_matrix

_CITATION = """
@article{scikit-learn,
  title={Scikit-learn: Machine Learning in {P}ython},
  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
         and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
         and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
         Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  journal={Journal of Machine Learning Research},
  volume={12},
  pages={2825--2830},
  year={2011}
}
"""


_DESCRIPTION = """\
This evaluator computes multiple clustering metrics to assess the quality of a clustering.
By default, it operates in an unsupervised setting, evaluating the clustering from the
samples and the predicted cluster labels alone.
When ground-truth labels are passed as well, it additionally computes supervised metrics
(not shown in this demo).
"""


_KWARGS_DESCRIPTION = """
Computes the quality of clustering results.
Args:
    samples: vector representations of the clustered samples
    predictions: predicted cluster labels
    truth_labels (optional): ground-truth labels used to compute additional supervised metrics
Returns:
    silhouette_score
    davies_bouldin_score
    calinski_harabasz_score
    and, when truth_labels is provided:
    adjusted_rand_score
    adjusted_mutual_info_score
    homogeneity_score
    completeness_score
    fowlkes_mallows_score
    contingency_matrix
    pair_confusion_matrix
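Example (illustrative; the toy samples and cluster labels below are made up):
    >>> metric = ClusteringEvaluator()
    >>> results = metric.compute(
    ...     samples=[[0.0, 0.0], [0.1, 0.1], [5.0, 5.0], [5.1, 5.1]],
    ...     predictions=[0, 0, 1, 1],
    ... )
    >>> sorted(results.keys())
    ['calinski_harabasz_score', 'davies_bouldin_score', 'silhouette_score']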
"""


@evaluate.utils.file_utils.add_start_docstrings(
    _DESCRIPTION, _KWARGS_DESCRIPTION
)
class ClusteringEvaluator(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
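            # Note: truth_labels is intentionally not declared as an input
            # feature; evaluate forwards unrecognized compute() keyword
            # arguments, such as truth_labels, to _compute() instead.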
            features=datasets.Features(
                {
                    "samples": datasets.Sequence(datasets.Value("float32")),
                    "predictions": datasets.Value("int64"),
                }
            ),
        )

    def _compute(self, samples, predictions, truth_labels=None):
        unsupervised_metrics = [
            silhouette_score,
            davies_bouldin_score,
            calinski_harabasz_score,
        ]
        supervised_metrics = [
            adjusted_rand_score,
            adjusted_mutual_info_score,
            homogeneity_score,
            completeness_score,
            fowlkes_mallows_score,
            contingency_matrix,
            pair_confusion_matrix,
        ]
        results = {}

        # The unsupervised metrics are always computed, from samples and predictions alone
        for fn in unsupervised_metrics:
            results[fn.__name__] = float(fn(samples, predictions))

        # Compute supervised metrics if reference labels are passed
        if truth_labels is not None:
            for fn in supervised_metrics:
                score = fn(truth_labels, predictions)
                try:
                    # Scalar metrics are cast to plain floats; matrix-valued
                    # metrics (contingency_matrix, pair_confusion_matrix)
                    # raise TypeError here and are left as NumPy arrays.
                    score = float(score)
                except (AttributeError, TypeError):
                    pass
                results[fn.__name__] = score
        return results
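

if __name__ == "__main__":
    # Minimal usage sketch (an addition for illustration, not part of the
    # original module). The synthetic blobs, cluster count, and seeds are
    # assumptions chosen only to demonstrate the two evaluation modes.
    from sklearn.cluster import KMeans
    from sklearn.datasets import make_blobs

    X, y = make_blobs(n_samples=100, centers=3, random_state=0)
    preds = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(X)

    metric = ClusteringEvaluator()

    # Unsupervised evaluation: samples and predictions are enough.
    print(metric.compute(samples=X.tolist(), predictions=preds.tolist()))

    # Supervised evaluation: truth_labels unlocks the additional metrics.
    print(
        metric.compute(
            samples=X.tolist(),
            predictions=preds.tolist(),
            truth_labels=y.tolist(),
        )
    )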