File size: 5,820 Bytes
eb36f93
 
 
 
 
 
 
 
 
 
 
 
 
6a2e481
 
eb36f93
 
 
6a2e481
eb36f93
 
 
6a2e481
 
 
 
 
 
 
 
 
 
eb36f93
 
 
 
6a2e481
 
 
 
 
eb36f93
 
 
 
 
6a2e481
 
c15140e
6a2e481
 
 
 
 
 
 
 
 
4b1329a
eb36f93
4b1329a
6a2e481
 
4b1329a
6a2e481
4b1329a
 
c15140e
4b1329a
 
eb36f93
 
 
 
 
 
 
 
 
 
 
 
 
 
6a2e481
4b1329a
eb36f93
 
1a601d7
eb36f93
6a2e481
 
eb36f93
 
 
c15140e
6a2e481
5aabb05
6a2e481
92428f1
be21c76
92428f1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""F-Beta score"""

import evaluate
import datasets
from sklearn.metrics import fbeta_score


# BibTeX entry for scikit-learn, whose `fbeta_score` implementation this metric wraps.
# Passed as `citation=` to `evaluate.MetricInfo` in `F_Beta._info`.
_CITATION = """\
@article{scikit-learn,
    title={Scikit-learn: Machine Learning in {P}ython},
    author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
           and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
           and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
           Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
    journal={Journal of Machine Learning Research},
    volume={12},
    pages={2825--2830},
    year={2011}
}
"""

_DESCRIPTION = """\
Compute the F-beta score.

The F-beta score is the weighted harmonic mean of precision and recall, reaching its optimal value at 1 and its worst value at 0.

The beta parameter determines the weight of recall in the combined score. beta < 1 lends more weight to precision, while beta > 1 favors recall (beta -> 0 considers only precision, beta -> +inf only recall).
"""


_KWARGS_DESCRIPTION = """
Args:
    predictions (`list` of `int`): Predicted labels.
    references (`list` of `int`): Ground truth labels.
    beta (`float`): Determines the weight of recall in the combined score. Defaults to 0.5
    labels (`list` of `int`): The set of labels to include when `average` is not set to `'binary'`, and the order of the labels if `average` is `None`. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class. Labels not present in the data will result in 0 components in a macro average. For multilabel targets, labels are column indices. By default, all labels in `predictions` and `references` are used in sorted order. Defaults to None.
    pos_label (`int`): The class to be considered the positive class, in the case where `average` is set to `binary`. Defaults to 1.
    average (`string`): This parameter is required for multiclass/multilabel targets. If set to `None`, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Defaults to `'binary'`.
        - 'binary': Only report results for the class specified by `pos_label`. This is applicable only if the classes found in `predictions` and `references` are binary.
        - 'micro': Calculate metrics globally by counting the total true positives, false negatives and false positives.
        - 'macro': Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.
        - 'weighted': Calculate metrics for each label, and find their average weighted by support (the number of true instances for each label). This alters `'macro'` to account for label imbalance. This option can result in an F-score that is not between precision and recall.
        - 'samples': Calculate metrics for each instance, and find their average (only meaningful for multilabel classification).
    sample_weight (`list` of `float`): Sample weights Defaults to None.
        
Returns:
    fbeta_score (`float` (if average is not None) or `array` of `float`, shape =\ [n_unique_labels]): of the positive class in binary classification or weighted average of the F1 scores of each class for the multiclass task.
    The F-beta score is the weighted harmonic mean of precision and recall, reaching its optimal value at 1 and its worst value at 0.

Examples:    
    
    Example 1-A simple binary example    
        >>> f_beta = evaluate.load("leslyarun/f_beta")
        >>> results = f_beta.compute(references=[0, 1], predictions=[0, 1], beta=0.5)
        >>> print(results)
        {'f_beta_score': 1.0}    
"""


# `evaluate` metric module that wraps scikit-learn's `fbeta_score`.
# The user-facing class docstring is assembled by the decorator below from
# _DESCRIPTION and _KWARGS_DESCRIPTION, so the class documentation lives in
# those constants rather than in a docstring here.
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class F_Beta(evaluate.Metric):
    def _info(self):
        """Return the module metadata: description, citation, input schema and links."""
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference:
            # one int32 label per example for both columns.
            features=datasets.Features({
                'predictions': datasets.Value('int32'),
                'references': datasets.Value('int32')
            }),
            # Homepage of the module for documentation
            homepage="https://huggingface.co/spaces/leslyarun/fbeta_score",
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/scikit-learn/scikit-learn/blob/f3f51f9b6/sklearn/metrics/_classification.py#L1148"],
            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.fbeta_score.html#sklearn.metrics.fbeta_score"]
        )

    def _compute(self, predictions, references, beta=0.5, labels=None, pos_label=1, average="binary", sample_weight=None):
        """Compute the F-beta score by delegating to `sklearn.metrics.fbeta_score`.

        Args:
            predictions: Predicted labels.
            references: Ground truth labels (passed to scikit-learn as `y_true`).
            beta: Weight of recall relative to precision.
            labels: Labels to include; forwarded unchanged.
            pos_label: Positive class used when `average` is `"binary"`.
            average: Averaging mode; `None` requests one score per class.
            sample_weight: Optional per-sample weights; forwarded unchanged.

        Returns:
            A dict with the single key `"f_beta_score"`. The value is a Python
            `float` when the result holds exactly one value, otherwise the
            array returned by scikit-learn.
        """
        # scikit-learn's argument order is (y_true, y_pred): references come first.
        score = fbeta_score(
            references,
            predictions,
            beta=beta,
            labels=labels,
            pos_label=pos_label,
            average=average,
            sample_weight=sample_weight,
        )

        # The result may be a NumPy scalar/array (has `.size`) or a plain Python
        # float (no `.size`). Reading `.size` unconditionally raises
        # AttributeError on a plain float, so treat a missing attribute as a
        # single value.
        if getattr(score, "size", 1) != 1:
            return {"f_beta_score": score}

        # `.item()` unwraps NumPy scalars and one-element arrays without relying
        # on implicit array-to-float conversion.
        scalar = score.item() if hasattr(score, "item") else score
        return {"f_beta_score": float(scalar)}