# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Geometric mean metric."""

import datasets
from imblearn.metrics import geometric_mean_score

import evaluate


_DESCRIPTION = """
The geometric mean (G-mean) is the root of the product of class-wise sensitivity. This measure tries to maximize
the accuracy on each of the classes while keeping these accuracies balanced. For binary classification, G-mean is
the square root of the product of the sensitivity and specificity. For multi-class problems it is a higher root
of the product of the sensitivity for each class.
"""


_KWARGS_DESCRIPTION = """
Computes the geometric mean (G-mean) of the predictions with respect to the references.

Args:
    predictions (`list` of `int`): Predicted labels.
    references (`list` of `int`): Ground truth labels.
    labels (`list` of `int`): The set of labels to include when `average` != `'binary'`, and their order if
        `average` is None. Labels present in the data can be excluded, for example to calculate a multiclass
        average ignoring a majority negative class, while labels not present in the data will result in 0
        components in a macro average. Defaults to None.
    pos_label (`str` or `int`): The class to report if `average='binary'` and the data is binary. If the data are
        multiclass, this will be ignored; setting `labels=[pos_label]` and `average != 'binary'` will report
        scores for that label only. Defaults to 1.
    average (`str`): If None, the scores for each class are returned. Otherwise, this determines the type of
        averaging performed on the data. Defaults to `'multiclass'`.
        - 'binary': Only report results for the class specified by `pos_label`. This is applicable only if the
            targets (`y_{true,pred}`) are binary.
        - 'micro': Calculate metrics globally by counting the total true positives, false negatives and false
            positives.
        - 'macro': Calculate metrics for each label, and find their unweighted mean. This does not take label
            imbalance into account.
        - 'weighted': Calculate metrics for each label, and find their average, weighted by support (the number
            of true instances for each label).
        - 'samples': Calculate metrics for each instance, and find their average (only meaningful for multilabel
            classification where this differs from `accuracy_score`).
    sample_weight (`list` of `float`): Sample weights. Defaults to None.
    correction (`float`): Value substituted for the sensitivity of unrecognized classes instead of zero.
        Defaults to 0.0.

Returns:
    geometric_mean (`float` or `array` of `float`): Geometric mean score or list of geometric mean scores,
        depending on the value passed to `average`. Minimum possible value is 0. Maximum possible value is 1.
        Higher geometric mean scores are better.
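
For binary data the G-mean reduces to the square root of sensitivity times specificity. This identity can be
checked directly with scikit-learn's `recall_score` (a minimal sketch, not how the metric is computed
internally; it delegates to `imblearn.metrics.geometric_mean_score`):

    >>> from sklearn.metrics import recall_score
    >>> refs, preds = [0, 1, 0, 1, 0], [0, 0, 1, 1, 0]
    >>> sensitivity = recall_score(refs, preds, pos_label=1)
    >>> specificity = recall_score(refs, preds, pos_label=0)
    >>> print(round((sensitivity * specificity) ** 0.5, 2))
    0.58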
Examples:

    Example 1-A simple binary example
        >>> geometric_mean = evaluate.load("geometric_mean")
        >>> results = geometric_mean.compute(references=[0, 1, 0, 1, 0], predictions=[0, 0, 1, 1, 0])
        >>> print(round(results['geometric-mean'], 2))
        0.58

    Example 2-The same simple binary example as in Example 1, but with `sample_weight` included.
        >>> geometric_mean = evaluate.load("geometric_mean")
        >>> results = geometric_mean.compute(references=[0, 1, 0, 1, 0], predictions=[0, 0, 1, 1, 0], sample_weight=[0.9, 0.5, 3.9, 1.2, 0.3])
        >>> print(round(results['geometric-mean'], 2))
        0.35

    Example 3-A multiclass example, with `average` equal to `'macro'`.
        >>> geometric_mean = evaluate.load("geometric_mean")
        >>> predictions = [0, 2, 1, 0, 0, 1]
        >>> references = [0, 1, 2, 0, 1, 2]
        >>> results = geometric_mean.compute(predictions=predictions, references=references, average="macro")
        >>> print(round(results['geometric-mean'], 2))
        0.47
"""


_CITATION = """
@article{imbalanced-learn,
    title={Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
    author={Lemaître, G. and Nogueira, F. and Aridas, C.},
    journal={Journal of Machine Learning Research},
    volume={18},
    pages={1-5},
    year={2017}
}
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class GeometricMean(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features(
                {
                    "predictions": datasets.Sequence(datasets.Value("int32")),
                    "references": datasets.Sequence(datasets.Value("int32")),
                }
                if self.config_name == "multilabel"
                else {
                    "predictions": datasets.Value("int32"),
                    "references": datasets.Value("int32"),
                }
            ),
            reference_urls=[
                "http://glemaitre.github.io/imbalanced-learn/generated/imblearn.metrics.geometric_mean_score.html"
            ],
        )

    def _compute(
        self,
        predictions,
        references,
        labels=None,
        pos_label=1,
        average="multiclass",
        sample_weight=None,
        correction=0.0,
    ):
        # imblearn expects (y_true, y_pred); it returns a scalar for averaged
        # results and an array of per-class scores when `average` is None.
        score = geometric_mean_score(
            references,
            predictions,
            labels=labels,
            pos_label=pos_label,
            average=average,
            sample_weight=sample_weight,
            correction=correction,
        )
        return {"geometric-mean": float(score) if score.size == 1 else score}
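
# Minimal usage sketch (illustrative, not part of the metric module). It assumes
# this script is saved as `geometric_mean.py` so that `evaluate.load` can resolve
# it, either by name from the Hub or by local path:
#
#     import evaluate
#
#     geometric_mean = evaluate.load("geometric_mean")  # or a local path to the script
#     results = geometric_mean.compute(
#         references=[0, 1, 0, 1, 0],
#         predictions=[0, 0, 1, 1, 0],
#     )
#     print(results)  # prints {'geometric-mean': 0.577...}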