# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """TODO: Add a description here.""" import evaluate import datasets import pandas as pd import numpy as np import torch # TODO: Add BibTeX citation _CITATION = """\ @InProceedings{huggingface:module, title = {A great new module}, authors={huggingface, Inc.}, year={2020} } """ # TODO: Add description of the module here _DESCRIPTION = """\ This new module is designed to solve this great ML task and is crafted with a lot of care. """ # TODO: Add description of the arguments of the module here _KWARGS_DESCRIPTION = """ Calculates how good are predictions given some references, using certain scores Args: predictions: list of predictions to score. Each predictions should be a string with tokens separated by spaces. references: list of reference for each prediction. Each reference should be a string with tokens separated by spaces. Returns: accuracy: description of the first score, another_score: description of the second score, Examples: Examples should be written in doctest format, and should illustrate how to use the function. >>> my_new_module = evaluate.load("my_new_module") >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1]) >>> print(results) {'accuracy': 1.0} """ # TODO: Define external resources urls if needed BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt" @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) class metric_tp_fp_Datasets(evaluate.Metric): """TODO: Short description of my metric.""" def _info(self): # TODO: Specifies the evaluate.EvaluationModuleInfo object return evaluate.MetricInfo( # This is the description that will appear on the metrics page. module_type="metric", description=_DESCRIPTION, citation=_CITATION, inputs_description=_KWARGS_DESCRIPTION, # This defines the format of each prediction and reference features=datasets.Features({ 'predictions': datasets.features.Sequence(datasets.Value('float32')), 'references': datasets.features.Sequence(datasets.Value('int32')), }), # Homepage of the metric for documentation homepage="http://module.homepage", # Additional links to the codebase or references codebase_urls=["http://github.com/path/to/codebase/of/new_module"], reference_urls=["http://path.to.reference.url/new_module"] ) def _download_and_prepare(self, dl_manager): """Optional: download external resources useful to compute the scores""" # TODO: Download external resources if needed pass #Prediction strategy function selector######################################## def predict(self, logits, prediction_strategy): if prediction_strategy[0] == "argmax_max": results = self.argmax_max(logits) elif prediction_strategy[0] == "softmax_threshold": results = self.softmax_threshold(logits, prediction_strategy[1]) elif prediction_strategy[0] == "softmax_topk": results = self.softmax_topk(logits, prediction_strategy[1]) elif prediction_strategy[0] == "threshold": results = self.threshold(logits, prediction_strategy[1]) elif prediction_strategy[0] == "topk": results = self.topk(logits, prediction_strategy[1]) return results #Prediction strategy functions______________________________________________ def argmax_max(self, logits): predictions = [] argmax = torch.argmax(logits, dim=-1) for prediction in argmax: predicted_indexes = [prediction.item()] predictions.append(predicted_indexes) return predictions def softmax_threshold(logits, threshold): predictions = [] softmax = torch.softmax(logits, dim=-1) for prediction in softmax: predicted_indexes =[] for index, value in enumerate(prediction): if value >= threshold: predicted_indexes.append(index) predictions.append(predicted_indexes) return predictions def softmax_topk(self, logits, topk): softmax = torch.softmax(logits, dim=-1) predictions = softmax.topk(topk).indices.tolist() return predictions def threshold(self, logits, threshold): predictions = [] for prediction in logits: predicted_indexes =[] for index, value in enumerate(prediction): if value >= threshold: predicted_indexes.append(index) predictions.append(predicted_indexes) return predictions def topk(self, logits, topk): predictions = logits.topk(topk).indices.tolist() #print(logits) #print(predictions) return predictions #Builds a report with the metrics#################################################### def metrics_report(self, true_positives = "", false_positives = ""): classes = true_positives.loc[true_positives["class"] != 'total']["class"].tolist() samples = [0 for i in range(len(classes))] results = pd.DataFrame({ "class": classes, "N# of True samples": samples, "N# of False samples": samples, "True Positives": samples, "False Positives": samples, "r": samples, "p": samples, "f1": samples, "acc": samples, }) results.loc[len(results.index)] = ["total", 0, 0, 0, 0, 0, 0, 0, 0] for label in results["class"].tolist(): if label in true_positives["class"].tolist(): label_true_samples = true_positives.loc[true_positives["class"] == label, "number of samples"].iloc[0] label_true_positives = true_positives.loc[true_positives["class"] == label, "coincidence count"].iloc[0] else: label_true_samples = 0 label_true_positives = 0 if label in false_positives["class"].tolist(): label_false_samples = false_positives.loc[false_positives["class"] == label, "number of samples"].iloc[0] label_false_positives = false_positives.loc[false_positives["class"] == label, "coincidence count"].iloc[0] else: label_false_samples = 0 label_false_positives = 0 r = label_true_positives/label_true_samples p = label_true_positives/(label_true_positives+label_false_positives) f1 = 2*r*p/(r+p) if label_false_samples >> 0: label_true_negatives = label_false_samples-label_false_positives else: label_true_negatives = 0 acc = (label_true_positives+label_true_negatives)/(label_true_samples+label_false_samples) results.loc[results["class"] == label, "N# of True samples"] = label_true_samples results.loc[results["class"] == label, "N# of False samples"] = label_false_samples results.loc[results["class"] == label, "True Positives"] = label_true_positives results.loc[results["class"] == label, "False Positives"] = label_false_positives if label != "total": results.loc[results["class"] == label, "r"] = r results.loc[results["class"] == label, "p"] = p results.loc[results["class"] == label, "f1"] = f1 results.loc[results["class"] == label, "acc"] = acc else: results.loc[results["class"] == label, "r"] = "" results.loc[results["class"] == label, "p"] = "" results.loc[results["class"] == label, "f1"] = "" results.loc[results["class"] == label, "acc"] = "" results.loc[len(results.index)] = ["", "", "", "", "Micro avg.", r , p, f1, acc] results = results.fillna(0.0) final_values = results.loc[:len(results.index)-3] results.loc[len(results.index)] = ["", "", "", "", "Macro avg.", final_values["r"].mean(), final_values["p"].mean(), final_values["f1"].mean(), final_values["acc"].mean()] return results #Computes the metric for each prediction strategy############################################## def _compute(self, predictions, references, prediction_strategies = [["argmax_max"],], FPifWrong = False): """Returns the scores""" # TODO: Compute the different scores of the metric predictions = torch.from_numpy(np.array(predictions, dtype = 'float32')) classes = [i for i in range(len(predictions[0]))] #for value in references: # if value[0] not in classes: # classes.append(value[0]) results = {} for prediction_strategy in prediction_strategies: prediction_strategy_name = '-'.join(map(str, prediction_strategy)) print(prediction_strategy_name) results[prediction_strategy_name] = {} predicted_labels = self.predict(predictions, prediction_strategy) samples = [0 for i in range(len(classes))] TP_data = pd.DataFrame({ "class": classes, "number of samples": samples, "coincidence count": samples, }) FP_data = pd.DataFrame({ "class": classes, "number of samples": samples, "coincidence count": samples, }) for i, j in zip(predicted_labels, references): #print(i) if j[1] == 0: TP_data.loc[TP_data["class"] == j[0], "number of samples"] += 1 if len(i) >> 0: if j[0] in i: TP_data.loc[TP_data["class"] == j[0], "coincidence count"] += 1 TP_data = TP_data.sort_values(by=["class"], ignore_index = True) else: if FPifWrong: for k in i: FP_data.loc[FP_data["class"] == k, "coincidence count"] += 1 FP_data = FP_data.sort_values(by=["class"], ignore_index = True) if j[1] == 2: FP_data.loc[FP_data["class"] == j[0], "number of samples"] += 1 if len(i) >> 0: if j[0] in i: FP_data.loc[FP_data["class"] == j[0], "coincidence count"] += 1 FP_data = FP_data.sort_values(by=["class"], ignore_index = True) TP_data.loc[len(TP_data.index)] =["total", TP_data["number of samples"].sum(), TP_data["coincidence count"].sum()] FP_data.loc[len(FP_data.index)] =["total", FP_data["number of samples"].sum(), FP_data["coincidence count"].sum()] report_table = self.metrics_report( true_positives = TP_data, false_positives = FP_data ) results[prediction_strategy_name] = report_table.rename_axis(prediction_strategy_name, axis='columns') return results