# metric_for_tp_fp_samples.py
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TODO: Add a description here."""
import evaluate
import datasets
import pandas as pd
import numpy as np
import torch
# TODO: Add BibTeX citation
_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={huggingface, Inc.},
year={2020}
}
"""
_DESCRIPTION = """\
Computes per-class and aggregate recall, precision, F1 and accuracy from model logits,
using separate pools of true-positive and false-positive test samples and a configurable
prediction strategy (argmax, softmax or raw-logit thresholding, and top-k selection).
"""
_KWARGS_DESCRIPTION = """
Scores predictions against references for true-positive and false-positive test samples.
Args:
    predictions: list of predictions to score. Each prediction is the sequence of
        logits produced by the model for one sample, one value per class.
    references: list of references, one per prediction. Each reference is a pair
        [class_index, sample_type], where sample_type 0 marks a true-positive sample
        and sample_type 2 marks a false-positive sample.
    prediction_strategies: list of strategies used to turn logits into predicted class
        indexes. Each strategy is a list whose first element is one of "argmax_max",
        "softmax_threshold", "softmax_topk", "threshold" or "topk", optionally followed
        by the threshold value or the k value.
Returns:
    A dictionary keyed by strategy name. Each entry is a pandas DataFrame with per-class
    true/false sample counts, true/false positive counts, recall (r), precision (p), f1
    and accuracy (acc), plus micro- and macro-averaged rows.
Examples:
    A runnable sketch appears at the bottom of this file.
"""
# TODO: Define external resources urls if needed
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class metric_tp_fp_Datasets(evaluate.Metric):
"""TODO: Short description of my metric."""

    def _info(self):
        # TODO: Specifies the evaluate.EvaluationModuleInfo object
        return evaluate.MetricInfo(
            # This is the description that will appear on the metrics page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                'predictions': datasets.features.Sequence(datasets.Value('float32')),
                'references': datasets.features.Sequence(datasets.Value('int32')),
            }),
            # Homepage of the metric for documentation
            homepage="http://module.homepage",
            # Additional links to the codebase or references
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"]
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""
        # TODO: Download external resources if needed
        pass

    # Prediction strategy function selector #######################################
    def predict(self, logits, prediction_strategy):
        """Dispatches to the prediction strategy named in prediction_strategy[0]."""
        if prediction_strategy[0] == "argmax_max":
            results = self.argmax_max(logits)
        elif prediction_strategy[0] == "softmax_threshold":
            results = self.softmax_threshold(logits, prediction_strategy[1])
        elif prediction_strategy[0] == "softmax_topk":
            results = self.softmax_topk(logits, prediction_strategy[1])
        elif prediction_strategy[0] == "threshold":
            results = self.threshold(logits, prediction_strategy[1])
        elif prediction_strategy[0] == "topk":
            results = self.topk(logits, prediction_strategy[1])
        else:
            raise ValueError(f"Unknown prediction strategy: {prediction_strategy[0]}")
        return results

    # Prediction strategy functions ______________________________________________
    def argmax_max(self, logits):
        """Predicts the single highest-scoring class index for each sample."""
        predictions = []
        argmax = torch.argmax(logits, dim=-1)
        for prediction in argmax:
            predicted_indexes = [prediction.item()]
            predictions.append(predicted_indexes)
        return predictions

    def softmax_threshold(self, logits, threshold):
        """Predicts every class index whose softmax probability reaches the threshold."""
        predictions = []
        softmax = torch.softmax(logits, dim=-1)
        for prediction in softmax:
            predicted_indexes = []
            for index, value in enumerate(prediction):
                if value >= threshold:
                    predicted_indexes.append(index)
            predictions.append(predicted_indexes)
        return predictions

    def softmax_topk(self, logits, topk):
        """Predicts the topk class indexes with the highest softmax probabilities."""
        softmax = torch.softmax(logits, dim=-1)
        predictions = softmax.topk(topk).indices.tolist()
        return predictions

    def threshold(self, logits, threshold):
        """Predicts every class index whose raw logit reaches the threshold."""
        predictions = []
        for prediction in logits:
            predicted_indexes = []
            for index, value in enumerate(prediction):
                if value >= threshold:
                    predicted_indexes.append(index)
            predictions.append(predicted_indexes)
        return predictions

    def topk(self, logits, topk):
        """Predicts the topk class indexes with the highest raw logits."""
        predictions = logits.topk(topk).indices.tolist()
        return predictions

    # Builds a report with the metrics ###################################################
    def metrics_report(self, true_positives=None, false_positives=None):
        """Builds a per-class report with recall (r), precision (p), f1 and accuracy (acc),
        plus micro- and macro-averaged rows, from the TP and FP count tables."""
        classes = true_positives.loc[true_positives["class"] != 'total']["class"].tolist()
        samples = [0 for i in range(len(classes))]
        results = pd.DataFrame({
            "class": classes,
            "N# of True samples": samples,
            "N# of False samples": samples,
            "True Positives": samples,
            "False Positives": samples,
            "r": samples,
            "p": samples,
            "f1": samples,
            "acc": samples,
        })
        results.loc[len(results.index)] = ["total", 0, 0, 0, 0, 0, 0, 0, 0]
        for label in results["class"].tolist():
            if label in true_positives["class"].tolist():
                label_true_samples = true_positives.loc[true_positives["class"] == label, "number of samples"].iloc[0]
                label_true_positives = true_positives.loc[true_positives["class"] == label, "coincidence count"].iloc[0]
            else:
                label_true_samples = 0
                label_true_positives = 0
            if label in false_positives["class"].tolist():
                label_false_samples = false_positives.loc[false_positives["class"] == label, "number of samples"].iloc[0]
                label_false_positives = false_positives.loc[false_positives["class"] == label, "coincidence count"].iloc[0]
            else:
                label_false_samples = 0
                label_false_positives = 0
            # Guard the ratios against empty classes to avoid division by zero.
            r = label_true_positives / label_true_samples if label_true_samples else 0.0
            p = label_true_positives / (label_true_positives + label_false_positives) if (label_true_positives + label_false_positives) else 0.0
            f1 = 2 * r * p / (r + p) if (r + p) else 0.0
            acc = (label_true_positives + (label_false_samples - label_false_positives)) / (label_true_samples + label_false_samples) if (label_true_samples + label_false_samples) else 0.0
            results.loc[results["class"] == label, "N# of True samples"] = label_true_samples
            results.loc[results["class"] == label, "N# of False samples"] = label_false_samples
            results.loc[results["class"] == label, "True Positives"] = label_true_positives
            results.loc[results["class"] == label, "False Positives"] = label_false_positives
            if label != "total":
                results.loc[results["class"] == label, "r"] = r
                results.loc[results["class"] == label, "p"] = p
                results.loc[results["class"] == label, "f1"] = f1
                results.loc[results["class"] == label, "acc"] = acc
            else:
                results.loc[results["class"] == label, "r"] = ""
                results.loc[results["class"] == label, "p"] = ""
                results.loc[results["class"] == label, "f1"] = ""
                results.loc[results["class"] == label, "acc"] = ""
        # "total" is the last label, so r, p, f1 and acc now hold the micro-averaged values.
        results.loc[len(results.index)] = ["", "", "", "", "Micro avg.", r, p, f1, acc]
        results = results.fillna(0.0)
        # Macro averages are taken over the per-class rows only (excluding "total" and "Micro avg.").
        final_values = results.loc[:len(results.index) - 3]
        results.loc[len(results.index)] = ["", "", "", "", "Macro avg.", final_values["r"].mean(), final_values["p"].mean(), final_values["f1"].mean(), final_values["acc"].mean()]
        return results

    # Computes the metric for each prediction strategy #############################################
    def _compute(self, predictions, references, prediction_strategies=None):
        """Returns one report table per prediction strategy."""
        if prediction_strategies is None:
            prediction_strategies = []
        predictions = torch.from_numpy(np.array(predictions, dtype='float32'))
        classes = []
        for value in references:
            if value[0] not in classes:
                classes.append(value[0])
        results = {}
        for prediction_strategy in prediction_strategies:
            prediction_strategy_name = '-'.join(map(str, prediction_strategy))
            results[prediction_strategy_name] = {}
            predicted_labels = self.predict(predictions, prediction_strategy)
            samples = [0 for i in range(len(classes))]
            TP_data = pd.DataFrame({
                "class": classes,
                "number of samples": samples,
                "coincidence count": samples,
            })
            FP_data = pd.DataFrame({
                "class": classes,
                "number of samples": samples,
                "coincidence count": samples,
            })
            for i, j in zip(predicted_labels, references):
                # j = [class_index, sample_type]; sample_type 0 marks a true-positive
                # sample and sample_type 2 marks a false-positive sample.
                if j[1] == 0:
                    TP_data.loc[TP_data["class"] == j[0], "number of samples"] += 1
                    if len(i) > 0:
                        if j[0] in i:
                            TP_data.loc[TP_data["class"] == j[0], "coincidence count"] += 1
                    TP_data = TP_data.sort_values(by=["class"], ignore_index=True)
                if j[1] == 2:
                    FP_data.loc[FP_data["class"] == j[0], "number of samples"] += 1
                    if len(i) > 0:
                        if j[0] in i:
                            FP_data.loc[FP_data["class"] == j[0], "coincidence count"] += 1
                    FP_data = FP_data.sort_values(by=["class"], ignore_index=True)
            TP_data.loc[len(TP_data.index)] = ["total", TP_data["number of samples"].sum(), TP_data["coincidence count"].sum()]
            FP_data.loc[len(FP_data.index)] = ["total", FP_data["number of samples"].sum(), FP_data["coincidence count"].sum()]
            report_table = self.metrics_report(
                true_positives=TP_data,
                false_positives=FP_data
            )
            results[prediction_strategy_name] = report_table.rename_axis(prediction_strategy_name, axis='columns')
        return results
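

# A minimal usage sketch, assuming the class is instantiated directly rather than
# loaded through evaluate.load(); the toy logits, references and strategy choices
# below are illustrative only and are not part of the original module.
if __name__ == "__main__":
    # Three classes; each reference is [class_index, sample_type], with sample_type 0
    # marking the true-positive sample pool and 2 the false-positive sample pool.
    toy_predictions = [
        [2.0, 0.5, 0.1],
        [0.2, 1.5, 0.3],
        [0.1, 0.2, 3.0],
        [1.2, 0.4, 0.9],
    ]
    toy_references = [
        [0, 0],
        [1, 0],
        [2, 2],
        [0, 2],
    ]
    metric = metric_tp_fp_Datasets()
    reports = metric.compute(
        predictions=toy_predictions,
        references=toy_references,
        prediction_strategies=[["argmax_max"], ["softmax_threshold", 0.5], ["topk", 2]],
    )
    # One report table per strategy, keyed by the joined strategy name.
    for strategy_name, table in reports.items():
        print(strategy_name)
        print(table)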