Spaces:

gorkaartola
/

metric_for_tp_fp_samples

Runtime error

File size: 9,931 Bytes

# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TODO: Add a description here."""

import evaluate
import datasets
import pandas as pd
import numpy as np
import torch

# TODO: Add BibTeX citation
_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={huggingface, Inc.},
year={2020}
}
"""

# TODO: Add description of the module here
_DESCRIPTION = """\
This new module is designed to solve this great ML task and is crafted with a lot of care.
"""


# TODO: Add description of the arguments of the module here
_KWARGS_DESCRIPTION = """
Calculates how good are predictions given some references, using certain scores
Args:
    predictions: list of predictions to score. Each predictions
        should be a string with tokens separated by spaces.
    references: list of reference for each prediction. Each
        reference should be a string with tokens separated by spaces.
Returns:
    accuracy: description of the first score,
    another_score: description of the second score,
Examples:
    Examples should be written in doctest format, and should illustrate how
    to use the function.

    >>> my_new_module = evaluate.load("my_new_module")
    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
    >>> print(results)
    {'accuracy': 1.0}
"""

# TODO: Define external resources urls if needed
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"

@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class metric_tp_fp_Datasets(evaluate.Metric):
	"""TODO: Short description of my metric."""
	def _info(self):
		# TODO: Specifies the evaluate.EvaluationModuleInfo object
		return evaluate.MetricInfo(
			# This is the description that will appear on the metrics page.
            		module_type="metric",			
			description=_DESCRIPTION,
			citation=_CITATION,
			inputs_description=_KWARGS_DESCRIPTION,
			# This defines the format of each prediction and reference
			features=datasets.Features({
				'predictions': datasets.features.Sequence(datasets.Value('float32')),
				'references': datasets.features.Sequence(datasets.Value('int32')),
				}),
			# Homepage of the metric for documentation
			homepage="http://module.homepage",
			# Additional links to the codebase or references
			codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
			reference_urls=["http://path.to.reference.url/new_module"]
			)

	def _download_and_prepare(self, dl_manager):
		"""Optional: download external resources useful to compute the scores"""
		# TODO: Download external resources if needed
		pass
	
	#Prediction strategy function selector########################################
	def predict(self, logits, prediction_strategy):
		if prediction_strategy[0] == "argmax_max":
			results = self.argmax_max(logits)
		elif prediction_strategy[0] == "softmax_threshold":
			results = self.softmax_threshold(logits, prediction_strategy[1])
		elif prediction_strategy[0] == "softmax_topk":
			results = self.softmax_topk(logits, prediction_strategy[1])
		elif prediction_strategy[0] == "threshold":
			results = self.threshold(logits, prediction_strategy[1])
		elif prediction_strategy[0] == "topk":
			results = self.topk(logits, prediction_strategy[1])
		return results
	#Prediction strategy functions______________________________________________
	def argmax_max(self, logits):
		predictions = []
		argmax = torch.argmax(logits, dim=-1)
		for prediction in argmax:
			predicted_indexes = [prediction.item()]
			predictions.append(predicted_indexes)	
		return predictions
	def softmax_threshold(logits, threshold):
		predictions = []
		softmax = torch.softmax(logits, dim=-1)
		for prediction in softmax:
			predicted_indexes =[]
			for index, value in enumerate(prediction):
				if value >= threshold:
					predicted_indexes.append(index)
			predictions.append(predicted_indexes)
		return predictions
	def softmax_topk(self, logits, topk):
		softmax = torch.softmax(logits, dim=-1)
		predictions = softmax.topk(topk).indices.tolist()
		return predictions
	def threshold(self, logits, threshold):
		predictions = []
		for prediction in logits:
			predicted_indexes =[]
			for index, value in enumerate(prediction):
				if value >= threshold:
					predicted_indexes.append(index)
			predictions.append(predicted_indexes)
		return predictions
	def topk(self, logits, topk):
		predictions = logits.topk(topk).indices.tolist()
		return predictions

	#Builds a report with the metrics####################################################
	def metrics_report(self, true_positives = "", false_positives = ""):
		classes = true_positives.loc[true_positives["class"] != 'total']["class"].tolist()
		samples = [0 for i in range(len(classes))]
		results = pd.DataFrame({
			"class": classes,
			"N# of True samples": samples,
			"N# of False samples": samples,
			"True Positives": samples,
			"False Positives": samples,
			"r": samples,
			"p": samples,
			"f1": samples,
			"acc": samples,
			})
		results.loc[len(results.index)] =  ["total", 0, 0, 0, 0, 0, 0, 0, 0]
			
		for label in results["class"].tolist():
			if label in true_positives["class"].tolist():
				label_true_samples = true_positives.loc[true_positives["class"] == label, "number of samples"].iloc[0]
				label_true_positives = true_positives.loc[true_positives["class"] == label, "coincidence count"].iloc[0]
			else:
				label_true_samples = 0
				label_true_positives = 0		
			if label in false_positives["class"].tolist():		
				label_false_samples = false_positives.loc[false_positives["class"] == label, "number of samples"].iloc[0]
				label_false_positives = false_positives.loc[false_positives["class"] == label, "coincidence count"].iloc[0]
			else:
				label_false_samples = 0
				label_false_positives = 0
	
			r = label_true_positives/label_true_samples	
			p = label_true_positives/(label_true_positives+label_false_positives)
			f1 = 2*r*p/(r+p)
			acc = (label_true_positives+(label_false_samples-label_false_positives))/(label_true_samples+label_false_samples)
		
			results.loc[results["class"] == label, "N# of True samples"] = label_true_samples
			results.loc[results["class"] == label, "N# of False samples"] = label_false_samples
			results.loc[results["class"] == label, "True Positives"] = label_true_positives
			results.loc[results["class"] == label, "False Positives"] = label_false_positives
			if label != "total":
				results.loc[results["class"] == label, "r"] = r
				results.loc[results["class"] == label, "p"] = p
				results.loc[results["class"] == label, "f1"] = f1
				results.loc[results["class"] == label, "acc"] = acc
			else:
				results.loc[results["class"] == label, "r"] = ""
				results.loc[results["class"] == label, "p"] = ""
				results.loc[results["class"] == label, "f1"] = ""
				results.loc[results["class"] == label, "acc"] = ""
				results.loc[len(results.index)] =  ["", "", "", "", "Micro avg.", r , p, f1, acc]
		results = results.fillna(0.0)
		final_values = results.loc[:len(results.index)-3]
		results.loc[len(results.index)] =  ["", "", "", "", "Macro avg.", final_values["r"].mean(), final_values["p"].mean(), final_values["f1"].mean(), final_values["acc"].mean()]
		return results

	#Computes the metric for each prediction strategy##############################################
	def _compute(self, predictions, references, prediction_strategies = [["argmax_max"],]):
		"""Returns the scores"""
		# TODO: Compute the different scores of the metric
		predictions = torch.from_numpy(np.array(predictions, dtype = 'float32'))
		classes = [i for i in range(len(predictions[0]))]
		#for value in references:
		#	if value[0] not in classes:
		#		classes.append(value[0])
		results = {}
		for prediction_strategy in prediction_strategies:
			prediction_strategy_name = '-'.join(map(str, prediction_strategy))
			print(prediction_strategy_name)
			results[prediction_strategy_name] = {}
			predicted_labels = self.predict(predictions, prediction_strategy)
			samples = [0 for i in range(len(classes))]
			TP_data = pd.DataFrame({
				"class": classes,
				"number of samples": samples,
				"coincidence count": samples,
				})
			FP_data = pd.DataFrame({
				"class": classes,
				"number of samples": samples,
				"coincidence count": samples,
				})
			for i, j in zip(predicted_labels, references):
				if j[1] == 0:
					TP_data.loc[TP_data["class"] == j[0], "number of samples"] += 1
					if len(i) >> 0:	
						if j[0] in i:
							TP_data.loc[TP_data["class"] == j[0], "coincidence count"] += 1
							TP_data = TP_data.sort_values(by=["class"], ignore_index  = True)
				if j[1] == 2:
					FP_data.loc[FP_data["class"] == j[0], "number of samples"] += 1
					if len(i) >> 0:
						if j[0] in i:
							FP_data.loc[FP_data["class"] == j[0], "coincidence count"] += 1
							FP_data = FP_data.sort_values(by=["class"], ignore_index  = True)			
			TP_data.loc[len(TP_data.index)] =["total", TP_data["number of samples"].sum(), TP_data["coincidence count"].sum()]
			FP_data.loc[len(FP_data.index)] =["total", FP_data["number of samples"].sum(), FP_data["coincidence count"].sum()]
			report_table = self.metrics_report(
				true_positives = TP_data,
				false_positives = FP_data
				)
			results[prediction_strategy_name] = report_table.rename_axis(prediction_strategy_name, axis='columns')
		return results