# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TODO: Add a description here."""

import evaluate
import datasets
import pandas as pd
import numpy as np
import torch

# TODO: Add BibTeX citation
_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={huggingface, Inc.},
year={2020}
}
"""

# TODO: Add description of the module here
_DESCRIPTION = """\
This new module is designed to solve this great ML task and is crafted with a lot of care.
"""

# TODO: Add description of the arguments of the module here
_KWARGS_DESCRIPTION = """
Calculates how good predictions are, given some references, using certain scores.
Args:
    predictions: list of predictions to score. Each prediction
        should be a list of per-class logits (floats).
    references: list of references, one per prediction. Each
        reference should be a list of integers [class_index, flag],
        where flag 0 marks a positive (true) sample and flag 2 marks
        a negative (false) sample.
    prediction_strategies: list of prediction strategies to evaluate,
        e.g. [["argmax_max"], ["topk", 3]].
Returns:
    A dict mapping each prediction strategy name to a pandas DataFrame
    with per-class true/false positive counts, recall (r), precision (p),
    f1 and accuracy (acc), plus micro and macro averages.
Examples:
    >>> my_new_module = evaluate.load("my_new_module")
    >>> results = my_new_module.compute(
    ...     references=[[0, 0], [1, 0]],
    ...     predictions=[[2.0, 0.1], [0.3, 1.5]],
    ... )
"""

# TODO: Define external resources urls if needed
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"


class metric_tp_fp_Datasets(evaluate.Metric):
    """TODO: Short description of my metric."""

    def _info(self):
        # TODO: Specifies the evaluate.EvaluationModuleInfo object
        return evaluate.MetricInfo(
            # This is the description that will appear on the metrics page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                'predictions': datasets.features.Sequence(datasets.Value('float32')),
                'references': datasets.features.Sequence(datasets.Value('int32')),
            }),
            # Homepage of the metric for documentation
            homepage="http://module.homepage",
            # Additional links to the codebase or references
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"]
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""
        # TODO: Download external resources if needed
        pass

    # Prediction strategy function selector ########################################
    def predict(self, logits, prediction_strategy):
        if prediction_strategy[0] == "argmax_max":
            results = self.argmax_max(logits)
        elif prediction_strategy[0] == "softmax_threshold":
            results = self.softmax_threshold(logits, prediction_strategy[1])
        elif prediction_strategy[0] == "softmax_topk":
            results = self.softmax_topk(logits, prediction_strategy[1])
        elif prediction_strategy[0] == "threshold":
            results = self.threshold(logits, prediction_strategy[1])
        elif prediction_strategy[0] == "topk":
            results = self.topk(logits, prediction_strategy[1])
        else:
            raise ValueError(f"Unknown prediction strategy: {prediction_strategy[0]}")
        return results
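
    # Illustrative strategy formats (example values assumed, not taken from the original
    # script): each strategy is a list whose first element names the strategy and whose
    # optional second element is its parameter, e.g.
    #   ["argmax_max"]              -> keep only the most likely class
    #   ["softmax_threshold", 0.5]  -> keep classes whose softmax score >= 0.5
    #   ["softmax_topk", 3]         -> keep the 3 highest softmax scores
    #   ["threshold", 0.0]          -> keep classes whose raw logit >= 0.0
    #   ["topk", 3]                 -> keep the 3 highest raw logits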

    # Prediction strategy functions ______________________________________________
    def argmax_max(self, logits):
        # Single-label prediction: the index with the highest logit for each sample.
        predictions = []
        argmax = torch.argmax(logits, dim=-1)
        for prediction in argmax:
            predicted_indexes = [prediction.item()]
            predictions.append(predicted_indexes)
        return predictions

    def softmax_threshold(self, logits, threshold):
        # Multi-label prediction: every index whose softmax score reaches the threshold.
        predictions = []
        softmax = torch.softmax(logits, dim=-1)
        for prediction in softmax:
            predicted_indexes = []
            for index, value in enumerate(prediction):
                if value >= threshold:
                    predicted_indexes.append(index)
            predictions.append(predicted_indexes)
        return predictions

    def softmax_topk(self, logits, topk):
        # The topk indices with the highest softmax scores for each sample.
        softmax = torch.softmax(logits, dim=-1)
        predictions = softmax.topk(topk).indices.tolist()
        return predictions

    def threshold(self, logits, threshold):
        # Every index whose raw logit reaches the threshold.
        predictions = []
        for prediction in logits:
            predicted_indexes = []
            for index, value in enumerate(prediction):
                if value >= threshold:
                    predicted_indexes.append(index)
            predictions.append(predicted_indexes)
        return predictions

    def topk(self, logits, topk):
        # The topk indices with the highest raw logits for each sample.
        predictions = logits.topk(topk).indices.tolist()
        return predictions
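
    # Illustrative behaviour of the strategies above (toy values, assumed purely for
    # demonstration): for logits = torch.tensor([[2.0, 0.5, -1.0]]) the softmax scores
    # are roughly [0.79, 0.18, 0.04], so
    #   argmax_max               -> [[0]]
    #   softmax_threshold, 0.5   -> [[0]]
    #   softmax_topk, 2          -> [[0, 1]]
    #   threshold, 0.0           -> [[0, 1]]
    #   topk, 2                  -> [[0, 1]]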

    # Builds a report with the metrics ####################################################
    def metrics_report(self, true_positives=None, false_positives=None):
        # Both inputs are DataFrames with columns "class", "number of samples" and
        # "coincidence count", including a final "total" row (see _compute below).
        classes = true_positives.loc[true_positives["class"] != 'total']["class"].tolist()
        samples = [0 for i in range(len(classes))]
        results = pd.DataFrame({
            "class": classes,
            "N# of True samples": samples,
            "N# of False samples": samples,
            "True Positives": samples,
            "False Positives": samples,
            "r": samples,
            "p": samples,
            "f1": samples,
            "acc": samples,
        })
        results.loc[len(results.index)] = ["total", 0, 0, 0, 0, 0, 0, 0, 0]
        for label in results["class"].tolist():
            if label in true_positives["class"].tolist():
                label_true_samples = true_positives.loc[true_positives["class"] == label, "number of samples"].iloc[0]
                label_true_positives = true_positives.loc[true_positives["class"] == label, "coincidence count"].iloc[0]
            else:
                label_true_samples = 0
                label_true_positives = 0
            if label in false_positives["class"].tolist():
                label_false_samples = false_positives.loc[false_positives["class"] == label, "number of samples"].iloc[0]
                label_false_positives = false_positives.loc[false_positives["class"] == label, "coincidence count"].iloc[0]
            else:
                label_false_samples = 0
                label_false_positives = 0
            r = label_true_positives / label_true_samples
            p = label_true_positives / (label_true_positives + label_false_positives)
            f1 = 2 * r * p / (r + p)
            acc = (label_true_positives + (label_false_samples - label_false_positives)) / (label_true_samples + label_false_samples)
            results.loc[results["class"] == label, "N# of True samples"] = label_true_samples
            results.loc[results["class"] == label, "N# of False samples"] = label_false_samples
            results.loc[results["class"] == label, "True Positives"] = label_true_positives
            results.loc[results["class"] == label, "False Positives"] = label_false_positives
            if label != "total":
                results.loc[results["class"] == label, "r"] = r
                results.loc[results["class"] == label, "p"] = p
                results.loc[results["class"] == label, "f1"] = f1
                results.loc[results["class"] == label, "acc"] = acc
            else:
                results.loc[results["class"] == label, "r"] = ""
                results.loc[results["class"] == label, "p"] = ""
                results.loc[results["class"] == label, "f1"] = ""
                results.loc[results["class"] == label, "acc"] = ""
        # The "total" row is processed last, so r, p, f1 and acc now hold the micro averages.
        results.loc[len(results.index)] = ["", "", "", "", "Micro avg.", r, p, f1, acc]
        results = results.fillna(0.0)
        # Macro averages over the per-class rows only (excluding "total" and "Micro avg.").
        final_values = results.loc[:len(results.index) - 3]
        results.loc[len(results.index)] = ["", "", "", "", "Macro avg.", final_values["r"].mean(), final_values["p"].mean(), final_values["f1"].mean(), final_values["acc"].mean()]
        return results
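
    # Illustrative report layout (assumed toy description): metrics_report returns one row
    # per class plus "total", "Micro avg." and "Macro avg." rows, with columns
    #   class | N# of True samples | N# of False samples | True Positives | False Positives | r | p | f1 | acc
    # where r is recall, p precision, f1 their harmonic mean and acc the accuracy over the
    # true and false samples of that class.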

    # Computes the metric for each prediction strategy ##############################################
    def _compute(self, predictions, references, prediction_strategies=[["argmax_max"],]):
        """Returns the scores"""
        # TODO: Compute the different scores of the metric
        predictions = torch.from_numpy(np.array(predictions, dtype='float32'))
        classes = [i for i in range(len(predictions[0]))]
        #for value in references:
        #    if value[0] not in classes:
        #        classes.append(value[0])
        results = {}
        for prediction_strategy in prediction_strategies:
            prediction_strategy_name = '-'.join(map(str, prediction_strategy))
            print(prediction_strategy_name)
            results[prediction_strategy_name] = {}
            predicted_labels = self.predict(predictions, prediction_strategy)
            samples = [0 for i in range(len(classes))]
            # Per-class counters for the positive ("true") and negative ("false") reference samples.
            TP_data = pd.DataFrame({
                "class": classes,
                "number of samples": samples,
                "coincidence count": samples,
            })
            FP_data = pd.DataFrame({
                "class": classes,
                "number of samples": samples,
                "coincidence count": samples,
            })
            for i, j in zip(predicted_labels, references):
                # j is a reference pair [class_index, flag]: flag 0 marks a true sample,
                # flag 2 marks a false sample; i is the list of predicted class indexes.
                if j[1] == 0:
                    TP_data.loc[TP_data["class"] == j[0], "number of samples"] += 1
                    if len(i) > 0:
                        if j[0] in i:
                            TP_data.loc[TP_data["class"] == j[0], "coincidence count"] += 1
                    TP_data = TP_data.sort_values(by=["class"], ignore_index=True)
                if j[1] == 2:
                    FP_data.loc[FP_data["class"] == j[0], "number of samples"] += 1
                    if len(i) > 0:
                        if j[0] in i:
                            FP_data.loc[FP_data["class"] == j[0], "coincidence count"] += 1
                    FP_data = FP_data.sort_values(by=["class"], ignore_index=True)
            TP_data.loc[len(TP_data.index)] = ["total", TP_data["number of samples"].sum(), TP_data["coincidence count"].sum()]
            FP_data.loc[len(FP_data.index)] = ["total", FP_data["number of samples"].sum(), FP_data["coincidence count"].sum()]
            report_table = self.metrics_report(
                true_positives=TP_data,
                false_positives=FP_data
            )
            results[prediction_strategy_name] = report_table.rename_axis(prediction_strategy_name, axis='columns')
        return results
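

# Illustrative usage sketch (toy values, assumed purely for demonstration); in normal use
# the module is loaded through evaluate.load with the path to this script.
if __name__ == "__main__":
    metric = metric_tp_fp_Datasets()
    # Two samples with three classes each; the logits below are made up.
    example_logits = [
        [2.0, 0.1, -1.0],
        [0.2, 1.5, 0.3],
    ]
    # Each reference is [class_index, flag]: flag 0 marks a true sample, flag 2 a false one.
    example_references = [[0, 0], [1, 2]]
    report = metric.compute(
        predictions=example_logits,
        references=example_references,
        prediction_strategies=[["argmax_max"], ["topk", 2]],
    )
    for strategy_name, table in report.items():
        print(strategy_name)
        print(table)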