gorkaartola committed on
Commit
b751253
1 Parent(s): b17ddff

Upload metric_for_tp_fp_samples.py

Files changed (1)
  1. metric_for_tp_fp_samples.py +238 -0
metric_for_tp_fp_samples.py ADDED
@@ -0,0 +1,238 @@
+ # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """TODO: Add a description here."""
+
+ import evaluate
+ import datasets
+ import pandas as pd
+ import numpy as np
+ import torch
+
+ # TODO: Add BibTeX citation
+ _CITATION = """\
+ @InProceedings{huggingface:module,
+     title = {A great new module},
+     authors={huggingface, Inc.},
+     year={2020}
+ }
+ """
+
+ # TODO: Add description of the module here
+ _DESCRIPTION = """\
+ This module computes per-class true-positive and false-positive based metrics
+ (recall, precision, f1 and accuracy, plus micro and macro averages) from prediction
+ logits, under several selectable prediction strategies.
+ """
+
+
+ # TODO: Add description of the arguments of the module here
+ _KWARGS_DESCRIPTION = """
+ Calculates how good the predictions are, given some references, using several scores.
+ Args:
+     predictions: list of logit vectors, one per sample
+         (a sequence of float values per sample).
+     references: list of int sequences, one per sample, where the first value is the
+         class index and the second value flags the sample type
+         (0 for true samples, 2 for false samples).
+     prediction_strategies: list of prediction strategies, e.g. ["argmax_max"],
+         ["softmax_threshold", t], ["softmax_topk", k], ["threshold", t] or ["topk", k].
+ Returns:
+     A pandas DataFrame per prediction strategy with per-class sample counts, true and
+     false positives, recall (r), precision (p), f1 and accuracy (acc), plus micro and
+     macro average rows.
+ Examples:
+     >>> my_new_module = evaluate.load("my_new_module")
+     >>> results = my_new_module.compute(predictions=[[0.9, 0.1], [0.2, 0.8]],
+     ...                                 references=[[0, 0], [1, 0]],
+     ...                                 prediction_strategies=[["argmax_max"]])
+ """
+
+ # TODO: Define external resources urls if needed
+ BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
+
+
+ @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+ class metric_tp_fp_Datasets(evaluate.Metric):
+     """TODO: Short description of my metric."""
+
+     def _info(self):
+         # TODO: Specifies the evaluate.EvaluationModuleInfo object
+         return evaluate.MetricInfo(
+             # This is the description that will appear on the metrics page.
+             module_type="metric",
+             description=_DESCRIPTION,
+             citation=_CITATION,
+             inputs_description=_KWARGS_DESCRIPTION,
+             # This defines the format of each prediction and reference
+             features=datasets.Features({
+                 'predictions': datasets.features.Sequence(datasets.Value('float32')),
+                 'references': datasets.features.Sequence(datasets.Value('int32')),
+             }),
+             # Homepage of the metric for documentation
+             homepage="http://module.homepage",
+             # Additional links to the codebase or references
+             codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
+             reference_urls=["http://path.to.reference.url/new_module"]
+         )
+
+     def _download_and_prepare(self, dl_manager):
+         """Optional: download external resources useful to compute the scores"""
+         # TODO: Download external resources if needed
+         pass
+
+     # Prediction strategy function selector ########################################
+     def predict(self, logits, prediction_strategy):
+         if prediction_strategy[0] == "argmax_max":
+             results = self.argmax_max(logits)
+         elif prediction_strategy[0] == "softmax_threshold":
+             results = self.softmax_threshold(logits, prediction_strategy[1])
+         elif prediction_strategy[0] == "softmax_topk":
+             results = self.softmax_topk(logits, prediction_strategy[1])
+         elif prediction_strategy[0] == "threshold":
+             results = self.threshold(logits, prediction_strategy[1])
+         elif prediction_strategy[0] == "topk":
+             results = self.topk(logits, prediction_strategy[1])
+         return results
+
+     # Prediction strategy functions ______________________________________________
+     def argmax_max(self, logits):
+         # Predicts the single highest-scoring class index for each sample.
+         predictions = []
+         argmax = torch.argmax(logits, dim=-1)
+         for prediction in argmax:
+             predicted_indexes = [prediction.item()]
+             predictions.append(predicted_indexes)
+         return predictions
+
+     def softmax_threshold(self, logits, threshold):
+         # Predicts every class whose softmax probability reaches the threshold.
+         predictions = []
+         softmax = torch.softmax(logits, dim=-1)
+         for prediction in softmax:
+             predicted_indexes = []
+             for index, value in enumerate(prediction):
+                 if value >= threshold:
+                     predicted_indexes.append(index)
+             predictions.append(predicted_indexes)
+         return predictions
+
+     def softmax_topk(self, logits, topk):
+         # Predicts the topk classes with the highest softmax probabilities.
+         softmax = torch.softmax(logits, dim=-1)
+         predictions = softmax.topk(topk).indices.tolist()
+         return predictions
+
+     def threshold(self, logits, threshold):
+         # Predicts every class whose raw logit reaches the threshold.
+         predictions = []
+         for prediction in logits:
+             predicted_indexes = []
+             for index, value in enumerate(prediction):
+                 if value >= threshold:
+                     predicted_indexes.append(index)
+             predictions.append(predicted_indexes)
+         return predictions
+
+     def topk(self, logits, topk):
+         # Predicts the topk classes with the highest raw logits.
+         predictions = logits.topk(topk).indices.tolist()
+         return predictions
+
+     # Builds a report with the metrics ####################################################
+     def metrics_report(self, true_positives="", false_positives=""):
+         classes = true_positives.loc[true_positives["class"] != 'total']["class"].tolist()
+         samples = [0 for i in range(len(classes))]
+         results = pd.DataFrame({
+             "class": classes,
+             "N# of True samples": samples,
+             "N# of False samples": samples,
+             "True Positives": samples,
+             "False Positives": samples,
+             "r": samples,
+             "p": samples,
+             "f1": samples,
+             "acc": samples,
+         })
+         results.loc[len(results.index)] = ["total", 0, 0, 0, 0, 0, 0, 0, 0]
+
+         for label in results["class"].tolist():
+             if label in true_positives["class"].tolist():
+                 label_true_samples = true_positives.loc[true_positives["class"] == label, "number of samples"].iloc[0]
+                 label_true_positives = true_positives.loc[true_positives["class"] == label, "coincidence count"].iloc[0]
+             else:
+                 label_true_samples = 0
+                 label_true_positives = 0
+             if label in false_positives["class"].tolist():
+                 label_false_samples = false_positives.loc[false_positives["class"] == label, "number of samples"].iloc[0]
+                 label_false_positives = false_positives.loc[false_positives["class"] == label, "coincidence count"].iloc[0]
+             else:
+                 label_false_samples = 0
+                 label_false_positives = 0
+
+             # Recall, precision, f1 and accuracy per class; NaN values (e.g. from classes
+             # with no samples) are zero-filled by fillna below.
+             r = label_true_positives/label_true_samples
+             p = label_true_positives/(label_true_positives+label_false_positives)
+             f1 = 2*r*p/(r+p)
+             acc = (label_true_positives+(label_false_samples-label_false_positives))/(label_true_samples+label_false_samples)
+
+             results.loc[results["class"] == label, "N# of True samples"] = label_true_samples
+             results.loc[results["class"] == label, "N# of False samples"] = label_false_samples
+             results.loc[results["class"] == label, "True Positives"] = label_true_positives
+             results.loc[results["class"] == label, "False Positives"] = label_false_positives
+             if label != "total":
+                 results.loc[results["class"] == label, "r"] = r
+                 results.loc[results["class"] == label, "p"] = p
+                 results.loc[results["class"] == label, "f1"] = f1
+                 results.loc[results["class"] == label, "acc"] = acc
+             else:
+                 results.loc[results["class"] == label, "r"] = ""
+                 results.loc[results["class"] == label, "p"] = ""
+                 results.loc[results["class"] == label, "f1"] = ""
+                 results.loc[results["class"] == label, "acc"] = ""
+         # "total" is the last label processed, so r, p, f1 and acc now hold the micro averages.
+         results.loc[len(results.index)] = ["", "", "", "", "Micro avg.", r, p, f1, acc]
+         results = results.fillna(0.0)
+         final_values = results.loc[:len(results.index)-3]
+         results.loc[len(results.index)] = ["", "", "", "", "Macro avg.", final_values["r"].mean(), final_values["p"].mean(), final_values["f1"].mean(), final_values["acc"].mean()]
+         return results
+
+     # Computes the metric for each prediction strategy ##############################################
+     def _compute(self, predictions, references, prediction_strategies=[]):
+         """Returns the scores"""
+         # TODO: Compute the different scores of the metric
+         predictions = torch.from_numpy(np.array(predictions, dtype='float32'))
+         classes = []
+         for value in references:
+             if value[0] not in classes:
+                 classes.append(value[0])
+         results = {}
+         for prediction_strategy in prediction_strategies:
+             prediction_strategy_name = '-'.join(map(str, prediction_strategy))
+             results[prediction_strategy_name] = {}
+             predicted_labels = self.predict(predictions, prediction_strategy)
+             samples = [0 for i in range(len(classes))]
+             TP_data = pd.DataFrame({
+                 "class": classes,
+                 "number of samples": samples,
+                 "coincidence count": samples,
+             })
+             FP_data = pd.DataFrame({
+                 "class": classes,
+                 "number of samples": samples,
+                 "coincidence count": samples,
+             })
+             # Each reference is [class index, flag]: flag 0 counts as a true sample of that
+             # class, flag 2 as a false sample; a coincidence is a prediction containing the class.
+             for i, j in zip(predicted_labels, references):
+                 if j[1] == 0:
+                     TP_data.loc[TP_data["class"] == j[0], "number of samples"] += 1
+                     if len(i) > 0:
+                         if j[0] in i:
+                             TP_data.loc[TP_data["class"] == j[0], "coincidence count"] += 1
+                     TP_data = TP_data.sort_values(by=["class"], ignore_index=True)
+                 if j[1] == 2:
+                     FP_data.loc[FP_data["class"] == j[0], "number of samples"] += 1
+                     if len(i) > 0:
+                         if j[0] in i:
+                             FP_data.loc[FP_data["class"] == j[0], "coincidence count"] += 1
+                     FP_data = FP_data.sort_values(by=["class"], ignore_index=True)
+             TP_data.loc[len(TP_data.index)] = ["total", TP_data["number of samples"].sum(), TP_data["coincidence count"].sum()]
+             FP_data.loc[len(FP_data.index)] = ["total", FP_data["number of samples"].sum(), FP_data["coincidence count"].sum()]
+             report_table = self.metrics_report(
+                 true_positives=TP_data,
+                 false_positives=FP_data
+             )
+             results[prediction_strategy_name] = report_table.rename_axis(prediction_strategy_name, axis='columns')
+         return results
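
A minimal usage sketch, not part of the uploaded file: it assumes the metric is published on the Hub under the repo id gorkaartola/metric_for_tp_fp_samples (inferred from this commit; it may differ in your setup) and uses made-up logits and references to show the expected input format and how prediction strategies are passed to compute.

# Usage sketch with assumed repo id and illustrative numbers.
import evaluate

metric = evaluate.load("gorkaartola/metric_for_tp_fp_samples")  # assumed repo id

# Each prediction is a logit vector; each reference is [class_index, flag],
# where flag 0 marks a true sample and flag 2 a false sample of that class.
predictions = [
    [2.1, 0.3, -1.0],
    [0.2, 1.7, 0.1],
    [0.9, 0.2, 3.4],
    [1.5, 0.1, 0.2],
]
references = [
    [0, 0],  # true sample of class 0
    [1, 0],  # true sample of class 1
    [2, 0],  # true sample of class 2
    [2, 2],  # false sample of class 2
]

results = metric.compute(
    predictions=predictions,
    references=references,
    prediction_strategies=[["argmax_max"], ["softmax_topk", 2]],
)
for strategy_name, table in results.items():
    print(strategy_name)
    print(table)

Each entry in the returned dict is a pandas DataFrame named after its prediction strategy, with per-class sample counts, true/false positives, r, p, f1 and acc, plus the micro and macro average rows produced by metrics_report.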