John Graham Reynolds committed
Commit 8b58e10 · 1 Parent(s): ca651ac

fix minor errors
app.py CHANGED
@@ -4,6 +4,7 @@ from fixed_recall import FixedRecall
import evaluate
import gradio as gr
import pandas as pd
+import numpy as np

title = "'Combine' multiple metrics with this 🤗 Evaluate 🪲 Fix!"


@@ -14,10 +15,29 @@ Check out the original, longstanding issue [here](https://github.com/huggingface
`evaluate.combine()` multiple metrics related to multilabel text classification. Particularly, one cannot `combine` the `f1`, `precision`, and `recall` scores for \
evaluation. I encountered this issue specifically while training [RoBERTa-base-DReiFT](https://huggingface.co/MarioBarbeque/RoBERTa-base-DReiFT) for multilabel \
text classification of 805 labeled medical conditions based on drug reviews. The [following workaround](https://github.com/johngrahamreynolds/FixedMetricsForHF) was
-created to address this
+created to address this - follow the link to view the source! \n

This Space shows how one can instantiate these custom `evaluate.Metric`s, each with their own unique methodology for averaging across labels, before `combine`-ing them into a
-HF `evaluate.CombinedEvaluations` object. From here, we can easily compute each of the metrics simultaneously using `compute`
+HF `evaluate.CombinedEvaluations` object. From here, we can easily compute each of the metrics simultaneously using `compute`. \n
+
+In general, one writes the following:\n
+
+```python
+f1 = FixedF1(average=...)
+precision = FixedPrecision(average=...)
+recall = FixedRecall(average=...)
+
+combined = evaluate.combine([f1, precision, recall])
+
+combined.add_batch(predictions=..., references=...)
+combined.compute()
+```\n
+
+where the `average` parameter can be different at instantiation time for each of the metrics. Acceptable values include `[None, 'micro', 'macro', 'weighted']` (
+or `binary` if there exist only two labels). \n
+
+Try it out using the examples below! Then try picking various averaging methods yourself!
+</p>
"""


@@ -39,13 +59,13 @@ def evaluation(predictions_df: pd.DataFrame, metrics_df: pd.DataFrame) -> str:
    combined_list = []

    if "f1" in metric_set:
-        f1 = FixedF1(average=metric_map["f1"])
+        f1 = FixedF1(average=metric_map["f1"] if metric_map["f1"] != "None" else None)
        combined_list.append(f1)
    if "precision" in metric_set:
-        precision = FixedPrecision(average=metric_map["precision"])
+        precision = FixedPrecision(average=metric_map["precision"] if metric_map["precision"] != "None" else None, zero_division=np.nan)
        combined_list.append(precision)
    if "recall" in metric_set:
-        recall = FixedRecall(average=metric_map["recall"])
+        recall = FixedRecall(average=metric_map["recall"] if metric_map["recall"] != "None" else None)
        combined_list.append(recall)

    combined = evaluate.combine(combined_list)
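The end-to-end pattern documented in the new description block — instantiating each fixed metric with its own averaging strategy, `combine`-ing them, and computing everything in one call — might look roughly like the sketch below. The `fixed_f1` and `fixed_precision` module names are assumptions modeled on the `from fixed_recall import FixedRecall` import visible in the first hunk; only `evaluate.combine`, `add_batch`, and `compute` are standard `evaluate` API.

```python
# Rough end-to-end sketch of the workflow described in the Space's intro text.
# The Fixed* classes come from the linked FixedMetricsForHF repo; module names
# other than fixed_recall are assumed by analogy with the import shown above.
import evaluate
import numpy as np

from fixed_f1 import FixedF1
from fixed_precision import FixedPrecision
from fixed_recall import FixedRecall

# Each metric is bound to its own averaging strategy at instantiation time,
# which is what plain evaluate.combine() makes awkward with the stock metrics.
f1 = FixedF1(average="weighted")
precision = FixedPrecision(average="micro", zero_division=np.nan)
recall = FixedRecall(average="macro")

combined = evaluate.combine([f1, precision, recall])

# Toy multiclass predictions and references (class indices).
combined.add_batch(predictions=[0, 2, 1, 1], references=[0, 1, 1, 2])
print(combined.compute())  # one dict holding the f1, precision, and recall scores
```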
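The other change, the guard added inside `evaluation()`, suggests that the averaging choice arrives from the Gradio UI as the literal string `"None"`, which has to become Python's `None` before it reaches a metric constructor. A minimal sketch of that conversion, assuming `FixedPrecision` accepts the same `average` and `zero_division` keywords that app.py passes to it (`metric_map` and `to_average` here are illustrative stand-ins, not names from the Space):

```python
# Minimal sketch of the "None"-string guard introduced in evaluation() above.
import numpy as np

from fixed_precision import FixedPrecision  # assumed import path, mirroring
                                            # `from fixed_recall import FixedRecall`

def to_average(choice: str):
    """Map the UI string to a valid `average` argument (None -> per-label scores)."""
    return None if choice == "None" else choice

metric_map = {"precision": "None"}  # hypothetical user selection

precision = FixedPrecision(
    average=to_average(metric_map["precision"]),
    # zero_division=np.nan makes labels with no predicted samples yield NaN
    # (and be excluded from averages) instead of counting as 0.0.
    zero_division=np.nan,
)
```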