John Graham Reynolds committed on
Commit 7ca7f15 · 1 Parent(s): 709f580

update f1 file and load directly from imported class

Files changed (3)
  1. __init__.py +5 -0
  2. app.py +2 -1
  3. fixed_f1.py +19 -4
__init__.py ADDED
@@ -0,0 +1,5 @@
+ from fixed_f1 import FixedF1
+
+ __all__ = [
+     "FixedF1"
+ ]
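
With the new __init__.py, the directory becomes an importable package that re-exports FixedF1. A minimal sketch of what that enables, assuming the repository is installed or on the path under a package name such as fixed_f1_metric (the name is hypothetical):

from fixed_f1_metric import FixedF1  # hypothetical package name; depends on how the repo is installed

metric = FixedF1(average="weighted")  # averaging behavior is bound at instantiation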
app.py CHANGED
@@ -1,6 +1,7 @@
  import evaluate
  from evaluate.utils import launch_gradio_widget
+ from fixed_f1 import FixedF1


- module = evaluate.load("./fixed_f1.py")
+ module = FixedF1()
  launch_gradio_widget(module)
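
The switch from evaluate.load("./fixed_f1.py") to direct instantiation means launch_gradio_widget now receives a FixedF1 instance whose average kwarg is already bound. A quick local check of the new loading path might look like the following; it assumes FixedF1._compute wraps sklearn.metrics.f1_score, and the sample labels are illustrative:

from fixed_f1 import FixedF1

module = FixedF1(average="micro")
result = module.compute(predictions=[0, 1, 1, 0], references=[0, 1, 0, 0])
print(result)  # a dict like {"f1": ...}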
fixed_f1.py CHANGED
@@ -4,6 +4,13 @@ from evaluate import evaluator, Metric
  # from evaluate.metrics.f1 import F1
  from sklearn.metrics import f1_score

+ _DESCRIPTION = """
+ Custom built F1 metric that accept underlying kwargs at instantiation time.
+ This class allows one to circumvent the current issue of `combine`-ing the f1 metric, instantiated with its own parameters, into a `CombinedEvaluations` class with other metrics.
+
+ In general, the F1 score is the harmonic mean of the precision and recall. It can be computed with the equation:
+ F1 = 2 * (precision * recall) / (precision + recall)
+ """

  _CITATION = """
  @online{MarioBarbeque@HuggingFace,
@@ -15,7 +22,15 @@ _CITATION = """
  }
  """

- # could in principle subclass F1, but ideally we can work the fix into the HF main F1 class to maintain SOLID code
+ _INPUTS = """
+ 'average': This parameter is required for multiclass/multilabel targets.
+ If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data.
+ Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.
+ """
+
+ # could in principle subclass the F1 Metric, but ideally we can work the fix into the HF main F1 class to maintain SOLID code
+ # for this fix we create a new class
+
  class FixedF1(evaluate.Metric):

      def __init__(self, average="binary"):
@@ -25,9 +40,9 @@ class FixedF1(evaluate.Metric):

      def _info(self):
          return evaluate.MetricInfo(
-             description="Custom built F1 metric for true *multilabel* classification - the 'multilabel' config_name var in the evaluate.EvaluationModules class appears to better address multi-class classification, where features can fall under a plethora of labels. This class is implemented with the intention of enabling the evaluation of multiple multilabel classification metrics at the same time using the evaluate.CombinedEvaluations.combine method.",
-             citation="",
-             inputs_description="'average': This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.",
+             description=_DESCRIPTION,
+             citation=_CITATION,
+             inputs_description=_INPUTS,
              features=datasets.Features(
                  {
                      "predictions": datasets.Sequence(datasets.Value("int32")),