from abc import ABC, abstractmethod

from nervaluate import Evaluator
from sklearn.metrics import classification_report

from token_level_output import get_token_output_labels


class EvaluationMetric(ABC):
    """Base class defining the attributes & methods of an evaluation metric"""

    name: str
    description: str

    @staticmethod
    @abstractmethod
    def get_evaluation_metric(gt_ner_span, pred_ner_span, text, tags) -> float:
        """Return an F1 score for the predicted entity spans against the ground-truth spans."""


class PartialSpanOverlapMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()

        self.name = "Span Based Evaluation with Partial Overlap"
        self.description = "Entity-type F1 from nervaluate, which accepts partially overlapping span boundaries."

    @staticmethod
    def get_evaluation_metric(gt_ner_span, pred_ner_span, text, tags) -> float:
        # nervaluate expects one span list per document, so the single example is wrapped in a list.
        evaluator = Evaluator([gt_ner_span], [pred_ner_span], tags=tags)
        # "ent_type" credits predictions whose entity type matches even if the boundaries only partially overlap.
        return round(evaluator.evaluate()[0]["ent_type"]["f1"], 2)


class ExactSpanOverlapMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()

        self.name = "Span Based Evaluation with Exact Overlap"
        self.description = "Strict F1 from nervaluate, requiring exact span boundaries and a matching entity type."

    @staticmethod
    def get_evaluation_metric(gt_ner_span, pred_ner_span, text, tags) -> float:
        evaluator = Evaluator([gt_ner_span], [pred_ner_span], tags=tags)
        # "strict" requires both the span boundaries and the entity type to match exactly.
        return round(evaluator.evaluate()[0]["strict"]["f1"], 2)


class TokenMicroMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()

        self.name = "Token Based Evaluation with Micro Average"
        self.description = "Micro-averaged token-level F1 computed with scikit-learn's classification_report."

    @staticmethod
    def get_evaluation_metric(gt_ner_span, pred_ner_span, text, tags) -> float:
        # Convert spans to per-token labels and read the micro-averaged F1 from scikit-learn's report.
        return round(
            classification_report(
                get_token_output_labels(gt_ner_span, text),
                get_token_output_labels(pred_ner_span, text),
                labels=tags,
                output_dict=True,
            )["micro avg"]["f1-score"],
            2,
        )


class TokenMacroMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()

        self.name = "Token Based Evaluation with Macro Average"
        self.description = "Macro-averaged token-level F1 computed with scikit-learn's classification_report."

    @staticmethod
    def get_evaluation_metric(gt_ner_span, pred_ner_span, text, tags) -> float:
        # Convert spans to per-token labels and read the macro-averaged F1 from scikit-learn's report.
        return round(
            classification_report(
                get_token_output_labels(gt_ner_span, text),
                get_token_output_labels(pred_ner_span, text),
                labels=tags,
                output_dict=True,
            )["macro avg"]["f1-score"],
            2,
        )


EVALUATION_METRICS = [
    PartialSpanOverlapMetric(),
    ExactSpanOverlapMetric(),
    TokenMicroMetric(),
    TokenMacroMetric(),
]
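

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not part of the module's public API).
    # The spans below are hypothetical and assume nervaluate's prose-style dicts with
    # "label", "start", and "end" keys; offsets here are character offsets into `text`.
    text = "Alice moved to Paris"
    gt_ner_span = [
        {"label": "PER", "start": 0, "end": 5},
        {"label": "LOC", "start": 15, "end": 20},
    ]
    pred_ner_span = [
        {"label": "PER", "start": 0, "end": 5},
        {"label": "LOC", "start": 12, "end": 20},  # boundary differs from the gold span
    ]
    tags = ["PER", "LOC"]

    # Only the span-based metrics are shown here; the token-based metrics additionally rely on
    # token_level_output.get_token_output_labels to turn spans into per-token labels.
    for metric in (PartialSpanOverlapMetric(), ExactSpanOverlapMetric()):
        score = metric.get_evaluation_metric(gt_ner_span, pred_ner_span, text, tags)
        print(f"{metric.name}: {score}")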