from abc import ABC, abstractmethod

from nervaluate import Evaluator
from sklearn.metrics import classification_report

from token_level_output import get_token_output_labels


class EvaluationMetric(ABC):
    """Base class defining the attributes & methods of an evaluation metric."""

    name: str
    description: str

    @abstractmethod
    def get_evaluation_metric(self, gt_ner_span, pred_ner_span, text, tags) -> float:
        """Return an F1-based score comparing predicted spans against the ground truth."""
class PartialSpanOverlapMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()
        self.name = "Span Based Evaluation with Partial Overlap"
        self.description = ""

    def get_evaluation_metric(self, gt_ner_span, pred_ner_span, text, tags) -> float:
        # nervaluate's "ent_type" scheme counts a prediction as correct when the
        # entity type matches and the predicted span overlaps the ground-truth span.
        evaluator = Evaluator([gt_ner_span], [pred_ner_span], tags=tags)
        return round(evaluator.evaluate()[0]["ent_type"]["f1"], 2)
class ExactSpanOverlapMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()
        self.name = "Span Based Evaluation with Exact Overlap"
        self.description = ""

    def get_evaluation_metric(self, gt_ner_span, pred_ner_span, text, tags) -> float:
        # nervaluate's "strict" scheme requires both the exact span boundaries
        # and the entity type to match.
        evaluator = Evaluator([gt_ner_span], [pred_ner_span], tags=tags)
        return round(evaluator.evaluate()[0]["strict"]["f1"], 2)
class TokenMicroMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()
        self.name = "Token Based Evaluation with Micro Average"
        self.description = ""

    def get_evaluation_metric(self, gt_ner_span, pred_ner_span, text, tags) -> float:
        # Micro-averaged F1 over per-token labels: every token counts equally,
        # regardless of which tag it carries.
        return round(
            classification_report(
                get_token_output_labels(gt_ner_span, text),
                get_token_output_labels(pred_ner_span, text),
                labels=tags,
                output_dict=True,
            )["micro avg"]["f1-score"],
            2,
        )
class TokenMacroMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()
        self.name = "Token Based Evaluation with Macro Average"
        self.description = ""

    def get_evaluation_metric(self, gt_ner_span, pred_ner_span, text, tags) -> float:
        # Macro-averaged F1 over per-token labels: per-tag F1 scores are averaged
        # with equal weight for each tag, so rare tags count as much as frequent ones.
        return round(
            classification_report(
                get_token_output_labels(gt_ner_span, text),
                get_token_output_labels(pred_ner_span, text),
                labels=tags,
                output_dict=True,
            )["macro avg"]["f1-score"],
            2,
        )
class TokenWeightedMetric(EvaluationMetric):
    def __init__(self) -> None:
        super().__init__()
        self.name = "Token Based Evaluation with Weighted Average"
        self.description = ""

    def get_evaluation_metric(self, gt_ner_span, pred_ner_span, text, tags) -> float:
        # Weighted-averaged F1 over per-token labels: per-tag F1 scores are
        # averaged, weighted by each tag's support (its token count).
        return round(
            classification_report(
                get_token_output_labels(gt_ner_span, text),
                get_token_output_labels(pred_ner_span, text),
                labels=tags,
                output_dict=True,
            )["weighted avg"]["f1-score"],
            2,
        )
EVALUATION_METRICS = [
    PartialSpanOverlapMetric(),
    ExactSpanOverlapMetric(),
    TokenMicroMetric(),
    TokenMacroMetric(),
    TokenWeightedMetric(),
]
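

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module. The span format
    # below (dicts with "label" and character-offset "start"/"end" keys) is an
    # assumption based on nervaluate's prodigy-style input; adapt it to whatever
    # token_level_output.get_token_output_labels actually expects. The example
    # text, tags, and spans are hypothetical.
    text = "Alice moved to Paris"
    tags = ["PER", "LOC"]
    gt_ner_span = [
        {"label": "PER", "start": 0, "end": 5},
        {"label": "LOC", "start": 15, "end": 20},
    ]
    pred_ner_span = [
        {"label": "PER", "start": 0, "end": 5},
        {"label": "LOC", "start": 15, "end": 20},
    ]

    # Run every registered metric on the same ground-truth / prediction pair.
    for metric in EVALUATION_METRICS:
        score = metric.get_evaluation_metric(gt_ner_span, pred_ner_span, text, tags)
        print(f"{metric.name}: {score}")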