from nervaluate import Evaluator
from sklearn.metrics import classification_report

from token_level_output import get_token_output_labels

EVALUATION_METRICS = [
    "Span Based Evaluation with Partial Overlap",
    "Token Based Evaluation with Micro Avg",
    "Token Based Evaluation with Macro Avg",
]


def get_span_eval(gt_ner_span, pred_ner_span, text):
    # Span-level F1 via nervaluate: the "ent_type" scheme gives credit when a
    # predicted span partially overlaps a gold span and the entity type matches.
    evaluator = Evaluator([gt_ner_span], [pred_ner_span], tags=["Disease", "Drug"])
    return round(evaluator.evaluate()[0]["ent_type"]["f1"], 2)


def get_token_micro_eval(gt_ner_span, pred_ner_span, text):
    # Token-level micro-averaged F1: spans are expanded to per-token labels and
    # scored with scikit-learn's classification_report.
    return round(
        classification_report(
            get_token_output_labels(gt_ner_span, text),
            get_token_output_labels(pred_ner_span, text),
            labels=["Disease", "Drug"],
            output_dict=True,
        )["micro avg"]["f1-score"],
        2,
    )


def get_token_macro_eval(gt_ner_span, pred_ner_span, text):
    # Token-level macro-averaged F1: same per-token labels, but the per-class F1
    # scores for "Disease" and "Drug" are averaged with equal weight.
    return round(
        classification_report(
            get_token_output_labels(gt_ner_span, text),
            get_token_output_labels(pred_ner_span, text),
            labels=["Disease", "Drug"],
            output_dict=True,
        )["macro avg"]["f1-score"],
        2,
    )


def get_evaluation_metric(metric_type, gt_ner_span, pred_ner_span, text):
    # Dispatch on the metric name selected from EVALUATION_METRICS.
    match metric_type:
        case "Span Based Evaluation with Partial Overlap":
            return get_span_eval(gt_ner_span, pred_ner_span, text)
        case "Token Based Evaluation with Micro Avg":
            return get_token_micro_eval(gt_ner_span, pred_ner_span, text)
        case "Token Based Evaluation with Macro Avg":
            return get_token_macro_eval(gt_ner_span, pred_ner_span, text)
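

# --- Usage sketch (illustrative, not part of the original module) ---
# Assumes spans are nervaluate/Prodigy-style dicts with "label", "start", and
# "end" character offsets into `text`. The exact format consumed by
# token_level_output.get_token_output_labels is an assumption here, and the
# example text and offsets below are made up for illustration only.
if __name__ == "__main__":
    text = "Metformin is used to treat type 2 diabetes."
    gt_ner_span = [
        {"label": "Drug", "start": 0, "end": 9},
        {"label": "Disease", "start": 27, "end": 42},
    ]
    pred_ner_span = [
        {"label": "Drug", "start": 0, "end": 9},
    ]
    for metric in EVALUATION_METRICS:
        print(metric, get_evaluation_metric(metric, gt_ner_span, pred_ner_span, text))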