# ner_evaluation_metrics / evaluation_metrics.py
from nervaluate import Evaluator
from sklearn.metrics import classification_report
from token_level_output import get_token_output_labels

# Metric names exposed to callers; each must match a case in get_evaluation_metric.
EVALUATION_METRICS = [
    "Span Based Evaluation with Partial Overlap",
    "Token Based Evaluation with Micro Avg",
    "Token Based Evaluation with Macro Avg",
]

def get_span_eval(gt_ner_span, pred_ner_span, text):
    """Span-level F1 (entity type with partial boundary overlap) via nervaluate."""
    evaluator = Evaluator([gt_ner_span], [pred_ner_span], tags=["Disease", "Drug"])
    return round(evaluator.evaluate()[0]["ent_type"]["f1"], 2)

def get_token_micro_eval(gt_ner_span, pred_ner_span, text):
    """Token-level micro-averaged F1 over the Disease and Drug labels."""
    return round(
        classification_report(
            get_token_output_labels(gt_ner_span, text),
            get_token_output_labels(pred_ner_span, text),
            labels=["Disease", "Drug"],
            output_dict=True,
        )["micro avg"]["f1-score"],
        2,
    )

def get_token_macro_eval(gt_ner_span, pred_ner_span, text):
    """Token-level macro-averaged F1 over the Disease and Drug labels."""
    return round(
        classification_report(
            get_token_output_labels(gt_ner_span, text),
            get_token_output_labels(pred_ner_span, text),
            labels=["Disease", "Drug"],
            output_dict=True,
        )["macro avg"]["f1-score"],
        2,
    )

def get_evaluation_metric(metric_type, gt_ner_span, pred_ner_span, text):
    """Dispatch to the scorer matching metric_type (one of EVALUATION_METRICS)."""
    match metric_type:
        case "Span Based Evaluation with Partial Overlap":
            return get_span_eval(gt_ner_span, pred_ner_span, text)
        case "Token Based Evaluation with Micro Avg":
            return get_token_micro_eval(gt_ner_span, pred_ner_span, text)
        case "Token Based Evaluation with Macro Avg":
            return get_token_macro_eval(gt_ner_span, pred_ner_span, text)
        case _:
            raise ValueError(f"Unknown metric type: {metric_type}")
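

# --- Illustrative usage sketch (not part of the original module) ---
# Assumptions: spans use the nervaluate dict format
# ({"label": ..., "start": ..., "end": ...} with character offsets), and the local
# get_token_output_labels returns one label per token ("Disease", "Drug", or "O").
if __name__ == "__main__":
    sample_text = "Aspirin can relieve migraine"
    gt_spans = [
        {"label": "Drug", "start": 0, "end": 7},
        {"label": "Disease", "start": 20, "end": 28},
    ]
    pred_spans = [{"label": "Drug", "start": 0, "end": 7}]
    for metric in EVALUATION_METRICS:
        print(metric, get_evaluation_metric(metric, gt_spans, pred_spans, sample_text))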