from typing import Any

import spacy

# Loaded pipelines keyed by model name, so each model is loaded at most once
# per process and a request for a different name never gets the wrong model.
_MODEL_CACHE: dict[str, spacy.Language] = {}


def _load_spacy_model(name: str) -> spacy.Language:
    """Return the spaCy pipeline called *name*, loading it on first use.

    The pipeline is loaded with every component listed in ``all_except_ner``
    excluded, and is then cached per model name so that later calls with the
    same name reuse the already-loaded object.

    Args:
        name: Name (or path) of the spaCy model, passed to ``spacy.load``.

    Returns:
        The loaded (or previously cached) ``spacy.Language`` pipeline.
    """
    nlp = _MODEL_CACHE.get(name)
    if nlp is None:
        # pipeline info https://spacy.io/models/en#en_core_web_lg
        all_except_ner: list[str] = [
            "tok2vec",
            "tagger",
            "parser",
            "attribute_ruler",
            "lemmatizer",
        ]
        nlp = spacy.load(name=name, exclude=all_except_ner)
        _MODEL_CACHE[name] = nlp
        print(f"Loaded {nlp.meta.get('name', 'unknown')} model from {nlp.path}")
    return nlp


class EndpointHandler:
    """Callable handler that extracts ``PERSON`` entities from input text."""

    def __init__(self, path: str = ""):
        """Create the handler and obtain the ``en_core_web_lg`` pipeline.

        Args:
            path: Only echoed to stdout here; it is not used to locate the
                model.
        """
        print(f"EndpointHandler(path='{path}')")
        self._nlp: spacy.Language = _load_spacy_model(name="en_core_web_lg")

    def __call__(self, data: dict[str, Any]) -> list[dict[str, Any]]:
        """Run the pipeline on ``data["inputs"]`` and return PERSON entities.

        Args:
            data: Request payload; the text to analyse is read from the
                ``"inputs"`` key. The payload is not modified.

        Returns:
            One dict per entity labelled ``PERSON``, in document order, with
            keys ``entity_group``, ``score``, ``word``, ``start`` and ``end``
            (``start``/``end`` are the entity's character offsets). An empty
            list is returned when ``"inputs"`` is missing or empty.
        """
        # Read without popping so the caller's payload is left intact.
        inputs: str = data.get("inputs", "")
        if not inputs:
            return []

        doc = self._nlp(text=inputs)
        return [
            {
                "entity_group": ent.label_,
                # Fixed placeholder: no model confidence is computed here.
                "score": 1,
                "word": ent.text,
                "start": ent.start_char,
                "end": ent.end_char,
            }
            for ent in doc.ents
            if ent.label_ == "PERSON"
        ]