|
import spacy |
|
|
|
from typing import Any |
|
|
|
|
|
def _load_spacy_model(name: str) -> spacy.Language:
    """Load a spaCy pipeline by name, reusing an already-loaded instance.

    The pipeline is loaded with the ``tok2vec``, ``tagger``, ``parser``,
    ``attribute_ruler`` and ``lemmatizer`` components excluded. Loaded
    pipelines are cached per model name on the function object itself, so
    asking for the same name twice returns the same object, while asking for
    a different name loads that model instead of returning whichever one
    happened to be loaded first.

    Args:
        name: Name of the model, forwarded to ``spacy.load``.

    Returns:
        The loaded (or previously cached) pipeline for ``name``.

    Raises:
        Any exception raised by ``spacy.load`` is propagated unchanged
        (presumably ``OSError`` when the model is not installed -- confirm
        against the spaCy documentation).
    """
    # One cache entry per model name; the dict lives on the function object so
    # no module-level global is required.
    cache: dict[str, spacy.Language] = _load_spacy_model.__dict__.setdefault(
        "_models", {}
    )

    nlp = cache.get(name)
    if nlp is None:
        # Components that are left out of the pipeline at load time.
        all_except_ner: list[str] = [
            "tok2vec",
            "tagger",
            "parser",
            "attribute_ruler",
            "lemmatizer",
        ]
        nlp = spacy.load(name=name, exclude=all_except_ner)
        cache[name] = nlp
        print(f"Loaded {nlp.meta.get('name', 'unknown')} model from {nlp.path}")

    # Kept for backward compatibility: code that reads the ``nlp`` attribute
    # still finds the pipeline that was returned most recently.
    _load_spacy_model.nlp = nlp
    return nlp
|
|
|
|
|
class EndpointHandler:
    """Callable request handler that reports the PERSON entities in a text.

    An instance holds one spaCy pipeline and, when called with a request
    payload, returns one dictionary per entity whose label is ``"PERSON"``.
    """

    def __init__(self, path: str = ""):
        """Create the handler and obtain its spaCy pipeline.

        Args:
            path: Only echoed to stdout; the pipeline is always requested
                under the fixed name ``"en_core_web_lg"``.
        """
        print(f"EndpointHandler(path='{path}')")
        self._nlp: spacy.Language = _load_spacy_model(name="en_core_web_lg")

    def __call__(self, data: dict[str, Any]) -> list[dict[str, Any]]:
        """Extract PERSON entities from ``data["inputs"]``.

        Args:
            data: Request payload. The text to analyse is read from the
                ``"inputs"`` key; the payload itself is not modified.

        Returns:
            A list with one dict per PERSON entity, in the order the pipeline
            reports them, with the keys ``entity_group``, ``score``, ``word``,
            ``start`` and ``end`` (character offsets into the input text).
            The list is empty when ``"inputs"`` is missing or empty.
        """
        # Read with ``get`` rather than ``pop`` so the caller's dict keeps its
        # "inputs" entry (e.g. for logging or retrying the same payload).
        inputs: str = data.get("inputs", "")
        if not inputs:
            return []

        doc = self._nlp(text=inputs)
        return [
            {
                "entity_group": ent.label_,
                # Constant placeholder: no confidence value is read from the
                # entity, every hit is reported with the same score.
                "score": 1,
                "word": ent.text,
                "start": ent.start_char,
                "end": ent.end_char,
            }
            for ent in doc.ents
            if ent.label_ == "PERSON"
        ]
|
|