# NOTE: "Spaces: Runtime error" -- web-page status banner captured with this file; not module code.
"""Compute named entity recognition with SpaCy."""
import os
from typing import TYPE_CHECKING, Iterable, Optional

from pydantic import Field as PydanticField
from typing_extensions import override

from ..schema import Field, Item, RichData, SignalInputType, field, lilac_span
from ..signal import TextSignal

if TYPE_CHECKING:
    import spacy

class SpacyNER(TextSignal):
    """Named entity recognition with SpaCy.

    For details see: [spacy.io/models](https://spacy.io/models).
    """

    name = 'spacy_ner'
    display_name = 'Named Entity Recognition'

    # Handed verbatim to `spacy.load`: an installed package name or a filesystem path.
    model: str = PydanticField(
        title='SpaCy package name or model path.', default='en_core_web_sm')

    input_type = SignalInputType.TEXT

    # The loaded pipeline; remains None until `setup` has run.
    _nlp: Optional['spacy.language.Language'] = None

    def setup(self) -> None:
        """Load the SpaCy pipeline named by `self.model`, downloading it when needed.

        A download is attempted only when `self.model` is neither an installed
        package nor an existing filesystem path.

        Raises:
          ImportError: If the `spacy` package cannot be imported.
        """
        try:
            # Imported lazily so that `spacy` is only required when this signal is used.
            import spacy
            import spacy.cli
        except ImportError as err:
            raise ImportError('Could not import the "spacy" python package. '
                              'Please install it with `pip install spacy`.') from err

        # `model` may be a local model path; handing a path to the downloader
        # would fail, so only names that are not installed and not on disk are fetched.
        if not spacy.util.is_package(self.model) and not os.path.exists(self.model):
            spacy.cli.download(self.model)

        self._nlp = spacy.load(
            self.model,
            # Disable everything except the NER component. See: https://spacy.io/models
            disable=['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer'])

    def fields(self) -> Field:
        """Return the output schema: a list of string spans, each with a string `label`."""
        return field(fields=[field('string_span', fields={'label': 'string'})])

    def compute(self, data: Iterable[RichData]) -> Iterable[Optional[Item]]:
        """Yield the entity spans found in each row of `data`.

        This is a generator, so nothing runs (including the initialization
        check) until the result is iterated.

        Args:
          data: The rows to process. Rows that are not `str` are treated as empty text.

        Yields:
          For each input row, a list of spans built from the entity's character
          offsets and label, or None when the row produced no entities.

        Raises:
          RuntimeError: If `setup` has not loaded the pipeline yet.
        """
        if self._nlp is None:
            raise RuntimeError('SpaCy model is not initialized.')

        # Substitute '' for non-string rows so the output stays aligned one-to-one with the input.
        text_data = (row if isinstance(row, str) else '' for row in data)
        for doc in self._nlp.pipe(text_data):
            result = [
                lilac_span(ent.start_char, ent.end_char, {'label': ent.label_})
                for ent in doc.ents
            ]
            # An empty entity list is reported as None rather than [].
            yield result or None