Spaces:
Paused
Paused
from typing import TYPE_CHECKING, Iterator, List, Tuple

if TYPE_CHECKING:
    from spacy.tokens import Doc
class AspectExtractor:
    """Locate runs of consecutive noun / proper-noun tokens in texts.

    The texts are processed with a spaCy pipeline; every maximal run of
    adjacent tokens whose ``pos_`` is ``"NOUN"`` or ``"PROPN"`` is reported
    as a ``slice`` over the token indices of its document.
    """

    def __init__(self, spacy_model: str) -> None:
        """Load the spaCy pipeline named (or located) by *spacy_model*."""
        super().__init__()
        # Imported here rather than at module level so that importing this
        # module does not itself require spaCy to be importable.
        import spacy

        self.nlp = spacy.load(spacy_model)

    def find_groups(self, aspect_mask: List[bool]) -> Iterator[slice]:
        """Yield one slice per maximal run of truthy flags in *aspect_mask*.

        Each slice is half-open (``start`` inclusive, ``stop`` exclusive),
        so it can be used directly to index the sequence the mask was
        built from. An empty or all-false mask yields nothing.
        """
        start = None
        for idx, flag in enumerate(aspect_mask):
            if flag:
                if start is None:
                    # First truthy flag of a new run.
                    start = idx
            elif start is not None:
                # A falsy flag closes the currently open run.
                yield slice(start, idx)
                start = None
        if start is not None:
            # The mask ended while a run was still open; ``idx`` is the last
            # index visited, so the run extends to the end of the mask.
            yield slice(start, idx + 1)

    def __call__(self, texts: List[str]) -> Tuple[List["Doc"], List[List[slice]]]:
        """Process *texts* and find the noun / proper-noun spans in each.

        Returns:
            A pair ``(docs, aspects_list)``. ``docs`` holds the processed
            documents produced by ``self.nlp.pipe``; ``aspects_list[i]`` is
            the list of token-index slices found in ``docs[i]``.
        """
        docs = list(self.nlp.pipe(texts))
        aspects_list = []
        for doc in docs:
            aspect_mask = [token.pos_ in ("NOUN", "PROPN") for token in doc]
            aspects_list.append(list(self.find_groups(aspect_mask)))
        return docs, aspects_list