# 3ie-intervention-outcome-entity-linking/relik/inference/data/splitters/base_sentence_splitter.py
from typing import List, Union
class BaseSentenceSplitter:
    """
    A `BaseSentenceSplitter` splits strings into sentences.

    This base class only defines the interface: :meth:`split_sentences` raises
    :class:`NotImplementedError` and must be overridden by subclasses. Calling
    an instance and :meth:`split_sentences_batch` both delegate to it.
    """

    def __call__(self, *args, **kwargs):
        """
        Calls :meth:`split_sentences`, forwarding every argument unchanged.
        """
        return self.split_sentences(*args, **kwargs)

    def split_sentences(
        self, text: str, max_len: int = 0, *args, **kwargs
    ) -> List[str]:
        """
        Splits a `text` :class:`str` paragraph into a list of :class:`str`, where each is a sentence.

        Args:
            text (:obj:`str`):
                The paragraph to split.
            max_len (:obj:`int`, optional, defaults to ``0``):
                Not used by this base class. NOTE(review): presumably a length
                limit honoured by concrete splitters -- confirm in subclasses.

        Returns:
            :obj:`List[str]`: The sentences found in ``text``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def split_sentences_batch(
        self, texts: List[str], *args, **kwargs
    ) -> List[List[str]]:
        """
        Default implementation is to just iterate over the texts and call `split_sentences`.

        Args:
            texts (:obj:`List[str]`):
                The paragraphs to split.
            *args, **kwargs:
                Extra options, passed through to every :meth:`split_sentences` call.

        Returns:
            :obj:`List[List[str]]`: One list of sentences per input text, in input order.
        """
        # Forward the extra arguments so per-call options (e.g. ``max_len``)
        # apply to batched input exactly as they do to a single text.
        return [self.split_sentences(text, *args, **kwargs) for text in texts]
def check_is_batched(
    texts: Union[str, List[str], List[List[str]]], is_split_into_words: bool
) -> bool:
    """
    Check if input is batched or a single sample.

    Args:
        texts (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
            Text to check.
        is_split_into_words (:obj:`bool`):
            Whether each sample is already a sequence of words. If :obj:`False`,
            any list or tuple counts as a batch. If :obj:`True`, a flat list is
            one pre-split sample, so only a non-empty list or tuple whose first
            element is itself a list or tuple counts as a batch.

    Returns:
        :obj:`bool`: ``True`` if ``texts`` is batched, ``False`` otherwise.
    """
    # Anything that is not a list/tuple (e.g. a plain string) is a single sample.
    if not isinstance(texts, (list, tuple)):
        return False
    # Samples are raw strings, so a sequence of them is already a batch.
    if not is_split_into_words:
        return True
    # Samples are word sequences: only the first element is inspected, and an
    # empty sequence is reported as not batched.
    return bool(texts) and isinstance(texts[0], (list, tuple))