Spaces:
Build error
Build error
"""Separate text to zh en lists.""" | |
# pylint: disable= | |
from typing import Iterable, List, Tuple, Union # noqa | |
from fastlid import fastlid | |
from logzero import logger | |
def text2lists(text: Union[Iterable[str], str]) -> List[Tuple[str, str]]: | |
"""Separate text to zh en lists.""" | |
if not isinstance(text, str) and isinstance(text, Iterable): | |
try: | |
text = "\n".join(text) | |
except Exception as e: | |
logger.error(e) | |
raise | |
set_languages = ["en", "zh"] | |
fastlid.set_languages = set_languages | |
list1 = [] | |
list2 = [] # for determining en-zh or zh-en | |
lang0, _ = fastlid(text[:15000]) | |
res = "" | |
left = False # start with left list1 | |
for elm in [_ for _ in text.splitlines() if _.strip()]: | |
lang, _ = fastlid(elm) | |
if lang == lang0: | |
res = res + "\n" + elm | |
else: | |
left = not left | |
if left: | |
list1.append(res.strip()) | |
else: | |
list2.append(res.strip()) # strip first \n | |
res = elm | |
lang0 = lang | |
# find offset | |
left = [] # noqa | |
right = [] # noqa | |
return [("", "")] | |