Spaces:
Build error
Build error
"""Align sents via gale-church.""" | |
# pylint: disable=invalid-name | |
from typing import List, Tuple # noqa | |
import re | |
# from itertools import tee | |
# from more_itertools import ilen | |
from nltk.translate.gale_church import align_blocks | |
from radiobee.amend_avec import amend_avec | |
def align_sents(lst1: List[str], lst2: List[str]) -> List[Tuple[str, str]]:
    """Align two lists of sentences via gale-church.

    Each sentence is measured by its character count with all whitespace
    removed; those lengths are passed to ``align_blocks`` and the result
    is post-processed by ``amend_avec`` together with the two list sizes.

    Args:
        lst1: source-side sentences. A bare ``str`` is also accepted and
            is treated as a one-element list.
        lst2: target-side sentences, same conventions as ``lst1``.

    Returns:
        A list of ``(source_text, target_text)`` tuples, one per entry of
        the amended alignment. A side whose alignment entry is a ``str``
        is rendered as ``""``; otherwise the entry is used as an index
        into the corresponding input list.

    Example input:
        >>> lst1, lst2 = ['a', 'bs',], ['aaa', '34', 'a', 'b']
    """
    # Tolerate a single string on either side.
    if isinstance(lst1, str):
        lst1 = [lst1]
    if isinstance(lst2, str):
        lst2 = [lst2]

    # Whitespace does not count towards a sentence's length.
    src_blocks = [len(re.sub(r"\s+", "", elm)) for elm in lst1]
    tgt_blocks = [len(re.sub(r"\s+", "", elm)) for elm in lst2]

    avec = align_blocks(src_blocks, tgt_blocks)
    amended_avec = amend_avec(avec, len(lst1), len(lst2))

    texts = []
    for elm in amended_avec:
        # Only the first two fields (source index, target index) are used;
        # any further fields of the entry are ignored.
        # NOTE(review): a str entry presumably marks "no counterpart on
        # this side" -- confirm against amend_avec.
        idx1, idx2 = elm[:2]
        src_text = "" if isinstance(idx1, str) else lst1[int(idx1)]
        tgt_text = "" if isinstance(idx2, str) else lst2[int(idx2)]
        texts.append((src_text, tgt_text))

    return texts