"""Align sents via gale-church.""" # pylint: disable= from typing import List, Tuple # noqa import re # from itertools import tee # from more_itertools import ilen from nltk.translate.gale_church import align_blocks from radiobee.amend_avec import amend_avec def align_sents(lst1: List[str], lst2: List[str]) -> List[Tuple[str, str]]: """Align sents. >>> lst1, lst2 = ['a', 'bs',], ['aaa', '34', 'a', 'b'] """ if isinstance(lst1, str): lst1 = [lst1] if isinstance(lst2, str): lst2 = [lst2] src_blocks = [len(re.sub(r"\s+", "", elm)) for elm in lst1] tgt_blocks = [len(re.sub(r"\s+", "", elm)) for elm in lst2] avec = align_blocks(src_blocks, tgt_blocks) len1, len2 = len(lst1), len(lst2) # lst1, _ = tee(lst1) # len1 = ilen(_) # lst2, _ = tee(lst2) # len2 = ilen(_) amended_avec = amend_avec(avec, len1, len2) texts = [] # for elm in aset: for elm0, elm1 in amended_avec: # elm0, elm1, elm2 = elm _ = [] # src_text first if isinstance(elm0, str): _.append("") else: # _.append(src_text[int(elm0)]) _.append(lst1[int(elm0)]) if isinstance(elm1, str): _.append("") else: # _.append(tgt_text[int(elm0)]) _.append(lst2[int(elm1)]) _a = """ if isinstance(elm2, str): _.append("") else: _.append(round(elm2, 2)) # """ del _a texts.append(tuple(_)) return texts # return ["", ""]