Spaces:
Build error
Build error
| """Generate align set (aset) based on pset (pair set), src_len and tgt_len.""" | |
| from typing import List, Tuple, Union | |
| from itertools import zip_longest | |
| # from logzero import logger | |
| # fmt: off | |
def gen_aset(
    pset: List[Tuple[Union[str, float], Union[str, float], Union[str, float]]],
    src_len: int,  # n_rows
    tgt_len: int,  # n_cols
) -> List[Tuple[Union[str, float], Union[str, float], Union[str, float]]]:
    # fmt: on
    """Generate an align set (aset) from a pair set, src_len and tgt_len.

    The anchors in ``pset`` are kept as-is, in order. Every index that lies
    between two consecutive anchors (and before the first / after the last
    one) is emitted as a filler row so that the result walks through
    ``0 .. tgt_len - 1`` in the first column and ``0 .. src_len - 1`` in the
    second column. Filler rows are paired positionally; when one side of a
    gap is shorter, the missing index is ``""``. The third field of every
    filler row is ``""``.

    Original note: ``src_len, tgt_len = cmat.shape``.

    Args:
        pset: anchors ``[x, y, score]``; ``x`` indexes the tgt side and ``y``
            the src side. The first two entries must be convertible with
            ``int()``. ``[]``, ``None`` and ``[[]]`` all mean "no anchors".
        src_len: number of src items (upper bound for the second column).
        tgt_len: number of tgt items (upper bound for the first column).

    Returns:
        A list of rows ``(x, y, score)``: the anchors from ``pset`` untouched,
        interleaved with filler rows ``(x | "", y | "", "")``.
    """
    # Both [] and [[]] denote "no anchors". Fall through to the regular
    # gap-filling path so the rows are triples like everywhere else (the
    # previous shortcut returned 2-tuples here).
    if not pset or (len(pset) == 1 and not pset[0]):
        pset = []

    aset = []
    pos0, pos1 = -1, -1  # last consumed index in column 0 / column 1
    for anchor in pset:
        idx0, idx1, *_ = anchor
        idx0, idx1 = int(idx0), int(idx1)
        aset.extend(_fill_gap(pos0, pos1, idx0, idx1))
        aset.append(anchor)
        pos0, pos1 = idx0, idx1

    # Trailing rows after the last anchor, if any.
    aset.extend(_fill_gap(pos0, pos1, tgt_len, src_len))
    return aset


def _fill_gap(
    pos0: int,
    pos1: int,
    end0: int,
    end1: int,
) -> List[Tuple[Union[int, str], Union[int, str], str]]:
    """Pair up the indices strictly between (pos0, pos1) and (end0, end1)."""
    # An empty range (end <= pos + 1) simply contributes nothing on that side.
    return [
        (idx0, idx1, "")
        for idx0, idx1 in zip_longest(
            range(pos0 + 1, end0), range(pos1 + 1, end1), fillvalue=""
        )
    ]