File size: 1,506 Bytes
3d38118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Align texts based on aset, src_text, tgt_text."""
from typing import List, Tuple, Union
from logzero import logger


# fmt: off
def align_texts(
        aset: List[Tuple[Union[str, float], Union[str, float], Union[str, float]]],
        src_text: List[str],
        tgt_text: List[str],
) -> List[Tuple[str, str, Union[str, float]]]:
    # fmt: on
    """Align texts (paras/sents) based on aset, src_text, tgt_text.

    Each triple in ``aset`` is read as ``(tgt_idx, src_idx, metric)``:
    the first element indexes ``tgt_text``, the second indexes
    ``src_text`` and the third is a metric. Any element that is a ``str``
    (e.g. ``""``) is treated as "missing" and yields ``""`` in the output.

    Args:
        aset: align set, a list of ``(tgt_idx, src_idx, metric)`` triples
        src_text: source text
        tgt_text: target text

    Returns:
        aligned texts with possible metrics, one
        ``(src, tgt, metric)`` tuple per triple in ``aset``; numeric
        metrics are rounded to two decimals. An empty ``aset`` gives ``[]``.
    """
    # Count the non-blank indices straight from the triples; unlike
    # ``zip(*aset)`` this does not blow up when ``aset`` is empty.
    n_tgt_idx = sum(1 for tgt_idx, _, _ in aset if tgt_idx != "")
    n_src_idx = sum(1 for _, src_idx, _ in aset if src_idx != "")

    # A mismatch is only reported, not fatal: alignment proceeds anyway.
    if (n_tgt_idx, n_src_idx) != (len(tgt_text), len(src_text)):
        logger.warning(
            " (%s, %s) != (%s, %s) ",
            n_tgt_idx, n_src_idx, len(tgt_text), len(src_text),
        )

    texts = []
    for tgt_idx, src_idx, metric in aset:
        # Output order is source first, then target, then the metric.
        src = "" if isinstance(src_idx, str) else src_text[int(src_idx)]
        tgt = "" if isinstance(tgt_idx, str) else tgt_text[int(tgt_idx)]
        score = "" if isinstance(metric, str) else round(metric, 2)
        texts.append((src, tgt, score))

    return texts