# ultimatumbee/ubee/ubee.py
# freemt
# Switch to blocks, attempt
# d7cdc67
# raw
# history blame
# No virus
# 1.14 kB
"""Align via ubee."""
# pylint: disable=
from itertools import zip_longest
from typing import Iterable, List, Tuple
from icecream import ic
from logzero import logger
from ubee.uclas import uclas
def ubee(
    sents_zh: Iterable,
    sents_en: Iterable,
    thresh: float = 0.5,
) -> Tuple[List[Tuple[str, str, float]], List[Tuple[str, str]]]:
    """Align blocks of text from two sources.

    Every item of ``sents_zh`` is passed to ``uclas`` together with the full
    list of ``sents_en`` items as candidate labels. When ``uclas`` returns a
    truthy label the pair is recorded as aligned; otherwise the item is kept
    as a leftover.

    Args:
        sents_zh: texts to be aligned, one block per item (any language
            supported by clas-l-user).
        sents_en: candidate texts to align against (ditto). The iterable is
            consumed once and materialised into a list.
        thresh: forwarded unchanged to ``uclas`` as ``thresh``
            (presumably the minimum likelihood for a match -- see ``uclas``).

    Returns:
        A 2-tuple ``(aligned, leftovers)``:

        * ``aligned``: list of ``(seq, label, likelihood)`` tuples, with
          ``likelihood`` converted to ``float`` and rounded to 2 decimals.
        * ``leftovers``: unaligned items from both sides paired positionally
          with ``zip_longest``, so the shorter side is padded with ``None``.
    """
    res = []
    labels = [*sents_en]

    # Leftovers: lo1 collects unmatched items of sents_zh, lo2 starts as a
    # copy of every candidate and shrinks as candidates get matched.
    lo1 = []
    lo2 = labels[:]

    for seq in sents_zh:
        ic(seq)
        label, likelihood = uclas(seq, labels, thresh=thresh)
        if label:
            likelihood = round(float(likelihood), 2)
            res.append((seq, label, likelihood))
            try:
                lo2.remove(label)
            except ValueError as exc:
                # ``labels`` is never shrunk, so a candidate can be matched
                # more than once; from the second match on it is no longer
                # in ``lo2`` and ``list.remove`` raises ValueError. Keep the
                # alignment and just report it.
                logger.error(exc)
                logger.info("seq: %s, lable: %s", seq, label)
        else:
            lo1.append(seq)

    return res, [*zip_longest(lo1, lo2)]