Spaces:
Build error
Build error
File size: 1,135 Bytes
844aef2 d7cdc67 89d669f d7cdc67 844aef2 d7cdc67 844aef2 89d669f 844aef2 d7cdc67 844aef2 5ae3f92 844aef2 c978e0b 844aef2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
"""Align via ubee,"""
# pylint: disable=
from itertools import zip_longest
from typing import Iterable, List, Tuple
from icecream import ic
from logzero import logger
from ubee.uclas import uclas
def ubee(
    sents_zh: Iterable,
    sents_en: Iterable,
    thresh: float = 0.5,
) -> Tuple[List[Tuple[str, str, float]], List[Tuple[str, str]]]:
    """Align blocks of text from two sequences.

    Every item of ``sents_zh`` is classified against the full list of
    ``sents_en`` items via ``uclas``. When ``uclas`` returns a truthy label,
    the pair is recorded as aligned; otherwise the item becomes a leftover.

    Args:
        sents_zh: texts to align; per the original note, can be any language
            supported by clas-l-user.
        sents_en: candidate texts used as labels (ditto); materialized into
            a list once, so a one-shot iterator is fine.
        thresh: forwarded unchanged to ``uclas`` as its ``thresh`` argument.

    Returns:
        A 2-tuple ``(aligned, leftovers)``:

        * ``aligned``: list of ``(seq, label, likelihood)`` triples, with
          ``likelihood`` converted to ``float`` and rounded to 2 decimals.
        * ``leftovers``: unaligned ``sents_zh`` items paired positionally
          (``zip_longest``) with the ``sents_en`` items never picked as a
          label; the shorter side is padded with ``None``.
    """
    labels = list(sents_en)
    aligned = []
    unmatched_zh = []
    # Every candidate label starts out unmatched; entries are removed as
    # they get picked.
    unmatched_en = labels[:]

    for seq in sents_zh:
        ic(seq)
        label, likelihood = uclas(seq, labels, thresh=thresh)
        if not label:
            unmatched_zh.append(seq)
            continue

        aligned.append((seq, label, round(float(likelihood), 2)))
        try:
            unmatched_en.remove(label)
        except ValueError as exc:
            # ``uclas`` always receives the complete ``labels`` list, so the
            # same label can be returned for more than one ``seq``; the
            # second removal then fails. Log it and keep going (best effort).
            logger.error(exc)
            logger.info("seq: %s, lable: %s", seq, label)

    return aligned, list(zip_longest(unmatched_zh, unmatched_en))
|