# radio-mlbee / app.py
# freemt
# Update gradio sdk_version from 3.0.17 to 3.1.17, examples added
# 5811a7e
"""Create entry."""
# pylint: disable=invalid-name
import os
import time
from pathlib import Path
from typing import Optional, Tuple

import gradio as gr
import logzero
import pandas as pd
from about_time import about_time
from aset2pairs import aset2pairs
from cmat2aset import cmat2aset
from icecream import install as ic_install, ic
from logzero import logger
from seg_text import seg_text
from set_loglevel import set_loglevel

from radio_mlbee import __version__
from radio_mlbee.gen_cmat import gen_cmat
from radio_mlbee.utils import text1, text2
# Logging verbosity is taken from the LOGLEVEL environment variable:
# "10" turns debug output on, "20" turns it off.
# os.environ["LOGLEVEL"] = "10"
os.environ["LOGLEVEL"] = "20"
logzero.loglevel(set_loglevel())
logger.info(" debug is on " if set_loglevel() <= 10 else " debug is off ")

# Install icecream's ic() and send its output, with call-site context,
# through the logger instead of stderr.
ic_install()
ic.configureOutput(includeContext=True, outputFunction=logger.info)
ic.enable()  # use ic.disable() to turn it off

# Switch the process timezone to Asia/Shanghai; a failing tzset() is only
# reported, never fatal.
os.environ["TZ"] = "Asia/Shanghai"
try:
    time.tzset()  # type: ignore
except Exception as exc:
    logger.warning("time.tzset() error: %s. Probably running Windows, we let it pass.", exc)
def greet(name):
    """Return a placeholder greeting for *name*, falling back to "world" when it is empty."""
    who = name or "world"
    return "Hello " + who + "!! (coming sooooon...)"
def _to_paras(text) -> list:
    """Split *text* into stripped, non-empty lines; ``None`` gives an empty list."""
    if text is None:
        return []
    stripped = (line.strip() for line in str(text).splitlines())
    return [line for line in stripped if line]


def ml_fn(
    text1: str,
    text2: str,
    split_to_sents: bool = False,
    preview: bool = False,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """Align multilingual (50+ pairs) text1 text2.

    Each text is split into non-blank, stripped lines. The two lists are
    passed through ``gen_cmat`` -> ``cmat2aset`` -> ``aset2pairs`` and the
    resulting rows are returned as a dataframe.

    Args:
        text1: first text, one block per line (``None`` is treated as empty).
        text2: second text, one block per line (``None`` is treated as empty).
        split_to_sents: when true, both block lists are additionally passed
            through ``seg_text``; a failure there is logged and the
            unsegmented blocks are kept.
        preview: when true, also render the dataframe as an HTML table.

    Returns:
        A 2-tuple ``(df, html)`` matching the interface's two outputs
        (dataframe, html). ``df`` has the columns ``text1``, ``text2`` and
        ``llh``; ``html`` is ``df.to_html()`` if ``preview`` is set, else
        ``None``. If either text has no non-blank line, ``df`` is a
        single-cell frame holding an explanatory message and ``html`` is
        ``None``.
    """
    paras1 = _to_paras(text1)
    paras2 = _to_paras(text2)

    if split_to_sents:  # TODO
        try:
            paras1 = seg_text(paras1)
        except Exception as exc:
            logger.error(exc)
        try:
            paras2 = seg_text(paras2)
        except Exception as exc:
            logger.error(exc)

    len1 = len(paras1)
    len2 = len(paras2)

    # Bail out before doing any alignment work: with an empty side there is
    # nothing to align. The tuple has the same arity as the normal return.
    if not (len1 and len2):
        msg = "At least one text is empty... nothing to do."
        return pd.DataFrame([[msg]]), None

    ic(len1, len2)

    with about_time() as t:
        try:
            cmat = gen_cmat(paras1, paras2)
        except Exception as exc:
            logger.exception(exc)
            logger.info(paras1)
            logger.info(paras2)
            logger.info("len(paras1): %s, len(paras2): %s", len1, len2)
            cmat = [[]]

        try:
            aset = cmat2aset(cmat)
        except Exception as exc:
            logger.exception(exc)
            aset = [["", "", ""]]

    # len12 is guaranteed to be positive here because both sides are non-empty.
    len12 = len1 + len2
    av = f"{t.duration / len12 * 1000:.2f}"
    logger.info(" %s blocks, took %s, av. %s s/1000 blk", len12, t.duration_human, av)

    pairs = aset2pairs(paras1, paras2, aset)
    df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"])

    html = df.to_html() if preview else None

    return df, html
# Input components, in the positional order of ml_fn's parameters:
# two text areas followed by the "split to sentences" and "preview" switches.
_inputs = [
    "textarea",
    "textarea",
    gr.Checkbox(label="Split to sents?"),
    gr.Checkbox(label="Preview?"),
]

# Output components: the aligned dataframe and its optional HTML rendering.
# (A csv download output is currently disabled.)
_outputs = ["dataframe", "html"]

# Each example row supplies one value per input component.
_examples = [
    ["a\nb\nc", "a\nc", False, False],
    ["test 1 \n test2", "测试 1\n我爱你\n 更多测试", False, False],
    [text1, text2, True, True],
]

# The web UI wrapping ml_fn; flagging is switched off.
iface = gr.Interface(
    fn=ml_fn,
    inputs=_inputs,
    outputs=_outputs,
    title=f"radio-mlbee {__version__}",
    description="mlbee rest api on dev ",
    examples=_examples,
    allow_flagging="never",
)
# Debug mode follows the configured log level (10 or lower means debug).
debug = set_loglevel() <= 10

# Start the app with request queueing on; error display and debug mode
# are enabled only when debugging.
# Optional overrides: server_name="0.0.0.0", server_port=8889
iface.launch(
    show_error=debug,
    enable_queue=True,
    debug=debug,
)