Spaces:
Runtime error
Runtime error
"""Create entry.""" | |
# pylint: disable=invalid-name
import os | |
import time | |
from pathlib import Path | |
import gradio as gr | |
import logzero | |
import pandas as pd | |
from about_time import about_time | |
from aset2pairs import aset2pairs | |
from cmat2aset import cmat2aset | |
from icecream import install as ic_install, ic | |
from logzero import logger | |
from seg_text import seg_text | |
from set_loglevel import set_loglevel | |
from radio_mlbee import __version__ | |
from radio_mlbee.gen_cmat import gen_cmat | |
from radio_mlbee.utils import text1, text2 | |
# Log-level toggle: both assignments are kept so the level can be flipped by
# reordering them; the later one is the value that is actually in effect.
os.environ["LOGLEVEL"] = "10"  # debug on
os.environ["LOGLEVEL"] = "20"  # debug off (effective)

logzero.loglevel(set_loglevel())
logger.info(" debug is on " if set_loglevel() <= 10 else " debug is off ")

# Route icecream's ic() output through the logger, with call-site context.
ic_install()
ic.configureOutput(outputFunction=logger.info, includeContext=True)
ic.enable()  # call ic.disable() instead to silence ic() output

# Switch the process to Asia/Shanghai local time where the platform allows it.
os.environ["TZ"] = "Asia/Shanghai"
try:
    time.tzset()  # type: ignore
except Exception as exc:
    # Best effort only: the failure is logged and startup continues.
    logger.warning("time.tzset() error: %s. Probably running Windows, we let it pass.", exc)
def greet(name):
    """Return a placeholder greeting; a falsy *name* is greeted as "world"."""
    who = name or "world"
    return "Hello " + who + "!! (coming sooooon...)"
def ml_fn( | |
text1: str, | |
text2: str, | |
split_to_sents: bool = False, | |
preview: bool = False, | |
# download_csv: bool = False, # modi | |
) -> pd.DataFrame: | |
"""Align multilingual (50+ pairs) text1 text2.""" | |
text1 = str(text1) | |
text2 = str(text2) | |
try: | |
paras1 = text1.splitlines() | |
paras1 = [_.strip() for _ in paras1 if _.strip()] | |
except Exception as exc: | |
logger.error(" praras.slpitlines() erros: %s, setting to ['']", exc) | |
paras1 = [""] | |
try: | |
paras2 = text2.splitlines() | |
paras2 = [_.strip() for _ in paras2 if _.strip()] | |
except Exception as exc: | |
logger.error(" praras slpitlines erros: %s, setting to ['']", exc) | |
paras2 = [""] | |
if split_to_sents: # TODO | |
try: | |
paras1 = seg_text(paras1) | |
except Exception as exc: | |
logger.error(exc) | |
try: | |
paras2 = seg_text(paras2) | |
except Exception as exc: | |
logger.error(exc) | |
with about_time() as t: | |
try: | |
cmat = gen_cmat(paras1, paras2) | |
except Exception as exc: | |
logger.exception(exc) | |
logger.info(paras1) | |
logger.info(paras2) | |
logger.info("len(paras1): %s, len(paras2): %s", len(paras1), len(paras2)) | |
cmat = [[]] | |
try: | |
aset = cmat2aset(cmat) | |
except Exception as exc: | |
logger.exception(exc) | |
aset = [["", "", ""]] | |
len1 = len(paras1) | |
len2 = len(paras2) | |
ic(len1, len2) | |
if not (len1 and len2): | |
_ = "At least one text is empty... nothing to do." | |
return pd.DataFrame([[_]]), None, None | |
av = "" | |
len12 = len1 + len2 | |
if len12: | |
av = f"{t.duration / len12 * 1000:.2f}" | |
logger.info(" %s blocks, took %s, av. %s s/1000 blk", len12, t.duration_human, av) | |
pairs = aset2pairs(paras1, paras2, aset) | |
df = pd.DataFrame(pairs, columns=["text1", "text2", "llh"]) | |
html = None | |
if preview: | |
html = df.to_html() | |
_ = """ # modi | |
dl_csv = None | |
csv_str = None | |
if download_csv: | |
try: | |
dl_csv = Path("aligned-blocks.csv") | |
csv_str = df.to_csv(index=False) | |
dl_csv.write_text(csv_str, encoding="gbk") | |
ic("Saving df.to_csv to dl_csv...") | |
except Exception as exc: | |
logger.exception(exc) | |
# """ | |
# return df, html, dl_csv | |
return df, html # modi | |
# Gradio UI definition wrapping ml_fn.
iface = gr.Interface(
    title=f"radio-mlbee {__version__}",
    description="mlbee rest api on dev ",
    fn=ml_fn,
    # Positional inputs follow ml_fn's signature:
    # text1, text2, split_to_sents, preview.
    inputs=[
        "textarea",
        "textarea",
        gr.Checkbox(label="Split to sents?"),
        gr.Checkbox(label="Preview?"),
    ],
    # Matches the (df, html) pair returned at the end of ml_fn; the former
    # csv-download input/output pair has been dropped.
    outputs=[
        "dataframe",
        "html",
    ],
    # Each example row supplies one value per input above.
    examples=[
        ["a\nb\nc", "a\nc", False, False],
        ["test 1 \n test2", "测试 1\n我爱你\n 更多测试", False, False],
        [text1, text2, True, True],
    ],
    allow_flagging="never",
)
# Show errors and debug output only when the configured log level is 10 or lower.
debug = set_loglevel() <= 10

iface.launch(
    show_error=debug,
    debug=debug,
    enable_queue=True,
    # To bind explicitly, add e.g. server_name="0.0.0.0", server_port=8889.
)