Spaces:
Build error
Build error
File size: 9,357 Bytes
bf87f43 d7cdc67 89d669f f537c4c 0905493 d7cdc67 5ae3f92 d7cdc67 89d669f 16195e5 89d669f d7cdc67 89d669f 7d6526a f537c4c c843262 89d669f 6663376 03be791 89d669f 03be791 7d6526a 99cd496 efbffad 37fb9cf 92cb1c3 4be3c52 abc8266 4be3c52 37fb9cf 4be3c52 79618dd 7d6526a 0b7bd15 efbffad 16195e5 6b29c07 6663376 89d669f 42b9713 89d669f 4aca0df 89d669f 5fb870c 844aef2 7d6526a 844aef2 0905493 3812263 0905493 771426e ac0951c 6663376 71fa1a4 6663376 89d669f 16195e5 0c6d923 70f5c39 0905493 5ae3f92 844aef2 0905493 844aef2 5fb870c 844aef2 c843262 44c4eaa 5fb870c 16195e5 6663376 5ae3f92 da8f9c2 5ae3f92 da8f9c2 6663376 c978e0b 6663376 c978e0b 6663376 c843262 6663376 16195e5 c978e0b 5821b23 16195e5 c843262 c978e0b 5821b23 c978e0b 6663376 c978e0b 6ed04ff 89d669f 1ccfc22 899f8ea c843262 899f8ea c843262 6663376 4360582 c843262 1b2837a 130a100 6663376 c843262 771426e edd5899 c843262 771426e c843262 34c6270 899f8ea c843262 cd374f0 c843262 6663376 771426e c843262 771426e 6663376 c843262 771426e c843262 771426e c843262 771426e 6663376 312ea97 7d6526a c843262 7f331da c843262 130a100 c843262 6663376 c843262 16195e5 efbffad 16195e5 899f8ea 16195e5 1ccfc22 c843262 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 |
"""Gen ubee main.
private
url = 'https://hf.space/embed/mikeee/zero-shot/+/api/predict'
resp = httpx.post(
url,
json={"data": ["love", ",".join(["liebe", "this is test", "hate you"]), False]},
timeout=httpx.Timeout(None, connect=3),
)
resp.json()
{'data': [{'label': 'liebe',
'confidences': [{'label': 'liebe', 'confidence': 0.8688847422599792},
{'label': 'this is test', 'confidence': 0.12558135390281677},
{'label': 'hate you', 'confidence': 0.005533925257623196}]}],
'duration': 0.265749454498291,
'average_duration': 4.639325571060181}
"""
# pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except, line-too-long
import sys
from itertools import zip_longest
from pathlib import Path
from random import shuffle
from textwrap import dedent
from typing import Optional, Tuple
import gradio as gr
import logzero
import pandas as pd
from icecream import ic
from icecream import install as ic_install
from logzero import logger
from set_loglevel import set_loglevel
from ubee import __version__
from ubee.ubee import ubee
# for embeddable python
# if "." not in sys.path: sys.path.insert(0, ".")

# Set the logzero log level to whatever set_loglevel() returns.
logzero.loglevel(set_loglevel())
logger.debug(" debug on ")

# Call icecream's install() and send ic() output through logger.info,
# with call-site context included.
ic_install()
ic.configureOutput(
    includeContext=True,
    outputFunction=logger.info,
)
ic.enable()
# ic.disable()  # to turn off
ic(" ic.enabled ")

# Disabled model_pool smoke test. It is kept inside a string literal that is
# only bound to the throwaway name ``_``, so none of it is executed.
_ = """
ic("Testing...")
import model_pool
from model_pool import fetch_check_aux
print("model-pool version", model_pool.__version__)
print("gradio version", gr.__version__)
try:
fetch_check_aux.fetch_check_aux()
except Exception as _:
ic(["fetch_check_aux.fetch_check_aux", _])
from model_pool.load_model import load_model
try:
clas = load_model("clas-l-user")
except Exception as _:
ic(["load_model(\"clas-l-user\")", _])
# """
# Sample call and output recorded for the disabled ``clas`` model above:
# _ = clas("love", ["liebe", "hate you", "test"])
# print(_)
# raise SystemExit("Exit by intention")
# {'sequence': 'love', 'labels': ['liebe', 'test', 'hate you'],
# 'scores': [0.8885253667831421, 0.10581762343645096, 0.005657028406858444]}
# Runs OK
# segment: str
def ifn(text1, text2, thresh):
    """Align two texts with ``ubee`` and render the results as HTML.

    Both inputs are split into lines; each line is stripped of surrounding
    whitespace and blank lines are dropped before the two lists are handed
    to ``ubee``.

    Args:
        text1: first text, one block per line.
        text2: second text, one block per line.
        thresh: threshold value forwarded unchanged to ``ubee``.

    Returns:
        A ``(aligned_html, leftover_html)`` tuple. ``aligned_html`` is the
        HTML table of the first ``ubee`` result with columns ``text1``,
        ``text2`` and ``likelihood``. ``leftover_html`` is the HTML table of
        the second ``ubee`` result with columns ``text1`` and ``text2``, or
        ``None`` when that result is falsy.
    """
    res1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
    res2 = [elm.strip() for elm in text2.splitlines() if elm.strip()]

    ic(res1)
    ic(res2)

    res1_, res2_ = ubee(res1, res2, thresh)

    # Nothing left over: hand None to the output component instead of
    # rendering an empty table.
    if res2_:
        leftover_html = pd.DataFrame(res2_, columns=["text1", "text2"]).to_html()
    else:
        leftover_html = None

    aligned_html = pd.DataFrame(
        res1_, columns=["text1", "text2", "likelihood"]
    ).to_html()

    return aligned_html, leftover_html
def main():
    """Build the Gradio Blocks UI for ubee and launch it.

    The page shows a short Markdown header, two text boxes pre-filled with a
    Chinese/English sample (the English lines are shuffled), a threshold
    slider and a "Run" button. Clicking the button calls ``ifn`` with the two
    texts and the threshold and shows its two return values in the
    "Aligned" and "Leftover" HTML panes.

    Raises:
        OSError: if ``data/test_zh.txt`` or ``data/test_en.txt`` cannot be
            read.
    """
    # Longer sample pair read from disk: the first 10 non-blank Chinese lines
    # and the first 9 non-blank English lines, the latter shuffled.
    text_zh = Path("data/test_zh.txt").read_text(encoding="utf8")
    text_zh = [elm.strip() for elm in text_zh.splitlines() if elm.strip()][:10]
    text_zh = "\n\n".join(text_zh)

    text_en = [
        elm.strip()
        for elm in Path("data/test_en.txt").read_text(encoding="utf8").splitlines()
        if elm.strip()
    ]
    en_sample = text_en[:9]
    shuffle(en_sample)
    text_en = "\n\n".join(en_sample)

    # NOTE(review): title, theme, description, article and examples are not
    # referenced by the Blocks layout below.
    title = "Ultimatumbee"
    theme = "grass"
    description = """WIP showcasing a novel aligner"""
    article = dedent(
        """
        ## NB
        * The ultimatumbee aligner (``ubee`` for short) is intended for aligning text blocks (be it paragraphs, sentences or words). Since it is rather slow (30 para pairs (Wuthering Height ch1. for example) can take 10 to 20 mniutes), anything more than 50 blocks should probably be avaoided. Nevertheless, you are welcome to try. No big brother is watching.
        * ``thresh``: longer text blocks justify a larger value; `.5` appears to be just right for paragraphs for Wuthering Height ch1.
        Stay tuned for more details coming soon...
        """
    ).strip()

    # Sentence-level sample used as the default content of the text boxes.
    ex1_zh = [
        "雪开始下大了。",
        "我握住门柄又试一回。",
        "这时一个没穿外衣的年轻人,扛着一根草耙,在后面院子里出现了。",
        "他招呼我跟着他走,穿过了一个洗衣房和一片铺平的地,那儿有煤棚、抽水机和鸽笼,我们终于到了我上次被接待过的那间温暖的、热闹的大屋子。",
        "煤、炭和木材混合在一起燃起的熊熊炉火,使这屋子放着光彩。",
        "在准备摆上丰盛晚餐的桌旁,我很高兴地看到了那位“太太”,以前我从未料想到会有这么一个人存在的。",
        "我鞠躬等候,以为她会叫我坐下。",
        "她望望我,往她的椅背一靠,不动,也不出声。",
    ]
    ex1_en = [
        "The snow began to drive thickly.",
        "I seized the handle to essay another trial; when a young man without coat, and shouldering a pitchfork, appeared in the yard behind.",
        "He hailed me to follow him, and, after marching through a wash-house, and a paved area containing a coal shed, pump, and pigeon cot, we at length arrived in the huge, warm, cheerful apartment, where I was formerly received.",
        "It glowed delightfully in the radiance of an immense fire, compounded of coal, peat, and wood; and near the table, laid for a plentiful evening meal, I was pleased to observe the `missis', an individual whose existence I had never previously suspected.",
        "I bowed and waited, thinking she would bid me take a seat.",
        "She looked at me, leaning back in her chair, and remained motionless and mute.",
    ]
    shuffle(ex1_en)
    ex1_zh = "\n".join(ex1_zh)
    ex1_en = "\n".join(ex1_en)

    # Word-level sample.
    ex2_zh = "她\n望望\n我\n往\n她的\n椅背\n一靠\n不\n动\n也\n不\n出声"
    ex2_en = "She looked at me leaning back in her chair and remained motionless and mute".split()
    shuffle(ex2_en)
    ex2_en = "\n".join(ex2_en)

    examples = [
        [ex2_zh, ex2_en, 0.3],
        [text_zh, text_en, 0.5],
    ]

    lines = 15
    placeholder = "Type or paste text here"

    with gr.Blocks() as blocks:
        gr.Markdown(
            dedent(
                f"""
                ## Ultimatumbee {__version__}
                Align non-sequential dualtexts.
                可对词、句、段,每个词(或句或段)一行。可对任意语言对(英中、英德、德法、中日……等等)。建议 threshold 门槛值 -- 词: 0.3,句:0.5, 段: 0.7。如果太多 leftover,可适当调小 threshold。 如果太多误对则可以适当调大 threshold。
                """
            ).strip()
        )
        with gr.Column():
            with gr.Row():
                # gr.Textbox(value=...) replaces the deprecated
                # gr.inputs.Textbox(default=...), matching the gr.Slider and
                # gr.HTML components used below.
                text1 = gr.Textbox(
                    lines=lines, placeholder=placeholder, value=ex1_zh, label="text1"
                )
                text2 = gr.Textbox(
                    lines=lines, placeholder=placeholder, value=ex1_en, label="text2"
                )
            with gr.Row():
                thresh = gr.Slider(
                    minimum=0.1,
                    maximum=0.9,
                    step=0.1,
                    value=0.4,
                    label="threshold",
                )
                btn = gr.Button("Run")
            with gr.Row():
                # ifn returns ready-made HTML tables, hence HTML panes.
                aligned = gr.HTML(label="Aligned")
                leftover = gr.HTML(label="Leftover")

        btn.click(
            fn=ifn,
            inputs=[
                text1,
                text2,
                thresh,
            ],
            outputs=[
                aligned,
                leftover,
            ],
        )

    blocks.launch(debug=True, enable_queue=True)
# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    # logger.info(" Start main()")
    main()

# Parked Radio input for a "segment" choice (para/sent/word). It sits inside a
# string literal bound to the throwaway name ``_`` and is never executed.
_ = """
gr.inputs.Radio(
["para", "sent", "word"],
default="para",
label="segment"
)
# """
|