Spaces:
Build error
Build error
freemt
committed on
Commit
•
c843262
1
Parent(s):
0a75fcd
Update refactoring with blocks
Browse files- nohup.out +0 -97
- ubee/__main__.py +81 -14
- ubee/__main__.py- +215 -0
nohup.out
DELETED
@@ -1,97 +0,0 @@
|
|
1 |
-
[33m[nodemon] 2.0.18[39m
|
2 |
-
[33m[nodemon] to restart at any time, enter `rs`[39m
|
3 |
-
[33m[nodemon] watching path(s): app.py ubee/**/*[39m
|
4 |
-
[33m[nodemon] watching extensions: py,json[39m
|
5 |
-
[32m[nodemon] starting `python -t py app.py`[39m
|
6 |
-
events.js:377
|
7 |
-
throw er; // Unhandled 'error' event
|
8 |
-
^
|
9 |
-
|
10 |
-
Error: EBADF: bad file descriptor, read
|
11 |
-
Emitted 'error' event on ReadStream instance at:
|
12 |
-
at internal/fs/streams.js:173:14
|
13 |
-
at FSReqCallback.wrapper [as oncomplete] (fs.js:563:5) {
|
14 |
-
errno: -9,
|
15 |
-
code: 'EBADF',
|
16 |
-
syscall: 'read'
|
17 |
-
}
|
18 |
-
python: can't open file 'py': [Errno 2] No such file or directory
|
19 |
-
[33m[nodemon] 2.0.18[39m
|
20 |
-
[33m[nodemon] to restart at any time, enter `rs`[39m
|
21 |
-
[33m[nodemon] watching path(s): app.py ubee/**/*[39m
|
22 |
-
[33m[nodemon] watching extensions: py[39m
|
23 |
-
[32m[nodemon] starting `python app.py`[39m
|
24 |
-
events.js:377
|
25 |
-
throw er; // Unhandled 'error' event
|
26 |
-
^
|
27 |
-
|
28 |
-
Error: EBADF: bad file descriptor, read
|
29 |
-
Emitted 'error' event on ReadStream instance at:
|
30 |
-
at internal/fs/streams.js:173:14
|
31 |
-
at FSReqCallback.wrapper [as oncomplete] (fs.js:563:5) {
|
32 |
-
errno: -9,
|
33 |
-
code: 'EBADF',
|
34 |
-
syscall: 'read'
|
35 |
-
}
|
36 |
-
[E 220701 19:47:16 fetch_check_aux:54]
|
37 |
-
Traceback (most recent call last):
|
38 |
-
File "/usr/lib/python3.8/pathlib.py", line 1288, in mkdir
|
39 |
-
self._accessor.mkdir(self, mode)
|
40 |
-
PermissionError: [Errno 13] Permission denied: '/root/.cache/huggingface/transformers'
|
41 |
-
|
42 |
-
During handling of the above exception, another exception occurred:
|
43 |
-
|
44 |
-
Traceback (most recent call last):
|
45 |
-
File "/home/mu2018/github/ultimatumbee/.venv/lib/python3.8/site-packages/model_pool/fetch_check_aux.py", line 52, in fetch_check_aux
|
46 |
-
local_dir.mkdir(parents=True, exist_ok=True)
|
47 |
-
File "/usr/lib/python3.8/pathlib.py", line 1297, in mkdir
|
48 |
-
if not exist_ok or not self.is_dir():
|
49 |
-
File "/usr/lib/python3.8/pathlib.py", line 1422, in is_dir
|
50 |
-
return S_ISDIR(self.stat().st_mode)
|
51 |
-
File "/usr/lib/python3.8/pathlib.py", line 1198, in stat
|
52 |
-
return self._accessor.stat(self)
|
53 |
-
PermissionError: [Errno 13] Permission denied: '/root/.cache/huggingface/transformers'
|
54 |
-
[W 220701 19:47:16 fetch_check_aux:55] You will need to run this as admin or root, or create /root (C:
|
55 |
-
[W 220701 19:47:16 load_model:44] You ll need to run as root or admin or give write permission to /root/.cache/huggingface or C:
|
56 |
-
[I 220701 19:47:17 model_s:32] Fetching and caching model_s from huggingface.co... The first time may take a while depending on your net.
|
57 |
-
Subsequent loading takes ~2-3 secs ... |███| 1/1 [100%] in 2.9s (0.34/s)
|
58 |
-
[I 220701 19:47:20 load_model:73] Fetching and caching clas-l-user from huggingface.co... The first time may take a while depending on your net.
|
59 |
-
Subsequent loading takes ~2-3 secs ... |███| 1/1 [100%] in 7.8s (0.13/s)
|
60 |
-
/home/mu2018/github/ultimatumbee/.venv/lib/python3.8/site-packages/gradio/deprecation.py:40: UserWarning: `optional` parameter is deprecated, and it has no effect
|
61 |
-
warnings.warn(value)
|
62 |
-
/home/mu2018/github/ultimatumbee/.venv/lib/python3.8/site-packages/gradio/deprecation.py:40: UserWarning: `numeric` parameter is deprecated, and it has no effect
|
63 |
-
warnings.warn(value)
|
64 |
-
/home/mu2018/github/ultimatumbee/.venv/lib/python3.8/site-packages/gradio/deprecation.py:40: UserWarning: The 'type' parameter has been deprecated. Use the Number component instead.
|
65 |
-
warnings.warn(value)
|
66 |
-
/home/mu2018/github/ultimatumbee/.venv/lib/python3.8/site-packages/gradio/deprecation.py:40: UserWarning: `layout` parameter is deprecated, and it has no effect
|
67 |
-
warnings.warn(value)
|
68 |
-
/home/mu2018/github/ultimatumbee/.venv/lib/python3.8/site-packages/gradio/interface.py:292: UserWarning: Currently, only the 'default' theme is supported.
|
69 |
-
warnings.warn("Currently, only the 'default' theme is supported.")
|
70 |
-
[I 220701 20:43:48 icecream:185] ic| __main__.py:56 in greet()
|
71 |
-
res1: ['她', '望望', '我', '往', '她', '的', '椅背', '一靠', '不', '动', '也', '不', '出声']
|
72 |
-
[I 220701 20:43:48 icecream:185] ic| __main__.py:57 in greet()
|
73 |
-
res2: ['She',
|
74 |
-
'at',
|
75 |
-
'me',
|
76 |
-
'leaning',
|
77 |
-
'and',
|
78 |
-
'mute',
|
79 |
-
'remained',
|
80 |
-
'in',
|
81 |
-
'her',
|
82 |
-
'chair',
|
83 |
-
'back',
|
84 |
-
'and',
|
85 |
-
'looked',
|
86 |
-
'motionless']
|
87 |
-
[I 220701 20:43:48 icecream:185] ic| ubee.py:33 in ubee()- seq: '她'
|
88 |
-
[I 220701 20:44:24 icecream:185] ic| ubee.py:33 in ubee()- seq: '望望'
|
89 |
-
[I 220701 20:44:58 icecream:185] ic| ubee.py:33 in ubee()- seq: '我'
|
90 |
-
[I 220701 20:45:31 icecream:185] ic| ubee.py:33 in ubee()- seq: '往'
|
91 |
-
[I 220701 20:46:06 icecream:185] ic| ubee.py:33 in ubee()- seq: '她'
|
92 |
-
[E 220701 20:46:06 ubee:41] list.remove(x): x not in list
|
93 |
-
[I 220701 20:46:06 ubee:42] seq: 她, lable: She
|
94 |
-
[I 220701 20:46:06 icecream:185] ic| ubee.py:33 in ubee()- seq: '的'
|
95 |
-
[E 220701 20:46:38 ubee:41] list.remove(x): x not in list
|
96 |
-
[I 220701 20:46:38 ubee:42] seq: 的, lable: at
|
97 |
-
[I 220701 20:46:38 icecream:185] ic| ubee.py:33 in ubee()- seq: '椅背'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ubee/__main__.py
CHANGED
@@ -16,9 +16,9 @@ from icecream import install as ic_install
|
|
16 |
from logzero import logger
|
17 |
|
18 |
# for embeddable python
|
19 |
-
if "." not in sys.path:
|
20 |
-
sys.path.insert(0, ".")
|
21 |
|
|
|
22 |
from ubee.ubee import ubee
|
23 |
|
24 |
# logzero.loglevel(10)
|
@@ -36,7 +36,7 @@ def greet1(name):
|
|
36 |
return "Hello " + name + "!!"
|
37 |
|
38 |
|
39 |
-
def
|
40 |
text1,
|
41 |
text2,
|
42 |
# segment: str
|
@@ -89,7 +89,7 @@ def main():
|
|
89 |
shuffle(_)
|
90 |
text_en = "\n\n".join(_)
|
91 |
|
92 |
-
title = "Ultimatumbee
|
93 |
theme = "dark-grass"
|
94 |
theme = "grass"
|
95 |
description = """WIP showcasing a novel aligner"""
|
@@ -106,24 +106,24 @@ def main():
|
|
106 |
ex1_zh = [
|
107 |
'雪开始下大了。',
|
108 |
'我握住门柄又试一回。',
|
109 |
-
'这时一个没穿外衣的年轻人,扛着一根草耙,在后面院子里出现了。',
|
110 |
'煤、炭和木材混合在一起燃起的熊熊炉火,使这屋子放着光彩。', '在准备摆上丰盛晚餐的桌旁,我很高兴地看到了那位“太太”,以前我从未料想到会有这么一个人存在的。',
|
111 |
'我鞠躬等候,以为她会叫我坐下。',
|
112 |
'她望望我,往她的椅背一靠,不动,也不出声。'
|
113 |
]
|
114 |
ex1_en = [
|
115 |
'The snow began to drive thickly.',
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
]
|
122 |
shuffle(ex1_en)
|
123 |
ex1_zh = "\n".join(ex1_zh)
|
124 |
ex1_en = "\n".join(ex1_en)
|
125 |
|
126 |
-
ex2_zh = "她\n望望\n我\n往\n
|
127 |
ex2_en = "She looked at me leaning back in her chair and remained motionless and mute".split()
|
128 |
shuffle(ex2_en)
|
129 |
ex2_en = "\n".join(ex2_en)
|
@@ -137,10 +137,10 @@ def main():
|
|
137 |
|
138 |
inputs = [
|
139 |
gr.inputs.Textbox(
|
140 |
-
lines=lines, placeholder=placeholder, default=
|
141 |
),
|
142 |
gr.inputs.Textbox(
|
143 |
-
lines=lines, placeholder=placeholder, default=
|
144 |
),
|
145 |
gr.inputs.Slider(
|
146 |
minimum=0.0,
|
@@ -181,6 +181,7 @@ def main():
|
|
181 |
leftover,
|
182 |
]
|
183 |
|
|
|
184 |
iface = gr.Interface(
|
185 |
fn=greet,
|
186 |
# fn=ubee,
|
@@ -200,6 +201,72 @@ def main():
|
|
200 |
enable_queue=True,
|
201 |
share=True,
|
202 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
|
205 |
if __name__ == "__main__":
|
@@ -212,4 +279,4 @@ _ = """
|
|
212 |
default="para",
|
213 |
label="segment"
|
214 |
)
|
215 |
-
# """
|
|
|
16 |
from logzero import logger
|
17 |
|
18 |
# for embeddable python
|
19 |
+
# if "." not in sys.path: sys.path.insert(0, ".")
|
|
|
20 |
|
21 |
+
from ubee import __version__
|
22 |
from ubee.ubee import ubee
|
23 |
|
24 |
# logzero.loglevel(10)
|
|
|
36 |
return "Hello " + name + "!!"
|
37 |
|
38 |
|
39 |
+
def ifn(
|
40 |
text1,
|
41 |
text2,
|
42 |
# segment: str
|
|
|
89 |
shuffle(_)
|
90 |
text_en = "\n\n".join(_)
|
91 |
|
92 |
+
title = "Ultimatumbee"
|
93 |
theme = "dark-grass"
|
94 |
theme = "grass"
|
95 |
description = """WIP showcasing a novel aligner"""
|
|
|
106 |
ex1_zh = [
|
107 |
'雪开始下大了。',
|
108 |
'我握住门柄又试一回。',
|
109 |
+
'这时一个没穿外衣的年轻人,扛着一根草耙,在后面院子里出现了。', '他招呼我跟着他走,穿过了一个洗衣房和一片铺平的地,那儿有煤棚、抽水机和鸽笼,我们终于到了我上次被接待过的那间温暖的、热闹的大屋子。',
|
110 |
'煤、炭和木材混合在一起燃起的熊熊炉火,使这屋子放着光彩。', '在准备摆上丰盛晚餐的桌旁,我很高兴地看到了那位“太太”,以前我从未料想到会有这么一个人存在的。',
|
111 |
'我鞠躬等候,以为她会叫我坐下。',
|
112 |
'她望望我,往她的椅背一靠,不动,也不出声。'
|
113 |
]
|
114 |
ex1_en = [
|
115 |
'The snow began to drive thickly.',
|
116 |
+
'I seized the handle to essay another trial; when a young man without coat, and shouldering a pitchfork, appeared in the yard behind.',
|
117 |
+
'He hailed me to follow him, and, after marching through a wash-house, and a paved area containing a coal shed, pump, and pigeon cot, we at length arrived in the huge, warm, cheerful apartment, where I was formerly received.',
|
118 |
+
"It glowed delightfully in the radiance of an immense fire, compounded of coal, peat, and wood; and near the table, laid for a plentiful evening meal, I was pleased to observe the `missis', an individual whose existence I had never previously suspected.",
|
119 |
+
'I bowed and waited, thinking she would bid me take a seat.',
|
120 |
+
'She looked at me, leaning back in her chair, and remained motionless and mute.'
|
121 |
]
|
122 |
shuffle(ex1_en)
|
123 |
ex1_zh = "\n".join(ex1_zh)
|
124 |
ex1_en = "\n".join(ex1_en)
|
125 |
|
126 |
+
ex2_zh = "她\n望望\n我\n往\n她的\n椅背\n一靠\n不\n动\n也\n不\n出声"
|
127 |
ex2_en = "She looked at me leaning back in her chair and remained motionless and mute".split()
|
128 |
shuffle(ex2_en)
|
129 |
ex2_en = "\n".join(ex2_en)
|
|
|
137 |
|
138 |
inputs = [
|
139 |
gr.inputs.Textbox(
|
140 |
+
lines=lines, placeholder=placeholder, default=ex2_zh, label="text1"
|
141 |
),
|
142 |
gr.inputs.Textbox(
|
143 |
+
lines=lines, placeholder=placeholder, default=ex2_en, label="text2"
|
144 |
),
|
145 |
gr.inputs.Slider(
|
146 |
minimum=0.0,
|
|
|
181 |
leftover,
|
182 |
]
|
183 |
|
184 |
+
_ = """ # switch to blocks
|
185 |
iface = gr.Interface(
|
186 |
fn=greet,
|
187 |
# fn=ubee,
|
|
|
201 |
enable_queue=True,
|
202 |
share=True,
|
203 |
)
|
204 |
+
# """
|
205 |
+
blocks = gr.Blocks()
|
206 |
+
|
207 |
+
with blocks:
|
208 |
+
gr.Markdown(
|
209 |
+
dedent(f"""
|
210 |
+
## Ultimatumbee {__version__}
|
211 |
+
|
212 |
+
Align non-sequential dualtexts.
|
213 |
+
|
214 |
+
""").strip()
|
215 |
+
)
|
216 |
+
with gr.Column():
|
217 |
+
with gr.Row():
|
218 |
+
text1 = gr.inputs.Textbox(
|
219 |
+
lines=lines, placeholder=placeholder, default=ex2_zh, label="text1"
|
220 |
+
),
|
221 |
+
text2 = gr.inputs.Textbox(
|
222 |
+
lines=lines, placeholder=placeholder, default=ex2_en, label="text2"
|
223 |
+
)
|
224 |
+
with gr.Row():
|
225 |
+
slider = gr.inputs.Slider(
|
226 |
+
minimum=0.0,
|
227 |
+
maximum=1.0,
|
228 |
+
step=0.1,
|
229 |
+
default=0.4,
|
230 |
+
label="threshold",
|
231 |
+
)
|
232 |
+
btn = gr.Button("Run")
|
233 |
+
out_df = gr.outputs.Dataframe(
|
234 |
+
headers=None,
|
235 |
+
max_rows=lines, # 20
|
236 |
+
max_cols=None,
|
237 |
+
overflow_row_behaviour="paginate",
|
238 |
+
type="auto",
|
239 |
+
label="To be aligned",
|
240 |
+
)
|
241 |
+
with gr.Row():
|
242 |
+
aligned = gr.outputs.Dataframe(
|
243 |
+
headers=None,
|
244 |
+
max_rows=lines, # 20
|
245 |
+
max_cols=None,
|
246 |
+
overflow_row_behaviour="paginate",
|
247 |
+
type="auto",
|
248 |
+
label="Aligned",
|
249 |
+
)
|
250 |
+
leftover = gr.outputs.Dataframe(
|
251 |
+
headers=None,
|
252 |
+
max_rows=lines, # 20
|
253 |
+
max_cols=None,
|
254 |
+
overflow_row_behaviour="paginate",
|
255 |
+
type="auto",
|
256 |
+
label="Leftover",
|
257 |
+
)
|
258 |
+
btn.click(
|
259 |
+
fn=ifn,
|
260 |
+
inputs=[
|
261 |
+
text1,
|
262 |
+
text2,
|
263 |
+
],
|
264 |
+
outputs=[
|
265 |
+
out_df,
|
266 |
+
aligned,
|
267 |
+
leftover,
|
268 |
+
]
|
269 |
+
)
|
270 |
|
271 |
|
272 |
if __name__ == "__main__":
|
|
|
279 |
default="para",
|
280 |
label="segment"
|
281 |
)
|
282 |
+
# """
|
ubee/__main__.py-
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Gen ubee main."""
|
2 |
+
# pylint: disable=unused-import, wrong-import-position, wrong-import-order, too-many-locals, broad-except, line-too-long
|
3 |
+
|
4 |
+
import sys
|
5 |
+
from itertools import zip_longest
|
6 |
+
from pathlib import Path
|
7 |
+
from random import shuffle
|
8 |
+
from textwrap import dedent
|
9 |
+
from typing import Optional, Tuple
|
10 |
+
|
11 |
+
import gradio as gr
|
12 |
+
import logzero
|
13 |
+
import pandas as pd
|
14 |
+
from icecream import ic
|
15 |
+
from icecream import install as ic_install
|
16 |
+
from logzero import logger
|
17 |
+
|
18 |
+
# for embeddable python
|
19 |
+
if "." not in sys.path:
|
20 |
+
sys.path.insert(0, ".")
|
21 |
+
|
22 |
+
from ubee.ubee import ubee
|
23 |
+
|
24 |
+
# logzero.loglevel(10)
|
25 |
+
ic_install()
|
26 |
+
ic.configureOutput(
|
27 |
+
includeContext=True,
|
28 |
+
outputFunction=logger.info,
|
29 |
+
)
|
30 |
+
ic.enable()
|
31 |
+
# ic.disenable() # to turn off
|
32 |
+
|
33 |
+
|
34 |
+
def greet1(name):
|
35 |
+
"""Dummy."""
|
36 |
+
return "Hello " + name + "!!"
|
37 |
+
|
38 |
+
|
39 |
+
def greet(
|
40 |
+
text1,
|
41 |
+
text2,
|
42 |
+
# segment: str
|
43 |
+
thresh: float
|
44 |
+
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
45 |
+
"""Take inputs, return outputs.
|
46 |
+
|
47 |
+
Args:
|
48 |
+
text1: text
|
49 |
+
text2: text
|
50 |
+
Returns:
|
51 |
+
pd.DataFrame
|
52 |
+
"""
|
53 |
+
res1 = [elm.strip() for elm in text1.splitlines() if elm.strip()]
|
54 |
+
res2 = [elm.strip() for elm in text2.splitlines() if elm.strip()]
|
55 |
+
|
56 |
+
ic(res1)
|
57 |
+
ic(res2)
|
58 |
+
|
59 |
+
# _ = pd.DataFrame(zip_longest(res1, res2), columns=["text1", "text2"])
|
60 |
+
# return _
|
61 |
+
|
62 |
+
res1_, res2_ = ubee(res1, res2, thresh)
|
63 |
+
|
64 |
+
out_df = pd.DataFrame(
|
65 |
+
zip_longest(res1, res2),
|
66 |
+
columns=["text1", "text2"],
|
67 |
+
)
|
68 |
+
|
69 |
+
if res2_:
|
70 |
+
_ = pd.DataFrame(res2_, columns=["text1", "text2"])
|
71 |
+
else:
|
72 |
+
_ = None
|
73 |
+
|
74 |
+
return out_df, pd.DataFrame(res1_, columns=["text1", "text2", "likelihood"]), _
|
75 |
+
|
76 |
+
|
77 |
+
def main():
|
78 |
+
"""Create main entry."""
|
79 |
+
text_zh = Path("data/test_zh.txt").read_text(encoding="utf8")
|
80 |
+
text_zh = [elm.strip() for elm in text_zh.splitlines() if elm.strip()][:10]
|
81 |
+
text_zh = "\n\n".join(text_zh)
|
82 |
+
|
83 |
+
text_en = [
|
84 |
+
elm.strip()
|
85 |
+
for elm in Path("data/test_en.txt").read_text(encoding="utf8").splitlines()
|
86 |
+
if elm.strip()
|
87 |
+
]
|
88 |
+
_ = text_en[:9]
|
89 |
+
shuffle(_)
|
90 |
+
text_en = "\n\n".join(_)
|
91 |
+
|
92 |
+
title = "Ultimatumbee Aligner"
|
93 |
+
theme = "dark-grass"
|
94 |
+
theme = "grass"
|
95 |
+
description = """WIP showcasing a novel aligner"""
|
96 |
+
article = dedent("""
|
97 |
+
## NB
|
98 |
+
|
99 |
+
* The ultimatumbee aligner (``ubee`` for short) is intended for aligning text blocks (be it paragraphs, sentences or words). Since it is rather slow (30 para pairs (Wuthering Height ch1. for example) can take 10 to 20 mniutes), anything more than 50 blocks should probably be avaoided. Nevertheless, you are welcome to try. No big brother is watching.
|
100 |
+
|
101 |
+
* ``thresh``: longer text blocks justify a larger value; `.5` appears to be just right for paragraphs for Wuthering Height ch1.
|
102 |
+
|
103 |
+
Stay tuned for more details coming soon...
|
104 |
+
""").strip()
|
105 |
+
|
106 |
+
ex1_zh = [
|
107 |
+
'雪开始下大了。',
|
108 |
+
'我握住门柄又试一回。',
|
109 |
+
'这时一个没穿外衣的年轻人,扛着一根草耙,在后面院子里出现了。', '他招呼我跟着他走,穿过了一个洗衣房和一片铺平的地,那儿有煤棚、抽水机和鸽笼,我们终于到了我上次被接待过的那间温暖的、热闹的大屋子。',
|
110 |
+
'煤、炭和木材混合在一起燃起的熊熊炉火,使这屋子放着光彩。', '在准备摆上丰盛晚餐的桌旁,我很高兴地看到了那位“太太”,以前我从未料想到会有这么一个人存在的。',
|
111 |
+
'我鞠躬等候,以为她会叫我坐下。',
|
112 |
+
'她望望我,往她的椅背一靠,不动,也不出声。'
|
113 |
+
]
|
114 |
+
ex1_en = [
|
115 |
+
'The snow began to drive thickly.',
|
116 |
+
'I seized the handle to essay another trial; when a young man without coat, and shouldering a pitchfork, appeared in the yard behind.',
|
117 |
+
'He hailed me to follow him, and, after marching through a wash-house, and a paved area containing a coal shed, pump, and pigeon cot, we at length arrived in the huge, warm, cheerful apartment, where I was formerly received.',
|
118 |
+
"It glowed delightfully in the radiance of an immense fire, compounded of coal, peat, and wood; and near the table, laid for a plentiful evening meal, I was pleased to observe the `missis', an individual whose existence I had never previously suspected.",
|
119 |
+
'I bowed and waited, thinking she would bid me take a seat.',
|
120 |
+
'She looked at me, leaning back in her chair, and remained motionless and mute.'
|
121 |
+
]
|
122 |
+
shuffle(ex1_en)
|
123 |
+
ex1_zh = "\n".join(ex1_zh)
|
124 |
+
ex1_en = "\n".join(ex1_en)
|
125 |
+
|
126 |
+
ex2_zh = "她\n望望\n我\n往\n她\n的\n椅背\n一靠\n不\n动\n也\n不\n出声"
|
127 |
+
ex2_en = "She looked at me leaning back in her chair and remained motionless and mute".split()
|
128 |
+
shuffle(ex2_en)
|
129 |
+
ex2_en = "\n".join(ex2_en)
|
130 |
+
|
131 |
+
examples = [
|
132 |
+
[ex2_zh, ex2_en, .3],
|
133 |
+
[text_zh, text_en, .5],
|
134 |
+
]
|
135 |
+
lines = 15
|
136 |
+
placeholder = "Type or paste text here"
|
137 |
+
|
138 |
+
inputs = [
|
139 |
+
gr.inputs.Textbox(
|
140 |
+
lines=lines, placeholder=placeholder, default=ex1_zh, label="text1"
|
141 |
+
),
|
142 |
+
gr.inputs.Textbox(
|
143 |
+
lines=lines, placeholder=placeholder, default=ex1_en, label="text2"
|
144 |
+
),
|
145 |
+
gr.inputs.Slider(
|
146 |
+
minimum=0.0,
|
147 |
+
maximum=1.0,
|
148 |
+
step=0.1,
|
149 |
+
default=0.4,
|
150 |
+
label="threshold",
|
151 |
+
),
|
152 |
+
]
|
153 |
+
|
154 |
+
out_df = gr.outputs.Dataframe(
|
155 |
+
headers=None,
|
156 |
+
max_rows=lines, # 20
|
157 |
+
max_cols=None,
|
158 |
+
overflow_row_behaviour="paginate",
|
159 |
+
type="auto",
|
160 |
+
label="To be aligned",
|
161 |
+
)
|
162 |
+
aligned = gr.outputs.Dataframe(
|
163 |
+
headers=None,
|
164 |
+
max_rows=lines, # 20
|
165 |
+
max_cols=None,
|
166 |
+
overflow_row_behaviour="paginate",
|
167 |
+
type="auto",
|
168 |
+
label="Aligned",
|
169 |
+
)
|
170 |
+
leftover = gr.outputs.Dataframe(
|
171 |
+
headers=None,
|
172 |
+
max_rows=lines, # 20
|
173 |
+
max_cols=None,
|
174 |
+
overflow_row_behaviour="paginate",
|
175 |
+
type="auto",
|
176 |
+
label="Leftover",
|
177 |
+
)
|
178 |
+
outputs = [ # tot. 3
|
179 |
+
out_df,
|
180 |
+
aligned,
|
181 |
+
leftover,
|
182 |
+
]
|
183 |
+
|
184 |
+
iface = gr.Interface(
|
185 |
+
fn=greet,
|
186 |
+
# fn=ubee,
|
187 |
+
title=title,
|
188 |
+
theme=theme,
|
189 |
+
layout="vertical", # horizontal unaligned
|
190 |
+
description=description,
|
191 |
+
article=article,
|
192 |
+
# inputs="text",
|
193 |
+
# outputs="text",
|
194 |
+
inputs=inputs, # text1, text2, segment, thresh
|
195 |
+
outputs=outputs,
|
196 |
+
examples=examples,
|
197 |
+
# enable_queue=True,
|
198 |
+
)
|
199 |
+
iface.launch(
|
200 |
+
enable_queue=True,
|
201 |
+
share=True,
|
202 |
+
)
|
203 |
+
|
204 |
+
|
205 |
+
if __name__ == "__main__":
|
206 |
+
main()
|
207 |
+
|
208 |
+
_ = """
|
209 |
+
|
210 |
+
gr.inputs.Radio(
|
211 |
+
["para", "sent", "word"],
|
212 |
+
default="para",
|
213 |
+
label="segment"
|
214 |
+
)
|
215 |
+
# """
|