Spaces:
Build error
Build error
freemt
committed on
Commit
•
077e1eb
1
Parent(s):
03014a3
Update short text bug fix
Browse files- data/en.txt +8 -0
- data/testen.txt +5 -0
- data/testzh.txt +5 -0
- data/zh.txt +7 -0
- radiobee/__main__.py +43 -13
- radiobee/files2df.py +1 -0
- radiobee/gen_pset.py +34 -1
- radiobee/loadtext.py +2 -16
- requirements.txt +1 -0
- tests/test_en_zh_short.py +71 -0
- tests/test_files2df.py +13 -4
- tests/test_main.py +1 -0
data/en.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[Young Warrior] Kingold(184283681) 2021-12-30 22:27:37
|
2 |
+
It seems that the standalone version can
|
3 |
+
omit the GUI and specify the two files to be aligned directly on the command line.
|
4 |
+
|
5 |
+
|
6 |
+
But if it's not the GUI module that's taking up space, then
|
7 |
+
removing it won't help compress the size of the whole package.
|
8 |
+
|
data/testen.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Superfast Computer Chip Transmits Data with Light
|
2 |
+
Computer chips have two important parts—the logic on the chip, which computes and executes programs. Then there’s the part that sends and receives—gets data to crunch, sends back the answer. And while that first part, chip logic, has gotten much faster over the years, the transmission part has lagged behind, because data gets sent via electrical signals passing through copper.
|
3 |
+
So researchers designed a chip that exchanges data with light instead. "By going into optics, we're able to relieve this fundamental bottleneck of copper, and in doing so we're able to increase the bandwidth density on the chip, so how fast the chip can take data in and out, by an order of magnitude," said Chen Sun, a computer hardware researcher at UC Berkeley, and the startup Ayar Labs.
|
4 |
+
A metal pin on the memory chip in your computer might transmit at 1.6 gigabits per second. Sun's optical connection ups that rate to 2.5 gigabits per second. Not a huge difference on the face of it. But the killer app here is that multiple wavelengths of light—up to 11—can be used simultaneously to send data through a single fiber, which means this technology has potential speeds of 27.5 gigabits per second—more than an order of magnitude faster than today’s standard. "So that's the extra dimension we have to scale bandwidth that we don't have with normal electrical signals." The findings appear in the journal Nature.
|
5 |
+
These chips with optical connections are not just high-speed—they also require less energy than the copper versions. That could be a big deal, with server farms projected to outpace every other commercial use of electricity within the next decade. Going optical could thus be a win-win: faster processing using a fraction of the energy.
|
data/testzh.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
�����ټ�����оƬ
|
2 |
+
�����оƬ��������Ҫ���֣���һ��оƬ�ϵ�����·�����ڼ����ִ�г�������Ǵ�����·�����ڷ��ͺͽ������ݣ����������ݴ�����Ȼ�ؽ����ǰ����Щ�귢չʮ��Ѹ�٣����������ڳ������ݵĵ��ź���ͨ��ͭ�ߴ��͵ģ������չ��Ϊ�ͺ�
|
3 |
+
�����о���Ա��������ù�������ݽ�����оƬ�������Ǽ��ݴ�ѧ��������У�մ������õİ��Ƕ�ʵ���ҵ�˫Ƹ�����Ӳ���о�Ա�� ��˵����ͨ�����ù�ѧ�����������ܽ��ͭ�ߴ�����е�ƿ�����⣬�������ǿ�������оƬ�Ŀ����ܶȣ���ˣ���������������ٶȻ�쵽����һ����������
|
4 |
+
���˵����ڴ�оƬ�Ľ�����Ŵ����ٶ�Ϊ1.6������/�룬���ԵĹоƬ�ɽ���һ�ٶ�������2.5������/�롣����濴��û��̫��IJ��죬��������ɱ֮�����ڽ�һ�����˿�ͬʱ��������Ⲩ�������ɴ�11���������������ݡ���Ҳ����ζ���䴫���ٶȿɴﵽ27.5������/�룬��Ƚ���ı�����һ����������������˵�������������辶����չ����ͨ���ź���ӵ�еĴ������� ���о���������ڡ���Ȼ����־�ϡ�
|
5 |
+
�����ͭ�ߵ絼оƬ����Щ�оƬ���������������ٶȿ죬�����ܺ�Ҳ�ϵ͡���δ��ʮ���ڣ�Ԥ�Ƶ�������Ⱥ�ĵ������ᳬ���κ�������ҵ�õ磬�����⣨һ�������£�������һ�������ش��Ͷ�ʡ� ��ѧ��Ӧ�ý���������һ��˫Ӯ�ľ���:�����ٶȵ���������Դ���Ľϵļ��١�
|
data/zh.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
【少侠】Kingold(184283681) 2021-12-30 22:27:37
|
2 |
+
单机版貌似可以省略掉图形界面,直接
|
3 |
+
命令行指定两个待对齐文件。
|
4 |
+
|
5 |
+
不过如果占地方的
|
6 |
+
|
7 |
+
不是图形界面的模块,那去掉了也对压缩整个包的大小没帮助。
|
radiobee/__main__.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
"""Run interactively."""
|
|
|
2 |
from typing import Tuple # , Optional
|
3 |
|
4 |
-
|
5 |
from pathlib import Path
|
6 |
-
import
|
7 |
from random import randint
|
8 |
from textwrap import dedent
|
9 |
from itertools import zip_longest
|
10 |
-
from sklearn.cluster import DBSCAN
|
11 |
-
|
12 |
from socket import socket, AF_INET, SOCK_STREAM
|
13 |
-
|
|
|
|
|
14 |
from varname import nameof
|
15 |
from logzero import logger
|
16 |
|
@@ -22,6 +23,8 @@ import matplotlib.pyplot as plt
|
|
22 |
# from tabulate import tabulate
|
23 |
from fastlid import fastlid
|
24 |
|
|
|
|
|
25 |
import gradio as gr
|
26 |
from radiobee.process_upload import process_upload
|
27 |
from radiobee.files2df import files2df
|
@@ -43,6 +46,7 @@ print("Press Ctrl+C to quit\n")
|
|
43 |
|
44 |
|
45 |
def savelzma(obj, fileloc: str = None):
|
|
|
46 |
if fileloc is None:
|
47 |
fileloc = nameof(obj) # this wont work
|
48 |
joblib.dump(obj, f"data/{fileloc}.lzma")
|
@@ -55,6 +59,7 @@ def greet(input):
|
|
55 |
|
56 |
def upfile1(file1, file2=None) -> Tuple[str, str]:
|
57 |
"""Upload file1, file2."""
|
|
|
58 |
return file1.name, f"'Sup yo! (your input: {input})"
|
59 |
|
60 |
|
@@ -245,6 +250,11 @@ if __name__ == "__main__":
|
|
245 |
sns.set()
|
246 |
sns.set_style("darkgrid")
|
247 |
|
|
|
|
|
|
|
|
|
|
|
248 |
fig = plt.figure()
|
249 |
gs = fig.add_gridspec(2, 2, wspace=0.4, hspace=0.58)
|
250 |
ax2 = fig.add_subplot(gs[0, 0])
|
@@ -260,7 +270,8 @@ if __name__ == "__main__":
|
|
260 |
fig.suptitle("alignment projection")
|
261 |
|
262 |
_ = DBSCAN(min_samples=min_samples, eps=eps).fit(df_).labels_ > -1
|
263 |
-
_x = DBSCAN(min_samples=min_samples, eps=eps).fit(df_).labels_ < 0
|
|
|
264 |
|
265 |
df_.plot.scatter("x", "y", c="cos", cmap=cmap, ax=ax0)
|
266 |
|
@@ -309,12 +320,28 @@ if __name__ == "__main__":
|
|
309 |
)
|
310 |
|
311 |
# process lst1, lst2 to obtained df_aligned
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
min_samples
|
316 |
-
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
src_len, tgt_len = cmat.shape
|
319 |
aset = gen_aset(pset, src_len, tgt_len)
|
320 |
final_list = align_texts(aset, lst2, lst1) # note the order
|
@@ -360,7 +387,10 @@ if __name__ == "__main__":
|
|
360 |
* Click "Clear" first for subsequent submits when uploading files.
|
361 |
* `tf_type` `idf_type` `dl_type` `norm`: Normally there is no need to touch these unless you know what you are doing.
|
362 |
* Suggested `esp` and `min_samples` values -- `esp` (minimum epsilon): 8-12, `min_samples`: 4-8.
|
363 |
-
- Smaller larger `esp` or `min_samples` will result in more aligned pairs but also more **false positives** (pairs
|
|
|
|
|
|
|
364 |
* If you need to have a better look at the image, you can right-click on the image and select copy-image-address and open a new tab in the browser with the copied image address.
|
365 |
* `Flag`: Should something go wrong, you can click Flag to save the output and inform the developer.
|
366 |
"""
|
|
|
1 |
"""Run interactively."""
|
2 |
+
# pylint: disable=invalid-name, too-many-arguments, unused-argument, redefined-builtin, wrong-import-position, too-many-locals, too-many-statements
|
3 |
from typing import Tuple # , Optional
|
4 |
|
5 |
+
import sys
|
6 |
from pathlib import Path
|
7 |
+
import signal
|
8 |
from random import randint
|
9 |
from textwrap import dedent
|
10 |
from itertools import zip_longest
|
|
|
|
|
11 |
from socket import socket, AF_INET, SOCK_STREAM
|
12 |
+
|
13 |
+
from sklearn.cluster import DBSCAN
|
14 |
+
import joblib
|
15 |
from varname import nameof
|
16 |
from logzero import logger
|
17 |
|
|
|
23 |
# from tabulate import tabulate
|
24 |
from fastlid import fastlid
|
25 |
|
26 |
+
if "." not in sys.path:
|
27 |
+
sys.path.insert(0, ".")
|
28 |
import gradio as gr
|
29 |
from radiobee.process_upload import process_upload
|
30 |
from radiobee.files2df import files2df
|
|
|
46 |
|
47 |
|
48 |
def savelzma(obj, fileloc: str = None):
|
49 |
+
"""Aux function."""
|
50 |
if fileloc is None:
|
51 |
fileloc = nameof(obj) # this wont work
|
52 |
joblib.dump(obj, f"data/{fileloc}.lzma")
|
|
|
59 |
|
60 |
def upfile1(file1, file2=None) -> Tuple[str, str]:
|
61 |
"""Upload file1, file2."""
|
62 |
+
del file2
|
63 |
return file1.name, f"'Sup yo! (your input: {input})"
|
64 |
|
65 |
|
|
|
250 |
sns.set()
|
251 |
sns.set_style("darkgrid")
|
252 |
|
253 |
+
# close all existing figures, necessary for hf spaces
|
254 |
+
plt.close("all")
|
255 |
+
# if sys.platform not in ["win32", "linux"]:
|
256 |
+
plt.switch_backend('Agg') # to cater for Mac, thanks to WhiteFox
|
257 |
+
|
258 |
fig = plt.figure()
|
259 |
gs = fig.add_gridspec(2, 2, wspace=0.4, hspace=0.58)
|
260 |
ax2 = fig.add_subplot(gs[0, 0])
|
|
|
270 |
fig.suptitle("alignment projection")
|
271 |
|
272 |
_ = DBSCAN(min_samples=min_samples, eps=eps).fit(df_).labels_ > -1
|
273 |
+
# _x = DBSCAN(min_samples=min_samples, eps=eps).fit(df_).labels_ < 0
|
274 |
+
_x = ~_
|
275 |
|
276 |
df_.plot.scatter("x", "y", c="cos", cmap=cmap, ax=ax0)
|
277 |
|
|
|
320 |
)
|
321 |
|
322 |
# process lst1, lst2 to obtained df_aligned
|
323 |
+
# quick fix ValueError: not enough values to unpack (expected at least 1, got 0)
|
324 |
+
# fixed in gen_pset, but we leave the loop here
|
325 |
+
for min_s in range(min_samples):
|
326 |
+
logger.info(" min_samples, try %s", min_samples - min_s)
|
327 |
+
try:
|
328 |
+
pset = gen_pset(
|
329 |
+
cmat,
|
330 |
+
eps=eps,
|
331 |
+
min_samples=min_samples - min_s,
|
332 |
+
delta=7,
|
333 |
+
)
|
334 |
+
break
|
335 |
+
except ValueError:
|
336 |
+
logger.info(" decrease min_samples by %s", min_s + 1)
|
337 |
+
continue
|
338 |
+
except Exception as e:
|
339 |
+
logger.error(e)
|
340 |
+
continue
|
341 |
+
else:
|
342 |
+
# break should happen above when min_samples = 2
|
343 |
+
raise Exception("bummer, this shouldn't happen, probably another bug")
|
344 |
+
|
345 |
src_len, tgt_len = cmat.shape
|
346 |
aset = gen_aset(pset, src_len, tgt_len)
|
347 |
final_list = align_texts(aset, lst2, lst1) # note the order
|
|
|
387 |
* Click "Clear" first for subsequent submits when uploading files.
|
388 |
* `tf_type` `idf_type` `dl_type` `norm`: Normally there is no need to touch these unless you know what you are doing.
|
389 |
* Suggested `esp` and `min_samples` values -- `esp` (minimum epsilon): 8-12, `min_samples`: 4-8.
|
390 |
+
- Smaller larger `esp` or `min_samples` will result in more aligned pairs but also more **false positives** (pairs
|
391 |
+
falsely identified as candidates). On the other hand,
|
392 |
+
larger smaller `esp` or `min_samples` values tend to miss
|
393 |
+
'good' pairs.
|
394 |
* If you need to have a better look at the image, you can right-click on the image and select copy-image-address and open a new tab in the browser with the copied image address.
|
395 |
* `Flag`: Should something go wrong, you can click Flag to save the output and inform the developer.
|
396 |
"""
|
radiobee/files2df.py
CHANGED
@@ -25,6 +25,7 @@ def files2df(file1, file2):
|
|
25 |
|
26 |
return df
|
27 |
|
|
|
28 |
_ = """
|
29 |
# return tabulate(df)
|
30 |
# return tabulate(df, tablefmt="grid")
|
|
|
25 |
|
26 |
return df
|
27 |
|
28 |
+
|
29 |
_ = """
|
30 |
# return tabulate(df)
|
31 |
# return tabulate(df, tablefmt="grid")
|
radiobee/gen_pset.py
CHANGED
@@ -13,7 +13,7 @@ from radiobee.cmat2tset import cmat2tset
|
|
13 |
from radiobee.interpolate_pset import interpolate_pset
|
14 |
|
15 |
|
16 |
-
def
|
17 |
cmat1: Union[List[List[float]], np.ndarray, pd.DataFrame],
|
18 |
eps: float = 10,
|
19 |
min_samples: int = 6,
|
@@ -139,3 +139,36 @@ def gen_pset(
|
|
139 |
|
140 |
# return [(1, 1, "")]
|
141 |
return [(int(elm0), int(elm1), elm2) for elm0, elm1, elm2 in buff]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
from radiobee.interpolate_pset import interpolate_pset
|
14 |
|
15 |
|
16 |
+
def _gen_pset(
|
17 |
cmat1: Union[List[List[float]], np.ndarray, pd.DataFrame],
|
18 |
eps: float = 10,
|
19 |
min_samples: int = 6,
|
|
|
139 |
|
140 |
# return [(1, 1, "")]
|
141 |
return [(int(elm0), int(elm1), elm2) for elm0, elm1, elm2 in buff]
|
142 |
+
|
143 |
+
|
144 |
+
def gen_pset(
|
145 |
+
cmat1: Union[List[List[float]], np.ndarray, pd.DataFrame],
|
146 |
+
eps: float = 10,
|
147 |
+
min_samples: int = 6,
|
148 |
+
delta: float = 7,
|
149 |
+
verbose: Union[bool, int] = False,
|
150 |
+
) -> List[Tuple[Union[float, str], Union[float, str], Union[float, str]]]:
|
151 |
+
"""Gen pset.
|
152 |
+
|
153 |
+
Refer to _gen_pset.
|
154 |
+
"""
|
155 |
+
for min_s in range(min_samples):
|
156 |
+
logger.debug(" min_samples, try %s", min_samples - min_s)
|
157 |
+
try:
|
158 |
+
pset = _gen_pset(
|
159 |
+
cmat1,
|
160 |
+
eps=eps,
|
161 |
+
min_samples=min_samples - min_s,
|
162 |
+
delta=delta,
|
163 |
+
)
|
164 |
+
break
|
165 |
+
except ValueError:
|
166 |
+
logger.debug(" decrease min_samples by %s", min_s + 1)
|
167 |
+
continue
|
168 |
+
except Exception as e:
|
169 |
+
logger.error(e)
|
170 |
+
continue
|
171 |
+
else:
|
172 |
+
# break should happen above when min_samples = 2
|
173 |
+
raise Exception("bummer, this shouldn't happen, probably another bug")
|
174 |
+
return pset
|
radiobee/loadtext.py
CHANGED
@@ -16,8 +16,7 @@ magic.from_file("testdata/test.pdf")
|
|
16 |
original load_textrev
|
17 |
refer to load_paras.py
|
18 |
"""
|
19 |
-
from typing import Optional, Union
|
20 |
-
import os
|
21 |
from pathlib import Path
|
22 |
import cchardet
|
23 |
|
@@ -26,24 +25,11 @@ from logzero import logger
|
|
26 |
# from detect_file import detect_file
|
27 |
|
28 |
|
29 |
-
def loadtext(filepath: Union[Path, str] = "") ->
|
30 |
"""Load file context to text.
|
31 |
|
32 |
Check encoding and load a file to text.
|
33 |
-
|
34 |
-
load_paras(filepath='') ==> paralist, lenlist =
|
35 |
"""
|
36 |
-
if not filepath:
|
37 |
-
defaultdir = r"C:\dl\Dropbox\shuangyu_ku\txt-books"
|
38 |
-
defaultfile = r"Folding_Beijing-en.txt"
|
39 |
-
|
40 |
-
# filepath = defaultfile
|
41 |
-
# defaultdir = r'C:\dl\Dropbox\mat-dir\snippets-mat\pyqt'
|
42 |
-
# defaultfile = r'notes pyqt tkinter tktable.txt'
|
43 |
-
|
44 |
-
filepath = os.path.join(defaultdir, defaultfile)
|
45 |
-
filepath = Path(defaultdir) / defaultfile
|
46 |
-
|
47 |
filepath = Path(filepath)
|
48 |
if not filepath.is_file():
|
49 |
logger.error(" file [%s] does not exist or is not a file.", filepath)
|
|
|
16 |
original load_textrev
|
17 |
refer to load_paras.py
|
18 |
"""
|
19 |
+
from typing import Optional, Union # noqa
|
|
|
20 |
from pathlib import Path
|
21 |
import cchardet
|
22 |
|
|
|
25 |
# from detect_file import detect_file
|
26 |
|
27 |
|
28 |
+
def loadtext(filepath: Union[Path, str] = "") -> str:
|
29 |
"""Load file context to text.
|
30 |
|
31 |
Check encoding and load a file to text.
|
|
|
|
|
32 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
filepath = Path(filepath)
|
34 |
if not filepath.is_file():
|
35 |
logger.error(" file [%s] does not exist or is not a file.", filepath)
|
requirements.txt
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
# charset-normalizer
|
4 |
# idna
|
5 |
# typing-extensions
|
|
|
6 |
sklearn
|
7 |
textacy
|
8 |
logzero
|
|
|
3 |
# charset-normalizer
|
4 |
# idna
|
5 |
# typing-extensions
|
6 |
+
gradio
|
7 |
sklearn
|
8 |
textacy
|
9 |
logzero
|
tests/test_en_zh_short.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Test loadtext."""
|
2 |
+
# pylint: disable=invalid-name
|
3 |
+
import pytest
|
4 |
+
|
5 |
+
from fastlid import fastlid
|
6 |
+
|
7 |
+
from radiobee.loadtext import loadtext
|
8 |
+
from radiobee.files2df import files2df
|
9 |
+
from radiobee.file2text import file2text
|
10 |
+
from radiobee.lists2cmat import lists2cmat
|
11 |
+
from radiobee.cmat2tset import cmat2tset
|
12 |
+
from radiobee.gen_pset import gen_pset
|
13 |
+
|
14 |
+
en = loadtext("data/en.txt")
|
15 |
+
zh = loadtext("data/zh.txt")
|
16 |
+
testen = loadtext("data/testen.txt")
|
17 |
+
testzh = loadtext("data/testzh.txt")
|
18 |
+
|
19 |
+
|
20 |
+
def test_en_zh_short1():
|
21 |
+
"""Test en_zh_short."""
|
22 |
+
lst1 = [elm for elm in en.splitlines() if elm.strip()]
|
23 |
+
lst2 = [elm for elm in zh.splitlines() if elm.strip()]
|
24 |
+
|
25 |
+
lang1, _ = fastlid(en)
|
26 |
+
lang2, _ = fastlid(zh)
|
27 |
+
|
28 |
+
cmat0 = lists2cmat(lst1, lst2)
|
29 |
+
pset = gen_pset(cmat0)
|
30 |
+
|
31 |
+
assert pset.__len__() > 2
|
32 |
+
|
33 |
+
|
34 |
+
def test_en_zh_short2():
|
35 |
+
"""Test en_zh_short testen testzh."""
|
36 |
+
# en = testen.copy()
|
37 |
+
# zh = testzh.copy()
|
38 |
+
lst1a = [elm for elm in testen.splitlines() if elm.strip()]
|
39 |
+
lst2a = [elm for elm in testzh.splitlines() if elm.strip()]
|
40 |
+
|
41 |
+
lang1a, _ = fastlid(testen)
|
42 |
+
lang2a, _ = fastlid(testzh)
|
43 |
+
|
44 |
+
cmat1 = lists2cmat(lst1a, lst2a)
|
45 |
+
pset = gen_pset(cmat1)
|
46 |
+
|
47 |
+
assert pset.__len__() > 2
|
48 |
+
|
49 |
+
|
50 |
+
_ = """
|
51 |
+
import matplotlib
|
52 |
+
import matplotlib.pyplot as plt
|
53 |
+
import numpy as np
|
54 |
+
import pandas as pd
|
55 |
+
import seaborn as sns
|
56 |
+
|
57 |
+
sns.set()
|
58 |
+
sns.set_style("darkgrid")
|
59 |
+
cmap = "viridis_r"
|
60 |
+
plt.ion()
|
61 |
+
|
62 |
+
eps = 6
|
63 |
+
min_samples = 10
|
64 |
+
|
65 |
+
|
66 |
+
tset = pd.DataFrame(cmat2tset(cmat))
|
67 |
+
tset.columns = ["x", "y", "cos"]
|
68 |
+
|
69 |
+
df_ = tset
|
70 |
+
|
71 |
+
# """
|
tests/test_files2df.py
CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
|
|
3 |
import tempfile
|
4 |
from radiobee.files2df import files2df
|
5 |
|
|
|
6 |
def test_files2df():
|
7 |
"""Test files2df with tests/test_en.txt tests/test_zh.txt."""
|
8 |
file1_ = "tests/test_en.txt"
|
@@ -15,8 +16,12 @@ def test_files2df():
|
|
15 |
|
16 |
df = files2df(file1, file2)
|
17 |
|
18 |
-
|
19 |
-
assert df.iloc[1,
|
|
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
def test_files2df_file2none():
|
@@ -29,5 +34,9 @@ def test_files2df_file2none():
|
|
29 |
|
30 |
df = files2df(file1, file2)
|
31 |
|
32 |
-
|
33 |
-
assert df.iloc[1,
|
|
|
|
|
|
|
|
|
|
3 |
import tempfile
|
4 |
from radiobee.files2df import files2df
|
5 |
|
6 |
+
|
7 |
def test_files2df():
|
8 |
"""Test files2df with tests/test_en.txt tests/test_zh.txt."""
|
9 |
file1_ = "tests/test_en.txt"
|
|
|
16 |
|
17 |
df = files2df(file1, file2)
|
18 |
|
19 |
+
# with filenames as first row
|
20 |
+
# assert df.iloc[1, 0] == "Wuthering Heights"
|
21 |
+
# assert df.iloc[1, 1] == "呼啸山庄"
|
22 |
+
|
23 |
+
assert df.iloc[0, 0] == "Wuthering Heights"
|
24 |
+
assert df.iloc[0, 1] == "呼啸山庄"
|
25 |
|
26 |
|
27 |
def test_files2df_file2none():
|
|
|
34 |
|
35 |
df = files2df(file1, file2)
|
36 |
|
37 |
+
# with filename as first row
|
38 |
+
# assert df.iloc[1, 0] == "Wuthering Heights"
|
39 |
+
# assert df.iloc[1, 1] == ""
|
40 |
+
|
41 |
+
assert df.iloc[0, 0] == "Wuthering Heights"
|
42 |
+
assert df.iloc[0, 1] == ""
|
tests/test_main.py
CHANGED
@@ -18,6 +18,7 @@ file2loc = "data/test_en.txt"
|
|
18 |
file1 = tempfile._TemporaryFileWrapper(open(file1loc, "rb"), file1loc)
|
19 |
file2 = tempfile._TemporaryFileWrapper(open(file2loc, "rb"), file2loc)
|
20 |
|
|
|
21 |
def test_file2file1():
|
22 |
"""Test cmat file2 file1."""
|
23 |
# logger.info("file1: *%s*, file2: *%s*", file1, file2)
|
|
|
18 |
file1 = tempfile._TemporaryFileWrapper(open(file1loc, "rb"), file1loc)
|
19 |
file2 = tempfile._TemporaryFileWrapper(open(file2loc, "rb"), file2loc)
|
20 |
|
21 |
+
|
22 |
def test_file2file1():
|
23 |
"""Test cmat file2 file1."""
|
24 |
# logger.info("file1: *%s*, file2: *%s*", file1, file2)
|