# mlbee/tests/test_gen_cmat_aset2pairs.py
# ffreemt
# Update cmat2aset310
# dbfaf91
"""Test gen_cmat.
king
In [21]: len(de)
Out[21]: 51
In [22]: len(en)
Out[22]: 51
In [23]: len(" ".join(en))
Out[23]: 11208
In [24]: len(" ".join(de))
Out[24]: 13532
In [25]: %time en_vec = model_s.encode(en)
CPU times: user 22 s, sys: 436 ms, total: 22.4 s
Wall time: 22.4 s
In [26]: %time de_vec = model_s.encode(de)
CPU times: user 22.8 s, sys: 311 ms, total: 23.1 s
Wall time: 23.1 s
en1 = loadparas("data/sternstunden04-en.txt")
en2 = loadparas("data/sternstunden04-de.txt")
len(en1) # 30
len(" ".join(en1)) # 29718
len(" ".join(en2)) # 31478
"""
from cmat2aset310 import cmat2aset
from aset2pairs import aset2pairs
from st_mlbee.gen_cmat import gen_cmat
from st_mlbee.loadtext import loadparas
# Shared inputs for the tests in this module, built once at import time.
# The -en file is loaded first and bound to paras1, the -de file second and
# bound to paras2 (loadparas presumably returns a list of paragraphs --
# confirm against st_mlbee.loadtext).
paras1, paras2 = map(
    loadparas,
    ("data/sternstunden04-en.txt", "data/sternstunden04-de.txt"),
)

# Matrix computed from the two texts; the tests in this module expect its
# shape to be (len(paras2), len(paras1)).
cmat = gen_cmat(paras1, paras2)
def test_gen_cmat_sternstunden04():
    """Check the shape of the module-level cmat for the sternstunden04 texts."""
    # Mind the order: the first dimension follows paras2 and the second
    # follows paras1, i.e. the reverse of the gen_cmat argument order.
    expected_shape = (len(paras2), len(paras1))
    assert cmat.shape == expected_shape
def test_aset2pairs():
    """Check the entry at index 2 of the pairs built from cmat.

    Its first two items must both contain "Marseillaise" and its third
    item must exceed 0.95.
    """
    alignment = cmat2aset(cmat)
    aligned = aset2pairs(paras1, paras2, alignment)

    # Only the third entry is inspected.
    # NOTE(review): items 0 and 1 are presumably the paras1 and paras2 texts
    # and item 2 a similarity score -- confirm against aset2pairs.
    row = aligned[2]
    for slot in (0, 1):
        assert "Marseillaise" in row[slot]
    assert row[2] > 0.95