# opus-mt-tc-base-en-hi / opus+bt.spm32k-spm32k.transformer-align.model1.npz.best-perplexity.yml
# Gabriele Sarti
# Initial commit
# 7d74ea5
# raw
# history blame
# 1.24 kB
# Release metadata for the eng-hin model trained on the opus+bt dataset.
release: eng-hin/opus+bt-2021-04-10.zip
release-date: 2021-04-10
dataset-name: opus+bt
modeltype: transformer-align

# Vocabulary, pre-processing and subword segmentation.
# Source and target reference the same vocabulary file.
vocabulary:
  source: opus+bt.spm32k-spm32k.vocab.yml
  target: opus+bt.spm32k-spm32k.vocab.yml
pre-processing: normalization + SentencePiece (spm32k,spm32k)
subwords:
  source: spm32k
  target: spm32k
subword-models:
  source: source.spm
  target: target.spm

# Language codes covered by this model.
source-languages:
  - eng
target-languages:
  - hin

# Training corpora for the language pair.
# NOTE(review): the number in parentheses is presumably a per-corpus size
# (sentence pairs?) - confirm against the training logs.
training-data:
  eng-hin: Tatoeba-train (1483083) wiki.aa.hin-eng (907641) wiki.ab.hin-eng (326437) wikibooks.aa.hin-eng (106062) wikiquote.aa.hin-eng (9581) wikisource.aa.hin-eng (973520) wikisource.ab.hin-eng (387860)

# Validation set used during training.
validation-data:
  eng-hin: Tatoeba-dev, 5821
  total size of shuffled dev data: 5821
  # The next line was a free-text note with no "key: value" form, which is not
  # valid inside a mapping; it is preserved verbatim as a comment.
  # devset = top 5000 lines of Tatoeba-dev.src.shuffled!

# Test sets, keyed by "<test set>.<language pair>".
# NOTE(review): values look like "<sentences>/<words>" - confirm.
test-data:
  newsdev2014.eng-hin: 520/9538
  newstest2014-hien.eng-hin: 2507/60878
  Tatoeba-test.eng-hin: 5000/32904
  tico19-test.eng-hin: 2100/62738

# BLEU score per test set (same keys as test-data).
BLEU-scores:
  newsdev2014.eng-hin: 13.9
  newstest2014-hien.eng-hin: 17.4
  Tatoeba-test.eng-hin: 22.2
  tico19-test.eng-hin: 30.6

# chr-F score per test set (same keys as test-data).
chr-F-scores:
  newsdev2014.eng-hin: 0.421
  newstest2014-hien.eng-hin: 0.442
  Tatoeba-test.eng-hin: 0.485
  tico19-test.eng-hin: 0.539