Initial commit
Browse files- .gitattributes +1 -0
- README.md +1506 -0
- benchmark_results.txt +176 -0
- benchmark_translations.zip +0 -0
- config.json +41 -0
- generation_config.json +16 -0
- model.safetensors +3 -0
- pytorch_model.bin +3 -0
- source.spm +3 -0
- special_tokens_map.json +1 -0
- target.spm +3 -0
- tokenizer_config.json +1 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.spm filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,1506 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
language:
|
4 |
+
- anp
|
5 |
+
- as
|
6 |
+
- awa
|
7 |
+
- bho
|
8 |
+
- bn
|
9 |
+
- bpy
|
10 |
+
- de
|
11 |
+
- dv
|
12 |
+
- en
|
13 |
+
- es
|
14 |
+
- fr
|
15 |
+
- gbm
|
16 |
+
- gu
|
17 |
+
- hi
|
18 |
+
- hif
|
19 |
+
- hne
|
20 |
+
- hns
|
21 |
+
- kok
|
22 |
+
- ks
|
23 |
+
- lah
|
24 |
+
- mag
|
25 |
+
- mai
|
26 |
+
- mr
|
27 |
+
- ne
|
28 |
+
- or
|
29 |
+
- pa
|
30 |
+
- pi
|
31 |
+
- pt
|
32 |
+
- rhg
|
33 |
+
- rmy
|
34 |
+
- rom
|
35 |
+
- sa
|
36 |
+
- sd
|
37 |
+
- si
|
38 |
+
- skr
|
39 |
+
- syl
|
40 |
+
- ur
|
41 |
+
|
42 |
+
tags:
|
43 |
+
- translation
|
44 |
+
- opus-mt-tc-bible
|
45 |
+
|
46 |
+
license: apache-2.0
|
47 |
+
model-index:
|
48 |
+
- name: opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc
|
49 |
+
results:
|
50 |
+
- task:
|
51 |
+
name: Translation deu-ben
|
52 |
+
type: translation
|
53 |
+
args: deu-ben
|
54 |
+
dataset:
|
55 |
+
name: flores200-devtest
|
56 |
+
type: flores200-devtest
|
57 |
+
args: deu-ben
|
58 |
+
metrics:
|
59 |
+
- name: BLEU
|
60 |
+
type: bleu
|
61 |
+
value: 11.3
|
62 |
+
- name: chr-F
|
63 |
+
type: chrf
|
64 |
+
value: 0.44696
|
65 |
+
- task:
|
66 |
+
name: Translation deu-guj
|
67 |
+
type: translation
|
68 |
+
args: deu-guj
|
69 |
+
dataset:
|
70 |
+
name: flores200-devtest
|
71 |
+
type: flores200-devtest
|
72 |
+
args: deu-guj
|
73 |
+
metrics:
|
74 |
+
- name: BLEU
|
75 |
+
type: bleu
|
76 |
+
value: 12.0
|
77 |
+
- name: chr-F
|
78 |
+
type: chrf
|
79 |
+
value: 0.40939
|
80 |
+
- task:
|
81 |
+
name: Translation deu-hin
|
82 |
+
type: translation
|
83 |
+
args: deu-hin
|
84 |
+
dataset:
|
85 |
+
name: flores200-devtest
|
86 |
+
type: flores200-devtest
|
87 |
+
args: deu-hin
|
88 |
+
metrics:
|
89 |
+
- name: BLEU
|
90 |
+
type: bleu
|
91 |
+
value: 22.7
|
92 |
+
- name: chr-F
|
93 |
+
type: chrf
|
94 |
+
value: 0.48864
|
95 |
+
- task:
|
96 |
+
name: Translation deu-hne
|
97 |
+
type: translation
|
98 |
+
args: deu-hne
|
99 |
+
dataset:
|
100 |
+
name: flores200-devtest
|
101 |
+
type: flores200-devtest
|
102 |
+
args: deu-hne
|
103 |
+
metrics:
|
104 |
+
- name: BLEU
|
105 |
+
type: bleu
|
106 |
+
value: 14.2
|
107 |
+
- name: chr-F
|
108 |
+
type: chrf
|
109 |
+
value: 0.43166
|
110 |
+
- task:
|
111 |
+
name: Translation deu-mag
|
112 |
+
type: translation
|
113 |
+
args: deu-mag
|
114 |
+
dataset:
|
115 |
+
name: flores200-devtest
|
116 |
+
type: flores200-devtest
|
117 |
+
args: deu-mag
|
118 |
+
metrics:
|
119 |
+
- name: BLEU
|
120 |
+
type: bleu
|
121 |
+
value: 14.2
|
122 |
+
- name: chr-F
|
123 |
+
type: chrf
|
124 |
+
value: 0.43058
|
125 |
+
- task:
|
126 |
+
name: Translation deu-pan
|
127 |
+
type: translation
|
128 |
+
args: deu-pan
|
129 |
+
dataset:
|
130 |
+
name: flores200-devtest
|
131 |
+
type: flores200-devtest
|
132 |
+
args: deu-pan
|
133 |
+
metrics:
|
134 |
+
- name: BLEU
|
135 |
+
type: bleu
|
136 |
+
value: 10.5
|
137 |
+
- name: chr-F
|
138 |
+
type: chrf
|
139 |
+
value: 0.36795
|
140 |
+
- task:
|
141 |
+
name: Translation deu-urd
|
142 |
+
type: translation
|
143 |
+
args: deu-urd
|
144 |
+
dataset:
|
145 |
+
name: flores200-devtest
|
146 |
+
type: flores200-devtest
|
147 |
+
args: deu-urd
|
148 |
+
metrics:
|
149 |
+
- name: BLEU
|
150 |
+
type: bleu
|
151 |
+
value: 14.3
|
152 |
+
- name: chr-F
|
153 |
+
type: chrf
|
154 |
+
value: 0.41167
|
155 |
+
- task:
|
156 |
+
name: Translation eng-ben
|
157 |
+
type: translation
|
158 |
+
args: eng-ben
|
159 |
+
dataset:
|
160 |
+
name: flores200-devtest
|
161 |
+
type: flores200-devtest
|
162 |
+
args: eng-ben
|
163 |
+
metrics:
|
164 |
+
- name: BLEU
|
165 |
+
type: bleu
|
166 |
+
value: 17.7
|
167 |
+
- name: chr-F
|
168 |
+
type: chrf
|
169 |
+
value: 0.52088
|
170 |
+
- task:
|
171 |
+
name: Translation eng-bho
|
172 |
+
type: translation
|
173 |
+
args: eng-bho
|
174 |
+
dataset:
|
175 |
+
name: flores200-devtest
|
176 |
+
type: flores200-devtest
|
177 |
+
args: eng-bho
|
178 |
+
metrics:
|
179 |
+
- name: BLEU
|
180 |
+
type: bleu
|
181 |
+
value: 11.6
|
182 |
+
- name: chr-F
|
183 |
+
type: chrf
|
184 |
+
value: 0.37333
|
185 |
+
- task:
|
186 |
+
name: Translation eng-guj
|
187 |
+
type: translation
|
188 |
+
args: eng-guj
|
189 |
+
dataset:
|
190 |
+
name: flores200-devtest
|
191 |
+
type: flores200-devtest
|
192 |
+
args: eng-guj
|
193 |
+
metrics:
|
194 |
+
- name: BLEU
|
195 |
+
type: bleu
|
196 |
+
value: 23.2
|
197 |
+
- name: chr-F
|
198 |
+
type: chrf
|
199 |
+
value: 0.54758
|
200 |
+
- task:
|
201 |
+
name: Translation eng-hin
|
202 |
+
type: translation
|
203 |
+
args: eng-hin
|
204 |
+
dataset:
|
205 |
+
name: flores200-devtest
|
206 |
+
type: flores200-devtest
|
207 |
+
args: eng-hin
|
208 |
+
metrics:
|
209 |
+
- name: BLEU
|
210 |
+
type: bleu
|
211 |
+
value: 34.4
|
212 |
+
- name: chr-F
|
213 |
+
type: chrf
|
214 |
+
value: 0.58825
|
215 |
+
- task:
|
216 |
+
name: Translation eng-hne
|
217 |
+
type: translation
|
218 |
+
args: eng-hne
|
219 |
+
dataset:
|
220 |
+
name: flores200-devtest
|
221 |
+
type: flores200-devtest
|
222 |
+
args: eng-hne
|
223 |
+
metrics:
|
224 |
+
- name: BLEU
|
225 |
+
type: bleu
|
226 |
+
value: 19.1
|
227 |
+
- name: chr-F
|
228 |
+
type: chrf
|
229 |
+
value: 0.46144
|
230 |
+
- task:
|
231 |
+
name: Translation eng-mag
|
232 |
+
type: translation
|
233 |
+
args: eng-mag
|
234 |
+
dataset:
|
235 |
+
name: flores200-devtest
|
236 |
+
type: flores200-devtest
|
237 |
+
args: eng-mag
|
238 |
+
metrics:
|
239 |
+
- name: BLEU
|
240 |
+
type: bleu
|
241 |
+
value: 21.9
|
242 |
+
- name: chr-F
|
243 |
+
type: chrf
|
244 |
+
value: 0.50291
|
245 |
+
- task:
|
246 |
+
name: Translation eng-mar
|
247 |
+
type: translation
|
248 |
+
args: eng-mar
|
249 |
+
dataset:
|
250 |
+
name: flores200-devtest
|
251 |
+
type: flores200-devtest
|
252 |
+
args: eng-mar
|
253 |
+
metrics:
|
254 |
+
- name: BLEU
|
255 |
+
type: bleu
|
256 |
+
value: 15.6
|
257 |
+
- name: chr-F
|
258 |
+
type: chrf
|
259 |
+
value: 0.49344
|
260 |
+
- task:
|
261 |
+
name: Translation eng-pan
|
262 |
+
type: translation
|
263 |
+
args: eng-pan
|
264 |
+
dataset:
|
265 |
+
name: flores200-devtest
|
266 |
+
type: flores200-devtest
|
267 |
+
args: eng-pan
|
268 |
+
metrics:
|
269 |
+
- name: BLEU
|
270 |
+
type: bleu
|
271 |
+
value: 18.4
|
272 |
+
- name: chr-F
|
273 |
+
type: chrf
|
274 |
+
value: 0.45635
|
275 |
+
- task:
|
276 |
+
name: Translation eng-sin
|
277 |
+
type: translation
|
278 |
+
args: eng-sin
|
279 |
+
dataset:
|
280 |
+
name: flores200-devtest
|
281 |
+
type: flores200-devtest
|
282 |
+
args: eng-sin
|
283 |
+
metrics:
|
284 |
+
- name: BLEU
|
285 |
+
type: bleu
|
286 |
+
value: 11.8
|
287 |
+
- name: chr-F
|
288 |
+
type: chrf
|
289 |
+
value: 0.45683
|
290 |
+
- task:
|
291 |
+
name: Translation eng-urd
|
292 |
+
type: translation
|
293 |
+
args: eng-urd
|
294 |
+
dataset:
|
295 |
+
name: flores200-devtest
|
296 |
+
type: flores200-devtest
|
297 |
+
args: eng-urd
|
298 |
+
metrics:
|
299 |
+
- name: BLEU
|
300 |
+
type: bleu
|
301 |
+
value: 20.6
|
302 |
+
- name: chr-F
|
303 |
+
type: chrf
|
304 |
+
value: 0.48224
|
305 |
+
- task:
|
306 |
+
name: Translation fra-ben
|
307 |
+
type: translation
|
308 |
+
args: fra-ben
|
309 |
+
dataset:
|
310 |
+
name: flores200-devtest
|
311 |
+
type: flores200-devtest
|
312 |
+
args: fra-ben
|
313 |
+
metrics:
|
314 |
+
- name: BLEU
|
315 |
+
type: bleu
|
316 |
+
value: 11.1
|
317 |
+
- name: chr-F
|
318 |
+
type: chrf
|
319 |
+
value: 0.44486
|
320 |
+
- task:
|
321 |
+
name: Translation fra-guj
|
322 |
+
type: translation
|
323 |
+
args: fra-guj
|
324 |
+
dataset:
|
325 |
+
name: flores200-devtest
|
326 |
+
type: flores200-devtest
|
327 |
+
args: fra-guj
|
328 |
+
metrics:
|
329 |
+
- name: BLEU
|
330 |
+
type: bleu
|
331 |
+
value: 12.2
|
332 |
+
- name: chr-F
|
333 |
+
type: chrf
|
334 |
+
value: 0.41021
|
335 |
+
- task:
|
336 |
+
name: Translation fra-hin
|
337 |
+
type: translation
|
338 |
+
args: fra-hin
|
339 |
+
dataset:
|
340 |
+
name: flores200-devtest
|
341 |
+
type: flores200-devtest
|
342 |
+
args: fra-hin
|
343 |
+
metrics:
|
344 |
+
- name: BLEU
|
345 |
+
type: bleu
|
346 |
+
value: 22.7
|
347 |
+
- name: chr-F
|
348 |
+
type: chrf
|
349 |
+
value: 0.48632
|
350 |
+
- task:
|
351 |
+
name: Translation fra-hne
|
352 |
+
type: translation
|
353 |
+
args: fra-hne
|
354 |
+
dataset:
|
355 |
+
name: flores200-devtest
|
356 |
+
type: flores200-devtest
|
357 |
+
args: fra-hne
|
358 |
+
metrics:
|
359 |
+
- name: BLEU
|
360 |
+
type: bleu
|
361 |
+
value: 13.8
|
362 |
+
- name: chr-F
|
363 |
+
type: chrf
|
364 |
+
value: 0.42777
|
365 |
+
- task:
|
366 |
+
name: Translation fra-mag
|
367 |
+
type: translation
|
368 |
+
args: fra-mag
|
369 |
+
dataset:
|
370 |
+
name: flores200-devtest
|
371 |
+
type: flores200-devtest
|
372 |
+
args: fra-mag
|
373 |
+
metrics:
|
374 |
+
- name: BLEU
|
375 |
+
type: bleu
|
376 |
+
value: 14.3
|
377 |
+
- name: chr-F
|
378 |
+
type: chrf
|
379 |
+
value: 0.42725
|
380 |
+
- task:
|
381 |
+
name: Translation fra-pan
|
382 |
+
type: translation
|
383 |
+
args: fra-pan
|
384 |
+
dataset:
|
385 |
+
name: flores200-devtest
|
386 |
+
type: flores200-devtest
|
387 |
+
args: fra-pan
|
388 |
+
metrics:
|
389 |
+
- name: BLEU
|
390 |
+
type: bleu
|
391 |
+
value: 10.6
|
392 |
+
- name: chr-F
|
393 |
+
type: chrf
|
394 |
+
value: 0.36902
|
395 |
+
- task:
|
396 |
+
name: Translation fra-urd
|
397 |
+
type: translation
|
398 |
+
args: fra-urd
|
399 |
+
dataset:
|
400 |
+
name: flores200-devtest
|
401 |
+
type: flores200-devtest
|
402 |
+
args: fra-urd
|
403 |
+
metrics:
|
404 |
+
- name: BLEU
|
405 |
+
type: bleu
|
406 |
+
value: 13.6
|
407 |
+
- name: chr-F
|
408 |
+
type: chrf
|
409 |
+
value: 0.40901
|
410 |
+
- task:
|
411 |
+
name: Translation por-ben
|
412 |
+
type: translation
|
413 |
+
args: por-ben
|
414 |
+
dataset:
|
415 |
+
name: flores200-devtest
|
416 |
+
type: flores200-devtest
|
417 |
+
args: por-ben
|
418 |
+
metrics:
|
419 |
+
- name: BLEU
|
420 |
+
type: bleu
|
421 |
+
value: 10.7
|
422 |
+
- name: chr-F
|
423 |
+
type: chrf
|
424 |
+
value: 0.43877
|
425 |
+
- task:
|
426 |
+
name: Translation por-guj
|
427 |
+
type: translation
|
428 |
+
args: por-guj
|
429 |
+
dataset:
|
430 |
+
name: flores200-devtest
|
431 |
+
type: flores200-devtest
|
432 |
+
args: por-guj
|
433 |
+
metrics:
|
434 |
+
- name: BLEU
|
435 |
+
type: bleu
|
436 |
+
value: 10.9
|
437 |
+
- name: chr-F
|
438 |
+
type: chrf
|
439 |
+
value: 0.38225
|
440 |
+
- task:
|
441 |
+
name: Translation por-hin
|
442 |
+
type: translation
|
443 |
+
args: por-hin
|
444 |
+
dataset:
|
445 |
+
name: flores200-devtest
|
446 |
+
type: flores200-devtest
|
447 |
+
args: por-hin
|
448 |
+
metrics:
|
449 |
+
- name: BLEU
|
450 |
+
type: bleu
|
451 |
+
value: 23.9
|
452 |
+
- name: chr-F
|
453 |
+
type: chrf
|
454 |
+
value: 0.50121
|
455 |
+
- task:
|
456 |
+
name: Translation por-hne
|
457 |
+
type: translation
|
458 |
+
args: por-hne
|
459 |
+
dataset:
|
460 |
+
name: flores200-devtest
|
461 |
+
type: flores200-devtest
|
462 |
+
args: por-hne
|
463 |
+
metrics:
|
464 |
+
- name: BLEU
|
465 |
+
type: bleu
|
466 |
+
value: 14.1
|
467 |
+
- name: chr-F
|
468 |
+
type: chrf
|
469 |
+
value: 0.42270
|
470 |
+
- task:
|
471 |
+
name: Translation por-mag
|
472 |
+
type: translation
|
473 |
+
args: por-mag
|
474 |
+
dataset:
|
475 |
+
name: flores200-devtest
|
476 |
+
type: flores200-devtest
|
477 |
+
args: por-mag
|
478 |
+
metrics:
|
479 |
+
- name: BLEU
|
480 |
+
type: bleu
|
481 |
+
value: 13.7
|
482 |
+
- name: chr-F
|
483 |
+
type: chrf
|
484 |
+
value: 0.42146
|
485 |
+
- task:
|
486 |
+
name: Translation por-urd
|
487 |
+
type: translation
|
488 |
+
args: por-urd
|
489 |
+
dataset:
|
490 |
+
name: flores200-devtest
|
491 |
+
type: flores200-devtest
|
492 |
+
args: por-urd
|
493 |
+
metrics:
|
494 |
+
- name: BLEU
|
495 |
+
type: bleu
|
496 |
+
value: 14.5
|
497 |
+
- name: chr-F
|
498 |
+
type: chrf
|
499 |
+
value: 0.41225
|
500 |
+
- task:
|
501 |
+
name: Translation spa-hin
|
502 |
+
type: translation
|
503 |
+
args: spa-hin
|
504 |
+
dataset:
|
505 |
+
name: flores200-devtest
|
506 |
+
type: flores200-devtest
|
507 |
+
args: spa-hin
|
508 |
+
metrics:
|
509 |
+
- name: BLEU
|
510 |
+
type: bleu
|
511 |
+
value: 16.4
|
512 |
+
- name: chr-F
|
513 |
+
type: chrf
|
514 |
+
value: 0.43977
|
515 |
+
- task:
|
516 |
+
name: Translation spa-hne
|
517 |
+
type: translation
|
518 |
+
args: spa-hne
|
519 |
+
dataset:
|
520 |
+
name: flores200-devtest
|
521 |
+
type: flores200-devtest
|
522 |
+
args: spa-hne
|
523 |
+
metrics:
|
524 |
+
- name: BLEU
|
525 |
+
type: bleu
|
526 |
+
value: 10.8
|
527 |
+
- name: chr-F
|
528 |
+
type: chrf
|
529 |
+
value: 0.39555
|
530 |
+
- task:
|
531 |
+
name: Translation spa-mag
|
532 |
+
type: translation
|
533 |
+
args: spa-mag
|
534 |
+
dataset:
|
535 |
+
name: flores200-devtest
|
536 |
+
type: flores200-devtest
|
537 |
+
args: spa-mag
|
538 |
+
metrics:
|
539 |
+
- name: BLEU
|
540 |
+
type: bleu
|
541 |
+
value: 11.1
|
542 |
+
- name: chr-F
|
543 |
+
type: chrf
|
544 |
+
value: 0.39621
|
545 |
+
- task:
|
546 |
+
name: Translation spa-urd
|
547 |
+
type: translation
|
548 |
+
args: spa-urd
|
549 |
+
dataset:
|
550 |
+
name: flores200-devtest
|
551 |
+
type: flores200-devtest
|
552 |
+
args: spa-urd
|
553 |
+
metrics:
|
554 |
+
- name: BLEU
|
555 |
+
type: bleu
|
556 |
+
value: 10.8
|
557 |
+
- name: chr-F
|
558 |
+
type: chrf
|
559 |
+
value: 0.37993
|
560 |
+
- task:
|
561 |
+
name: Translation deu-ben
|
562 |
+
type: translation
|
563 |
+
args: deu-ben
|
564 |
+
dataset:
|
565 |
+
name: flores101-devtest
|
566 |
+
type: flores_101
|
567 |
+
args: deu ben devtest
|
568 |
+
metrics:
|
569 |
+
- name: BLEU
|
570 |
+
type: bleu
|
571 |
+
value: 10.8
|
572 |
+
- name: chr-F
|
573 |
+
type: chrf
|
574 |
+
value: 0.44269
|
575 |
+
- task:
|
576 |
+
name: Translation deu-hin
|
577 |
+
type: translation
|
578 |
+
args: deu-hin
|
579 |
+
dataset:
|
580 |
+
name: flores101-devtest
|
581 |
+
type: flores_101
|
582 |
+
args: deu hin devtest
|
583 |
+
metrics:
|
584 |
+
- name: BLEU
|
585 |
+
type: bleu
|
586 |
+
value: 21.9
|
587 |
+
- name: chr-F
|
588 |
+
type: chrf
|
589 |
+
value: 0.48314
|
590 |
+
- task:
|
591 |
+
name: Translation eng-ben
|
592 |
+
type: translation
|
593 |
+
args: eng-ben
|
594 |
+
dataset:
|
595 |
+
name: flores101-devtest
|
596 |
+
type: flores_101
|
597 |
+
args: eng ben devtest
|
598 |
+
metrics:
|
599 |
+
- name: BLEU
|
600 |
+
type: bleu
|
601 |
+
value: 17.4
|
602 |
+
- name: chr-F
|
603 |
+
type: chrf
|
604 |
+
value: 0.51768
|
605 |
+
- task:
|
606 |
+
name: Translation eng-guj
|
607 |
+
type: translation
|
608 |
+
args: eng-guj
|
609 |
+
dataset:
|
610 |
+
name: flores101-devtest
|
611 |
+
type: flores_101
|
612 |
+
args: eng guj devtest
|
613 |
+
metrics:
|
614 |
+
- name: BLEU
|
615 |
+
type: bleu
|
616 |
+
value: 22.7
|
617 |
+
- name: chr-F
|
618 |
+
type: chrf
|
619 |
+
value: 0.54325
|
620 |
+
- task:
|
621 |
+
name: Translation eng-hin
|
622 |
+
type: translation
|
623 |
+
args: eng-hin
|
624 |
+
dataset:
|
625 |
+
name: flores101-devtest
|
626 |
+
type: flores_101
|
627 |
+
args: eng hin devtest
|
628 |
+
metrics:
|
629 |
+
- name: BLEU
|
630 |
+
type: bleu
|
631 |
+
value: 34.1
|
632 |
+
- name: chr-F
|
633 |
+
type: chrf
|
634 |
+
value: 0.58472
|
635 |
+
- task:
|
636 |
+
name: Translation fra-ben
|
637 |
+
type: translation
|
638 |
+
args: fra-ben
|
639 |
+
dataset:
|
640 |
+
name: flores101-devtest
|
641 |
+
type: flores_101
|
642 |
+
args: fra ben devtest
|
643 |
+
metrics:
|
644 |
+
- name: BLEU
|
645 |
+
type: bleu
|
646 |
+
value: 11.1
|
647 |
+
- name: chr-F
|
648 |
+
type: chrf
|
649 |
+
value: 0.44304
|
650 |
+
- task:
|
651 |
+
name: Translation fra-hin
|
652 |
+
type: translation
|
653 |
+
args: fra-hin
|
654 |
+
dataset:
|
655 |
+
name: flores101-devtest
|
656 |
+
type: flores_101
|
657 |
+
args: fra hin devtest
|
658 |
+
metrics:
|
659 |
+
- name: BLEU
|
660 |
+
type: bleu
|
661 |
+
value: 22.5
|
662 |
+
- name: chr-F
|
663 |
+
type: chrf
|
664 |
+
value: 0.48245
|
665 |
+
- task:
|
666 |
+
name: Translation deu-hin
|
667 |
+
type: translation
|
668 |
+
args: deu-hin
|
669 |
+
dataset:
|
670 |
+
name: ntrex128
|
671 |
+
type: ntrex128
|
672 |
+
args: deu-hin
|
673 |
+
metrics:
|
674 |
+
- name: BLEU
|
675 |
+
type: bleu
|
676 |
+
value: 17.0
|
677 |
+
- name: chr-F
|
678 |
+
type: chrf
|
679 |
+
value: 0.43252
|
680 |
+
- task:
|
681 |
+
name: Translation deu-pan
|
682 |
+
type: translation
|
683 |
+
args: deu-pan
|
684 |
+
dataset:
|
685 |
+
name: ntrex128
|
686 |
+
type: ntrex128
|
687 |
+
args: deu-pan
|
688 |
+
metrics:
|
689 |
+
- name: BLEU
|
690 |
+
type: bleu
|
691 |
+
value: 10.2
|
692 |
+
- name: chr-F
|
693 |
+
type: chrf
|
694 |
+
value: 0.36448
|
695 |
+
- task:
|
696 |
+
name: Translation deu-urd
|
697 |
+
type: translation
|
698 |
+
args: deu-urd
|
699 |
+
dataset:
|
700 |
+
name: ntrex128
|
701 |
+
type: ntrex128
|
702 |
+
args: deu-urd
|
703 |
+
metrics:
|
704 |
+
- name: BLEU
|
705 |
+
type: bleu
|
706 |
+
value: 14.8
|
707 |
+
- name: chr-F
|
708 |
+
type: chrf
|
709 |
+
value: 0.41844
|
710 |
+
- task:
|
711 |
+
name: Translation eng-ben
|
712 |
+
type: translation
|
713 |
+
args: eng-ben
|
714 |
+
dataset:
|
715 |
+
name: ntrex128
|
716 |
+
type: ntrex128
|
717 |
+
args: eng-ben
|
718 |
+
metrics:
|
719 |
+
- name: BLEU
|
720 |
+
type: bleu
|
721 |
+
value: 17.3
|
722 |
+
- name: chr-F
|
723 |
+
type: chrf
|
724 |
+
value: 0.52381
|
725 |
+
- task:
|
726 |
+
name: Translation eng-guj
|
727 |
+
type: translation
|
728 |
+
args: eng-guj
|
729 |
+
dataset:
|
730 |
+
name: ntrex128
|
731 |
+
type: ntrex128
|
732 |
+
args: eng-guj
|
733 |
+
metrics:
|
734 |
+
- name: BLEU
|
735 |
+
type: bleu
|
736 |
+
value: 17.2
|
737 |
+
- name: chr-F
|
738 |
+
type: chrf
|
739 |
+
value: 0.49386
|
740 |
+
- task:
|
741 |
+
name: Translation eng-hin
|
742 |
+
type: translation
|
743 |
+
args: eng-hin
|
744 |
+
dataset:
|
745 |
+
name: ntrex128
|
746 |
+
type: ntrex128
|
747 |
+
args: eng-hin
|
748 |
+
metrics:
|
749 |
+
- name: BLEU
|
750 |
+
type: bleu
|
751 |
+
value: 27.4
|
752 |
+
- name: chr-F
|
753 |
+
type: chrf
|
754 |
+
value: 0.52696
|
755 |
+
- task:
|
756 |
+
name: Translation eng-mar
|
757 |
+
type: translation
|
758 |
+
args: eng-mar
|
759 |
+
dataset:
|
760 |
+
name: ntrex128
|
761 |
+
type: ntrex128
|
762 |
+
args: eng-mar
|
763 |
+
metrics:
|
764 |
+
- name: BLEU
|
765 |
+
type: bleu
|
766 |
+
value: 10.8
|
767 |
+
- name: chr-F
|
768 |
+
type: chrf
|
769 |
+
value: 0.45244
|
770 |
+
- task:
|
771 |
+
name: Translation eng-pan
|
772 |
+
type: translation
|
773 |
+
args: eng-pan
|
774 |
+
dataset:
|
775 |
+
name: ntrex128
|
776 |
+
type: ntrex128
|
777 |
+
args: eng-pan
|
778 |
+
metrics:
|
779 |
+
- name: BLEU
|
780 |
+
type: bleu
|
781 |
+
value: 19.5
|
782 |
+
- name: chr-F
|
783 |
+
type: chrf
|
784 |
+
value: 0.46534
|
785 |
+
- task:
|
786 |
+
name: Translation eng-sin
|
787 |
+
type: translation
|
788 |
+
args: eng-sin
|
789 |
+
dataset:
|
790 |
+
name: ntrex128
|
791 |
+
type: ntrex128
|
792 |
+
args: eng-sin
|
793 |
+
metrics:
|
794 |
+
- name: BLEU
|
795 |
+
type: bleu
|
796 |
+
value: 10.5
|
797 |
+
- name: chr-F
|
798 |
+
type: chrf
|
799 |
+
value: 0.44124
|
800 |
+
- task:
|
801 |
+
name: Translation eng-urd
|
802 |
+
type: translation
|
803 |
+
args: eng-urd
|
804 |
+
dataset:
|
805 |
+
name: ntrex128
|
806 |
+
type: ntrex128
|
807 |
+
args: eng-urd
|
808 |
+
metrics:
|
809 |
+
- name: BLEU
|
810 |
+
type: bleu
|
811 |
+
value: 22.4
|
812 |
+
- name: chr-F
|
813 |
+
type: chrf
|
814 |
+
value: 0.50060
|
815 |
+
- task:
|
816 |
+
name: Translation fra-hin
|
817 |
+
type: translation
|
818 |
+
args: fra-hin
|
819 |
+
dataset:
|
820 |
+
name: ntrex128
|
821 |
+
type: ntrex128
|
822 |
+
args: fra-hin
|
823 |
+
metrics:
|
824 |
+
- name: BLEU
|
825 |
+
type: bleu
|
826 |
+
value: 17.4
|
827 |
+
- name: chr-F
|
828 |
+
type: chrf
|
829 |
+
value: 0.42777
|
830 |
+
- task:
|
831 |
+
name: Translation fra-urd
|
832 |
+
type: translation
|
833 |
+
args: fra-urd
|
834 |
+
dataset:
|
835 |
+
name: ntrex128
|
836 |
+
type: ntrex128
|
837 |
+
args: fra-urd
|
838 |
+
metrics:
|
839 |
+
- name: BLEU
|
840 |
+
type: bleu
|
841 |
+
value: 14.3
|
842 |
+
- name: chr-F
|
843 |
+
type: chrf
|
844 |
+
value: 0.41229
|
845 |
+
- task:
|
846 |
+
name: Translation por-ben
|
847 |
+
type: translation
|
848 |
+
args: por-ben
|
849 |
+
dataset:
|
850 |
+
name: ntrex128
|
851 |
+
type: ntrex128
|
852 |
+
args: por-ben
|
853 |
+
metrics:
|
854 |
+
- name: BLEU
|
855 |
+
type: bleu
|
856 |
+
value: 10.1
|
857 |
+
- name: chr-F
|
858 |
+
type: chrf
|
859 |
+
value: 0.44134
|
860 |
+
- task:
|
861 |
+
name: Translation por-hin
|
862 |
+
type: translation
|
863 |
+
args: por-hin
|
864 |
+
dataset:
|
865 |
+
name: ntrex128
|
866 |
+
type: ntrex128
|
867 |
+
args: por-hin
|
868 |
+
metrics:
|
869 |
+
- name: BLEU
|
870 |
+
type: bleu
|
871 |
+
value: 17.7
|
872 |
+
- name: chr-F
|
873 |
+
type: chrf
|
874 |
+
value: 0.43461
|
875 |
+
- task:
|
876 |
+
name: Translation por-urd
|
877 |
+
type: translation
|
878 |
+
args: por-urd
|
879 |
+
dataset:
|
880 |
+
name: ntrex128
|
881 |
+
type: ntrex128
|
882 |
+
args: por-urd
|
883 |
+
metrics:
|
884 |
+
- name: BLEU
|
885 |
+
type: bleu
|
886 |
+
value: 14.5
|
887 |
+
- name: chr-F
|
888 |
+
type: chrf
|
889 |
+
value: 0.41777
|
890 |
+
- task:
|
891 |
+
name: Translation spa-ben
|
892 |
+
type: translation
|
893 |
+
args: spa-ben
|
894 |
+
dataset:
|
895 |
+
name: ntrex128
|
896 |
+
type: ntrex128
|
897 |
+
args: spa-ben
|
898 |
+
metrics:
|
899 |
+
- name: BLEU
|
900 |
+
type: bleu
|
901 |
+
value: 10.6
|
902 |
+
- name: chr-F
|
903 |
+
type: chrf
|
904 |
+
value: 0.45329
|
905 |
+
- task:
|
906 |
+
name: Translation spa-hin
|
907 |
+
type: translation
|
908 |
+
args: spa-hin
|
909 |
+
dataset:
|
910 |
+
name: ntrex128
|
911 |
+
type: ntrex128
|
912 |
+
args: spa-hin
|
913 |
+
metrics:
|
914 |
+
- name: BLEU
|
915 |
+
type: bleu
|
916 |
+
value: 17.9
|
917 |
+
- name: chr-F
|
918 |
+
type: chrf
|
919 |
+
value: 0.43747
|
920 |
+
- task:
|
921 |
+
name: Translation spa-pan
|
922 |
+
type: translation
|
923 |
+
args: spa-pan
|
924 |
+
dataset:
|
925 |
+
name: ntrex128
|
926 |
+
type: ntrex128
|
927 |
+
args: spa-pan
|
928 |
+
metrics:
|
929 |
+
- name: BLEU
|
930 |
+
type: bleu
|
931 |
+
value: 10.2
|
932 |
+
- name: chr-F
|
933 |
+
type: chrf
|
934 |
+
value: 0.36716
|
935 |
+
- task:
|
936 |
+
name: Translation spa-urd
|
937 |
+
type: translation
|
938 |
+
args: spa-urd
|
939 |
+
dataset:
|
940 |
+
name: ntrex128
|
941 |
+
type: ntrex128
|
942 |
+
args: spa-urd
|
943 |
+
metrics:
|
944 |
+
- name: BLEU
|
945 |
+
type: bleu
|
946 |
+
value: 14.6
|
947 |
+
- name: chr-F
|
948 |
+
type: chrf
|
949 |
+
value: 0.41929
|
950 |
+
- task:
|
951 |
+
name: Translation eng-hin
|
952 |
+
type: translation
|
953 |
+
args: eng-hin
|
954 |
+
dataset:
|
955 |
+
name: tatoeba-test-v2021-08-07
|
956 |
+
type: tatoeba_mt
|
957 |
+
args: eng-hin
|
958 |
+
metrics:
|
959 |
+
- name: BLEU
|
960 |
+
type: bleu
|
961 |
+
value: 28.1
|
962 |
+
- name: chr-F
|
963 |
+
type: chrf
|
964 |
+
value: 0.52587
|
965 |
+
- task:
|
966 |
+
name: Translation eng-mar
|
967 |
+
type: translation
|
968 |
+
args: eng-mar
|
969 |
+
dataset:
|
970 |
+
name: tatoeba-test-v2021-08-07
|
971 |
+
type: tatoeba_mt
|
972 |
+
args: eng-mar
|
973 |
+
metrics:
|
974 |
+
- name: BLEU
|
975 |
+
type: bleu
|
976 |
+
value: 24.2
|
977 |
+
- name: chr-F
|
978 |
+
type: chrf
|
979 |
+
value: 0.52516
|
980 |
+
- task:
|
981 |
+
name: Translation multi-multi
|
982 |
+
type: translation
|
983 |
+
args: multi-multi
|
984 |
+
dataset:
|
985 |
+
name: tatoeba-test-v2020-07-28-v2023-09-26
|
986 |
+
type: tatoeba_mt
|
987 |
+
args: multi-multi
|
988 |
+
metrics:
|
989 |
+
- name: BLEU
|
990 |
+
type: bleu
|
991 |
+
value: 21.9
|
992 |
+
- name: chr-F
|
993 |
+
type: chrf
|
994 |
+
value: 0.49252
|
995 |
+
- task:
|
996 |
+
name: Translation eng-ben
|
997 |
+
type: translation
|
998 |
+
args: eng-ben
|
999 |
+
dataset:
|
1000 |
+
name: tico19-test
|
1001 |
+
type: tico19-test
|
1002 |
+
args: eng-ben
|
1003 |
+
metrics:
|
1004 |
+
- name: BLEU
|
1005 |
+
type: bleu
|
1006 |
+
value: 18.6
|
1007 |
+
- name: chr-F
|
1008 |
+
type: chrf
|
1009 |
+
value: 0.51850
|
1010 |
+
- task:
|
1011 |
+
name: Translation eng-hin
|
1012 |
+
type: translation
|
1013 |
+
args: eng-hin
|
1014 |
+
dataset:
|
1015 |
+
name: tico19-test
|
1016 |
+
type: tico19-test
|
1017 |
+
args: eng-hin
|
1018 |
+
metrics:
|
1019 |
+
- name: BLEU
|
1020 |
+
type: bleu
|
1021 |
+
value: 41.9
|
1022 |
+
- name: chr-F
|
1023 |
+
type: chrf
|
1024 |
+
value: 0.62999
|
1025 |
+
- task:
|
1026 |
+
name: Translation eng-mar
|
1027 |
+
type: translation
|
1028 |
+
args: eng-mar
|
1029 |
+
dataset:
|
1030 |
+
name: tico19-test
|
1031 |
+
type: tico19-test
|
1032 |
+
args: eng-mar
|
1033 |
+
metrics:
|
1034 |
+
- name: BLEU
|
1035 |
+
type: bleu
|
1036 |
+
value: 13.0
|
1037 |
+
- name: chr-F
|
1038 |
+
type: chrf
|
1039 |
+
value: 0.45968
|
1040 |
+
- task:
|
1041 |
+
name: Translation eng-nep
|
1042 |
+
type: translation
|
1043 |
+
args: eng-nep
|
1044 |
+
dataset:
|
1045 |
+
name: tico19-test
|
1046 |
+
type: tico19-test
|
1047 |
+
args: eng-nep
|
1048 |
+
metrics:
|
1049 |
+
- name: BLEU
|
1050 |
+
type: bleu
|
1051 |
+
value: 18.7
|
1052 |
+
- name: chr-F
|
1053 |
+
type: chrf
|
1054 |
+
value: 0.54373
|
1055 |
+
- task:
|
1056 |
+
name: Translation eng-urd
|
1057 |
+
type: translation
|
1058 |
+
args: eng-urd
|
1059 |
+
dataset:
|
1060 |
+
name: tico19-test
|
1061 |
+
type: tico19-test
|
1062 |
+
args: eng-urd
|
1063 |
+
metrics:
|
1064 |
+
- name: BLEU
|
1065 |
+
type: bleu
|
1066 |
+
value: 21.7
|
1067 |
+
- name: chr-F
|
1068 |
+
type: chrf
|
1069 |
+
value: 0.50920
|
1070 |
+
- task:
|
1071 |
+
name: Translation fra-hin
|
1072 |
+
type: translation
|
1073 |
+
args: fra-hin
|
1074 |
+
dataset:
|
1075 |
+
name: tico19-test
|
1076 |
+
type: tico19-test
|
1077 |
+
args: fra-hin
|
1078 |
+
metrics:
|
1079 |
+
- name: BLEU
|
1080 |
+
type: bleu
|
1081 |
+
value: 25.6
|
1082 |
+
- name: chr-F
|
1083 |
+
type: chrf
|
1084 |
+
value: 0.48666
|
1085 |
+
- task:
|
1086 |
+
name: Translation fra-nep
|
1087 |
+
type: translation
|
1088 |
+
args: fra-nep
|
1089 |
+
dataset:
|
1090 |
+
name: tico19-test
|
1091 |
+
type: tico19-test
|
1092 |
+
args: fra-nep
|
1093 |
+
metrics:
|
1094 |
+
- name: BLEU
|
1095 |
+
type: bleu
|
1096 |
+
value: 10.0
|
1097 |
+
- name: chr-F
|
1098 |
+
type: chrf
|
1099 |
+
value: 0.41414
|
1100 |
+
- task:
|
1101 |
+
name: Translation fra-urd
|
1102 |
+
type: translation
|
1103 |
+
args: fra-urd
|
1104 |
+
dataset:
|
1105 |
+
name: tico19-test
|
1106 |
+
type: tico19-test
|
1107 |
+
args: fra-urd
|
1108 |
+
metrics:
|
1109 |
+
- name: BLEU
|
1110 |
+
type: bleu
|
1111 |
+
value: 13.4
|
1112 |
+
- name: chr-F
|
1113 |
+
type: chrf
|
1114 |
+
value: 0.39479
|
1115 |
+
- task:
|
1116 |
+
name: Translation por-ben
|
1117 |
+
type: translation
|
1118 |
+
args: por-ben
|
1119 |
+
dataset:
|
1120 |
+
name: tico19-test
|
1121 |
+
type: tico19-test
|
1122 |
+
args: por-ben
|
1123 |
+
metrics:
|
1124 |
+
- name: BLEU
|
1125 |
+
type: bleu
|
1126 |
+
value: 12.7
|
1127 |
+
- name: chr-F
|
1128 |
+
type: chrf
|
1129 |
+
value: 0.45609
|
1130 |
+
- task:
|
1131 |
+
name: Translation por-hin
|
1132 |
+
type: translation
|
1133 |
+
args: por-hin
|
1134 |
+
dataset:
|
1135 |
+
name: tico19-test
|
1136 |
+
type: tico19-test
|
1137 |
+
args: por-hin
|
1138 |
+
metrics:
|
1139 |
+
- name: BLEU
|
1140 |
+
type: bleu
|
1141 |
+
value: 31.2
|
1142 |
+
- name: chr-F
|
1143 |
+
type: chrf
|
1144 |
+
value: 0.55530
|
1145 |
+
- task:
|
1146 |
+
name: Translation por-nep
|
1147 |
+
type: translation
|
1148 |
+
args: por-nep
|
1149 |
+
dataset:
|
1150 |
+
name: tico19-test
|
1151 |
+
type: tico19-test
|
1152 |
+
args: por-nep
|
1153 |
+
metrics:
|
1154 |
+
- name: BLEU
|
1155 |
+
type: bleu
|
1156 |
+
value: 12.4
|
1157 |
+
- name: chr-F
|
1158 |
+
type: chrf
|
1159 |
+
value: 0.47698
|
1160 |
+
- task:
|
1161 |
+
name: Translation por-urd
|
1162 |
+
type: translation
|
1163 |
+
args: por-urd
|
1164 |
+
dataset:
|
1165 |
+
name: tico19-test
|
1166 |
+
type: tico19-test
|
1167 |
+
args: por-urd
|
1168 |
+
metrics:
|
1169 |
+
- name: BLEU
|
1170 |
+
type: bleu
|
1171 |
+
value: 15.6
|
1172 |
+
- name: chr-F
|
1173 |
+
type: chrf
|
1174 |
+
value: 0.44747
|
1175 |
+
- task:
|
1176 |
+
name: Translation spa-ben
|
1177 |
+
type: translation
|
1178 |
+
args: spa-ben
|
1179 |
+
dataset:
|
1180 |
+
name: tico19-test
|
1181 |
+
type: tico19-test
|
1182 |
+
args: spa-ben
|
1183 |
+
metrics:
|
1184 |
+
- name: BLEU
|
1185 |
+
type: bleu
|
1186 |
+
value: 13.3
|
1187 |
+
- name: chr-F
|
1188 |
+
type: chrf
|
1189 |
+
value: 0.46418
|
1190 |
+
- task:
|
1191 |
+
name: Translation spa-hin
|
1192 |
+
type: translation
|
1193 |
+
args: spa-hin
|
1194 |
+
dataset:
|
1195 |
+
name: tico19-test
|
1196 |
+
type: tico19-test
|
1197 |
+
args: spa-hin
|
1198 |
+
metrics:
|
1199 |
+
- name: BLEU
|
1200 |
+
type: bleu
|
1201 |
+
value: 31.0
|
1202 |
+
- name: chr-F
|
1203 |
+
type: chrf
|
1204 |
+
value: 0.55526
|
1205 |
+
- task:
|
1206 |
+
name: Translation spa-mar
|
1207 |
+
type: translation
|
1208 |
+
args: spa-mar
|
1209 |
+
dataset:
|
1210 |
+
name: tico19-test
|
1211 |
+
type: tico19-test
|
1212 |
+
args: spa-mar
|
1213 |
+
metrics:
|
1214 |
+
- name: BLEU
|
1215 |
+
type: bleu
|
1216 |
+
value: 10.0
|
1217 |
+
- name: chr-F
|
1218 |
+
type: chrf
|
1219 |
+
value: 0.41189
|
1220 |
+
- task:
|
1221 |
+
name: Translation spa-nep
|
1222 |
+
type: translation
|
1223 |
+
args: spa-nep
|
1224 |
+
dataset:
|
1225 |
+
name: tico19-test
|
1226 |
+
type: tico19-test
|
1227 |
+
args: spa-nep
|
1228 |
+
metrics:
|
1229 |
+
- name: BLEU
|
1230 |
+
type: bleu
|
1231 |
+
value: 12.1
|
1232 |
+
- name: chr-F
|
1233 |
+
type: chrf
|
1234 |
+
value: 0.47414
|
1235 |
+
- task:
|
1236 |
+
name: Translation spa-urd
|
1237 |
+
type: translation
|
1238 |
+
args: spa-urd
|
1239 |
+
dataset:
|
1240 |
+
name: tico19-test
|
1241 |
+
type: tico19-test
|
1242 |
+
args: spa-urd
|
1243 |
+
metrics:
|
1244 |
+
- name: BLEU
|
1245 |
+
type: bleu
|
1246 |
+
value: 15.6
|
1247 |
+
- name: chr-F
|
1248 |
+
type: chrf
|
1249 |
+
value: 0.44788
|
1250 |
+
- task:
|
1251 |
+
name: Translation eng-hin
|
1252 |
+
type: translation
|
1253 |
+
args: eng-hin
|
1254 |
+
dataset:
|
1255 |
+
name: newstest2014
|
1256 |
+
type: wmt-2014-news
|
1257 |
+
args: eng-hin
|
1258 |
+
metrics:
|
1259 |
+
- name: BLEU
|
1260 |
+
type: bleu
|
1261 |
+
value: 24.0
|
1262 |
+
- name: chr-F
|
1263 |
+
type: chrf
|
1264 |
+
value: 0.51541
|
1265 |
+
- task:
|
1266 |
+
name: Translation eng-guj
|
1267 |
+
type: translation
|
1268 |
+
args: eng-guj
|
1269 |
+
dataset:
|
1270 |
+
name: newstest2019
|
1271 |
+
type: wmt-2019-news
|
1272 |
+
args: eng-guj
|
1273 |
+
metrics:
|
1274 |
+
- name: BLEU
|
1275 |
+
type: bleu
|
1276 |
+
value: 25.7
|
1277 |
+
- name: chr-F
|
1278 |
+
type: chrf
|
1279 |
+
value: 0.57815
|
1280 |
+
---
|
1281 |
+
# opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc
|
1282 |
+
|
1283 |
+
## Table of Contents
|
1284 |
+
- [Model Details](#model-details)
|
1285 |
+
- [Uses](#uses)
|
1286 |
+
- [Risks, Limitations and Biases](#risks-limitations-and-biases)
|
1287 |
+
- [How to Get Started With the Model](#how-to-get-started-with-the-model)
|
1288 |
+
- [Training](#training)
|
1289 |
+
- [Evaluation](#evaluation)
|
1290 |
+
- [Citation Information](#citation-information)
|
1291 |
+
- [Acknowledgements](#acknowledgements)
|
1292 |
+
|
1293 |
+
## Model Details
|
1294 |
+
|
1295 |
+
Neural machine translation model for translating from German, English, French, Portuguese and Spanish (deu+eng+fra+por+spa) to Indic languages (inc).
|
1296 |
+
|
1297 |
+
This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the Transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).
|
1298 |
+
**Model Description:**
|
1299 |
+
- **Developed by:** Language Technology Research Group at the University of Helsinki
|
1300 |
+
- **Model Type:** Translation (transformer-big)
|
1301 |
+
- **Release**: 2024-05-30
|
1302 |
+
- **License:** Apache-2.0
|
1303 |
+
- **Language(s):**
|
1304 |
+
- Source Language(s): deu eng fra por spa
|
1305 |
+
- Target Language(s): anp asm awa ben bho bpy div dty gbm guj hif hin hne hns kas kok lah mag mai mar nep npi ori pan pli rhg rmy rom san sin skr snd syl urd
|
1306 |
+
- Valid Target Language Labels: >>aee<< >>aeq<< >>anp<< >>anr<< >>asm<< >>awa<< >>bdv<< >>ben<< >>bfb<< >>bfy<< >>bfz<< >>bgc<< >>bgd<< >>bge<< >>bgw<< >>bha<< >>bhb<< >>bhd<< >>bhe<< >>bhi<< >>bho<< >>bht<< >>bhu<< >>bjj<< >>bkk<< >>bmj<< >>bns<< >>bpx<< >>bpy<< >>bra<< >>btv<< >>ccp<< >>cdh<< >>cdi<< >>cdj<< >>cih<< >>clh<< >>ctg<< >>dcc<< >>dhn<< >>dho<< >>div<< >>dmk<< >>dml<< >>doi<< >>dry<< >>dty<< >>dub<< >>duh<< >>dwz<< >>emx<< >>gas<< >>gbk<< >>gbl<< >>gbm<< >>gdx<< >>ggg<< >>ghr<< >>gig<< >>gjk<< >>glh<< >>gra<< >>guj<< >>gwc<< >>gwf<< >>gwt<< >>haj<< >>hca<< >>hif<< >>hif_Latn<< >>hii<< >>hin<< >>hin_Latn<< >>hlb<< >>hne<< >>hns<< >>jdg<< >>jml<< >>jnd<< >>jns<< >>kas<< >>kas_Arab<< >>kas_Deva<< >>kbu<< >>keq<< >>key<< >>kfr<< >>kfs<< >>kft<< >>kfu<< >>kfv<< >>kfx<< >>kfy<< >>khn<< >>khw<< >>kjo<< >>kls<< >>kok<< >>kra<< >>ksy<< >>kvx<< >>kxp<< >>kyw<< >>lah<< >>lbm<< >>lhl<< >>lmn<< >>lss<< >>luv<< >>mag<< >>mai<< >>mar<< >>mby<< >>mjl<< >>mjz<< >>mkb<< >>mke<< >>mki<< >>mvy<< >>mwr<< >>nag<< >>nep<< >>nhh<< >>nli<< >>nlx<< >>noe<< >>noi<< >>npi<< >>odk<< >>omr<< >>ori<< >>ort<< >>pan<< >>pan_Guru<< >>paq<< >>pcl<< >>pgg<< >>phd<< >>phl<< >>pli<< >>plk<< >>plp<< >>pmh<< >>psh<< >>psi<< >>psu<< >>pwr<< >>raj<< >>rei<< >>rhg<< >>rhg_Latn<< >>rjs<< >>rkt<< >>rmi<< >>rmq<< >>rmt<< >>rmy<< >>rom<< >>rtw<< >>san<< >>san_Deva<< >>saz<< >>sbn<< >>sck<< >>scl<< >>sdg<< >>sdr<< >>shd<< >>sin<< >>sjp<< >>skr<< >>smm<< >>smv<< >>snd<< >>snd_Arab<< >>soi<< >>srx<< >>ssi<< >>sts<< >>syl<< >>syl_Sylo<< >>tdb<< >>the<< >>thl<< >>thq<< >>thr<< >>tkb<< >>tkt<< >>tnv<< >>tra<< >>trw<< >>urd<< >>ush<< >>vaa<< >>vah<< >>vas<< >>vav<< >>ved<< >>vgr<< >>wsv<< >>wtm<< >>xka<< >>xxx<<
|
1307 |
+
- **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
|
1308 |
+
- **Resources for more information:**
|
1309 |
+
- [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-inc/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
|
1310 |
+
- [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
|
1311 |
+
- [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
|
1312 |
+
- [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
|
1313 |
+
- [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
|
1314 |
+
- [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)
|
1315 |
+
|
1316 |
+
This is a multilingual translation model with multiple target languages. A sentence-initial language token is required in the form of `>>id<<` (id = valid target language ID), e.g. `>>anp<<`.
|
1317 |
+
|
1318 |
+
## Uses
|
1319 |
+
|
1320 |
+
This model can be used for translation and text-to-text generation.
|
1321 |
+
|
1322 |
+
## Risks, Limitations and Biases
|
1323 |
+
|
1324 |
+
**CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
|
1325 |
+
|
1326 |
+
Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
|
1327 |
+
|
1328 |
+
## How to Get Started With the Model
|
1329 |
+
|
1330 |
+
A short code example:
|
1331 |
+
|
1332 |
+
```python
|
1333 |
+
from transformers import MarianMTModel, MarianTokenizer
|
1334 |
+
|
1335 |
+
src_text = [
|
1336 |
+
">>anp<< Replace this with text in an accepted source language.",
|
1337 |
+
">>urd<< This is the second sentence."
|
1338 |
+
]
|
1339 |
+
|
1340 |
+
model_name = "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc"
|
1341 |
+
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
1342 |
+
model = MarianMTModel.from_pretrained(model_name)
|
1343 |
+
translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))
|
1344 |
+
|
1345 |
+
for t in translated:
|
1346 |
+
    print(tokenizer.decode(t, skip_special_tokens=True))
|
1347 |
+
```
|
1348 |
+
|
1349 |
+
You can also use OPUS-MT models with the transformers pipelines, for example:
|
1350 |
+
|
1351 |
+
```python
|
1352 |
+
from transformers import pipeline
|
1353 |
+
pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc")
|
1354 |
+
print(pipe(">>anp<< Replace this with text in an accepted source language."))
|
1355 |
+
```
|
1356 |
+
|
1357 |
+
## Training
|
1358 |
+
|
1359 |
+
- **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
|
1360 |
+
- **Pre-processing**: SentencePiece (spm32k,spm32k)
|
1361 |
+
- **Model Type:** transformer-big
|
1362 |
+
- **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
|
1363 |
+
- **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
|
1364 |
+
|
1365 |
+
## Evaluation
|
1366 |
+
|
1367 |
+
* [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-inc/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
|
1368 |
+
* test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
|
1369 |
+
* test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-inc/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt)
|
1370 |
+
* benchmark results: [benchmark_results.txt](benchmark_results.txt)
|
1371 |
+
* benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
|
1372 |
+
|
1373 |
+
| langpair | testset | chr-F | BLEU | #sent | #words |
|
1374 |
+
|----------|---------|-------|-------|-------|--------|
|
1375 |
+
| eng-ben | tatoeba-test-v2021-08-07 | 0.48316 | 18.1 | 2500 | 11654 |
|
1376 |
+
| eng-hin | tatoeba-test-v2021-08-07 | 0.52587 | 28.1 | 5000 | 32904 |
|
1377 |
+
| eng-mar | tatoeba-test-v2021-08-07 | 0.52516 | 24.2 | 10396 | 61140 |
|
1378 |
+
| eng-urd | tatoeba-test-v2021-08-07 | 0.46228 | 18.8 | 1663 | 12155 |
|
1379 |
+
| deu-ben | flores101-devtest | 0.44269 | 10.8 | 1012 | 21155 |
|
1380 |
+
| deu-hin | flores101-devtest | 0.48314 | 21.9 | 1012 | 27743 |
|
1381 |
+
| eng-ben | flores101-devtest | 0.51768 | 17.4 | 1012 | 21155 |
|
1382 |
+
| eng-guj | flores101-devtest | 0.54325 | 22.7 | 1012 | 23840 |
|
1383 |
+
| eng-hin | flores101-devtest | 0.58472 | 34.1 | 1012 | 27743 |
|
1384 |
+
| fra-ben | flores101-devtest | 0.44304 | 11.1 | 1012 | 21155 |
|
1385 |
+
| fra-hin | flores101-devtest | 0.48245 | 22.5 | 1012 | 27743 |
|
1386 |
+
| deu-ben | flores200-devtest | 0.44696 | 11.3 | 1012 | 21155 |
|
1387 |
+
| deu-guj | flores200-devtest | 0.40939 | 12.0 | 1012 | 23840 |
|
1388 |
+
| deu-hin | flores200-devtest | 0.48864 | 22.7 | 1012 | 27743 |
|
1389 |
+
| deu-hne | flores200-devtest | 0.43166 | 14.2 | 1012 | 26582 |
|
1390 |
+
| deu-mag | flores200-devtest | 0.43058 | 14.2 | 1012 | 26516 |
|
1391 |
+
| deu-urd | flores200-devtest | 0.41167 | 14.3 | 1012 | 28098 |
|
1392 |
+
| eng-ben | flores200-devtest | 0.52088 | 17.7 | 1012 | 21155 |
|
1393 |
+
| eng-guj | flores200-devtest | 0.54758 | 23.2 | 1012 | 23840 |
|
1394 |
+
| eng-hin | flores200-devtest | 0.58825 | 34.4 | 1012 | 27743 |
|
1395 |
+
| eng-hne | flores200-devtest | 0.46144 | 19.1 | 1012 | 26582 |
|
1396 |
+
| eng-mag | flores200-devtest | 0.50291 | 21.9 | 1012 | 26516 |
|
1397 |
+
| eng-mar | flores200-devtest | 0.49344 | 15.6 | 1012 | 21810 |
|
1398 |
+
| eng-pan | flores200-devtest | 0.45635 | 18.4 | 1012 | 27451 |
|
1399 |
+
| eng-sin | flores200-devtest | 0.45683 | 11.8 | 1012 | 23278 |
|
1400 |
+
| eng-urd | flores200-devtest | 0.48224 | 20.6 | 1012 | 28098 |
|
1401 |
+
| fra-ben | flores200-devtest | 0.44486 | 11.1 | 1012 | 21155 |
|
1402 |
+
| fra-guj | flores200-devtest | 0.41021 | 12.2 | 1012 | 23840 |
|
1403 |
+
| fra-hin | flores200-devtest | 0.48632 | 22.7 | 1012 | 27743 |
|
1404 |
+
| fra-hne | flores200-devtest | 0.42777 | 13.8 | 1012 | 26582 |
|
1405 |
+
| fra-mag | flores200-devtest | 0.42725 | 14.3 | 1012 | 26516 |
|
1406 |
+
| fra-urd | flores200-devtest | 0.40901 | 13.6 | 1012 | 28098 |
|
1407 |
+
| por-ben | flores200-devtest | 0.43877 | 10.7 | 1012 | 21155 |
|
1408 |
+
| por-hin | flores200-devtest | 0.50121 | 23.9 | 1012 | 27743 |
|
1409 |
+
| por-hne | flores200-devtest | 0.42270 | 14.1 | 1012 | 26582 |
|
1410 |
+
| por-mag | flores200-devtest | 0.42146 | 13.7 | 1012 | 26516 |
|
1411 |
+
| por-san | flores200-devtest | 0.09879 | 0.4 | 1012 | 18253 |
|
1412 |
+
| por-urd | flores200-devtest | 0.41225 | 14.5 | 1012 | 28098 |
|
1413 |
+
| spa-ben | flores200-devtest | 0.42040 | 8.8 | 1012 | 21155 |
|
1414 |
+
| spa-hin | flores200-devtest | 0.43977 | 16.4 | 1012 | 27743 |
|
1415 |
+
| eng-hin | newstest2014 | 0.51541 | 24.0 | 2507 | 60872 |
|
1416 |
+
| eng-guj | newstest2019 | 0.57815 | 25.7 | 998 | 21924 |
|
1417 |
+
| deu-ben | ntrex128 | 0.44384 | 9.9 | 1997 | 40095 |
|
1418 |
+
| deu-hin | ntrex128 | 0.43252 | 17.0 | 1997 | 55219 |
|
1419 |
+
| deu-urd | ntrex128 | 0.41844 | 14.8 | 1997 | 54259 |
|
1420 |
+
| eng-ben | ntrex128 | 0.52381 | 17.3 | 1997 | 40095 |
|
1421 |
+
| eng-guj | ntrex128 | 0.49386 | 17.2 | 1997 | 45335 |
|
1422 |
+
| eng-hin | ntrex128 | 0.52696 | 27.4 | 1997 | 55219 |
|
1423 |
+
| eng-mar | ntrex128 | 0.45244 | 10.8 | 1997 | 42375 |
|
1424 |
+
| eng-nep | ntrex128 | 0.43339 | 8.8 | 1997 | 40570 |
|
1425 |
+
| eng-pan | ntrex128 | 0.46534 | 19.5 | 1997 | 54355 |
|
1426 |
+
| eng-sin | ntrex128 | 0.44124 | 10.5 | 1997 | 44429 |
|
1427 |
+
| eng-urd | ntrex128 | 0.50060 | 22.4 | 1997 | 54259 |
|
1428 |
+
| fra-ben | ntrex128 | 0.42857 | 9.4 | 1997 | 40095 |
|
1429 |
+
| fra-hin | ntrex128 | 0.42777 | 17.4 | 1997 | 55219 |
|
1430 |
+
| fra-urd | ntrex128 | 0.41229 | 14.3 | 1997 | 54259 |
|
1431 |
+
| por-ben | ntrex128 | 0.44134 | 10.1 | 1997 | 40095 |
|
1432 |
+
| por-hin | ntrex128 | 0.43461 | 17.7 | 1997 | 55219 |
|
1433 |
+
| por-urd | ntrex128 | 0.41777 | 14.5 | 1997 | 54259 |
|
1434 |
+
| spa-ben | ntrex128 | 0.45329 | 10.6 | 1997 | 40095 |
|
1435 |
+
| spa-hin | ntrex128 | 0.43747 | 17.9 | 1997 | 55219 |
|
1436 |
+
| spa-urd | ntrex128 | 0.41929 | 14.6 | 1997 | 54259 |
|
1437 |
+
| eng-ben | tico19-test | 0.51850 | 18.6 | 2100 | 51695 |
|
1438 |
+
| eng-hin | tico19-test | 0.62999 | 41.9 | 2100 | 62680 |
|
1439 |
+
| eng-mar | tico19-test | 0.45968 | 13.0 | 2100 | 50872 |
|
1440 |
+
| eng-nep | tico19-test | 0.54373 | 18.7 | 2100 | 48363 |
|
1441 |
+
| eng-urd | tico19-test | 0.50920 | 21.7 | 2100 | 65312 |
|
1442 |
+
| fra-hin | tico19-test | 0.48666 | 25.6 | 2100 | 62680 |
|
1443 |
+
| fra-nep | tico19-test | 0.41414 | 10.0 | 2100 | 48363 |
|
1444 |
+
| por-ben | tico19-test | 0.45609 | 12.7 | 2100 | 51695 |
|
1445 |
+
| por-hin | tico19-test | 0.55530 | 31.2 | 2100 | 62680 |
|
1446 |
+
| por-mar | tico19-test | 0.40344 | 9.7 | 2100 | 50872 |
|
1447 |
+
| por-nep | tico19-test | 0.47698 | 12.4 | 2100 | 48363 |
|
1448 |
+
| por-urd | tico19-test | 0.44747 | 15.6 | 2100 | 65312 |
|
1449 |
+
| spa-ben | tico19-test | 0.46418 | 13.3 | 2100 | 51695 |
|
1450 |
+
| spa-hin | tico19-test | 0.55526 | 31.0 | 2100 | 62680 |
|
1451 |
+
| spa-mar | tico19-test | 0.41189 | 10.0 | 2100 | 50872 |
|
1452 |
+
| spa-nep | tico19-test | 0.47414 | 12.1 | 2100 | 48363 |
|
1453 |
+
| spa-urd | tico19-test | 0.44788 | 15.6 | 2100 | 65312 |
|
1454 |
+
|
1455 |
+
## Citation Information
|
1456 |
+
|
1457 |
+
* Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w) and [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/) and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (Please cite these publications if you use this model.)
|
1458 |
+
|
1459 |
+
```bibtex
|
1460 |
+
@article{tiedemann2023democratizing,
|
1461 |
+
title={Democratizing neural machine translation with {OPUS-MT}},
|
1462 |
+
author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
|
1463 |
+
journal={Language Resources and Evaluation},
|
1464 |
+
volume={58},
|
1465 |
+
pages={713--755},
|
1466 |
+
year={2023},
|
1467 |
+
publisher={Springer Nature},
|
1468 |
+
issn={1574-0218},
|
1469 |
+
doi={10.1007/s10579-023-09704-w}
|
1470 |
+
}
|
1471 |
+
|
1472 |
+
@inproceedings{tiedemann-thottingal-2020-opus,
|
1473 |
+
title = "{OPUS}-{MT} {--} Building open translation services for the World",
|
1474 |
+
author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
|
1475 |
+
booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
|
1476 |
+
month = nov,
|
1477 |
+
year = "2020",
|
1478 |
+
address = "Lisboa, Portugal",
|
1479 |
+
publisher = "European Association for Machine Translation",
|
1480 |
+
url = "https://aclanthology.org/2020.eamt-1.61",
|
1481 |
+
pages = "479--480",
|
1482 |
+
}
|
1483 |
+
|
1484 |
+
@inproceedings{tiedemann-2020-tatoeba,
|
1485 |
+
title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
|
1486 |
+
author = {Tiedemann, J{\"o}rg},
|
1487 |
+
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
|
1488 |
+
month = nov,
|
1489 |
+
year = "2020",
|
1490 |
+
address = "Online",
|
1491 |
+
publisher = "Association for Computational Linguistics",
|
1492 |
+
url = "https://aclanthology.org/2020.wmt-1.139",
|
1493 |
+
pages = "1174--1182",
|
1494 |
+
}
|
1495 |
+
```
|
1496 |
+
|
1497 |
+
## Acknowledgements
|
1498 |
+
|
1499 |
+
The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC – IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).
|
1500 |
+
|
1501 |
+
## Model conversion info
|
1502 |
+
|
1503 |
+
* transformers version: 4.45.1
|
1504 |
+
* OPUS-MT git hash: 0882077
|
1505 |
+
* port time: Tue Oct 8 10:09:07 EEST 2024
|
1506 |
+
* port machine: LM0-400-22516.local
|
benchmark_results.txt
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
multi-multi tatoeba-test-v2020-07-28-v2023-09-26 0.49252 21.9 10000 60042
|
2 |
+
deu-ben flores101-devtest 0.44269 10.8 1012 21155
|
3 |
+
deu-hin flores101-devtest 0.48314 21.9 1012 27743
|
4 |
+
deu-mar flores101-devtest 0.39479 9.2 1012 21810
|
5 |
+
deu-npi flores101-devtest 0.15099 0.2 1012 19762
|
6 |
+
eng-ben flores101-devtest 0.51768 17.4 1012 21155
|
7 |
+
eng-guj flores101-devtest 0.54325 22.7 1012 23840
|
8 |
+
eng-hin flores101-devtest 0.58472 34.1 1012 27743
|
9 |
+
fra-ben flores101-devtest 0.44304 11.1 1012 21155
|
10 |
+
fra-hin flores101-devtest 0.48245 22.5 1012 27743
|
11 |
+
fra-npi flores101-devtest 0.12216 0.2 1012 19762
|
12 |
+
spa-guj flores101-devtest 0.37550 8.3 1012 23840
|
13 |
+
deu-asm flores200-devtest 0.23161 2.6 1012 21028
|
14 |
+
deu-awa flores200-devtest 0.26194 5.0 1012 26642
|
15 |
+
deu-ben flores200-devtest 0.44696 11.3 1012 21155
|
16 |
+
deu-bho flores200-devtest 0.34690 8.8 1012 27914
|
17 |
+
deu-guj flores200-devtest 0.40939 12.0 1012 23840
|
18 |
+
deu-hin flores200-devtest 0.48864 22.7 1012 27743
|
19 |
+
deu-hne flores200-devtest 0.43166 14.2 1012 26582
|
20 |
+
deu-kas_Arab flores200-devtest 0.16579 0.5 1012 23514
|
21 |
+
deu-kas_Deva flores200-devtest 0.12989 0.6 1012 26371
|
22 |
+
deu-mag flores200-devtest 0.43058 14.2 1012 26516
|
23 |
+
deu-mai flores200-devtest 0.38967 8.1 1012 25999
|
24 |
+
deu-mar flores200-devtest 0.39856 9.3 1012 21810
|
25 |
+
deu-npi flores200-devtest 0.12740 0.2 1012 19762
|
26 |
+
deu-pan flores200-devtest 0.36795 10.5 1012 27451
|
27 |
+
deu-san flores200-devtest 0.12652 0.5 1012 18253
|
28 |
+
deu-sin flores200-devtest 0.37977 7.2 1012 23278
|
29 |
+
deu-urd flores200-devtest 0.41167 14.3 1012 28098
|
30 |
+
eng-asm flores200-devtest 0.36298 6.1 1012 21028
|
31 |
+
eng-awa flores200-devtest 0.17420 2.7 1012 26642
|
32 |
+
eng-ben flores200-devtest 0.52088 17.7 1012 21155
|
33 |
+
eng-bho flores200-devtest 0.37333 11.6 1012 27914
|
34 |
+
eng-guj flores200-devtest 0.54758 23.2 1012 23840
|
35 |
+
eng-hin flores200-devtest 0.58825 34.4 1012 27743
|
36 |
+
eng-hne flores200-devtest 0.46144 19.1 1012 26582
|
37 |
+
eng-kas_Arab flores200-devtest 0.12804 0.5 1012 23514
|
38 |
+
eng-kas_Deva flores200-devtest 0.14226 1.0 1012 26371
|
39 |
+
eng-mag flores200-devtest 0.50291 21.9 1012 26516
|
40 |
+
eng-mai flores200-devtest 0.39362 9.7 1012 25999
|
41 |
+
eng-mar flores200-devtest 0.49344 15.6 1012 21810
|
42 |
+
eng-npi flores200-devtest 0.18868 0.3 1012 19762
|
43 |
+
eng-pan flores200-devtest 0.45635 18.4 1012 27451
|
44 |
+
eng-san flores200-devtest 0.13260 0.8 1012 18253
|
45 |
+
eng-sin flores200-devtest 0.45683 11.8 1012 23278
|
46 |
+
eng-urd flores200-devtest 0.48224 20.6 1012 28098
|
47 |
+
fra-asm flores200-devtest 0.24043 2.7 1012 21028
|
48 |
+
fra-awa flores200-devtest 0.26156 4.9 1012 26642
|
49 |
+
fra-ben flores200-devtest 0.44486 11.1 1012 21155
|
50 |
+
fra-bho flores200-devtest 0.34441 9.0 1012 27914
|
51 |
+
fra-guj flores200-devtest 0.41021 12.2 1012 23840
|
52 |
+
fra-hin flores200-devtest 0.48632 22.7 1012 27743
|
53 |
+
fra-hne flores200-devtest 0.42777 13.8 1012 26582
|
54 |
+
fra-kas_Arab flores200-devtest 0.16142 0.4 1012 23514
|
55 |
+
fra-kas_Deva flores200-devtest 0.12849 0.7 1012 26371
|
56 |
+
fra-mag flores200-devtest 0.42725 14.3 1012 26516
|
57 |
+
fra-mai flores200-devtest 0.39179 8.5 1012 25999
|
58 |
+
fra-mar flores200-devtest 0.38985 9.5 1012 21810
|
59 |
+
fra-npi flores200-devtest 0.12358 0.1 1012 19762
|
60 |
+
fra-pan flores200-devtest 0.36902 10.6 1012 27451
|
61 |
+
fra-san flores200-devtest 0.10558 0.3 1012 18253
|
62 |
+
fra-sin flores200-devtest 0.38581 7.3 1012 23278
|
63 |
+
fra-urd flores200-devtest 0.40901 13.6 1012 28098
|
64 |
+
por-asm flores200-devtest 0.25566 3.1 1012 21028
|
65 |
+
por-awa flores200-devtest 0.23673 4.0 1012 26642
|
66 |
+
por-ben flores200-devtest 0.43877 10.7 1012 21155
|
67 |
+
por-bho flores200-devtest 0.34736 9.2 1012 27914
|
68 |
+
por-guj flores200-devtest 0.38225 10.9 1012 23840
|
69 |
+
por-hin flores200-devtest 0.50121 23.9 1012 27743
|
70 |
+
por-hne flores200-devtest 0.42270 14.1 1012 26582
|
71 |
+
por-kas_Arab flores200-devtest 0.15653 0.4 1012 23514
|
72 |
+
por-kas_Deva flores200-devtest 0.12836 0.7 1012 26371
|
73 |
+
por-mag flores200-devtest 0.42146 13.7 1012 26516
|
74 |
+
por-mai flores200-devtest 0.38341 7.7 1012 25999
|
75 |
+
por-mar flores200-devtest 0.37814 8.6 1012 21810
|
76 |
+
por-npi flores200-devtest 0.12482 0.1 1012 19762
|
77 |
+
por-pan flores200-devtest 0.34711 9.2 1012 27451
|
78 |
+
por-san flores200-devtest 0.09879 0.4 1012 18253
|
79 |
+
por-sin flores200-devtest 0.38140 7.4 1012 23278
|
80 |
+
por-urd flores200-devtest 0.41225 14.5 1012 28098
|
81 |
+
spa-asm flores200-devtest 0.24228 2.1 1012 21028
|
82 |
+
spa-awa flores200-devtest 0.24287 3.8 1012 26642
|
83 |
+
spa-ben flores200-devtest 0.42040 8.8 1012 21155
|
84 |
+
spa-bho flores200-devtest 0.33628 8.0 1012 27914
|
85 |
+
spa-guj flores200-devtest 0.37414 8.1 1012 23840
|
86 |
+
spa-hin flores200-devtest 0.43977 16.4 1012 27743
|
87 |
+
spa-hne flores200-devtest 0.39555 10.8 1012 26582
|
88 |
+
spa-kas_Arab flores200-devtest 0.15572 0.4 1012 23514
|
89 |
+
spa-kas_Deva flores200-devtest 0.12956 0.6 1012 26371
|
90 |
+
spa-mag flores200-devtest 0.39621 11.1 1012 26516
|
91 |
+
spa-mai flores200-devtest 0.36462 6.4 1012 25999
|
92 |
+
spa-mar flores200-devtest 0.35370 6.5 1012 21810
|
93 |
+
spa-npi flores200-devtest 0.12237 0.1 1012 19762
|
94 |
+
spa-pan flores200-devtest 0.33808 7.6 1012 27451
|
95 |
+
spa-san flores200-devtest 0.11964 0.3 1012 18253
|
96 |
+
spa-sin flores200-devtest 0.36322 5.9 1012 23278
|
97 |
+
spa-urd flores200-devtest 0.37993 10.8 1012 28098
|
98 |
+
eng-hin newstest2014 0.51541 24.0 2507 60872
|
99 |
+
eng-guj newstest2019 0.57815 25.7 998 21924
|
100 |
+
deu-ben ntrex128 0.44384 9.9 1997 40095
|
101 |
+
deu-div ntrex128 0.18948 0.0 1997 37802
|
102 |
+
deu-guj ntrex128 0.38060 8.8 1997 45335
|
103 |
+
deu-hin ntrex128 0.43252 17.0 1997 55219
|
104 |
+
deu-mar ntrex128 0.36605 6.3 1997 42375
|
105 |
+
deu-nep ntrex128 0.36728 5.4 1997 40570
|
106 |
+
deu-pan ntrex128 0.36448 10.2 1997 54355
|
107 |
+
deu-sin ntrex128 0.37092 6.3 1997 44429
|
108 |
+
deu-snd_Arab ntrex128 0.248 0.0 1997 49866
|
109 |
+
deu-urd ntrex128 0.41844 14.8 1997 54259
|
110 |
+
eng-ben ntrex128 0.52381 17.3 1997 40095
|
111 |
+
eng-div ntrex128 0.17944 0.1 1997 37802
|
112 |
+
eng-guj ntrex128 0.49386 17.2 1997 45335
|
113 |
+
eng-hin ntrex128 0.52696 27.4 1997 55219
|
114 |
+
eng-mar ntrex128 0.45244 10.8 1997 42375
|
115 |
+
eng-nep ntrex128 0.43339 8.8 1997 40570
|
116 |
+
eng-pan ntrex128 0.46534 19.5 1997 54355
|
117 |
+
eng-sin ntrex128 0.44124 10.5 1997 44429
|
118 |
+
eng-snd_Arab ntrex128 0.292 0.0 1997 49866
|
119 |
+
eng-urd ntrex128 0.50060 22.4 1997 54259
|
120 |
+
fra-ben ntrex128 0.42857 9.4 1997 40095
|
121 |
+
fra-div ntrex128 0.18599 0.1 1997 37802
|
122 |
+
fra-guj ntrex128 0.37700 8.6 1997 45335
|
123 |
+
fra-hin ntrex128 0.42777 17.4 1997 55219
|
124 |
+
fra-mar ntrex128 0.35860 6.3 1997 42375
|
125 |
+
fra-nep ntrex128 0.36110 5.4 1997 40570
|
126 |
+
fra-pan ntrex128 0.35805 9.9 1997 54355
|
127 |
+
fra-sin ntrex128 0.36801 6.5 1997 44429
|
128 |
+
fra-snd_Arab ntrex128 0.236 0.0 1997 49866
|
129 |
+
fra-urd ntrex128 0.41229 14.3 1997 54259
|
130 |
+
por-ben ntrex128 0.44134 10.1 1997 40095
|
131 |
+
por-div ntrex128 0.18986 0.1 1997 37802
|
132 |
+
por-guj ntrex128 0.36989 8.4 1997 45335
|
133 |
+
por-hin ntrex128 0.43461 17.7 1997 55219
|
134 |
+
por-mar ntrex128 0.35130 6.2 1997 42375
|
135 |
+
por-nep ntrex128 0.37236 5.7 1997 40570
|
136 |
+
por-pan ntrex128 0.34258 8.7 1997 54355
|
137 |
+
por-sin ntrex128 0.37211 6.5 1997 44429
|
138 |
+
por-snd_Arab ntrex128 0.220 0.0 1997 49866
|
139 |
+
por-urd ntrex128 0.41777 14.5 1997 54259
|
140 |
+
spa-ben ntrex128 0.45329 10.6 1997 40095
|
141 |
+
spa-div ntrex128 0.18962 0.1 1997 37802
|
142 |
+
spa-guj ntrex128 0.38830 9.0 1997 45335
|
143 |
+
spa-hin ntrex128 0.43747 17.9 1997 55219
|
144 |
+
spa-mar ntrex128 0.35972 6.4 1997 42375
|
145 |
+
spa-nep ntrex128 0.37714 5.8 1997 40570
|
146 |
+
spa-pan ntrex128 0.36716 10.2 1997 54355
|
147 |
+
spa-sin ntrex128 0.38361 7.0 1997 44429
|
148 |
+
spa-snd_Arab ntrex128 0.212 0.0 1997 49866
|
149 |
+
spa-urd ntrex128 0.41929 14.6 1997 54259
|
150 |
+
eng-rom tatoeba-test-v2020-07-28 0.21188 2.3 671 4974
|
151 |
+
eng-awa tatoeba-test-v2021-08-07 0.17609 2.3 279 1148
|
152 |
+
eng-ben tatoeba-test-v2021-08-07 0.48316 18.1 2500 11654
|
153 |
+
eng-hin tatoeba-test-v2021-08-07 0.52587 28.1 5000 32904
|
154 |
+
eng-mar tatoeba-test-v2021-08-07 0.52516 24.2 10396 61140
|
155 |
+
eng-rom tatoeba-test-v2021-08-07 0.21957 2.2 706 5222
|
156 |
+
eng-urd tatoeba-test-v2021-08-07 0.46228 18.8 1663 12155
|
157 |
+
eng-ben tico19-test 0.51850 18.6 2100 51695
|
158 |
+
eng-hin tico19-test 0.62999 41.9 2100 62680
|
159 |
+
eng-mar tico19-test 0.45968 13.0 2100 50872
|
160 |
+
eng-nep tico19-test 0.54373 18.7 2100 48363
|
161 |
+
eng-urd tico19-test 0.50920 21.7 2100 65312
|
162 |
+
fra-ben tico19-test 0.39629 9.7 2100 51695
|
163 |
+
fra-hin tico19-test 0.48666 25.6 2100 62680
|
164 |
+
fra-mar tico19-test 0.36352 8.1 2100 50872
|
165 |
+
fra-nep tico19-test 0.41414 10.0 2100 48363
|
166 |
+
fra-urd tico19-test 0.39479 13.4 2100 65312
|
167 |
+
por-ben tico19-test 0.45609 12.7 2100 51695
|
168 |
+
por-hin tico19-test 0.55530 31.2 2100 62680
|
169 |
+
por-mar tico19-test 0.40344 9.7 2100 50872
|
170 |
+
por-nep tico19-test 0.47698 12.4 2100 48363
|
171 |
+
por-urd tico19-test 0.44747 15.6 2100 65312
|
172 |
+
spa-ben tico19-test 0.46418 13.3 2100 51695
|
173 |
+
spa-hin tico19-test 0.55526 31.0 2100 62680
|
174 |
+
spa-mar tico19-test 0.41189 10.0 2100 50872
|
175 |
+
spa-nep tico19-test 0.47414 12.1 2100 48363
|
176 |
+
spa-urd tico19-test 0.44788 15.6 2100 65312
|
benchmark_translations.zip
ADDED
File without changes
|
config.json
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "pytorch-models/opus-mt-tc-bible-big-deu_eng_fra_por_spa-inc",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"activation_function": "relu",
|
5 |
+
"architectures": [
|
6 |
+
"MarianMTModel"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"bos_token_id": 0,
|
10 |
+
"classifier_dropout": 0.0,
|
11 |
+
"d_model": 1024,
|
12 |
+
"decoder_attention_heads": 16,
|
13 |
+
"decoder_ffn_dim": 4096,
|
14 |
+
"decoder_layerdrop": 0.0,
|
15 |
+
"decoder_layers": 6,
|
16 |
+
"decoder_start_token_id": 61905,
|
17 |
+
"decoder_vocab_size": 61906,
|
18 |
+
"dropout": 0.1,
|
19 |
+
"encoder_attention_heads": 16,
|
20 |
+
"encoder_ffn_dim": 4096,
|
21 |
+
"encoder_layerdrop": 0.0,
|
22 |
+
"encoder_layers": 6,
|
23 |
+
"eos_token_id": 512,
|
24 |
+
"forced_eos_token_id": null,
|
25 |
+
"init_std": 0.02,
|
26 |
+
"is_encoder_decoder": true,
|
27 |
+
"max_length": null,
|
28 |
+
"max_position_embeddings": 1024,
|
29 |
+
"model_type": "marian",
|
30 |
+
"normalize_embedding": false,
|
31 |
+
"num_beams": null,
|
32 |
+
"num_hidden_layers": 6,
|
33 |
+
"pad_token_id": 61905,
|
34 |
+
"scale_embedding": true,
|
35 |
+
"share_encoder_decoder_embeddings": true,
|
36 |
+
"static_position_embeddings": true,
|
37 |
+
"torch_dtype": "float32",
|
38 |
+
"transformers_version": "4.45.1",
|
39 |
+
"use_cache": true,
|
40 |
+
"vocab_size": 61906
|
41 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bad_words_ids": [
|
4 |
+
[
|
5 |
+
61905
|
6 |
+
]
|
7 |
+
],
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"decoder_start_token_id": 61905,
|
10 |
+
"eos_token_id": 512,
|
11 |
+
"forced_eos_token_id": 512,
|
12 |
+
"max_length": 512,
|
13 |
+
"num_beams": 4,
|
14 |
+
"pad_token_id": 61905,
|
15 |
+
"transformers_version": "4.45.1"
|
16 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f47a0dfde3088a518b94a468d65064cbd2e0358b965575153e2d72a5829d6880
|
3 |
+
size 959273720
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1376812ebcba33493b0459c4ed414d2607009ebac2d1cdb0d9e067d2c74d867f
|
3 |
+
size 959324997
|
source.spm
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:beeeadf98e7c1cfb3609db2af734a2aad332091428d2c7d6d875073b31fdfd95
|
3 |
+
size 802420
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
target.spm
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c7d11c29930951f01ab91b858356808d171afc5cb276211349c2706b09c576b
|
3 |
+
size 967595
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"source_lang": "deu+eng+fra+por+spa", "target_lang": "inc", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30/deu+eng+fra+por+spa-inc", "tokenizer_class": "MarianTokenizer"}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|