tiedeman committed on
Commit
4134dba
·
1 Parent(s): 8a40a8d

Initial commit

Browse files
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ *.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - af
4
+ - ang
5
+ - de
6
+ - en
7
+ - enm
8
+ - fy
9
+ - gmw
10
+ - gos
11
+ - gsw
12
+ - hrx
13
+ - ksh
14
+ - lb
15
+ - nds
16
+ - nl
17
+ - pdc
18
+ - sco
19
+ - stq
20
+ - swg
21
+ - tpi
22
+ - yi
23
+
24
+ tags:
25
+ - translation
26
+ - opus-mt-tc
27
+
28
+ license: cc-by-4.0
29
+ model-index:
30
+ - name: opus-mt-tc-big-gmw-gmw
31
+ results:
32
+ - task:
33
+ name: Translation afr-deu
34
+ type: translation
35
+ args: afr-deu
36
+ dataset:
37
+ name: flores101-devtest
38
+ type: flores_101
39
+ args: afr deu devtest
40
+ metrics:
41
+ - name: BLEU
42
+ type: bleu
43
+ value: 30.2
44
+ - name: chr-F
45
+ type: chrf
46
+ value: 0.58718
47
+ - task:
48
+ name: Translation afr-eng
49
+ type: translation
50
+ args: afr-eng
51
+ dataset:
52
+ name: flores101-devtest
53
+ type: flores_101
54
+ args: afr eng devtest
55
+ metrics:
56
+ - name: BLEU
57
+ type: bleu
58
+ value: 55.1
59
+ - name: chr-F
60
+ type: chrf
61
+ value: 0.74826
62
+ - task:
63
+ name: Translation afr-ltz
64
+ type: translation
65
+ args: afr-ltz
66
+ dataset:
67
+ name: flores101-devtest
68
+ type: flores_101
69
+ args: afr ltz devtest
70
+ metrics:
71
+ - name: BLEU
72
+ type: bleu
73
+ value: 15.7
74
+ - name: chr-F
75
+ type: chrf
76
+ value: 0.46826
77
+ - task:
78
+ name: Translation afr-nld
79
+ type: translation
80
+ args: afr-nld
81
+ dataset:
82
+ name: flores101-devtest
83
+ type: flores_101
84
+ args: afr nld devtest
85
+ metrics:
86
+ - name: BLEU
87
+ type: bleu
88
+ value: 22.5
89
+ - name: chr-F
90
+ type: chrf
91
+ value: 0.54441
92
+ - task:
93
+ name: Translation deu-afr
94
+ type: translation
95
+ args: deu-afr
96
+ dataset:
97
+ name: flores101-devtest
98
+ type: flores_101
99
+ args: deu afr devtest
100
+ metrics:
101
+ - name: BLEU
102
+ type: bleu
103
+ value: 26.4
104
+ - name: chr-F
105
+ type: chrf
106
+ value: 0.57835
107
+ - task:
108
+ name: Translation deu-eng
109
+ type: translation
110
+ args: deu-eng
111
+ dataset:
112
+ name: flores101-devtest
113
+ type: flores_101
114
+ args: deu eng devtest
115
+ metrics:
116
+ - name: BLEU
117
+ type: bleu
118
+ value: 41.8
119
+ - name: chr-F
120
+ type: chrf
121
+ value: 0.66990
122
+ - task:
123
+ name: Translation deu-ltz
124
+ type: translation
125
+ args: deu-ltz
126
+ dataset:
127
+ name: flores101-devtest
128
+ type: flores_101
129
+ args: deu ltz devtest
130
+ metrics:
131
+ - name: BLEU
132
+ type: bleu
133
+ value: 20.3
134
+ - name: chr-F
135
+ type: chrf
136
+ value: 0.52554
137
+ - task:
138
+ name: Translation deu-nld
139
+ type: translation
140
+ args: deu-nld
141
+ dataset:
142
+ name: flores101-devtest
143
+ type: flores_101
144
+ args: deu nld devtest
145
+ metrics:
146
+ - name: BLEU
147
+ type: bleu
148
+ value: 24.2
149
+ - name: chr-F
150
+ type: chrf
151
+ value: 0.55710
152
+ - task:
153
+ name: Translation eng-afr
154
+ type: translation
155
+ args: eng-afr
156
+ dataset:
157
+ name: flores101-devtest
158
+ type: flores_101
159
+ args: eng afr devtest
160
+ metrics:
161
+ - name: BLEU
162
+ type: bleu
163
+ value: 40.7
164
+ - name: chr-F
165
+ type: chrf
166
+ value: 0.68429
167
+ - task:
168
+ name: Translation eng-deu
169
+ type: translation
170
+ args: eng-deu
171
+ dataset:
172
+ name: flores101-devtest
173
+ type: flores_101
174
+ args: eng deu devtest
175
+ metrics:
176
+ - name: BLEU
177
+ type: bleu
178
+ value: 38.5
179
+ - name: chr-F
180
+ type: chrf
181
+ value: 0.64888
182
+ - task:
183
+ name: Translation eng-ltz
184
+ type: translation
185
+ args: eng-ltz
186
+ dataset:
187
+ name: flores101-devtest
188
+ type: flores_101
189
+ args: eng ltz devtest
190
+ metrics:
191
+ - name: BLEU
192
+ type: bleu
193
+ value: 18.4
194
+ - name: chr-F
195
+ type: chrf
196
+ value: 0.49231
197
+ - task:
198
+ name: Translation eng-nld
199
+ type: translation
200
+ args: eng-nld
201
+ dataset:
202
+ name: flores101-devtest
203
+ type: flores_101
204
+ args: eng nld devtest
205
+ metrics:
206
+ - name: BLEU
207
+ type: bleu
208
+ value: 26.8
209
+ - name: chr-F
210
+ type: chrf
211
+ value: 0.57984
212
+ - task:
213
+ name: Translation ltz-afr
214
+ type: translation
215
+ args: ltz-afr
216
+ dataset:
217
+ name: flores101-devtest
218
+ type: flores_101
219
+ args: ltz afr devtest
220
+ metrics:
221
+ - name: BLEU
222
+ type: bleu
223
+ value: 23.2
224
+ - name: chr-F
225
+ type: chrf
226
+ value: 0.53623
227
+ - task:
228
+ name: Translation ltz-deu
229
+ type: translation
230
+ args: ltz-deu
231
+ dataset:
232
+ name: flores101-devtest
233
+ type: flores_101
234
+ args: ltz deu devtest
235
+ metrics:
236
+ - name: BLEU
237
+ type: bleu
238
+ value: 30.0
239
+ - name: chr-F
240
+ type: chrf
241
+ value: 0.59122
242
+ - task:
243
+ name: Translation ltz-eng
244
+ type: translation
245
+ args: ltz-eng
246
+ dataset:
247
+ name: flores101-devtest
248
+ type: flores_101
249
+ args: ltz eng devtest
250
+ metrics:
251
+ - name: BLEU
252
+ type: bleu
253
+ value: 31.0
254
+ - name: chr-F
255
+ type: chrf
256
+ value: 0.57557
257
+ - task:
258
+ name: Translation ltz-nld
259
+ type: translation
260
+ args: ltz-nld
261
+ dataset:
262
+ name: flores101-devtest
263
+ type: flores_101
264
+ args: ltz nld devtest
265
+ metrics:
266
+ - name: BLEU
267
+ type: bleu
268
+ value: 18.6
269
+ - name: chr-F
270
+ type: chrf
271
+ value: 0.49312
272
+ - task:
273
+ name: Translation nld-afr
274
+ type: translation
275
+ args: nld-afr
276
+ dataset:
277
+ name: flores101-devtest
278
+ type: flores_101
279
+ args: nld afr devtest
280
+ metrics:
281
+ - name: BLEU
282
+ type: bleu
283
+ value: 20.0
284
+ - name: chr-F
285
+ type: chrf
286
+ value: 0.52409
287
+ - task:
288
+ name: Translation nld-deu
289
+ type: translation
290
+ args: nld-deu
291
+ dataset:
292
+ name: flores101-devtest
293
+ type: flores_101
294
+ args: nld deu devtest
295
+ metrics:
296
+ - name: BLEU
297
+ type: bleu
298
+ value: 22.6
299
+ - name: chr-F
300
+ type: chrf
301
+ value: 0.53898
302
+ - task:
303
+ name: Translation nld-eng
304
+ type: translation
305
+ args: nld-eng
306
+ dataset:
307
+ name: flores101-devtest
308
+ type: flores_101
309
+ args: nld eng devtest
310
+ metrics:
311
+ - name: BLEU
312
+ type: bleu
313
+ value: 30.7
314
+ - name: chr-F
315
+ type: chrf
316
+ value: 0.58970
317
+ - task:
318
+ name: Translation nld-ltz
319
+ type: translation
320
+ args: nld-ltz
321
+ dataset:
322
+ name: flores101-devtest
323
+ type: flores_101
324
+ args: nld ltz devtest
325
+ metrics:
326
+ - name: BLEU
327
+ type: bleu
328
+ value: 11.8
329
+ - name: chr-F
330
+ type: chrf
331
+ value: 0.42637
332
+ - task:
333
+ name: Translation deu-eng
334
+ type: translation
335
+ args: deu-eng
336
+ dataset:
337
+ name: multi30k_test_2016_flickr
338
+ type: multi30k-2016_flickr
339
+ args: deu-eng
340
+ metrics:
341
+ - name: BLEU
342
+ type: bleu
343
+ value: 39.9
344
+ - name: chr-F
345
+ type: chrf
346
+ value: 0.60928
347
+ - task:
348
+ name: Translation eng-deu
349
+ type: translation
350
+ args: eng-deu
351
+ dataset:
352
+ name: multi30k_test_2016_flickr
353
+ type: multi30k-2016_flickr
354
+ args: eng-deu
355
+ metrics:
356
+ - name: BLEU
357
+ type: bleu
358
+ value: 35.4
359
+ - name: chr-F
360
+ type: chrf
361
+ value: 0.64172
362
+ - task:
363
+ name: Translation deu-eng
364
+ type: translation
365
+ args: deu-eng
366
+ dataset:
367
+ name: multi30k_test_2017_flickr
368
+ type: multi30k-2017_flickr
369
+ args: deu-eng
370
+ metrics:
371
+ - name: BLEU
372
+ type: bleu
373
+ value: 40.5
374
+ - name: chr-F
375
+ type: chrf
376
+ value: 0.63154
377
+ - task:
378
+ name: Translation eng-deu
379
+ type: translation
380
+ args: eng-deu
381
+ dataset:
382
+ name: multi30k_test_2017_flickr
383
+ type: multi30k-2017_flickr
384
+ args: eng-deu
385
+ metrics:
386
+ - name: BLEU
387
+ type: bleu
388
+ value: 34.2
389
+ - name: chr-F
390
+ type: chrf
391
+ value: 0.63078
392
+ - task:
393
+ name: Translation deu-eng
394
+ type: translation
395
+ args: deu-eng
396
+ dataset:
397
+ name: multi30k_test_2017_mscoco
398
+ type: multi30k-2017_mscoco
399
+ args: deu-eng
400
+ metrics:
401
+ - name: BLEU
402
+ type: bleu
403
+ value: 32.2
404
+ - name: chr-F
405
+ type: chrf
406
+ value: 0.55708
407
+ - task:
408
+ name: Translation eng-deu
409
+ type: translation
410
+ args: eng-deu
411
+ dataset:
412
+ name: multi30k_test_2017_mscoco
413
+ type: multi30k-2017_mscoco
414
+ args: eng-deu
415
+ metrics:
416
+ - name: BLEU
417
+ type: bleu
418
+ value: 29.1
419
+ - name: chr-F
420
+ type: chrf
421
+ value: 0.57537
422
+ - task:
423
+ name: Translation deu-eng
424
+ type: translation
425
+ args: deu-eng
426
+ dataset:
427
+ name: multi30k_test_2018_flickr
428
+ type: multi30k-2018_flickr
429
+ args: deu-eng
430
+ metrics:
431
+ - name: BLEU
432
+ type: bleu
433
+ value: 36.9
434
+ - name: chr-F
435
+ type: chrf
436
+ value: 0.59422
437
+ - task:
438
+ name: Translation eng-deu
439
+ type: translation
440
+ args: eng-deu
441
+ dataset:
442
+ name: multi30k_test_2018_flickr
443
+ type: multi30k-2018_flickr
444
+ args: eng-deu
445
+ metrics:
446
+ - name: BLEU
447
+ type: bleu
448
+ value: 30.0
449
+ - name: chr-F
450
+ type: chrf
451
+ value: 0.59597
452
+ - task:
453
+ name: Translation deu-eng
454
+ type: translation
455
+ args: deu-eng
456
+ dataset:
457
+ name: news-test2008
458
+ type: news-test2008
459
+ args: deu-eng
460
+ metrics:
461
+ - name: BLEU
462
+ type: bleu
463
+ value: 27.2
464
+ - name: chr-F
465
+ type: chrf
466
+ value: 0.54601
467
+ - task:
468
+ name: Translation eng-deu
469
+ type: translation
470
+ args: eng-deu
471
+ dataset:
472
+ name: news-test2008
473
+ type: news-test2008
474
+ args: eng-deu
475
+ metrics:
476
+ - name: BLEU
477
+ type: bleu
478
+ value: 23.6
479
+ - name: chr-F
480
+ type: chrf
481
+ value: 0.53149
482
+ - task:
483
+ name: Translation afr-deu
484
+ type: translation
485
+ args: afr-deu
486
+ dataset:
487
+ name: tatoeba-test-v2021-08-07
488
+ type: tatoeba_mt
489
+ args: afr-deu
490
+ metrics:
491
+ - name: BLEU
492
+ type: bleu
493
+ value: 50.4
494
+ - name: chr-F
495
+ type: chrf
496
+ value: 0.68679
497
+ - task:
498
+ name: Translation afr-eng
499
+ type: translation
500
+ args: afr-eng
501
+ dataset:
502
+ name: tatoeba-test-v2021-08-07
503
+ type: tatoeba_mt
504
+ args: afr-eng
505
+ metrics:
506
+ - name: BLEU
507
+ type: bleu
508
+ value: 56.6
509
+ - name: chr-F
510
+ type: chrf
511
+ value: 0.70682
512
+ - task:
513
+ name: Translation afr-nld
514
+ type: translation
515
+ args: afr-nld
516
+ dataset:
517
+ name: tatoeba-test-v2021-08-07
518
+ type: tatoeba_mt
519
+ args: afr-nld
520
+ metrics:
521
+ - name: BLEU
522
+ type: bleu
523
+ value: 55.5
524
+ - name: chr-F
525
+ type: chrf
526
+ value: 0.71516
527
+ - task:
528
+ name: Translation deu-afr
529
+ type: translation
530
+ args: deu-afr
531
+ dataset:
532
+ name: tatoeba-test-v2021-08-07
533
+ type: tatoeba_mt
534
+ args: deu-afr
535
+ metrics:
536
+ - name: BLEU
537
+ type: bleu
538
+ value: 54.3
539
+ - name: chr-F
540
+ type: chrf
541
+ value: 0.70274
542
+ - task:
543
+ name: Translation deu-eng
544
+ type: translation
545
+ args: deu-eng
546
+ dataset:
547
+ name: tatoeba-test-v2021-08-07
548
+ type: tatoeba_mt
549
+ args: deu-eng
550
+ metrics:
551
+ - name: BLEU
552
+ type: bleu
553
+ value: 48.6
554
+ - name: chr-F
555
+ type: chrf
556
+ value: 0.66023
557
+ - task:
558
+ name: Translation deu-nds
559
+ type: translation
560
+ args: deu-nds
561
+ dataset:
562
+ name: tatoeba-test-v2021-08-07
563
+ type: tatoeba_mt
564
+ args: deu-nds
565
+ metrics:
566
+ - name: BLEU
567
+ type: bleu
568
+ value: 23.2
569
+ - name: chr-F
570
+ type: chrf
571
+ value: 0.48058
572
+ - task:
573
+ name: Translation deu-nld
574
+ type: translation
575
+ args: deu-nld
576
+ dataset:
577
+ name: tatoeba-test-v2021-08-07
578
+ type: tatoeba_mt
579
+ args: deu-nld
580
+ metrics:
581
+ - name: BLEU
582
+ type: bleu
583
+ value: 54.6
584
+ - name: chr-F
585
+ type: chrf
586
+ value: 0.71440
587
+ - task:
588
+ name: Translation eng-afr
589
+ type: translation
590
+ args: eng-afr
591
+ dataset:
592
+ name: tatoeba-test-v2021-08-07
593
+ type: tatoeba_mt
594
+ args: eng-afr
595
+ metrics:
596
+ - name: BLEU
597
+ type: bleu
598
+ value: 56.5
599
+ - name: chr-F
600
+ type: chrf
601
+ value: 0.71995
602
+ - task:
603
+ name: Translation eng-deu
604
+ type: translation
605
+ args: eng-deu
606
+ dataset:
607
+ name: tatoeba-test-v2021-08-07
608
+ type: tatoeba_mt
609
+ args: eng-deu
610
+ metrics:
611
+ - name: BLEU
612
+ type: bleu
613
+ value: 42.0
614
+ - name: chr-F
615
+ type: chrf
616
+ value: 0.63103
617
+ - task:
618
+ name: Translation eng-fry
619
+ type: translation
620
+ args: eng-fry
621
+ dataset:
622
+ name: tatoeba-test-v2021-03-30
623
+ type: tatoeba_mt
624
+ args: eng-fry
625
+ metrics:
626
+ - name: BLEU
627
+ type: bleu
628
+ value: 21.3
629
+ - name: chr-F
630
+ type: chrf
631
+ value: 0.38580
632
+ - task:
633
+ name: Translation eng-nld
634
+ type: translation
635
+ args: eng-nld
636
+ dataset:
637
+ name: tatoeba-test-v2021-08-07
638
+ type: tatoeba_mt
639
+ args: eng-nld
640
+ metrics:
641
+ - name: BLEU
642
+ type: bleu
643
+ value: 54.5
644
+ - name: chr-F
645
+ type: chrf
646
+ value: 0.71062
647
+ - task:
648
+ name: Translation fry-eng
649
+ type: translation
650
+ args: fry-eng
651
+ dataset:
652
+ name: tatoeba-test-v2021-08-07
653
+ type: tatoeba_mt
654
+ args: fry-eng
655
+ metrics:
656
+ - name: BLEU
657
+ type: bleu
658
+ value: 25.1
659
+ - name: chr-F
660
+ type: chrf
661
+ value: 0.40545
662
+ - task:
663
+ name: Translation fry-nld
664
+ type: translation
665
+ args: fry-nld
666
+ dataset:
667
+ name: tatoeba-test-v2021-08-07
668
+ type: tatoeba_mt
669
+ args: fry-nld
670
+ metrics:
671
+ - name: BLEU
672
+ type: bleu
673
+ value: 41.7
674
+ - name: chr-F
675
+ type: chrf
676
+ value: 0.55771
677
+ - task:
678
+ name: Translation gos-deu
679
+ type: translation
680
+ args: gos-deu
681
+ dataset:
682
+ name: tatoeba-test-v2021-08-07
683
+ type: tatoeba_mt
684
+ args: gos-deu
685
+ metrics:
686
+ - name: BLEU
687
+ type: bleu
688
+ value: 25.4
689
+ - name: chr-F
690
+ type: chrf
691
+ value: 0.45302
692
+ - task:
693
+ name: Translation gos-eng
694
+ type: translation
695
+ args: gos-eng
696
+ dataset:
697
+ name: tatoeba-test-v2021-08-07
698
+ type: tatoeba_mt
699
+ args: gos-eng
700
+ metrics:
701
+ - name: BLEU
702
+ type: bleu
703
+ value: 24.1
704
+ - name: chr-F
705
+ type: chrf
706
+ value: 0.37628
707
+ - task:
708
+ name: Translation gos-nld
709
+ type: translation
710
+ args: gos-nld
711
+ dataset:
712
+ name: tatoeba-test-v2021-08-07
713
+ type: tatoeba_mt
714
+ args: gos-nld
715
+ metrics:
716
+ - name: BLEU
717
+ type: bleu
718
+ value: 26.2
719
+ - name: chr-F
720
+ type: chrf
721
+ value: 0.45777
722
+ - task:
723
+ name: Translation ltz-deu
724
+ type: translation
725
+ args: ltz-deu
726
+ dataset:
727
+ name: tatoeba-test-v2021-08-07
728
+ type: tatoeba_mt
729
+ args: ltz-deu
730
+ metrics:
731
+ - name: BLEU
732
+ type: bleu
733
+ value: 21.3
734
+ - name: chr-F
735
+ type: chrf
736
+ value: 0.37165
737
+ - task:
738
+ name: Translation ltz-eng
739
+ type: translation
740
+ args: ltz-eng
741
+ dataset:
742
+ name: tatoeba-test-v2021-08-07
743
+ type: tatoeba_mt
744
+ args: ltz-eng
745
+ metrics:
746
+ - name: BLEU
747
+ type: bleu
748
+ value: 30.3
749
+ - name: chr-F
750
+ type: chrf
751
+ value: 0.37784
752
+ - task:
753
+ name: Translation ltz-nld
754
+ type: translation
755
+ args: ltz-nld
756
+ dataset:
757
+ name: tatoeba-test-v2021-08-07
758
+ type: tatoeba_mt
759
+ args: ltz-nld
760
+ metrics:
761
+ - name: BLEU
762
+ type: bleu
763
+ value: 26.7
764
+ - name: chr-F
765
+ type: chrf
766
+ value: 0.32823
767
+ - task:
768
+ name: Translation nds-deu
769
+ type: translation
770
+ args: nds-deu
771
+ dataset:
772
+ name: tatoeba-test-v2021-08-07
773
+ type: tatoeba_mt
774
+ args: nds-deu
775
+ metrics:
776
+ - name: BLEU
777
+ type: bleu
778
+ value: 45.4
779
+ - name: chr-F
780
+ type: chrf
781
+ value: 0.64008
782
+ - task:
783
+ name: Translation nds-eng
784
+ type: translation
785
+ args: nds-eng
786
+ dataset:
787
+ name: tatoeba-test-v2021-08-07
788
+ type: tatoeba_mt
789
+ args: nds-eng
790
+ metrics:
791
+ - name: BLEU
792
+ type: bleu
793
+ value: 38.3
794
+ - name: chr-F
795
+ type: chrf
796
+ value: 0.55193
797
+ - task:
798
+ name: Translation nds-nld
799
+ type: translation
800
+ args: nds-nld
801
+ dataset:
802
+ name: tatoeba-test-v2021-08-07
803
+ type: tatoeba_mt
804
+ args: nds-nld
805
+ metrics:
806
+ - name: BLEU
807
+ type: bleu
808
+ value: 50.0
809
+ - name: chr-F
810
+ type: chrf
811
+ value: 0.66943
812
+ - task:
813
+ name: Translation nld-afr
814
+ type: translation
815
+ args: nld-afr
816
+ dataset:
817
+ name: tatoeba-test-v2021-08-07
818
+ type: tatoeba_mt
819
+ args: nld-afr
820
+ metrics:
821
+ - name: BLEU
822
+ type: bleu
823
+ value: 62.3
824
+ - name: chr-F
825
+ type: chrf
826
+ value: 0.76610
827
+ - task:
828
+ name: Translation nld-deu
829
+ type: translation
830
+ args: nld-deu
831
+ dataset:
832
+ name: tatoeba-test-v2021-08-07
833
+ type: tatoeba_mt
834
+ args: nld-deu
835
+ metrics:
836
+ - name: BLEU
837
+ type: bleu
838
+ value: 56.8
839
+ - name: chr-F
840
+ type: chrf
841
+ value: 0.73162
842
+ - task:
843
+ name: Translation nld-eng
844
+ type: translation
845
+ args: nld-eng
846
+ dataset:
847
+ name: tatoeba-test-v2021-08-07
848
+ type: tatoeba_mt
849
+ args: nld-eng
850
+ metrics:
851
+ - name: BLEU
852
+ type: bleu
853
+ value: 60.5
854
+ - name: chr-F
855
+ type: chrf
856
+ value: 0.74088
857
+ - task:
858
+ name: Translation nld-fry
859
+ type: translation
860
+ args: nld-fry
861
+ dataset:
862
+ name: tatoeba-test-v2021-08-07
863
+ type: tatoeba_mt
864
+ args: nld-fry
865
+ metrics:
866
+ - name: BLEU
867
+ type: bleu
868
+ value: 31.4
869
+ - name: chr-F
870
+ type: chrf
871
+ value: 0.48460
872
+ - task:
873
+ name: Translation deu-eng
874
+ type: translation
875
+ args: deu-eng
876
+ dataset:
877
+ name: newstest2009
878
+ type: wmt-2009-news
879
+ args: deu-eng
880
+ metrics:
881
+ - name: BLEU
882
+ type: bleu
883
+ value: 25.9
884
+ - name: chr-F
885
+ type: chrf
886
+ value: 0.53747
887
+ - task:
888
+ name: Translation eng-deu
889
+ type: translation
890
+ args: eng-deu
891
+ dataset:
892
+ name: newstest2009
893
+ type: wmt-2009-news
894
+ args: eng-deu
895
+ metrics:
896
+ - name: BLEU
897
+ type: bleu
898
+ value: 22.9
899
+ - name: chr-F
900
+ type: chrf
901
+ value: 0.53283
902
+ - task:
903
+ name: Translation deu-eng
904
+ type: translation
905
+ args: deu-eng
906
+ dataset:
907
+ name: newstest2010
908
+ type: wmt-2010-news
909
+ args: deu-eng
910
+ metrics:
911
+ - name: BLEU
912
+ type: bleu
913
+ value: 30.6
914
+ - name: chr-F
915
+ type: chrf
916
+ value: 0.58355
917
+ - task:
918
+ name: Translation eng-deu
919
+ type: translation
920
+ args: eng-deu
921
+ dataset:
922
+ name: newstest2010
923
+ type: wmt-2010-news
924
+ args: eng-deu
925
+ metrics:
926
+ - name: BLEU
927
+ type: bleu
928
+ value: 25.8
929
+ - name: chr-F
930
+ type: chrf
931
+ value: 0.54885
932
+ - task:
933
+ name: Translation deu-eng
934
+ type: translation
935
+ args: deu-eng
936
+ dataset:
937
+ name: newstest2011
938
+ type: wmt-2011-news
939
+ args: deu-eng
940
+ metrics:
941
+ - name: BLEU
942
+ type: bleu
943
+ value: 26.3
944
+ - name: chr-F
945
+ type: chrf
946
+ value: 0.54883
947
+ - task:
948
+ name: Translation eng-deu
949
+ type: translation
950
+ args: eng-deu
951
+ dataset:
952
+ name: newstest2011
953
+ type: wmt-2011-news
954
+ args: eng-deu
955
+ metrics:
956
+ - name: BLEU
957
+ type: bleu
958
+ value: 23.1
959
+ - name: chr-F
960
+ type: chrf
961
+ value: 0.52712
962
+ - task:
963
+ name: Translation deu-eng
964
+ type: translation
965
+ args: deu-eng
966
+ dataset:
967
+ name: newstest2012
968
+ type: wmt-2012-news
969
+ args: deu-eng
970
+ metrics:
971
+ - name: BLEU
972
+ type: bleu
973
+ value: 28.5
974
+ - name: chr-F
975
+ type: chrf
976
+ value: 0.56153
977
+ - task:
978
+ name: Translation eng-deu
979
+ type: translation
980
+ args: eng-deu
981
+ dataset:
982
+ name: newstest2012
983
+ type: wmt-2012-news
984
+ args: eng-deu
985
+ metrics:
986
+ - name: BLEU
987
+ type: bleu
988
+ value: 23.3
989
+ - name: chr-F
990
+ type: chrf
991
+ value: 0.52662
992
+ - task:
993
+ name: Translation deu-eng
994
+ type: translation
995
+ args: deu-eng
996
+ dataset:
997
+ name: newstest2013
998
+ type: wmt-2013-news
999
+ args: deu-eng
1000
+ metrics:
1001
+ - name: BLEU
1002
+ type: bleu
1003
+ value: 31.4
1004
+ - name: chr-F
1005
+ type: chrf
1006
+ value: 0.57770
1007
+ - task:
1008
+ name: Translation eng-deu
1009
+ type: translation
1010
+ args: eng-deu
1011
+ dataset:
1012
+ name: newstest2013
1013
+ type: wmt-2013-news
1014
+ args: eng-deu
1015
+ metrics:
1016
+ - name: BLEU
1017
+ type: bleu
1018
+ value: 27.8
1019
+ - name: chr-F
1020
+ type: chrf
1021
+ value: 0.55774
1022
+ - task:
1023
+ name: Translation deu-eng
1024
+ type: translation
1025
+ args: deu-eng
1026
+ dataset:
1027
+ name: newstest2014
1028
+ type: wmt-2014-news
1029
+ args: deu-eng
1030
+ metrics:
1031
+ - name: BLEU
1032
+ type: bleu
1033
+ value: 33.2
1034
+ - name: chr-F
1035
+ type: chrf
1036
+ value: 0.59826
1037
+ - task:
1038
+ name: Translation eng-deu
1039
+ type: translation
1040
+ args: eng-deu
1041
+ dataset:
1042
+ name: newstest2014
1043
+ type: wmt-2014-news
1044
+ args: eng-deu
1045
+ metrics:
1046
+ - name: BLEU
1047
+ type: bleu
1048
+ value: 29.0
1049
+ - name: chr-F
1050
+ type: chrf
1051
+ value: 0.59301
1052
+ - task:
1053
+ name: Translation deu-eng
1054
+ type: translation
1055
+ args: deu-eng
1056
+ dataset:
1057
+ name: newstest2015
1058
+ type: wmt-2015-news
1059
+ args: deu-eng
1060
+ metrics:
1061
+ - name: BLEU
1062
+ type: bleu
1063
+ value: 33.4
1064
+ - name: chr-F
1065
+ type: chrf
1066
+ value: 0.59660
1067
+ - task:
1068
+ name: Translation eng-deu
1069
+ type: translation
1070
+ args: eng-deu
1071
+ dataset:
1072
+ name: newstest2015
1073
+ type: wmt-2015-news
1074
+ args: eng-deu
1075
+ metrics:
1076
+ - name: BLEU
1077
+ type: bleu
1078
+ value: 32.3
1079
+ - name: chr-F
1080
+ type: chrf
1081
+ value: 0.59889
1082
+ - task:
1083
+ name: Translation deu-eng
1084
+ type: translation
1085
+ args: deu-eng
1086
+ dataset:
1087
+ name: newstest2016
1088
+ type: wmt-2016-news
1089
+ args: deu-eng
1090
+ metrics:
1091
+ - name: BLEU
1092
+ type: bleu
1093
+ value: 39.8
1094
+ - name: chr-F
1095
+ type: chrf
1096
+ value: 0.64736
1097
+ - task:
1098
+ name: Translation eng-deu
1099
+ type: translation
1100
+ args: eng-deu
1101
+ dataset:
1102
+ name: newstest2016
1103
+ type: wmt-2016-news
1104
+ args: eng-deu
1105
+ metrics:
1106
+ - name: BLEU
1107
+ type: bleu
1108
+ value: 38.3
1109
+ - name: chr-F
1110
+ type: chrf
1111
+ value: 0.64427
1112
+ - task:
1113
+ name: Translation deu-eng
1114
+ type: translation
1115
+ args: deu-eng
1116
+ dataset:
1117
+ name: newstest2017
1118
+ type: wmt-2017-news
1119
+ args: deu-eng
1120
+ metrics:
1121
+ - name: BLEU
1122
+ type: bleu
1123
+ value: 35.2
1124
+ - name: chr-F
1125
+ type: chrf
1126
+ value: 0.60933
1127
+ - task:
1128
+ name: Translation eng-deu
1129
+ type: translation
1130
+ args: eng-deu
1131
+ dataset:
1132
+ name: newstest2017
1133
+ type: wmt-2017-news
1134
+ args: eng-deu
1135
+ metrics:
1136
+ - name: BLEU
1137
+ type: bleu
1138
+ value: 30.7
1139
+ - name: chr-F
1140
+ type: chrf
1141
+ value: 0.59257
1142
+ - task:
1143
+ name: Translation deu-eng
1144
+ type: translation
1145
+ args: deu-eng
1146
+ dataset:
1147
+ name: newstest2018
1148
+ type: wmt-2018-news
1149
+ args: deu-eng
1150
+ metrics:
1151
+ - name: BLEU
1152
+ type: bleu
1153
+ value: 42.6
1154
+ - name: chr-F
1155
+ type: chrf
1156
+ value: 0.66797
1157
+ - task:
1158
+ name: Translation eng-deu
1159
+ type: translation
1160
+ args: eng-deu
1161
+ dataset:
1162
+ name: newstest2018
1163
+ type: wmt-2018-news
1164
+ args: eng-deu
1165
+ metrics:
1166
+ - name: BLEU
1167
+ type: bleu
1168
+ value: 46.5
1169
+ - name: chr-F
1170
+ type: chrf
1171
+ value: 0.69605
1172
+ - task:
1173
+ name: Translation deu-eng
1174
+ type: translation
1175
+ args: deu-eng
1176
+ dataset:
1177
+ name: newstest2019
1178
+ type: wmt-2019-news
1179
+ args: deu-eng
1180
+ metrics:
1181
+ - name: BLEU
1182
+ type: bleu
1183
+ value: 39.7
1184
+ - name: chr-F
1185
+ type: chrf
1186
+ value: 0.63749
1187
+ - task:
1188
+ name: Translation eng-deu
1189
+ type: translation
1190
+ args: eng-deu
1191
+ dataset:
1192
+ name: newstest2019
1193
+ type: wmt-2019-news
1194
+ args: eng-deu
1195
+ metrics:
1196
+ - name: BLEU
1197
+ type: bleu
1198
+ value: 42.9
1199
+ - name: chr-F
1200
+ type: chrf
1201
+ value: 0.66751
1202
+ - task:
1203
+ name: Translation deu-eng
1204
+ type: translation
1205
+ args: deu-eng
1206
+ dataset:
1207
+ name: newstest2020
1208
+ type: wmt-2020-news
1209
+ args: deu-eng
1210
+ metrics:
1211
+ - name: BLEU
1212
+ type: bleu
1213
+ value: 35.0
1214
+ - name: chr-F
1215
+ type: chrf
1216
+ value: 0.61200
1217
+ - task:
1218
+ name: Translation eng-deu
1219
+ type: translation
1220
+ args: eng-deu
1221
+ dataset:
1222
+ name: newstest2020
1223
+ type: wmt-2020-news
1224
+ args: eng-deu
1225
+ metrics:
1226
+ - name: BLEU
1227
+ type: bleu
1228
+ value: 32.3
1229
+ - name: chr-F
1230
+ type: chrf
1231
+ value: 0.60411
1232
+ ---
1233
+ # opus-mt-tc-big-gmw-gmw
1234
+
1235
+ ## Table of Contents
1236
+ - [Model Details](#model-details)
1237
+ - [Uses](#uses)
1238
+ - [Risks, Limitations and Biases](#risks-limitations-and-biases)
1239
+ - [How to Get Started With the Model](#how-to-get-started-with-the-model)
1240
+ - [Training](#training)
1241
+ - [Evaluation](#evaluation)
1242
+ - [Citation Information](#citation-information)
1243
+ - [Acknowledgements](#acknowledgements)
1244
+
1245
+ ## Model Details
1246
+
1247
+ Neural machine translation model for translating from West Germanic languages (gmw) to West Germanic languages (gmw).
1248
+
1249
+ This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).
1250
+ **Model Description:**
1251
+ - **Developed by:** Language Technology Research Group at the University of Helsinki
1252
+ - **Model Type:** Translation (transformer-big)
1253
+ - **Release**: 2022-08-11
1254
+ - **License:** CC-BY-4.0
1255
+ - **Language(s):**
1256
+ - Source Language(s): afr deu eng enm fry gos gsw hrx ksh ltz nds nld pdc sco stq swg tpi yid
1257
+ - Target Language(s): afr ang deu eng enm fry gos ltz nds nld sco tpi yid
1258
+ - Valid Target Language Labels: >>afr<< >>ang<< >>deu<< >>eng<< >>enm<< >>fry<< >>gos<< >>ltz<< >>nds<< >>nld<< >>sco<< >>tpi<< >>yid<<
1259
+ - **Original Model**: [opusTCv20210807_transformer-big_2022-08-11.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/gmw-gmw/opusTCv20210807_transformer-big_2022-08-11.zip)
1260
+ - **Resources for more information:**
1261
+ - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
1262
+ - More information about released models for this language pair: [OPUS-MT gmw-gmw README](https://github.com/Helsinki-NLP/Tatoeba-Challenge/tree/master/models/gmw-gmw/README.md)
1263
+ - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
1264
+ - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
1265
+
1266
+ This is a multilingual translation model with multiple target languages. A sentence-initial language token of the form `>>id<<` (where id is a valid target language ID) is required, e.g. `>>afr<<`.
1267
+
1268
+ ## Uses
1269
+
1270
+ This model can be used for translation and text-to-text generation.
1271
+
1272
+ ## Risks, Limitations and Biases
1273
+
1274
+ **CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
1275
+
1276
+ Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
1277
+
1278
+ ## How to Get Started With the Model
1279
+
1280
+ A short code example:
1281
+
1282
+ ```python
1283
+ from transformers import MarianMTModel, MarianTokenizer
1284
+
1285
+ src_text = [
1286
+ ">>nds<< Red keinen Quatsch.",
1287
+ ">>eng<< Findet ihr das nicht etwas übereilt?"
1288
+ ]
1289
+
1290
+ model_name = "Helsinki-NLP/opus-mt-tc-big-gmw-gmw"
1291
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
1292
+ model = MarianMTModel.from_pretrained(model_name)
1293
+ translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))
1294
+
1295
+ for t in translated:
1296
+ print( tokenizer.decode(t, skip_special_tokens=True) )
1297
+
1298
+ # expected output:
1299
+ # Kiek ok bi: Rott.
1300
+ # Aren't you in a hurry?
1301
+ ```
1302
+
1303
+ You can also use OPUS-MT models with the transformers pipelines, for example:
1304
+
1305
+ ```python
1306
+ from transformers import pipeline
1307
+ pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-gmw-gmw")
1308
+ print(pipe(">>nds<< Red keinen Quatsch."))
1309
+
1310
+ # expected output: Kiek ok bi: Rott.
1311
+ ```
1312
+
1313
+ ## Training
1314
+
1315
+ - **Data**: opusTCv20210807 ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
1316
+ - **Pre-processing**: SentencePiece (spm32k,spm32k)
1317
+ - **Model Type:** transformer-big
1318
+ - **Original MarianNMT Model**: [opusTCv20210807_transformer-big_2022-08-11.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/gmw-gmw/opusTCv20210807_transformer-big_2022-08-11.zip)
1319
+ - **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
1320
+
1321
+ ## Evaluation
1322
+
1323
+ * test set translations: [opusTCv20210807_transformer-big_2022-08-11.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/gmw-gmw/opusTCv20210807_transformer-big_2022-08-11.test.txt)
1324
+ * test set scores: [opusTCv20210807_transformer-big_2022-08-11.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/gmw-gmw/opusTCv20210807_transformer-big_2022-08-11.eval.txt)
1325
+ * benchmark results: [benchmark_results.txt](benchmark_results.txt)
1326
+ * benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
1327
+
1328
+ | langpair | testset | chr-F | BLEU | #sent | #words |
1329
+ |----------|---------|-------|-------|-------|--------|
1330
+ | afr-deu | tatoeba-test-v2021-08-07 | 0.68679 | 50.4 | 1583 | 9105 |
1331
+ | afr-eng | tatoeba-test-v2021-08-07 | 0.70682 | 56.6 | 1374 | 9622 |
1332
+ | afr-nld | tatoeba-test-v2021-08-07 | 0.71516 | 55.5 | 1056 | 6710 |
1333
+ | deu-afr | tatoeba-test-v2021-08-07 | 0.70274 | 54.3 | 1583 | 9507 |
1334
+ | deu-eng | tatoeba-test-v2021-08-07 | 0.66023 | 48.6 | 17565 | 149462 |
1335
+ | deu-nds | tatoeba-test-v2021-08-07 | 0.48058 | 23.2 | 9999 | 76137 |
1336
+ | deu-nld | tatoeba-test-v2021-08-07 | 0.71440 | 54.6 | 10218 | 75235 |
1337
+ | deu-yid | tatoeba-test-v2021-08-07 | 9.211 | 0.4 | 853 | 5355 |
1338
+ | eng-afr | tatoeba-test-v2021-08-07 | 0.71995 | 56.5 | 1374 | 10317 |
1339
+ | eng-deu | tatoeba-test-v2021-08-07 | 0.63103 | 42.0 | 17565 | 151568 |
1340
+ | eng-nld | tatoeba-test-v2021-08-07 | 0.71062 | 54.5 | 12696 | 91796 |
1341
+ | eng-yid | tatoeba-test-v2021-08-07 | 9.624 | 0.4 | 2483 | 16395 |
1342
+ | fry-eng | tatoeba-test-v2021-08-07 | 0.40545 | 25.1 | 220 | 1573 |
1343
+ | fry-nld | tatoeba-test-v2021-08-07 | 0.55771 | 41.7 | 260 | 1854 |
1344
+ | gos-deu | tatoeba-test-v2021-08-07 | 0.45302 | 25.4 | 207 | 1168 |
1345
+ | gos-eng | tatoeba-test-v2021-08-07 | 0.37628 | 24.1 | 1154 | 5635 |
1346
+ | gos-nld | tatoeba-test-v2021-08-07 | 0.45777 | 26.2 | 1852 | 9903 |
1347
+ | ltz-deu | tatoeba-test-v2021-08-07 | 0.37165 | 21.3 | 347 | 2208 |
1348
+ | ltz-eng | tatoeba-test-v2021-08-07 | 0.37784 | 30.3 | 293 | 1840 |
1349
+ | ltz-nld | tatoeba-test-v2021-08-07 | 0.32823 | 26.7 | 292 | 1685 |
1350
+ | nds-deu | tatoeba-test-v2021-08-07 | 0.64008 | 45.4 | 9999 | 74564 |
1351
+ | nds-eng | tatoeba-test-v2021-08-07 | 0.55193 | 38.3 | 2500 | 17589 |
1352
+ | nds-nld | tatoeba-test-v2021-08-07 | 0.66943 | 50.0 | 1657 | 11490 |
1353
+ | nld-afr | tatoeba-test-v2021-08-07 | 0.76610 | 62.3 | 1056 | 6823 |
1354
+ | nld-deu | tatoeba-test-v2021-08-07 | 0.73162 | 56.8 | 10218 | 74131 |
1355
+ | nld-eng | tatoeba-test-v2021-08-07 | 0.74088 | 60.5 | 12696 | 89978 |
1356
+ | nld-fry | tatoeba-test-v2021-08-07 | 0.48460 | 31.4 | 260 | 1857 |
1357
+ | nld-nds | tatoeba-test-v2021-08-07 | 0.43779 | 19.9 | 1657 | 11711 |
1358
+ | swg-deu | tatoeba-test-v2021-08-07 | 0.40348 | 16.1 | 1523 | 15632 |
1359
+ | yid-deu | tatoeba-test-v2021-08-07 | 6.305 | 0.1 | 853 | 5173 |
1360
+ | yid-eng | tatoeba-test-v2021-08-07 | 3.704 | 0.1 | 2483 | 15452 |
1361
+ | afr-deu | flores101-devtest | 0.58718 | 30.2 | 1012 | 25094 |
1362
+ | afr-eng | flores101-devtest | 0.74826 | 55.1 | 1012 | 24721 |
1363
+ | afr-ltz | flores101-devtest | 0.46826 | 15.7 | 1012 | 25087 |
1364
+ | afr-nld | flores101-devtest | 0.54441 | 22.5 | 1012 | 25467 |
1365
+ | deu-afr | flores101-devtest | 0.57835 | 26.4 | 1012 | 25740 |
1366
+ | deu-eng | flores101-devtest | 0.66990 | 41.8 | 1012 | 24721 |
1367
+ | deu-ltz | flores101-devtest | 0.52554 | 20.3 | 1012 | 25087 |
1368
+ | deu-nld | flores101-devtest | 0.55710 | 24.2 | 1012 | 25467 |
1369
+ | eng-afr | flores101-devtest | 0.68429 | 40.7 | 1012 | 25740 |
1370
+ | eng-deu | flores101-devtest | 0.64888 | 38.5 | 1012 | 25094 |
1371
+ | eng-ltz | flores101-devtest | 0.49231 | 18.4 | 1012 | 25087 |
1372
+ | eng-nld | flores101-devtest | 0.57984 | 26.8 | 1012 | 25467 |
1373
+ | ltz-afr | flores101-devtest | 0.53623 | 23.2 | 1012 | 25740 |
1374
+ | ltz-deu | flores101-devtest | 0.59122 | 30.0 | 1012 | 25094 |
1375
+ | ltz-eng | flores101-devtest | 0.57557 | 31.0 | 1012 | 24721 |
1376
+ | ltz-nld | flores101-devtest | 0.49312 | 18.6 | 1012 | 25467 |
1377
+ | nld-afr | flores101-devtest | 0.52409 | 20.0 | 1012 | 25740 |
1378
+ | nld-deu | flores101-devtest | 0.53898 | 22.6 | 1012 | 25094 |
1379
+ | nld-eng | flores101-devtest | 0.58970 | 30.7 | 1012 | 24721 |
1380
+ | nld-ltz | flores101-devtest | 0.42637 | 11.8 | 1012 | 25087 |
1381
+ | deu-eng | multi30k_test_2016_flickr | 0.60928 | 39.9 | 1000 | 12955 |
1382
+ | eng-deu | multi30k_test_2016_flickr | 0.64172 | 35.4 | 1000 | 12106 |
1383
+ | deu-eng | multi30k_test_2017_flickr | 0.63154 | 40.5 | 1000 | 11374 |
1384
+ | eng-deu | multi30k_test_2017_flickr | 0.63078 | 34.2 | 1000 | 10755 |
1385
+ | deu-eng | multi30k_test_2017_mscoco | 0.55708 | 32.2 | 461 | 5231 |
1386
+ | eng-deu | multi30k_test_2017_mscoco | 0.57537 | 29.1 | 461 | 5158 |
1387
+ | deu-eng | multi30k_test_2018_flickr | 0.59422 | 36.9 | 1071 | 14689 |
1388
+ | eng-deu | multi30k_test_2018_flickr | 0.59597 | 30.0 | 1071 | 13703 |
1389
+ | deu-eng | newssyscomb2009 | 0.54993 | 28.2 | 502 | 11818 |
1390
+ | eng-deu | newssyscomb2009 | 0.53867 | 23.2 | 502 | 11271 |
1391
+ | deu-eng | news-test2008 | 0.54601 | 27.2 | 2051 | 49380 |
1392
+ | eng-deu | news-test2008 | 0.53149 | 23.6 | 2051 | 47447 |
1393
+ | deu-eng | newstest2009 | 0.53747 | 25.9 | 2525 | 65399 |
1394
+ | eng-deu | newstest2009 | 0.53283 | 22.9 | 2525 | 62816 |
1395
+ | deu-eng | newstest2010 | 0.58355 | 30.6 | 2489 | 61711 |
1396
+ | eng-deu | newstest2010 | 0.54885 | 25.8 | 2489 | 61503 |
1397
+ | deu-eng | newstest2011 | 0.54883 | 26.3 | 3003 | 74681 |
1398
+ | eng-deu | newstest2011 | 0.52712 | 23.1 | 3003 | 72981 |
1399
+ | deu-eng | newstest2012 | 0.56153 | 28.5 | 3003 | 72812 |
1400
+ | eng-deu | newstest2012 | 0.52662 | 23.3 | 3003 | 72886 |
1401
+ | deu-eng | newstest2013 | 0.57770 | 31.4 | 3000 | 64505 |
1402
+ | eng-deu | newstest2013 | 0.55774 | 27.8 | 3000 | 63737 |
1403
+ | deu-eng | newstest2014 | 0.59826 | 33.2 | 3003 | 67337 |
1404
+ | eng-deu | newstest2014 | 0.59301 | 29.0 | 3003 | 62688 |
1405
+ | deu-eng | newstest2015 | 0.59660 | 33.4 | 2169 | 46443 |
1406
+ | eng-deu | newstest2015 | 0.59889 | 32.3 | 2169 | 44260 |
1407
+ | deu-eng | newstest2016 | 0.64736 | 39.8 | 2999 | 64119 |
1408
+ | eng-deu | newstest2016 | 0.64427 | 38.3 | 2999 | 62669 |
1409
+ | deu-eng | newstest2017 | 0.60933 | 35.2 | 3004 | 64399 |
1410
+ | eng-deu | newstest2017 | 0.59257 | 30.7 | 3004 | 61287 |
1411
+ | deu-eng | newstest2018 | 0.66797 | 42.6 | 2998 | 67012 |
1412
+ | eng-deu | newstest2018 | 0.69605 | 46.5 | 2998 | 64276 |
1413
+ | deu-eng | newstest2019 | 0.63749 | 39.7 | 2000 | 39227 |
1414
+ | eng-deu | newstest2019 | 0.66751 | 42.9 | 1997 | 48746 |
1415
+ | deu-eng | newstest2020 | 0.61200 | 35.0 | 785 | 38220 |
1416
+ | eng-deu | newstest2020 | 0.60411 | 32.3 | 1418 | 52383 |
1417
+ | deu-eng | newstestB2020 | 0.61255 | 35.1 | 785 | 37696 |
1418
+ | eng-deu | newstestB2020 | 0.59513 | 31.8 | 1418 | 53092 |
1419
+
1420
+ ## Citation Information
1421
+
1422
+ * Publications: [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/) and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/) (Please cite these papers if you use this model.)
1423
+
1424
+ ```
1425
+ @inproceedings{tiedemann-thottingal-2020-opus,
1426
+ title = "{OPUS}-{MT} {--} Building open translation services for the World",
1427
+ author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
1428
+ booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
1429
+ month = nov,
1430
+ year = "2020",
1431
+ address = "Lisboa, Portugal",
1432
+ publisher = "European Association for Machine Translation",
1433
+ url = "https://aclanthology.org/2020.eamt-1.61",
1434
+ pages = "479--480",
1435
+ }
1436
+
1437
+ @inproceedings{tiedemann-2020-tatoeba,
1438
+ title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
1439
+ author = {Tiedemann, J{\"o}rg},
1440
+ booktitle = "Proceedings of the Fifth Conference on Machine Translation",
1441
+ month = nov,
1442
+ year = "2020",
1443
+ address = "Online",
1444
+ publisher = "Association for Computational Linguistics",
1445
+ url = "https://aclanthology.org/2020.wmt-1.139",
1446
+ pages = "1174--1182",
1447
+ }
1448
+ ```
1449
+
1450
+ ## Acknowledgements
1451
+
1452
+ The work is supported by the [European Language Grid](https://www.european-language-grid.eu/) as [pilot project 2866](https://live.european-language-grid.eu/catalogue/#/resource/projects/2866), by the [FoTran project](https://www.helsinki.fi/en/researchgroups/natural-language-understanding-with-cross-lingual-grounding), funded by the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement No 771113), and the [MeMAD project](https://memad.eu/), funded by the European Union’s Horizon 2020 Research and Innovation Programme under grant agreement No 780069. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland.
1453
+
1454
+ ## Model conversion info
1455
+
1456
+ * transformers version: 4.16.2
1457
+ * OPUS-MT git hash: 8b9f0b0
1458
+ * port time: Fri Aug 12 13:17:06 EEST 2022
1459
+ * port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ afr-deu flores101-dev 0.58214 30.7 997 24097
2
+ afr-eng flores101-dev 0.74745 54.5 997 23555
3
+ afr-ltz flores101-dev 0.47392 17.1 997 23906
4
+ afr-nld flores101-dev 0.54440 22.8 997 24342
5
+ deu-afr flores101-dev 0.57083 26.8 997 24677
6
+ deu-eng flores101-dev 0.66856 41.8 997 23555
7
+ deu-ltz flores101-dev 0.53317 21.9 997 23906
8
+ deu-nld flores101-dev 0.55429 24.7 997 24342
9
+ eng-afr flores101-dev 0.67961 40.0 997 24677
10
+ eng-deu flores101-dev 0.64499 38.9 997 24097
11
+ eng-ltz flores101-dev 0.49853 19.5 997 23906
12
+ eng-nld flores101-dev 0.58154 27.1 997 24342
13
+ ltz-afr flores101-dev 0.53572 23.9 997 24677
14
+ ltz-deu flores101-dev 0.59331 31.5 997 24097
15
+ ltz-eng flores101-dev 0.57663 32.2 997 23555
16
+ ltz-nld flores101-dev 0.49426 19.3 997 24342
17
+ nld-afr flores101-dev 0.52240 20.4 997 24677
18
+ nld-deu flores101-dev 0.53393 23.5 997 24097
19
+ nld-eng flores101-dev 0.59281 30.6 997 23555
20
+ nld-ltz flores101-dev 0.42922 12.7 997 23906
21
+ afr-deu flores101-devtest 0.58718 30.2 1012 25094
22
+ afr-eng flores101-devtest 0.74826 55.1 1012 24721
23
+ afr-ltz flores101-devtest 0.46826 15.7 1012 25087
24
+ afr-nld flores101-devtest 0.54441 22.5 1012 25467
25
+ deu-afr flores101-devtest 0.57835 26.4 1012 25740
26
+ deu-eng flores101-devtest 0.66990 41.8 1012 24721
27
+ deu-ltz flores101-devtest 0.52554 20.3 1012 25087
28
+ deu-nld flores101-devtest 0.55710 24.2 1012 25467
29
+ eng-afr flores101-devtest 0.68429 40.7 1012 25740
30
+ eng-deu flores101-devtest 0.64888 38.5 1012 25094
31
+ eng-ltz flores101-devtest 0.49231 18.4 1012 25087
32
+ eng-nld flores101-devtest 0.57984 26.8 1012 25467
33
+ ltz-afr flores101-devtest 0.53623 23.2 1012 25740
34
+ ltz-deu flores101-devtest 0.59122 30.0 1012 25094
35
+ ltz-eng flores101-devtest 0.57557 31.0 1012 24721
36
+ ltz-nld flores101-devtest 0.49312 18.6 1012 25467
37
+ nld-afr flores101-devtest 0.52409 20.0 1012 25740
38
+ nld-deu flores101-devtest 0.53898 22.6 1012 25094
39
+ nld-eng flores101-devtest 0.58970 30.7 1012 24721
40
+ nld-ltz flores101-devtest 0.42637 11.8 1012 25087
41
+ deu-eng multi30k_task2_test_2016 0.21505 4.1 5000 67382
42
+ eng-deu multi30k_task2_test_2016 0.26916 2.7 5000 51501
43
+ deu-eng multi30k_test_2016_flickr 0.60928 39.9 1000 12955
44
+ eng-deu multi30k_test_2016_flickr 0.64172 35.4 1000 12106
45
+ deu-eng multi30k_test_2017_flickr 0.63154 40.5 1000 11374
46
+ eng-deu multi30k_test_2017_flickr 0.63078 34.2 1000 10755
47
+ deu-eng multi30k_test_2017_mscoco 0.55708 32.2 461 5231
48
+ eng-deu multi30k_test_2017_mscoco 0.57537 29.1 461 5158
49
+ deu-eng multi30k_test_2018_flickr 0.59422 36.9 1071 14689
50
+ eng-deu multi30k_test_2018_flickr 0.59597 30.0 1071 13703
51
+ deu-eng newssyscomb2009 0.54993 28.2 502 11818
52
+ eng-deu newssyscomb2009 0.53867 23.2 502 11271
53
+ deu-eng news-test2008 0.54601 27.2 2051 49380
54
+ eng-deu news-test2008 0.53149 23.6 2051 47447
55
+ deu-eng newstest2009 0.53747 25.9 2525 65399
56
+ eng-deu newstest2009 0.53283 22.9 2525 62816
57
+ deu-eng newstest2010 0.58355 30.6 2489 61711
58
+ eng-deu newstest2010 0.54885 25.8 2489 61503
59
+ deu-eng newstest2011 0.54883 26.3 3003 74681
60
+ eng-deu newstest2011 0.52712 23.1 3003 72981
61
+ deu-eng newstest2012 0.56153 28.5 3003 72812
62
+ eng-deu newstest2012 0.52662 23.3 3003 72886
63
+ deu-eng newstest2013 0.57770 31.4 3000 64505
64
+ eng-deu newstest2013 0.55774 27.8 3000 63737
65
+ deu-eng newstest2014 0.59826 33.2 3003 67337
66
+ eng-deu newstest2014 0.59301 29.0 3003 62688
67
+ deu-eng newstest2015 0.59660 33.4 2169 46443
68
+ eng-deu newstest2015 0.59889 32.3 2169 44260
69
+ deu-eng newstest2016 0.64736 39.8 2999 64119
70
+ eng-deu newstest2016 0.64427 38.3 2999 62669
71
+ deu-eng newstest2017 0.60933 35.2 3004 64399
72
+ eng-deu newstest2017 0.59257 30.7 3004 61287
73
+ deu-eng newstest2018 0.66797 42.6 2998 67012
74
+ eng-deu newstest2018 0.69605 46.5 2998 64276
75
+ deu-eng newstest2019 0.63749 39.7 2000 39227
76
+ eng-deu newstest2019 0.66751 42.9 1997 48746
77
+ deu-eng newstest2020 0.61200 35.0 785 38220
78
+ eng-deu newstest2020 0.60411 32.3 1418 52383
79
+ deu-eng newstestB2020 0.61255 35.1 785 37696
80
+ eng-deu newstestB2020 0.59513 31.8 1418 53092
81
+ deu-eng tatoeba-test-v2020-07-28 0.67948 51.4 10000 81233
82
+ deu-ltz tatoeba-test-v2020-07-28 0.32481 19.4 337 2135
83
+ deu-nds tatoeba-test-v2020-07-28 0.48061 23.3 10000 76144
84
+ deu-nld tatoeba-test-v2020-07-28 0.71449 54.7 10000 73546
85
+ deu-yid tatoeba-test-v2020-07-28 9.144 0.5 556 3425
86
+ eng-deu tatoeba-test-v2020-07-28 0.64151 44.1 10000 83347
87
+ eng-fry tatoeba-test-v2020-07-28 0.38103 20.9 205 1529
88
+ eng-gos tatoeba-test-v2020-07-28 0.15669 0.3 1152 5514
89
+ eng-ltz tatoeba-test-v2020-07-28 0.31995 13.2 283 1733
90
+ eng-nld tatoeba-test-v2020-07-28 0.71794 55.8 10000 71436
91
+ eng-yid tatoeba-test-v2020-07-28 9.702 0.5 1168 8094
92
+ fry-eng tatoeba-test-v2020-07-28 0.40876 26.2 205 1500
93
+ fry-nld tatoeba-test-v2020-07-28 0.54997 40.8 233 1672
94
+ gos-eng tatoeba-test-v2020-07-28 0.37610 24.2 1152 5622
95
+ ltz-deu tatoeba-test-v2020-07-28 0.36313 20.6 337 2144
96
+ ltz-eng tatoeba-test-v2020-07-28 0.36455 28.9 283 1751
97
+ ltz-nld tatoeba-test-v2020-07-28 0.31590 25.4 273 1567
98
+ nds-deu tatoeba-test-v2020-07-28 0.64008 45.4 10000 74571
99
+ nld-deu tatoeba-test-v2020-07-28 0.73181 56.9 10000 72438
100
+ nld-eng tatoeba-test-v2020-07-28 0.74967 61.5 10000 69848
101
+ nld-fry tatoeba-test-v2020-07-28 0.48245 31.8 233 1679
102
+ nld-ltz tatoeba-test-v2020-07-28 0.33202 17.4 273 1532
103
+ yid-deu tatoeba-test-v2020-07-28 6.146 0.1 556 3332
104
+ yid-eng tatoeba-test-v2020-07-28 3.724 0.0 1168 7741
105
+ afr-nld tatoeba-test-v2021-03-30 0.71484 55.5 1058 6720
106
+ deu-eng tatoeba-test-v2021-03-30 0.66953 50.1 12664 105121
107
+ deu-frr tatoeba-test-v2021-03-30 0.12733 0.2 279 1861
108
+ deu-gos tatoeba-test-v2021-03-30 0.14297 0.5 210 1140
109
+ deu-ltz tatoeba-test-v2021-03-30 0.32262 19.1 350 2227
110
+ deu-nds tatoeba-test-v2021-03-30 0.48061 23.3 10000 76144
111
+ deu-nld tatoeba-test-v2021-03-30 0.71451 54.7 10124 74568
112
+ deu-yid tatoeba-test-v2021-03-30 9.201 0.4 830 5207
113
+ eng-deu tatoeba-test-v2021-03-30 0.63435 42.8 12664 107460
114
+ eng-fry tatoeba-test-v2021-03-30 0.38580 21.3 221 1655
115
+ eng-gos tatoeba-test-v2021-03-30 0.15617 0.3 1193 5711
116
+ eng-gsw tatoeba-test-v2021-03-30 0.22797 1.0 210 1013
117
+ eng-ltz tatoeba-test-v2021-03-30 0.31837 13.0 299 1833
118
+ eng-nld tatoeba-test-v2021-03-30 0.71214 55.0 11660 83811
119
+ eng-yid tatoeba-test-v2021-03-30 9.720 0.4 1888 12516
120
+ frr-deu tatoeba-test-v2021-03-30 0.22492 5.1 279 1886
121
+ fry-eng tatoeba-test-v2021-03-30 0.41803 27.2 221 1624
122
+ fry-nld tatoeba-test-v2021-03-30 0.55658 40.8 265 1884
123
+ gos-deu tatoeba-test-v2021-03-30 0.44480 24.2 210 1175
124
+ gos-eng tatoeba-test-v2021-03-30 0.37414 23.7 1193 5819
125
+ gsw-eng tatoeba-test-v2021-03-30 0.35044 18.9 210 1021
126
+ ltz-deu tatoeba-test-v2021-03-30 0.37178 21.1 350 2229
127
+ ltz-eng tatoeba-test-v2021-03-30 0.37187 30.0 299 1845
128
+ ltz-nld tatoeba-test-v2021-03-30 0.30605 25.9 306 1763
129
+ nds-deu tatoeba-test-v2021-03-30 0.64008 45.4 10000 74571
130
+ nld-afr tatoeba-test-v2021-03-30 0.76604 62.3 1058 6833
131
+ nld-deu tatoeba-test-v2021-03-30 0.73191 56.9 10124 73449
132
+ nld-eng tatoeba-test-v2021-03-30 0.74096 60.6 11660 81885
133
+ nld-fry tatoeba-test-v2021-03-30 0.47807 30.8 265 1889
134
+ nld-ltz tatoeba-test-v2021-03-30 0.33346 17.5 306 1735
135
+ yid-deu tatoeba-test-v2021-03-30 6.349 0.1 830 5045
136
+ yid-eng tatoeba-test-v2021-03-30 3.764 0.1 1888 11810
137
+ afr-deu tatoeba-test-v2021-08-07 0.68679 50.4 1583 9105
138
+ afr-eng tatoeba-test-v2021-08-07 0.70682 56.6 1374 9622
139
+ afr-nld tatoeba-test-v2021-08-07 0.71516 55.5 1056 6710
140
+ deu-afr tatoeba-test-v2021-08-07 0.70274 54.3 1583 9507
141
+ deu-eng tatoeba-test-v2021-08-07 0.66023 48.6 17565 149462
142
+ deu-frr tatoeba-test-v2021-08-07 0.12516 0.2 278 1855
143
+ deu-gos tatoeba-test-v2021-08-07 0.14486 0.5 207 1135
144
+ deu-ltz tatoeba-test-v2021-08-07 0.32573 19.4 347 2206
145
+ deu-nds tatoeba-test-v2021-08-07 0.48058 23.2 9999 76137
146
+ deu-nld tatoeba-test-v2021-08-07 0.71440 54.6 10218 75235
147
+ deu-swg tatoeba-test-v2021-08-07 0.20932 1.1 1523 15448
148
+ deu-yid tatoeba-test-v2021-08-07 9.211 0.4 853 5355
149
+ eng-afr tatoeba-test-v2021-08-07 0.71995 56.5 1374 10317
150
+ eng-deu tatoeba-test-v2021-08-07 0.63103 42.0 17565 151568
151
+ eng-fry tatoeba-test-v2021-08-07 0.37079 19.2 220 1600
152
+ eng-gos tatoeba-test-v2021-08-07 0.15745 0.3 1154 5525
153
+ eng-gsw tatoeba-test-v2021-08-07 0.22863 1.0 205 984
154
+ eng-ltz tatoeba-test-v2021-08-07 0.32377 13.4 293 1828
155
+ eng-nds tatoeba-test-v2021-08-07 0.39238 16.9 2500 18264
156
+ eng-nld tatoeba-test-v2021-08-07 0.71062 54.5 12696 91796
157
+ eng-yid tatoeba-test-v2021-08-07 9.624 0.4 2483 16395
158
+ frr-deu tatoeba-test-v2021-08-07 0.22487 5.2 278 1880
159
+ fry-eng tatoeba-test-v2021-08-07 0.40545 25.1 220 1573
160
+ fry-nld tatoeba-test-v2021-08-07 0.55771 41.7 260 1854
161
+ gos-deu tatoeba-test-v2021-08-07 0.45302 25.4 207 1168
162
+ gos-eng tatoeba-test-v2021-08-07 0.37628 24.1 1154 5635
163
+ gos-nld tatoeba-test-v2021-08-07 0.45777 26.2 1852 9903
164
+ gsw-eng tatoeba-test-v2021-08-07 0.35209 19.1 205 990
165
+ ltz-deu tatoeba-test-v2021-08-07 0.37165 21.3 347 2208
166
+ ltz-eng tatoeba-test-v2021-08-07 0.37784 30.3 293 1840
167
+ ltz-nld tatoeba-test-v2021-08-07 0.32823 26.7 292 1685
168
+ nds-deu tatoeba-test-v2021-08-07 0.64008 45.4 9999 74564
169
+ nds-eng tatoeba-test-v2021-08-07 0.55193 38.3 2500 17589
170
+ nds-nld tatoeba-test-v2021-08-07 0.66943 50.0 1657 11490
171
+ nld-afr tatoeba-test-v2021-08-07 0.76610 62.3 1056 6823
172
+ nld-deu tatoeba-test-v2021-08-07 0.73162 56.8 10218 74131
173
+ nld-eng tatoeba-test-v2021-08-07 0.74088 60.5 12696 89978
174
+ nld-fry tatoeba-test-v2021-08-07 0.48460 31.4 260 1857
175
+ nld-gos tatoeba-test-v2021-08-07 0.16479 0.5 1852 9535
176
+ nld-ltz tatoeba-test-v2021-08-07 0.33533 18.0 292 1658
177
+ nld-nds tatoeba-test-v2021-08-07 0.43779 19.9 1657 11711
178
+ swg-deu tatoeba-test-v2021-08-07 0.40348 16.1 1523 15632
179
+ yid-deu tatoeba-test-v2021-08-07 6.305 0.1 853 5173
180
+ yid-eng tatoeba-test-v2021-08-07 3.704 0.1 2483 15452
benchmark_translations.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929afadf717def03c9f3b82e33504efa6e757ecdbfdd81a7832ce9a8755c1c83
3
+ size 29041557
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "relu",
4
+ "architectures": [
5
+ "MarianMTModel"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "bad_words_ids": [
9
+ [
10
+ 35697
11
+ ]
12
+ ],
13
+ "bos_token_id": 0,
14
+ "classifier_dropout": 0.0,
15
+ "d_model": 1024,
16
+ "decoder_attention_heads": 16,
17
+ "decoder_ffn_dim": 4096,
18
+ "decoder_layerdrop": 0.0,
19
+ "decoder_layers": 6,
20
+ "decoder_start_token_id": 35697,
21
+ "decoder_vocab_size": 35698,
22
+ "dropout": 0.1,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 6,
27
+ "eos_token_id": 26162,
28
+ "forced_eos_token_id": 26162,
29
+ "init_std": 0.02,
30
+ "is_encoder_decoder": true,
31
+ "max_length": 512,
32
+ "max_position_embeddings": 1024,
33
+ "model_type": "marian",
34
+ "normalize_embedding": false,
35
+ "num_beams": 4,
36
+ "num_hidden_layers": 6,
37
+ "pad_token_id": 35697,
38
+ "scale_embedding": true,
39
+ "share_encoder_decoder_embeddings": true,
40
+ "static_position_embeddings": true,
41
+ "torch_dtype": "float16",
42
+ "transformers_version": "4.18.0.dev0",
43
+ "use_cache": true,
44
+ "vocab_size": 35698
45
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1783a2f9c43727b4609deb0cfd2b8d4b5b47e59ca7ca16b9f3d76988a73b8a4d
3
+ size 499096835
source.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60a485d1557ca976c144d2ce667e7cb410d52f01696fb3c6e1178cb268eba51
3
+ size 803488
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af6fcca1f79b251959fb1ab2f1461fde04c8e5070879e951862c20b026975e7
3
+ size 801079
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"source_lang": "gmw", "target_lang": "gmw", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20210807_transformer-big_2022-08-11/gmw-gmw", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff