{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "name": "English_To_Dendi_BPE_notebook_custom_data.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true,
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.5.8"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Jamiil92/masakhane/blob/master/en-ddn/live.bible.is-baseline/English_To_Dendi_BPE_notebook_custom_data.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "Igc5itf-xMGj"
      },
      "source": [
        "# Masakhane - Machine Translation for African Languages (Using JoeyNMT)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "x4fXCKCf36IK"
      },
      "source": [
        "## Note before beginning:\n",
        "### - The idea is that you should be able to make minimal changes to this in order to get SOME result for your own translation corpus. \n",
        "\n",
        "### - The tl;dr: Go to the **\"TODO\"** comments which will tell you what to update to get up and running\n",
        "\n",
        "### - If you actually want to have a clue what you're doing, read the text and peek at the links\n",
        "\n",
        "### - With 100 epochs, it should take around 7 hours to run in Google Colab\n",
        "\n",
        "### - Once you've gotten a result for your language, please attach and email your notebook that generated it to masakhanetranslation@gmail.com\n",
        "\n",
        "### - If you care enough and get a chance, doing a brief background on your language would be amazing. See examples in  [(Martinus, 2019)](https://arxiv.org/abs/1906.05685)\n",
        "\n",
        "### - This notebook is intended to be used with custom parallel data. That means that you need two files, where one is in your language, the other English, and the lines in the files are corresponding translations."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "l929HimrxS0a"
      },
      "source": [
        "## Pre-process your data\n",
        "\n",
        "We assume here that you already have a data set. The format in which we will process it here requires that \n",
        "1. you have two files, one for each language\n",
        "2. the files are sentence-aligned, which means that each line should correspond to the same line in the other file.\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "oGRmDELn7Az0",
        "outputId": "0ffd4c96-92c0-4630-ce07-e03bf4bc5f66",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 124
        }
      },
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n",
            "\n",
            "Enter your authorization code:\n",
            "··········\n",
            "Mounted at /content/drive\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "Cn3tgQLzUxwn",
        "colab": {}
      },
      "source": [
        "# TODO: Set your source and target languages. Keep in mind, these traditionally use language codes as found here:\n",
        "# These will also become the suffix's of all vocab and corpus files used throughout\n",
        "import os\n",
        "source_language = \"en\"\n",
        "target_language = \"ddn\" \n",
        "lc = False  # If True, lowercase the data.\n",
        "seed = 42  # Random seed for shuffling.\n",
        "tag = \"baseline\" # Give a unique name to your folder - this is to ensure you don't rewrite any models you've already submitted\n",
        "\n",
        "os.environ[\"src\"] = source_language # Sets them in bash as well, since we often use bash scripts\n",
        "os.environ[\"tgt\"] = target_language\n",
        "os.environ[\"tag\"] = tag\n",
        "\n",
        "# This will save it to a folder in our gdrive instead!\n",
        "!mkdir -p \"/content/drive/My Drive/masakhane/$src-$tgt-$tag\"\n",
        "os.environ[\"gdrive_path\"] = \"/content/drive/My Drive/masakhane/%s-%s-%s\" % (source_language, target_language, tag)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "kBSgJHEw7Nvx",
        "outputId": "daf07487-72e2-425f-fd48-fa07fc942446",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "!echo $gdrive_path"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/content/drive/My Drive/masakhane/en-ddn-baseline\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "gA75Fs9ys8Y9",
        "outputId": "4cd4ac9b-6e20-4212-de29-3c2fd7c02206",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 104
        }
      },
      "source": [
        "# Install opus-tools\n",
        "! pip install opustools-pkg"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Collecting opustools-pkg\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/6c/9f/e829a0cceccc603450cd18e1ff80807b6237a88d9a8df2c0bb320796e900/opustools_pkg-0.0.52-py3-none-any.whl (80kB)\n",
            "\r\u001b[K     |████                            | 10kB 21.3MB/s eta 0:00:01\r\u001b[K     |████████                        | 20kB 2.2MB/s eta 0:00:01\r\u001b[K     |████████████▏                   | 30kB 3.3MB/s eta 0:00:01\r\u001b[K     |████████████████▏               | 40kB 2.1MB/s eta 0:00:01\r\u001b[K     |████████████████████▎           | 51kB 2.6MB/s eta 0:00:01\r\u001b[K     |████████████████████████▎       | 61kB 3.1MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▎   | 71kB 3.6MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 81kB 3.1MB/s \n",
            "\u001b[?25hInstalling collected packages: opustools-pkg\n",
            "Successfully installed opustools-pkg-0.0.52\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "xq-tDZVks7ZD",
        "outputId": "8ef197f9-aab8-48a0-c808-b802103dc366",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 52
        }
      },
      "source": [
        "# TODO: specify the file paths here\n",
        "source_file = \"/test.en\"\n",
        "target_file = \"/test.ddn\"\n",
        "\n",
        "# They should both have the same length.\n",
        "! wc -l $source_file\n",
        "! wc -l $target_file"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "7943 /test.en\n",
            "7943 /test.ddn\n"
          ],
          "name": "stdout"
        }
      ]
    },
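    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# OPTIONAL sanity check - a minimal sketch using the paths defined above.\n",
        "# The two files must be sentence-aligned, i.e. contain exactly the same number of lines;\n",
        "# this repeats the `wc -l` check above programmatically so you can assert on it.\n",
        "def count_lines(path):\n",
        "    with open(path) as f:\n",
        "        return sum(1 for _ in f)\n",
        "\n",
        "assert count_lines(source_file) == count_lines(target_file), \"Source and target files are not sentence-aligned!\"\n"
      ],
      "execution_count": 0,
      "outputs": []
    },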
    {
      "cell_type": "code",
      "metadata": {
        "id": "BNmkusFfGorx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# TODO: Pre-processing! (OPTIONAL)\n",
        "\n",
        "# If your data contains weird symbols or the like, you might want to do some cleaning and normalization.\n",
        "# We don't have the code in the notebook for that, but you can use sacremoses \"normalize\" for example for normalization punctuation: https://github.com/alvations/sacremoses.\n",
        "\n",
        "# We apply tokenization to separate punctuation marks from the actual words, split words at hyphens etc.\n",
        "# If your data is already tokenized, that's great! Skip this cell.\n",
        "# Otherwise we can use sacremoses to do the tokenization for us. \n",
        "# We need the data to be tokenized such that it matches the global test set.\n",
        "\n",
        "#! pip install sacremoses\n",
        "\n",
        "#tok_source_file = source_file+\".tok\"\n",
        "#tok_target_file = target_file+\".tok\"\n",
        "\n",
        "# Tokenize the source\n",
        "#! sacremoses tokenize -l $source_language < $source_file > $tok_source_file\n",
        "# Tokenize the target\n",
        "#! sacremoses tokenize -l $target_language < $target_file > $tok_target_file\n",
        "\n",
        "# Let's take a look what tokenization did to the text.\n",
        "#! head $source_file*\n",
        "#! head $target_file*\n",
        "\n",
        "# Change the pointers to our files such that we continue to work with the tokenized data.\n",
        "#source_file = tok_source_file\n",
        "#target_file = tok_target_file"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "n48GDRnP8y2G",
        "colab": {}
      },
      "source": [
        "# Download the global test set.\n",
        "#! wget https://raw.githubusercontent.com/masakhane-io/masakhane/master/jw300_utils/test/test.en-any.en\n",
        "  \n",
        "# And the specific test set for this language pair.\n",
        "#os.environ[\"trg\"] = target_language \n",
        "#os.environ[\"src\"] = target_language \n",
        "\n",
        "#! wget https://raw.githubusercontent.com/masakhane-io/masakhane/master/jw300_utils/test/test.en-$trg.en \n",
        "#! mv test.en-$trg.en test.en\n",
        "#! wget https://raw.githubusercontent.com/masakhane-io/masakhane/master/jw300_utils/test/test.en-$trg.$trg \n",
        "#! mv test.en-$trg.$trg test.$trg\n",
        "\n",
        "# TODO: if this fails it means that there is NO test set for your language yet. It's on you to create one.\n",
        "# A good idea would be to take a random subset of your data, and add it to https://raw.githubusercontent.com/masakhane-io/masakhane/master/jw300_utils/test/test.en-any.en.\n",
        "# Make a Pull Request and get it approved and merged.\n",
        "# Then repeat this cell to retrieve the new test set.\n",
        "# Then proceed to the next cell that will filter out all duplicates from the training set, so that there is no overlap between training and test set."
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "NqDG-CI28y2L",
        "colab": {}
      },
      "source": [
        "# Read the test data to filter from train and dev splits.\n",
        "# Store english portion in set for quick filtering checks.\n",
        "#en_test_sents = set()\n",
        "#filter_test_sents = \"test.en-any.en\"\n",
        "#j = 0\n",
        "#with open(filter_test_sents) as f:\n",
        "#  for line in f:\n",
        "#    en_test_sents.add(line.strip())\n",
        "#    j += 1\n",
        "#print('Loaded {} global test sentences to filter from the training/dev data.'.format(j))"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "3CNdwLBCfSIl",
        "outputId": "dd4274db-a18a-4bcd-e301-7ef2d85fb842",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 161
        }
      },
      "source": [
        "import pandas as pd\n",
        "\n",
        "source = []\n",
        "target = []\n",
        "skip_lines = []  # Collect the line numbers of the source portion to skip the same lines for the target portion.\n",
        "with open(source_file) as f:\n",
        "    for i, line in enumerate(f):\n",
        "        # Skip sentences that are contained in the test set.\n",
        "        #if line.strip() not in en_test_sents:\n",
        "        source.append(line.strip())\n",
        "        #else:\n",
        "        #    skip_lines.append(i)             \n",
        "with open(target_file) as f:\n",
        "    for j, line in enumerate(f):\n",
        "        # Only add to corpus if corresponding source was not skipped.\n",
        "        #if j not in skip_lines:\n",
        "        target.append(line.strip())\n",
        "    \n",
        "print('Loaded data and skipped {}/{} lines since contained in test set.'.format(len(skip_lines), i))\n",
        "    \n",
        "df = pd.DataFrame(zip(source, target), columns=['source_sentence', 'target_sentence'])\n",
        "df.head(3)"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Loaded data and skipped 0/7942 lines since contained in test set.\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>source_sentence</th>\n",
              "      <th>target_sentence</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>The book of the generation of Jesus Christ, th...</td>\n",
              "      <td>Yesu Mɛsiyɑ ko ɑ̀ ci Dɑfidi ize, Abulɛmɑ ize, ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Abraham begat Isaac; and Isaac begat Jacob; an...</td>\n",
              "      <td>Abulɛmɑ nɑ Isɑɑkɑ hɛi. Isɑɑkɑ nɑ Yɑkɔfu hɛi. Y...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>and Judah begat Perez and Zerah of Tamar; and ...</td>\n",
              "      <td>Yudɑ mo nɑ Fɑresi ndɑ Zerɑ hɛi Tɑmɑrɑ gɑɑ. Fɑr...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                                     source_sentence                                    target_sentence\n",
              "0  The book of the generation of Jesus Christ, th...  Yesu Mɛsiyɑ ko ɑ̀ ci Dɑfidi ize, Abulɛmɑ ize, ...\n",
              "1  Abraham begat Isaac; and Isaac begat Jacob; an...  Abulɛmɑ nɑ Isɑɑkɑ hɛi. Isɑɑkɑ nɑ Yɑkɔfu hɛi. Y...\n",
              "2  and Judah begat Perez and Zerah of Tamar; and ...  Yudɑ mo nɑ Fɑresi ndɑ Zerɑ hɛi Tɑmɑrɑ gɑɑ. Fɑr..."
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "YkuK3B4p2AkN"
      },
      "source": [
        "## Pre-processing and export\n",
        "\n",
        "It is generally a good idea to remove duplicate translations and conflicting translations from the corpus. In practice, these public corpora include some number of these that need to be cleaned.\n",
        "\n",
        "In addition we will split our data into dev/test/train and export to the filesystem."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "M_2ouEOH1_1q",
        "colab": {}
      },
      "source": [
        "# drop duplicate translations\n",
        "df_pp = df.drop_duplicates()\n",
        "\n",
        "# drop conflicting translations\n",
        "#df_pp.drop_duplicates(subset='source_sentence', inplace=True)\n",
        "#df_pp.drop_duplicates(subset='target_sentence', inplace=True)\n",
        "\n",
        "# Shuffle the data to remove bias in dev set selection.\n",
        "df_pp = df_pp.sample(frac=1, random_state=seed).reset_index(drop=True)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "niVvXvLV0bkE",
        "colab_type": "code",
        "outputId": "df7805a7-4e56-4d83-b326-cdf0cc0734da",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "df_pp.shape"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(7937, 2)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "hxxBOCA-xXhy",
        "outputId": "05e690d4-6561-41c9-c058-d0facedcf7ba",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# This section does the split between train/dev for the parallel corpora then saves them as separate files\n",
        "# We use 1000 dev test and the given test set.\n",
        "import csv\n",
        "import numpy as np\n",
        "\n",
        "# TODO: if your corpus is smaller than 1000, reduce this number. With a corpus that small you might not obtain good results with NMT though :/\n",
        "# Do the split between dev/train and create parallel corpora\n",
        "num_dev_patterns = 1000\n",
        "\n",
        "# Optional: lower case the corpora - this will make it easier to generalize, but without proper casing.\n",
        "if lc:  # Julia: making lowercasing optional\n",
        "    df_pp[\"source_sentence\"] = df_pp[\"source_sentence\"].str.lower()\n",
        "    df_pp[\"target_sentence\"] = df_pp[\"target_sentence\"].str.lower()\n",
        "\n",
        "\n",
        "#Divide df_pp into train and test sets\n",
        "\n",
        "msk = np.random.rand(len(df_pp)) < 0.98\n",
        "train = df_pp[msk]\n",
        "test_df = df_pp[~msk]\n",
        "\n",
        "# Julia: test sets are already generated\n",
        "dev_df = df_pp.tail(num_dev_patterns) # Herman: Error in original\n",
        "stripped_df = df_pp.drop(df_pp.tail(num_dev_patterns).index)\n",
        "\n",
        "with open(\"train.\"+source_language, \"w\") as src_file, open(\"train.\"+target_language, \"w\") as trg_file:\n",
        "  for index, row in stripped_df.iterrows():\n",
        "    src_file.write(row[\"source_sentence\"]+\"\\n\")\n",
        "    trg_file.write(row[\"target_sentence\"]+\"\\n\")\n",
        "    \n",
        "with open(\"dev.\"+source_language, \"w\") as src_file, open(\"dev.\"+target_language, \"w\") as trg_file:\n",
        "  for index, row in dev_df.iterrows():\n",
        "    src_file.write(row[\"source_sentence\"]+\"\\n\")\n",
        "    trg_file.write(row[\"target_sentence\"]+\"\\n\")\n",
        "\n",
        "with open(\"test.\"+source_language, \"w\") as src_file, open(\"test.\"+target_language, \"w\") as trg_file:\n",
        "  for index, row in test_df.iterrows():\n",
        "    src_file.write(row[\"source_sentence\"]+\"\\n\")\n",
        "    trg_file.write(row[\"target_sentence\"]+\"\\n\")\n",
        "\n",
        "\n",
        "#stripped[[\"source_sentence\"]].to_csv(\"train.\"+source_language, header=False, index=False)  # Herman: Added `header=False` everywhere\n",
        "#stripped[[\"target_sentence\"]].to_csv(\"train.\"+target_language, header=False, index=False)  # Julia: Problematic handling of quotation marks.\n",
        "\n",
        "#dev[[\"source_sentence\"]].to_csv(\"dev.\"+source_language, header=False, index=False)\n",
        "#dev[[\"target_sentence\"]].to_csv(\"dev.\"+target_language, header=False, index=False)\n",
        "\n",
        "\n",
        "# TODO: Doublecheck the format below. There should be no extra quotation marks or weird characters. It should also not be empty.\n",
        "! head train.*\n",
        "! head dev.*\n",
        "! head test.*"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "==> train.ddn <==\n",
            "À kɑɑ hunu kɑ hɑndunyɑ dimi yom kɑ ǹ goono hɑndunyɑ cɛnjɛ tɑɑci gɑɑ dɛrɑndi, wɑtom Gɔgu ndɑ Mɑgɔgu. À gɑ ǹ meigu wɑngu tɛyom sɛ. Ǹ bɑyom bisɑ tɛku tɑɑsi.\n",
            "Zɑngɑ Ikpɛ Sendi hɑntumɑntɛ cii: Ikpɛ nɑ ǹ nɑ birikɑyom biyɑ. À nɑ ǹ nɑ moo yom kɑ ǹ si di, ndɑ hɑngɑ yom kɑ ǹ si mɑɑ hɑli kɑ kɑɑ hɔ.\n",
            "A mɑɑ jinde beeri fɔ kɑ ɑ̀ go hunu zɑ Ikpɛ goonoyom dɔ. À go cii Ikpɛ dɔntɔneize iye di sɛ: Wɑ kpei kɑ Ikpɛ binefute gɑɑsiɑ iye munu hɑndunyɑ bɔm.\n",
            "Kɑnkɑmi kunɑ ɑ nɑ gbei futu yom tɛ. A zɑm jiibi ce boobo. A mɑɑ hɛrɛɛ. A mɑɑ jo. A nɑ meehɔ yom zɑɑ cɛrɛ bɔm. A mɑɑ yeeni. A gɔrɔ bumbum.\n",
            "Ammɑ Piɛɛ nɑ Yesu ze kɑ cii: A si ɑ̀ bei. A si fɑhɑm mo ndɑ hɛ kɑ n gɑ cii. Piɛɛ hunu hundi bɑtumɑ kunɑ kɑ kpei kɑtɑ dɔ. À gɑɑ no, gɔrɔngɔ cɑ jinde fɔ.\n",
            "Ammɑ sɑɑ kɑ i nɑ jiribi iye tɔnɑndi, i tunu kɑ dirɑ. Ǹ kulu ndɑ ǹ wɛndɛ yom ndɑ ǹ koo yom nɑ i dum kɑlɑ wɑngɑrɑ bɑndɑ. I sɔmbu tɛkuɑ mee gɑɑ kɑ ɑduwɑ tɛ.\n",
            "Yesu cii ɑ̀ sɛ: A koo hundiyo, n lɑɑkɑli mɑ si tunu. N nɑɑne nɑ n no bɑɑni. Kpei ndɑ lɑɑkɑli kɑne.\n",
            "Ǹ kɔmɑ hɑndunyɑ ziibi yom gɑɑ Kpe Yesu Mɛsiyɑ kɑ ɑ̀ ci Fɑɑbɑkɔ ɑ̀ beiyom dɔ. À bɑndɑ de ǹ kɑɑ yɑɑrɑ kɑ wuluwɑli hɑndunyɑ ziibi yom kunɑ, hɑndunyɑ wɔ gɑɑŋmɑɑ ǹ bɔm. Ǹ kɔkɔrɔ bɑndɑ goonoyom, ɑ̀ jɑɑsɑ sintine gɔrɛ.\n",
            "Zɑngɑ koo yom go kɑ ngei bɑɑbɑ gɑnɑ, wɑ si wom bɔm no wom dom bineibɑɑi sɛ kɑ ɑ̀ kunɑ wom gɔrɔ sɑɑ fɔ zɑm kɑ bei kunɑ.\n",
            "Ngɑ di no, Biyɑ Hɑlɑɑlɑ mo go tɛ sɛdɑ i sɛ ngɑ di bɔm. Sintine ɑ̀ cii:\n",
            "\n",
            "==> train.en <==\n",
            "and shall come forth to deceive the nations which are in the four corners of the earth, Gog and Magog, to gather them together to the war: the number of whom is as the sand of the sea.\n",
            "according as it is written, God gave them a spirit of stupor, eyes that they should not see, and ears that they should not hear, unto this very day.\n",
            "And I heard a great voice out of the temple, saying to the seven angels, Go ye, and pour out the seven bowls of the wrath of God into the earth.\n",
            "in labor and travail, in watchings often, in hunger and thirst, in fastings often, in cold and nakedness.\n",
            "But he denied, saying, I neither know, nor understand what thou sayest: and he went out into the porch; and the cock crew.\n",
            "And when it came to pass that we had accomplished the days, we departed and went on our journey; and they all, with wives and children, brought us on our way till we were out of the city: and kneeling down on the beach, we prayed, and bade each other farewell;\n",
            "And he said unto her, Daughter, thy faith hath made thee whole; go in peace.\n",
            "For if, after they have escaped the defilements of the world through the knowledge of the Lord and Saviour Jesus Christ, they are again entangled therein and overcome, the last state is become worse with them than the first.\n",
            "as children of obedience, not fashioning yourselves according to your former lusts in the time of your ignorance:\n",
            "And the Holy Spirit also beareth witness to us; for after he hath said,\n",
            "==> dev.ddn <==\n",
            "A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "Cɔfɔ ɑ̀ go cii: A fuu kɑ ɑ̀ kunɑ ɑ hunu no, ɑ kɑɑ ye. Sɑɑ kɑ hɔllɛ di ye fuu di, ɑ̀ gɑru hɛ fɔ kulu si ɑ̀ kunɑ. Ammɑ ǹ nɑ ɑ̀ hɑɑbu kɑ booriɑndi.\n",
            "Kɑ Yesu ye kɑ huro hɑrihi ɑ̀ nɑ buulɑ dem kɑ kpei ngɑ kpɑɑrɑ.\n",
            "Cɔfɔ jinde fɔ hunu zɑ bɛɛnɛ kɑ cii: Ni yɑ cii ɑ Ize Binegɑnji. N gɑɑ nɑ ɑ du ɑ binekɑɑne.\n",
            "Yesu tu ǹ sɛ kɑ cii: Beerem kɑ ǹ gundɑ bɑɑni si bɑɑ lokotoro kɑlɑ beerem kɑ ǹ sindɑ bɑɑni.\n",
            "Sɑɑ kɑ Pɔlu ndɑ Silɑ bisɑ Amfipoli wɑngɑrɑ ndɑ Apoloni wɑngɑrɑ ǹ kɑɑ Tesɑlonikɑ wɑngɑrɑ. Nungu di Yuifu yom mɑrgɑ fuu fɔ goono.\n",
            "Ikpɛ mɑnɑ ɑ̀ nɑ gɑndɑ hɛ fɔ kulu ɑ̀ mɑ tubu, bɑ cee dɛcɛyom dɔ sɑrɑ. Ammɑ Ikpɛ nɑ ɑlikɑwɑli tɛ ɑ̀ sɛ kɑ cii ngɑ gɑ ɑ̀ no lɑɑbu ɑ̀ mɑ ci ɑ̀ ŋmɔne ndɑ ɑ̀ bɑndɑ yom ŋmɔne mo. Sɑɑ ngɑ di Abulɛmɑ sindɑ koo jinɑ.\n",
            "\n",
            "==> dev.en <==\n",
            "I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "Then he saith, I will return into my house whence I came out; and when he is come, he findeth it empty, swept, and garnished.\n",
            "And he entered into a boat, and crossed over, and came into his own city.\n",
            "and a voice came out of the heavens, Thou art my beloved Son, in thee I am well pleased.\n",
            "And Jesus answering said unto them, They that are in health have no need of a physician; but they that are sick.\n",
            "Now when they had passed through Amphipolis and Apollonia, they came to Thessalonica, where was a synagogue of the Jews:\n",
            "and he gave him none inheritance in it, no, not so much as to set his foot on: and he promised that he would give it to him in possession, and to his seed after him, when as yet he had no child.\n",
            "==> test.ddn <==\n",
            "Abulɛmɑ bine hunu Kɑlɛdɑncei lɑɑbu kɑ kpei gɔrɔ Hɑrɑm lɑɑbu. Nungu di no ɑ̀ bɑɑbɑ bu. À bɑndɑ Ikpɛ kpeindɑ Abulɛmɑ lɑɑbu kɑ ɑ̀ kunɑ wom gɑ gɔrɔ hɑli mɑɑsɑ.\n",
            "Cɔfɔ ɑ̀ nɑ ǹ moo yom hɑmɑ kɑ cii: À mɑ tɛ wom sɛ ndɑ zɑngɑ wom nɑɑne goono ndɑ.\n",
            "Wom mɑ si ǹ gɑnɑ ndɑ ǹ moo diyom kɑ kɑɑni iburɑdɑm yom sɛ. Ammɑ wɑ ǹ gɑnɑ zɑngɑ Mɛsiyɑ tɑm yom kɑ ǹ gɑ kɑ Ikpɛ binebɑɑ tɛ ndɑ bine fɔ.\n",
            "Ǹ mɑ si cɛyom susu fɔndɔ bei ɑ̀ bisɑ, de sɑɑ kɑ ǹ nɑ ɑ̀ bei, ɑ̀ bɑndɑ ǹ mɑ bɑndɑ bɛrɛ ɑ̀ gɑɑ ndɑ meire hɑlɑɑlɑ kɑ ǹ jinɑ kɑ no ǹ sɛ.\n",
            "À nɑ ngɑ di tɛ zɑmɑ hɛ kɑ ɑndebi cii di mɑ tɔ sɛ kɑ ɑ̀ cii: A gɑ ɑ mee feeri ndɑ yɑɑse yom. A kɑɑ hɛ yom kɑɑtɛrɛ kɑ ǹ goono tugɑntɛ zɑ hɑndunyɑ sinjiyom.\n",
            "Bɔrɔ mɑntɑm di cii Filipu sɛ: A gɑ n hã, mee bɔm nɑ ɑndebi go sendi yɑ wɔ dimi? À go sendi ngɑ bɔm nɑ dee, wɑlɑ bɔrɔ fɔ ŋmɑni bɔm no?\n",
            "Beerem kɑ ǹ ci Mɛsiyɑ ŋmɔne yom ǹ nɑ ngei dulum hɑɑli kɑnji ndɑ ɑ̀ bɑɑyom futu yom ndɑ ɑ̀ bineibɑɑi.\n",
            "Ǹ go kɑ ɑ bɛɛrɑndi no kɔnu zɑmɑ cɛbɛ yom kɑ ǹ go tɛ mo iburɑdɑm yom ɑlɑɑdɑ yom no.\n",
            "Ngɑ di kulu tɛ zɑmɑ hɛ kɑ ɑndebi cii mɑ tɔ sɛ. À cii:\n",
            "Zɑ Moisi gɑɑ kɑlɑ kɑ kpei ɑndebi yom kulu gɑɑ, ɑ̀ sinti kɑ fɑsɑli ǹ sɛ Ikpɛ Sendi hɑntumɑntɛ yom kulu kunɑ, hɛ yom kɑ ǹ goono kɑ simbɑ ndɑ ngɑ bumbum.\n",
            "\n",
            "==> test.en <==\n",
            "Then came he out of the land of the Chaldæans, and dwelt in Haran: and from thence, when his father was dead, God removed him into this land, wherein ye now dwell:\n",
            "Then touched he their eyes, saying, According to your faith be it done unto you.\n",
            "not in the way of eyeservice, as men-pleasers; but as servants of Christ, doing the will of God from the heart;\n",
            "For it were better for them not to have known the way of righteousness, than, after knowing it, to turn back from the holy commandment delivered unto them.\n",
            "that it might be fulfilled which was spoken through the prophet, saying, I will open my mouth in parables; I will utter things hidden from the foundation of the world.\n",
            "And the eunuch answered Philip, and said, I pray thee, of whom speaketh the prophet this? of himself, or of some other?\n",
            "And they that are of Christ Jesus have crucified the flesh with the passions and the lusts thereof.\n",
            "But in vain do they worship me, Teaching as their doctrines the precepts of men.\n",
            "Now this is come to pass, that it might be fulfilled which was spoken through the prophet, saying,\n",
            "And beginning from Moses and from all the prophets, he interpreted to them in all the scriptures the things concerning himself.\n"
          ],
          "name": "stdout"
        }
      ]
    },
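    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# OPTIONAL overlap check - a minimal sketch over the files written above.\n",
        "# Because test_df was sampled from df_pp before the dev/train split was taken, some test sentences\n",
        "# may also appear in train or dev. This cell only reports the overlap so you can decide whether to re-split.\n",
        "def read_lines(path):\n",
        "    with open(path) as f:\n",
        "        return set(line.strip() for line in f)\n",
        "\n",
        "train_sents = read_lines(\"train.\" + source_language)\n",
        "dev_sents = read_lines(\"dev.\" + source_language)\n",
        "test_sents = read_lines(\"test.\" + source_language)\n",
        "print(\"test/train overlap:\", len(test_sents & train_sents))\n",
        "print(\"test/dev overlap:  \", len(test_sents & dev_sents))\n",
        "print(\"dev/train overlap: \", len(dev_sents & train_sents))\n"
      ],
      "execution_count": 0,
      "outputs": []
    },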
    {
      "cell_type": "code",
      "metadata": {
        "id": "gEDAsKXPw28d",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "stripped_df.to_csv('train.csv')\n",
        "!cp train.csv \"/content/drive/My Drive/masakhane/$src-$tgt-$tag\""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_yGbVVjlw61I",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "test_df.to_csv('test.csv')\n",
        "!cp test.csv \"/content/drive/My Drive/masakhane/$src-$tgt-$tag\""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "f--enEIOw-2I",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "dev_df.to_csv('dev.csv')\n",
        "!cp dev.csv \"/content/drive/My Drive/masakhane/$src-$tgt-$tag\""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "epeCydmCyS8X"
      },
      "source": [
        "\n",
        "\n",
        "---\n",
        "\n",
        "\n",
        "## Installation of JoeyNMT\n",
        "\n",
        "JoeyNMT is a simple, minimalist NMT package which is useful for learning and teaching. Check out the documentation for JoeyNMT [here](https://joeynmt.readthedocs.io)  "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "iBRMm4kMxZ8L",
        "outputId": "dfaac3fd-7a64-4a05-d4b3-0c366ad2c28d",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# Install JoeyNMT\n",
        "! git clone https://github.com/joeynmt/joeynmt.git\n",
        "! cd joeynmt; pip3 install ."
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Cloning into 'joeynmt'...\n",
            "remote: Enumerating objects: 97, done.\u001b[K\n",
            "remote: Counting objects:   1% (1/97)\u001b[K\rremote: Counting objects:   2% (2/97)\u001b[K\rremote: Counting objects:   3% (3/97)\u001b[K\rremote: Counting objects:   4% (4/97)\u001b[K\rremote: Counting objects:   5% (5/97)\u001b[K\rremote: Counting objects:   6% (6/97)\u001b[K\rremote: Counting objects:   7% (7/97)\u001b[K\rremote: Counting objects:   8% (8/97)\u001b[K\rremote: Counting objects:   9% (9/97)\u001b[K\rremote: Counting objects:  10% (10/97)\u001b[K\rremote: Counting objects:  11% (11/97)\u001b[K\rremote: Counting objects:  12% (12/97)\u001b[K\rremote: Counting objects:  13% (13/97)\u001b[K\rremote: Counting objects:  14% (14/97)\u001b[K\rremote: Counting objects:  15% (15/97)\u001b[K\rremote: Counting objects:  16% (16/97)\u001b[K\rremote: Counting objects:  17% (17/97)\u001b[K\rremote: Counting objects:  18% (18/97)\u001b[K\rremote: Counting objects:  19% (19/97)\u001b[K\rremote: Counting objects:  20% (20/97)\u001b[K\rremote: Counting objects:  21% (21/97)\u001b[K\rremote: Counting objects:  22% (22/97)\u001b[K\rremote: Counting objects:  23% (23/97)\u001b[K\rremote: Counting objects:  24% (24/97)\u001b[K\rremote: Counting objects:  25% (25/97)\u001b[K\rremote: Counting objects:  26% (26/97)\u001b[K\rremote: Counting objects:  27% (27/97)\u001b[K\rremote: Counting objects:  28% (28/97)\u001b[K\rremote: Counting objects:  29% (29/97)\u001b[K\rremote: Counting objects:  30% (30/97)\u001b[K\rremote: Counting objects:  31% (31/97)\u001b[K\rremote: Counting objects:  32% (32/97)\u001b[K\rremote: Counting objects:  34% (33/97)\u001b[K\rremote: Counting objects:  35% (34/97)\u001b[K\rremote: Counting objects:  36% (35/97)\u001b[K\rremote: Counting objects:  37% (36/97)\u001b[K\rremote: Counting objects:  38% (37/97)\u001b[K\rremote: Counting objects:  39% (38/97)\u001b[K\rremote: Counting objects:  40% (39/97)\u001b[K\rremote: Counting objects:  41% (40/97)\u001b[K\rremote: Counting objects:  42% (41/97)\u001b[K\rremote: Counting objects:  43% (42/97)\u001b[K\rremote: Counting objects:  44% (43/97)\u001b[K\rremote: Counting objects:  45% (44/97)\u001b[K\rremote: Counting objects:  46% (45/97)\u001b[K\rremote: Counting objects:  47% (46/97)\u001b[K\rremote: Counting objects:  48% (47/97)\u001b[K\rremote: Counting objects:  49% (48/97)\u001b[K\rremote: Counting objects:  50% (49/97)\u001b[K\rremote: Counting objects:  51% (50/97)\u001b[K\rremote: Counting objects:  52% (51/97)\u001b[K\rremote: Counting objects:  53% (52/97)\u001b[K\rremote: Counting objects:  54% (53/97)\u001b[K\rremote: Counting objects:  55% (54/97)\u001b[K\rremote: Counting objects:  56% (55/97)\u001b[K\rremote: Counting objects:  57% (56/97)\u001b[K\rremote: Counting objects:  58% (57/97)\u001b[K\rremote: Counting objects:  59% (58/97)\u001b[K\rremote: Counting objects:  60% (59/97)\u001b[K\rremote: Counting objects:  61% (60/97)\u001b[K\rremote: Counting objects:  62% (61/97)\u001b[K\rremote: Counting objects:  63% (62/97)\u001b[K\rremote: Counting objects:  64% (63/97)\u001b[K\rremote: Counting objects:  65% (64/97)\u001b[K\rremote: Counting objects:  67% (65/97)\u001b[K\rremote: Counting objects:  68% (66/97)\u001b[K\rremote: Counting objects:  69% (67/97)\u001b[K\rremote: Counting objects:  70% (68/97)\u001b[K\rremote: Counting objects:  71% (69/97)\u001b[K\rremote: Counting objects:  72% (70/97)\u001b[K\rremote: Counting objects:  73% (71/97)\u001b[K\rremote: Counting objects:  74% (72/97)\u001b[K\rremote: Counting objects:  75% (73/97)\u001b[K\rremote: Counting objects:  76% 
(74/97)\u001b[K\rremote: Counting objects:  77% (75/97)\u001b[K\rremote: Counting objects:  78% (76/97)\u001b[K\rremote: Counting objects:  79% (77/97)\u001b[K\rremote: Counting objects:  80% (78/97)\u001b[K\rremote: Counting objects:  81% (79/97)\u001b[K\rremote: Counting objects:  82% (80/97)\u001b[K\rremote: Counting objects:  83% (81/97)\u001b[K\rremote: Counting objects:  84% (82/97)\u001b[K\rremote: Counting objects:  85% (83/97)\u001b[K\rremote: Counting objects:  86% (84/97)\u001b[K\rremote: Counting objects:  87% (85/97)\u001b[K\rremote: Counting objects:  88% (86/97)\u001b[K\rremote: Counting objects:  89% (87/97)\u001b[K\rremote: Counting objects:  90% (88/97)\u001b[K\rremote: Counting objects:  91% (89/97)\u001b[K\rremote: Counting objects:  92% (90/97)\u001b[K\rremote: Counting objects:  93% (91/97)\u001b[K\rremote: Counting objects:  94% (92/97)\u001b[K\rremote: Counting objects:  95% (93/97)\u001b[K\rremote: Counting objects:  96% (94/97)\u001b[K\rremote: Counting objects:  97% (95/97)\u001b[K\rremote: Counting objects:  98% (96/97)\u001b[K\rremote: Counting objects: 100% (97/97)\u001b[K\rremote: Counting objects: 100% (97/97), done.\u001b[K\n",
            "remote: Compressing objects: 100% (72/72), done.\u001b[K\n",
            "remote: Total 2281 (delta 57), reused 46 (delta 25), pack-reused 2184\u001b[K\n",
            "Receiving objects: 100% (2281/2281), 2.63 MiB | 15.58 MiB/s, done.\n",
            "Resolving deltas: 100% (1578/1578), done.\n",
            "Processing /content/joeynmt\n",
            "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.16.0)\n",
            "Requirement already satisfied: pillow in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (6.2.2)\n",
            "Requirement already satisfied: numpy<2.0,>=1.14.5 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.17.5)\n",
            "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (45.1.0)\n",
            "Requirement already satisfied: torch>=1.1 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.4.0)\n",
            "Requirement already satisfied: tensorflow>=1.14 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.15.0)\n",
            "Requirement already satisfied: torchtext in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.3.1)\n",
            "Collecting sacrebleu>=1.3.6\n",
            "  Downloading https://files.pythonhosted.org/packages/45/31/1a135b964c169984b27fb2f7a50280fa7f8e6d9d404d8a9e596180487fd1/sacrebleu-1.4.3-py3-none-any.whl\n",
            "Collecting subword-nmt\n",
            "  Downloading https://files.pythonhosted.org/packages/74/60/6600a7bc09e7ab38bc53a48a20d8cae49b837f93f5842a41fe513a694912/subword_nmt-0.3.7-py2.py3-none-any.whl\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (3.1.3)\n",
            "Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (0.10.0)\n",
            "Collecting pyyaml>=5.1\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/3d/d9/ea9816aea31beeadccd03f1f8b625ecf8f645bd66744484d162d84803ce5/PyYAML-5.3.tar.gz (268kB)\n",
            "\u001b[K     |████████████████████████████████| 276kB 9.3MB/s \n",
            "\u001b[?25hCollecting pylint\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/e9/59/43fc36c5ee316bb9aeb7cf5329cdbdca89e5749c34d5602753827c0aa2dc/pylint-2.4.4-py3-none-any.whl (302kB)\n",
            "\u001b[K     |████████████████████████████████| 307kB 41.1MB/s \n",
            "\u001b[?25hRequirement already satisfied: six==1.12 in /usr/local/lib/python3.6/dist-packages (from joeynmt==0.0.1) (1.12.0)\n",
            "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (3.1.0)\n",
            "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.9.0)\n",
            "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.0)\n",
            "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.1.0)\n",
            "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.1.0)\n",
            "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.34.2)\n",
            "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.1.8)\n",
            "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.8.1)\n",
            "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.11.2)\n",
            "Requirement already satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (0.2.2)\n",
            "Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.0.8)\n",
            "Requirement already satisfied: tensorboard<1.16.0,>=1.15.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.0)\n",
            "Requirement already satisfied: tensorflow-estimator==1.15.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (1.15.1)\n",
            "Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.14->joeynmt==0.0.1) (3.10.0)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from torchtext->joeynmt==0.0.1) (4.28.1)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from torchtext->joeynmt==0.0.1) (2.21.0)\n",
            "Requirement already satisfied: typing in /usr/local/lib/python3.6/dist-packages (from sacrebleu>=1.3.6->joeynmt==0.0.1) (3.6.6)\n",
            "Collecting portalocker\n",
            "  Downloading https://files.pythonhosted.org/packages/91/db/7bc703c0760df726839e0699b7f78a4d8217fdc9c7fcb1b51b39c5a22a4e/portalocker-1.5.2-py2.py3-none-any.whl\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (1.1.0)\n",
            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (2.4.6)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (2.6.1)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->joeynmt==0.0.1) (0.10.0)\n",
            "Requirement already satisfied: pandas>=0.22.0 in /usr/local/lib/python3.6/dist-packages (from seaborn->joeynmt==0.0.1) (0.25.3)\n",
            "Requirement already satisfied: scipy>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from seaborn->joeynmt==0.0.1) (1.4.1)\n",
            "Collecting mccabe<0.7,>=0.6\n",
            "  Downloading https://files.pythonhosted.org/packages/87/89/479dc97e18549e21354893e4ee4ef36db1d237534982482c3681ee6e7b57/mccabe-0.6.1-py2.py3-none-any.whl\n",
            "Collecting isort<5,>=4.2.5\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/e5/b0/c121fd1fa3419ea9bfd55c7f9c4fedfec5143208d8c7ad3ce3db6c623c21/isort-4.3.21-py2.py3-none-any.whl (42kB)\n",
            "\u001b[K     |████████████████████████████████| 51kB 8.5MB/s \n",
            "\u001b[?25hCollecting astroid<2.4,>=2.3.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/ad/ae/86734823047962e7b8c8529186a1ac4a7ca19aaf1aa0c7713c022ef593fd/astroid-2.3.3-py3-none-any.whl (205kB)\n",
            "\u001b[K     |████████████████████████████████| 215kB 58.1MB/s \n",
            "\u001b[?25hRequirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow>=1.14->joeynmt==0.0.1) (2.8.0)\n",
            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow>=1.14->joeynmt==0.0.1) (3.1.1)\n",
            "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow>=1.14->joeynmt==0.0.1) (0.16.1)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (2019.11.28)\n",
            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (3.0.4)\n",
            "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (1.24.3)\n",
            "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->torchtext->joeynmt==0.0.1) (2.8)\n",
            "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.22.0->seaborn->joeynmt==0.0.1) (2018.9)\n",
            "Collecting typed-ast<1.5,>=1.4.0; implementation_name == \"cpython\" and python_version < \"3.8\"\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/90/ed/5459080d95eb87a02fe860d447197be63b6e2b5e9ff73c2b0a85622994f4/typed_ast-1.4.1-cp36-cp36m-manylinux1_x86_64.whl (737kB)\n",
            "\u001b[K     |████████████████████████████████| 747kB 69.7MB/s \n",
            "\u001b[?25hCollecting lazy-object-proxy==1.4.*\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/0b/dd/b1e3407e9e6913cf178e506cd0dee818e58694d9a5cd1984e3f6a8b9a10f/lazy_object_proxy-1.4.3-cp36-cp36m-manylinux1_x86_64.whl (55kB)\n",
            "\u001b[K     |████████████████████████████████| 61kB 10.3MB/s \n",
            "\u001b[?25hBuilding wheels for collected packages: joeynmt, pyyaml\n",
            "  Building wheel for joeynmt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for joeynmt: filename=joeynmt-0.0.1-cp36-none-any.whl size=73054 sha256=1324e06966a9e79bc3d60e55f6a3f9a5cfa3c289d9af531a3cfb64f8ecacc06b\n",
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-udwyeym6/wheels/db/01/db/751cc9f3e7f6faec127c43644ba250a3ea7ad200594aeda70a\n",
            "  Building wheel for pyyaml (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pyyaml: filename=PyYAML-5.3-cp36-cp36m-linux_x86_64.whl size=44229 sha256=484cf5ff7c346bd5eaa30dfc8c1d62b918933e1c2aa37234280f1e1a4cbbc692\n",
            "  Stored in directory: /root/.cache/pip/wheels/e4/76/4d/a95b8dd7b452b69e8ed4f68b69e1b55e12c9c9624dd962b191\n",
            "Successfully built joeynmt pyyaml\n",
            "Installing collected packages: portalocker, sacrebleu, subword-nmt, pyyaml, mccabe, isort, typed-ast, lazy-object-proxy, astroid, pylint, joeynmt\n",
            "  Found existing installation: PyYAML 3.13\n",
            "    Uninstalling PyYAML-3.13:\n",
            "      Successfully uninstalled PyYAML-3.13\n",
            "Successfully installed astroid-2.3.3 isort-4.3.21 joeynmt-0.0.1 lazy-object-proxy-1.4.3 mccabe-0.6.1 portalocker-1.5.2 pylint-2.4.4 pyyaml-5.3 sacrebleu-1.4.3 subword-nmt-0.3.7 typed-ast-1.4.1\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "AaE77Tcppex9"
      },
      "source": [
        "# Preprocessing the Data into Subword BPE Tokens\n",
        "\n",
        "- One of the most powerful improvements for agglutinative languages (a feature of most Bantu languages) is using BPE tokenization [ (Sennrich, 2015) ](https://arxiv.org/abs/1508.07909).\n",
        "\n",
        "- It was also shown that by optimizing the umber of BPE codes we significantly improve results for low-resourced languages [(Sennrich, 2019)](https://www.aclweb.org/anthology/P19-1021) [(Martinus, 2019)](https://arxiv.org/abs/1906.05685)\n",
        "\n",
        "- Below we have the scripts for doing BPE tokenization of our data. We use 4000 tokens as recommended by [(Sennrich, 2019)](https://www.aclweb.org/anthology/P19-1021). You do not need to change anything. Simply running the below will be suitable. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "H-TyjtmXB1mL",
        "outputId": "67528fd4-b111-461f-9a68-d05323d998a2",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 416
        }
      },
      "source": [
        "# One of the huge boosts in NMT performance was to use a different method of tokenizing. \n",
        "# Usually, NMT would tokenize by words. However, using a method called BPE gave amazing boosts to performance\n",
        "\n",
        "# Do subword NMT\n",
        "from os import path\n",
        "os.environ[\"src\"] = source_language # Sets them in bash as well, since we often use bash scripts\n",
        "os.environ[\"tgt\"] = target_language\n",
        "\n",
        "# Learn BPEs on the training data.\n",
        "os.environ[\"data_path\"] = path.join(\"joeynmt\", \"data\", source_language + target_language) # Herman! \n",
        "! subword-nmt learn-joint-bpe-and-vocab --input train.$src train.$tgt -s 4000 -o bpe.codes.4000 --write-vocabulary vocab.$src vocab.$tgt\n",
        "\n",
        "# Apply BPE splits to the development and test data.\n",
        "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < train.$src > train.bpe.$src\n",
        "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < train.$tgt > train.bpe.$tgt\n",
        "\n",
        "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < dev.$src > dev.bpe.$src\n",
        "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < dev.$tgt > dev.bpe.$tgt\n",
        "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$src < test.$src > test.bpe.$src\n",
        "! subword-nmt apply-bpe -c bpe.codes.4000 --vocabulary vocab.$tgt < test.$tgt > test.bpe.$tgt\n",
        "\n",
        "# Create directory, move everyone we care about to the correct location\n",
        "! mkdir -p $data_path\n",
        "! cp train.* $data_path\n",
        "! cp test.* $data_path\n",
        "! cp dev.* $data_path\n",
        "! cp bpe.codes.4000 $data_path\n",
        "! ls $data_path\n",
        "\n",
        "# Also move everything we care about to a mounted location in google drive (relevant if running in colab) at gdrive_path\n",
        "! cp train.* \"$gdrive_path\"\n",
        "! cp test.* \"$gdrive_path\"\n",
        "! cp dev.* \"$gdrive_path\"\n",
        "! cp bpe.codes.4000 \"$gdrive_path\"\n",
        "! ls \"$gdrive_path\"\n",
        "\n",
        "# Create that vocab using build_vocab\n",
        "! sudo chmod 777 joeynmt/scripts/build_vocab.py\n",
        "! joeynmt/scripts/build_vocab.py joeynmt/data/$src$tgt/train.bpe.$src joeynmt/data/$src$tgt/train.bpe.$tgt --output_path joeynmt/data/$src$tgt/vocab.txt\n",
        "\n",
        "# Some output\n",
        "! echo \"BPE Dendi Sentences\"\n",
        "! tail -n 5 test.bpe.$tgt\n",
        "! echo \"Combined BPE Vocab\"\n",
        "! tail -n 10 joeynmt/data/$src$tgt/vocab.txt  # Herman"
      ],
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "bpe.codes.4000\tdev.csv  test.bpe.ddn  test.ddn       train.bpe.en  train.en\n",
            "dev.bpe.ddn\tdev.ddn  test.bpe.en   test.en\t      train.csv\n",
            "dev.bpe.en\tdev.en\t test.csv      train.bpe.ddn  train.ddn\n",
            "bpe.codes.4000\tdev.csv  models        test.csv  train.bpe.ddn\ttrain.ddn\n",
            "dev.bpe.ddn\tdev.ddn  test.bpe.ddn  test.ddn  train.bpe.en\ttrain.en\n",
            "dev.bpe.en\tdev.en\t test.bpe.en   test.en\t train.csv\n",
            "BPE Dendi Sentences\n",
            "Nɑɑne gɑɑ no, Moisi nɑ Bɔmdɑɑrum jingɑru tɛ. À nɑ kuri se@@ es@@ ee hɑli hɑlɑci@@ kɔ kɑ ɑ̀ nɑ koo hɑibɔrɔ sintine yom hɑlɑci mɑ si kɑmbɛ dɛcɛ Isirɑilɑ ŋmɔne yom gɑɑ.\n",
            "Bɔrɔ hin@@ no, ɑ̀ bine ji@@ si@@ ri hinno kunɑ no ɑ̀ gɑ ihinno yom kɑɑ tɛrɛ. Bɔrɔ lɑlɔ mo, ɑ̀ bine ji@@ si@@ ri lɑlɔ kunɑ no ɑ̀ gɑ ilɑlɔ yom kɑɑ tɛrɛ.\n",
            "À nɑ ɑ dɑm gbei kunɑ bɑ kɑ ɑ ci Ikpɛ cɛn@@ ɑndikɔ ndɑ guruguz@@ ɑndikɔ ndɑ bɔrɔ fu@@ tu. Ammɑ ɑ du suuji domi zɑm kɑ bei kunɑ nɑ ɑ nɑ ɑ̀ tɛ zɑm kɑ nɑɑne kunɑ.\n",
            "Wom kunɑ bɔrɔ fɔ kulu mɑ si lɑɑkɑli ndɑ ngɑ bɔm mur@@ ɑ@@ du yom hinn@@ e, ɑmmɑ ɑ̀ mɑ lɑɑkɑli mɑ ndɑ bɔrɔ fɔ yom ŋmɔne.\n",
            "Kɑ wiciri kɑmbu bɑ tɔ ɑ̀ coobɑɑbɑize yom kɑɑ ɑ̀ dɔ kɑ cii: Nungu wɔ gɑnji no. Lɔkɑci mo moor@@ u. Jɑmɑ wɔ tɑm ǹ mɑ kpei kpɑɑrɑ yom kunɑ kɑ ngei ŋmɑɑri dei kɑ ŋmɑɑ.\n",
            "Combined BPE Vocab\n",
            "clai@@\n",
            "ɑ̀@@\n",
            "secu@@\n",
            "doni\n",
            "uda@@\n",
            "Hol@@\n",
            "Dɑfi@@\n",
            "Æ@@\n",
            "pos@@\n",
            "ɔkɑci\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "IlMitUHR8Qy-",
        "outputId": "52358ed6-5169-43b4-9c2f-23c8b1351b1f",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 69
        }
      },
      "source": [
        "# Also move everything we care about to a mounted location in google drive (relevant if running in colab) at gdrive_path\n",
        "! cp train.* \"$gdrive_path\"\n",
        "! cp test.* \"$gdrive_path\"\n",
        "! cp dev.* \"$gdrive_path\"\n",
        "! cp bpe.codes.4000 \"$gdrive_path\"\n",
        "! ls \"$gdrive_path\""
      ],
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "bpe.codes.4000\tdev.csv  models        test.csv  train.bpe.ddn\ttrain.ddn\n",
            "dev.bpe.ddn\tdev.ddn  test.bpe.ddn  test.ddn  train.bpe.en\ttrain.en\n",
            "dev.bpe.en\tdev.en\t test.bpe.en   test.en\t train.csv\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "Ixmzi60WsUZ8"
      },
      "source": [
        "# Creating the JoeyNMT Config\n",
        "\n",
        "JoeyNMT requires a yaml config. We provide a template below. We've also set a number of defaults with it, that you may play with!\n",
        "\n",
        "- We used Transformer architecture \n",
        "- We set our dropout to reasonably high: 0.3 (recommended in  [(Sennrich, 2019)](https://www.aclweb.org/anthology/P19-1021))\n",
        "\n",
        "Things worth playing with:\n",
        "- The batch size (also recommended to change for low-resourced languages)\n",
        "- The number of epochs (we've set it at 30 just so it runs in about an hour, for testing purposes)\n",
        "- The decoder options (beam_size, alpha)\n",
        "- Evaluation metrics (BLEU versus Crhf4)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "PIs1lY2hxMsl",
        "colab": {}
      },
      "source": [
        "# This creates the config file for our JoeyNMT system. It might seem overwhelming so we've provided a couple of useful parameters you'll need to update\n",
        "# (You can of course play with all the parameters if you'd like!)\n",
        "\n",
        "name = '%s%s' % (source_language, target_language)\n",
        "gdrive_path = os.environ[\"gdrive_path\"]\n",
        "\n",
        "# Create the config\n",
        "config = \"\"\"\n",
        "name: \"{name}_transformer\"\n",
        "\n",
        "data:\n",
        "    src: \"{source_language}\"\n",
        "    trg: \"{target_language}\"\n",
        "    train: \"data/{name}/train.bpe\"\n",
        "    dev:   \"data/{name}/dev.bpe\"\n",
        "    test:  \"data/{name}/test.bpe\"\n",
        "    level: \"bpe\"\n",
        "    lowercase: False\n",
        "    max_sent_length: 100\n",
        "    src_vocab: \"data/{name}/vocab.txt\"\n",
        "    trg_vocab: \"data/{name}/vocab.txt\"\n",
        "\n",
        "testing:\n",
        "    beam_size: 5\n",
        "    alpha: 1.0\n",
        "\n",
        "training:\n",
        "    #load_model: \"{gdrive_path}/models/{name}_transformer/1.ckpt\" # if uncommented, load a pre-trained model from this checkpoint\n",
        "    random_seed: 42\n",
        "    optimizer: \"adam\"\n",
        "    normalization: \"tokens\"\n",
        "    adam_betas: [0.9, 0.999] \n",
        "    scheduling: \"plateau\"           # TODO: try switching from plateau to Noam scheduling\n",
        "    patience: 5                     # For plateau: decrease learning rate by decrease_factor if validation score has not improved for this many validation rounds.\n",
        "    learning_rate_factor: 0.5       # factor for Noam scheduler (used with Transformer)\n",
        "    learning_rate_warmup: 1000      # warmup steps for Noam scheduler (used with Transformer)\n",
        "    decrease_factor: 0.7\n",
        "    loss: \"crossentropy\"\n",
        "    learning_rate: 0.0003\n",
        "    learning_rate_min: 0.00000001\n",
        "    weight_decay: 0.0\n",
        "    label_smoothing: 0.1\n",
        "    batch_size: 4096\n",
        "    batch_type: \"token\"\n",
        "    eval_batch_size: 3600\n",
        "    eval_batch_type: \"token\"\n",
        "    batch_multiplier: 1\n",
        "    early_stopping_metric: \"ppl\"\n",
        "    epochs: 100                     # TODO: Decrease for when playing around and checking of working. Around 30 is sufficient to check if its working at all\n",
        "    validation_freq: 1000          # TODO: Set to at least once per epoch.\n",
        "    logging_freq: 100\n",
        "    eval_metric: \"bleu\"\n",
        "    model_dir: \"models/{name}_transformer\"\n",
        "    overwrite: False               # TODO: Set to True if you want to overwrite possibly existing models. \n",
        "    shuffle: True\n",
        "    use_cuda: True\n",
        "    max_output_length: 100\n",
        "    print_valid_sents: [0, 1, 2, 3]\n",
        "    keep_last_ckpts: 3\n",
        "\n",
        "model:\n",
        "    initializer: \"xavier\"\n",
        "    bias_initializer: \"zeros\"\n",
        "    init_gain: 1.0\n",
        "    embed_initializer: \"xavier\"\n",
        "    embed_init_gain: 1.0\n",
        "    tied_embeddings: True\n",
        "    tied_softmax: True\n",
        "    encoder:\n",
        "        type: \"transformer\"\n",
        "        num_layers: 6\n",
        "        num_heads: 4             # TODO: Increase to 8 for larger data.\n",
        "        embeddings:\n",
        "            embedding_dim: 256   # TODO: Increase to 512 for larger data.\n",
        "            scale: True\n",
        "            dropout: 0.2\n",
        "        # typically ff_size = 4 x hidden_size\n",
        "        hidden_size: 256         # TODO: Increase to 512 for larger data.\n",
        "        ff_size: 1024            # TODO: Increase to 2048 for larger data.\n",
        "        dropout: 0.3\n",
        "    decoder:\n",
        "        type: \"transformer\"\n",
        "        num_layers: 6\n",
        "        num_heads: 4              # TODO: Increase to 8 for larger data.\n",
        "        embeddings:\n",
        "            embedding_dim: 256    # TODO: Increase to 512 for larger data.\n",
        "            scale: True\n",
        "            dropout: 0.2\n",
        "        # typically ff_size = 4 x hidden_size\n",
        "        hidden_size: 256         # TODO: Increase to 512 for larger data.\n",
        "        ff_size: 1024            # TODO: Increase to 2048 for larger data.\n",
        "        dropout: 0.3\n",
        "\"\"\".format(name=name, gdrive_path=os.environ[\"gdrive_path\"], source_language=source_language, target_language=target_language)\n",
        "with open(\"joeynmt/configs/transformer_{name}.yaml\".format(name=name),'w') as f:\n",
        "    f.write(config)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "pIifxE3Qzuvs"
      },
      "source": [
        "# Train the Model\n",
        "\n",
        "This single line of joeynmt runs the training using the config we made above"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "6ZBPFwT94WpI",
        "outputId": "2b84de93-977a-4dfa-e7c3-8a7266d93ce5",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        }
      },
      "source": [
        "# Train the model\n",
        "# You can press Ctrl-C to stop. And then run the next cell to save your checkpoints! \n",
        "!cd joeynmt; python3 -m joeynmt train configs/transformer_$src$tgt.yaml"
      ],
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "2020-02-12 07:57:25,866 Hello! This is Joey-NMT.\n",
            "2020-02-12 07:57:27,117 Total params: 12102656\n",
            "2020-02-12 07:57:27,118 Trainable parameters: ['decoder.layer_norm.bias', 'decoder.layer_norm.weight', 'decoder.layers.0.dec_layer_norm.bias', 'decoder.layers.0.dec_layer_norm.weight', 'decoder.layers.0.feed_forward.layer_norm.bias', 'decoder.layers.0.feed_forward.layer_norm.weight', 'decoder.layers.0.feed_forward.pwff_layer.0.bias', 'decoder.layers.0.feed_forward.pwff_layer.0.weight', 'decoder.layers.0.feed_forward.pwff_layer.3.bias', 'decoder.layers.0.feed_forward.pwff_layer.3.weight', 'decoder.layers.0.src_trg_att.k_layer.bias', 'decoder.layers.0.src_trg_att.k_layer.weight', 'decoder.layers.0.src_trg_att.output_layer.bias', 'decoder.layers.0.src_trg_att.output_layer.weight', 'decoder.layers.0.src_trg_att.q_layer.bias', 'decoder.layers.0.src_trg_att.q_layer.weight', 'decoder.layers.0.src_trg_att.v_layer.bias', 'decoder.layers.0.src_trg_att.v_layer.weight', 'decoder.layers.0.trg_trg_att.k_layer.bias', 'decoder.layers.0.trg_trg_att.k_layer.weight', 'decoder.layers.0.trg_trg_att.output_layer.bias', 'decoder.layers.0.trg_trg_att.output_layer.weight', 'decoder.layers.0.trg_trg_att.q_layer.bias', 'decoder.layers.0.trg_trg_att.q_layer.weight', 'decoder.layers.0.trg_trg_att.v_layer.bias', 'decoder.layers.0.trg_trg_att.v_layer.weight', 'decoder.layers.0.x_layer_norm.bias', 'decoder.layers.0.x_layer_norm.weight', 'decoder.layers.1.dec_layer_norm.bias', 'decoder.layers.1.dec_layer_norm.weight', 'decoder.layers.1.feed_forward.layer_norm.bias', 'decoder.layers.1.feed_forward.layer_norm.weight', 'decoder.layers.1.feed_forward.pwff_layer.0.bias', 'decoder.layers.1.feed_forward.pwff_layer.0.weight', 'decoder.layers.1.feed_forward.pwff_layer.3.bias', 'decoder.layers.1.feed_forward.pwff_layer.3.weight', 'decoder.layers.1.src_trg_att.k_layer.bias', 'decoder.layers.1.src_trg_att.k_layer.weight', 'decoder.layers.1.src_trg_att.output_layer.bias', 'decoder.layers.1.src_trg_att.output_layer.weight', 'decoder.layers.1.src_trg_att.q_layer.bias', 'decoder.layers.1.src_trg_att.q_layer.weight', 'decoder.layers.1.src_trg_att.v_layer.bias', 'decoder.layers.1.src_trg_att.v_layer.weight', 'decoder.layers.1.trg_trg_att.k_layer.bias', 'decoder.layers.1.trg_trg_att.k_layer.weight', 'decoder.layers.1.trg_trg_att.output_layer.bias', 'decoder.layers.1.trg_trg_att.output_layer.weight', 'decoder.layers.1.trg_trg_att.q_layer.bias', 'decoder.layers.1.trg_trg_att.q_layer.weight', 'decoder.layers.1.trg_trg_att.v_layer.bias', 'decoder.layers.1.trg_trg_att.v_layer.weight', 'decoder.layers.1.x_layer_norm.bias', 'decoder.layers.1.x_layer_norm.weight', 'decoder.layers.2.dec_layer_norm.bias', 'decoder.layers.2.dec_layer_norm.weight', 'decoder.layers.2.feed_forward.layer_norm.bias', 'decoder.layers.2.feed_forward.layer_norm.weight', 'decoder.layers.2.feed_forward.pwff_layer.0.bias', 'decoder.layers.2.feed_forward.pwff_layer.0.weight', 'decoder.layers.2.feed_forward.pwff_layer.3.bias', 'decoder.layers.2.feed_forward.pwff_layer.3.weight', 'decoder.layers.2.src_trg_att.k_layer.bias', 'decoder.layers.2.src_trg_att.k_layer.weight', 'decoder.layers.2.src_trg_att.output_layer.bias', 'decoder.layers.2.src_trg_att.output_layer.weight', 'decoder.layers.2.src_trg_att.q_layer.bias', 'decoder.layers.2.src_trg_att.q_layer.weight', 'decoder.layers.2.src_trg_att.v_layer.bias', 'decoder.layers.2.src_trg_att.v_layer.weight', 'decoder.layers.2.trg_trg_att.k_layer.bias', 'decoder.layers.2.trg_trg_att.k_layer.weight', 'decoder.layers.2.trg_trg_att.output_layer.bias', 'decoder.layers.2.trg_trg_att.output_layer.weight', 
'decoder.layers.2.trg_trg_att.q_layer.bias', 'decoder.layers.2.trg_trg_att.q_layer.weight', 'decoder.layers.2.trg_trg_att.v_layer.bias', 'decoder.layers.2.trg_trg_att.v_layer.weight', 'decoder.layers.2.x_layer_norm.bias', 'decoder.layers.2.x_layer_norm.weight', 'decoder.layers.3.dec_layer_norm.bias', 'decoder.layers.3.dec_layer_norm.weight', 'decoder.layers.3.feed_forward.layer_norm.bias', 'decoder.layers.3.feed_forward.layer_norm.weight', 'decoder.layers.3.feed_forward.pwff_layer.0.bias', 'decoder.layers.3.feed_forward.pwff_layer.0.weight', 'decoder.layers.3.feed_forward.pwff_layer.3.bias', 'decoder.layers.3.feed_forward.pwff_layer.3.weight', 'decoder.layers.3.src_trg_att.k_layer.bias', 'decoder.layers.3.src_trg_att.k_layer.weight', 'decoder.layers.3.src_trg_att.output_layer.bias', 'decoder.layers.3.src_trg_att.output_layer.weight', 'decoder.layers.3.src_trg_att.q_layer.bias', 'decoder.layers.3.src_trg_att.q_layer.weight', 'decoder.layers.3.src_trg_att.v_layer.bias', 'decoder.layers.3.src_trg_att.v_layer.weight', 'decoder.layers.3.trg_trg_att.k_layer.bias', 'decoder.layers.3.trg_trg_att.k_layer.weight', 'decoder.layers.3.trg_trg_att.output_layer.bias', 'decoder.layers.3.trg_trg_att.output_layer.weight', 'decoder.layers.3.trg_trg_att.q_layer.bias', 'decoder.layers.3.trg_trg_att.q_layer.weight', 'decoder.layers.3.trg_trg_att.v_layer.bias', 'decoder.layers.3.trg_trg_att.v_layer.weight', 'decoder.layers.3.x_layer_norm.bias', 'decoder.layers.3.x_layer_norm.weight', 'decoder.layers.4.dec_layer_norm.bias', 'decoder.layers.4.dec_layer_norm.weight', 'decoder.layers.4.feed_forward.layer_norm.bias', 'decoder.layers.4.feed_forward.layer_norm.weight', 'decoder.layers.4.feed_forward.pwff_layer.0.bias', 'decoder.layers.4.feed_forward.pwff_layer.0.weight', 'decoder.layers.4.feed_forward.pwff_layer.3.bias', 'decoder.layers.4.feed_forward.pwff_layer.3.weight', 'decoder.layers.4.src_trg_att.k_layer.bias', 'decoder.layers.4.src_trg_att.k_layer.weight', 'decoder.layers.4.src_trg_att.output_layer.bias', 'decoder.layers.4.src_trg_att.output_layer.weight', 'decoder.layers.4.src_trg_att.q_layer.bias', 'decoder.layers.4.src_trg_att.q_layer.weight', 'decoder.layers.4.src_trg_att.v_layer.bias', 'decoder.layers.4.src_trg_att.v_layer.weight', 'decoder.layers.4.trg_trg_att.k_layer.bias', 'decoder.layers.4.trg_trg_att.k_layer.weight', 'decoder.layers.4.trg_trg_att.output_layer.bias', 'decoder.layers.4.trg_trg_att.output_layer.weight', 'decoder.layers.4.trg_trg_att.q_layer.bias', 'decoder.layers.4.trg_trg_att.q_layer.weight', 'decoder.layers.4.trg_trg_att.v_layer.bias', 'decoder.layers.4.trg_trg_att.v_layer.weight', 'decoder.layers.4.x_layer_norm.bias', 'decoder.layers.4.x_layer_norm.weight', 'decoder.layers.5.dec_layer_norm.bias', 'decoder.layers.5.dec_layer_norm.weight', 'decoder.layers.5.feed_forward.layer_norm.bias', 'decoder.layers.5.feed_forward.layer_norm.weight', 'decoder.layers.5.feed_forward.pwff_layer.0.bias', 'decoder.layers.5.feed_forward.pwff_layer.0.weight', 'decoder.layers.5.feed_forward.pwff_layer.3.bias', 'decoder.layers.5.feed_forward.pwff_layer.3.weight', 'decoder.layers.5.src_trg_att.k_layer.bias', 'decoder.layers.5.src_trg_att.k_layer.weight', 'decoder.layers.5.src_trg_att.output_layer.bias', 'decoder.layers.5.src_trg_att.output_layer.weight', 'decoder.layers.5.src_trg_att.q_layer.bias', 'decoder.layers.5.src_trg_att.q_layer.weight', 'decoder.layers.5.src_trg_att.v_layer.bias', 'decoder.layers.5.src_trg_att.v_layer.weight', 'decoder.layers.5.trg_trg_att.k_layer.bias', 
'decoder.layers.5.trg_trg_att.k_layer.weight', 'decoder.layers.5.trg_trg_att.output_layer.bias', 'decoder.layers.5.trg_trg_att.output_layer.weight', 'decoder.layers.5.trg_trg_att.q_layer.bias', 'decoder.layers.5.trg_trg_att.q_layer.weight', 'decoder.layers.5.trg_trg_att.v_layer.bias', 'decoder.layers.5.trg_trg_att.v_layer.weight', 'decoder.layers.5.x_layer_norm.bias', 'decoder.layers.5.x_layer_norm.weight', 'encoder.layer_norm.bias', 'encoder.layer_norm.weight', 'encoder.layers.0.feed_forward.layer_norm.bias', 'encoder.layers.0.feed_forward.layer_norm.weight', 'encoder.layers.0.feed_forward.pwff_layer.0.bias', 'encoder.layers.0.feed_forward.pwff_layer.0.weight', 'encoder.layers.0.feed_forward.pwff_layer.3.bias', 'encoder.layers.0.feed_forward.pwff_layer.3.weight', 'encoder.layers.0.layer_norm.bias', 'encoder.layers.0.layer_norm.weight', 'encoder.layers.0.src_src_att.k_layer.bias', 'encoder.layers.0.src_src_att.k_layer.weight', 'encoder.layers.0.src_src_att.output_layer.bias', 'encoder.layers.0.src_src_att.output_layer.weight', 'encoder.layers.0.src_src_att.q_layer.bias', 'encoder.layers.0.src_src_att.q_layer.weight', 'encoder.layers.0.src_src_att.v_layer.bias', 'encoder.layers.0.src_src_att.v_layer.weight', 'encoder.layers.1.feed_forward.layer_norm.bias', 'encoder.layers.1.feed_forward.layer_norm.weight', 'encoder.layers.1.feed_forward.pwff_layer.0.bias', 'encoder.layers.1.feed_forward.pwff_layer.0.weight', 'encoder.layers.1.feed_forward.pwff_layer.3.bias', 'encoder.layers.1.feed_forward.pwff_layer.3.weight', 'encoder.layers.1.layer_norm.bias', 'encoder.layers.1.layer_norm.weight', 'encoder.layers.1.src_src_att.k_layer.bias', 'encoder.layers.1.src_src_att.k_layer.weight', 'encoder.layers.1.src_src_att.output_layer.bias', 'encoder.layers.1.src_src_att.output_layer.weight', 'encoder.layers.1.src_src_att.q_layer.bias', 'encoder.layers.1.src_src_att.q_layer.weight', 'encoder.layers.1.src_src_att.v_layer.bias', 'encoder.layers.1.src_src_att.v_layer.weight', 'encoder.layers.2.feed_forward.layer_norm.bias', 'encoder.layers.2.feed_forward.layer_norm.weight', 'encoder.layers.2.feed_forward.pwff_layer.0.bias', 'encoder.layers.2.feed_forward.pwff_layer.0.weight', 'encoder.layers.2.feed_forward.pwff_layer.3.bias', 'encoder.layers.2.feed_forward.pwff_layer.3.weight', 'encoder.layers.2.layer_norm.bias', 'encoder.layers.2.layer_norm.weight', 'encoder.layers.2.src_src_att.k_layer.bias', 'encoder.layers.2.src_src_att.k_layer.weight', 'encoder.layers.2.src_src_att.output_layer.bias', 'encoder.layers.2.src_src_att.output_layer.weight', 'encoder.layers.2.src_src_att.q_layer.bias', 'encoder.layers.2.src_src_att.q_layer.weight', 'encoder.layers.2.src_src_att.v_layer.bias', 'encoder.layers.2.src_src_att.v_layer.weight', 'encoder.layers.3.feed_forward.layer_norm.bias', 'encoder.layers.3.feed_forward.layer_norm.weight', 'encoder.layers.3.feed_forward.pwff_layer.0.bias', 'encoder.layers.3.feed_forward.pwff_layer.0.weight', 'encoder.layers.3.feed_forward.pwff_layer.3.bias', 'encoder.layers.3.feed_forward.pwff_layer.3.weight', 'encoder.layers.3.layer_norm.bias', 'encoder.layers.3.layer_norm.weight', 'encoder.layers.3.src_src_att.k_layer.bias', 'encoder.layers.3.src_src_att.k_layer.weight', 'encoder.layers.3.src_src_att.output_layer.bias', 'encoder.layers.3.src_src_att.output_layer.weight', 'encoder.layers.3.src_src_att.q_layer.bias', 'encoder.layers.3.src_src_att.q_layer.weight', 'encoder.layers.3.src_src_att.v_layer.bias', 'encoder.layers.3.src_src_att.v_layer.weight', 
'encoder.layers.4.feed_forward.layer_norm.bias', 'encoder.layers.4.feed_forward.layer_norm.weight', 'encoder.layers.4.feed_forward.pwff_layer.0.bias', 'encoder.layers.4.feed_forward.pwff_layer.0.weight', 'encoder.layers.4.feed_forward.pwff_layer.3.bias', 'encoder.layers.4.feed_forward.pwff_layer.3.weight', 'encoder.layers.4.layer_norm.bias', 'encoder.layers.4.layer_norm.weight', 'encoder.layers.4.src_src_att.k_layer.bias', 'encoder.layers.4.src_src_att.k_layer.weight', 'encoder.layers.4.src_src_att.output_layer.bias', 'encoder.layers.4.src_src_att.output_layer.weight', 'encoder.layers.4.src_src_att.q_layer.bias', 'encoder.layers.4.src_src_att.q_layer.weight', 'encoder.layers.4.src_src_att.v_layer.bias', 'encoder.layers.4.src_src_att.v_layer.weight', 'encoder.layers.5.feed_forward.layer_norm.bias', 'encoder.layers.5.feed_forward.layer_norm.weight', 'encoder.layers.5.feed_forward.pwff_layer.0.bias', 'encoder.layers.5.feed_forward.pwff_layer.0.weight', 'encoder.layers.5.feed_forward.pwff_layer.3.bias', 'encoder.layers.5.feed_forward.pwff_layer.3.weight', 'encoder.layers.5.layer_norm.bias', 'encoder.layers.5.layer_norm.weight', 'encoder.layers.5.src_src_att.k_layer.bias', 'encoder.layers.5.src_src_att.k_layer.weight', 'encoder.layers.5.src_src_att.output_layer.bias', 'encoder.layers.5.src_src_att.output_layer.weight', 'encoder.layers.5.src_src_att.q_layer.bias', 'encoder.layers.5.src_src_att.q_layer.weight', 'encoder.layers.5.src_src_att.v_layer.bias', 'encoder.layers.5.src_src_att.v_layer.weight', 'src_embed.lut.weight']\n",
            "2020-02-12 07:57:37,011 cfg.name                           : enddn_transformer\n",
            "2020-02-12 07:57:37,011 cfg.data.src                       : en\n",
            "2020-02-12 07:57:37,012 cfg.data.trg                       : ddn\n",
            "2020-02-12 07:57:37,012 cfg.data.train                     : data/enddn/train.bpe\n",
            "2020-02-12 07:57:37,012 cfg.data.dev                       : data/enddn/dev.bpe\n",
            "2020-02-12 07:57:37,012 cfg.data.test                      : data/enddn/test.bpe\n",
            "2020-02-12 07:57:37,012 cfg.data.level                     : bpe\n",
            "2020-02-12 07:57:37,012 cfg.data.lowercase                 : False\n",
            "2020-02-12 07:57:37,012 cfg.data.max_sent_length           : 100\n",
            "2020-02-12 07:57:37,012 cfg.data.src_vocab                 : data/enddn/vocab.txt\n",
            "2020-02-12 07:57:37,012 cfg.data.trg_vocab                 : data/enddn/vocab.txt\n",
            "2020-02-12 07:57:37,012 cfg.testing.beam_size              : 5\n",
            "2020-02-12 07:57:37,012 cfg.testing.alpha                  : 1.0\n",
            "2020-02-12 07:57:37,012 cfg.training.random_seed           : 42\n",
            "2020-02-12 07:57:37,012 cfg.training.optimizer             : adam\n",
            "2020-02-12 07:57:37,013 cfg.training.normalization         : tokens\n",
            "2020-02-12 07:57:37,013 cfg.training.adam_betas            : [0.9, 0.999]\n",
            "2020-02-12 07:57:37,013 cfg.training.scheduling            : plateau\n",
            "2020-02-12 07:57:37,013 cfg.training.patience              : 5\n",
            "2020-02-12 07:57:37,013 cfg.training.learning_rate_factor  : 0.5\n",
            "2020-02-12 07:57:37,013 cfg.training.learning_rate_warmup  : 1000\n",
            "2020-02-12 07:57:37,013 cfg.training.decrease_factor       : 0.7\n",
            "2020-02-12 07:57:37,013 cfg.training.loss                  : crossentropy\n",
            "2020-02-12 07:57:37,013 cfg.training.learning_rate         : 0.0003\n",
            "2020-02-12 07:57:37,013 cfg.training.learning_rate_min     : 1e-08\n",
            "2020-02-12 07:57:37,013 cfg.training.weight_decay          : 0.0\n",
            "2020-02-12 07:57:37,013 cfg.training.label_smoothing       : 0.1\n",
            "2020-02-12 07:57:37,013 cfg.training.batch_size            : 4096\n",
            "2020-02-12 07:57:37,013 cfg.training.batch_type            : token\n",
            "2020-02-12 07:57:37,013 cfg.training.eval_batch_size       : 3600\n",
            "2020-02-12 07:57:37,013 cfg.training.eval_batch_type       : token\n",
            "2020-02-12 07:57:37,013 cfg.training.batch_multiplier      : 1\n",
            "2020-02-12 07:57:37,013 cfg.training.early_stopping_metric : ppl\n",
            "2020-02-12 07:57:37,013 cfg.training.epochs                : 100\n",
            "2020-02-12 07:57:37,013 cfg.training.validation_freq       : 1000\n",
            "2020-02-12 07:57:37,013 cfg.training.logging_freq          : 100\n",
            "2020-02-12 07:57:37,013 cfg.training.eval_metric           : bleu\n",
            "2020-02-12 07:57:37,014 cfg.training.model_dir             : models/enddn_transformer\n",
            "2020-02-12 07:57:37,014 cfg.training.overwrite             : False\n",
            "2020-02-12 07:57:37,014 cfg.training.shuffle               : True\n",
            "2020-02-12 07:57:37,014 cfg.training.use_cuda              : True\n",
            "2020-02-12 07:57:37,014 cfg.training.max_output_length     : 100\n",
            "2020-02-12 07:57:37,014 cfg.training.print_valid_sents     : [0, 1, 2, 3]\n",
            "2020-02-12 07:57:37,014 cfg.training.keep_last_ckpts       : 3\n",
            "2020-02-12 07:57:37,014 cfg.model.initializer              : xavier\n",
            "2020-02-12 07:57:37,014 cfg.model.bias_initializer         : zeros\n",
            "2020-02-12 07:57:37,014 cfg.model.init_gain                : 1.0\n",
            "2020-02-12 07:57:37,014 cfg.model.embed_initializer        : xavier\n",
            "2020-02-12 07:57:37,014 cfg.model.embed_init_gain          : 1.0\n",
            "2020-02-12 07:57:37,014 cfg.model.tied_embeddings          : True\n",
            "2020-02-12 07:57:37,014 cfg.model.tied_softmax             : True\n",
            "2020-02-12 07:57:37,014 cfg.model.encoder.type             : transformer\n",
            "2020-02-12 07:57:37,014 cfg.model.encoder.num_layers       : 6\n",
            "2020-02-12 07:57:37,014 cfg.model.encoder.num_heads        : 4\n",
            "2020-02-12 07:57:37,014 cfg.model.encoder.embeddings.embedding_dim : 256\n",
            "2020-02-12 07:57:37,014 cfg.model.encoder.embeddings.scale : True\n",
            "2020-02-12 07:57:37,014 cfg.model.encoder.embeddings.dropout : 0.2\n",
            "2020-02-12 07:57:37,014 cfg.model.encoder.hidden_size      : 256\n",
            "2020-02-12 07:57:37,015 cfg.model.encoder.ff_size          : 1024\n",
            "2020-02-12 07:57:37,015 cfg.model.encoder.dropout          : 0.3\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.type             : transformer\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.num_layers       : 6\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.num_heads        : 4\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.embeddings.embedding_dim : 256\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.embeddings.scale : True\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.embeddings.dropout : 0.2\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.hidden_size      : 256\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.ff_size          : 1024\n",
            "2020-02-12 07:57:37,015 cfg.model.decoder.dropout          : 0.3\n",
            "2020-02-12 07:57:37,015 Data set sizes: \n",
            "\ttrain 6937,\n",
            "\tvalid 1000,\n",
            "\ttest 165\n",
            "2020-02-12 07:57:37,015 First training example:\n",
            "\t[SRC] and shall come forth to decei@@ ve the nations which are in the four cor@@ n@@ ers of the earth, Go@@ g and Ma@@ go@@ g@@ , to gather them together to the war@@ : the number of whom is as the sand of the sea@@ .\n",
            "\t[TRG] À kɑɑ hunu kɑ hɑndunyɑ dimi yom kɑ ǹ goono hɑndunyɑ cɛn@@ jɛ tɑɑci gɑɑ dɛr@@ ɑndi, wɑtom G@@ ɔ@@ gu ndɑ Mɑ@@ g@@ ɔ@@ gu. À gɑ ǹ meigu wɑngu tɛyom sɛ. Ǹ bɑyom bisɑ tɛku tɑɑ@@ si.\n",
            "2020-02-12 07:57:37,015 First 10 words (src): (0) <unk> (1) <pad> (2) <s> (3) </s> (4) kɑ (5) the (6) ɑ̀ (7) and (8) yom (9) of\n",
            "2020-02-12 07:57:37,016 First 10 words (trg): (0) <unk> (1) <pad> (2) <s> (3) </s> (4) kɑ (5) the (6) ɑ̀ (7) and (8) yom (9) of\n",
            "2020-02-12 07:57:37,016 Number of Src words (types): 4072\n",
            "2020-02-12 07:57:37,016 Number of Trg words (types): 4072\n",
            "2020-02-12 07:57:37,016 Model(\n",
            "\tencoder=TransformerEncoder(num_layers=6, num_heads=4),\n",
            "\tdecoder=TransformerDecoder(num_layers=6, num_heads=4),\n",
            "\tsrc_embed=Embeddings(embedding_dim=256, vocab_size=4072),\n",
            "\ttrg_embed=Embeddings(embedding_dim=256, vocab_size=4072))\n",
            "2020-02-12 07:57:37,020 EPOCH 1\n",
            "2020-02-12 07:57:49,851 Epoch   1: total training loss 466.71\n",
            "2020-02-12 07:57:49,851 EPOCH 2\n",
            "2020-02-12 07:57:52,016 Epoch   2 Step:      100 Batch Loss:     5.053507 Tokens per Sec:    16552, Lr: 0.000300\n",
            "2020-02-12 07:58:02,209 Epoch   2: total training loss 427.56\n",
            "2020-02-12 07:58:02,210 EPOCH 3\n",
            "2020-02-12 07:58:06,583 Epoch   3 Step:      200 Batch Loss:     4.733194 Tokens per Sec:    16758, Lr: 0.000300\n",
            "2020-02-12 07:58:14,748 Epoch   3: total training loss 401.30\n",
            "2020-02-12 07:58:14,748 EPOCH 4\n",
            "2020-02-12 07:58:21,509 Epoch   4 Step:      300 Batch Loss:     4.482025 Tokens per Sec:    15946, Lr: 0.000300\n",
            "2020-02-12 07:58:27,645 Epoch   4: total training loss 376.97\n",
            "2020-02-12 07:58:27,646 EPOCH 5\n",
            "2020-02-12 07:58:36,667 Epoch   5 Step:      400 Batch Loss:     4.369053 Tokens per Sec:    16852, Lr: 0.000300\n",
            "2020-02-12 07:58:40,237 Epoch   5: total training loss 357.45\n",
            "2020-02-12 07:58:40,237 EPOCH 6\n",
            "2020-02-12 07:58:51,728 Epoch   6 Step:      500 Batch Loss:     4.072609 Tokens per Sec:    16318, Lr: 0.000300\n",
            "2020-02-12 07:58:53,063 Epoch   6: total training loss 350.58\n",
            "2020-02-12 07:58:53,063 EPOCH 7\n",
            "2020-02-12 07:59:06,112 Epoch   7: total training loss 343.65\n",
            "2020-02-12 07:59:06,113 EPOCH 8\n",
            "2020-02-12 07:59:06,921 Epoch   8 Step:      600 Batch Loss:     3.877546 Tokens per Sec:    15682, Lr: 0.000300\n",
            "2020-02-12 07:59:19,489 Epoch   8: total training loss 333.49\n",
            "2020-02-12 07:59:19,489 EPOCH 9\n",
            "2020-02-12 07:59:22,533 Epoch   9 Step:      700 Batch Loss:     3.790495 Tokens per Sec:    15575, Lr: 0.000300\n",
            "2020-02-12 07:59:32,708 Epoch   9: total training loss 323.89\n",
            "2020-02-12 07:59:32,709 EPOCH 10\n",
            "2020-02-12 07:59:37,884 Epoch  10 Step:      800 Batch Loss:     3.614108 Tokens per Sec:    15893, Lr: 0.000300\n",
            "2020-02-12 07:59:45,831 Epoch  10: total training loss 311.51\n",
            "2020-02-12 07:59:45,831 EPOCH 11\n",
            "2020-02-12 07:59:53,157 Epoch  11 Step:      900 Batch Loss:     3.517627 Tokens per Sec:    16008, Lr: 0.000300\n",
            "2020-02-12 07:59:58,929 Epoch  11: total training loss 303.86\n",
            "2020-02-12 07:59:58,929 EPOCH 12\n",
            "2020-02-12 08:00:08,729 Epoch  12 Step:     1000 Batch Loss:     3.723745 Tokens per Sec:    15648, Lr: 0.000300\n",
            "2020-02-12 08:00:51,979 Hooray! New best validation result [ppl]!\n",
            "2020-02-12 08:00:51,980 Saving new checkpoint.\n",
            "2020-02-12 08:00:52,225 Example #0\n",
            "2020-02-12 08:00:52,227 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:00:52,227 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:00:52,227 \tHypothesis: A go kɑ ɑ mɑɑ ɑ sɛ ɑ go kɑ ɑ mɑɑ ɑ sɛ ndɑ ɑ go kɑ ɑ̀ ci ɑ Bɑɑbɑ kɑ ɑ̀ go kɑ ɑ̀ go kɑ ɑ̀ go kɑ ɑ dɑm ɑ sɛ.\n",
            "2020-02-12 08:00:52,227 Example #1\n",
            "2020-02-12 08:00:52,228 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:00:52,228 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:00:52,228 \tHypothesis: N mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n mɑ n sɛ.\n",
            "2020-02-12 08:00:52,228 Example #2\n",
            "2020-02-12 08:00:52,228 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:00:52,229 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:00:52,229 \tHypothesis: Ikpɛ nɑ ɑ̀ tɛ ndɑ Ikpɛ nɑ ɑ̀ tɛ ndɑ ɑ̀ sɛ ndɑ ɑ̀ mɑ ci Ikpɛ dɔntɔneize yom kɑ ǹ go kɑ ɑ̀ ci Ikpɛ nɑ ɑ̀ tɛ ndɑ ɑ̀ sɛ.\n",
            "2020-02-12 08:00:52,229 Example #3\n",
            "2020-02-12 08:00:52,229 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:00:52,229 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:00:52,229 \tHypothesis: À nɑ ǹ dɑm ǹ mɑ kpei kɑ cii: A go kɑ ɑ̀ tɛ hɛ kɑ ɑ̀ go kɑ ɑ̀ go kɑ ɑ̀ go kɑ ɑ̀ dɑm ɑ sɛ.\n",
            "2020-02-12 08:00:52,229 Validation result (greedy) at epoch  12, step     1000: bleu:   2.25, loss: 108241.0781, ppl:  33.3947, duration: 43.5003s\n",
            "2020-02-12 08:00:55,834 Epoch  12: total training loss 301.16\n",
            "2020-02-12 08:00:55,834 EPOCH 13\n",
            "2020-02-12 08:01:07,754 Epoch  13 Step:     1100 Batch Loss:     3.373813 Tokens per Sec:    15904, Lr: 0.000300\n",
            "2020-02-12 08:01:08,991 Epoch  13: total training loss 290.57\n",
            "2020-02-12 08:01:08,991 EPOCH 14\n",
            "2020-02-12 08:01:21,890 Epoch  14: total training loss 284.38\n",
            "2020-02-12 08:01:21,890 EPOCH 15\n",
            "2020-02-12 08:01:22,940 Epoch  15 Step:     1200 Batch Loss:     3.351792 Tokens per Sec:    15095, Lr: 0.000300\n",
            "2020-02-12 08:01:34,859 Epoch  15: total training loss 283.04\n",
            "2020-02-12 08:01:34,859 EPOCH 16\n",
            "2020-02-12 08:01:38,137 Epoch  16 Step:     1300 Batch Loss:     3.275927 Tokens per Sec:    15668, Lr: 0.000300\n",
            "2020-02-12 08:01:48,134 Epoch  16: total training loss 276.65\n",
            "2020-02-12 08:01:48,134 EPOCH 17\n",
            "2020-02-12 08:01:53,480 Epoch  17 Step:     1400 Batch Loss:     3.273901 Tokens per Sec:    16228, Lr: 0.000300\n",
            "2020-02-12 08:02:01,271 Epoch  17: total training loss 271.29\n",
            "2020-02-12 08:02:01,271 EPOCH 18\n",
            "2020-02-12 08:02:08,967 Epoch  18 Step:     1500 Batch Loss:     3.121495 Tokens per Sec:    15736, Lr: 0.000300\n",
            "2020-02-12 08:02:14,515 Epoch  18: total training loss 262.37\n",
            "2020-02-12 08:02:14,515 EPOCH 19\n",
            "2020-02-12 08:02:24,482 Epoch  19 Step:     1600 Batch Loss:     2.943789 Tokens per Sec:    15777, Lr: 0.000300\n",
            "2020-02-12 08:02:27,735 Epoch  19: total training loss 256.87\n",
            "2020-02-12 08:02:27,735 EPOCH 20\n",
            "2020-02-12 08:02:39,900 Epoch  20 Step:     1700 Batch Loss:     3.001261 Tokens per Sec:    15690, Lr: 0.000300\n",
            "2020-02-12 08:02:41,046 Epoch  20: total training loss 255.69\n",
            "2020-02-12 08:02:41,047 EPOCH 21\n",
            "2020-02-12 08:02:54,288 Epoch  21: total training loss 247.77\n",
            "2020-02-12 08:02:54,288 EPOCH 22\n",
            "2020-02-12 08:02:55,542 Epoch  22 Step:     1800 Batch Loss:     2.730506 Tokens per Sec:    15002, Lr: 0.000300\n",
            "2020-02-12 08:03:07,423 Epoch  22: total training loss 246.18\n",
            "2020-02-12 08:03:07,423 EPOCH 23\n",
            "2020-02-12 08:03:10,863 Epoch  23 Step:     1900 Batch Loss:     2.849777 Tokens per Sec:    15479, Lr: 0.000300\n",
            "2020-02-12 08:03:20,626 Epoch  23: total training loss 238.07\n",
            "2020-02-12 08:03:20,626 EPOCH 24\n",
            "2020-02-12 08:03:26,324 Epoch  24 Step:     2000 Batch Loss:     2.248133 Tokens per Sec:    15810, Lr: 0.000300\n",
            "2020-02-12 08:03:49,547 Hooray! New best validation result [ppl]!\n",
            "2020-02-12 08:03:49,547 Saving new checkpoint.\n",
            "2020-02-12 08:03:49,797 Example #0\n",
            "2020-02-12 08:03:49,798 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:03:49,798 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:03:49,798 \tHypothesis: A go kɑ ɑ ŋmɑɑrɛ ɑ sɛ kɑ ɑ nɑ ɑ no i Kpe Yesu Mɛsiyɑ sɑbu sɛ. À nɑ ɑ no himmɑ kɑ ɑ̀ ci ɑ sɛ ndɑ ɑ nyɑize yom.\n",
            "2020-02-12 08:03:49,798 Example #1\n",
            "2020-02-12 08:03:49,798 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:03:49,799 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:03:49,799 \tHypothesis: N mɑ n ŋmɔne ibɛrɛ yom cɛɛ kɑ cii: N mɑ n cɛɛ n mɑ n cee yom tɛ n sɛ.\n",
            "2020-02-12 08:03:49,799 Example #2\n",
            "2020-02-12 08:03:49,799 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:03:49,799 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:03:49,799 \tHypothesis: De bɔrɔ kɑ ɑ̀ go nɑɑne Ikpɛ dɔ, ɑ̀ gɑ ɑ̀ cɛɑndi susu fɔndɑ cire ɑmmɑ ɑ̀ si nɑɑne gɑɑ.\n",
            "2020-02-12 08:03:49,799 Example #3\n",
            "2020-02-12 08:03:49,800 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:03:49,800 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:03:49,800 \tHypothesis: Sɑɑ kɑ ǹ nɑ gɑrɑndikɔ jinɑ yom di cɛɛ yom nɑ ǹ kulu tɛ kɑ cii: Bɔrɔ kulu kɑ ɑ̀ go mɑɑ ɑ go mɑɑ ngɑ di hɛ yom kulu kɑ ǹ go cii ɑ sɛ.\n",
            "2020-02-12 08:03:49,800 Validation result (greedy) at epoch  24, step     2000: bleu:   6.63, loss: 93221.4844, ppl:  20.5235, duration: 23.4751s\n",
            "2020-02-12 08:03:57,263 Epoch  24: total training loss 234.79\n",
            "2020-02-12 08:03:57,263 EPOCH 25\n",
            "2020-02-12 08:04:05,133 Epoch  25 Step:     2100 Batch Loss:     2.622887 Tokens per Sec:    16045, Lr: 0.000300\n",
            "2020-02-12 08:04:10,346 Epoch  25: total training loss 233.20\n",
            "2020-02-12 08:04:10,346 EPOCH 26\n",
            "2020-02-12 08:04:20,603 Epoch  26 Step:     2200 Batch Loss:     2.758588 Tokens per Sec:    15640, Lr: 0.000300\n",
            "2020-02-12 08:04:23,679 Epoch  26: total training loss 229.16\n",
            "2020-02-12 08:04:23,679 EPOCH 27\n",
            "2020-02-12 08:04:36,108 Epoch  27 Step:     2300 Batch Loss:     2.634450 Tokens per Sec:    15872, Lr: 0.000300\n",
            "2020-02-12 08:04:36,885 Epoch  27: total training loss 221.73\n",
            "2020-02-12 08:04:36,885 EPOCH 28\n",
            "2020-02-12 08:04:50,022 Epoch  28: total training loss 218.73\n",
            "2020-02-12 08:04:50,022 EPOCH 29\n",
            "2020-02-12 08:04:51,624 Epoch  29 Step:     2400 Batch Loss:     2.472899 Tokens per Sec:    15732, Lr: 0.000300\n",
            "2020-02-12 08:05:03,291 Epoch  29: total training loss 214.40\n",
            "2020-02-12 08:05:03,292 EPOCH 30\n",
            "2020-02-12 08:05:07,248 Epoch  30 Step:     2500 Batch Loss:     2.552207 Tokens per Sec:    15732, Lr: 0.000300\n",
            "2020-02-12 08:05:16,515 Epoch  30: total training loss 211.05\n",
            "2020-02-12 08:05:16,515 EPOCH 31\n",
            "2020-02-12 08:05:22,742 Epoch  31 Step:     2600 Batch Loss:     2.506979 Tokens per Sec:    15792, Lr: 0.000300\n",
            "2020-02-12 08:05:29,954 Epoch  31: total training loss 210.60\n",
            "2020-02-12 08:05:29,954 EPOCH 32\n",
            "2020-02-12 08:05:38,419 Epoch  32 Step:     2700 Batch Loss:     2.617370 Tokens per Sec:    15336, Lr: 0.000300\n",
            "2020-02-12 08:05:43,472 Epoch  32: total training loss 209.21\n",
            "2020-02-12 08:05:43,473 EPOCH 33\n",
            "2020-02-12 08:05:54,031 Epoch  33 Step:     2800 Batch Loss:     2.787237 Tokens per Sec:    15432, Lr: 0.000300\n",
            "2020-02-12 08:05:56,880 Epoch  33: total training loss 203.96\n",
            "2020-02-12 08:05:56,881 EPOCH 34\n",
            "2020-02-12 08:06:09,740 Epoch  34 Step:     2900 Batch Loss:     2.269294 Tokens per Sec:    15684, Lr: 0.000300\n",
            "2020-02-12 08:06:10,197 Epoch  34: total training loss 198.12\n",
            "2020-02-12 08:06:10,198 EPOCH 35\n",
            "2020-02-12 08:06:23,531 Epoch  35: total training loss 196.95\n",
            "2020-02-12 08:06:23,531 EPOCH 36\n",
            "2020-02-12 08:06:25,268 Epoch  36 Step:     3000 Batch Loss:     2.253644 Tokens per Sec:    15860, Lr: 0.000300\n",
            "2020-02-12 08:06:53,915 Hooray! New best validation result [ppl]!\n",
            "2020-02-12 08:06:53,915 Saving new checkpoint.\n",
            "2020-02-12 08:06:54,305 Example #0\n",
            "2020-02-12 08:06:54,306 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:06:54,306 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:06:54,306 \tHypothesis: A go kɑ ɑ̀ dɑm ɑ bine yom kunɑ kɑ i Kpe Yesu Mɛsiyɑ nɑ ɑ no himmɑ kɑ ɑ̀ ci ɑ gbei yom kɑ ɑ̀ ci ɑ sɛ ndɑ ɑ nyɑize yom sɛ.\n",
            "2020-02-12 08:06:54,306 Example #1\n",
            "2020-02-12 08:06:54,306 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:06:54,306 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:06:54,306 \tHypothesis: N mɑ n ŋmɔne ibɛrɛ bei. N mɑ n ŋmɔne ibɛrɛ bei kɑ n go kɑ n sifɑ. N mɑ n bine yom sɔlu.\n",
            "2020-02-12 08:06:54,306 Example #2\n",
            "2020-02-12 08:06:54,306 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:06:54,306 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:06:54,306 \tHypothesis: De bɔrɔ kɑ ɑ̀ go dulum tɛ, Ikpɛ gɑ ɑ̀ cɛɑndi susu cɛɑndi susu fɔndɑ gɑɑ, ɑ̀ gɑ ɑ̀ cɛɑndi susu fɔndɑ tɛgbei yom gɑɑ.\n",
            "2020-02-12 08:06:54,306 Example #3\n",
            "2020-02-12 08:06:54,306 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:06:54,306 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:06:54,307 \tHypothesis: Sɑɑ kɑ jiribi boobo goono Sɑmɑri lɑɑbu bɔrɔ boobo nɑɑne ɑ̀ gɑɑ. À nɑ ɑ̀ bei kɑ ɑ̀ nɑ ngɑ bɔm kɑɑtɛrɛ ɑ sɛ.\n",
            "2020-02-12 08:06:54,307 Validation result (greedy) at epoch  36, step     3000: bleu:   9.22, loss: 87075.9375, ppl:  16.8168, duration: 29.0380s\n",
            "2020-02-12 08:07:05,785 Epoch  36: total training loss 194.14\n",
            "2020-02-12 08:07:05,785 EPOCH 37\n",
            "2020-02-12 08:07:09,763 Epoch  37 Step:     3100 Batch Loss:     2.478496 Tokens per Sec:    15837, Lr: 0.000300\n",
            "2020-02-12 08:07:18,983 Epoch  37: total training loss 186.80\n",
            "2020-02-12 08:07:18,983 EPOCH 38\n",
            "2020-02-12 08:07:25,326 Epoch  38 Step:     3200 Batch Loss:     2.459899 Tokens per Sec:    16005, Lr: 0.000300\n",
            "2020-02-12 08:07:32,158 Epoch  38: total training loss 187.90\n",
            "2020-02-12 08:07:32,158 EPOCH 39\n",
            "2020-02-12 08:07:40,545 Epoch  39 Step:     3300 Batch Loss:     2.147654 Tokens per Sec:    16118, Lr: 0.000300\n",
            "2020-02-12 08:07:45,210 Epoch  39: total training loss 183.65\n",
            "2020-02-12 08:07:45,210 EPOCH 40\n",
            "2020-02-12 08:07:55,968 Epoch  40 Step:     3400 Batch Loss:     2.215834 Tokens per Sec:    15900, Lr: 0.000300\n",
            "2020-02-12 08:07:58,273 Epoch  40: total training loss 181.06\n",
            "2020-02-12 08:07:58,273 EPOCH 41\n",
            "2020-02-12 08:08:11,320 Epoch  41 Step:     3500 Batch Loss:     2.216592 Tokens per Sec:    15814, Lr: 0.000300\n",
            "2020-02-12 08:08:11,477 Epoch  41: total training loss 179.41\n",
            "2020-02-12 08:08:11,477 EPOCH 42\n",
            "2020-02-12 08:08:24,532 Epoch  42: total training loss 176.67\n",
            "2020-02-12 08:08:24,532 EPOCH 43\n",
            "2020-02-12 08:08:26,580 Epoch  43 Step:     3600 Batch Loss:     2.267456 Tokens per Sec:    15997, Lr: 0.000300\n",
            "2020-02-12 08:08:37,597 Epoch  43: total training loss 175.32\n",
            "2020-02-12 08:08:37,597 EPOCH 44\n",
            "2020-02-12 08:08:41,765 Epoch  44 Step:     3700 Batch Loss:     2.231358 Tokens per Sec:    15763, Lr: 0.000300\n",
            "2020-02-12 08:08:50,648 Epoch  44: total training loss 170.17\n",
            "2020-02-12 08:08:50,648 EPOCH 45\n",
            "2020-02-12 08:08:56,997 Epoch  45 Step:     3800 Batch Loss:     1.962990 Tokens per Sec:    16110, Lr: 0.000300\n",
            "2020-02-12 08:09:03,514 Epoch  45: total training loss 168.11\n",
            "2020-02-12 08:09:03,514 EPOCH 46\n",
            "2020-02-12 08:09:12,224 Epoch  46 Step:     3900 Batch Loss:     1.920840 Tokens per Sec:    16143, Lr: 0.000300\n",
            "2020-02-12 08:09:16,514 Epoch  46: total training loss 164.80\n",
            "2020-02-12 08:09:16,514 EPOCH 47\n",
            "2020-02-12 08:09:27,503 Epoch  47 Step:     4000 Batch Loss:     2.051774 Tokens per Sec:    16163, Lr: 0.000300\n",
            "2020-02-12 08:09:50,187 Hooray! New best validation result [ppl]!\n",
            "2020-02-12 08:09:50,188 Saving new checkpoint.\n",
            "2020-02-12 08:09:50,454 Example #0\n",
            "2020-02-12 08:09:50,455 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:09:50,455 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:09:50,455 \tHypothesis: A go kɑ ɑ dɑm ɑ bɑndɑ kɑ ɑ nɑ ɑ hɑlɑɑlɑndi Kpe Yesu Mɛsiyɑ gɑɑ. À go kɑ ɑ cɛbɛ yɑ ci ɑ sɛ kɑ ɑ nɑ gbei tɛ ndɑ ɑ sɛ hinɑbunyɑ.\n",
            "2020-02-12 08:09:50,455 Example #1\n",
            "2020-02-12 08:09:50,455 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:09:50,455 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:09:50,455 \tHypothesis: N go tirɑ wɔ hɑntum n sɛ kɑ cii: Kpe, n mɑ lɑɑkɑli ndɑ n gɔrɔkɑsine ndɑ n gɑɑbi kɑ ɑ̀ ci n nyɑize lɑɑkɑli kɑne.\n",
            "2020-02-12 08:09:50,455 Example #2\n",
            "2020-02-12 08:09:50,455 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:09:50,455 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:09:50,455 \tHypothesis: Zɑngɑ yɑ no Ikpɛ go bɔrɔ cɛɑndi susu cɛɑndi susu. À gɑ cɛɑndiyom susu cɛɑndi susu gbei tɛyom gɑɑ, nɑɑne kɑ ɑ̀ go cɛɑndiyom susu tɛ cɛɑndiyom susu.\n",
            "2020-02-12 08:09:50,455 Example #3\n",
            "2020-02-12 08:09:50,456 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:09:50,456 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:09:50,456 \tHypothesis: Wɑngɑrɑ boobo kɑ ǹ nɑɑne Sɑmɑriɑncɛ yom gɑɑ nɑɑne ɑ̀ gɑɑ. À gɑɑ no bɔrɔ kulu kɑ ɑ̀ go sendi ɑ sɛ ndɑ zɑngɑ ɑ̀ cii ɑ sɛ hɛ kulu kɑ ɑ̀ tɛ.\n",
            "2020-02-12 08:09:50,456 Validation result (greedy) at epoch  47, step     4000: bleu:  11.29, loss: 85277.0312, ppl:  15.8643, duration: 22.9526s\n",
            "2020-02-12 08:09:52,574 Epoch  47: total training loss 163.93\n",
            "2020-02-12 08:09:52,574 EPOCH 48\n",
            "2020-02-12 08:10:05,768 Epoch  48 Step:     4100 Batch Loss:     1.702314 Tokens per Sec:    15861, Lr: 0.000300\n",
            "2020-02-12 08:10:05,768 Epoch  48: total training loss 162.85\n",
            "2020-02-12 08:10:05,769 EPOCH 49\n",
            "2020-02-12 08:10:18,843 Epoch  49: total training loss 160.63\n",
            "2020-02-12 08:10:18,843 EPOCH 50\n",
            "2020-02-12 08:10:21,067 Epoch  50 Step:     4200 Batch Loss:     1.834686 Tokens per Sec:    15999, Lr: 0.000300\n",
            "2020-02-12 08:10:32,003 Epoch  50: total training loss 155.13\n",
            "2020-02-12 08:10:32,003 EPOCH 51\n",
            "2020-02-12 08:10:36,505 Epoch  51 Step:     4300 Batch Loss:     2.050732 Tokens per Sec:    15220, Lr: 0.000300\n",
            "2020-02-12 08:10:45,399 Epoch  51: total training loss 155.59\n",
            "2020-02-12 08:10:45,399 EPOCH 52\n",
            "2020-02-12 08:10:52,175 Epoch  52 Step:     4400 Batch Loss:     1.790398 Tokens per Sec:    15697, Lr: 0.000300\n",
            "2020-02-12 08:10:58,800 Epoch  52: total training loss 152.99\n",
            "2020-02-12 08:10:58,800 EPOCH 53\n",
            "2020-02-12 08:11:07,604 Epoch  53 Step:     4500 Batch Loss:     1.742444 Tokens per Sec:    15682, Lr: 0.000300\n",
            "2020-02-12 08:11:12,077 Epoch  53: total training loss 150.35\n",
            "2020-02-12 08:11:12,077 EPOCH 54\n",
            "2020-02-12 08:11:23,061 Epoch  54 Step:     4600 Batch Loss:     1.742742 Tokens per Sec:    16035, Lr: 0.000300\n",
            "2020-02-12 08:11:25,161 Epoch  54: total training loss 147.54\n",
            "2020-02-12 08:11:25,161 EPOCH 55\n",
            "2020-02-12 08:11:38,167 Epoch  55: total training loss 145.17\n",
            "2020-02-12 08:11:38,167 EPOCH 56\n",
            "2020-02-12 08:11:38,322 Epoch  56 Step:     4700 Batch Loss:     1.371213 Tokens per Sec:    14813, Lr: 0.000300\n",
            "2020-02-12 08:11:51,342 Epoch  56: total training loss 145.02\n",
            "2020-02-12 08:11:51,342 EPOCH 57\n",
            "2020-02-12 08:11:53,589 Epoch  57 Step:     4800 Batch Loss:     0.818968 Tokens per Sec:    15337, Lr: 0.000300\n",
            "2020-02-12 08:12:04,549 Epoch  57: total training loss 140.81\n",
            "2020-02-12 08:12:04,549 EPOCH 58\n",
            "2020-02-12 08:12:09,119 Epoch  58 Step:     4900 Batch Loss:     1.726804 Tokens per Sec:    16059, Lr: 0.000300\n",
            "2020-02-12 08:12:17,724 Epoch  58: total training loss 139.84\n",
            "2020-02-12 08:12:17,724 EPOCH 59\n",
            "2020-02-12 08:12:24,476 Epoch  59 Step:     5000 Batch Loss:     1.663464 Tokens per Sec:    16112, Lr: 0.000300\n",
            "2020-02-12 08:12:44,550 Example #0\n",
            "2020-02-12 08:12:44,550 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:12:44,550 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:12:44,550 \tHypothesis: A go kɑ ɑ̀ dɑm ɑ bɑndɑ Mɛsiyɑ Yesu sɑbu sɛ. A go kɑ ɑ cɛbɛ yɑ nɑɑnekpɛ no zɑmɑ ngɑ yɑ ci ɑ tɑm yom gbei yom sɛ.\n",
            "2020-02-12 08:12:44,550 Example #1\n",
            "2020-02-12 08:12:44,550 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:12:44,551 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:12:44,551 \tHypothesis: A nyɑizei, n mɑ wɔne cii ɑ sɛ, Kpei, n mɑ kpei kɑ n bine yeenɑndi.\n",
            "2020-02-12 08:12:44,551 Example #2\n",
            "2020-02-12 08:12:44,551 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:12:44,551 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:12:44,551 \tHypothesis: Zɑngɑ yɑ no Ikpɛ go bɔrɔ fɔ ŋmɑni tɛ, ɑ̀ gɑ kɑ ɑ̀ cɛɑndi susu. À gɑ nɑɑne cini. À gɑ nɑɑne mo cɛyom susu zɑm kɑ nɑɑne cini.\n",
            "2020-02-12 08:12:44,551 Example #3\n",
            "2020-02-12 08:12:44,551 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:12:44,551 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:12:44,551 \tHypothesis: Yude lɑɑbu bɔrɔ boobo nɑɑne kɑ ǹ nɑɑne ɑ̀ gɑɑ zɑmɑ ǹ nɑɑne ɑ̀ gɑɑ no ɑ̀ cii ɑ̀ sɛ ndɑ bɔrɔ kulu kɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:12:44,551 Validation result (greedy) at epoch  59, step     5000: bleu:  12.00, loss: 85901.3047, ppl:  16.1885, duration: 20.0748s\n",
            "2020-02-12 08:12:50,706 Epoch  59: total training loss 137.53\n",
            "2020-02-12 08:12:50,706 EPOCH 60\n",
            "2020-02-12 08:12:59,895 Epoch  60 Step:     5100 Batch Loss:     1.482252 Tokens per Sec:    15654, Lr: 0.000300\n",
            "2020-02-12 08:13:04,077 Epoch  60: total training loss 137.19\n",
            "2020-02-12 08:13:04,077 EPOCH 61\n",
            "2020-02-12 08:13:15,535 Epoch  61 Step:     5200 Batch Loss:     1.831912 Tokens per Sec:    15724, Lr: 0.000300\n",
            "2020-02-12 08:13:17,416 Epoch  61: total training loss 133.84\n",
            "2020-02-12 08:13:17,416 EPOCH 62\n",
            "2020-02-12 08:13:30,686 Epoch  62: total training loss 132.52\n",
            "2020-02-12 08:13:30,686 EPOCH 63\n",
            "2020-02-12 08:13:31,028 Epoch  63 Step:     5300 Batch Loss:     1.674448 Tokens per Sec:    15683, Lr: 0.000300\n",
            "2020-02-12 08:13:43,747 Epoch  63: total training loss 130.11\n",
            "2020-02-12 08:13:43,747 EPOCH 64\n",
            "2020-02-12 08:13:46,363 Epoch  64 Step:     5400 Batch Loss:     1.587212 Tokens per Sec:    15789, Lr: 0.000300\n",
            "2020-02-12 08:13:56,888 Epoch  64: total training loss 128.50\n",
            "2020-02-12 08:13:56,888 EPOCH 65\n",
            "2020-02-12 08:14:01,749 Epoch  65 Step:     5500 Batch Loss:     1.394756 Tokens per Sec:    16017, Lr: 0.000300\n",
            "2020-02-12 08:14:10,042 Epoch  65: total training loss 127.63\n",
            "2020-02-12 08:14:10,042 EPOCH 66\n",
            "2020-02-12 08:14:17,078 Epoch  66 Step:     5600 Batch Loss:     1.455463 Tokens per Sec:    16081, Lr: 0.000300\n",
            "2020-02-12 08:14:23,112 Epoch  66: total training loss 124.27\n",
            "2020-02-12 08:14:23,112 EPOCH 67\n",
            "2020-02-12 08:14:32,570 Epoch  67 Step:     5700 Batch Loss:     1.391649 Tokens per Sec:    16040, Lr: 0.000300\n",
            "2020-02-12 08:14:36,154 Epoch  67: total training loss 121.23\n",
            "2020-02-12 08:14:36,154 EPOCH 68\n",
            "2020-02-12 08:14:47,781 Epoch  68 Step:     5800 Batch Loss:     1.611635 Tokens per Sec:    16005, Lr: 0.000300\n",
            "2020-02-12 08:14:49,192 Epoch  68: total training loss 121.93\n",
            "2020-02-12 08:14:49,193 EPOCH 69\n",
            "2020-02-12 08:15:02,240 Epoch  69: total training loss 119.52\n",
            "2020-02-12 08:15:02,240 EPOCH 70\n",
            "2020-02-12 08:15:03,184 Epoch  70 Step:     5900 Batch Loss:     1.386215 Tokens per Sec:    16172, Lr: 0.000300\n",
            "2020-02-12 08:15:15,354 Epoch  70: total training loss 119.11\n",
            "2020-02-12 08:15:15,354 EPOCH 71\n",
            "2020-02-12 08:15:18,488 Epoch  71 Step:     6000 Batch Loss:     1.360763 Tokens per Sec:    15807, Lr: 0.000300\n",
            "2020-02-12 08:15:43,518 Example #0\n",
            "2020-02-12 08:15:43,519 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:15:43,519 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:15:43,519 \tHypothesis: A go kɑ ɑ hɔngɑndi kɑ Yesu Mɛsiyɑ nɑ ɑ no ngɑ suuji sɑbu sɛ. À go kɑ ɑ cɛbɛ no tɑlikɑ yom sɛ kɑ ǹ ci ɑ sɛ hinɑbunutɛrɛ hɛ kɑ ɑ gundɑ ɑ sɛ ndɑ.\n",
            "2020-02-12 08:15:43,519 Example #1\n",
            "2020-02-12 08:15:43,519 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:15:43,519 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:15:43,519 \tHypothesis: A nyɑizei, n mɑ wɔne cii ɑ sɛ, Kpei, Setɑm. N mɑ n bine yeenɑndi kɑ n bine yom kɔnkɔm.\n",
            "2020-02-12 08:15:43,519 Example #2\n",
            "2020-02-12 08:15:43,519 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:15:43,519 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:15:43,519 \tHypothesis: Zɑngɑ yɑ no Ikpɛ go hungu, ngɑ kɑ ɑ̀ gɑ cɛɑndi susu. À gɑ bɔrɔ cɛɑndi susu. À gɑ nɑɑne nɑɑne gɑɑ nɑɑne gɑɑ, nɑɑne kɑ ɑ̀ gɑ nɑɑne cini.\n",
            "2020-02-12 08:15:43,519 Example #3\n",
            "2020-02-12 08:15:43,519 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:15:43,520 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:15:43,520 \tHypothesis: Zɑ Sɑmɑri lɑɑbu bɔrɔ boobo nɑɑne ɑ̀ gɑɑ kɑ Yesu mɑɑ sendɑ sɑbu sɛ. À nɑɑne ɑ̀ gɑɑ no ɑ̀ cii bɔrɔ kulu kɑ ɑ̀ sɛ ɑ go sendi hɛ kulu kɑ ɑ̀ sɛ ndɑ hɛ kulu kɑ ɑ̀ ci lɑɑli li boobo.\n",
            "2020-02-12 08:15:43,520 Validation result (greedy) at epoch  71, step     6000: bleu:  12.57, loss: 87502.0469, ppl:  17.0506, duration: 25.0314s\n",
            "2020-02-12 08:15:53,449 Epoch  71: total training loss 117.19\n",
            "2020-02-12 08:15:53,449 EPOCH 72\n",
            "2020-02-12 08:15:58,872 Epoch  72 Step:     6100 Batch Loss:     1.318082 Tokens per Sec:    15817, Lr: 0.000300\n",
            "2020-02-12 08:16:06,526 Epoch  72: total training loss 116.32\n",
            "2020-02-12 08:16:06,526 EPOCH 73\n",
            "2020-02-12 08:16:14,045 Epoch  73 Step:     6200 Batch Loss:     1.606343 Tokens per Sec:    15923, Lr: 0.000300\n",
            "2020-02-12 08:16:19,546 Epoch  73: total training loss 113.52\n",
            "2020-02-12 08:16:19,546 EPOCH 74\n",
            "2020-02-12 08:16:29,348 Epoch  74 Step:     6300 Batch Loss:     1.334043 Tokens per Sec:    16390, Lr: 0.000300\n",
            "2020-02-12 08:16:32,383 Epoch  74: total training loss 110.36\n",
            "2020-02-12 08:16:32,383 EPOCH 75\n",
            "2020-02-12 08:16:44,515 Epoch  75 Step:     6400 Batch Loss:     1.328613 Tokens per Sec:    15922, Lr: 0.000300\n",
            "2020-02-12 08:16:45,472 Epoch  75: total training loss 111.58\n",
            "2020-02-12 08:16:45,472 EPOCH 76\n",
            "2020-02-12 08:16:58,446 Epoch  76: total training loss 109.24\n",
            "2020-02-12 08:16:58,447 EPOCH 77\n",
            "2020-02-12 08:16:59,907 Epoch  77 Step:     6500 Batch Loss:     1.263935 Tokens per Sec:    15999, Lr: 0.000300\n",
            "2020-02-12 08:17:11,608 Epoch  77: total training loss 108.91\n",
            "2020-02-12 08:17:11,609 EPOCH 78\n",
            "2020-02-12 08:17:15,134 Epoch  78 Step:     6600 Batch Loss:     1.015139 Tokens per Sec:    15818, Lr: 0.000300\n",
            "2020-02-12 08:17:24,678 Epoch  78: total training loss 106.83\n",
            "2020-02-12 08:17:24,678 EPOCH 79\n",
            "2020-02-12 08:17:30,369 Epoch  79 Step:     6700 Batch Loss:     1.263142 Tokens per Sec:    16083, Lr: 0.000300\n",
            "2020-02-12 08:17:37,741 Epoch  79: total training loss 104.67\n",
            "2020-02-12 08:17:37,742 EPOCH 80\n",
            "2020-02-12 08:17:45,888 Epoch  80 Step:     6800 Batch Loss:     1.249294 Tokens per Sec:    16000, Lr: 0.000300\n",
            "2020-02-12 08:17:50,719 Epoch  80: total training loss 103.46\n",
            "2020-02-12 08:17:50,719 EPOCH 81\n",
            "2020-02-12 08:18:01,082 Epoch  81 Step:     6900 Batch Loss:     1.248681 Tokens per Sec:    16417, Lr: 0.000300\n",
            "2020-02-12 08:18:03,519 Epoch  81: total training loss 102.05\n",
            "2020-02-12 08:18:03,520 EPOCH 82\n",
            "2020-02-12 08:18:16,247 Epoch  82 Step:     7000 Batch Loss:     0.890814 Tokens per Sec:    16235, Lr: 0.000300\n",
            "2020-02-12 08:18:35,052 Example #0\n",
            "2020-02-12 08:18:35,053 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:18:35,053 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:18:35,053 \tHypothesis: A go kɑ ɑ dɔntɔm mɑɑsɑ. À gɑ kɑ ɑ Kpe Yesu Mɛsiyɑ cɛbɛ yɑ nɑɑnekpɛ no. À nɑ ɑ cɛbɛ yɑ gbei hinno yom tɛ ɑ sɛ. A go kɑ ɑ nɑ ɑ bɔm no himɑndi ndɑ himmɑ.\n",
            "2020-02-12 08:18:35,053 Example #1\n",
            "2020-02-12 08:18:35,053 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:18:35,053 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:18:35,053 \tHypothesis: A nyɑizei, n mɑ wɔne cii n sɛ, Kpei, n mɑ lɑɑkɑli tunɑndi kɑ dimi dɑm n lɑɑkɑli kɑne.\n",
            "2020-02-12 08:18:35,053 Example #2\n",
            "2020-02-12 08:18:35,053 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:18:35,053 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:18:35,053 \tHypothesis: Zɑngɑ yɑ no Ikpɛ go bɔrɔ cɛɑndi susu. Bɔrɔ kɑ ɑ̀ gɑ nɑɑne gɑ ɑ̀ cɛɑndi susu. À gɑ nɑɑne nɑɑne hinnɑ. À gɑ nɑɑne nɑɑne nɑɑne cini.\n",
            "2020-02-12 08:18:35,053 Example #3\n",
            "2020-02-12 08:18:35,053 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:18:35,053 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:18:35,053 \tHypothesis: Yude lɑɑbu bɔrɔ boobo nɑɑne ɑ̀ gɑɑ. Ǹ nɑ Yesu nɑɑne ɑ̀ gɑɑ zɑmɑ ɑ̀ cii bɔrɔ kulu kɑ ɑ̀ mɑɑ ɑ sɛ.\n",
            "2020-02-12 08:18:35,053 Validation result (greedy) at epoch  82, step     7000: bleu:  13.65, loss: 89457.6172, ppl:  18.1664, duration: 18.8061s\n",
            "2020-02-12 08:18:35,212 Epoch  82: total training loss 99.90\n",
            "2020-02-12 08:18:35,212 EPOCH 83\n",
            "2020-02-12 08:18:48,109 Epoch  83: total training loss 99.90\n",
            "2020-02-12 08:18:48,109 EPOCH 84\n",
            "2020-02-12 08:18:50,306 Epoch  84 Step:     7100 Batch Loss:     1.299865 Tokens per Sec:    15861, Lr: 0.000300\n",
            "2020-02-12 08:19:00,884 Epoch  84: total training loss 98.65\n",
            "2020-02-12 08:19:00,884 EPOCH 85\n",
            "2020-02-12 08:19:05,137 Epoch  85 Step:     7200 Batch Loss:     0.877816 Tokens per Sec:    15843, Lr: 0.000300\n",
            "2020-02-12 08:19:13,810 Epoch  85: total training loss 97.33\n",
            "2020-02-12 08:19:13,810 EPOCH 86\n",
            "2020-02-12 08:19:20,507 Epoch  86 Step:     7300 Batch Loss:     1.287703 Tokens per Sec:    16485, Lr: 0.000300\n",
            "2020-02-12 08:19:26,654 Epoch  86: total training loss 95.47\n",
            "2020-02-12 08:19:26,655 EPOCH 87\n",
            "2020-02-12 08:19:35,599 Epoch  87 Step:     7400 Batch Loss:     1.237134 Tokens per Sec:    15999, Lr: 0.000300\n",
            "2020-02-12 08:19:39,705 Epoch  87: total training loss 96.01\n",
            "2020-02-12 08:19:39,705 EPOCH 88\n",
            "2020-02-12 08:19:50,814 Epoch  88 Step:     7500 Batch Loss:     1.328148 Tokens per Sec:    16186, Lr: 0.000300\n",
            "2020-02-12 08:19:52,727 Epoch  88: total training loss 95.81\n",
            "2020-02-12 08:19:52,728 EPOCH 89\n",
            "2020-02-12 08:20:05,885 Epoch  89: total training loss 92.46\n",
            "2020-02-12 08:20:05,885 EPOCH 90\n",
            "2020-02-12 08:20:06,204 Epoch  90 Step:     7600 Batch Loss:     0.852934 Tokens per Sec:    16006, Lr: 0.000300\n",
            "2020-02-12 08:20:18,856 Epoch  90: total training loss 91.59\n",
            "2020-02-12 08:20:18,856 EPOCH 91\n",
            "2020-02-12 08:20:21,463 Epoch  91 Step:     7700 Batch Loss:     1.099604 Tokens per Sec:    16478, Lr: 0.000300\n",
            "2020-02-12 08:20:31,836 Epoch  91: total training loss 90.21\n",
            "2020-02-12 08:20:31,836 EPOCH 92\n",
            "2020-02-12 08:20:36,833 Epoch  92 Step:     7800 Batch Loss:     1.154287 Tokens per Sec:    16331, Lr: 0.000300\n",
            "2020-02-12 08:20:44,719 Epoch  92: total training loss 89.19\n",
            "2020-02-12 08:20:44,719 EPOCH 93\n",
            "2020-02-12 08:20:51,703 Epoch  93 Step:     7900 Batch Loss:     1.021082 Tokens per Sec:    16284, Lr: 0.000300\n",
            "2020-02-12 08:20:57,555 Epoch  93: total training loss 89.00\n",
            "2020-02-12 08:20:57,555 EPOCH 94\n",
            "2020-02-12 08:21:06,806 Epoch  94 Step:     8000 Batch Loss:     1.096427 Tokens per Sec:    16372, Lr: 0.000300\n",
            "2020-02-12 08:21:25,191 Example #0\n",
            "2020-02-12 08:21:25,191 \tSource:     I thank him that enabled me, even Christ Jesus our Lord, for that he counted me faithful, appointing me to his service;\n",
            "2020-02-12 08:21:25,191 \tReference:  A go sɑɑbu i Kpe Yesu Mɛsiyɑ sɛ, ngɑ kɑ ɑ̀ nɑ ɑ no gɑɑbi. A gɑ kɑ ɑ̀ sɑɑbu domi ɑ̀ nɑ ɑ lɑsɑbu nɑɑnekpɛ kɑ ɑ dɑm ngɑ gbei kunɑ.\n",
            "2020-02-12 08:21:25,191 \tHypothesis: A go kɑ ɑ dɔntɔm ɑ̀ mɑ ɑ dii kɑ cii Mɛsiyɑ yɑ Kpe no. A go kɑ ɑ cɛbɛ yɑ nɑɑnekpɛ yom no zɑmɑ ngɑ gbei yom kɑ ɑ gundɑ ɑ sɛ yɑ ɑ himmɑ.\n",
            "2020-02-12 08:21:25,191 Example #1\n",
            "2020-02-12 08:21:25,191 \tSource:     Now lettest thou thy servant depart, Lord, According to thy word, in peace;\n",
            "2020-02-12 08:21:25,192 \tReference:  Mɑɑsɑnkulu Kpe, n mɑ tu n bɑnyɑ mɑ kpei ndɑ bɑɑni zɑngɑ n Sendɑ cii.\n",
            "2020-02-12 08:21:25,192 \tHypothesis: A nyɑizei, n mɑ wɔne tɑm tɑm yɑ ci Kpe n nyɑize kɑ ɑ̀ goono ndɑ lɑɑkɑli kɑne.\n",
            "2020-02-12 08:21:25,192 Example #2\n",
            "2020-02-12 08:21:25,192 \tSource:     if so be that God is one, and he shall justify the circumcision by faith, and the uncircumcision through faith.\n",
            "2020-02-12 08:21:25,192 \tReference:  Ikpɛ fɔlɔnku yɑ gɑ bɑnguize yom cɛɑndi susu nɑɑne gɑɑ. À go zɑm kɑ dɑmbɑnguize yom mo cɛɑndi susu nɑɑne gɑɑ.\n",
            "2020-02-12 08:21:25,192 \tHypothesis: De Ikpɛ go bɔrɔ fɔ ŋmɑni tɛ, ɑkpɛ mo Ikpɛ gɑ cɛɑndi susu. À gɑ bɔrɔ cɛɑndi susu nɑɑne gɑɑ, nɑɑne gɑ ɑ̀ cɛɑndi susu.\n",
            "2020-02-12 08:21:25,192 Example #3\n",
            "2020-02-12 08:21:25,192 \tSource:     And from that city many of the Samaritans believed on him because of the word of the woman, who testified, He told me all things that ever I did.\n",
            "2020-02-12 08:21:25,192 \tReference:  Wɑngɑrɑ ngɑ di, Sɑmɑriɑncɛ boobo nɑɑne Yesu gɑɑ weibɔrɔ di sendɑ sɑbu sɛ. Weibɔrɔ di tɛ sɛdɑ kɑ cii: Hɛ kulu kɑ ɑ jinɑ kɑ tɛ, ɑ̀ nɑ ɑ̀ cii ɑ sɛ.\n",
            "2020-02-12 08:21:25,192 \tHypothesis: Yude lɑɑbu bɔrɔ boobo nɑɑne ɑ̀ gɑɑ kɑ Yesu mɑɑ bɑɑru zɑmɑ ǹ nɑ ɑ̀ sendɑ nɑɑne ɑ̀ gɑɑ kɑ cii mɔngɔlɔ di sɛ: Bɔrɔ kulu kɑ ɑ̀ go kɑ ɑ ze ndɑ hɛ kulu kɑ ɑ̀ ci lɑɑbuyom.\n",
            "2020-02-12 08:21:25,192 Validation result (greedy) at epoch  94, step     8000: bleu:  14.18, loss: 91254.2891, ppl:  19.2557, duration: 18.3859s\n",
            "2020-02-12 08:21:28,745 Epoch  94: total training loss 87.77\n",
            "2020-02-12 08:21:28,745 EPOCH 95\n",
            "2020-02-12 08:21:40,211 Epoch  95 Step:     8100 Batch Loss:     1.297292 Tokens per Sec:    16383, Lr: 0.000300\n",
            "2020-02-12 08:21:41,572 Epoch  95: total training loss 87.34\n",
            "2020-02-12 08:21:41,572 EPOCH 96\n",
            "2020-02-12 08:21:54,274 Epoch  96: total training loss 86.27\n",
            "2020-02-12 08:21:54,275 EPOCH 97\n",
            "2020-02-12 08:21:55,049 Epoch  97 Step:     8200 Batch Loss:     1.032753 Tokens per Sec:    16466, Lr: 0.000300\n",
            "2020-02-12 08:22:07,257 Epoch  97: total training loss 84.80\n",
            "2020-02-12 08:22:07,258 EPOCH 98\n",
            "2020-02-12 08:22:10,303 Epoch  98 Step:     8300 Batch Loss:     0.992160 Tokens per Sec:    15828, Lr: 0.000300\n",
            "2020-02-12 08:22:20,378 Epoch  98: total training loss 83.56\n",
            "2020-02-12 08:22:20,378 EPOCH 99\n",
            "2020-02-12 08:22:25,811 Epoch  99 Step:     8400 Batch Loss:     0.987172 Tokens per Sec:    15959, Lr: 0.000300\n",
            "2020-02-12 08:22:33,492 Epoch  99: total training loss 83.05\n",
            "2020-02-12 08:22:33,492 EPOCH 100\n",
            "2020-02-12 08:22:41,229 Epoch 100 Step:     8500 Batch Loss:     0.951234 Tokens per Sec:    15873, Lr: 0.000300\n",
            "2020-02-12 08:22:46,610 Epoch 100: total training loss 83.42\n",
            "2020-02-12 08:22:46,610 Training ended after 100 epochs.\n",
            "2020-02-12 08:22:46,610 Best validation result (greedy) at step     4000:  15.86 ppl.\n",
            "2020-02-12 08:23:15,974  dev bleu:  13.32 [Beam search decoding with beam size = 5 and alpha = 1.0]\n",
            "2020-02-12 08:23:15,975 Translations saved to: models/enddn_transformer/00004000.hyps.dev\n",
            "2020-02-12 08:23:20,504 test bleu:  22.30 [Beam search decoding with beam size = 5 and alpha = 1.0]\n",
            "2020-02-12 08:23:20,504 Translations saved to: models/enddn_transformer/00004000.hyps.test\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nJHT-J8tU56N",
        "colab_type": "code",
        "outputId": "65d0e7ba-2d23-47ea-ff15-32f1414d2501",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 86
        }
      },
      "source": [
        "!ls joeynmt/models/${src}${tgt}_transformer"
      ],
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "00004000.hyps.dev   2000.hyps  4000.hyps  8000.hyps\t tensorboard\n",
            "00004000.hyps.test  3000.ckpt  5000.hyps  best.ckpt\t train.log\n",
            "1000.hyps\t    3000.hyps  6000.hyps  config.yaml\t trg_vocab.txt\n",
            "2000.ckpt\t    4000.ckpt  7000.hyps  src_vocab.txt  validations.txt\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "MBoDS09JM807",
        "outputId": "f4ed8475-4759-4f2e-fcb0-ba90a59c07c9",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        }
      },
      "source": [
        "# Copy the created models from the notebook storage to google drive for persistant storage \n",
        "!cp -r joeynmt/models/${src}${tgt}_transformer/* \"$gdrive_path/models/${src}${tgt}_transformer/\""
      ],
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "cp: cannot create symbolic link '/content/drive/My Drive/masakhane/en-ddn-baseline/models/enddn_transformer/best.ckpt': Operation not supported\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VNBlLPMFVKc2",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Copy the created models from the notebook storage to google drive for persistant storage \n",
        "!cp joeynmt/models/${src}${tgt}_transformer/best.ckpt \"$gdrive_path/models/${src}${tgt}_transformer/\""
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "n94wlrCjVc17",
        "outputId": "37d6339f-1136-4e11-e0f7-8837ccdf6b3e",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 156
        }
      },
      "source": [
        "# Output our validation accuracy\n",
        "! cat \"$gdrive_path/models/${src}${tgt}_transformer/validations.txt\""
      ],
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Steps: 1000\tLoss: 108241.07812\tPPL: 33.39471\tbleu: 2.25322\tLR: 0.00030000\t*\n",
            "Steps: 2000\tLoss: 93221.48438\tPPL: 20.52349\tbleu: 6.63362\tLR: 0.00030000\t*\n",
            "Steps: 3000\tLoss: 87075.93750\tPPL: 16.81676\tbleu: 9.22080\tLR: 0.00030000\t*\n",
            "Steps: 4000\tLoss: 85277.03125\tPPL: 15.86425\tbleu: 11.29147\tLR: 0.00030000\t*\n",
            "Steps: 5000\tLoss: 85901.30469\tPPL: 16.18852\tbleu: 12.00153\tLR: 0.00030000\t\n",
            "Steps: 6000\tLoss: 87502.04688\tPPL: 17.05063\tbleu: 12.57401\tLR: 0.00030000\t\n",
            "Steps: 7000\tLoss: 89457.61719\tPPL: 18.16638\tbleu: 13.65009\tLR: 0.00030000\t\n",
            "Steps: 8000\tLoss: 91254.28906\tPPL: 19.25571\tbleu: 14.18309\tLR: 0.00030000\t\n"
          ],
          "name": "stdout"
        }
      ]
    },
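    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Optional sanity check, a quick sketch rather than part of the original recipe:\n",
        "# parse the tab-separated validations.txt printed above and report the checkpoint\n",
        "# with the best dev BLEU. It assumes the gdrive_path, src and tgt environment\n",
        "# variables set earlier in this notebook.\n",
        "import os\n",
        "\n",
        "val_file = os.path.join(\n",
        "    os.environ[\"gdrive_path\"], \"models\",\n",
        "    os.environ[\"src\"] + os.environ[\"tgt\"] + \"_transformer\", \"validations.txt\")\n",
        "\n",
        "records = []\n",
        "with open(val_file) as f:\n",
        "    for line in f:\n",
        "        # Each line holds tab-separated 'key: value' fields (Steps, Loss, PPL, bleu, LR),\n",
        "        # plus a trailing '*' on lines that set a new best perplexity.\n",
        "        fields = dict(part.split(\": \") for part in line.strip().split(\"\\t\") if \": \" in part)\n",
        "        records.append((int(fields[\"Steps\"]), float(fields[\"bleu\"]), float(fields[\"PPL\"])))\n",
        "\n",
        "best_step, best_bleu, best_ppl = max(records, key=lambda r: r[1])\n",
        "print(\"Best dev BLEU %.2f (PPL %.2f) at step %d\" % (best_bleu, best_ppl, best_step))"
      ],
      "execution_count": 0,
      "outputs": []
    },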
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "66WhRE9lIhoD",
        "outputId": "46270930-9553-4917-c55a-9b1d6fd6cafd",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 69
        }
      },
      "source": [
        "# Test our model\n",
        "! cd joeynmt; python3 -m joeynmt test \"$gdrive_path/models/${src}${tgt}_transformer/config.yaml\""
      ],
      "execution_count": 26,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "2020-02-12 08:23:33,730 Hello! This is Joey-NMT.\n",
            "2020-02-12 08:24:08,045  dev bleu:  13.32 [Beam search decoding with beam size = 5 and alpha = 1.0]\n",
            "2020-02-12 08:24:12,709 test bleu:  22.30 [Beam search decoding with beam size = 5 and alpha = 1.0]\n"
          ],
          "name": "stdout"
        }
      ]
    },
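    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Optional, a sketch rather than part of the original recipe: JoeyNMT also offers a\n",
        "# 'translate' mode (assuming the installed version provides it) that decodes source\n",
        "# sentences from stdin with the best checkpoint. The English sentence below is only\n",
        "# an illustrative example.\n",
        "! cd joeynmt; echo \"I thank him that enabled me\" | python3 -m joeynmt translate \"$gdrive_path/models/${src}${tgt}_transformer/config.yaml\""
      ],
      "execution_count": 0,
      "outputs": []
    }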
  ]
}