|
{"loss": 2.7936, "learning_rate": 2.9999999999999997e-05, "epoch": 0.04, "step": 10} |
|
{"loss": 2.6511, "learning_rate": 5.9999999999999995e-05, "epoch": 0.07, "step": 20} |
|
{"loss": 2.634, "learning_rate": 8.999999999999999e-05, "epoch": 0.11, "step": 30} |
|
{"loss": 2.6239, "learning_rate": 0.00011999999999999999, "epoch": 0.14, "step": 40} |
|
{"loss": 2.5104, "learning_rate": 0.00015, "epoch": 0.18, "step": 50} |
|
{"loss": 2.3821, "learning_rate": 0.00017999999999999998, "epoch": 0.22, "step": 60} |
|
{"loss": 2.3514, "learning_rate": 0.00020999999999999998, "epoch": 0.25, "step": 70} |
|
{"loss": 2.2218, "learning_rate": 0.00023999999999999998, "epoch": 0.29, "step": 80} |
|
{"loss": 2.2881, "learning_rate": 0.00027, "epoch": 0.32, "step": 90} |
|
{"loss": 2.3525, "learning_rate": 0.0003, "epoch": 0.36, "step": 100} |
|
{"loss": 2.3534, "learning_rate": 0.00029888059701492536, "epoch": 0.4, "step": 110} |
|
{"loss": 2.375, "learning_rate": 0.00029776119402985074, "epoch": 0.43, "step": 120} |
|
{"loss": 2.1723, "learning_rate": 0.0002966417910447761, "epoch": 0.47, "step": 130} |
|
{"loss": 2.1229, "learning_rate": 0.00029552238805970145, "epoch": 0.5, "step": 140} |
|
{"loss": 2.3203, "learning_rate": 0.00029440298507462683, "epoch": 0.54, "step": 150} |
|
{"loss": 2.2084, "learning_rate": 0.0002932835820895522, "epoch": 0.58, "step": 160} |
|
{"loss": 2.2841, "learning_rate": 0.0002921641791044776, "epoch": 0.61, "step": 170} |
|
{"loss": 2.135, "learning_rate": 0.00029104477611940297, "epoch": 0.65, "step": 180} |
|
{"loss": 2.2003, "learning_rate": 0.0002899253731343283, "epoch": 0.68, "step": 190} |
|
{"loss": 2.2004, "learning_rate": 0.0002888059701492537, "epoch": 0.72, "step": 200} |
|
{"loss": 2.0142, "learning_rate": 0.0002876865671641791, "epoch": 0.76, "step": 210} |
|
{"loss": 2.1899, "learning_rate": 0.00028656716417910444, "epoch": 0.79, "step": 220} |
|
{"loss": 2.1275, "learning_rate": 0.0002854477611940298, "epoch": 0.83, "step": 230} |
|
{"loss": 2.2414, "learning_rate": 0.0002843283582089552, "epoch": 0.86, "step": 240} |
|
{"loss": 2.162, "learning_rate": 0.0002832089552238806, "epoch": 0.9, "step": 250} |
|
{"loss": 2.1962, "learning_rate": 0.00028208955223880597, "epoch": 0.94, "step": 260} |
|
{"loss": 2.0908, "learning_rate": 0.0002809701492537313, "epoch": 0.97, "step": 270} |
|
{"loss": 2.2293, "learning_rate": 0.0002798507462686567, "epoch": 1.01, "step": 280} |
|
{"loss": 2.1165, "learning_rate": 0.00027873134328358206, "epoch": 1.04, "step": 290} |
|
{"loss": 2.1872, "learning_rate": 0.00027761194029850744, "epoch": 1.08, "step": 300} |
|
{"loss": 2.0367, "learning_rate": 0.0002764925373134328, "epoch": 1.12, "step": 310} |
|
{"loss": 2.0395, "learning_rate": 0.0002753731343283582, "epoch": 1.15, "step": 320} |
|
{"loss": 2.0058, "learning_rate": 0.0002742537313432836, "epoch": 1.19, "step": 330} |
|
{"loss": 1.9998, "learning_rate": 0.0002731343283582089, "epoch": 1.22, "step": 340} |
|
{"loss": 2.1682, "learning_rate": 0.0002720149253731343, "epoch": 1.26, "step": 350} |
|
{"loss": 1.9747, "learning_rate": 0.0002708955223880597, "epoch": 1.29, "step": 360} |
|
{"loss": 2.0787, "learning_rate": 0.00026977611940298506, "epoch": 1.33, "step": 370} |
|
{"loss": 2.235, "learning_rate": 0.00026865671641791044, "epoch": 1.37, "step": 380} |
|
{"loss": 1.9715, "learning_rate": 0.00026753731343283577, "epoch": 1.4, "step": 390} |
|
{"loss": 1.9939, "learning_rate": 0.00026641791044776115, "epoch": 1.44, "step": 400} |
|
{"loss": 2.0815, "learning_rate": 0.0002652985074626866, "epoch": 1.47, "step": 410} |
|
{"loss": 2.0237, "learning_rate": 0.0002641791044776119, "epoch": 1.51, "step": 420} |
|
{"loss": 2.3032, "learning_rate": 0.0002630597014925373, "epoch": 1.55, "step": 430} |
|
{"loss": 2.1848, "learning_rate": 0.0002619402985074627, "epoch": 1.58, "step": 440} |
|
{"loss": 2.1893, "learning_rate": 0.00026082089552238806, "epoch": 1.62, "step": 450} |
|
{"loss": 1.9002, "learning_rate": 0.0002597014925373134, "epoch": 1.65, "step": 460} |
|
{"loss": 2.0481, "learning_rate": 0.00025858208955223876, "epoch": 1.69, "step": 470} |
|
{"loss": 2.1521, "learning_rate": 0.00025746268656716415, "epoch": 1.73, "step": 480} |
|
{"loss": 1.9633, "learning_rate": 0.00025634328358208953, "epoch": 1.76, "step": 490} |
|
{"loss": 2.1533, "learning_rate": 0.0002552238805970149, "epoch": 1.8, "step": 500} |
|
{"loss": 1.8578, "learning_rate": 0.00025410447761194024, "epoch": 1.83, "step": 510} |
|
{"loss": 2.0494, "learning_rate": 0.00025298507462686567, "epoch": 1.87, "step": 520} |
|
{"loss": 2.1759, "learning_rate": 0.00025186567164179105, "epoch": 1.91, "step": 530} |
|
{"loss": 2.0618, "learning_rate": 0.0002507462686567164, "epoch": 1.94, "step": 540} |
|
{"loss": 2.0098, "learning_rate": 0.00024962686567164176, "epoch": 1.98, "step": 550} |
|
{"loss": 1.9424, "learning_rate": 0.00024850746268656714, "epoch": 2.01, "step": 560} |
|
{"loss": 1.8802, "learning_rate": 0.0002473880597014925, "epoch": 2.05, "step": 570} |
|
{"loss": 2.0241, "learning_rate": 0.0002462686567164179, "epoch": 2.09, "step": 580} |
|
{"loss": 2.0073, "learning_rate": 0.00024514925373134323, "epoch": 2.12, "step": 590} |
|
{"loss": 2.0162, "learning_rate": 0.00024402985074626864, "epoch": 2.16, "step": 600} |
|
{"loss": 1.8319, "learning_rate": 0.000242910447761194, "epoch": 2.19, "step": 610} |
|
{"loss": 1.814, "learning_rate": 0.00024179104477611938, "epoch": 2.23, "step": 620} |
|
{"loss": 1.5829, "learning_rate": 0.00024067164179104476, "epoch": 2.27, "step": 630} |
|
{"loss": 1.7978, "learning_rate": 0.00023955223880597012, "epoch": 2.3, "step": 640} |
|
{"loss": 1.8052, "learning_rate": 0.0002384328358208955, "epoch": 2.34, "step": 650} |
|
{"loss": 2.0461, "learning_rate": 0.00023731343283582085, "epoch": 2.37, "step": 660} |
|
{"loss": 1.8847, "learning_rate": 0.00023619402985074626, "epoch": 2.41, "step": 670} |
|
{"loss": 1.8465, "learning_rate": 0.00023507462686567164, "epoch": 2.45, "step": 680} |
|
{"loss": 1.7806, "learning_rate": 0.000233955223880597, "epoch": 2.48, "step": 690} |
|
{"loss": 1.8604, "learning_rate": 0.00023283582089552238, "epoch": 2.52, "step": 700} |
|
{"loss": 1.7639, "learning_rate": 0.00023171641791044773, "epoch": 2.55, "step": 710} |
|
{"loss": 1.8926, "learning_rate": 0.0002305970149253731, "epoch": 2.59, "step": 720} |
|
{"loss": 1.7832, "learning_rate": 0.0002294776119402985, "epoch": 2.63, "step": 730} |
|
{"loss": 1.7619, "learning_rate": 0.00022835820895522385, "epoch": 2.66, "step": 740} |
|
{"loss": 1.8423, "learning_rate": 0.00022723880597014923, "epoch": 2.7, "step": 750} |
|
{"loss": 1.9202, "learning_rate": 0.00022611940298507459, "epoch": 2.73, "step": 760} |
|
{"loss": 2.0189, "learning_rate": 0.000225, "epoch": 2.77, "step": 770} |
|
{"loss": 2.0472, "learning_rate": 0.00022388059701492538, "epoch": 2.81, "step": 780} |
|
{"loss": 2.0446, "learning_rate": 0.00022276119402985073, "epoch": 2.84, "step": 790} |
|
{"loss": 1.8546, "learning_rate": 0.0002216417910447761, "epoch": 2.88, "step": 800} |
|
{"loss": 1.896, "learning_rate": 0.00022052238805970147, "epoch": 2.91, "step": 810} |
|
{"loss": 2.0741, "learning_rate": 0.00021940298507462685, "epoch": 2.95, "step": 820} |
|
{"loss": 1.8384, "learning_rate": 0.00021828358208955223, "epoch": 2.99, "step": 830} |
|
{"loss": 1.9125, "learning_rate": 0.00021716417910447758, "epoch": 3.02, "step": 840} |
|
{"loss": 1.6362, "learning_rate": 0.00021604477611940296, "epoch": 3.06, "step": 850} |
|
{"loss": 1.5579, "learning_rate": 0.00021492537313432832, "epoch": 3.09, "step": 860} |
|
{"loss": 1.7687, "learning_rate": 0.0002138059701492537, "epoch": 3.13, "step": 870} |
|
{"loss": 1.6143, "learning_rate": 0.0002126865671641791, "epoch": 3.17, "step": 880} |
|
{"loss": 1.6969, "learning_rate": 0.00021156716417910446, "epoch": 3.2, "step": 890} |
|
{"loss": 1.4892, "learning_rate": 0.00021044776119402985, "epoch": 3.24, "step": 900} |
|
{"loss": 1.7392, "learning_rate": 0.0002093283582089552, "epoch": 3.27, "step": 910} |
|
{"loss": 1.691, "learning_rate": 0.00020820895522388058, "epoch": 3.31, "step": 920} |
|
{"loss": 1.6215, "learning_rate": 0.00020708955223880596, "epoch": 3.35, "step": 930} |
|
{"loss": 1.7974, "learning_rate": 0.00020597014925373132, "epoch": 3.38, "step": 940} |
|
{"loss": 1.747, "learning_rate": 0.0002048507462686567, "epoch": 3.42, "step": 950} |
|
{"loss": 1.692, "learning_rate": 0.00020373134328358205, "epoch": 3.45, "step": 960} |
|
{"loss": 1.5483, "learning_rate": 0.00020261194029850743, "epoch": 3.49, "step": 970} |
|
{"loss": 1.6975, "learning_rate": 0.00020149253731343284, "epoch": 3.53, "step": 980} |
|
{"loss": 1.4288, "learning_rate": 0.0002003731343283582, "epoch": 3.56, "step": 990} |
|
{"loss": 1.8136, "learning_rate": 0.00019925373134328358, "epoch": 3.6, "step": 1000} |
|
{"loss": 1.7636, "learning_rate": 0.00019813432835820893, "epoch": 3.63, "step": 1010} |
|
{"loss": 1.7509, "learning_rate": 0.00019701492537313432, "epoch": 3.67, "step": 1020} |
|
{"loss": 1.553, "learning_rate": 0.0001958955223880597, "epoch": 3.71, "step": 1030} |
|
{"loss": 1.752, "learning_rate": 0.00019477611940298505, "epoch": 3.74, "step": 1040} |
|
{"loss": 1.7697, "learning_rate": 0.00019365671641791043, "epoch": 3.78, "step": 1050} |
|
{"loss": 1.6802, "learning_rate": 0.0001925373134328358, "epoch": 3.81, "step": 1060} |
|
{"loss": 1.6143, "learning_rate": 0.00019141791044776117, "epoch": 3.85, "step": 1070} |
|
{"loss": 1.8689, "learning_rate": 0.00019029850746268658, "epoch": 3.88, "step": 1080} |
|
{"loss": 1.7677, "learning_rate": 0.00018917910447761193, "epoch": 3.92, "step": 1090} |
|
{"loss": 1.7427, "learning_rate": 0.0001880597014925373, "epoch": 3.96, "step": 1100} |
|
{"loss": 1.509, "learning_rate": 0.00018694029850746267, "epoch": 3.99, "step": 1110} |
|
{"loss": 1.5502, "learning_rate": 0.00018582089552238805, "epoch": 4.03, "step": 1120} |
|
{"loss": 1.4425, "learning_rate": 0.0001847014925373134, "epoch": 4.06, "step": 1130} |
|
{"loss": 1.5528, "learning_rate": 0.00018358208955223879, "epoch": 4.1, "step": 1140} |
|
{"loss": 1.445, "learning_rate": 0.00018246268656716417, "epoch": 4.14, "step": 1150} |
|
{"loss": 1.5416, "learning_rate": 0.00018134328358208952, "epoch": 4.17, "step": 1160} |
|
{"loss": 1.553, "learning_rate": 0.0001802238805970149, "epoch": 4.21, "step": 1170} |
|
{"loss": 1.4628, "learning_rate": 0.00017910447761194026, "epoch": 4.24, "step": 1180} |
|
{"loss": 1.5914, "learning_rate": 0.00017798507462686567, "epoch": 4.28, "step": 1190} |
|
{"loss": 1.3937, "learning_rate": 0.00017686567164179105, "epoch": 4.32, "step": 1200} |
|
{"loss": 1.4096, "learning_rate": 0.0001757462686567164, "epoch": 4.35, "step": 1210} |
|
{"loss": 1.4138, "learning_rate": 0.00017462686567164178, "epoch": 4.39, "step": 1220} |
|
{"loss": 1.5178, "learning_rate": 0.00017350746268656714, "epoch": 4.42, "step": 1230} |
|
{"loss": 1.5928, "learning_rate": 0.00017238805970149252, "epoch": 4.46, "step": 1240} |
|
{"loss": 1.4397, "learning_rate": 0.0001712686567164179, "epoch": 4.5, "step": 1250} |
|
{"loss": 1.3036, "learning_rate": 0.00017014925373134325, "epoch": 4.53, "step": 1260} |
|
{"loss": 1.4615, "learning_rate": 0.00016902985074626864, "epoch": 4.57, "step": 1270} |
|
{"loss": 1.6205, "learning_rate": 0.000167910447761194, "epoch": 4.6, "step": 1280} |
|
{"loss": 1.4631, "learning_rate": 0.0001667910447761194, "epoch": 4.64, "step": 1290} |
|
{"loss": 1.5588, "learning_rate": 0.00016567164179104478, "epoch": 4.68, "step": 1300} |
|
{"loss": 1.5026, "learning_rate": 0.00016455223880597014, "epoch": 4.71, "step": 1310} |
|
{"loss": 1.4519, "learning_rate": 0.00016343283582089552, "epoch": 4.75, "step": 1320} |
|
{"loss": 1.6337, "learning_rate": 0.00016231343283582087, "epoch": 4.78, "step": 1330} |
|
{"loss": 1.482, "learning_rate": 0.00016119402985074625, "epoch": 4.82, "step": 1340} |
|
{"loss": 1.3933, "learning_rate": 0.00016007462686567163, "epoch": 4.86, "step": 1350} |
|
{"loss": 1.5357, "learning_rate": 0.000158955223880597, "epoch": 4.89, "step": 1360} |
|
{"loss": 1.3478, "learning_rate": 0.00015783582089552237, "epoch": 4.93, "step": 1370} |
|
{"loss": 1.5469, "learning_rate": 0.00015671641791044772, "epoch": 4.96, "step": 1380} |
|
{"loss": 1.4212, "learning_rate": 0.00015559701492537313, "epoch": 5.0, "step": 1390} |
|
{"loss": 1.3422, "learning_rate": 0.00015447761194029851, "epoch": 5.04, "step": 1400} |
|
{"loss": 1.4765, "learning_rate": 0.00015335820895522387, "epoch": 5.07, "step": 1410} |
|
{"loss": 1.2777, "learning_rate": 0.00015223880597014925, "epoch": 5.11, "step": 1420} |
|
{"loss": 1.3657, "learning_rate": 0.0001511194029850746, "epoch": 5.14, "step": 1430} |
|
{"loss": 1.1507, "learning_rate": 0.00015, "epoch": 5.18, "step": 1440} |
|
{"loss": 1.489, "learning_rate": 0.00014888059701492537, "epoch": 5.22, "step": 1450} |
|
{"loss": 1.4137, "learning_rate": 0.00014776119402985072, "epoch": 5.25, "step": 1460} |
|
{"loss": 1.2471, "learning_rate": 0.0001466417910447761, "epoch": 5.29, "step": 1470} |
|
{"loss": 1.3481, "learning_rate": 0.00014552238805970149, "epoch": 5.32, "step": 1480} |
|
{"loss": 1.4573, "learning_rate": 0.00014440298507462684, "epoch": 5.36, "step": 1490} |
|
{"loss": 1.4607, "learning_rate": 0.00014328358208955222, "epoch": 5.4, "step": 1500} |
|
{"loss": 1.2774, "learning_rate": 0.0001421641791044776, "epoch": 5.43, "step": 1510} |
|
{"loss": 1.1598, "learning_rate": 0.00014104477611940298, "epoch": 5.47, "step": 1520} |
|
{"loss": 1.3139, "learning_rate": 0.00013992537313432834, "epoch": 5.5, "step": 1530} |
|
{"loss": 1.2327, "learning_rate": 0.00013880597014925372, "epoch": 5.54, "step": 1540} |
|
{"loss": 1.2279, "learning_rate": 0.0001376865671641791, "epoch": 5.58, "step": 1550} |
|
{"loss": 1.3786, "learning_rate": 0.00013656716417910446, "epoch": 5.61, "step": 1560} |
|
{"loss": 1.3426, "learning_rate": 0.00013544776119402984, "epoch": 5.65, "step": 1570} |
|
{"loss": 1.4421, "learning_rate": 0.00013432835820895522, "epoch": 5.68, "step": 1580} |
|
{"loss": 1.3727, "learning_rate": 0.00013320895522388057, "epoch": 5.72, "step": 1590} |
|
{"loss": 1.1887, "learning_rate": 0.00013208955223880596, "epoch": 5.76, "step": 1600} |
|
{"loss": 1.1396, "learning_rate": 0.00013097014925373134, "epoch": 5.79, "step": 1610} |
|
{"loss": 1.2622, "learning_rate": 0.0001298507462686567, "epoch": 5.83, "step": 1620} |
|
{"loss": 1.3236, "learning_rate": 0.00012873134328358207, "epoch": 5.86, "step": 1630} |
|
{"loss": 1.2268, "learning_rate": 0.00012761194029850745, "epoch": 5.9, "step": 1640} |
|
{"loss": 1.1743, "learning_rate": 0.00012649253731343284, "epoch": 5.94, "step": 1650} |
|
{"loss": 1.2099, "learning_rate": 0.0001253731343283582, "epoch": 5.97, "step": 1660} |
|
{"loss": 1.3259, "learning_rate": 0.00012425373134328357, "epoch": 6.01, "step": 1670} |
|
{"loss": 1.0077, "learning_rate": 0.00012313432835820895, "epoch": 6.04, "step": 1680} |
|
{"loss": 0.9612, "learning_rate": 0.00012201492537313432, "epoch": 6.08, "step": 1690} |
|
{"loss": 1.2474, "learning_rate": 0.00012089552238805969, "epoch": 6.12, "step": 1700} |
|
{"loss": 1.0267, "learning_rate": 0.00011977611940298506, "epoch": 6.15, "step": 1710} |
|
{"loss": 1.0815, "learning_rate": 0.00011865671641791043, "epoch": 6.19, "step": 1720} |
|
{"loss": 1.1129, "learning_rate": 0.00011753731343283582, "epoch": 6.22, "step": 1730} |
|
{"loss": 1.0391, "learning_rate": 0.00011641791044776119, "epoch": 6.26, "step": 1740} |
|
{"loss": 1.1293, "learning_rate": 0.00011529850746268656, "epoch": 6.29, "step": 1750} |
|
{"loss": 1.2439, "learning_rate": 0.00011417910447761192, "epoch": 6.33, "step": 1760} |
|
{"loss": 1.0303, "learning_rate": 0.00011305970149253729, "epoch": 6.37, "step": 1770} |
|
{"loss": 1.18, "learning_rate": 0.00011194029850746269, "epoch": 6.4, "step": 1780} |
|
{"loss": 1.1128, "learning_rate": 0.00011082089552238806, "epoch": 6.44, "step": 1790} |
|
{"loss": 1.3343, "learning_rate": 0.00010970149253731342, "epoch": 6.47, "step": 1800} |
|
{"loss": 1.0738, "learning_rate": 0.00010858208955223879, "epoch": 6.51, "step": 1810} |
|
{"loss": 1.2427, "learning_rate": 0.00010746268656716416, "epoch": 6.55, "step": 1820} |
|
{"loss": 1.2309, "learning_rate": 0.00010634328358208955, "epoch": 6.58, "step": 1830} |
|
{"loss": 1.1866, "learning_rate": 0.00010522388059701492, "epoch": 6.62, "step": 1840} |
|
{"loss": 1.1793, "learning_rate": 0.00010410447761194029, "epoch": 6.65, "step": 1850} |
|
{"loss": 0.9522, "learning_rate": 0.00010298507462686566, "epoch": 6.69, "step": 1860} |
|
{"loss": 1.2496, "learning_rate": 0.00010186567164179103, "epoch": 6.73, "step": 1870} |
|
{"loss": 1.1967, "learning_rate": 0.00010074626865671642, "epoch": 6.76, "step": 1880} |
|
{"loss": 1.1936, "learning_rate": 9.962686567164179e-05, "epoch": 6.8, "step": 1890} |
|
{"loss": 1.264, "learning_rate": 9.850746268656716e-05, "epoch": 6.83, "step": 1900} |
|
{"loss": 1.2078, "learning_rate": 9.738805970149253e-05, "epoch": 6.87, "step": 1910} |
|
{"loss": 1.2529, "learning_rate": 9.62686567164179e-05, "epoch": 6.91, "step": 1920} |
|
{"loss": 1.2267, "learning_rate": 9.514925373134329e-05, "epoch": 6.94, "step": 1930} |
|
{"loss": 1.0987, "learning_rate": 9.402985074626866e-05, "epoch": 6.98, "step": 1940} |
|
{"loss": 1.0249, "learning_rate": 9.291044776119402e-05, "epoch": 7.01, "step": 1950} |
|
{"loss": 0.8457, "learning_rate": 9.179104477611939e-05, "epoch": 7.05, "step": 1960} |
|
{"loss": 0.8884, "learning_rate": 9.067164179104476e-05, "epoch": 7.09, "step": 1970} |
|
{"loss": 1.0515, "learning_rate": 8.955223880597013e-05, "epoch": 7.12, "step": 1980} |
|
{"loss": 1.1131, "learning_rate": 8.843283582089552e-05, "epoch": 7.16, "step": 1990} |
|
{"loss": 0.9864, "learning_rate": 8.731343283582089e-05, "epoch": 7.19, "step": 2000} |
|
{"loss": 0.9131, "learning_rate": 8.619402985074626e-05, "epoch": 7.23, "step": 2010} |
|
{"loss": 0.831, "learning_rate": 8.507462686567163e-05, "epoch": 7.27, "step": 2020} |
|
{"loss": 1.0958, "learning_rate": 8.3955223880597e-05, "epoch": 7.3, "step": 2030} |
|
{"loss": 1.1438, "learning_rate": 8.283582089552239e-05, "epoch": 7.34, "step": 2040} |
|
{"loss": 1.1594, "learning_rate": 8.171641791044776e-05, "epoch": 7.37, "step": 2050} |
|
{"loss": 1.0476, "learning_rate": 8.059701492537313e-05, "epoch": 7.41, "step": 2060} |
|
{"loss": 1.0496, "learning_rate": 7.94776119402985e-05, "epoch": 7.45, "step": 2070} |
|
{"loss": 0.8074, "learning_rate": 7.835820895522386e-05, "epoch": 7.48, "step": 2080} |
|
{"loss": 0.8995, "learning_rate": 7.723880597014926e-05, "epoch": 7.52, "step": 2090} |
|
{"loss": 1.0888, "learning_rate": 7.611940298507463e-05, "epoch": 7.55, "step": 2100} |
|
{"loss": 1.2066, "learning_rate": 7.5e-05, "epoch": 7.59, "step": 2110} |
|
{"loss": 0.9468, "learning_rate": 7.388059701492536e-05, "epoch": 7.63, "step": 2120} |
|
{"loss": 0.9874, "learning_rate": 7.276119402985074e-05, "epoch": 7.66, "step": 2130} |
|
{"loss": 0.8849, "learning_rate": 7.164179104477611e-05, "epoch": 7.7, "step": 2140} |
|
{"loss": 1.0517, "learning_rate": 7.052238805970149e-05, "epoch": 7.73, "step": 2150} |
|
{"loss": 0.9174, "learning_rate": 6.940298507462686e-05, "epoch": 7.77, "step": 2160} |
|
{"loss": 0.9831, "learning_rate": 6.828358208955223e-05, "epoch": 7.81, "step": 2170} |
|
{"loss": 1.1041, "learning_rate": 6.716417910447761e-05, "epoch": 7.84, "step": 2180} |
|
{"loss": 1.0281, "learning_rate": 6.604477611940298e-05, "epoch": 7.88, "step": 2190} |
|
{"loss": 1.0085, "learning_rate": 6.492537313432835e-05, "epoch": 7.91, "step": 2200} |
|
{"loss": 1.1037, "learning_rate": 6.380597014925373e-05, "epoch": 7.95, "step": 2210} |
|
{"loss": 1.2633, "learning_rate": 6.26865671641791e-05, "epoch": 7.99, "step": 2220} |
|
{"loss": 1.0117, "learning_rate": 6.156716417910448e-05, "epoch": 8.02, "step": 2230} |
|
{"loss": 0.9484, "learning_rate": 6.0447761194029845e-05, "epoch": 8.06, "step": 2240} |
|
{"loss": 0.8574, "learning_rate": 5.932835820895521e-05, "epoch": 8.09, "step": 2250} |
|
{"loss": 1.0723, "learning_rate": 5.8208955223880594e-05, "epoch": 8.13, "step": 2260} |
|
{"loss": 0.7553, "learning_rate": 5.708955223880596e-05, "epoch": 8.17, "step": 2270} |
|
{"loss": 0.9336, "learning_rate": 5.5970149253731344e-05, "epoch": 8.2, "step": 2280} |
|
{"loss": 0.8903, "learning_rate": 5.485074626865671e-05, "epoch": 8.24, "step": 2290} |
|
{"loss": 0.7616, "learning_rate": 5.373134328358208e-05, "epoch": 8.27, "step": 2300} |
|
{"loss": 0.9979, "learning_rate": 5.261194029850746e-05, "epoch": 8.31, "step": 2310} |
|
{"loss": 1.1038, "learning_rate": 5.149253731343283e-05, "epoch": 8.35, "step": 2320} |
|
{"loss": 0.8682, "learning_rate": 5.037313432835821e-05, "epoch": 8.38, "step": 2330} |
|
{"loss": 0.9784, "learning_rate": 4.925373134328358e-05, "epoch": 8.42, "step": 2340} |
|
{"loss": 0.8704, "learning_rate": 4.813432835820895e-05, "epoch": 8.45, "step": 2350} |
|
{"loss": 0.9511, "learning_rate": 4.701492537313433e-05, "epoch": 8.49, "step": 2360} |
|
{"loss": 1.0958, "learning_rate": 4.5895522388059696e-05, "epoch": 8.53, "step": 2370} |
|
{"loss": 0.7955, "learning_rate": 4.4776119402985064e-05, "epoch": 8.56, "step": 2380} |
|
{"loss": 0.9566, "learning_rate": 4.3656716417910446e-05, "epoch": 8.6, "step": 2390} |
|
{"loss": 0.7675, "learning_rate": 4.2537313432835814e-05, "epoch": 8.63, "step": 2400} |
|
{"loss": 0.9047, "learning_rate": 4.1417910447761195e-05, "epoch": 8.67, "step": 2410} |
|
{"loss": 0.8634, "learning_rate": 4.029850746268656e-05, "epoch": 8.71, "step": 2420} |
|
{"loss": 0.7287, "learning_rate": 3.917910447761193e-05, "epoch": 8.74, "step": 2430} |
|
{"loss": 0.9552, "learning_rate": 3.805970149253731e-05, "epoch": 8.78, "step": 2440} |
|
{"loss": 0.8651, "learning_rate": 3.694029850746268e-05, "epoch": 8.81, "step": 2450} |
|
{"loss": 0.9632, "learning_rate": 3.5820895522388055e-05, "epoch": 8.85, "step": 2460} |
|
{"loss": 0.9298, "learning_rate": 3.470149253731343e-05, "epoch": 8.88, "step": 2470} |
|
{"loss": 0.7165, "learning_rate": 3.3582089552238805e-05, "epoch": 8.92, "step": 2480} |
|
{"loss": 0.9407, "learning_rate": 3.246268656716417e-05, "epoch": 8.96, "step": 2490} |
|
{"loss": 0.8694, "learning_rate": 3.134328358208955e-05, "epoch": 8.99, "step": 2500} |
|
{"loss": 0.8516, "learning_rate": 3.0223880597014922e-05, "epoch": 9.03, "step": 2510} |
|
{"loss": 0.9565, "learning_rate": 2.9104477611940297e-05, "epoch": 9.06, "step": 2520} |
|
{"loss": 0.8364, "learning_rate": 2.7985074626865672e-05, "epoch": 9.1, "step": 2530} |
|
{"loss": 0.8792, "learning_rate": 2.686567164179104e-05, "epoch": 9.14, "step": 2540} |
|
{"loss": 0.7395, "learning_rate": 2.5746268656716415e-05, "epoch": 9.17, "step": 2550} |
|
{"loss": 0.7, "learning_rate": 2.462686567164179e-05, "epoch": 9.21, "step": 2560} |
|
{"loss": 0.8797, "learning_rate": 2.3507462686567164e-05, "epoch": 9.24, "step": 2570} |
|
{"loss": 0.8762, "learning_rate": 2.2388059701492532e-05, "epoch": 9.28, "step": 2580} |
|
{"loss": 0.8505, "learning_rate": 2.1268656716417907e-05, "epoch": 9.32, "step": 2590} |
|
{"loss": 0.7759, "learning_rate": 2.014925373134328e-05, "epoch": 9.35, "step": 2600} |
|
{"loss": 0.7994, "learning_rate": 1.9029850746268656e-05, "epoch": 9.39, "step": 2610} |
|
{"loss": 0.9462, "learning_rate": 1.7910447761194028e-05, "epoch": 9.42, "step": 2620} |
|
{"loss": 0.9192, "learning_rate": 1.6791044776119402e-05, "epoch": 9.46, "step": 2630} |
|
{"loss": 0.8138, "learning_rate": 1.5671641791044774e-05, "epoch": 9.5, "step": 2640} |
|
{"loss": 0.7991, "learning_rate": 1.4552238805970149e-05, "epoch": 9.53, "step": 2650} |
|
{"loss": 0.8714, "learning_rate": 1.343283582089552e-05, "epoch": 9.57, "step": 2660} |
|
{"loss": 0.8073, "learning_rate": 1.2313432835820895e-05, "epoch": 9.6, "step": 2670} |
|
{"loss": 0.7959, "learning_rate": 1.1194029850746266e-05, "epoch": 9.64, "step": 2680} |
|
{"loss": 0.7809, "learning_rate": 1.007462686567164e-05, "epoch": 9.68, "step": 2690} |
|
{"loss": 0.5922, "learning_rate": 8.955223880597014e-06, "epoch": 9.71, "step": 2700} |
|
{"loss": 0.8417, "learning_rate": 7.835820895522387e-06, "epoch": 9.75, "step": 2710} |
|
{"loss": 0.8592, "learning_rate": 6.71641791044776e-06, "epoch": 9.78, "step": 2720} |
|
{"loss": 0.9484, "learning_rate": 5.597014925373133e-06, "epoch": 9.82, "step": 2730} |
|
{"loss": 0.824, "learning_rate": 4.477611940298507e-06, "epoch": 9.86, "step": 2740} |
|
{"loss": 0.8203, "learning_rate": 3.35820895522388e-06, "epoch": 9.89, "step": 2750} |
|
{"loss": 0.7591, "learning_rate": 2.2388059701492535e-06, "epoch": 9.93, "step": 2760} |
|
{"loss": 0.7422, "learning_rate": 1.1194029850746267e-06, "epoch": 9.96, "step": 2770} |
|
{"loss": 0.9275, "learning_rate": 0.0, "epoch": 10.0, "step": 2780} |
|
{"train_runtime": 65996.59, "train_samples_per_second": 0.042, "train_steps_per_second": 0.042, "total_flos": 6.64507164033024e+16, "train_loss": 1.4632105468846053, "epoch": 10.0, "step": 2780} |