diff --git "a/checkpoint-39000/trainer_state.json" "b/checkpoint-39000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-39000/trainer_state.json" @@ -0,0 +1,23416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3864201919658727, + "global_step": 39000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.3999999999999997e-05, + "loss": 1.0564, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 5.399999999999999e-05, + "loss": 0.9789, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 8.4e-05, + "loss": 0.9108, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011399999999999999, + "loss": 0.8764, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 0.8502, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017399999999999997, + "loss": 0.8358, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 0.000204, + "loss": 0.751, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 0.000234, + "loss": 0.6918, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.00026399999999999997, + "loss": 0.7512, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.000294, + "loss": 0.7945, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00029997152687151495, + "loss": 0.7894, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 0.00029993593546090875, + "loss": 0.6707, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002999003440503025, + "loss": 0.8224, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002998647526396962, + "loss": 0.7089, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029982916122909, + "loss": 0.726, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029979356981848376, + "loss": 0.8548, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029975797840787755, + "loss": 0.7061, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002997223869972713, + "loss": 0.7457, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002996867955866651, + "loss": 0.7526, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002996512041760588, + "loss": 0.8593, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029961561276545257, + "loss": 0.7059, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029958002135484636, + "loss": 0.8394, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002995444299442401, + "loss": 0.7684, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029950883853363384, + "loss": 0.72, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029947324712302763, + "loss": 0.7342, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029943765571242137, + "loss": 0.6663, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002994020643018151, + "loss": 0.7824, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002993664728912089, + "loss": 0.8046, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029933088148060264, + "loss": 0.8155, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002992952900699964, + "loss": 0.6827, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002992596986593902, + "loss": 0.749, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002992241072487839, + "loss": 0.7145, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002991885158381777, + "loss": 0.7137, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029915292442757145, + "loss": 0.6598, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029911733301696524, + "loss": 0.6857, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 0.000299081741606359, + "loss": 0.7471, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002990461501957527, + "loss": 0.6752, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002990105587851465, + "loss": 0.7399, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029897496737454026, + "loss": 0.7851, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 0.000298939375963934, + "loss": 0.7845, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002989037845533278, + "loss": 0.7856, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029886819314272153, + "loss": 0.7442, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029883260173211527, + "loss": 0.7935, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 0.000298797010321509, + "loss": 0.8512, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002987614189109028, + "loss": 0.6972, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029872582750029654, + "loss": 0.7037, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029869023608969033, + "loss": 0.7217, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002986546446790841, + "loss": 0.7686, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029861905326847787, + "loss": 0.7723, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002985834618578716, + "loss": 0.6703, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002985478704472654, + "loss": 0.7431, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029851227903665914, + "loss": 0.8524, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002984766876260529, + "loss": 0.7808, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029844109621544667, + "loss": 0.7762, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002984055048048404, + "loss": 0.6834, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029836991339423415, + "loss": 0.7705, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002983343219836279, + "loss": 0.6926, + "step": 570 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002982987305730217, + "loss": 0.6399, + "step": 580 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002982631391624154, + "loss": 0.6441, + "step": 590 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002982275477518092, + "loss": 0.7007, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029819195634120296, + "loss": 0.7483, + "step": 610 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029815636493059675, + "loss": 0.7792, + "step": 620 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002981207735199905, + "loss": 0.7824, + "step": 630 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002980851821093843, + "loss": 0.8315, + "step": 640 + }, + { + "epoch": 0.02, + "learning_rate": 0.000298049590698778, + "loss": 0.7339, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029801399928817176, + "loss": 0.5957, + "step": 660 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002979784078775655, + "loss": 0.7083, + "step": 670 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002979428164669593, + "loss": 0.7184, + "step": 680 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029790722505635304, + "loss": 0.6226, + "step": 690 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002978716336457468, + "loss": 0.7426, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029783604223514057, + "loss": 0.7773, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002978004508245343, + "loss": 0.7387, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029776485941392805, + "loss": 0.7358, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029772926800332184, + "loss": 0.6371, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002976936765927156, + "loss": 0.7573, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002976580851821094, + "loss": 0.8435, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002976224937715031, + "loss": 0.7046, + "step": 770 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002975869023608969, + "loss": 0.834, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029755131095029065, + "loss": 0.6835, + "step": 790 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002975157195396844, + "loss": 0.7006, + "step": 800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002974801281290782, + "loss": 0.6872, + "step": 810 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002974445367184719, + "loss": 0.7237, + "step": 820 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029740894530786566, + "loss": 0.6703, + "step": 830 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029737335389725945, + "loss": 0.788, + "step": 840 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002973377624866532, + "loss": 0.6923, + "step": 850 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029730217107604693, + "loss": 0.6793, + "step": 860 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002972665796654407, + "loss": 0.6775, + "step": 870 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029723098825483446, + "loss": 0.6772, + "step": 880 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002971953968442282, + "loss": 0.7651, + "step": 890 + }, + { + "epoch": 0.03, + "learning_rate": 0.000297159805433622, + "loss": 0.6891, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029712421402301574, + "loss": 0.733, + "step": 910 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029708862261240953, + "loss": 0.7815, + "step": 920 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029705303120180327, + "loss": 0.7949, + "step": 930 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029701743979119706, + "loss": 0.6192, + "step": 940 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002969818483805908, + "loss": 0.7257, + "step": 950 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029694625696998454, + "loss": 0.7321, + "step": 960 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029691066555937834, + "loss": 0.7603, + "step": 970 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002968750741487721, + "loss": 0.8687, + "step": 980 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002968394827381658, + "loss": 0.7521, + "step": 990 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029680389132755955, + "loss": 0.7754, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029676829991695335, + "loss": 0.5612, + "step": 1010 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002967327085063471, + "loss": 0.6117, + "step": 1020 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002966971170957409, + "loss": 0.7266, + "step": 1030 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002966615256851346, + "loss": 0.7288, + "step": 1040 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029662593427452836, + "loss": 0.7119, + "step": 1050 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029659034286392215, + "loss": 0.7122, + "step": 1060 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002965547514533159, + "loss": 0.662, + "step": 1070 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002965191600427097, + "loss": 0.6961, + "step": 1080 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002964835686321034, + "loss": 0.6354, + "step": 1090 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002964479772214972, + "loss": 0.7367, + "step": 1100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029641238581089096, + "loss": 0.7616, + "step": 1110 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002963767944002847, + "loss": 0.7861, + "step": 1120 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029634120298967844, + "loss": 0.7717, + "step": 1130 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029630561157907223, + "loss": 0.6594, + "step": 1140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029627002016846597, + "loss": 0.6343, + "step": 1150 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002962344287578597, + "loss": 0.7648, + "step": 1160 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002961988373472535, + "loss": 0.6647, + "step": 1170 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029616324593664724, + "loss": 0.6518, + "step": 1180 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029612765452604104, + "loss": 0.768, + "step": 1190 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002960920631154348, + "loss": 0.7371, + "step": 1200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029605647170482857, + "loss": 0.6907, + "step": 1210 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002960208802942223, + "loss": 0.7132, + "step": 1220 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029598528888361605, + "loss": 0.8197, + "step": 1230 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029594969747300984, + "loss": 0.7333, + "step": 1240 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002959141060624036, + "loss": 0.6143, + "step": 1250 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002958785146517973, + "loss": 0.7494, + "step": 1260 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002958429232411911, + "loss": 0.7385, + "step": 1270 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029580733183058486, + "loss": 0.8033, + "step": 1280 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002957717404199786, + "loss": 0.6221, + "step": 1290 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002957361490093724, + "loss": 0.715, + "step": 1300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029570055759876613, + "loss": 0.7975, + "step": 1310 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029566496618815987, + "loss": 0.6922, + "step": 1320 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029562937477755366, + "loss": 0.7557, + "step": 1330 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002955937833669474, + "loss": 0.7403, + "step": 1340 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002955581919563412, + "loss": 0.6744, + "step": 1350 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029552260054573493, + "loss": 0.863, + "step": 1360 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029548700913512873, + "loss": 0.6081, + "step": 1370 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029545141772452247, + "loss": 0.637, + "step": 1380 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002954158263139162, + "loss": 0.6088, + "step": 1390 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029538023490331, + "loss": 0.5868, + "step": 1400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029534464349270374, + "loss": 0.8172, + "step": 1410 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002953090520820975, + "loss": 0.7566, + "step": 1420 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002952734606714913, + "loss": 0.7284, + "step": 1430 + }, + { + "epoch": 0.05, + "learning_rate": 0.000295237869260885, + "loss": 0.6883, + "step": 1440 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029520227785027875, + "loss": 0.685, + "step": 1450 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002951666864396725, + "loss": 0.657, + "step": 1460 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002951310950290663, + "loss": 0.8236, + "step": 1470 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029509550361846, + "loss": 0.656, + "step": 1480 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002950599122078538, + "loss": 0.6151, + "step": 1490 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029502432079724756, + "loss": 0.8033, + "step": 1500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029498872938664135, + "loss": 0.748, + "step": 1510 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002949531379760351, + "loss": 0.7847, + "step": 1520 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002949175465654289, + "loss": 0.7599, + "step": 1530 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002948819551548226, + "loss": 0.6552, + "step": 1540 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029484636374421636, + "loss": 0.7262, + "step": 1550 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029481077233361016, + "loss": 0.6506, + "step": 1560 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002947751809230039, + "loss": 0.6471, + "step": 1570 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029473958951239764, + "loss": 0.6956, + "step": 1580 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002947039981017914, + "loss": 0.677, + "step": 1590 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029466840669118517, + "loss": 0.6832, + "step": 1600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002946328152805789, + "loss": 0.7038, + "step": 1610 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002945972238699727, + "loss": 0.6823, + "step": 1620 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029456163245936644, + "loss": 0.7551, + "step": 1630 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029452604104876024, + "loss": 0.8668, + "step": 1640 + }, + { + "epoch": 0.06, + "learning_rate": 0.000294490449638154, + "loss": 0.68, + "step": 1650 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002944548582275477, + "loss": 0.8423, + "step": 1660 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002944192668169415, + "loss": 0.7349, + "step": 1670 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029438367540633525, + "loss": 0.6724, + "step": 1680 + }, + { + "epoch": 0.06, + "learning_rate": 0.000294348083995729, + "loss": 0.652, + "step": 1690 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002943124925851228, + "loss": 0.7264, + "step": 1700 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002942769011745165, + "loss": 0.743, + "step": 1710 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029424130976391026, + "loss": 0.7002, + "step": 1720 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029420571835330405, + "loss": 0.6784, + "step": 1730 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002941701269426978, + "loss": 0.7149, + "step": 1740 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029413453553209153, + "loss": 0.6883, + "step": 1750 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002940989441214853, + "loss": 0.7579, + "step": 1760 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029406335271087906, + "loss": 0.6891, + "step": 1770 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029402776130027286, + "loss": 0.6597, + "step": 1780 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002939921698896666, + "loss": 0.7376, + "step": 1790 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002939565784790604, + "loss": 0.6289, + "step": 1800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029392098706845413, + "loss": 0.7625, + "step": 1810 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002938889547989085, + "loss": 0.814, + "step": 1820 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029385336338830226, + "loss": 0.8062, + "step": 1830 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029381777197769605, + "loss": 0.7333, + "step": 1840 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002937821805670898, + "loss": 0.7614, + "step": 1850 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029374658915648353, + "loss": 0.5884, + "step": 1860 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002937109977458773, + "loss": 0.7197, + "step": 1870 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029367540633527106, + "loss": 0.7779, + "step": 1880 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002936398149246648, + "loss": 0.719, + "step": 1890 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029360422351405854, + "loss": 0.6475, + "step": 1900 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029356863210345234, + "loss": 0.7283, + "step": 1910 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002935330406928461, + "loss": 0.7922, + "step": 1920 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029349744928223987, + "loss": 0.7844, + "step": 1930 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002934618578716336, + "loss": 0.7782, + "step": 1940 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002934262664610274, + "loss": 0.6992, + "step": 1950 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029339067505042114, + "loss": 0.8458, + "step": 1960 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029335508363981494, + "loss": 0.7222, + "step": 1970 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002933194922292087, + "loss": 0.7072, + "step": 1980 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002932839008186024, + "loss": 0.701, + "step": 1990 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029324830940799615, + "loss": 0.7371, + "step": 2000 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029321271799738995, + "loss": 0.712, + "step": 2010 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002931771265867837, + "loss": 0.6398, + "step": 2020 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002931415351761774, + "loss": 0.7693, + "step": 2030 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002931059437655712, + "loss": 0.8002, + "step": 2040 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029307035235496496, + "loss": 0.7958, + "step": 2050 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029303476094435875, + "loss": 0.6528, + "step": 2060 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002929991695337525, + "loss": 0.6626, + "step": 2070 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029296357812314623, + "loss": 0.8145, + "step": 2080 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029292798671254, + "loss": 0.688, + "step": 2090 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029289239530193377, + "loss": 0.6805, + "step": 2100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029285680389132756, + "loss": 0.6324, + "step": 2110 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002928212124807213, + "loss": 0.7134, + "step": 2120 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029278562107011504, + "loss": 0.6492, + "step": 2130 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029275002965950883, + "loss": 0.7083, + "step": 2140 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029271443824890257, + "loss": 0.738, + "step": 2150 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002926788468382963, + "loss": 0.8718, + "step": 2160 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002926432554276901, + "loss": 0.6081, + "step": 2170 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029260766401708384, + "loss": 0.7758, + "step": 2180 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002925720726064776, + "loss": 0.7193, + "step": 2190 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002925364811958714, + "loss": 0.6435, + "step": 2200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002925008897852651, + "loss": 0.6554, + "step": 2210 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002924652983746589, + "loss": 0.6164, + "step": 2220 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029242970696405265, + "loss": 0.8168, + "step": 2230 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029239411555344644, + "loss": 0.7689, + "step": 2240 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002923585241428402, + "loss": 0.8057, + "step": 2250 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002923229327322339, + "loss": 0.5907, + "step": 2260 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002922873413216277, + "loss": 0.7117, + "step": 2270 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029225174991102145, + "loss": 0.6962, + "step": 2280 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002922161585004152, + "loss": 0.6219, + "step": 2290 + }, + { + "epoch": 0.08, + "learning_rate": 0.000292180567089809, + "loss": 0.7221, + "step": 2300 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029214497567920273, + "loss": 0.6648, + "step": 2310 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029210938426859647, + "loss": 0.6797, + "step": 2320 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002920737928579902, + "loss": 0.7903, + "step": 2330 + }, + { + "epoch": 0.08, + "learning_rate": 0.000292038201447384, + "loss": 0.6666, + "step": 2340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029200261003677774, + "loss": 0.5662, + "step": 2350 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029196701862617153, + "loss": 0.7177, + "step": 2360 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029193142721556527, + "loss": 0.7102, + "step": 2370 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029189583580495907, + "loss": 0.6911, + "step": 2380 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002918602443943528, + "loss": 0.7276, + "step": 2390 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002918246529837466, + "loss": 0.5878, + "step": 2400 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029178906157314034, + "loss": 0.6765, + "step": 2410 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002917534701625341, + "loss": 0.6597, + "step": 2420 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029171787875192787, + "loss": 0.7282, + "step": 2430 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002916822873413216, + "loss": 0.722, + "step": 2440 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029164669593071535, + "loss": 0.5749, + "step": 2450 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002916111045201091, + "loss": 0.7208, + "step": 2460 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002915755131095029, + "loss": 0.8862, + "step": 2470 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002915399216988966, + "loss": 0.6289, + "step": 2480 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029150433028829036, + "loss": 0.6025, + "step": 2490 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029146873887768416, + "loss": 0.8272, + "step": 2500 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002914331474670779, + "loss": 0.7309, + "step": 2510 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002913975560564717, + "loss": 0.8548, + "step": 2520 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029136196464586543, + "loss": 0.643, + "step": 2530 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002913263732352592, + "loss": 0.7233, + "step": 2540 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029129078182465296, + "loss": 0.7321, + "step": 2550 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002912551904140467, + "loss": 0.7556, + "step": 2560 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002912195990034405, + "loss": 0.7046, + "step": 2570 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029118400759283423, + "loss": 0.6583, + "step": 2580 + }, + { + "epoch": 0.09, + "learning_rate": 0.000291148416182228, + "loss": 0.6571, + "step": 2590 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029111282477162177, + "loss": 0.7019, + "step": 2600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002910772333610155, + "loss": 0.7106, + "step": 2610 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029104164195040925, + "loss": 0.6742, + "step": 2620 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029100605053980304, + "loss": 0.6429, + "step": 2630 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002909704591291968, + "loss": 0.6783, + "step": 2640 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002909348677185906, + "loss": 0.6635, + "step": 2650 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002908992763079843, + "loss": 0.6557, + "step": 2660 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002908636848973781, + "loss": 0.6747, + "step": 2670 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029082809348677185, + "loss": 0.6546, + "step": 2680 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002907925020761656, + "loss": 0.654, + "step": 2690 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002907569106655594, + "loss": 0.6698, + "step": 2700 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002907213192549531, + "loss": 0.6168, + "step": 2710 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029068572784434686, + "loss": 0.8227, + "step": 2720 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029065013643374065, + "loss": 0.6895, + "step": 2730 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002906145450231344, + "loss": 0.5398, + "step": 2740 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029057895361252813, + "loss": 0.7122, + "step": 2750 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002905433622019219, + "loss": 0.6676, + "step": 2760 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029050777079131566, + "loss": 0.7198, + "step": 2770 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002904721793807094, + "loss": 0.6876, + "step": 2780 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002904365879701032, + "loss": 0.6428, + "step": 2790 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029040099655949694, + "loss": 0.6349, + "step": 2800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029036540514889073, + "loss": 0.6444, + "step": 2810 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029032981373828447, + "loss": 0.8203, + "step": 2820 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029029422232767826, + "loss": 0.7768, + "step": 2830 + }, + { + "epoch": 0.1, + "learning_rate": 0.000290258630917072, + "loss": 0.8246, + "step": 2840 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029022303950646574, + "loss": 0.625, + "step": 2850 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029018744809585954, + "loss": 0.6561, + "step": 2860 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002901518566852533, + "loss": 0.7038, + "step": 2870 + }, + { + "epoch": 0.1, + "learning_rate": 0.000290116265274647, + "loss": 0.7551, + "step": 2880 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002900806738640408, + "loss": 0.8089, + "step": 2890 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029004508245343455, + "loss": 0.6719, + "step": 2900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002900094910428283, + "loss": 0.6635, + "step": 2910 + }, + { + "epoch": 0.1, + "learning_rate": 0.000289973899632222, + "loss": 0.7078, + "step": 2920 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002899383082216158, + "loss": 0.7614, + "step": 2930 + }, + { + "epoch": 0.1, + "learning_rate": 0.00028990271681100956, + "loss": 0.6785, + "step": 2940 + }, + { + "epoch": 0.1, + "learning_rate": 0.00028986712540040335, + "loss": 0.6359, + "step": 2950 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002898315339897971, + "loss": 0.701, + "step": 2960 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002897959425791909, + "loss": 0.6646, + "step": 2970 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002897603511685846, + "loss": 0.748, + "step": 2980 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002897247597579784, + "loss": 0.6089, + "step": 2990 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028968916834737216, + "loss": 0.6897, + "step": 3000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002896535769367659, + "loss": 0.6741, + "step": 3010 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028961798552615964, + "loss": 0.7575, + "step": 3020 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028958239411555343, + "loss": 0.6972, + "step": 3030 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028954680270494717, + "loss": 0.7312, + "step": 3040 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002895112112943409, + "loss": 0.6732, + "step": 3050 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002894756198837347, + "loss": 0.8834, + "step": 3060 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028944002847312844, + "loss": 0.7209, + "step": 3070 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002894044370625222, + "loss": 0.7598, + "step": 3080 + }, + { + "epoch": 0.11, + "learning_rate": 0.000289368845651916, + "loss": 0.6389, + "step": 3090 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002893332542413097, + "loss": 0.6792, + "step": 3100 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002892976628307035, + "loss": 0.743, + "step": 3110 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028926207142009725, + "loss": 0.7435, + "step": 3120 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028922648000949104, + "loss": 0.701, + "step": 3130 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002891908885988848, + "loss": 0.732, + "step": 3140 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002891552971882785, + "loss": 0.6616, + "step": 3150 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002891197057776723, + "loss": 0.7537, + "step": 3160 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028908411436706606, + "loss": 0.7532, + "step": 3170 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002890485229564598, + "loss": 0.8112, + "step": 3180 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002890129315458536, + "loss": 0.6904, + "step": 3190 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028897734013524733, + "loss": 0.6614, + "step": 3200 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028894174872464107, + "loss": 0.6224, + "step": 3210 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028890615731403486, + "loss": 0.7146, + "step": 3220 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002888705659034286, + "loss": 0.7428, + "step": 3230 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002888349744928224, + "loss": 0.7138, + "step": 3240 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028879938308221613, + "loss": 0.6817, + "step": 3250 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002887637916716099, + "loss": 0.7095, + "step": 3260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028872820026100367, + "loss": 0.5924, + "step": 3270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002886926088503974, + "loss": 0.6337, + "step": 3280 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002886570174397912, + "loss": 0.7494, + "step": 3290 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028862142602918494, + "loss": 0.6771, + "step": 3300 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002885858346185787, + "loss": 0.698, + "step": 3310 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028855024320797247, + "loss": 0.6463, + "step": 3320 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002885146517973662, + "loss": 0.7366, + "step": 3330 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028847906038675995, + "loss": 0.6497, + "step": 3340 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002884434689761537, + "loss": 0.6996, + "step": 3350 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002884078775655475, + "loss": 0.6958, + "step": 3360 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002883722861549412, + "loss": 0.7147, + "step": 3370 + }, + { + "epoch": 0.12, + "learning_rate": 0.000288336694744335, + "loss": 0.6932, + "step": 3380 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028830110333372876, + "loss": 0.7243, + "step": 3390 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028826551192312255, + "loss": 0.703, + "step": 3400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002882299205125163, + "loss": 0.6408, + "step": 3410 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002881943291019101, + "loss": 0.7289, + "step": 3420 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002881587376913038, + "loss": 0.6828, + "step": 3430 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028812314628069756, + "loss": 0.7103, + "step": 3440 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028808755487009136, + "loss": 0.636, + "step": 3450 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002880519634594851, + "loss": 0.6507, + "step": 3460 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028801637204887884, + "loss": 0.6438, + "step": 3470 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002879807806382726, + "loss": 0.6557, + "step": 3480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028794518922766637, + "loss": 0.6828, + "step": 3490 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002879095978170601, + "loss": 0.6395, + "step": 3500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028787400640645385, + "loss": 0.5837, + "step": 3510 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028783841499584764, + "loss": 0.6836, + "step": 3520 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002878028235852414, + "loss": 0.7546, + "step": 3530 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002877672321746352, + "loss": 0.7592, + "step": 3540 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002877316407640289, + "loss": 0.7773, + "step": 3550 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002876960493534227, + "loss": 0.6794, + "step": 3560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028766045794281645, + "loss": 0.6742, + "step": 3570 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002876248665322102, + "loss": 0.707, + "step": 3580 + }, + { + "epoch": 0.13, + "learning_rate": 0.000287589275121604, + "loss": 0.7419, + "step": 3590 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002875536837109977, + "loss": 0.7305, + "step": 3600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028751809230039146, + "loss": 0.6657, + "step": 3610 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028748250088978525, + "loss": 0.8198, + "step": 3620 + }, + { + "epoch": 0.13, + "learning_rate": 0.000287446909479179, + "loss": 0.6374, + "step": 3630 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028741131806857273, + "loss": 0.679, + "step": 3640 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002873757266579665, + "loss": 0.5973, + "step": 3650 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028734013524736026, + "loss": 0.6287, + "step": 3660 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028730454383675406, + "loss": 0.7272, + "step": 3670 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002872689524261478, + "loss": 0.6948, + "step": 3680 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028723336101554154, + "loss": 0.6725, + "step": 3690 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028719776960493533, + "loss": 0.7764, + "step": 3700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028716217819432907, + "loss": 0.6805, + "step": 3710 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028712658678372286, + "loss": 0.649, + "step": 3720 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002870909953731166, + "loss": 0.6683, + "step": 3730 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028705540396251034, + "loss": 0.7738, + "step": 3740 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028701981255190414, + "loss": 0.71, + "step": 3750 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002869842211412979, + "loss": 0.764, + "step": 3760 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002869486297306916, + "loss": 0.6747, + "step": 3770 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002869130383200854, + "loss": 0.7046, + "step": 3780 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028687744690947915, + "loss": 0.7001, + "step": 3790 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002868418554988729, + "loss": 0.6032, + "step": 3800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002868062640882667, + "loss": 0.7204, + "step": 3810 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002867706726776604, + "loss": 0.5805, + "step": 3820 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002867350812670542, + "loss": 0.7294, + "step": 3830 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028669948985644795, + "loss": 0.6295, + "step": 3840 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028666389844584175, + "loss": 0.5724, + "step": 3850 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002866283070352355, + "loss": 0.7361, + "step": 3860 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002865927156246292, + "loss": 0.6122, + "step": 3870 + }, + { + "epoch": 0.14, + "learning_rate": 0.000286557124214023, + "loss": 0.7171, + "step": 3880 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028652153280341676, + "loss": 0.6982, + "step": 3890 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002864859413928105, + "loss": 0.6306, + "step": 3900 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002864503499822043, + "loss": 0.68, + "step": 3910 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028641475857159803, + "loss": 0.6748, + "step": 3920 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028637916716099177, + "loss": 0.6766, + "step": 3930 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002863435757503855, + "loss": 0.671, + "step": 3940 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002863079843397793, + "loss": 0.739, + "step": 3950 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028627239292917304, + "loss": 0.6141, + "step": 3960 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028623680151856684, + "loss": 0.6486, + "step": 3970 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002862012101079606, + "loss": 0.7817, + "step": 3980 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028616561869735437, + "loss": 0.8103, + "step": 3990 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002861300272867481, + "loss": 0.6599, + "step": 4000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002860944358761419, + "loss": 0.686, + "step": 4010 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028605884446553564, + "loss": 0.7672, + "step": 4020 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002860232530549294, + "loss": 0.8109, + "step": 4030 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002859876616443231, + "loss": 0.7248, + "step": 4040 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002859520702337169, + "loss": 0.6674, + "step": 4050 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028591647882311066, + "loss": 0.7082, + "step": 4060 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002858808874125044, + "loss": 0.6267, + "step": 4070 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002858452960018982, + "loss": 0.6835, + "step": 4080 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028580970459129193, + "loss": 0.6877, + "step": 4090 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028577411318068567, + "loss": 0.6944, + "step": 4100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028573852177007946, + "loss": 0.799, + "step": 4110 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002857029303594732, + "loss": 0.6408, + "step": 4120 + }, + { + "epoch": 0.15, + "learning_rate": 0.000285667338948867, + "loss": 0.6874, + "step": 4130 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028563174753826073, + "loss": 0.772, + "step": 4140 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028559615612765453, + "loss": 0.6307, + "step": 4150 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028556056471704827, + "loss": 0.6232, + "step": 4160 + }, + { + "epoch": 0.15, + "learning_rate": 0.000285524973306442, + "loss": 0.7, + "step": 4170 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002854893818958358, + "loss": 0.6334, + "step": 4180 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028545379048522954, + "loss": 0.8453, + "step": 4190 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002854181990746233, + "loss": 0.6225, + "step": 4200 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028538260766401707, + "loss": 0.6716, + "step": 4210 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002853470162534108, + "loss": 0.6603, + "step": 4220 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028531142484280455, + "loss": 0.7166, + "step": 4230 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028527583343219835, + "loss": 0.674, + "step": 4240 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002852402420215921, + "loss": 0.6331, + "step": 4250 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002852046506109859, + "loss": 0.6652, + "step": 4260 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002851690592003796, + "loss": 0.6329, + "step": 4270 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002851334677897734, + "loss": 0.6661, + "step": 4280 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028509787637916715, + "loss": 0.6388, + "step": 4290 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002850622849685609, + "loss": 0.6874, + "step": 4300 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002850266935579547, + "loss": 0.6267, + "step": 4310 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002849911021473484, + "loss": 0.5687, + "step": 4320 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028495551073674216, + "loss": 0.6435, + "step": 4330 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028491991932613596, + "loss": 0.7772, + "step": 4340 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002848843279155297, + "loss": 0.7446, + "step": 4350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028484873650492344, + "loss": 0.7155, + "step": 4360 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002848131450943172, + "loss": 0.6608, + "step": 4370 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028477755368371097, + "loss": 0.6978, + "step": 4380 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002847419622731047, + "loss": 0.6413, + "step": 4390 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002847063708624985, + "loss": 0.711, + "step": 4400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028467077945189224, + "loss": 0.7413, + "step": 4410 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028463518804128603, + "loss": 0.7129, + "step": 4420 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002845995966306798, + "loss": 0.7398, + "step": 4430 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028456400522007357, + "loss": 0.7388, + "step": 4440 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002845284138094673, + "loss": 0.6713, + "step": 4450 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028449282239886105, + "loss": 0.6643, + "step": 4460 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028445723098825484, + "loss": 0.6667, + "step": 4470 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002844216395776486, + "loss": 0.6459, + "step": 4480 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002843860481670423, + "loss": 0.6907, + "step": 4490 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028435045675643606, + "loss": 0.7452, + "step": 4500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028431486534582985, + "loss": 0.6538, + "step": 4510 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002842792739352236, + "loss": 0.6442, + "step": 4520 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028424368252461733, + "loss": 0.6921, + "step": 4530 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002842080911140111, + "loss": 0.7466, + "step": 4540 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028417249970340486, + "loss": 0.6833, + "step": 4550 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028413690829279866, + "loss": 0.7314, + "step": 4560 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002841013168821924, + "loss": 0.684, + "step": 4570 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002840657254715862, + "loss": 0.657, + "step": 4580 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028403013406097993, + "loss": 0.6492, + "step": 4590 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028399454265037367, + "loss": 0.6837, + "step": 4600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028395895123976746, + "loss": 0.6173, + "step": 4610 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002839233598291612, + "loss": 0.7272, + "step": 4620 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028388776841855494, + "loss": 0.6564, + "step": 4630 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028385217700794874, + "loss": 0.7441, + "step": 4640 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002838165855973425, + "loss": 0.5988, + "step": 4650 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002837809941867362, + "loss": 0.7319, + "step": 4660 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028374540277613, + "loss": 0.7096, + "step": 4670 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028370981136552375, + "loss": 0.6784, + "step": 4680 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002836742199549175, + "loss": 0.8013, + "step": 4690 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002836386285443113, + "loss": 0.6753, + "step": 4700 + }, + { + "epoch": 0.17, + "learning_rate": 0.000283603037133705, + "loss": 0.7424, + "step": 4710 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002835674457230988, + "loss": 0.6778, + "step": 4720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028353185431249255, + "loss": 0.723, + "step": 4730 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028349626290188635, + "loss": 0.676, + "step": 4740 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002834606714912801, + "loss": 0.6816, + "step": 4750 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002834250800806738, + "loss": 0.6436, + "step": 4760 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002833894886700676, + "loss": 0.666, + "step": 4770 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028335389725946136, + "loss": 0.664, + "step": 4780 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002833183058488551, + "loss": 0.6867, + "step": 4790 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002832827144382489, + "loss": 0.7223, + "step": 4800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028324712302764263, + "loss": 0.7188, + "step": 4810 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028321153161703637, + "loss": 0.7236, + "step": 4820 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028317594020643017, + "loss": 0.7476, + "step": 4830 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002831403487958239, + "loss": 0.6511, + "step": 4840 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002831047573852177, + "loss": 0.659, + "step": 4850 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028306916597461144, + "loss": 0.7543, + "step": 4860 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028303357456400523, + "loss": 0.662, + "step": 4870 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028299798315339897, + "loss": 0.6815, + "step": 4880 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002829623917427927, + "loss": 0.7013, + "step": 4890 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002829268003321865, + "loss": 0.7242, + "step": 4900 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028289120892158024, + "loss": 0.7088, + "step": 4910 + }, + { + "epoch": 0.17, + "learning_rate": 0.000282855617510974, + "loss": 0.7081, + "step": 4920 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002828200261003678, + "loss": 0.6265, + "step": 4930 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002827844346897615, + "loss": 0.8361, + "step": 4940 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028274884327915526, + "loss": 0.6595, + "step": 4950 + }, + { + "epoch": 0.18, + "learning_rate": 0.000282713251868549, + "loss": 0.6483, + "step": 4960 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002826776604579428, + "loss": 0.6965, + "step": 4970 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028264206904733653, + "loss": 0.7617, + "step": 4980 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002826064776367303, + "loss": 0.7236, + "step": 4990 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028257088622612406, + "loss": 0.7769, + "step": 5000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028253529481551786, + "loss": 0.6, + "step": 5010 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002824997034049116, + "loss": 0.7488, + "step": 5020 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002824641119943054, + "loss": 0.598, + "step": 5030 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028242852058369913, + "loss": 0.7598, + "step": 5040 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028239292917309287, + "loss": 0.6441, + "step": 5050 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002823573377624866, + "loss": 0.6564, + "step": 5060 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002823217463518804, + "loss": 0.7177, + "step": 5070 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028228615494127414, + "loss": 0.7513, + "step": 5080 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002822505635306679, + "loss": 0.6029, + "step": 5090 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002822149721200617, + "loss": 0.7096, + "step": 5100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002821793807094554, + "loss": 0.6043, + "step": 5110 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028214378929884915, + "loss": 0.6816, + "step": 5120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028210819788824295, + "loss": 0.7107, + "step": 5130 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002820726064776367, + "loss": 0.7088, + "step": 5140 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002820370150670305, + "loss": 0.7065, + "step": 5150 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002820014236564242, + "loss": 0.6974, + "step": 5160 + }, + { + "epoch": 0.18, + "learning_rate": 0.000281965832245818, + "loss": 0.6437, + "step": 5170 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028193024083521175, + "loss": 0.7158, + "step": 5180 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002818946494246055, + "loss": 0.6624, + "step": 5190 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002818590580139993, + "loss": 0.6567, + "step": 5200 + }, + { + "epoch": 0.19, + "learning_rate": 0.000281823466603393, + "loss": 0.6789, + "step": 5210 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028178787519278676, + "loss": 0.8325, + "step": 5220 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028175228378218056, + "loss": 0.7748, + "step": 5230 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002817166923715743, + "loss": 0.6514, + "step": 5240 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028168110096096804, + "loss": 0.6611, + "step": 5250 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028164550955036183, + "loss": 0.6803, + "step": 5260 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028160991813975557, + "loss": 0.7386, + "step": 5270 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002815743267291493, + "loss": 0.7056, + "step": 5280 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002815387353185431, + "loss": 0.7716, + "step": 5290 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028150314390793684, + "loss": 0.7421, + "step": 5300 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028146755249733064, + "loss": 0.6784, + "step": 5310 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002814319610867244, + "loss": 0.5822, + "step": 5320 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028139636967611817, + "loss": 0.7016, + "step": 5330 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002813607782655119, + "loss": 0.7684, + "step": 5340 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028132518685490565, + "loss": 0.6963, + "step": 5350 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028128959544429944, + "loss": 0.6931, + "step": 5360 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002812540040336932, + "loss": 0.7024, + "step": 5370 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002812184126230869, + "loss": 0.6701, + "step": 5380 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028118282121248066, + "loss": 0.62, + "step": 5390 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028114722980187445, + "loss": 0.6614, + "step": 5400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002811116383912682, + "loss": 0.6506, + "step": 5410 + }, + { + "epoch": 0.19, + "learning_rate": 0.000281076046980662, + "loss": 0.6427, + "step": 5420 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002810404555700557, + "loss": 0.6979, + "step": 5430 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002810048641594495, + "loss": 0.7202, + "step": 5440 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028096927274884326, + "loss": 0.7734, + "step": 5450 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028093368133823705, + "loss": 0.7284, + "step": 5460 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002808980899276308, + "loss": 0.5981, + "step": 5470 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028086249851702453, + "loss": 0.7213, + "step": 5480 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002808269071064183, + "loss": 0.7447, + "step": 5490 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028079131569581206, + "loss": 0.65, + "step": 5500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002807557242852058, + "loss": 0.6703, + "step": 5510 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028072013287459954, + "loss": 0.7258, + "step": 5520 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028068454146399334, + "loss": 0.6441, + "step": 5530 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002806489500533871, + "loss": 0.6666, + "step": 5540 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002806133586427808, + "loss": 0.6953, + "step": 5550 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002805777672321746, + "loss": 0.8032, + "step": 5560 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028054217582156835, + "loss": 0.7212, + "step": 5570 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028050658441096214, + "loss": 0.7961, + "step": 5580 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002804709930003559, + "loss": 0.6428, + "step": 5590 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002804354015897497, + "loss": 0.6827, + "step": 5600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002803998101791434, + "loss": 0.6803, + "step": 5610 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028036421876853715, + "loss": 0.6114, + "step": 5620 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028032862735793095, + "loss": 0.6894, + "step": 5630 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002802930359473247, + "loss": 0.6454, + "step": 5640 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028025744453671843, + "loss": 0.6649, + "step": 5650 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002802218531261122, + "loss": 0.6526, + "step": 5660 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028018626171550596, + "loss": 0.6863, + "step": 5670 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002801506703048997, + "loss": 0.6503, + "step": 5680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002801150788942935, + "loss": 0.7383, + "step": 5690 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028007948748368723, + "loss": 0.6345, + "step": 5700 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028004389607308097, + "loss": 0.7334, + "step": 5710 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028000830466247477, + "loss": 0.8069, + "step": 5720 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002799727132518685, + "loss": 0.6889, + "step": 5730 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002799371218412623, + "loss": 0.6738, + "step": 5740 + }, + { + "epoch": 0.2, + "learning_rate": 0.00027990153043065604, + "loss": 0.6819, + "step": 5750 + }, + { + "epoch": 0.2, + "learning_rate": 0.00027986593902004983, + "loss": 0.6372, + "step": 5760 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027983034760944357, + "loss": 0.6791, + "step": 5770 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002797947561988373, + "loss": 0.7331, + "step": 5780 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002797591647882311, + "loss": 0.5917, + "step": 5790 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027972357337762484, + "loss": 0.7269, + "step": 5800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002796879819670186, + "loss": 0.7028, + "step": 5810 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002796523905564124, + "loss": 0.64, + "step": 5820 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002796167991458061, + "loss": 0.6924, + "step": 5830 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027958120773519986, + "loss": 0.5877, + "step": 5840 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027954561632459365, + "loss": 0.6111, + "step": 5850 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002795100249139874, + "loss": 0.5829, + "step": 5860 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002794744335033812, + "loss": 0.6971, + "step": 5870 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002794388420927749, + "loss": 0.6758, + "step": 5880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027940325068216866, + "loss": 0.7192, + "step": 5890 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027936765927156246, + "loss": 0.7653, + "step": 5900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002793320678609562, + "loss": 0.6789, + "step": 5910 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027929647645035, + "loss": 0.7232, + "step": 5920 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027926088503974373, + "loss": 0.6553, + "step": 5930 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027922529362913747, + "loss": 0.662, + "step": 5940 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027918970221853126, + "loss": 0.7693, + "step": 5950 + }, + { + "epoch": 0.21, + "learning_rate": 0.000279154110807925, + "loss": 0.6878, + "step": 5960 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027911851939731874, + "loss": 0.672, + "step": 5970 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002790829279867125, + "loss": 0.6826, + "step": 5980 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002790473365761063, + "loss": 0.6641, + "step": 5990 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002790117451655, + "loss": 0.6687, + "step": 6000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002789761537548938, + "loss": 0.7214, + "step": 6010 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027894056234428755, + "loss": 0.6375, + "step": 6020 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027890497093368134, + "loss": 0.7117, + "step": 6030 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002788729386641357, + "loss": 0.6257, + "step": 6040 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027883734725352947, + "loss": 0.5788, + "step": 6050 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002788017558429232, + "loss": 0.79, + "step": 6060 + }, + { + "epoch": 0.22, + "learning_rate": 0.000278766164432317, + "loss": 0.6082, + "step": 6070 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027873057302171074, + "loss": 0.673, + "step": 6080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002786949816111045, + "loss": 0.6672, + "step": 6090 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027865939020049827, + "loss": 0.7397, + "step": 6100 + }, + { + "epoch": 0.22, + "learning_rate": 0.000278623798789892, + "loss": 0.6331, + "step": 6110 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027858820737928575, + "loss": 0.6923, + "step": 6120 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027855261596867954, + "loss": 0.8609, + "step": 6130 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002785170245580733, + "loss": 0.6959, + "step": 6140 + }, + { + "epoch": 0.22, + "learning_rate": 0.000278481433147467, + "loss": 0.7997, + "step": 6150 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002784458417368608, + "loss": 0.6886, + "step": 6160 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027841025032625456, + "loss": 0.6786, + "step": 6170 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027837465891564835, + "loss": 0.616, + "step": 6180 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002783390675050421, + "loss": 0.6354, + "step": 6190 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002783070352354965, + "loss": 0.6364, + "step": 6200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027827144382489027, + "loss": 0.6525, + "step": 6210 + }, + { + "epoch": 0.22, + "learning_rate": 0.000278235852414284, + "loss": 0.689, + "step": 6220 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027820026100367775, + "loss": 0.691, + "step": 6230 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002781646695930715, + "loss": 0.7241, + "step": 6240 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002781290781824653, + "loss": 0.7144, + "step": 6250 + }, + { + "epoch": 0.22, + "learning_rate": 0.000278093486771859, + "loss": 0.7479, + "step": 6260 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027805789536125276, + "loss": 0.6068, + "step": 6270 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027802230395064656, + "loss": 0.6501, + "step": 6280 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002779867125400403, + "loss": 0.7616, + "step": 6290 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002779511211294341, + "loss": 0.6096, + "step": 6300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027791552971882783, + "loss": 0.7053, + "step": 6310 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002778799383082216, + "loss": 0.7448, + "step": 6320 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027784434689761536, + "loss": 0.7046, + "step": 6330 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002778087554870091, + "loss": 0.6033, + "step": 6340 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002777731640764029, + "loss": 0.6678, + "step": 6350 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027773757266579663, + "loss": 0.6845, + "step": 6360 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027770198125519037, + "loss": 0.7314, + "step": 6370 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027766638984458417, + "loss": 0.5838, + "step": 6380 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002776307984339779, + "loss": 0.6447, + "step": 6390 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027759520702337165, + "loss": 0.6738, + "step": 6400 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027755961561276544, + "loss": 0.7543, + "step": 6410 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002775240242021592, + "loss": 0.6558, + "step": 6420 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002774884327915529, + "loss": 0.7479, + "step": 6430 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002774528413809467, + "loss": 0.6823, + "step": 6440 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027741724997034045, + "loss": 0.7755, + "step": 6450 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027738165855973424, + "loss": 0.6771, + "step": 6460 + }, + { + "epoch": 0.23, + "learning_rate": 0.000277346067149128, + "loss": 0.6559, + "step": 6470 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002773104757385218, + "loss": 0.746, + "step": 6480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002772748843279155, + "loss": 0.6439, + "step": 6490 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027723929291730926, + "loss": 0.6347, + "step": 6500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027720370150670305, + "loss": 0.634, + "step": 6510 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002771681100960968, + "loss": 0.7174, + "step": 6520 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027713251868549053, + "loss": 0.6798, + "step": 6530 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002770969272748843, + "loss": 0.6309, + "step": 6540 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027706133586427806, + "loss": 0.6051, + "step": 6550 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002770257444536718, + "loss": 0.7146, + "step": 6560 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002769901530430656, + "loss": 0.7369, + "step": 6570 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027695456163245934, + "loss": 0.6924, + "step": 6580 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002769189702218531, + "loss": 0.7319, + "step": 6590 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027688337881124687, + "loss": 0.6855, + "step": 6600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002768477874006406, + "loss": 0.7762, + "step": 6610 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002768121959900344, + "loss": 0.5841, + "step": 6620 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027677660457942814, + "loss": 0.7067, + "step": 6630 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027674101316882193, + "loss": 0.6539, + "step": 6640 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002767054217582157, + "loss": 0.7795, + "step": 6650 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002766698303476094, + "loss": 0.6752, + "step": 6660 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002766342389370032, + "loss": 0.6993, + "step": 6670 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027659864752639695, + "loss": 0.7318, + "step": 6680 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002765630561157907, + "loss": 0.7325, + "step": 6690 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002765274647051844, + "loss": 0.6644, + "step": 6700 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002764918732945782, + "loss": 0.6358, + "step": 6710 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027645628188397196, + "loss": 0.6958, + "step": 6720 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002764206904733657, + "loss": 0.6402, + "step": 6730 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002763850990627595, + "loss": 0.7679, + "step": 6740 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027634950765215323, + "loss": 0.6876, + "step": 6750 + }, + { + "epoch": 0.24, + "learning_rate": 0.000276313916241547, + "loss": 0.6936, + "step": 6760 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027627832483094076, + "loss": 0.6362, + "step": 6770 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027624273342033456, + "loss": 0.7141, + "step": 6780 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002762071420097283, + "loss": 0.6013, + "step": 6790 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027617155059912204, + "loss": 0.6325, + "step": 6800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027613595918851583, + "loss": 0.5862, + "step": 6810 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027610036777790957, + "loss": 0.6688, + "step": 6820 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002760647763673033, + "loss": 0.685, + "step": 6830 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002760291849566971, + "loss": 0.7214, + "step": 6840 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027599359354609084, + "loss": 0.6369, + "step": 6850 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002759580021354846, + "loss": 0.6174, + "step": 6860 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002759224107248784, + "loss": 0.781, + "step": 6870 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002758868193142721, + "loss": 0.6663, + "step": 6880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002758512279036659, + "loss": 0.6706, + "step": 6890 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027581563649305965, + "loss": 0.7238, + "step": 6900 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027578004508245344, + "loss": 0.7005, + "step": 6910 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002757444536718472, + "loss": 0.722, + "step": 6920 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002757088622612409, + "loss": 0.6901, + "step": 6930 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002756732708506347, + "loss": 0.6618, + "step": 6940 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027563767944002845, + "loss": 0.6387, + "step": 6950 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002756020880294222, + "loss": 0.6953, + "step": 6960 + }, + { + "epoch": 0.25, + "learning_rate": 0.000275566496618816, + "loss": 0.6862, + "step": 6970 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002755309052082097, + "loss": 0.6424, + "step": 6980 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027549531379760347, + "loss": 0.7637, + "step": 6990 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027545972238699726, + "loss": 0.6712, + "step": 7000 + }, + { + "epoch": 0.25, + "learning_rate": 0.000275424130976391, + "loss": 0.7253, + "step": 7010 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027538853956578474, + "loss": 0.6319, + "step": 7020 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027535294815517853, + "loss": 0.7082, + "step": 7030 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027531735674457227, + "loss": 0.6681, + "step": 7040 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027528176533396607, + "loss": 0.7333, + "step": 7050 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002752461739233598, + "loss": 0.6208, + "step": 7060 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002752105825127536, + "loss": 0.7219, + "step": 7070 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027517499110214734, + "loss": 0.729, + "step": 7080 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002751393996915411, + "loss": 0.699, + "step": 7090 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027510380828093487, + "loss": 0.6561, + "step": 7100 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002750682168703286, + "loss": 0.6682, + "step": 7110 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027503262545972235, + "loss": 0.7022, + "step": 7120 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027499703404911614, + "loss": 0.6909, + "step": 7130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002749614426385099, + "loss": 0.6412, + "step": 7140 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002749258512279036, + "loss": 0.639, + "step": 7150 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027489025981729736, + "loss": 0.7439, + "step": 7160 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027485466840669116, + "loss": 0.724, + "step": 7170 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002748190769960849, + "loss": 0.6737, + "step": 7180 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002747834855854787, + "loss": 0.6141, + "step": 7190 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027474789417487243, + "loss": 0.7455, + "step": 7200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002747123027642662, + "loss": 0.7237, + "step": 7210 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027467671135365996, + "loss": 0.6049, + "step": 7220 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027464111994305375, + "loss": 0.7072, + "step": 7230 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002746055285324475, + "loss": 0.7085, + "step": 7240 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027456993712184123, + "loss": 0.7091, + "step": 7250 + }, + { + "epoch": 0.26, + "learning_rate": 0.000274534345711235, + "loss": 0.6661, + "step": 7260 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027449875430062877, + "loss": 0.6861, + "step": 7270 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002744631628900225, + "loss": 0.6816, + "step": 7280 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027442757147941625, + "loss": 0.6438, + "step": 7290 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027439198006881004, + "loss": 0.6755, + "step": 7300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002743563886582038, + "loss": 0.6797, + "step": 7310 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027432079724759757, + "loss": 0.6453, + "step": 7320 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002742852058369913, + "loss": 0.7137, + "step": 7330 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027424961442638505, + "loss": 0.6215, + "step": 7340 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027421402301577885, + "loss": 0.6754, + "step": 7350 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002741784316051726, + "loss": 0.6035, + "step": 7360 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002741428401945664, + "loss": 0.7422, + "step": 7370 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002741072487839601, + "loss": 0.6829, + "step": 7380 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027407165737335386, + "loss": 0.6616, + "step": 7390 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027403606596274765, + "loss": 0.6977, + "step": 7400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002740004745521414, + "loss": 0.6686, + "step": 7410 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027396488314153513, + "loss": 0.693, + "step": 7420 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002739292917309289, + "loss": 0.7555, + "step": 7430 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027389370032032266, + "loss": 0.6327, + "step": 7440 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002738581089097164, + "loss": 0.6916, + "step": 7450 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002738225174991102, + "loss": 0.6086, + "step": 7460 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027378692608850394, + "loss": 0.7267, + "step": 7470 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027375133467789773, + "loss": 0.6479, + "step": 7480 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027371574326729147, + "loss": 0.6953, + "step": 7490 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027368015185668526, + "loss": 0.6323, + "step": 7500 + }, + { + "epoch": 0.27, + "learning_rate": 0.000273644560446079, + "loss": 0.6159, + "step": 7510 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027360896903547274, + "loss": 0.7172, + "step": 7520 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027357337762486653, + "loss": 0.7004, + "step": 7530 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002735377862142603, + "loss": 0.66, + "step": 7540 + }, + { + "epoch": 0.27, + "learning_rate": 0.000273502194803654, + "loss": 0.6667, + "step": 7550 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002734666033930478, + "loss": 0.7522, + "step": 7560 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027343101198244155, + "loss": 0.6494, + "step": 7570 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002733954205718353, + "loss": 0.6456, + "step": 7580 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002733598291612291, + "loss": 0.6514, + "step": 7590 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002733242377506228, + "loss": 0.7416, + "step": 7600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027328864634001656, + "loss": 0.7246, + "step": 7610 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027325305492941035, + "loss": 0.7575, + "step": 7620 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002732174635188041, + "loss": 0.5808, + "step": 7630 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002731818721081979, + "loss": 0.5846, + "step": 7640 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002731462806975916, + "loss": 0.7003, + "step": 7650 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002731106892869854, + "loss": 0.6482, + "step": 7660 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027307509787637916, + "loss": 0.7011, + "step": 7670 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002730395064657729, + "loss": 0.622, + "step": 7680 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002730039150551667, + "loss": 0.7317, + "step": 7690 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027296832364456043, + "loss": 0.6823, + "step": 7700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027293273223395417, + "loss": 0.6137, + "step": 7710 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002728971408233479, + "loss": 0.662, + "step": 7720 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002728615494127417, + "loss": 0.6742, + "step": 7730 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027282595800213544, + "loss": 0.5736, + "step": 7740 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002727903665915292, + "loss": 0.618, + "step": 7750 + }, + { + "epoch": 0.28, + "learning_rate": 0.000272754775180923, + "loss": 0.6998, + "step": 7760 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002727191837703167, + "loss": 0.7414, + "step": 7770 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002726835923597105, + "loss": 0.7506, + "step": 7780 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027264800094910425, + "loss": 0.7766, + "step": 7790 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027261240953849804, + "loss": 0.635, + "step": 7800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002725768181278918, + "loss": 0.7063, + "step": 7810 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002725412267172855, + "loss": 0.7736, + "step": 7820 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002725056353066793, + "loss": 0.6714, + "step": 7830 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027247004389607305, + "loss": 0.6729, + "step": 7840 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002724344524854668, + "loss": 0.6707, + "step": 7850 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002723988610748606, + "loss": 0.6163, + "step": 7860 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002723632696642543, + "loss": 0.5852, + "step": 7870 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027232767825364807, + "loss": 0.6661, + "step": 7880 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027229208684304186, + "loss": 0.6629, + "step": 7890 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002722564954324356, + "loss": 0.7385, + "step": 7900 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002722209040218294, + "loss": 0.6147, + "step": 7910 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027218531261122313, + "loss": 0.6261, + "step": 7920 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002721497212006169, + "loss": 0.6707, + "step": 7930 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027211412979001067, + "loss": 0.6723, + "step": 7940 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002720785383794044, + "loss": 0.7113, + "step": 7950 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002720429469687982, + "loss": 0.6316, + "step": 7960 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027200735555819194, + "loss": 0.7423, + "step": 7970 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002719717641475857, + "loss": 0.7018, + "step": 7980 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027193617273697947, + "loss": 0.6973, + "step": 7990 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002719005813263732, + "loss": 0.7192, + "step": 8000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027186498991576695, + "loss": 0.6437, + "step": 8010 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027182939850516074, + "loss": 0.7277, + "step": 8020 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002717938070945545, + "loss": 0.6786, + "step": 8030 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002717582156839482, + "loss": 0.652, + "step": 8040 + }, + { + "epoch": 0.29, + "learning_rate": 0.000271722624273342, + "loss": 0.6803, + "step": 8050 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027168703286273576, + "loss": 0.787, + "step": 8060 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027165144145212955, + "loss": 0.6576, + "step": 8070 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002716158500415233, + "loss": 0.7551, + "step": 8080 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002715802586309171, + "loss": 0.6326, + "step": 8090 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002715446672203108, + "loss": 0.7833, + "step": 8100 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027150907580970456, + "loss": 0.6458, + "step": 8110 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027147348439909836, + "loss": 0.6222, + "step": 8120 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002714378929884921, + "loss": 0.6498, + "step": 8130 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027140230157788583, + "loss": 0.7359, + "step": 8140 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027136671016727963, + "loss": 0.5633, + "step": 8150 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027133111875667337, + "loss": 0.5984, + "step": 8160 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002712955273460671, + "loss": 0.7422, + "step": 8170 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027125993593546085, + "loss": 0.7077, + "step": 8180 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027122434452485464, + "loss": 0.8808, + "step": 8190 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002711887531142484, + "loss": 0.7417, + "step": 8200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002711531617036422, + "loss": 0.683, + "step": 8210 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002711175702930359, + "loss": 0.7161, + "step": 8220 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002710819788824297, + "loss": 0.7363, + "step": 8230 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027104638747182345, + "loss": 0.7293, + "step": 8240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027101079606121724, + "loss": 0.7824, + "step": 8250 + }, + { + "epoch": 0.29, + "learning_rate": 0.000270975204650611, + "loss": 0.6711, + "step": 8260 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002709396132400047, + "loss": 0.6697, + "step": 8270 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027090402182939846, + "loss": 0.6549, + "step": 8280 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027086843041879225, + "loss": 0.5806, + "step": 8290 + }, + { + "epoch": 0.3, + "learning_rate": 0.000270832839008186, + "loss": 0.6773, + "step": 8300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027079724759757973, + "loss": 0.6777, + "step": 8310 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002707616561869735, + "loss": 0.7535, + "step": 8320 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027072606477636726, + "loss": 0.6484, + "step": 8330 + }, + { + "epoch": 0.3, + "learning_rate": 0.000270690473365761, + "loss": 0.6953, + "step": 8340 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002706548819551548, + "loss": 0.6748, + "step": 8350 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027061929054454854, + "loss": 0.6196, + "step": 8360 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027058369913394233, + "loss": 0.6543, + "step": 8370 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027054810772333607, + "loss": 0.6752, + "step": 8380 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027051251631272986, + "loss": 0.6977, + "step": 8390 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002704769249021236, + "loss": 0.6955, + "step": 8400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027044133349151734, + "loss": 0.7028, + "step": 8410 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027040574208091114, + "loss": 0.7077, + "step": 8420 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002703701506703049, + "loss": 0.6368, + "step": 8430 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002703345592596986, + "loss": 0.6696, + "step": 8440 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002702989678490924, + "loss": 0.639, + "step": 8450 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027026337643848615, + "loss": 0.7711, + "step": 8460 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002702277850278799, + "loss": 0.7003, + "step": 8470 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002701921936172737, + "loss": 0.7199, + "step": 8480 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002701566022066674, + "loss": 0.6721, + "step": 8490 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002701210107960612, + "loss": 0.6238, + "step": 8500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027008541938545495, + "loss": 0.6396, + "step": 8510 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027004982797484875, + "loss": 0.7214, + "step": 8520 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002700142365642425, + "loss": 0.7322, + "step": 8530 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002699786451536362, + "loss": 0.6307, + "step": 8540 + }, + { + "epoch": 0.3, + "learning_rate": 0.00026994305374303, + "loss": 0.595, + "step": 8550 + }, + { + "epoch": 0.3, + "learning_rate": 0.00026990746233242376, + "loss": 0.6987, + "step": 8560 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002698718709218175, + "loss": 0.6147, + "step": 8570 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002698362795112113, + "loss": 0.7344, + "step": 8580 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026980068810060503, + "loss": 0.7043, + "step": 8590 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026976509668999877, + "loss": 0.7113, + "step": 8600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026972950527939256, + "loss": 0.6195, + "step": 8610 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002696939138687863, + "loss": 0.7611, + "step": 8620 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026965832245818004, + "loss": 0.6978, + "step": 8630 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026962273104757384, + "loss": 0.6013, + "step": 8640 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002695871396369676, + "loss": 0.6677, + "step": 8650 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026955154822636137, + "loss": 0.641, + "step": 8660 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002695159568157551, + "loss": 0.7703, + "step": 8670 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002694803654051489, + "loss": 0.7784, + "step": 8680 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026944477399454264, + "loss": 0.6994, + "step": 8690 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002694091825839364, + "loss": 0.6066, + "step": 8700 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002693735911733302, + "loss": 0.5995, + "step": 8710 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002693379997627239, + "loss": 0.6666, + "step": 8720 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026930240835211765, + "loss": 0.608, + "step": 8730 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002692668169415114, + "loss": 0.6598, + "step": 8740 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002692312255309052, + "loss": 0.7329, + "step": 8750 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026919563412029893, + "loss": 0.6537, + "step": 8760 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026916004270969267, + "loss": 0.5875, + "step": 8770 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026912445129908646, + "loss": 0.6592, + "step": 8780 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002690888598884802, + "loss": 0.6463, + "step": 8790 + }, + { + "epoch": 0.31, + "learning_rate": 0.000269053268477874, + "loss": 0.7381, + "step": 8800 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026901767706726773, + "loss": 0.6912, + "step": 8810 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002689820856566615, + "loss": 0.6573, + "step": 8820 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026894649424605527, + "loss": 0.6521, + "step": 8830 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026891090283544906, + "loss": 0.6629, + "step": 8840 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002688753114248428, + "loss": 0.655, + "step": 8850 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026883972001423654, + "loss": 0.6766, + "step": 8860 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002688041286036303, + "loss": 0.6779, + "step": 8870 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026876853719302407, + "loss": 0.6672, + "step": 8880 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002687329457824178, + "loss": 0.8151, + "step": 8890 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026869735437181155, + "loss": 0.5627, + "step": 8900 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026866176296120534, + "loss": 0.7126, + "step": 8910 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002686261715505991, + "loss": 0.7104, + "step": 8920 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002685905801399929, + "loss": 0.7442, + "step": 8930 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002685549887293866, + "loss": 0.8611, + "step": 8940 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026851939731878036, + "loss": 0.6804, + "step": 8950 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026848380590817415, + "loss": 0.6368, + "step": 8960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002684482144975679, + "loss": 0.6316, + "step": 8970 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002684126230869617, + "loss": 0.6271, + "step": 8980 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002683770316763554, + "loss": 0.7894, + "step": 8990 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026834144026574916, + "loss": 0.6684, + "step": 9000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026830584885514296, + "loss": 0.6234, + "step": 9010 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002682702574445367, + "loss": 0.6944, + "step": 9020 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026823466603393043, + "loss": 0.7694, + "step": 9030 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026819907462332423, + "loss": 0.6311, + "step": 9040 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026816348321271797, + "loss": 0.7521, + "step": 9050 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002681278918021117, + "loss": 0.7274, + "step": 9060 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002680923003915055, + "loss": 0.6462, + "step": 9070 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026805670898089924, + "loss": 0.5708, + "step": 9080 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026802111757029303, + "loss": 0.6091, + "step": 9090 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002679855261596868, + "loss": 0.6495, + "step": 9100 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026794993474908057, + "loss": 0.6499, + "step": 9110 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002679143433384743, + "loss": 0.6483, + "step": 9120 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026787875192786805, + "loss": 0.5742, + "step": 9130 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026784316051726184, + "loss": 0.7195, + "step": 9140 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002678075691066556, + "loss": 0.7322, + "step": 9150 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002677719776960493, + "loss": 0.6575, + "step": 9160 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002677363862854431, + "loss": 0.6982, + "step": 9170 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026770079487483685, + "loss": 0.6903, + "step": 9180 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002676652034642306, + "loss": 0.758, + "step": 9190 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026762961205362433, + "loss": 0.6349, + "step": 9200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002675940206430181, + "loss": 0.7335, + "step": 9210 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026755842923241186, + "loss": 0.6959, + "step": 9220 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026752283782180566, + "loss": 0.6329, + "step": 9230 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002674872464111994, + "loss": 0.7161, + "step": 9240 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002674516550005932, + "loss": 0.6257, + "step": 9250 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026741606358998693, + "loss": 0.6758, + "step": 9260 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002673804721793807, + "loss": 0.7651, + "step": 9270 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026734488076877446, + "loss": 0.5855, + "step": 9280 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002673092893581682, + "loss": 0.5963, + "step": 9290 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026727369794756194, + "loss": 0.6991, + "step": 9300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026723810653695574, + "loss": 0.7755, + "step": 9310 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002672025151263495, + "loss": 0.6623, + "step": 9320 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002671669237157432, + "loss": 0.7013, + "step": 9330 + }, + { + "epoch": 0.33, + "learning_rate": 0.000267131332305137, + "loss": 0.6297, + "step": 9340 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026709574089453075, + "loss": 0.6676, + "step": 9350 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002670601494839245, + "loss": 0.6645, + "step": 9360 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002670245580733183, + "loss": 0.6607, + "step": 9370 + }, + { + "epoch": 0.33, + "learning_rate": 0.000266988966662712, + "loss": 0.7861, + "step": 9380 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002669533752521058, + "loss": 0.7143, + "step": 9390 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026691778384149955, + "loss": 0.6165, + "step": 9400 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026688219243089335, + "loss": 0.6654, + "step": 9410 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002668466010202871, + "loss": 0.754, + "step": 9420 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002668110096096808, + "loss": 0.6519, + "step": 9430 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002667754181990746, + "loss": 0.6906, + "step": 9440 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026673982678846836, + "loss": 0.6446, + "step": 9450 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002667042353778621, + "loss": 0.6815, + "step": 9460 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002666686439672559, + "loss": 0.6959, + "step": 9470 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026663305255664963, + "loss": 0.656, + "step": 9480 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026659746114604337, + "loss": 0.6959, + "step": 9490 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026656186973543716, + "loss": 0.6717, + "step": 9500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002665262783248309, + "loss": 0.6418, + "step": 9510 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002664906869142247, + "loss": 0.7151, + "step": 9520 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026645509550361844, + "loss": 0.6329, + "step": 9530 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026641950409301223, + "loss": 0.7272, + "step": 9540 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026638391268240597, + "loss": 0.697, + "step": 9550 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002663483212717997, + "loss": 0.6158, + "step": 9560 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002663127298611935, + "loss": 0.7448, + "step": 9570 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026627713845058724, + "loss": 0.6351, + "step": 9580 + }, + { + "epoch": 0.34, + "learning_rate": 0.000266241547039981, + "loss": 0.7799, + "step": 9590 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002662059556293748, + "loss": 0.6192, + "step": 9600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002661703642187685, + "loss": 0.7461, + "step": 9610 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026613477280816225, + "loss": 0.7044, + "step": 9620 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026609918139755605, + "loss": 0.7068, + "step": 9630 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002660635899869498, + "loss": 0.6686, + "step": 9640 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026602799857634353, + "loss": 0.6377, + "step": 9650 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002659924071657373, + "loss": 0.6554, + "step": 9660 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026595681575513106, + "loss": 0.6232, + "step": 9670 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026592122434452485, + "loss": 0.6906, + "step": 9680 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002658856329339186, + "loss": 0.5595, + "step": 9690 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002658500415233124, + "loss": 0.5811, + "step": 9700 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002658144501127061, + "loss": 0.6864, + "step": 9710 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026577885870209987, + "loss": 0.7228, + "step": 9720 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026574326729149366, + "loss": 0.6747, + "step": 9730 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002657076758808874, + "loss": 0.5234, + "step": 9740 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026567208447028114, + "loss": 0.7332, + "step": 9750 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002656364930596749, + "loss": 0.6973, + "step": 9760 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026560090164906867, + "loss": 0.6691, + "step": 9770 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002655653102384624, + "loss": 0.6731, + "step": 9780 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026552971882785615, + "loss": 0.6794, + "step": 9790 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026549412741724994, + "loss": 0.7355, + "step": 9800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002654585360066437, + "loss": 0.7245, + "step": 9810 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002654229445960375, + "loss": 0.6417, + "step": 9820 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002653873531854312, + "loss": 0.6887, + "step": 9830 + }, + { + "epoch": 0.35, + "learning_rate": 0.000265351761774825, + "loss": 0.5949, + "step": 9840 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026531617036421875, + "loss": 0.6635, + "step": 9850 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026528057895361254, + "loss": 0.6195, + "step": 9860 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002652449875430063, + "loss": 0.6533, + "step": 9870 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002652093961324, + "loss": 0.6308, + "step": 9880 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026517380472179376, + "loss": 0.595, + "step": 9890 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026513821331118756, + "loss": 0.6465, + "step": 9900 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002651026219005813, + "loss": 0.7322, + "step": 9910 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026506703048997503, + "loss": 0.7787, + "step": 9920 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026503143907936883, + "loss": 0.7538, + "step": 9930 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026499584766876257, + "loss": 0.5519, + "step": 9940 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002649602562581563, + "loss": 0.5726, + "step": 9950 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002649246648475501, + "loss": 0.5972, + "step": 9960 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026488907343694384, + "loss": 0.6526, + "step": 9970 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026485348202633763, + "loss": 0.6898, + "step": 9980 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002648178906157314, + "loss": 0.6362, + "step": 9990 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026478229920512517, + "loss": 0.6991, + "step": 10000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002647467077945189, + "loss": 0.6635, + "step": 10010 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026471111638391265, + "loss": 0.6357, + "step": 10020 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026467552497330644, + "loss": 0.6655, + "step": 10030 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002646399335627002, + "loss": 0.7764, + "step": 10040 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002646043421520939, + "loss": 0.6729, + "step": 10050 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002645687507414877, + "loss": 0.6758, + "step": 10060 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026453315933088145, + "loss": 0.6199, + "step": 10070 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002644975679202752, + "loss": 0.6462, + "step": 10080 + }, + { + "epoch": 0.36, + "learning_rate": 0.000264461976509669, + "loss": 0.698, + "step": 10090 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002644263850990627, + "loss": 0.6862, + "step": 10100 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002643907936884565, + "loss": 0.7163, + "step": 10110 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026435520227785026, + "loss": 0.5766, + "step": 10120 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026431961086724405, + "loss": 0.6792, + "step": 10130 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002642840194566378, + "loss": 0.6469, + "step": 10140 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026424842804603153, + "loss": 0.6404, + "step": 10150 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002642128366354253, + "loss": 0.6711, + "step": 10160 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026417724522481906, + "loss": 0.7104, + "step": 10170 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002641416538142128, + "loss": 0.5911, + "step": 10180 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002641060624036066, + "loss": 0.6087, + "step": 10190 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026407047099300034, + "loss": 0.6863, + "step": 10200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002640348795823941, + "loss": 0.6974, + "step": 10210 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002639992881717878, + "loss": 0.6028, + "step": 10220 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002639636967611816, + "loss": 0.5956, + "step": 10230 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026392810535057535, + "loss": 0.7005, + "step": 10240 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026389251393996914, + "loss": 0.6589, + "step": 10250 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002638569225293629, + "loss": 0.646, + "step": 10260 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002638213311187567, + "loss": 0.6825, + "step": 10270 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026378929884921106, + "loss": 0.6398, + "step": 10280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002637537074386048, + "loss": 0.6914, + "step": 10290 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026371811602799854, + "loss": 0.6285, + "step": 10300 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026368252461739233, + "loss": 0.61, + "step": 10310 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002636469332067861, + "loss": 0.7384, + "step": 10320 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002636113417961798, + "loss": 0.695, + "step": 10330 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002635757503855736, + "loss": 0.6646, + "step": 10340 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026354015897496735, + "loss": 0.6574, + "step": 10350 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002635045675643611, + "loss": 0.6208, + "step": 10360 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002634689761537549, + "loss": 0.6029, + "step": 10370 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002634333847431486, + "loss": 0.7264, + "step": 10380 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026339779333254236, + "loss": 0.7054, + "step": 10390 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026336220192193615, + "loss": 0.6087, + "step": 10400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002633266105113299, + "loss": 0.7008, + "step": 10410 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002632910191007237, + "loss": 0.6048, + "step": 10420 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002632554276901174, + "loss": 0.6031, + "step": 10430 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002632198362795112, + "loss": 0.6678, + "step": 10440 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026318424486890496, + "loss": 0.6607, + "step": 10450 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002631486534582987, + "loss": 0.6964, + "step": 10460 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002631130620476925, + "loss": 0.6993, + "step": 10470 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026307747063708623, + "loss": 0.7119, + "step": 10480 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026304187922647997, + "loss": 0.641, + "step": 10490 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026300628781587376, + "loss": 0.7175, + "step": 10500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002629706964052675, + "loss": 0.7156, + "step": 10510 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026293510499466124, + "loss": 0.6344, + "step": 10520 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026289951358405504, + "loss": 0.7467, + "step": 10530 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002628639221734488, + "loss": 0.6675, + "step": 10540 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026282833076284257, + "loss": 0.6612, + "step": 10550 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002627927393522363, + "loss": 0.7396, + "step": 10560 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026275714794163005, + "loss": 0.6916, + "step": 10570 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026272155653102384, + "loss": 0.57, + "step": 10580 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002626859651204176, + "loss": 0.6763, + "step": 10590 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002626503737098114, + "loss": 0.6774, + "step": 10600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002626147822992051, + "loss": 0.7538, + "step": 10610 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026257919088859885, + "loss": 0.7382, + "step": 10620 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002625435994779926, + "loss": 0.642, + "step": 10630 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002625080080673864, + "loss": 0.6947, + "step": 10640 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002624724166567801, + "loss": 0.732, + "step": 10650 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026243682524617387, + "loss": 0.6579, + "step": 10660 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026240123383556766, + "loss": 0.665, + "step": 10670 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002623656424249614, + "loss": 0.7206, + "step": 10680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002623300510143552, + "loss": 0.6377, + "step": 10690 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026229445960374893, + "loss": 0.744, + "step": 10700 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002622588681931427, + "loss": 0.6314, + "step": 10710 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026222327678253647, + "loss": 0.5859, + "step": 10720 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026218768537193026, + "loss": 0.6239, + "step": 10730 + }, + { + "epoch": 0.38, + "learning_rate": 0.000262152093961324, + "loss": 0.6289, + "step": 10740 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026211650255071774, + "loss": 0.7771, + "step": 10750 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002620809111401115, + "loss": 0.5974, + "step": 10760 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026204531972950527, + "loss": 0.629, + "step": 10770 + }, + { + "epoch": 0.38, + "learning_rate": 0.000262009728318899, + "loss": 0.7037, + "step": 10780 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026197413690829275, + "loss": 0.6756, + "step": 10790 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026193854549768654, + "loss": 0.6247, + "step": 10800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002619029540870803, + "loss": 0.562, + "step": 10810 + }, + { + "epoch": 0.38, + "learning_rate": 0.000261867362676474, + "loss": 0.6471, + "step": 10820 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002618317712658678, + "loss": 0.7036, + "step": 10830 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026179617985526156, + "loss": 0.7124, + "step": 10840 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026176058844465535, + "loss": 0.5975, + "step": 10850 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002617249970340491, + "loss": 0.6751, + "step": 10860 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002616894056234429, + "loss": 0.6748, + "step": 10870 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002616538142128366, + "loss": 0.7397, + "step": 10880 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026161822280223036, + "loss": 0.7737, + "step": 10890 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026158263139162415, + "loss": 0.6812, + "step": 10900 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002615470399810179, + "loss": 0.7196, + "step": 10910 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026151144857041163, + "loss": 0.6576, + "step": 10920 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026147585715980543, + "loss": 0.6802, + "step": 10930 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026144026574919917, + "loss": 0.6089, + "step": 10940 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002614046743385929, + "loss": 0.7525, + "step": 10950 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002613690829279867, + "loss": 0.7278, + "step": 10960 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026133349151738044, + "loss": 0.7099, + "step": 10970 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002612979001067742, + "loss": 0.6143, + "step": 10980 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026126230869616797, + "loss": 0.6645, + "step": 10990 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002612267172855617, + "loss": 0.5811, + "step": 11000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002611911258749555, + "loss": 0.5476, + "step": 11010 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026115553446434925, + "loss": 0.7523, + "step": 11020 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026111994305374304, + "loss": 0.6987, + "step": 11030 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002610843516431368, + "loss": 0.6548, + "step": 11040 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002610487602325305, + "loss": 0.7009, + "step": 11050 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002610131688219243, + "loss": 0.6223, + "step": 11060 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026097757741131805, + "loss": 0.7014, + "step": 11070 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002609419860007118, + "loss": 0.687, + "step": 11080 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026090639459010553, + "loss": 0.669, + "step": 11090 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002608708031794993, + "loss": 0.623, + "step": 11100 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026083521176889306, + "loss": 0.5977, + "step": 11110 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026079962035828686, + "loss": 0.6418, + "step": 11120 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002607640289476806, + "loss": 0.6404, + "step": 11130 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002607284375370744, + "loss": 0.6264, + "step": 11140 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026069284612646813, + "loss": 0.6921, + "step": 11150 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002606572547158619, + "loss": 0.645, + "step": 11160 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026062166330525566, + "loss": 0.5915, + "step": 11170 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002605860718946494, + "loss": 0.6425, + "step": 11180 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026055048048404314, + "loss": 0.6348, + "step": 11190 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026051488907343693, + "loss": 0.7087, + "step": 11200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002604792976628307, + "loss": 0.7018, + "step": 11210 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002604437062522244, + "loss": 0.6267, + "step": 11220 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002604081148416182, + "loss": 0.6771, + "step": 11230 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026037252343101195, + "loss": 0.5874, + "step": 11240 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002603369320204057, + "loss": 0.6597, + "step": 11250 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002603013406097995, + "loss": 0.6873, + "step": 11260 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002602657491991932, + "loss": 0.6519, + "step": 11270 + }, + { + "epoch": 0.4, + "learning_rate": 0.000260230157788587, + "loss": 0.6338, + "step": 11280 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026019456637798075, + "loss": 0.6631, + "step": 11290 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026015897496737455, + "loss": 0.6482, + "step": 11300 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002601233835567683, + "loss": 0.5208, + "step": 11310 + }, + { + "epoch": 0.4, + "learning_rate": 0.000260087792146162, + "loss": 0.6797, + "step": 11320 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002600522007355558, + "loss": 0.6226, + "step": 11330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026001660932494956, + "loss": 0.6616, + "step": 11340 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002599810179143433, + "loss": 0.6695, + "step": 11350 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002599454265037371, + "loss": 0.7132, + "step": 11360 + }, + { + "epoch": 0.4, + "learning_rate": 0.00025990983509313083, + "loss": 0.6248, + "step": 11370 + }, + { + "epoch": 0.4, + "learning_rate": 0.00025987424368252457, + "loss": 0.5801, + "step": 11380 + }, + { + "epoch": 0.4, + "learning_rate": 0.00025983865227191836, + "loss": 0.5961, + "step": 11390 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002598030608613121, + "loss": 0.6429, + "step": 11400 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025976746945070584, + "loss": 0.6534, + "step": 11410 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025973187804009964, + "loss": 0.6326, + "step": 11420 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002596962866294934, + "loss": 0.7653, + "step": 11430 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025966069521888717, + "loss": 0.5867, + "step": 11440 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002596251038082809, + "loss": 0.7721, + "step": 11450 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002595895123976747, + "loss": 0.6588, + "step": 11460 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025955392098706844, + "loss": 0.6176, + "step": 11470 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002595183295764622, + "loss": 0.7171, + "step": 11480 + }, + { + "epoch": 0.41, + "learning_rate": 0.000259482738165856, + "loss": 0.7859, + "step": 11490 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002594471467552497, + "loss": 0.7316, + "step": 11500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025941155534464345, + "loss": 0.7001, + "step": 11510 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025937596393403725, + "loss": 0.603, + "step": 11520 + }, + { + "epoch": 0.41, + "learning_rate": 0.000259340372523431, + "loss": 0.6713, + "step": 11530 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002593047811128247, + "loss": 0.7009, + "step": 11540 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002592691897022185, + "loss": 0.7104, + "step": 11550 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025923359829161226, + "loss": 0.566, + "step": 11560 + }, + { + "epoch": 0.41, + "learning_rate": 0.000259198006881006, + "loss": 0.6219, + "step": 11570 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002591624154703998, + "loss": 0.6206, + "step": 11580 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025912682405979353, + "loss": 0.6477, + "step": 11590 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002590912326491873, + "loss": 0.6757, + "step": 11600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025905564123858107, + "loss": 0.6812, + "step": 11610 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025902004982797486, + "loss": 0.6335, + "step": 11620 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002589844584173686, + "loss": 0.6062, + "step": 11630 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025894886700676234, + "loss": 0.7371, + "step": 11640 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002589132755961561, + "loss": 0.7567, + "step": 11650 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025887768418554987, + "loss": 0.6705, + "step": 11660 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002588420927749436, + "loss": 0.6781, + "step": 11670 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025880650136433735, + "loss": 0.7395, + "step": 11680 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025877090995373114, + "loss": 0.6354, + "step": 11690 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002587353185431249, + "loss": 0.5913, + "step": 11700 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002586997271325187, + "loss": 0.6296, + "step": 11710 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002586641357219124, + "loss": 0.6539, + "step": 11720 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002586285443113062, + "loss": 0.7085, + "step": 11730 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025859295290069995, + "loss": 0.7424, + "step": 11740 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025855736149009374, + "loss": 0.6588, + "step": 11750 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002585217700794875, + "loss": 0.6806, + "step": 11760 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002584861786688812, + "loss": 0.7187, + "step": 11770 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025845058725827496, + "loss": 0.6923, + "step": 11780 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025841499584766876, + "loss": 0.6112, + "step": 11790 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002583794044370625, + "loss": 0.6067, + "step": 11800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025834381302645623, + "loss": 0.6562, + "step": 11810 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025830822161585003, + "loss": 0.6703, + "step": 11820 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025827263020524377, + "loss": 0.6112, + "step": 11830 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002582370387946375, + "loss": 0.7401, + "step": 11840 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002582014473840313, + "loss": 0.5951, + "step": 11850 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025816585597342504, + "loss": 0.6889, + "step": 11860 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025813026456281883, + "loss": 0.6453, + "step": 11870 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002580946731522126, + "loss": 0.6925, + "step": 11880 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025805908174160637, + "loss": 0.6293, + "step": 11890 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002580234903310001, + "loss": 0.746, + "step": 11900 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025798789892039385, + "loss": 0.6438, + "step": 11910 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025795230750978764, + "loss": 0.6945, + "step": 11920 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002579167160991814, + "loss": 0.7621, + "step": 11930 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002578811246885751, + "loss": 0.6791, + "step": 11940 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002578455332779689, + "loss": 0.6895, + "step": 11950 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025780994186736265, + "loss": 0.6235, + "step": 11960 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002577743504567564, + "loss": 0.6027, + "step": 11970 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002577387590461502, + "loss": 0.7398, + "step": 11980 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002577031676355439, + "loss": 0.8015, + "step": 11990 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025766757622493766, + "loss": 0.726, + "step": 12000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025763198481433146, + "loss": 0.7539, + "step": 12010 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002575963934037252, + "loss": 0.5456, + "step": 12020 + }, + { + "epoch": 0.43, + "learning_rate": 0.000257560801993119, + "loss": 0.6644, + "step": 12030 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025752521058251273, + "loss": 0.6225, + "step": 12040 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002574896191719065, + "loss": 0.6593, + "step": 12050 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025745402776130026, + "loss": 0.6988, + "step": 12060 + }, + { + "epoch": 0.43, + "learning_rate": 0.000257418436350694, + "loss": 0.6553, + "step": 12070 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002573828449400878, + "loss": 0.6502, + "step": 12080 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025734725352948154, + "loss": 0.6295, + "step": 12090 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002573116621188753, + "loss": 0.7528, + "step": 12100 + }, + { + "epoch": 0.43, + "learning_rate": 0.000257276070708269, + "loss": 0.6808, + "step": 12110 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002572404792976628, + "loss": 0.7277, + "step": 12120 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025720488788705655, + "loss": 0.6174, + "step": 12130 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025716929647645034, + "loss": 0.7484, + "step": 12140 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002571337050658441, + "loss": 0.602, + "step": 12150 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002570981136552379, + "loss": 0.7096, + "step": 12160 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002570625222446316, + "loss": 0.6005, + "step": 12170 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025702693083402535, + "loss": 0.6596, + "step": 12180 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025699133942341915, + "loss": 0.6502, + "step": 12190 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002569557480128129, + "loss": 0.7322, + "step": 12200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002569201566022067, + "loss": 0.6488, + "step": 12210 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002568845651916004, + "loss": 0.544, + "step": 12220 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025684897378099416, + "loss": 0.5705, + "step": 12230 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002568133823703879, + "loss": 0.6534, + "step": 12240 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002567777909597817, + "loss": 0.6648, + "step": 12250 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025674219954917543, + "loss": 0.7348, + "step": 12260 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025670660813856917, + "loss": 0.6103, + "step": 12270 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025667101672796296, + "loss": 0.7673, + "step": 12280 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002566354253173567, + "loss": 0.709, + "step": 12290 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002565998339067505, + "loss": 0.6413, + "step": 12300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025656424249614424, + "loss": 0.6544, + "step": 12310 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025652865108553803, + "loss": 0.6615, + "step": 12320 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025649305967493177, + "loss": 0.6999, + "step": 12330 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025646102740538616, + "loss": 0.7117, + "step": 12340 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002564254359947799, + "loss": 0.7284, + "step": 12350 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002563898445841737, + "loss": 0.608, + "step": 12360 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025635425317356743, + "loss": 0.6565, + "step": 12370 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025631866176296117, + "loss": 0.6898, + "step": 12380 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025628307035235496, + "loss": 0.6654, + "step": 12390 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002562474789417487, + "loss": 0.678, + "step": 12400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025621188753114244, + "loss": 0.7709, + "step": 12410 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002561762961205362, + "loss": 0.6717, + "step": 12420 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025614070470993, + "loss": 0.6868, + "step": 12430 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002561051132993237, + "loss": 0.6469, + "step": 12440 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002560695218887175, + "loss": 0.6912, + "step": 12450 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025603393047811125, + "loss": 0.6562, + "step": 12460 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025599833906750504, + "loss": 0.6982, + "step": 12470 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002559627476568988, + "loss": 0.7473, + "step": 12480 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002559271562462926, + "loss": 0.6872, + "step": 12490 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002558915648356863, + "loss": 0.6941, + "step": 12500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025585597342508005, + "loss": 0.6579, + "step": 12510 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002558203820144738, + "loss": 0.6863, + "step": 12520 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002557847906038676, + "loss": 0.6288, + "step": 12530 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002557491991932613, + "loss": 0.5739, + "step": 12540 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025571360778265507, + "loss": 0.6202, + "step": 12550 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025567801637204886, + "loss": 0.7408, + "step": 12560 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002556424249614426, + "loss": 0.5894, + "step": 12570 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002556068335508364, + "loss": 0.7237, + "step": 12580 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025557124214023013, + "loss": 0.6797, + "step": 12590 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025553565072962387, + "loss": 0.6336, + "step": 12600 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025550005931901766, + "loss": 0.5654, + "step": 12610 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002554644679084114, + "loss": 0.7306, + "step": 12620 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002554288764978052, + "loss": 0.7408, + "step": 12630 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025539328508719894, + "loss": 0.7022, + "step": 12640 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002553576936765927, + "loss": 0.7628, + "step": 12650 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025532210226598647, + "loss": 0.7272, + "step": 12660 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002552865108553802, + "loss": 0.6983, + "step": 12670 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025525091944477395, + "loss": 0.7305, + "step": 12680 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025521532803416774, + "loss": 0.6391, + "step": 12690 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002551797366235615, + "loss": 0.6999, + "step": 12700 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002551441452129552, + "loss": 0.7095, + "step": 12710 + }, + { + "epoch": 0.45, + "learning_rate": 0.000255108553802349, + "loss": 0.6445, + "step": 12720 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025507296239174275, + "loss": 0.7364, + "step": 12730 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025503737098113655, + "loss": 0.6244, + "step": 12740 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002550017795705303, + "loss": 0.6553, + "step": 12750 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002549661881599241, + "loss": 0.5593, + "step": 12760 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002549305967493178, + "loss": 0.6772, + "step": 12770 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025489500533871156, + "loss": 0.6262, + "step": 12780 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025485941392810535, + "loss": 0.6741, + "step": 12790 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002548238225174991, + "loss": 0.596, + "step": 12800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025478823110689283, + "loss": 0.6022, + "step": 12810 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002547526396962866, + "loss": 0.663, + "step": 12820 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025471704828568037, + "loss": 0.5627, + "step": 12830 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002546814568750741, + "loss": 0.7556, + "step": 12840 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002546458654644679, + "loss": 0.7456, + "step": 12850 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025461027405386164, + "loss": 0.6417, + "step": 12860 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002545746826432554, + "loss": 0.6878, + "step": 12870 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025453909123264917, + "loss": 0.5615, + "step": 12880 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002545034998220429, + "loss": 0.6748, + "step": 12890 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002544679084114367, + "loss": 0.6035, + "step": 12900 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025443231700083044, + "loss": 0.6211, + "step": 12910 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025439672559022424, + "loss": 0.7444, + "step": 12920 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002543646933206786, + "loss": 0.7103, + "step": 12930 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025432910191007236, + "loss": 0.5651, + "step": 12940 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002542935104994661, + "loss": 0.7112, + "step": 12950 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025425791908885984, + "loss": 0.6323, + "step": 12960 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025422232767825364, + "loss": 0.5438, + "step": 12970 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002541867362676474, + "loss": 0.7769, + "step": 12980 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002541511448570411, + "loss": 0.6814, + "step": 12990 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002541155534464349, + "loss": 0.6867, + "step": 13000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025407996203582865, + "loss": 0.7224, + "step": 13010 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002540443706252224, + "loss": 0.6341, + "step": 13020 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002540087792146162, + "loss": 0.5995, + "step": 13030 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002539731878040099, + "loss": 0.7044, + "step": 13040 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002539375963934037, + "loss": 0.6987, + "step": 13050 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025390200498279746, + "loss": 0.6268, + "step": 13060 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025386641357219125, + "loss": 0.6315, + "step": 13070 + }, + { + "epoch": 0.46, + "learning_rate": 0.000253830822161585, + "loss": 0.6597, + "step": 13080 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025379523075097873, + "loss": 0.7198, + "step": 13090 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002537596393403725, + "loss": 0.5952, + "step": 13100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025372404792976626, + "loss": 0.6484, + "step": 13110 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025368845651916, + "loss": 0.6402, + "step": 13120 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002536528651085538, + "loss": 0.6464, + "step": 13130 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025361727369794753, + "loss": 0.6433, + "step": 13140 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025358168228734127, + "loss": 0.6253, + "step": 13150 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025354609087673507, + "loss": 0.6625, + "step": 13160 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002535104994661288, + "loss": 0.6262, + "step": 13170 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002534749080555226, + "loss": 0.6462, + "step": 13180 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025343931664491634, + "loss": 0.7032, + "step": 13190 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025340372523431013, + "loss": 0.6469, + "step": 13200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025336813382370387, + "loss": 0.6922, + "step": 13210 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002533325424130976, + "loss": 0.7421, + "step": 13220 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002532969510024914, + "loss": 0.6098, + "step": 13230 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025326135959188514, + "loss": 0.6866, + "step": 13240 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002532257681812789, + "loss": 0.6846, + "step": 13250 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002531901767706727, + "loss": 0.6591, + "step": 13260 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002531545853600664, + "loss": 0.6894, + "step": 13270 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025311899394946016, + "loss": 0.6977, + "step": 13280 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002530834025388539, + "loss": 0.6943, + "step": 13290 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002530478111282477, + "loss": 0.7509, + "step": 13300 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025301221971764143, + "loss": 0.6725, + "step": 13310 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002529766283070352, + "loss": 0.6886, + "step": 13320 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025294103689642896, + "loss": 0.7321, + "step": 13330 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025290544548582276, + "loss": 0.6697, + "step": 13340 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002528698540752165, + "loss": 0.709, + "step": 13350 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002528342626646103, + "loss": 0.6317, + "step": 13360 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025279867125400403, + "loss": 0.6817, + "step": 13370 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025276307984339777, + "loss": 0.6444, + "step": 13380 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025272748843279156, + "loss": 0.6837, + "step": 13390 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002526918970221853, + "loss": 0.5978, + "step": 13400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025265630561157904, + "loss": 0.6873, + "step": 13410 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002526207142009728, + "loss": 0.6716, + "step": 13420 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002525851227903666, + "loss": 0.638, + "step": 13430 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002525495313797603, + "loss": 0.6422, + "step": 13440 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025251393996915405, + "loss": 0.6885, + "step": 13450 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025247834855854785, + "loss": 0.706, + "step": 13460 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002524427571479416, + "loss": 0.6301, + "step": 13470 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002524071657373354, + "loss": 0.632, + "step": 13480 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002523715743267291, + "loss": 0.5909, + "step": 13490 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002523359829161229, + "loss": 0.6306, + "step": 13500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025230039150551665, + "loss": 0.5696, + "step": 13510 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002522648000949104, + "loss": 0.6982, + "step": 13520 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002522292086843042, + "loss": 0.6278, + "step": 13530 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002521936172736979, + "loss": 0.73, + "step": 13540 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025215802586309166, + "loss": 0.7321, + "step": 13550 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025212243445248546, + "loss": 0.66, + "step": 13560 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002520868430418792, + "loss": 0.6594, + "step": 13570 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025205125163127294, + "loss": 0.6585, + "step": 13580 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025201566022066673, + "loss": 0.614, + "step": 13590 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025198006881006047, + "loss": 0.6811, + "step": 13600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025194447739945426, + "loss": 0.7428, + "step": 13610 + }, + { + "epoch": 0.48, + "learning_rate": 0.000251908885988848, + "loss": 0.666, + "step": 13620 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025187329457824174, + "loss": 0.7152, + "step": 13630 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025183770316763554, + "loss": 0.6649, + "step": 13640 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002518021117570293, + "loss": 0.6833, + "step": 13650 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025176652034642307, + "loss": 0.6988, + "step": 13660 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002517309289358168, + "loss": 0.5722, + "step": 13670 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025169533752521055, + "loss": 0.5754, + "step": 13680 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025165974611460434, + "loss": 0.6248, + "step": 13690 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002516241547039981, + "loss": 0.6394, + "step": 13700 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002515885632933918, + "loss": 0.6712, + "step": 13710 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002515529718827856, + "loss": 0.7226, + "step": 13720 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025151738047217935, + "loss": 0.688, + "step": 13730 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002514817890615731, + "loss": 0.7168, + "step": 13740 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002514461976509669, + "loss": 0.663, + "step": 13750 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002514106062403606, + "loss": 0.663, + "step": 13760 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002513750148297544, + "loss": 0.7507, + "step": 13770 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025133942341914816, + "loss": 0.6108, + "step": 13780 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025130383200854195, + "loss": 0.6595, + "step": 13790 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002512682405979357, + "loss": 0.7039, + "step": 13800 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025123264918732943, + "loss": 0.6335, + "step": 13810 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002511970577767232, + "loss": 0.6834, + "step": 13820 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025116146636611697, + "loss": 0.5485, + "step": 13830 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002511258749555107, + "loss": 0.7282, + "step": 13840 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025109028354490444, + "loss": 0.7947, + "step": 13850 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025105469213429824, + "loss": 0.5967, + "step": 13860 + }, + { + "epoch": 0.49, + "learning_rate": 0.000251019100723692, + "loss": 0.6473, + "step": 13870 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002509835093130857, + "loss": 0.6457, + "step": 13880 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002509479179024795, + "loss": 0.7073, + "step": 13890 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025091232649187325, + "loss": 0.5058, + "step": 13900 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025087673508126704, + "loss": 0.6743, + "step": 13910 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002508411436706608, + "loss": 0.624, + "step": 13920 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002508055522600546, + "loss": 0.6708, + "step": 13930 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002507699608494483, + "loss": 0.6928, + "step": 13940 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002507343694388421, + "loss": 0.6269, + "step": 13950 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025069877802823585, + "loss": 0.6982, + "step": 13960 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002506631866176296, + "loss": 0.5909, + "step": 13970 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025062759520702333, + "loss": 0.6451, + "step": 13980 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002505920037964171, + "loss": 0.6961, + "step": 13990 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025055641238581086, + "loss": 0.7153, + "step": 14000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002505208209752046, + "loss": 0.7103, + "step": 14010 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002504852295645984, + "loss": 0.7147, + "step": 14020 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025044963815399213, + "loss": 0.773, + "step": 14030 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002504140467433859, + "loss": 0.669, + "step": 14040 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025037845533277967, + "loss": 0.6569, + "step": 14050 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002503428639221734, + "loss": 0.7455, + "step": 14060 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002503072725115672, + "loss": 0.5277, + "step": 14070 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025027168110096094, + "loss": 0.58, + "step": 14080 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025023608969035473, + "loss": 0.7198, + "step": 14090 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025020049827974847, + "loss": 0.6862, + "step": 14100 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002501649068691422, + "loss": 0.7313, + "step": 14110 + }, + { + "epoch": 0.5, + "learning_rate": 0.000250129315458536, + "loss": 0.7646, + "step": 14120 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025009372404792975, + "loss": 0.6098, + "step": 14130 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002500581326373235, + "loss": 0.697, + "step": 14140 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002500225412267173, + "loss": 0.6837, + "step": 14150 + }, + { + "epoch": 0.5, + "learning_rate": 0.000249986949816111, + "loss": 0.5986, + "step": 14160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00024995135840550476, + "loss": 0.8238, + "step": 14170 + }, + { + "epoch": 0.5, + "learning_rate": 0.00024991576699489855, + "loss": 0.6794, + "step": 14180 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002498801755842923, + "loss": 0.6985, + "step": 14190 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002498445841736861, + "loss": 0.6153, + "step": 14200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002498089927630798, + "loss": 0.7146, + "step": 14210 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002497734013524736, + "loss": 0.6049, + "step": 14220 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024973780994186736, + "loss": 0.6025, + "step": 14230 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002497022185312611, + "loss": 0.7142, + "step": 14240 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002496666271206549, + "loss": 0.6612, + "step": 14250 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024963103571004863, + "loss": 0.6589, + "step": 14260 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024959544429944237, + "loss": 0.5938, + "step": 14270 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024955985288883616, + "loss": 0.7197, + "step": 14280 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002495242614782299, + "loss": 0.6527, + "step": 14290 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024948867006762364, + "loss": 0.7443, + "step": 14300 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002494530786570174, + "loss": 0.6561, + "step": 14310 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002494174872464112, + "loss": 0.6946, + "step": 14320 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002493818958358049, + "loss": 0.7613, + "step": 14330 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002493463044251987, + "loss": 0.6176, + "step": 14340 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024931071301459245, + "loss": 0.6723, + "step": 14350 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024927512160398624, + "loss": 0.6893, + "step": 14360 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024923953019338, + "loss": 0.6068, + "step": 14370 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002492039387827738, + "loss": 0.6743, + "step": 14380 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002491683473721675, + "loss": 0.6366, + "step": 14390 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024913275596156125, + "loss": 0.59, + "step": 14400 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024909716455095505, + "loss": 0.6394, + "step": 14410 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002490615731403488, + "loss": 0.7755, + "step": 14420 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002490259817297425, + "loss": 0.5536, + "step": 14430 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024899039031913626, + "loss": 0.6219, + "step": 14440 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024895479890853006, + "loss": 0.6696, + "step": 14450 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002489192074979238, + "loss": 0.6677, + "step": 14460 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024888361608731754, + "loss": 0.6307, + "step": 14470 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024884802467671133, + "loss": 0.6472, + "step": 14480 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024881243326610507, + "loss": 0.6534, + "step": 14490 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024877684185549886, + "loss": 0.6878, + "step": 14500 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002487412504448926, + "loss": 0.7111, + "step": 14510 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002487056590342864, + "loss": 0.6628, + "step": 14520 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024867006762368014, + "loss": 0.6701, + "step": 14530 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002486344762130739, + "loss": 0.6404, + "step": 14540 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024859888480246767, + "loss": 0.6782, + "step": 14550 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002485632933918614, + "loss": 0.7121, + "step": 14560 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024852770198125515, + "loss": 0.6517, + "step": 14570 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024849211057064894, + "loss": 0.646, + "step": 14580 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002484565191600427, + "loss": 0.6125, + "step": 14590 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002484209277494364, + "loss": 0.5713, + "step": 14600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002483853363388302, + "loss": 0.6996, + "step": 14610 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024834974492822395, + "loss": 0.674, + "step": 14620 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002483141535176177, + "loss": 0.7223, + "step": 14630 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002482785621070115, + "loss": 0.7383, + "step": 14640 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002482429706964052, + "loss": 0.6706, + "step": 14650 + }, + { + "epoch": 0.52, + "learning_rate": 0.000248207379285799, + "loss": 0.6425, + "step": 14660 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024817178787519276, + "loss": 0.6618, + "step": 14670 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024813619646458655, + "loss": 0.7539, + "step": 14680 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002481006050539803, + "loss": 0.6389, + "step": 14690 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024806501364337403, + "loss": 0.6362, + "step": 14700 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002480294222327678, + "loss": 0.7019, + "step": 14710 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024799383082216157, + "loss": 0.737, + "step": 14720 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002479582394115553, + "loss": 0.6228, + "step": 14730 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002479226480009491, + "loss": 0.7023, + "step": 14740 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024788705659034284, + "loss": 0.6379, + "step": 14750 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002478514651797366, + "loss": 0.7517, + "step": 14760 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024781587376913037, + "loss": 0.6845, + "step": 14770 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002477802823585241, + "loss": 0.7448, + "step": 14780 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002477446909479179, + "loss": 0.6916, + "step": 14790 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024770909953731164, + "loss": 0.7748, + "step": 14800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024767350812670544, + "loss": 0.6597, + "step": 14810 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002476379167160992, + "loss": 0.699, + "step": 14820 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002476023253054929, + "loss": 0.7032, + "step": 14830 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002475667338948867, + "loss": 0.6889, + "step": 14840 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024753114248428045, + "loss": 0.6236, + "step": 14850 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002474955510736742, + "loss": 0.667, + "step": 14860 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024745995966306793, + "loss": 0.6916, + "step": 14870 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002474243682524617, + "loss": 0.7216, + "step": 14880 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024738877684185546, + "loss": 0.6766, + "step": 14890 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002473531854312492, + "loss": 0.6951, + "step": 14900 + }, + { + "epoch": 0.53, + "learning_rate": 0.000247317594020643, + "loss": 0.6766, + "step": 14910 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024728200261003673, + "loss": 0.6693, + "step": 14920 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024724641119943053, + "loss": 0.6616, + "step": 14930 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024721081978882427, + "loss": 0.6795, + "step": 14940 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024717522837821806, + "loss": 0.6114, + "step": 14950 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002471396369676118, + "loss": 0.6599, + "step": 14960 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002471040455570056, + "loss": 0.6986, + "step": 14970 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024706845414639933, + "loss": 0.653, + "step": 14980 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002470328627357931, + "loss": 0.6804, + "step": 14990 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002469972713251868, + "loss": 0.7093, + "step": 15000 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002469616799145806, + "loss": 0.6299, + "step": 15010 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024692608850397435, + "loss": 0.6224, + "step": 15020 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002468904970933681, + "loss": 0.7414, + "step": 15030 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002468549056827619, + "loss": 0.707, + "step": 15040 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002468193142721556, + "loss": 0.7286, + "step": 15050 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024678372286154936, + "loss": 0.7436, + "step": 15060 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024674813145094315, + "loss": 0.671, + "step": 15070 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002467125400403369, + "loss": 0.7036, + "step": 15080 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002466769486297307, + "loss": 0.5889, + "step": 15090 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002466413572191244, + "loss": 0.72, + "step": 15100 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002466057658085182, + "loss": 0.6538, + "step": 15110 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024657017439791196, + "loss": 0.6813, + "step": 15120 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002465345829873057, + "loss": 0.6662, + "step": 15130 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002464989915766995, + "loss": 0.6727, + "step": 15140 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024646340016609323, + "loss": 0.5828, + "step": 15150 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024642780875548697, + "loss": 0.6408, + "step": 15160 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024639221734488076, + "loss": 0.6419, + "step": 15170 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002463566259342745, + "loss": 0.687, + "step": 15180 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024632103452366824, + "loss": 0.6691, + "step": 15190 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024628544311306204, + "loss": 0.7671, + "step": 15200 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002462498517024558, + "loss": 0.6963, + "step": 15210 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002462142602918495, + "loss": 0.6452, + "step": 15220 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002461786688812433, + "loss": 0.7518, + "step": 15230 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024614307747063705, + "loss": 0.6961, + "step": 15240 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024610748606003084, + "loss": 0.6614, + "step": 15250 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002460718946494246, + "loss": 0.6297, + "step": 15260 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002460363032388184, + "loss": 0.7473, + "step": 15270 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002460007118282121, + "loss": 0.6758, + "step": 15280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024596512041760585, + "loss": 0.6179, + "step": 15290 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024592952900699965, + "loss": 0.631, + "step": 15300 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002458939375963934, + "loss": 0.6952, + "step": 15310 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002458583461857871, + "loss": 0.6148, + "step": 15320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024582275477518086, + "loss": 0.7256, + "step": 15330 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024578716336457466, + "loss": 0.6646, + "step": 15340 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002457515719539684, + "loss": 0.6832, + "step": 15350 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002457159805433622, + "loss": 0.7076, + "step": 15360 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024568038913275593, + "loss": 0.6201, + "step": 15370 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002456447977221497, + "loss": 0.6015, + "step": 15380 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024560920631154346, + "loss": 0.6716, + "step": 15390 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024557361490093726, + "loss": 0.5865, + "step": 15400 + }, + { + "epoch": 0.55, + "learning_rate": 0.000245538023490331, + "loss": 0.7035, + "step": 15410 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024550243207972474, + "loss": 0.6434, + "step": 15420 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024546684066911853, + "loss": 0.7037, + "step": 15430 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024543124925851227, + "loss": 0.6758, + "step": 15440 + }, + { + "epoch": 0.55, + "learning_rate": 0.000245395657847906, + "loss": 0.6509, + "step": 15450 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024536006643729975, + "loss": 0.6783, + "step": 15460 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024532447502669354, + "loss": 0.6512, + "step": 15470 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002452888836160873, + "loss": 0.682, + "step": 15480 + }, + { + "epoch": 0.55, + "learning_rate": 0.000245253292205481, + "loss": 0.6338, + "step": 15490 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002452177007948748, + "loss": 0.6194, + "step": 15500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024518210938426855, + "loss": 0.5854, + "step": 15510 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024514651797366235, + "loss": 0.575, + "step": 15520 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002451109265630561, + "loss": 0.768, + "step": 15530 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002450753351524499, + "loss": 0.7963, + "step": 15540 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002450397437418436, + "loss": 0.6532, + "step": 15550 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024500415233123736, + "loss": 0.5938, + "step": 15560 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024496856092063115, + "loss": 0.7571, + "step": 15570 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002449329695100249, + "loss": 0.7252, + "step": 15580 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024489737809941863, + "loss": 0.6731, + "step": 15590 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002448617866888124, + "loss": 0.7083, + "step": 15600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024482619527820617, + "loss": 0.652, + "step": 15610 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002447906038675999, + "loss": 0.682, + "step": 15620 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002447550124569937, + "loss": 0.6907, + "step": 15630 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024471942104638744, + "loss": 0.6447, + "step": 15640 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002446838296357812, + "loss": 0.7077, + "step": 15650 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024464823822517497, + "loss": 0.7185, + "step": 15660 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002446126468145687, + "loss": 0.6684, + "step": 15670 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002445770554039625, + "loss": 0.6726, + "step": 15680 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024454146399335624, + "loss": 0.693, + "step": 15690 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024450587258275004, + "loss": 0.6531, + "step": 15700 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002444702811721438, + "loss": 0.6829, + "step": 15710 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002444346897615375, + "loss": 0.6906, + "step": 15720 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002443990983509313, + "loss": 0.6097, + "step": 15730 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024436350694032505, + "loss": 0.6018, + "step": 15740 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002443279155297188, + "loss": 0.6812, + "step": 15750 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002442923241191126, + "loss": 0.6864, + "step": 15760 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002442567327085063, + "loss": 0.6853, + "step": 15770 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024422114129790006, + "loss": 0.6916, + "step": 15780 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024418554988729386, + "loss": 0.6241, + "step": 15790 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002441499584766876, + "loss": 0.8003, + "step": 15800 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024411436706608136, + "loss": 0.6329, + "step": 15810 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002440787756554751, + "loss": 0.7585, + "step": 15820 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002440431842448689, + "loss": 0.6591, + "step": 15830 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024400759283426263, + "loss": 0.6838, + "step": 15840 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002439720014236564, + "loss": 0.6254, + "step": 15850 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024393641001305017, + "loss": 0.7214, + "step": 15860 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024390081860244393, + "loss": 0.7125, + "step": 15870 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024386522719183767, + "loss": 0.7192, + "step": 15880 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024383319492229206, + "loss": 0.6695, + "step": 15890 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002437976035116858, + "loss": 0.6677, + "step": 15900 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002437620121010796, + "loss": 0.6524, + "step": 15910 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024372642069047333, + "loss": 0.6934, + "step": 15920 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002436908292798671, + "loss": 0.6521, + "step": 15930 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024365523786926087, + "loss": 0.6595, + "step": 15940 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024361964645865463, + "loss": 0.6675, + "step": 15950 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024358405504804837, + "loss": 0.7328, + "step": 15960 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024354846363744214, + "loss": 0.6115, + "step": 15970 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002435128722268359, + "loss": 0.6868, + "step": 15980 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024347728081622967, + "loss": 0.6666, + "step": 15990 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002434416894056234, + "loss": 0.6702, + "step": 16000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024340609799501718, + "loss": 0.6715, + "step": 16010 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024337050658441094, + "loss": 0.673, + "step": 16020 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024333491517380468, + "loss": 0.6646, + "step": 16030 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024329932376319848, + "loss": 0.6465, + "step": 16040 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024326373235259222, + "loss": 0.7336, + "step": 16050 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024322814094198598, + "loss": 0.5739, + "step": 16060 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024319254953137975, + "loss": 0.6218, + "step": 16070 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024315695812077352, + "loss": 0.676, + "step": 16080 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024312136671016726, + "loss": 0.609, + "step": 16090 + }, + { + "epoch": 0.57, + "learning_rate": 0.000243085775299561, + "loss": 0.6883, + "step": 16100 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002430501838889548, + "loss": 0.636, + "step": 16110 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024301459247834853, + "loss": 0.6424, + "step": 16120 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002429790010677423, + "loss": 0.7523, + "step": 16130 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024294340965713606, + "loss": 0.7219, + "step": 16140 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024290781824652983, + "loss": 0.6846, + "step": 16150 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024287222683592357, + "loss": 0.6735, + "step": 16160 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024283663542531736, + "loss": 0.6184, + "step": 16170 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002428010440147111, + "loss": 0.6521, + "step": 16180 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024276545260410484, + "loss": 0.5792, + "step": 16190 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002427298611934986, + "loss": 0.6913, + "step": 16200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024269426978289237, + "loss": 0.6387, + "step": 16210 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024265867837228614, + "loss": 0.6401, + "step": 16220 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024262308696167988, + "loss": 0.7866, + "step": 16230 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024258749555107367, + "loss": 0.6497, + "step": 16240 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002425519041404674, + "loss": 0.6951, + "step": 16250 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024251631272986115, + "loss": 0.6984, + "step": 16260 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024248072131925495, + "loss": 0.6134, + "step": 16270 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024244512990864869, + "loss": 0.5983, + "step": 16280 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024240953849804245, + "loss": 0.6449, + "step": 16290 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024237394708743622, + "loss": 0.6779, + "step": 16300 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024233835567682998, + "loss": 0.6475, + "step": 16310 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024230276426622372, + "loss": 0.7089, + "step": 16320 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024226717285561746, + "loss": 0.74, + "step": 16330 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024223158144501126, + "loss": 0.7564, + "step": 16340 + }, + { + "epoch": 0.58, + "learning_rate": 0.000242195990034405, + "loss": 0.6169, + "step": 16350 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024216039862379876, + "loss": 0.6995, + "step": 16360 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024212480721319253, + "loss": 0.6493, + "step": 16370 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002420892158025863, + "loss": 0.6621, + "step": 16380 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024205362439198004, + "loss": 0.6218, + "step": 16390 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024201803298137383, + "loss": 0.6516, + "step": 16400 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024198244157076757, + "loss": 0.6984, + "step": 16410 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002419468501601613, + "loss": 0.6704, + "step": 16420 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024191125874955508, + "loss": 0.6176, + "step": 16430 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024187566733894884, + "loss": 0.5708, + "step": 16440 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002418400759283426, + "loss": 0.6644, + "step": 16450 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024180448451773635, + "loss": 0.5657, + "step": 16460 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024176889310713014, + "loss": 0.6141, + "step": 16470 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024173330169652388, + "loss": 0.6269, + "step": 16480 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024169771028591765, + "loss": 0.6035, + "step": 16490 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024166211887531141, + "loss": 0.6679, + "step": 16500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024162652746470515, + "loss": 0.6359, + "step": 16510 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024159093605409892, + "loss": 0.7402, + "step": 16520 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002415553446434927, + "loss": 0.6607, + "step": 16530 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024151975323288645, + "loss": 0.6889, + "step": 16540 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002414841618222802, + "loss": 0.6536, + "step": 16550 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024144857041167396, + "loss": 0.6205, + "step": 16560 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024141297900106773, + "loss": 0.7202, + "step": 16570 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002413773875904615, + "loss": 0.6024, + "step": 16580 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024134179617985523, + "loss": 0.6302, + "step": 16590 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024130620476924903, + "loss": 0.708, + "step": 16600 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024127061335864276, + "loss": 0.6989, + "step": 16610 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002412350219480365, + "loss": 0.674, + "step": 16620 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002411994305374303, + "loss": 0.6691, + "step": 16630 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024116383912682404, + "loss": 0.7193, + "step": 16640 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002411282477162178, + "loss": 0.649, + "step": 16650 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024109265630561154, + "loss": 0.729, + "step": 16660 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024105706489500534, + "loss": 0.8033, + "step": 16670 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024102147348439908, + "loss": 0.6541, + "step": 16680 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024098588207379282, + "loss": 0.6373, + "step": 16690 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002409502906631866, + "loss": 0.6739, + "step": 16700 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024091469925258035, + "loss": 0.6097, + "step": 16710 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024087910784197412, + "loss": 0.6785, + "step": 16720 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024084351643136788, + "loss": 0.6112, + "step": 16730 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024080792502076165, + "loss": 0.6891, + "step": 16740 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002407723336101554, + "loss": 0.6048, + "step": 16750 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024073674219954918, + "loss": 0.7102, + "step": 16760 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024070115078894292, + "loss": 0.6232, + "step": 16770 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024066555937833666, + "loss": 0.5981, + "step": 16780 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024062996796773043, + "loss": 0.6137, + "step": 16790 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002405943765571242, + "loss": 0.6426, + "step": 16800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024055878514651796, + "loss": 0.631, + "step": 16810 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002405231937359117, + "loss": 0.6843, + "step": 16820 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002404876023253055, + "loss": 0.79, + "step": 16830 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024045201091469923, + "loss": 0.655, + "step": 16840 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024041641950409297, + "loss": 0.6323, + "step": 16850 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024038082809348677, + "loss": 0.6674, + "step": 16860 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002403452366828805, + "loss": 0.6571, + "step": 16870 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024030964527227427, + "loss": 0.6069, + "step": 16880 + }, + { + "epoch": 0.6, + "learning_rate": 0.000240274053861668, + "loss": 0.6858, + "step": 16890 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002402384624510618, + "loss": 0.6182, + "step": 16900 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024020287104045554, + "loss": 0.5668, + "step": 16910 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024016727962984928, + "loss": 0.717, + "step": 16920 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024013168821924308, + "loss": 0.7248, + "step": 16930 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024009609680863682, + "loss": 0.6498, + "step": 16940 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024006050539803058, + "loss": 0.6457, + "step": 16950 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024002491398742435, + "loss": 0.7854, + "step": 16960 + }, + { + "epoch": 0.6, + "learning_rate": 0.00023998932257681812, + "loss": 0.6564, + "step": 16970 + }, + { + "epoch": 0.6, + "learning_rate": 0.00023995373116621186, + "loss": 0.9188, + "step": 16980 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002399181397556056, + "loss": 0.6874, + "step": 16990 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002398825483449994, + "loss": 0.5946, + "step": 17000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00023984695693439313, + "loss": 0.6985, + "step": 17010 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002398113655237869, + "loss": 0.6121, + "step": 17020 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023977577411318066, + "loss": 0.6853, + "step": 17030 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023974018270257443, + "loss": 0.6888, + "step": 17040 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023970459129196817, + "loss": 0.726, + "step": 17050 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023966899988136196, + "loss": 0.7037, + "step": 17060 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002396334084707557, + "loss": 0.6722, + "step": 17070 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023959781706014947, + "loss": 0.7054, + "step": 17080 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023956222564954323, + "loss": 0.6357, + "step": 17090 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023952663423893697, + "loss": 0.6819, + "step": 17100 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023949104282833074, + "loss": 0.7216, + "step": 17110 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023945545141772448, + "loss": 0.824, + "step": 17120 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023941986000711827, + "loss": 0.7002, + "step": 17130 + }, + { + "epoch": 0.61, + "learning_rate": 0.000239384268596512, + "loss": 0.5983, + "step": 17140 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023934867718590578, + "loss": 0.7212, + "step": 17150 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023931308577529955, + "loss": 0.6232, + "step": 17160 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002392774943646933, + "loss": 0.7449, + "step": 17170 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023924190295408705, + "loss": 0.6891, + "step": 17180 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023920631154348085, + "loss": 0.6748, + "step": 17190 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023917072013287459, + "loss": 0.6518, + "step": 17200 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023913512872226832, + "loss": 0.7308, + "step": 17210 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002390995373116621, + "loss": 0.6391, + "step": 17220 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023906394590105586, + "loss": 0.6932, + "step": 17230 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023902835449044962, + "loss": 0.7924, + "step": 17240 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023899276307984336, + "loss": 0.7031, + "step": 17250 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023895717166923716, + "loss": 0.6973, + "step": 17260 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002389215802586309, + "loss": 0.6141, + "step": 17270 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023888598884802464, + "loss": 0.6046, + "step": 17280 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023885039743741843, + "loss": 0.6557, + "step": 17290 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023881480602681217, + "loss": 0.6583, + "step": 17300 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023877921461620594, + "loss": 0.7034, + "step": 17310 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002387436232055997, + "loss": 0.6628, + "step": 17320 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023870803179499347, + "loss": 0.6703, + "step": 17330 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002386724403843872, + "loss": 0.7009, + "step": 17340 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023863684897378095, + "loss": 0.668, + "step": 17350 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023860125756317474, + "loss": 0.6194, + "step": 17360 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023856566615256848, + "loss": 0.7085, + "step": 17370 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023853007474196225, + "loss": 0.6256, + "step": 17380 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023849448333135601, + "loss": 0.608, + "step": 17390 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023845889192074978, + "loss": 0.7403, + "step": 17400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023842330051014352, + "loss": 0.6186, + "step": 17410 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023838770909953731, + "loss": 0.5992, + "step": 17420 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023835211768893105, + "loss": 0.6174, + "step": 17430 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002383165262783248, + "loss": 0.7687, + "step": 17440 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023828093486771856, + "loss": 0.6416, + "step": 17450 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023824534345711233, + "loss": 0.7833, + "step": 17460 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002382097520465061, + "loss": 0.676, + "step": 17470 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023817416063589983, + "loss": 0.6723, + "step": 17480 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023813856922529363, + "loss": 0.7845, + "step": 17490 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023810297781468737, + "loss": 0.779, + "step": 17500 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002380673864040811, + "loss": 0.6912, + "step": 17510 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002380317949934749, + "loss": 0.6264, + "step": 17520 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023799620358286864, + "loss": 0.6203, + "step": 17530 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002379606121722624, + "loss": 0.6193, + "step": 17540 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023792502076165617, + "loss": 0.6647, + "step": 17550 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023788942935104994, + "loss": 0.6078, + "step": 17560 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023785383794044368, + "loss": 0.6986, + "step": 17570 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023781824652983744, + "loss": 0.7036, + "step": 17580 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002377826551192312, + "loss": 0.6974, + "step": 17590 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023774706370862495, + "loss": 0.5049, + "step": 17600 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023771147229801872, + "loss": 0.7322, + "step": 17610 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023767588088741248, + "loss": 0.6772, + "step": 17620 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023764028947680625, + "loss": 0.6642, + "step": 17630 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002376046980662, + "loss": 0.6999, + "step": 17640 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023756910665559378, + "loss": 0.6289, + "step": 17650 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023753351524498752, + "loss": 0.678, + "step": 17660 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002374979238343813, + "loss": 0.5548, + "step": 17670 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023746233242377503, + "loss": 0.7317, + "step": 17680 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023742674101316882, + "loss": 0.6638, + "step": 17690 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023739114960256256, + "loss": 0.6814, + "step": 17700 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002373555581919563, + "loss": 0.6299, + "step": 17710 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002373199667813501, + "loss": 0.785, + "step": 17720 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023728437537074383, + "loss": 0.6203, + "step": 17730 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002372487839601376, + "loss": 0.6013, + "step": 17740 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023721319254953137, + "loss": 0.7272, + "step": 17750 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023717760113892513, + "loss": 0.6356, + "step": 17760 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023714200972831887, + "loss": 0.6399, + "step": 17770 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023710641831771267, + "loss": 0.7376, + "step": 17780 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002370708269071064, + "loss": 0.7339, + "step": 17790 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023703523549650015, + "loss": 0.6343, + "step": 17800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002369996440858939, + "loss": 0.7444, + "step": 17810 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023696405267528768, + "loss": 0.7102, + "step": 17820 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023692846126468144, + "loss": 0.6182, + "step": 17830 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023689286985407518, + "loss": 0.6601, + "step": 17840 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023685727844346898, + "loss": 0.6617, + "step": 17850 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023682168703286272, + "loss": 0.7277, + "step": 17860 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023678609562225646, + "loss": 0.6839, + "step": 17870 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023675050421165025, + "loss": 0.655, + "step": 17880 + }, + { + "epoch": 0.64, + "learning_rate": 0.000236714912801044, + "loss": 0.6719, + "step": 17890 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023667932139043776, + "loss": 0.6424, + "step": 17900 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002366437299798315, + "loss": 0.8434, + "step": 17910 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002366081385692253, + "loss": 0.5587, + "step": 17920 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023657254715861903, + "loss": 0.685, + "step": 17930 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023653695574801277, + "loss": 0.7029, + "step": 17940 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023650136433740656, + "loss": 0.6163, + "step": 17950 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002364657729268003, + "loss": 0.6539, + "step": 17960 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023643018151619407, + "loss": 0.5889, + "step": 17970 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023639459010558783, + "loss": 0.804, + "step": 17980 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002363589986949816, + "loss": 0.7608, + "step": 17990 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023632340728437534, + "loss": 0.6514, + "step": 18000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023628781587376913, + "loss": 0.6232, + "step": 18010 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023625222446316287, + "loss": 0.5585, + "step": 18020 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002362166330525566, + "loss": 0.649, + "step": 18030 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023618104164195038, + "loss": 0.6381, + "step": 18040 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023614545023134415, + "loss": 0.698, + "step": 18050 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002361098588207379, + "loss": 0.7034, + "step": 18060 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023607426741013165, + "loss": 0.6213, + "step": 18070 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023603867599952545, + "loss": 0.6623, + "step": 18080 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023600308458891919, + "loss": 0.6636, + "step": 18090 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023596749317831293, + "loss": 0.6383, + "step": 18100 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023593190176770672, + "loss": 0.6441, + "step": 18110 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023589631035710046, + "loss": 0.6686, + "step": 18120 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023586071894649422, + "loss": 0.7046, + "step": 18130 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023582512753588796, + "loss": 0.7612, + "step": 18140 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023578953612528176, + "loss": 0.5519, + "step": 18150 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002357539447146755, + "loss": 0.6051, + "step": 18160 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023571835330406926, + "loss": 0.6077, + "step": 18170 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023568276189346303, + "loss": 0.6924, + "step": 18180 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002356471704828568, + "loss": 0.755, + "step": 18190 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023561157907225054, + "loss": 0.6421, + "step": 18200 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002355759876616443, + "loss": 0.551, + "step": 18210 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023554039625103807, + "loss": 0.6816, + "step": 18220 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002355048048404318, + "loss": 0.6998, + "step": 18230 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023546921342982558, + "loss": 0.6218, + "step": 18240 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023543362201921934, + "loss": 0.6317, + "step": 18250 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002353980306086131, + "loss": 0.6521, + "step": 18260 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023536243919800685, + "loss": 0.7457, + "step": 18270 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023532684778740064, + "loss": 0.5839, + "step": 18280 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023529125637679438, + "loss": 0.6309, + "step": 18290 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023525566496618812, + "loss": 0.6057, + "step": 18300 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023522007355558191, + "loss": 0.5953, + "step": 18310 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023518448214497565, + "loss": 0.6366, + "step": 18320 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023514889073436942, + "loss": 0.6361, + "step": 18330 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002351132993237632, + "loss": 0.6183, + "step": 18340 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023507770791315695, + "loss": 0.6767, + "step": 18350 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002350421165025507, + "loss": 0.5789, + "step": 18360 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023500652509194443, + "loss": 0.6328, + "step": 18370 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023497093368133823, + "loss": 0.7435, + "step": 18380 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023493534227073197, + "loss": 0.709, + "step": 18390 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023489975086012573, + "loss": 0.7099, + "step": 18400 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002348641594495195, + "loss": 0.6409, + "step": 18410 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023482856803891327, + "loss": 0.6494, + "step": 18420 + }, + { + "epoch": 0.66, + "learning_rate": 0.000234792976628307, + "loss": 0.5604, + "step": 18430 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002347573852177008, + "loss": 0.6061, + "step": 18440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023472179380709454, + "loss": 0.6116, + "step": 18450 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023468620239648828, + "loss": 0.6071, + "step": 18460 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023465061098588204, + "loss": 0.6972, + "step": 18470 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002346150195752758, + "loss": 0.6298, + "step": 18480 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023457942816466958, + "loss": 0.7004, + "step": 18490 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023454383675406332, + "loss": 0.6554, + "step": 18500 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002345082453434571, + "loss": 0.6561, + "step": 18510 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023447265393285085, + "loss": 0.712, + "step": 18520 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002344370625222446, + "loss": 0.7452, + "step": 18530 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023440147111163838, + "loss": 0.6506, + "step": 18540 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023436587970103212, + "loss": 0.6961, + "step": 18550 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002343302882904259, + "loss": 0.6945, + "step": 18560 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023429469687981966, + "loss": 0.6521, + "step": 18570 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023425910546921342, + "loss": 0.6465, + "step": 18580 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023422351405860716, + "loss": 0.5555, + "step": 18590 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002341879226480009, + "loss": 0.6359, + "step": 18600 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002341523312373947, + "loss": 0.5451, + "step": 18610 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023411673982678843, + "loss": 0.6433, + "step": 18620 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002340811484161822, + "loss": 0.6054, + "step": 18630 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023404555700557597, + "loss": 0.6511, + "step": 18640 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023400996559496973, + "loss": 0.7823, + "step": 18650 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023397437418436347, + "loss": 0.6425, + "step": 18660 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023393878277375727, + "loss": 0.5399, + "step": 18670 + }, + { + "epoch": 0.66, + "learning_rate": 0.000233903191363151, + "loss": 0.5563, + "step": 18680 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023386759995254477, + "loss": 0.6647, + "step": 18690 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002338320085419385, + "loss": 0.6004, + "step": 18700 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023379641713133228, + "loss": 0.6395, + "step": 18710 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023376082572072605, + "loss": 0.6454, + "step": 18720 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023372523431011978, + "loss": 0.5879, + "step": 18730 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023368964289951358, + "loss": 0.6499, + "step": 18740 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023365405148890732, + "loss": 0.6918, + "step": 18750 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023361846007830108, + "loss": 0.639, + "step": 18760 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023358286866769485, + "loss": 0.7304, + "step": 18770 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023354727725708862, + "loss": 0.6493, + "step": 18780 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023351168584648236, + "loss": 0.5601, + "step": 18790 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023347609443587615, + "loss": 0.5798, + "step": 18800 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002334405030252699, + "loss": 0.6412, + "step": 18810 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023340491161466363, + "loss": 0.6057, + "step": 18820 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002333693202040574, + "loss": 0.5315, + "step": 18830 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023333372879345116, + "loss": 0.6499, + "step": 18840 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023329813738284493, + "loss": 0.6574, + "step": 18850 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023326254597223867, + "loss": 0.7359, + "step": 18860 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023322695456163246, + "loss": 0.6882, + "step": 18870 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002331913631510262, + "loss": 0.6748, + "step": 18880 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023315577174041994, + "loss": 0.6029, + "step": 18890 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023312018032981373, + "loss": 0.6673, + "step": 18900 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023308458891920747, + "loss": 0.6692, + "step": 18910 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023304899750860124, + "loss": 0.7255, + "step": 18920 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023301340609799498, + "loss": 0.6505, + "step": 18930 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023297781468738877, + "loss": 0.5785, + "step": 18940 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002329422232767825, + "loss": 0.6408, + "step": 18950 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023290663186617625, + "loss": 0.6524, + "step": 18960 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023287104045557005, + "loss": 0.6736, + "step": 18970 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023283544904496379, + "loss": 0.6631, + "step": 18980 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023279985763435755, + "loss": 0.6336, + "step": 18990 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023276426622375132, + "loss": 0.6114, + "step": 19000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023272867481314509, + "loss": 0.715, + "step": 19010 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023269308340253883, + "loss": 0.6618, + "step": 19020 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023265749199193262, + "loss": 0.5972, + "step": 19030 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023262190058132636, + "loss": 0.6777, + "step": 19040 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002325863091707201, + "loss": 0.6464, + "step": 19050 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023255071776011386, + "loss": 0.685, + "step": 19060 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023251512634950763, + "loss": 0.6758, + "step": 19070 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002324795349389014, + "loss": 0.7147, + "step": 19080 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023244394352829514, + "loss": 0.6866, + "step": 19090 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023240835211768893, + "loss": 0.679, + "step": 19100 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023237276070708267, + "loss": 0.5401, + "step": 19110 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002323371692964764, + "loss": 0.7182, + "step": 19120 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002323015778858702, + "loss": 0.6568, + "step": 19130 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023226598647526394, + "loss": 0.6682, + "step": 19140 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002322303950646577, + "loss": 0.5967, + "step": 19150 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023219480365405145, + "loss": 0.6372, + "step": 19160 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023215921224344524, + "loss": 0.6562, + "step": 19170 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023212362083283898, + "loss": 0.7334, + "step": 19180 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023208802942223275, + "loss": 0.6626, + "step": 19190 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023205243801162651, + "loss": 0.6613, + "step": 19200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023201684660102025, + "loss": 0.709, + "step": 19210 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023198125519041402, + "loss": 0.6017, + "step": 19220 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002319456637798078, + "loss": 0.6983, + "step": 19230 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023191007236920155, + "loss": 0.6919, + "step": 19240 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002318744809585953, + "loss": 0.6744, + "step": 19250 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023183888954798906, + "loss": 0.6933, + "step": 19260 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023180329813738283, + "loss": 0.6186, + "step": 19270 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002317677067267766, + "loss": 0.6386, + "step": 19280 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023173211531617033, + "loss": 0.6283, + "step": 19290 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023169652390556413, + "loss": 0.5855, + "step": 19300 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023166093249495787, + "loss": 0.6169, + "step": 19310 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002316253410843516, + "loss": 0.6587, + "step": 19320 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002315897496737454, + "loss": 0.6463, + "step": 19330 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023155415826313914, + "loss": 0.6328, + "step": 19340 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002315185668525329, + "loss": 0.6238, + "step": 19350 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023148297544192667, + "loss": 0.6241, + "step": 19360 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023144738403132044, + "loss": 0.6635, + "step": 19370 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023141179262071418, + "loss": 0.5463, + "step": 19380 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023137620121010792, + "loss": 0.8037, + "step": 19390 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002313406097995017, + "loss": 0.6431, + "step": 19400 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023130501838889545, + "loss": 0.6366, + "step": 19410 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023126942697828922, + "loss": 0.602, + "step": 19420 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023123383556768298, + "loss": 0.6757, + "step": 19430 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023119824415707675, + "loss": 0.7078, + "step": 19440 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002311626527464705, + "loss": 0.7049, + "step": 19450 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023112706133586428, + "loss": 0.6411, + "step": 19460 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023109146992525802, + "loss": 0.6704, + "step": 19470 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023105587851465176, + "loss": 0.6019, + "step": 19480 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023102028710404553, + "loss": 0.644, + "step": 19490 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002309846956934393, + "loss": 0.6781, + "step": 19500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023094910428283306, + "loss": 0.6416, + "step": 19510 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002309135128722268, + "loss": 0.7016, + "step": 19520 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002308779214616206, + "loss": 0.6446, + "step": 19530 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023084233005101433, + "loss": 0.5951, + "step": 19540 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023080673864040807, + "loss": 0.6995, + "step": 19550 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023077114722980187, + "loss": 0.7051, + "step": 19560 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002307355558191956, + "loss": 0.5402, + "step": 19570 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023069996440858937, + "loss": 0.691, + "step": 19580 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023066793213904376, + "loss": 0.6954, + "step": 19590 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002306323407284375, + "loss": 0.6712, + "step": 19600 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002305967493178313, + "loss": 0.7072, + "step": 19610 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023056115790722503, + "loss": 0.634, + "step": 19620 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023052556649661877, + "loss": 0.6561, + "step": 19630 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023048997508601257, + "loss": 0.6982, + "step": 19640 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002304543836754063, + "loss": 0.6414, + "step": 19650 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023041879226480007, + "loss": 0.6651, + "step": 19660 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023038320085419384, + "loss": 0.6791, + "step": 19670 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002303476094435876, + "loss": 0.7104, + "step": 19680 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023031201803298134, + "loss": 0.6449, + "step": 19690 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002302764266223751, + "loss": 0.6245, + "step": 19700 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023024083521176888, + "loss": 0.6934, + "step": 19710 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023020524380116264, + "loss": 0.6353, + "step": 19720 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023016965239055638, + "loss": 0.6019, + "step": 19730 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023013406097995015, + "loss": 0.7251, + "step": 19740 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023009846956934392, + "loss": 0.5592, + "step": 19750 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023006287815873766, + "loss": 0.6104, + "step": 19760 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023002728674813145, + "loss": 0.6394, + "step": 19770 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002299916953375252, + "loss": 0.6707, + "step": 19780 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022995610392691896, + "loss": 0.6402, + "step": 19790 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002299205125163127, + "loss": 0.782, + "step": 19800 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002298849211057065, + "loss": 0.6372, + "step": 19810 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022984932969510023, + "loss": 0.7639, + "step": 19820 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022981373828449397, + "loss": 0.6159, + "step": 19830 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022977814687388776, + "loss": 0.6632, + "step": 19840 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002297425554632815, + "loss": 0.7924, + "step": 19850 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022970696405267527, + "loss": 0.6808, + "step": 19860 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022967137264206903, + "loss": 0.6661, + "step": 19870 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002296357812314628, + "loss": 0.7144, + "step": 19880 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022960018982085654, + "loss": 0.6898, + "step": 19890 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022956459841025033, + "loss": 0.681, + "step": 19900 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022952900699964407, + "loss": 0.7596, + "step": 19910 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002294934155890378, + "loss": 0.616, + "step": 19920 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022945782417843158, + "loss": 0.6178, + "step": 19930 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022942223276782535, + "loss": 0.6443, + "step": 19940 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002293866413572191, + "loss": 0.5472, + "step": 19950 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022935104994661285, + "loss": 0.6182, + "step": 19960 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022931901767706724, + "loss": 0.7005, + "step": 19970 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022928342626646103, + "loss": 0.642, + "step": 19980 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022924783485585477, + "loss": 0.6855, + "step": 19990 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002292122434452485, + "loss": 0.6917, + "step": 20000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022917665203464228, + "loss": 0.7462, + "step": 20010 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022914106062403604, + "loss": 0.5595, + "step": 20020 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002291054692134298, + "loss": 0.6667, + "step": 20030 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022906987780282355, + "loss": 0.6866, + "step": 20040 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022903428639221734, + "loss": 0.6738, + "step": 20050 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022899869498161108, + "loss": 0.7155, + "step": 20060 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022896310357100482, + "loss": 0.675, + "step": 20070 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022892751216039862, + "loss": 0.57, + "step": 20080 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022889192074979236, + "loss": 0.6655, + "step": 20090 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022885632933918612, + "loss": 0.6502, + "step": 20100 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022882073792857986, + "loss": 0.722, + "step": 20110 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022878514651797366, + "loss": 0.6463, + "step": 20120 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002287495551073674, + "loss": 0.7275, + "step": 20130 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022871396369676116, + "loss": 0.6045, + "step": 20140 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022867837228615493, + "loss": 0.6648, + "step": 20150 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022864278087554867, + "loss": 0.6522, + "step": 20160 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022860718946494243, + "loss": 0.6463, + "step": 20170 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002285715980543362, + "loss": 0.6922, + "step": 20180 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022853600664372997, + "loss": 0.8122, + "step": 20190 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002285004152331237, + "loss": 0.626, + "step": 20200 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002284648238225175, + "loss": 0.6516, + "step": 20210 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022842923241191124, + "loss": 0.6461, + "step": 20220 + }, + { + "epoch": 0.72, + "learning_rate": 0.000228393641001305, + "loss": 0.6284, + "step": 20230 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022835804959069875, + "loss": 0.6514, + "step": 20240 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022832245818009254, + "loss": 0.6398, + "step": 20250 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022828686676948628, + "loss": 0.8107, + "step": 20260 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022825127535888002, + "loss": 0.656, + "step": 20270 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002282156839482738, + "loss": 0.6242, + "step": 20280 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022818009253766755, + "loss": 0.5611, + "step": 20290 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022814450112706132, + "loss": 0.6623, + "step": 20300 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022810890971645509, + "loss": 0.7116, + "step": 20310 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022807331830584885, + "loss": 0.7271, + "step": 20320 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002280377268952426, + "loss": 0.6944, + "step": 20330 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022800213548463633, + "loss": 0.6581, + "step": 20340 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022796654407403012, + "loss": 0.5974, + "step": 20350 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022793095266342386, + "loss": 0.7046, + "step": 20360 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022789536125281763, + "loss": 0.6562, + "step": 20370 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002278597698422114, + "loss": 0.6347, + "step": 20380 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022782417843160516, + "loss": 0.7294, + "step": 20390 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002277885870209989, + "loss": 0.659, + "step": 20400 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002277529956103927, + "loss": 0.6288, + "step": 20410 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022771740419978644, + "loss": 0.6708, + "step": 20420 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022768181278918018, + "loss": 0.6003, + "step": 20430 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022764622137857397, + "loss": 0.5782, + "step": 20440 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002276106299679677, + "loss": 0.6199, + "step": 20450 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022757503855736148, + "loss": 0.6037, + "step": 20460 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022753944714675521, + "loss": 0.6377, + "step": 20470 + }, + { + "epoch": 0.73, + "learning_rate": 0.000227503855736149, + "loss": 0.6462, + "step": 20480 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022746826432554275, + "loss": 0.6154, + "step": 20490 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002274326729149365, + "loss": 0.6506, + "step": 20500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022739708150433028, + "loss": 0.6676, + "step": 20510 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022736149009372402, + "loss": 0.7379, + "step": 20520 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002273258986831178, + "loss": 0.7759, + "step": 20530 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022729030727251155, + "loss": 0.7138, + "step": 20540 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022725471586190532, + "loss": 0.6242, + "step": 20550 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022721912445129906, + "loss": 0.6462, + "step": 20560 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002271835330406928, + "loss": 0.6444, + "step": 20570 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002271479416300866, + "loss": 0.6105, + "step": 20580 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022711235021948033, + "loss": 0.6967, + "step": 20590 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002270767588088741, + "loss": 0.7439, + "step": 20600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022704116739826787, + "loss": 0.6773, + "step": 20610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022700557598766163, + "loss": 0.6473, + "step": 20620 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022696998457705537, + "loss": 0.6519, + "step": 20630 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022693439316644916, + "loss": 0.6662, + "step": 20640 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002268988017558429, + "loss": 0.6533, + "step": 20650 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022686321034523664, + "loss": 0.6194, + "step": 20660 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002268276189346304, + "loss": 0.6648, + "step": 20670 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022679202752402418, + "loss": 0.6999, + "step": 20680 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022675643611341794, + "loss": 0.7355, + "step": 20690 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022672084470281168, + "loss": 0.8183, + "step": 20700 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022668525329220548, + "loss": 0.6669, + "step": 20710 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022664966188159922, + "loss": 0.6699, + "step": 20720 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022661407047099298, + "loss": 0.6213, + "step": 20730 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022657847906038675, + "loss": 0.712, + "step": 20740 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022654288764978052, + "loss": 0.6427, + "step": 20750 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022650729623917426, + "loss": 0.7187, + "step": 20760 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022647170482856802, + "loss": 0.7066, + "step": 20770 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002264361134179618, + "loss": 0.7484, + "step": 20780 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022640052200735553, + "loss": 0.6576, + "step": 20790 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002263649305967493, + "loss": 0.6458, + "step": 20800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022632933918614306, + "loss": 0.6914, + "step": 20810 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022629374777553683, + "loss": 0.6687, + "step": 20820 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022625815636493057, + "loss": 0.7222, + "step": 20830 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022622256495432436, + "loss": 0.7683, + "step": 20840 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002261869735437181, + "loss": 0.64, + "step": 20850 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022615138213311184, + "loss": 0.6753, + "step": 20860 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022611579072250563, + "loss": 0.6746, + "step": 20870 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022608019931189937, + "loss": 0.677, + "step": 20880 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022604460790129314, + "loss": 0.6285, + "step": 20890 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022600901649068688, + "loss": 0.6377, + "step": 20900 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022597342508008067, + "loss": 0.7048, + "step": 20910 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002259378336694744, + "loss": 0.7889, + "step": 20920 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022590224225886815, + "loss": 0.6158, + "step": 20930 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022586665084826194, + "loss": 0.6217, + "step": 20940 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022583105943765568, + "loss": 0.6617, + "step": 20950 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022579546802704945, + "loss": 0.618, + "step": 20960 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022575987661644322, + "loss": 0.7708, + "step": 20970 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022572428520583698, + "loss": 0.7523, + "step": 20980 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022568869379523072, + "loss": 0.5512, + "step": 20990 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022565310238462452, + "loss": 0.7961, + "step": 21000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022561751097401826, + "loss": 0.6394, + "step": 21010 + }, + { + "epoch": 0.75, + "learning_rate": 0.000225581919563412, + "loss": 0.605, + "step": 21020 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022554632815280576, + "loss": 0.6933, + "step": 21030 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022551073674219953, + "loss": 0.691, + "step": 21040 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002254751453315933, + "loss": 0.6184, + "step": 21050 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022543955392098704, + "loss": 0.6156, + "step": 21060 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022540396251038083, + "loss": 0.6559, + "step": 21070 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022536837109977457, + "loss": 0.6691, + "step": 21080 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002253327796891683, + "loss": 0.6425, + "step": 21090 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002252971882785621, + "loss": 0.7222, + "step": 21100 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022526159686795584, + "loss": 0.6686, + "step": 21110 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002252260054573496, + "loss": 0.6461, + "step": 21120 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022519041404674335, + "loss": 0.713, + "step": 21130 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022515482263613714, + "loss": 0.7068, + "step": 21140 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022511923122553088, + "loss": 0.738, + "step": 21150 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022508363981492462, + "loss": 0.6787, + "step": 21160 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002250480484043184, + "loss": 0.5874, + "step": 21170 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022501245699371215, + "loss": 0.5772, + "step": 21180 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022497686558310592, + "loss": 0.5926, + "step": 21190 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022494127417249969, + "loss": 0.7182, + "step": 21200 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022490568276189345, + "loss": 0.6939, + "step": 21210 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002248700913512872, + "loss": 0.6401, + "step": 21220 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022483449994068099, + "loss": 0.6344, + "step": 21230 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022479890853007472, + "loss": 0.6844, + "step": 21240 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002247633171194685, + "loss": 0.6147, + "step": 21250 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022472772570886223, + "loss": 0.6807, + "step": 21260 + }, + { + "epoch": 0.76, + "learning_rate": 0.000224692134298256, + "loss": 0.585, + "step": 21270 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022465654288764976, + "loss": 0.6365, + "step": 21280 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002246209514770435, + "loss": 0.7233, + "step": 21290 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002245853600664373, + "loss": 0.6798, + "step": 21300 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022454976865583104, + "loss": 0.6734, + "step": 21310 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002245141772452248, + "loss": 0.6018, + "step": 21320 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022447858583461857, + "loss": 0.6911, + "step": 21330 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022444299442401234, + "loss": 0.7042, + "step": 21340 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022440740301340608, + "loss": 0.7912, + "step": 21350 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022437181160279982, + "loss": 0.6508, + "step": 21360 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002243362201921936, + "loss": 0.6119, + "step": 21370 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022430062878158735, + "loss": 0.6678, + "step": 21380 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022426503737098111, + "loss": 0.6884, + "step": 21390 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022422944596037488, + "loss": 0.6549, + "step": 21400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022419385454976865, + "loss": 0.6042, + "step": 21410 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002241582631391624, + "loss": 0.7057, + "step": 21420 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022412267172855618, + "loss": 0.7154, + "step": 21430 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022408708031794992, + "loss": 0.6302, + "step": 21440 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022405148890734366, + "loss": 0.6844, + "step": 21450 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022401589749673745, + "loss": 0.6593, + "step": 21460 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002239803060861312, + "loss": 0.575, + "step": 21470 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022394471467552496, + "loss": 0.7661, + "step": 21480 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002239091232649187, + "loss": 0.6389, + "step": 21490 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002238735318543125, + "loss": 0.7234, + "step": 21500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022383794044370623, + "loss": 0.7059, + "step": 21510 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022380234903309997, + "loss": 0.6627, + "step": 21520 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022376675762249377, + "loss": 0.7917, + "step": 21530 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002237311662118875, + "loss": 0.7276, + "step": 21540 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022369557480128127, + "loss": 0.7118, + "step": 21550 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022365998339067504, + "loss": 0.6611, + "step": 21560 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002236243919800688, + "loss": 0.6855, + "step": 21570 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022358880056946254, + "loss": 0.6333, + "step": 21580 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022355320915885628, + "loss": 0.6405, + "step": 21590 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022351761774825008, + "loss": 0.694, + "step": 21600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022348202633764382, + "loss": 0.6741, + "step": 21610 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022344643492703758, + "loss": 0.6241, + "step": 21620 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022341084351643135, + "loss": 0.6782, + "step": 21630 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022337525210582512, + "loss": 0.7203, + "step": 21640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022333966069521886, + "loss": 0.7264, + "step": 21650 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022330406928461265, + "loss": 0.6397, + "step": 21660 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002232684778740064, + "loss": 0.6174, + "step": 21670 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022323288646340013, + "loss": 0.6127, + "step": 21680 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002231972950527939, + "loss": 0.6089, + "step": 21690 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022316170364218766, + "loss": 0.7288, + "step": 21700 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022312611223158143, + "loss": 0.7038, + "step": 21710 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022309052082097517, + "loss": 0.6144, + "step": 21720 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022305492941036896, + "loss": 0.7353, + "step": 21730 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002230193379997627, + "loss": 0.5906, + "step": 21740 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022298374658915647, + "loss": 0.704, + "step": 21750 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022294815517855023, + "loss": 0.6354, + "step": 21760 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022291256376794397, + "loss": 0.6431, + "step": 21770 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022287697235733774, + "loss": 0.6342, + "step": 21780 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002228413809467315, + "loss": 0.697, + "step": 21790 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022280578953612527, + "loss": 0.6362, + "step": 21800 + }, + { + "epoch": 0.78, + "learning_rate": 0.000222770198125519, + "loss": 0.7236, + "step": 21810 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022273460671491278, + "loss": 0.7482, + "step": 21820 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022269901530430655, + "loss": 0.7031, + "step": 21830 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002226634238937003, + "loss": 0.6303, + "step": 21840 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022262783248309405, + "loss": 0.6204, + "step": 21850 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022259224107248784, + "loss": 0.6819, + "step": 21860 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022255664966188158, + "loss": 0.7482, + "step": 21870 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022252105825127532, + "loss": 0.7001, + "step": 21880 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022248546684066912, + "loss": 0.5931, + "step": 21890 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022244987543006286, + "loss": 0.6268, + "step": 21900 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022241428401945662, + "loss": 0.6059, + "step": 21910 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022237869260885036, + "loss": 0.6061, + "step": 21920 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022234310119824416, + "loss": 0.6952, + "step": 21930 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002223075097876379, + "loss": 0.7549, + "step": 21940 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022227191837703164, + "loss": 0.6742, + "step": 21950 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022223632696642543, + "loss": 0.7529, + "step": 21960 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022220073555581917, + "loss": 0.613, + "step": 21970 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022216514414521294, + "loss": 0.6903, + "step": 21980 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002221295527346067, + "loss": 0.5902, + "step": 21990 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022209396132400047, + "loss": 0.6025, + "step": 22000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002220583699133942, + "loss": 0.6908, + "step": 22010 + }, + { + "epoch": 0.78, + "learning_rate": 0.000222022778502788, + "loss": 0.6366, + "step": 22020 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022198718709218174, + "loss": 0.6271, + "step": 22030 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022195159568157548, + "loss": 0.6397, + "step": 22040 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022191600427096925, + "loss": 0.582, + "step": 22050 + }, + { + "epoch": 0.78, + "learning_rate": 0.000221880412860363, + "loss": 0.5925, + "step": 22060 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022184482144975678, + "loss": 0.5928, + "step": 22070 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022180923003915052, + "loss": 0.5699, + "step": 22080 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002217736386285443, + "loss": 0.6549, + "step": 22090 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022173804721793805, + "loss": 0.5977, + "step": 22100 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002217024558073318, + "loss": 0.6567, + "step": 22110 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022166686439672559, + "loss": 0.7211, + "step": 22120 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022163127298611933, + "loss": 0.7105, + "step": 22130 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002215956815755131, + "loss": 0.7407, + "step": 22140 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022156009016490683, + "loss": 0.622, + "step": 22150 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022152449875430062, + "loss": 0.7253, + "step": 22160 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022148890734369436, + "loss": 0.6544, + "step": 22170 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002214533159330881, + "loss": 0.6621, + "step": 22180 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002214177245224819, + "loss": 0.6408, + "step": 22190 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022138213311187564, + "loss": 0.6616, + "step": 22200 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002213465417012694, + "loss": 0.6375, + "step": 22210 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022131095029066317, + "loss": 0.6129, + "step": 22220 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022127535888005694, + "loss": 0.7209, + "step": 22230 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022123976746945068, + "loss": 0.6351, + "step": 22240 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022120417605884447, + "loss": 0.6695, + "step": 22250 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002211685846482382, + "loss": 0.6799, + "step": 22260 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022113299323763195, + "loss": 0.6838, + "step": 22270 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022109740182702572, + "loss": 0.7017, + "step": 22280 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022106181041641948, + "loss": 0.6515, + "step": 22290 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022102621900581325, + "loss": 0.7148, + "step": 22300 + }, + { + "epoch": 0.79, + "learning_rate": 0.000220990627595207, + "loss": 0.644, + "step": 22310 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022095503618460078, + "loss": 0.7058, + "step": 22320 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022091944477399452, + "loss": 0.723, + "step": 22330 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002208838533633883, + "loss": 0.7131, + "step": 22340 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022084826195278205, + "loss": 0.6011, + "step": 22350 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002208126705421758, + "loss": 0.5587, + "step": 22360 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022077707913156956, + "loss": 0.6083, + "step": 22370 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002207414877209633, + "loss": 0.7306, + "step": 22380 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002207058963103571, + "loss": 0.5984, + "step": 22390 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022067030489975083, + "loss": 0.6928, + "step": 22400 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002206347134891446, + "loss": 0.5628, + "step": 22410 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022059912207853837, + "loss": 0.7584, + "step": 22420 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022056353066793213, + "loss": 0.5712, + "step": 22430 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022052793925732587, + "loss": 0.5928, + "step": 22440 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022049234784671967, + "loss": 0.6635, + "step": 22450 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002204567564361134, + "loss": 0.7736, + "step": 22460 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022042116502550714, + "loss": 0.712, + "step": 22470 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022038557361490094, + "loss": 0.6705, + "step": 22480 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022034998220429468, + "loss": 0.6164, + "step": 22490 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022031439079368844, + "loss": 0.6774, + "step": 22500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022027879938308218, + "loss": 0.688, + "step": 22510 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022024320797247598, + "loss": 0.5857, + "step": 22520 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022020761656186972, + "loss": 0.6271, + "step": 22530 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022017202515126346, + "loss": 0.7079, + "step": 22540 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022013643374065725, + "loss": 0.7367, + "step": 22550 + }, + { + "epoch": 0.8, + "learning_rate": 0.000220100842330051, + "loss": 0.7031, + "step": 22560 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022006525091944476, + "loss": 0.7114, + "step": 22570 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022002965950883852, + "loss": 0.6925, + "step": 22580 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002199940680982323, + "loss": 0.5954, + "step": 22590 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021995847668762603, + "loss": 0.6636, + "step": 22600 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021992288527701977, + "loss": 0.7198, + "step": 22610 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021988729386641356, + "loss": 0.6426, + "step": 22620 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002198517024558073, + "loss": 0.6602, + "step": 22630 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021981611104520107, + "loss": 0.6446, + "step": 22640 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021978051963459483, + "loss": 0.6166, + "step": 22650 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002197449282239886, + "loss": 0.6427, + "step": 22660 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021970933681338234, + "loss": 0.6831, + "step": 22670 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021967374540277613, + "loss": 0.7164, + "step": 22680 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021963815399216987, + "loss": 0.6409, + "step": 22690 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002196025625815636, + "loss": 0.6726, + "step": 22700 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021956697117095738, + "loss": 0.6248, + "step": 22710 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021953137976035115, + "loss": 0.7968, + "step": 22720 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002194957883497449, + "loss": 0.746, + "step": 22730 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021946019693913865, + "loss": 0.6355, + "step": 22740 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021942460552853245, + "loss": 0.7184, + "step": 22750 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021938901411792618, + "loss": 0.6026, + "step": 22760 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021935342270731992, + "loss": 0.6736, + "step": 22770 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021931783129671372, + "loss": 0.6708, + "step": 22780 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021928223988610746, + "loss": 0.5723, + "step": 22790 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021924664847550122, + "loss": 0.5899, + "step": 22800 + }, + { + "epoch": 0.81, + "learning_rate": 0.000219211057064895, + "loss": 0.6618, + "step": 22810 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021917546565428876, + "loss": 0.5732, + "step": 22820 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002191398742436825, + "loss": 0.6333, + "step": 22830 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021910428283307626, + "loss": 0.7018, + "step": 22840 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021906869142247003, + "loss": 0.5962, + "step": 22850 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021903310001186377, + "loss": 0.6515, + "step": 22860 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021899750860125754, + "loss": 0.6849, + "step": 22870 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002189619171906513, + "loss": 0.6241, + "step": 22880 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021892632578004507, + "loss": 0.6589, + "step": 22890 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002188907343694388, + "loss": 0.7201, + "step": 22900 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002188551429588326, + "loss": 0.6962, + "step": 22910 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021881955154822634, + "loss": 0.5817, + "step": 22920 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002187839601376201, + "loss": 0.6587, + "step": 22930 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021874836872701385, + "loss": 0.7155, + "step": 22940 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021871277731640764, + "loss": 0.7024, + "step": 22950 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021867718590580138, + "loss": 0.6567, + "step": 22960 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021864159449519512, + "loss": 0.6889, + "step": 22970 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002186060030845889, + "loss": 0.6675, + "step": 22980 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021857041167398265, + "loss": 0.6704, + "step": 22990 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021853482026337642, + "loss": 0.7088, + "step": 23000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021849922885277019, + "loss": 0.7261, + "step": 23010 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021846363744216395, + "loss": 0.6212, + "step": 23020 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002184280460315577, + "loss": 0.6683, + "step": 23030 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021839245462095149, + "loss": 0.6688, + "step": 23040 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021835686321034523, + "loss": 0.711, + "step": 23050 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021832127179973896, + "loss": 0.7493, + "step": 23060 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021828568038913273, + "loss": 0.6598, + "step": 23070 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002182500889785265, + "loss": 0.661, + "step": 23080 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021821449756792026, + "loss": 0.6, + "step": 23090 + }, + { + "epoch": 0.82, + "learning_rate": 0.000218178906157314, + "loss": 0.6686, + "step": 23100 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002181433147467078, + "loss": 0.5928, + "step": 23110 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021810772333610154, + "loss": 0.6861, + "step": 23120 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021807213192549528, + "loss": 0.6099, + "step": 23130 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021803654051488907, + "loss": 0.6501, + "step": 23140 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002180009491042828, + "loss": 0.6379, + "step": 23150 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021796535769367658, + "loss": 0.5497, + "step": 23160 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021792976628307032, + "loss": 0.6391, + "step": 23170 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002178941748724641, + "loss": 0.6955, + "step": 23180 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021785858346185785, + "loss": 0.6144, + "step": 23190 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002178229920512516, + "loss": 0.6872, + "step": 23200 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021778740064064538, + "loss": 0.6293, + "step": 23210 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021775180923003912, + "loss": 0.7028, + "step": 23220 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002177162178194329, + "loss": 0.6583, + "step": 23230 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021768062640882665, + "loss": 0.7602, + "step": 23240 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021764503499822042, + "loss": 0.629, + "step": 23250 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021760944358761416, + "loss": 0.6693, + "step": 23260 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021757385217700795, + "loss": 0.6421, + "step": 23270 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002175382607664017, + "loss": 0.6615, + "step": 23280 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021750266935579543, + "loss": 0.6098, + "step": 23290 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002174670779451892, + "loss": 0.8521, + "step": 23300 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021743148653458297, + "loss": 0.623, + "step": 23310 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021739589512397673, + "loss": 0.5615, + "step": 23320 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021736030371337047, + "loss": 0.6875, + "step": 23330 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021732471230276427, + "loss": 0.6646, + "step": 23340 + }, + { + "epoch": 0.83, + "learning_rate": 0.000217289120892158, + "loss": 0.6809, + "step": 23350 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021725352948155174, + "loss": 0.6204, + "step": 23360 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021721793807094554, + "loss": 0.7022, + "step": 23370 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021718234666033928, + "loss": 0.6835, + "step": 23380 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021714675524973304, + "loss": 0.688, + "step": 23390 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021711116383912678, + "loss": 0.7093, + "step": 23400 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021707557242852058, + "loss": 0.7176, + "step": 23410 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021703998101791432, + "loss": 0.5609, + "step": 23420 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021700438960730808, + "loss": 0.7258, + "step": 23430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021696879819670185, + "loss": 0.6654, + "step": 23440 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021693320678609562, + "loss": 0.6018, + "step": 23450 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021689761537548936, + "loss": 0.6934, + "step": 23460 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021686202396488312, + "loss": 0.6124, + "step": 23470 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002168264325542769, + "loss": 0.6355, + "step": 23480 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021679084114367063, + "loss": 0.7241, + "step": 23490 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021675524973306442, + "loss": 0.6915, + "step": 23500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021671965832245816, + "loss": 0.6674, + "step": 23510 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021668406691185193, + "loss": 0.6517, + "step": 23520 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021664847550124567, + "loss": 0.7194, + "step": 23530 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021661288409063946, + "loss": 0.6935, + "step": 23540 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002165772926800332, + "loss": 0.7225, + "step": 23550 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021654170126942694, + "loss": 0.71, + "step": 23560 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021650610985882073, + "loss": 0.6087, + "step": 23570 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021647051844821447, + "loss": 0.6463, + "step": 23580 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021643492703760824, + "loss": 0.6577, + "step": 23590 + }, + { + "epoch": 0.84, + "learning_rate": 0.000216399335627002, + "loss": 0.6488, + "step": 23600 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021636374421639577, + "loss": 0.6637, + "step": 23610 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002163281528057895, + "loss": 0.6177, + "step": 23620 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021629256139518325, + "loss": 0.659, + "step": 23630 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021625696998457705, + "loss": 0.6429, + "step": 23640 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021622137857397079, + "loss": 0.574, + "step": 23650 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021618578716336455, + "loss": 0.6259, + "step": 23660 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021615019575275832, + "loss": 0.5686, + "step": 23670 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021611460434215208, + "loss": 0.6295, + "step": 23680 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021607901293154582, + "loss": 0.6249, + "step": 23690 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021604342152093962, + "loss": 0.8182, + "step": 23700 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021600783011033336, + "loss": 0.6668, + "step": 23710 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002159722386997271, + "loss": 0.6005, + "step": 23720 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002159366472891209, + "loss": 0.7095, + "step": 23730 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021590105587851463, + "loss": 0.6505, + "step": 23740 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002158654644679084, + "loss": 0.6339, + "step": 23750 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021582987305730214, + "loss": 0.6027, + "step": 23760 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021579428164669593, + "loss": 0.7002, + "step": 23770 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021575869023608967, + "loss": 0.7061, + "step": 23780 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002157230988254834, + "loss": 0.5693, + "step": 23790 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002156875074148772, + "loss": 0.6641, + "step": 23800 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021565191600427094, + "loss": 0.6497, + "step": 23810 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002156163245936647, + "loss": 0.6601, + "step": 23820 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021558073318305847, + "loss": 0.657, + "step": 23830 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021554514177245224, + "loss": 0.6743, + "step": 23840 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021550955036184598, + "loss": 0.6167, + "step": 23850 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021547395895123972, + "loss": 0.5797, + "step": 23860 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021543836754063351, + "loss": 0.5988, + "step": 23870 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021540277613002725, + "loss": 0.6667, + "step": 23880 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021536718471942102, + "loss": 0.6856, + "step": 23890 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021533159330881479, + "loss": 0.5389, + "step": 23900 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021529600189820855, + "loss": 0.594, + "step": 23910 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002152604104876023, + "loss": 0.661, + "step": 23920 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021522481907699609, + "loss": 0.6726, + "step": 23930 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021518922766638983, + "loss": 0.6056, + "step": 23940 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002151536362557836, + "loss": 0.659, + "step": 23950 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021511804484517733, + "loss": 0.6191, + "step": 23960 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002150824534345711, + "loss": 0.6118, + "step": 23970 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021504686202396486, + "loss": 0.639, + "step": 23980 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002150112706133586, + "loss": 0.6646, + "step": 23990 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002149756792027524, + "loss": 0.7752, + "step": 24000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021494008779214614, + "loss": 0.6936, + "step": 24010 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002149044963815399, + "loss": 0.6972, + "step": 24020 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021486890497093367, + "loss": 0.7192, + "step": 24030 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021483331356032744, + "loss": 0.7002, + "step": 24040 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021479772214972118, + "loss": 0.671, + "step": 24050 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021476213073911497, + "loss": 0.6143, + "step": 24060 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002147265393285087, + "loss": 0.6781, + "step": 24070 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021469094791790245, + "loss": 0.5778, + "step": 24080 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021465535650729622, + "loss": 0.6537, + "step": 24090 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021461976509668998, + "loss": 0.732, + "step": 24100 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021458417368608375, + "loss": 0.7184, + "step": 24110 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002145485822754775, + "loss": 0.6724, + "step": 24120 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021451299086487128, + "loss": 0.617, + "step": 24130 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021447739945426502, + "loss": 0.6749, + "step": 24140 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021444180804365876, + "loss": 0.6895, + "step": 24150 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021440621663305255, + "loss": 0.6203, + "step": 24160 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002143706252224463, + "loss": 0.6452, + "step": 24170 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021433503381184006, + "loss": 0.6399, + "step": 24180 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002142994424012338, + "loss": 0.6841, + "step": 24190 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002142638509906276, + "loss": 0.6895, + "step": 24200 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021422825958002133, + "loss": 0.618, + "step": 24210 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021419266816941507, + "loss": 0.6563, + "step": 24220 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021415707675880887, + "loss": 0.6964, + "step": 24230 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002141214853482026, + "loss": 0.6146, + "step": 24240 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021408589393759637, + "loss": 0.6711, + "step": 24250 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021405030252699014, + "loss": 0.6778, + "step": 24260 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002140147111163839, + "loss": 0.6479, + "step": 24270 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021397911970577764, + "loss": 0.7096, + "step": 24280 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021394352829517144, + "loss": 0.6512, + "step": 24290 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021390793688456518, + "loss": 0.6121, + "step": 24300 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021387234547395892, + "loss": 0.6608, + "step": 24310 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021383675406335268, + "loss": 0.6588, + "step": 24320 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021380116265274645, + "loss": 0.6457, + "step": 24330 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021376557124214022, + "loss": 0.68, + "step": 24340 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021372997983153396, + "loss": 0.6817, + "step": 24350 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021369438842092775, + "loss": 0.6561, + "step": 24360 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002136587970103215, + "loss": 0.7018, + "step": 24370 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021362320559971523, + "loss": 0.5937, + "step": 24380 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021358761418910902, + "loss": 0.6661, + "step": 24390 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021355202277850276, + "loss": 0.6834, + "step": 24400 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021351643136789653, + "loss": 0.6064, + "step": 24410 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021348083995729027, + "loss": 0.7056, + "step": 24420 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021344524854668406, + "loss": 0.6306, + "step": 24430 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002134096571360778, + "loss": 0.7785, + "step": 24440 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021337406572547157, + "loss": 0.7589, + "step": 24450 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021333847431486533, + "loss": 0.672, + "step": 24460 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021330288290425907, + "loss": 0.6247, + "step": 24470 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021326729149365284, + "loss": 0.571, + "step": 24480 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002132317000830466, + "loss": 0.6371, + "step": 24490 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021319610867244037, + "loss": 0.6205, + "step": 24500 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002131605172618341, + "loss": 0.6545, + "step": 24510 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002131249258512279, + "loss": 0.6522, + "step": 24520 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021308933444062165, + "loss": 0.6607, + "step": 24530 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002130537430300154, + "loss": 0.7228, + "step": 24540 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021301815161940915, + "loss": 0.6036, + "step": 24550 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021298256020880295, + "loss": 0.7375, + "step": 24560 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021294696879819669, + "loss": 0.5867, + "step": 24570 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021291137738759042, + "loss": 0.6492, + "step": 24580 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021287578597698422, + "loss": 0.8046, + "step": 24590 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021284019456637796, + "loss": 0.6346, + "step": 24600 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021280460315577172, + "loss": 0.6588, + "step": 24610 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002127690117451655, + "loss": 0.6426, + "step": 24620 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021273342033455926, + "loss": 0.8215, + "step": 24630 + }, + { + "epoch": 0.88, + "learning_rate": 0.000212697828923953, + "loss": 0.6988, + "step": 24640 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021266223751334674, + "loss": 0.6879, + "step": 24650 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021262664610274053, + "loss": 0.6661, + "step": 24660 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021259105469213427, + "loss": 0.6869, + "step": 24670 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021255546328152804, + "loss": 0.6005, + "step": 24680 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002125198718709218, + "loss": 0.7094, + "step": 24690 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021248428046031557, + "loss": 0.6434, + "step": 24700 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002124486890497093, + "loss": 0.7092, + "step": 24710 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002124130976391031, + "loss": 0.6788, + "step": 24720 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021237750622849684, + "loss": 0.6604, + "step": 24730 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021234191481789058, + "loss": 0.6514, + "step": 24740 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021230632340728437, + "loss": 0.5851, + "step": 24750 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021227073199667811, + "loss": 0.615, + "step": 24760 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021223514058607188, + "loss": 0.6591, + "step": 24770 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021219954917546562, + "loss": 0.5754, + "step": 24780 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021216395776485941, + "loss": 0.6369, + "step": 24790 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021212836635425315, + "loss": 0.5483, + "step": 24800 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002120927749436469, + "loss": 0.5318, + "step": 24810 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002120571835330407, + "loss": 0.6116, + "step": 24820 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021202159212243443, + "loss": 0.6153, + "step": 24830 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002119860007118282, + "loss": 0.6181, + "step": 24840 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021195040930122196, + "loss": 0.6992, + "step": 24850 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021191481789061573, + "loss": 0.6264, + "step": 24860 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021187922648000947, + "loss": 0.647, + "step": 24870 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002118436350694032, + "loss": 0.7102, + "step": 24880 + }, + { + "epoch": 0.88, + "learning_rate": 0.000211808043658797, + "loss": 0.706, + "step": 24890 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021177245224819074, + "loss": 0.7109, + "step": 24900 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002117368608375845, + "loss": 0.674, + "step": 24910 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021170126942697827, + "loss": 0.5747, + "step": 24920 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021166567801637204, + "loss": 0.5869, + "step": 24930 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021163008660576578, + "loss": 0.6326, + "step": 24940 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021159449519515957, + "loss": 0.604, + "step": 24950 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002115589037845533, + "loss": 0.5984, + "step": 24960 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021152331237394705, + "loss": 0.6848, + "step": 24970 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021148772096334082, + "loss": 0.6002, + "step": 24980 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021145212955273458, + "loss": 0.6652, + "step": 24990 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021141653814212835, + "loss": 0.6565, + "step": 25000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002113809467315221, + "loss": 0.5294, + "step": 25010 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021134535532091588, + "loss": 0.6235, + "step": 25020 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021130976391030962, + "loss": 0.693, + "step": 25030 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002112741724997034, + "loss": 0.6202, + "step": 25040 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021123858108909715, + "loss": 0.6329, + "step": 25050 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021120298967849092, + "loss": 0.6508, + "step": 25060 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021116739826788466, + "loss": 0.665, + "step": 25070 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021113180685727843, + "loss": 0.6903, + "step": 25080 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002110962154466722, + "loss": 0.5951, + "step": 25090 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021106062403606593, + "loss": 0.6147, + "step": 25100 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021102859176652032, + "loss": 0.6204, + "step": 25110 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002109930003559141, + "loss": 0.5971, + "step": 25120 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021095740894530785, + "loss": 0.7054, + "step": 25130 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021092181753470162, + "loss": 0.6839, + "step": 25140 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021088622612409536, + "loss": 0.7601, + "step": 25150 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021085063471348915, + "loss": 0.6126, + "step": 25160 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002108150433028829, + "loss": 0.666, + "step": 25170 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021077945189227663, + "loss": 0.5879, + "step": 25180 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002107438604816704, + "loss": 0.5566, + "step": 25190 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021070826907106417, + "loss": 0.5532, + "step": 25200 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021067267766045793, + "loss": 0.5103, + "step": 25210 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021063708624985167, + "loss": 0.7056, + "step": 25220 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021060149483924546, + "loss": 0.6174, + "step": 25230 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002105659034286392, + "loss": 0.6695, + "step": 25240 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021053031201803294, + "loss": 0.6743, + "step": 25250 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021049472060742674, + "loss": 0.7128, + "step": 25260 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021045912919682048, + "loss": 0.7096, + "step": 25270 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021042353778621424, + "loss": 0.6342, + "step": 25280 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021038794637560798, + "loss": 0.6372, + "step": 25290 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021035235496500178, + "loss": 0.6164, + "step": 25300 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021031676355439552, + "loss": 0.7463, + "step": 25310 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021028117214378926, + "loss": 0.5984, + "step": 25320 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021024558073318305, + "loss": 0.6135, + "step": 25330 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002102099893225768, + "loss": 0.599, + "step": 25340 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021017439791197056, + "loss": 0.544, + "step": 25350 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021013880650136432, + "loss": 0.6356, + "step": 25360 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002101032150907581, + "loss": 0.6155, + "step": 25370 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021006762368015183, + "loss": 0.6026, + "step": 25380 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021003203226954562, + "loss": 0.7397, + "step": 25390 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020999644085893936, + "loss": 0.7323, + "step": 25400 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002099608494483331, + "loss": 0.6009, + "step": 25410 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020992525803772687, + "loss": 0.7188, + "step": 25420 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020988966662712063, + "loss": 0.6921, + "step": 25430 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002098540752165144, + "loss": 0.6335, + "step": 25440 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020981848380590814, + "loss": 0.6102, + "step": 25450 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020978289239530193, + "loss": 0.5553, + "step": 25460 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020974730098469567, + "loss": 0.5685, + "step": 25470 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020971170957408944, + "loss": 0.6116, + "step": 25480 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002096761181634832, + "loss": 0.6517, + "step": 25490 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020964052675287695, + "loss": 0.6218, + "step": 25500 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002096049353422707, + "loss": 0.6992, + "step": 25510 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020956934393166445, + "loss": 0.6841, + "step": 25520 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020953375252105824, + "loss": 0.7155, + "step": 25530 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020949816111045198, + "loss": 0.6672, + "step": 25540 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020946256969984575, + "loss": 0.6233, + "step": 25550 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020942697828923952, + "loss": 0.5852, + "step": 25560 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020939138687863328, + "loss": 0.6086, + "step": 25570 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020935579546802702, + "loss": 0.6582, + "step": 25580 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020932020405742082, + "loss": 0.6266, + "step": 25590 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020928461264681456, + "loss": 0.5886, + "step": 25600 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002092490212362083, + "loss": 0.5479, + "step": 25610 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002092134298256021, + "loss": 0.576, + "step": 25620 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020917783841499583, + "loss": 0.7268, + "step": 25630 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002091422470043896, + "loss": 0.6456, + "step": 25640 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020910665559378334, + "loss": 0.6997, + "step": 25650 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020907106418317713, + "loss": 0.6492, + "step": 25660 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020903547277257087, + "loss": 0.6925, + "step": 25670 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002089998813619646, + "loss": 0.5458, + "step": 25680 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002089642899513584, + "loss": 0.7299, + "step": 25690 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020892869854075214, + "loss": 0.7081, + "step": 25700 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002088931071301459, + "loss": 0.6795, + "step": 25710 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020885751571953967, + "loss": 0.5751, + "step": 25720 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020882192430893344, + "loss": 0.6375, + "step": 25730 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020878633289832718, + "loss": 0.7122, + "step": 25740 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020875074148772092, + "loss": 0.5829, + "step": 25750 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002087151500771147, + "loss": 0.6133, + "step": 25760 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020867955866650845, + "loss": 0.6149, + "step": 25770 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020864396725590222, + "loss": 0.5053, + "step": 25780 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020860837584529599, + "loss": 0.6298, + "step": 25790 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020857278443468975, + "loss": 0.591, + "step": 25800 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002085371930240835, + "loss": 0.5814, + "step": 25810 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020850160161347729, + "loss": 0.6186, + "step": 25820 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020846601020287102, + "loss": 0.6398, + "step": 25830 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020843041879226476, + "loss": 0.6231, + "step": 25840 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020839482738165856, + "loss": 0.6087, + "step": 25850 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002083592359710523, + "loss": 0.685, + "step": 25860 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020832364456044606, + "loss": 0.5726, + "step": 25870 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002082880531498398, + "loss": 0.5558, + "step": 25880 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002082524617392336, + "loss": 0.6361, + "step": 25890 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020821687032862734, + "loss": 0.7315, + "step": 25900 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020818127891802108, + "loss": 0.7273, + "step": 25910 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020814568750741487, + "loss": 0.6599, + "step": 25920 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002081100960968086, + "loss": 0.6681, + "step": 25930 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020807450468620238, + "loss": 0.6932, + "step": 25940 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020803891327559614, + "loss": 0.6618, + "step": 25950 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002080033218649899, + "loss": 0.6164, + "step": 25960 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020796773045438365, + "loss": 0.6372, + "step": 25970 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020793213904377741, + "loss": 0.6694, + "step": 25980 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020789654763317118, + "loss": 0.5338, + "step": 25990 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020786095622256492, + "loss": 0.7207, + "step": 26000 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002078253648119587, + "loss": 0.5842, + "step": 26010 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020778977340135245, + "loss": 0.576, + "step": 26020 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020775418199074622, + "loss": 0.7333, + "step": 26030 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020771859058013996, + "loss": 0.6898, + "step": 26040 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020768299916953375, + "loss": 0.7037, + "step": 26050 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002076474077589275, + "loss": 0.6515, + "step": 26060 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020761181634832126, + "loss": 0.5971, + "step": 26070 + }, + { + "epoch": 0.93, + "learning_rate": 0.000207576224937715, + "loss": 0.6318, + "step": 26080 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002075406335271088, + "loss": 0.8018, + "step": 26090 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020750504211650253, + "loss": 0.7297, + "step": 26100 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020746945070589627, + "loss": 0.6522, + "step": 26110 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020743385929529007, + "loss": 0.6638, + "step": 26120 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002073982678846838, + "loss": 0.6081, + "step": 26130 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020736267647407757, + "loss": 0.7137, + "step": 26140 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020732708506347134, + "loss": 0.6705, + "step": 26150 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002072914936528651, + "loss": 0.6482, + "step": 26160 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020725590224225884, + "loss": 0.5674, + "step": 26170 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020722031083165264, + "loss": 0.6184, + "step": 26180 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020718471942104638, + "loss": 0.6329, + "step": 26190 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020714912801044012, + "loss": 0.5367, + "step": 26200 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020711353659983388, + "loss": 0.6797, + "step": 26210 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020707794518922765, + "loss": 0.6757, + "step": 26220 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020704235377862142, + "loss": 0.5862, + "step": 26230 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020700676236801516, + "loss": 0.6135, + "step": 26240 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020697117095740895, + "loss": 0.7624, + "step": 26250 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002069355795468027, + "loss": 0.6736, + "step": 26260 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020689998813619643, + "loss": 0.5605, + "step": 26270 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020686439672559022, + "loss": 0.7557, + "step": 26280 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020682880531498396, + "loss": 0.51, + "step": 26290 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020679321390437773, + "loss": 0.6603, + "step": 26300 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020675762249377147, + "loss": 0.6667, + "step": 26310 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020672203108316526, + "loss": 0.5928, + "step": 26320 + }, + { + "epoch": 0.94, + "learning_rate": 0.000206686439672559, + "loss": 0.5362, + "step": 26330 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020665084826195274, + "loss": 0.7326, + "step": 26340 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020661525685134653, + "loss": 0.5695, + "step": 26350 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020657966544074027, + "loss": 0.7449, + "step": 26360 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020654407403013404, + "loss": 0.6409, + "step": 26370 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002065084826195278, + "loss": 0.6279, + "step": 26380 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020647289120892157, + "loss": 0.6915, + "step": 26390 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002064372997983153, + "loss": 0.6275, + "step": 26400 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002064017083877091, + "loss": 0.6435, + "step": 26410 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020636611697710285, + "loss": 0.5778, + "step": 26420 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020633052556649658, + "loss": 0.7292, + "step": 26430 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020629493415589035, + "loss": 0.5922, + "step": 26440 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020625934274528412, + "loss": 0.6766, + "step": 26450 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020622375133467788, + "loss": 0.5949, + "step": 26460 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020618815992407162, + "loss": 0.7015, + "step": 26470 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020615256851346542, + "loss": 0.5744, + "step": 26480 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020611697710285916, + "loss": 0.862, + "step": 26490 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002060813856922529, + "loss": 0.6688, + "step": 26500 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002060457942816467, + "loss": 0.5885, + "step": 26510 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020601020287104043, + "loss": 0.6128, + "step": 26520 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002059746114604342, + "loss": 0.5724, + "step": 26530 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020593902004982794, + "loss": 0.6691, + "step": 26540 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020590342863922173, + "loss": 0.6181, + "step": 26550 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020586783722861547, + "loss": 0.6309, + "step": 26560 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020583224581800924, + "loss": 0.7534, + "step": 26570 + }, + { + "epoch": 0.94, + "learning_rate": 0.000205796654407403, + "loss": 0.5758, + "step": 26580 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020576106299679677, + "loss": 0.6409, + "step": 26590 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002057254715861905, + "loss": 0.6712, + "step": 26600 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020568988017558427, + "loss": 0.721, + "step": 26610 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020565428876497804, + "loss": 0.7174, + "step": 26620 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020561869735437178, + "loss": 0.5943, + "step": 26630 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020558310594376557, + "loss": 0.616, + "step": 26640 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002055475145331593, + "loss": 0.6261, + "step": 26650 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020551192312255308, + "loss": 0.7557, + "step": 26660 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020547633171194682, + "loss": 0.7811, + "step": 26670 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002054407403013406, + "loss": 0.6167, + "step": 26680 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020540514889073435, + "loss": 0.738, + "step": 26690 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002053695574801281, + "loss": 0.6525, + "step": 26700 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020533396606952189, + "loss": 0.6053, + "step": 26710 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020529837465891563, + "loss": 0.762, + "step": 26720 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002052627832483094, + "loss": 0.6098, + "step": 26730 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020522719183770316, + "loss": 0.6802, + "step": 26740 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020519160042709692, + "loss": 0.6567, + "step": 26750 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020515600901649066, + "loss": 0.6614, + "step": 26760 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002051204176058844, + "loss": 0.7237, + "step": 26770 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002050848261952782, + "loss": 0.6868, + "step": 26780 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020504923478467194, + "loss": 0.7121, + "step": 26790 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002050136433740657, + "loss": 0.7011, + "step": 26800 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020497805196345947, + "loss": 0.6464, + "step": 26810 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020494246055285324, + "loss": 0.742, + "step": 26820 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020490686914224698, + "loss": 0.694, + "step": 26830 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020487127773164077, + "loss": 0.6971, + "step": 26840 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002048356863210345, + "loss": 0.7087, + "step": 26850 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020480009491042825, + "loss": 0.6163, + "step": 26860 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020476450349982204, + "loss": 0.6463, + "step": 26870 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020472891208921578, + "loss": 0.787, + "step": 26880 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020469332067860955, + "loss": 0.6716, + "step": 26890 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002046577292680033, + "loss": 0.6185, + "step": 26900 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020462213785739708, + "loss": 0.5922, + "step": 26910 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020458654644679082, + "loss": 0.524, + "step": 26920 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020455095503618456, + "loss": 0.6935, + "step": 26930 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020451536362557835, + "loss": 0.605, + "step": 26940 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002044797722149721, + "loss": 0.5761, + "step": 26950 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020444418080436586, + "loss": 0.6525, + "step": 26960 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020440858939375963, + "loss": 0.6534, + "step": 26970 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002043729979831534, + "loss": 0.674, + "step": 26980 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020433740657254713, + "loss": 0.5688, + "step": 26990 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020430181516194087, + "loss": 0.6869, + "step": 27000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020426622375133467, + "loss": 0.7077, + "step": 27010 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002042306323407284, + "loss": 0.6277, + "step": 27020 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020419504093012217, + "loss": 0.6084, + "step": 27030 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020415944951951594, + "loss": 0.6069, + "step": 27040 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002041238581089097, + "loss": 0.7101, + "step": 27050 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020408826669830344, + "loss": 0.7001, + "step": 27060 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020405267528769724, + "loss": 0.7447, + "step": 27070 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020401708387709098, + "loss": 0.6495, + "step": 27080 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020398149246648474, + "loss": 0.6171, + "step": 27090 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002039459010558785, + "loss": 0.5767, + "step": 27100 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020391030964527225, + "loss": 0.6179, + "step": 27110 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020387471823466602, + "loss": 0.6114, + "step": 27120 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020383912682405976, + "loss": 0.6196, + "step": 27130 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020380353541345355, + "loss": 0.5652, + "step": 27140 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002037679440028473, + "loss": 0.7054, + "step": 27150 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020373235259224106, + "loss": 0.691, + "step": 27160 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020369676118163482, + "loss": 0.6268, + "step": 27170 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002036611697710286, + "loss": 0.7375, + "step": 27180 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020362557836042233, + "loss": 0.6059, + "step": 27190 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020358998694981612, + "loss": 0.6291, + "step": 27200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020355439553920986, + "loss": 0.6086, + "step": 27210 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002035188041286036, + "loss": 0.4827, + "step": 27220 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020348321271799737, + "loss": 0.7278, + "step": 27230 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020344762130739113, + "loss": 0.6589, + "step": 27240 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002034120298967849, + "loss": 0.7108, + "step": 27250 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020337643848617864, + "loss": 0.7147, + "step": 27260 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020334084707557243, + "loss": 0.6623, + "step": 27270 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020330525566496617, + "loss": 0.6269, + "step": 27280 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002032696642543599, + "loss": 0.6308, + "step": 27290 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002032340728437537, + "loss": 0.6481, + "step": 27300 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020319848143314745, + "loss": 0.6844, + "step": 27310 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002031628900225412, + "loss": 0.7158, + "step": 27320 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020312729861193495, + "loss": 0.707, + "step": 27330 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020309170720132875, + "loss": 0.6504, + "step": 27340 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020305611579072248, + "loss": 0.5878, + "step": 27350 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020302052438011622, + "loss": 0.6616, + "step": 27360 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020298493296951002, + "loss": 0.6946, + "step": 27370 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020294934155890376, + "loss": 0.705, + "step": 27380 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020291375014829752, + "loss": 0.7303, + "step": 27390 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002028781587376913, + "loss": 0.6954, + "step": 27400 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020284256732708506, + "loss": 0.6319, + "step": 27410 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002028069759164788, + "loss": 0.6644, + "step": 27420 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002027713845058726, + "loss": 0.5741, + "step": 27430 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020273579309526633, + "loss": 0.6206, + "step": 27440 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020270020168466007, + "loss": 0.7001, + "step": 27450 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020266461027405384, + "loss": 0.6392, + "step": 27460 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002026290188634476, + "loss": 0.5602, + "step": 27470 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020259342745284137, + "loss": 0.5764, + "step": 27480 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002025578360422351, + "loss": 0.6606, + "step": 27490 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002025222446316289, + "loss": 0.7072, + "step": 27500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020248665322102264, + "loss": 0.7065, + "step": 27510 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020245106181041638, + "loss": 0.6665, + "step": 27520 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020241547039981017, + "loss": 0.5677, + "step": 27530 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020237987898920391, + "loss": 0.6295, + "step": 27540 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020234428757859768, + "loss": 0.6355, + "step": 27550 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020230869616799142, + "loss": 0.5665, + "step": 27560 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020227310475738521, + "loss": 0.6433, + "step": 27570 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020223751334677895, + "loss": 0.5624, + "step": 27580 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020220192193617272, + "loss": 0.6971, + "step": 27590 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020216633052556649, + "loss": 0.689, + "step": 27600 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020213073911496023, + "loss": 0.6623, + "step": 27610 + }, + { + "epoch": 0.98, + "learning_rate": 0.000202095147704354, + "loss": 0.771, + "step": 27620 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020205955629374776, + "loss": 0.6584, + "step": 27630 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020202396488314153, + "loss": 0.7589, + "step": 27640 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020198837347253526, + "loss": 0.5451, + "step": 27650 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020195278206192906, + "loss": 0.6312, + "step": 27660 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002019171906513228, + "loss": 0.588, + "step": 27670 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020188159924071656, + "loss": 0.662, + "step": 27680 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002018460078301103, + "loss": 0.551, + "step": 27690 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002018104164195041, + "loss": 0.7277, + "step": 27700 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020177482500889784, + "loss": 0.6379, + "step": 27710 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020173923359829158, + "loss": 0.703, + "step": 27720 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020170364218768537, + "loss": 0.6775, + "step": 27730 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002016680507770791, + "loss": 0.6896, + "step": 27740 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020163245936647288, + "loss": 0.7108, + "step": 27750 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020159686795586664, + "loss": 0.627, + "step": 27760 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002015612765452604, + "loss": 0.6355, + "step": 27770 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020152568513465415, + "loss": 0.5611, + "step": 27780 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002014900937240479, + "loss": 0.5786, + "step": 27790 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020145450231344168, + "loss": 0.7293, + "step": 27800 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020141891090283542, + "loss": 0.5865, + "step": 27810 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002013833194922292, + "loss": 0.6886, + "step": 27820 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020134772808162295, + "loss": 0.5695, + "step": 27830 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020131213667101672, + "loss": 0.7456, + "step": 27840 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020127654526041046, + "loss": 0.7422, + "step": 27850 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020124095384980425, + "loss": 0.6441, + "step": 27860 + }, + { + "epoch": 0.99, + "learning_rate": 0.000201205362439198, + "loss": 0.6496, + "step": 27870 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020116977102859173, + "loss": 0.5499, + "step": 27880 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020113417961798553, + "loss": 0.5501, + "step": 27890 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020109858820737927, + "loss": 0.6419, + "step": 27900 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020106299679677303, + "loss": 0.6405, + "step": 27910 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020102740538616677, + "loss": 0.5286, + "step": 27920 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020099181397556057, + "loss": 0.6556, + "step": 27930 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002009562225649543, + "loss": 0.6558, + "step": 27940 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020092063115434804, + "loss": 0.5947, + "step": 27950 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020088503974374184, + "loss": 0.6813, + "step": 27960 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020084944833313558, + "loss": 0.7141, + "step": 27970 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020081385692252934, + "loss": 0.6779, + "step": 27980 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002007782655119231, + "loss": 0.6857, + "step": 27990 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020074267410131688, + "loss": 0.662, + "step": 28000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020070708269071062, + "loss": 0.5596, + "step": 28010 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020067149128010436, + "loss": 0.6393, + "step": 28020 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020063589986949815, + "loss": 0.7231, + "step": 28030 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002006003084588919, + "loss": 0.6233, + "step": 28040 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020056471704828566, + "loss": 0.5444, + "step": 28050 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020052912563767942, + "loss": 0.5914, + "step": 28060 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002004935342270732, + "loss": 0.5731, + "step": 28070 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020045794281646693, + "loss": 0.606, + "step": 28080 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020042235140586072, + "loss": 0.6294, + "step": 28090 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020038675999525446, + "loss": 0.605, + "step": 28100 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002003511685846482, + "loss": 0.7581, + "step": 28110 + }, + { + "epoch": 1.0, + "learning_rate": 0.000200315577174042, + "loss": 0.5733, + "step": 28120 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020027998576343573, + "loss": 0.7206, + "step": 28130 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002002443943528295, + "loss": 0.5966, + "step": 28140 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020020880294222324, + "loss": 0.59, + "step": 28150 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020017321153161703, + "loss": 0.5038, + "step": 28160 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020013762012101077, + "loss": 0.5659, + "step": 28170 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020010202871040454, + "loss": 0.6425, + "step": 28180 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002000664372997983, + "loss": 0.5297, + "step": 28190 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020003084588919205, + "loss": 0.6559, + "step": 28200 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001999952544785858, + "loss": 0.7653, + "step": 28210 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019995966306797958, + "loss": 0.6657, + "step": 28220 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019992407165737335, + "loss": 0.6222, + "step": 28230 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019988848024676709, + "loss": 0.5282, + "step": 28240 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019985288883616085, + "loss": 0.651, + "step": 28250 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019981729742555462, + "loss": 0.5301, + "step": 28260 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019978170601494838, + "loss": 0.7477, + "step": 28270 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019974611460434212, + "loss": 0.6336, + "step": 28280 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019971052319373592, + "loss": 0.5094, + "step": 28290 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019967493178312966, + "loss": 0.6093, + "step": 28300 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001996393403725234, + "loss": 0.5328, + "step": 28310 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001996037489619172, + "loss": 0.6482, + "step": 28320 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019956815755131093, + "loss": 0.582, + "step": 28330 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001995325661407047, + "loss": 0.6296, + "step": 28340 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019949697473009844, + "loss": 0.6128, + "step": 28350 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019946138331949223, + "loss": 0.6229, + "step": 28360 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019942579190888597, + "loss": 0.6707, + "step": 28370 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001993902004982797, + "loss": 0.5611, + "step": 28380 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001993546090876735, + "loss": 0.5661, + "step": 28390 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019931901767706724, + "loss": 0.5204, + "step": 28400 + }, + { + "epoch": 1.01, + "learning_rate": 0.000199283426266461, + "loss": 0.657, + "step": 28410 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019924783485585477, + "loss": 0.5813, + "step": 28420 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019921224344524854, + "loss": 0.6144, + "step": 28430 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019917665203464228, + "loss": 0.5808, + "step": 28440 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019914106062403607, + "loss": 0.6813, + "step": 28450 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019910546921342981, + "loss": 0.6955, + "step": 28460 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019906987780282355, + "loss": 0.6562, + "step": 28470 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019903428639221732, + "loss": 0.7348, + "step": 28480 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001989986949816111, + "loss": 0.6497, + "step": 28490 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019896310357100485, + "loss": 0.5262, + "step": 28500 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001989275121603986, + "loss": 0.6663, + "step": 28510 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019889192074979239, + "loss": 0.6218, + "step": 28520 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019885632933918613, + "loss": 0.6694, + "step": 28530 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019882073792857987, + "loss": 0.6031, + "step": 28540 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019878514651797366, + "loss": 0.6519, + "step": 28550 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001987495551073674, + "loss": 0.7603, + "step": 28560 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019871396369676116, + "loss": 0.553, + "step": 28570 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001986783722861549, + "loss": 0.5838, + "step": 28580 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001986427808755487, + "loss": 0.6756, + "step": 28590 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019860718946494244, + "loss": 0.5814, + "step": 28600 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019857159805433618, + "loss": 0.6594, + "step": 28610 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019853600664372997, + "loss": 0.6274, + "step": 28620 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001985004152331237, + "loss": 0.5459, + "step": 28630 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019846482382251748, + "loss": 0.6561, + "step": 28640 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019842923241191124, + "loss": 0.5954, + "step": 28650 + }, + { + "epoch": 1.02, + "learning_rate": 0.000198393641001305, + "loss": 0.6076, + "step": 28660 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019835804959069875, + "loss": 0.5277, + "step": 28670 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019832245818009254, + "loss": 0.7147, + "step": 28680 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019828686676948628, + "loss": 0.5694, + "step": 28690 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019825127535888002, + "loss": 0.6536, + "step": 28700 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001982156839482738, + "loss": 0.7101, + "step": 28710 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019818009253766755, + "loss": 0.563, + "step": 28720 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019814450112706132, + "loss": 0.5785, + "step": 28730 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019810890971645506, + "loss": 0.5594, + "step": 28740 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019807331830584885, + "loss": 0.5593, + "step": 28750 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001980377268952426, + "loss": 0.6456, + "step": 28760 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019800213548463636, + "loss": 0.6331, + "step": 28770 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019796654407403013, + "loss": 0.6078, + "step": 28780 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001979309526634239, + "loss": 0.6091, + "step": 28790 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019789536125281763, + "loss": 0.5555, + "step": 28800 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019785976984221137, + "loss": 0.652, + "step": 28810 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019782417843160517, + "loss": 0.5708, + "step": 28820 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001977885870209989, + "loss": 0.554, + "step": 28830 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019775299561039267, + "loss": 0.5766, + "step": 28840 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019771740419978644, + "loss": 0.627, + "step": 28850 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001976818127891802, + "loss": 0.5692, + "step": 28860 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019764622137857394, + "loss": 0.6086, + "step": 28870 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019761062996796774, + "loss": 0.6443, + "step": 28880 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019757503855736148, + "loss": 0.5224, + "step": 28890 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019753944714675522, + "loss": 0.6631, + "step": 28900 + }, + { + "epoch": 1.03, + "learning_rate": 0.000197503855736149, + "loss": 0.6056, + "step": 28910 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019746826432554275, + "loss": 0.6264, + "step": 28920 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019743267291493652, + "loss": 0.5449, + "step": 28930 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019739708150433026, + "loss": 0.5239, + "step": 28940 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019736149009372405, + "loss": 0.5933, + "step": 28950 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001973258986831178, + "loss": 0.6511, + "step": 28960 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019729030727251153, + "loss": 0.5593, + "step": 28970 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019725471586190532, + "loss": 0.5306, + "step": 28980 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019721912445129906, + "loss": 0.5435, + "step": 28990 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019718353304069283, + "loss": 0.6044, + "step": 29000 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001971479416300866, + "loss": 0.5722, + "step": 29010 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019711235021948036, + "loss": 0.6026, + "step": 29020 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001970767588088741, + "loss": 0.6134, + "step": 29030 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019704116739826784, + "loss": 0.5894, + "step": 29040 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019700557598766163, + "loss": 0.6285, + "step": 29050 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019696998457705537, + "loss": 0.6018, + "step": 29060 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019693439316644914, + "loss": 0.5791, + "step": 29070 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001968988017558429, + "loss": 0.6415, + "step": 29080 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019686321034523667, + "loss": 0.8141, + "step": 29090 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001968276189346304, + "loss": 0.5848, + "step": 29100 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001967920275240242, + "loss": 0.6386, + "step": 29110 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019675643611341795, + "loss": 0.6422, + "step": 29120 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019672084470281169, + "loss": 0.6203, + "step": 29130 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019668525329220548, + "loss": 0.7088, + "step": 29140 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019664966188159922, + "loss": 0.6892, + "step": 29150 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019661407047099299, + "loss": 0.5799, + "step": 29160 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019657847906038672, + "loss": 0.6491, + "step": 29170 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019654288764978052, + "loss": 0.5799, + "step": 29180 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019650729623917426, + "loss": 0.6544, + "step": 29190 + }, + { + "epoch": 1.04, + "learning_rate": 0.000196471704828568, + "loss": 0.4886, + "step": 29200 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001964361134179618, + "loss": 0.7112, + "step": 29210 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019640052200735553, + "loss": 0.608, + "step": 29220 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001963649305967493, + "loss": 0.5511, + "step": 29230 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019632933918614306, + "loss": 0.5738, + "step": 29240 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019629374777553683, + "loss": 0.6637, + "step": 29250 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019625815636493057, + "loss": 0.677, + "step": 29260 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019622256495432434, + "loss": 0.6311, + "step": 29270 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001961869735437181, + "loss": 0.7086, + "step": 29280 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019615138213311187, + "loss": 0.6743, + "step": 29290 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001961157907225056, + "loss": 0.5836, + "step": 29300 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019608019931189938, + "loss": 0.6241, + "step": 29310 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019604460790129314, + "loss": 0.5797, + "step": 29320 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019600901649068688, + "loss": 0.7705, + "step": 29330 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019597342508008067, + "loss": 0.597, + "step": 29340 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019593783366947441, + "loss": 0.6414, + "step": 29350 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019590224225886818, + "loss": 0.5742, + "step": 29360 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019587020998932257, + "loss": 0.6124, + "step": 29370 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001958346185787163, + "loss": 0.6807, + "step": 29380 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001957990271681101, + "loss": 0.7224, + "step": 29390 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019576343575750384, + "loss": 0.6915, + "step": 29400 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019572784434689758, + "loss": 0.5763, + "step": 29410 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019569225293629137, + "loss": 0.5492, + "step": 29420 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001956566615256851, + "loss": 0.6291, + "step": 29430 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019562107011507888, + "loss": 0.6692, + "step": 29440 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019558547870447265, + "loss": 0.6334, + "step": 29450 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001955498872938664, + "loss": 0.5618, + "step": 29460 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019551429588326015, + "loss": 0.669, + "step": 29470 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001954787044726539, + "loss": 0.5792, + "step": 29480 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019544311306204769, + "loss": 0.5664, + "step": 29490 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019540752165144142, + "loss": 0.6126, + "step": 29500 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001953719302408352, + "loss": 0.5098, + "step": 29510 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019533633883022896, + "loss": 0.5635, + "step": 29520 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019530074741962272, + "loss": 0.5499, + "step": 29530 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019526515600901646, + "loss": 0.5739, + "step": 29540 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019522956459841026, + "loss": 0.6652, + "step": 29550 + }, + { + "epoch": 1.05, + "learning_rate": 0.000195193973187804, + "loss": 0.599, + "step": 29560 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019515838177719774, + "loss": 0.666, + "step": 29570 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001951227903665915, + "loss": 0.6752, + "step": 29580 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019508719895598527, + "loss": 0.6749, + "step": 29590 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019505160754537904, + "loss": 0.5859, + "step": 29600 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019501601613477278, + "loss": 0.6023, + "step": 29610 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019498042472416657, + "loss": 0.5729, + "step": 29620 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001949448333135603, + "loss": 0.5465, + "step": 29630 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019490924190295405, + "loss": 0.6709, + "step": 29640 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019487365049234784, + "loss": 0.6552, + "step": 29650 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019483805908174158, + "loss": 0.5259, + "step": 29660 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019480246767113535, + "loss": 0.5458, + "step": 29670 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001947668762605291, + "loss": 0.5634, + "step": 29680 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019473128484992288, + "loss": 0.5401, + "step": 29690 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019469569343931662, + "loss": 0.5727, + "step": 29700 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001946601020287104, + "loss": 0.5539, + "step": 29710 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019462451061810415, + "loss": 0.6099, + "step": 29720 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001945889192074979, + "loss": 0.6921, + "step": 29730 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019455332779689166, + "loss": 0.6187, + "step": 29740 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019451773638628543, + "loss": 0.6416, + "step": 29750 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944821449756792, + "loss": 0.6761, + "step": 29760 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019444655356507293, + "loss": 0.6494, + "step": 29770 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019441096215446673, + "loss": 0.6762, + "step": 29780 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019437537074386047, + "loss": 0.6592, + "step": 29790 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019433977933325423, + "loss": 0.65, + "step": 29800 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019430418792264797, + "loss": 0.6523, + "step": 29810 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019426859651204177, + "loss": 0.6337, + "step": 29820 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001942330051014355, + "loss": 0.5405, + "step": 29830 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019419741369082924, + "loss": 0.6695, + "step": 29840 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019416182228022304, + "loss": 0.5708, + "step": 29850 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019412623086961678, + "loss": 0.6517, + "step": 29860 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019409063945901054, + "loss": 0.6473, + "step": 29870 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001940550480484043, + "loss": 0.6628, + "step": 29880 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019401945663779808, + "loss": 0.592, + "step": 29890 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019398386522719182, + "loss": 0.6215, + "step": 29900 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019394827381658556, + "loss": 0.5818, + "step": 29910 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019391268240597935, + "loss": 0.5959, + "step": 29920 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001938770909953731, + "loss": 0.5689, + "step": 29930 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019384149958476686, + "loss": 0.5582, + "step": 29940 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019380590817416062, + "loss": 0.5278, + "step": 29950 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001937703167635544, + "loss": 0.5955, + "step": 29960 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019373472535294813, + "loss": 0.607, + "step": 29970 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019369913394234192, + "loss": 0.7097, + "step": 29980 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019366354253173566, + "loss": 0.5955, + "step": 29990 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001936279511211294, + "loss": 0.6152, + "step": 30000 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001935923597105232, + "loss": 0.6122, + "step": 30010 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019355676829991693, + "loss": 0.7045, + "step": 30020 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001935211768893107, + "loss": 0.6079, + "step": 30030 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019348558547870444, + "loss": 0.6126, + "step": 30040 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019344999406809823, + "loss": 0.5262, + "step": 30050 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019341440265749197, + "loss": 0.6714, + "step": 30060 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001933788112468857, + "loss": 0.6276, + "step": 30070 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001933432198362795, + "loss": 0.5706, + "step": 30080 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019330762842567325, + "loss": 0.5729, + "step": 30090 + }, + { + "epoch": 1.07, + "learning_rate": 0.000193272037015067, + "loss": 0.6276, + "step": 30100 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019323644560446078, + "loss": 0.5996, + "step": 30110 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019320085419385455, + "loss": 0.5797, + "step": 30120 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019316526278324828, + "loss": 0.5938, + "step": 30130 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019312967137264202, + "loss": 0.6318, + "step": 30140 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019309407996203582, + "loss": 0.5628, + "step": 30150 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019305848855142956, + "loss": 0.536, + "step": 30160 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019302289714082332, + "loss": 0.6441, + "step": 30170 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001929873057302171, + "loss": 0.5948, + "step": 30180 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019295171431961086, + "loss": 0.6447, + "step": 30190 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001929161229090046, + "loss": 0.5741, + "step": 30200 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001928805314983984, + "loss": 0.5729, + "step": 30210 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019284494008779213, + "loss": 0.6507, + "step": 30220 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019280934867718587, + "loss": 0.6223, + "step": 30230 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019277375726657966, + "loss": 0.6154, + "step": 30240 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001927381658559734, + "loss": 0.6246, + "step": 30250 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019270257444536717, + "loss": 0.6071, + "step": 30260 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001926669830347609, + "loss": 0.6656, + "step": 30270 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001926313916241547, + "loss": 0.6727, + "step": 30280 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019259580021354844, + "loss": 0.6625, + "step": 30290 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001925602088029422, + "loss": 0.5885, + "step": 30300 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019252461739233597, + "loss": 0.6322, + "step": 30310 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019248902598172974, + "loss": 0.5777, + "step": 30320 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019245343457112348, + "loss": 0.5824, + "step": 30330 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019241784316051725, + "loss": 0.7026, + "step": 30340 + }, + { + "epoch": 1.08, + "learning_rate": 0.000192382251749911, + "loss": 0.6116, + "step": 30350 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019234666033930475, + "loss": 0.573, + "step": 30360 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019231106892869852, + "loss": 0.5644, + "step": 30370 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019227547751809229, + "loss": 0.516, + "step": 30380 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019223988610748605, + "loss": 0.6469, + "step": 30390 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001922042946968798, + "loss": 0.5876, + "step": 30400 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019216870328627359, + "loss": 0.6664, + "step": 30410 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019213311187566732, + "loss": 0.6819, + "step": 30420 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019209752046506106, + "loss": 0.6065, + "step": 30430 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019206192905445486, + "loss": 0.5705, + "step": 30440 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001920263376438486, + "loss": 0.596, + "step": 30450 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019199074623324236, + "loss": 0.6182, + "step": 30460 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019195515482263613, + "loss": 0.6622, + "step": 30470 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001919195634120299, + "loss": 0.6638, + "step": 30480 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019188397200142364, + "loss": 0.7151, + "step": 30490 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019184838059081738, + "loss": 0.5788, + "step": 30500 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019181278918021117, + "loss": 0.6483, + "step": 30510 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001917771977696049, + "loss": 0.5185, + "step": 30520 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019174160635899868, + "loss": 0.6717, + "step": 30530 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019170601494839244, + "loss": 0.6547, + "step": 30540 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001916704235377862, + "loss": 0.6774, + "step": 30550 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019163483212717995, + "loss": 0.595, + "step": 30560 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019159924071657374, + "loss": 0.6523, + "step": 30570 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019156364930596748, + "loss": 0.5798, + "step": 30580 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019152805789536122, + "loss": 0.5794, + "step": 30590 + }, + { + "epoch": 1.09, + "learning_rate": 0.000191492466484755, + "loss": 0.6824, + "step": 30600 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019145687507414875, + "loss": 0.6457, + "step": 30610 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019142128366354252, + "loss": 0.5968, + "step": 30620 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019138569225293626, + "loss": 0.5473, + "step": 30630 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019135010084233005, + "loss": 0.6169, + "step": 30640 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001913145094317238, + "loss": 0.6106, + "step": 30650 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019127891802111753, + "loss": 0.609, + "step": 30660 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019124332661051133, + "loss": 0.5997, + "step": 30670 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019120773519990507, + "loss": 0.6476, + "step": 30680 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019117214378929883, + "loss": 0.6854, + "step": 30690 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019113655237869257, + "loss": 0.6241, + "step": 30700 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019110096096808637, + "loss": 0.5748, + "step": 30710 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001910653695574801, + "loss": 0.5583, + "step": 30720 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019102977814687384, + "loss": 0.586, + "step": 30730 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019099418673626764, + "loss": 0.673, + "step": 30740 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019095859532566138, + "loss": 0.7133, + "step": 30750 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019092300391505514, + "loss": 0.6676, + "step": 30760 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001908874125044489, + "loss": 0.6081, + "step": 30770 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019085182109384268, + "loss": 0.619, + "step": 30780 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019081622968323642, + "loss": 0.6002, + "step": 30790 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001907806382726302, + "loss": 0.6361, + "step": 30800 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019074504686202395, + "loss": 0.684, + "step": 30810 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019070945545141772, + "loss": 0.6789, + "step": 30820 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019067386404081146, + "loss": 0.5953, + "step": 30830 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019063827263020522, + "loss": 0.6358, + "step": 30840 + }, + { + "epoch": 1.1, + "learning_rate": 0.000190602681219599, + "loss": 0.6736, + "step": 30850 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019056708980899273, + "loss": 0.5862, + "step": 30860 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019053149839838652, + "loss": 0.6267, + "step": 30870 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019049590698778026, + "loss": 0.5714, + "step": 30880 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019046031557717403, + "loss": 0.6286, + "step": 30890 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001904247241665678, + "loss": 0.5203, + "step": 30900 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019038913275596156, + "loss": 0.5733, + "step": 30910 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001903535413453553, + "loss": 0.5402, + "step": 30920 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019031794993474904, + "loss": 0.5563, + "step": 30930 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019028235852414283, + "loss": 0.6556, + "step": 30940 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019024676711353657, + "loss": 0.68, + "step": 30950 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019021117570293034, + "loss": 0.6529, + "step": 30960 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001901755842923241, + "loss": 0.6043, + "step": 30970 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019013999288171787, + "loss": 0.5667, + "step": 30980 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001901044014711116, + "loss": 0.6946, + "step": 30990 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001900688100605054, + "loss": 0.6208, + "step": 31000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019003321864989915, + "loss": 0.6348, + "step": 31010 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018999762723929288, + "loss": 0.6067, + "step": 31020 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018996203582868668, + "loss": 0.6481, + "step": 31030 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018992644441808042, + "loss": 0.6563, + "step": 31040 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018989085300747418, + "loss": 0.5584, + "step": 31050 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018985526159686792, + "loss": 0.6712, + "step": 31060 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018981967018626172, + "loss": 0.5644, + "step": 31070 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018978407877565546, + "loss": 0.6973, + "step": 31080 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001897484873650492, + "loss": 0.6659, + "step": 31090 + }, + { + "epoch": 1.11, + "learning_rate": 0.000189712895954443, + "loss": 0.6289, + "step": 31100 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018967730454383673, + "loss": 0.5485, + "step": 31110 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001896417131332305, + "loss": 0.5416, + "step": 31120 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018960612172262426, + "loss": 0.634, + "step": 31130 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018957053031201803, + "loss": 0.589, + "step": 31140 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018953493890141177, + "loss": 0.5771, + "step": 31150 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001894993474908055, + "loss": 0.6364, + "step": 31160 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001894637560801993, + "loss": 0.554, + "step": 31170 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018942816466959304, + "loss": 0.6121, + "step": 31180 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001893925732589868, + "loss": 0.6219, + "step": 31190 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018935698184838057, + "loss": 0.5954, + "step": 31200 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018932139043777434, + "loss": 0.6046, + "step": 31210 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018928579902716808, + "loss": 0.5894, + "step": 31220 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018925020761656187, + "loss": 0.616, + "step": 31230 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018921461620595561, + "loss": 0.6432, + "step": 31240 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018917902479534935, + "loss": 0.6032, + "step": 31250 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018914343338474315, + "loss": 0.6726, + "step": 31260 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018910784197413689, + "loss": 0.5796, + "step": 31270 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018907225056353065, + "loss": 0.4966, + "step": 31280 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001890366591529244, + "loss": 0.5782, + "step": 31290 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018900106774231819, + "loss": 0.6468, + "step": 31300 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018896547633171193, + "loss": 0.6178, + "step": 31310 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001889298849211057, + "loss": 0.6163, + "step": 31320 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018889429351049946, + "loss": 0.6408, + "step": 31330 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001888587020998932, + "loss": 0.5468, + "step": 31340 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018882311068928696, + "loss": 0.6014, + "step": 31350 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018878751927868073, + "loss": 0.5388, + "step": 31360 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001887519278680745, + "loss": 0.599, + "step": 31370 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018871989559852888, + "loss": 0.7771, + "step": 31380 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018868430418792262, + "loss": 0.6362, + "step": 31390 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001886487127773164, + "loss": 0.6653, + "step": 31400 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018861312136671016, + "loss": 0.5234, + "step": 31410 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018857752995610392, + "loss": 0.6835, + "step": 31420 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018854193854549766, + "loss": 0.5704, + "step": 31430 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018850634713489146, + "loss": 0.6448, + "step": 31440 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001884707557242852, + "loss": 0.5872, + "step": 31450 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018843516431367894, + "loss": 0.6878, + "step": 31460 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001883995729030727, + "loss": 0.6881, + "step": 31470 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018836398149246647, + "loss": 0.5858, + "step": 31480 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018832839008186024, + "loss": 0.5946, + "step": 31490 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018829279867125398, + "loss": 0.5911, + "step": 31500 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018825720726064777, + "loss": 0.6424, + "step": 31510 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001882216158500415, + "loss": 0.6112, + "step": 31520 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018818602443943525, + "loss": 0.5837, + "step": 31530 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018815043302882904, + "loss": 0.6755, + "step": 31540 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018811484161822278, + "loss": 0.5554, + "step": 31550 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018807925020761655, + "loss": 0.5791, + "step": 31560 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018804365879701031, + "loss": 0.5715, + "step": 31570 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018800806738640408, + "loss": 0.6105, + "step": 31580 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018797247597579782, + "loss": 0.5968, + "step": 31590 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018793688456519156, + "loss": 0.5584, + "step": 31600 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018790129315458535, + "loss": 0.6295, + "step": 31610 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001878657017439791, + "loss": 0.6482, + "step": 31620 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018783011033337286, + "loss": 0.589, + "step": 31630 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018779451892276663, + "loss": 0.6268, + "step": 31640 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001877589275121604, + "loss": 0.6331, + "step": 31650 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018772333610155413, + "loss": 0.6903, + "step": 31660 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018768774469094793, + "loss": 0.651, + "step": 31670 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018765215328034166, + "loss": 0.6041, + "step": 31680 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001876165618697354, + "loss": 0.6155, + "step": 31690 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018758097045912917, + "loss": 0.6285, + "step": 31700 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018754537904852294, + "loss": 0.5604, + "step": 31710 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001875097876379167, + "loss": 0.6576, + "step": 31720 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018747419622731044, + "loss": 0.6277, + "step": 31730 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018743860481670424, + "loss": 0.5915, + "step": 31740 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018740301340609798, + "loss": 0.5546, + "step": 31750 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018736742199549172, + "loss": 0.6625, + "step": 31760 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001873318305848855, + "loss": 0.5997, + "step": 31770 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018729623917427925, + "loss": 0.5392, + "step": 31780 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018726064776367302, + "loss": 0.5222, + "step": 31790 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018722505635306676, + "loss": 0.5332, + "step": 31800 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018718946494246055, + "loss": 0.66, + "step": 31810 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001871538735318543, + "loss": 0.5814, + "step": 31820 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018711828212124805, + "loss": 0.6608, + "step": 31830 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018708269071064182, + "loss": 0.5929, + "step": 31840 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001870470993000356, + "loss": 0.6067, + "step": 31850 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018701150788942933, + "loss": 0.6957, + "step": 31860 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001869759164788231, + "loss": 0.5435, + "step": 31870 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018694032506821686, + "loss": 0.6778, + "step": 31880 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001869047336576106, + "loss": 0.6716, + "step": 31890 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001868691422470044, + "loss": 0.5864, + "step": 31900 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018683355083639813, + "loss": 0.6951, + "step": 31910 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001867979594257919, + "loss": 0.6377, + "step": 31920 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018676236801518564, + "loss": 0.6124, + "step": 31930 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018672677660457943, + "loss": 0.5202, + "step": 31940 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018669118519397317, + "loss": 0.788, + "step": 31950 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001866555937833669, + "loss": 0.612, + "step": 31960 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001866200023727607, + "loss": 0.6215, + "step": 31970 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018658441096215444, + "loss": 0.6006, + "step": 31980 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001865488195515482, + "loss": 0.6522, + "step": 31990 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018651322814094198, + "loss": 0.7201, + "step": 32000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018647763673033574, + "loss": 0.6283, + "step": 32010 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018644204531972948, + "loss": 0.7636, + "step": 32020 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018640645390912322, + "loss": 0.5267, + "step": 32030 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018637086249851702, + "loss": 0.6133, + "step": 32040 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018633527108791076, + "loss": 0.464, + "step": 32050 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018629967967730452, + "loss": 0.6047, + "step": 32060 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001862640882666983, + "loss": 0.556, + "step": 32070 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018622849685609206, + "loss": 0.661, + "step": 32080 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001861929054454858, + "loss": 0.6919, + "step": 32090 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001861573140348796, + "loss": 0.6544, + "step": 32100 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018612172262427333, + "loss": 0.5914, + "step": 32110 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018608613121366707, + "loss": 0.5636, + "step": 32120 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018605053980306086, + "loss": 0.5786, + "step": 32130 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001860149483924546, + "loss": 0.6001, + "step": 32140 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018597935698184837, + "loss": 0.5692, + "step": 32150 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001859437655712421, + "loss": 0.6065, + "step": 32160 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001859081741606359, + "loss": 0.6789, + "step": 32170 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018587258275002964, + "loss": 0.635, + "step": 32180 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018583699133942338, + "loss": 0.5824, + "step": 32190 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018580139992881717, + "loss": 0.5386, + "step": 32200 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001857658085182109, + "loss": 0.5915, + "step": 32210 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018573021710760468, + "loss": 0.6075, + "step": 32220 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018569462569699845, + "loss": 0.5791, + "step": 32230 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001856590342863922, + "loss": 0.6135, + "step": 32240 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018562344287578595, + "loss": 0.5579, + "step": 32250 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001855878514651797, + "loss": 0.6392, + "step": 32260 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018555226005457349, + "loss": 0.6123, + "step": 32270 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018551666864396722, + "loss": 0.6541, + "step": 32280 + }, + { + "epoch": 1.15, + "learning_rate": 0.000185481077233361, + "loss": 0.6483, + "step": 32290 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018544548582275476, + "loss": 0.6971, + "step": 32300 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018540989441214852, + "loss": 0.7035, + "step": 32310 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018537430300154226, + "loss": 0.53, + "step": 32320 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018533871159093606, + "loss": 0.5919, + "step": 32330 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001853031201803298, + "loss": 0.612, + "step": 32340 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018526752876972356, + "loss": 0.645, + "step": 32350 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018523193735911733, + "loss": 0.6166, + "step": 32360 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018519634594851107, + "loss": 0.6795, + "step": 32370 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018516075453790484, + "loss": 0.6391, + "step": 32380 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018512516312729858, + "loss": 0.6132, + "step": 32390 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018508957171669237, + "loss": 0.6031, + "step": 32400 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001850539803060861, + "loss": 0.5784, + "step": 32410 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018501838889547988, + "loss": 0.6042, + "step": 32420 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018498279748487364, + "loss": 0.5264, + "step": 32430 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001849472060742674, + "loss": 0.629, + "step": 32440 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018491161466366115, + "loss": 0.5702, + "step": 32450 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018487602325305494, + "loss": 0.659, + "step": 32460 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018484043184244868, + "loss": 0.6369, + "step": 32470 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018480484043184242, + "loss": 0.5802, + "step": 32480 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001847692490212362, + "loss": 0.5751, + "step": 32490 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018473365761062995, + "loss": 0.6506, + "step": 32500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018469806620002372, + "loss": 0.6221, + "step": 32510 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018466247478941746, + "loss": 0.6299, + "step": 32520 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018462688337881125, + "loss": 0.585, + "step": 32530 + }, + { + "epoch": 1.16, + "learning_rate": 0.000184591291968205, + "loss": 0.616, + "step": 32540 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018455570055759873, + "loss": 0.5664, + "step": 32550 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018452010914699253, + "loss": 0.6559, + "step": 32560 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018448451773638627, + "loss": 0.6587, + "step": 32570 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018444892632578003, + "loss": 0.6059, + "step": 32580 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001844133349151738, + "loss": 0.6542, + "step": 32590 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018437774350456756, + "loss": 0.6728, + "step": 32600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001843421520939613, + "loss": 0.6334, + "step": 32610 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018430656068335504, + "loss": 0.5968, + "step": 32620 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018427096927274884, + "loss": 0.6071, + "step": 32630 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018423537786214258, + "loss": 0.6046, + "step": 32640 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018419978645153634, + "loss": 0.5641, + "step": 32650 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001841641950409301, + "loss": 0.5985, + "step": 32660 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018412860363032388, + "loss": 0.5761, + "step": 32670 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018409301221971762, + "loss": 0.661, + "step": 32680 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001840574208091114, + "loss": 0.6681, + "step": 32690 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018402182939850515, + "loss": 0.5929, + "step": 32700 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001839862379878989, + "loss": 0.6298, + "step": 32710 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018395064657729266, + "loss": 0.5373, + "step": 32720 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018391505516668642, + "loss": 0.6077, + "step": 32730 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001838794637560802, + "loss": 0.6707, + "step": 32740 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018384387234547393, + "loss": 0.6899, + "step": 32750 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018380828093486772, + "loss": 0.6628, + "step": 32760 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018377268952426146, + "loss": 0.5342, + "step": 32770 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001837370981136552, + "loss": 0.7048, + "step": 32780 + }, + { + "epoch": 1.17, + "learning_rate": 0.000183701506703049, + "loss": 0.5897, + "step": 32790 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018366591529244273, + "loss": 0.6144, + "step": 32800 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001836303238818365, + "loss": 0.5443, + "step": 32810 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018359473247123027, + "loss": 0.5402, + "step": 32820 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018355914106062403, + "loss": 0.6291, + "step": 32830 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018352354965001777, + "loss": 0.5933, + "step": 32840 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018348795823941154, + "loss": 0.5603, + "step": 32850 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001834523668288053, + "loss": 0.6125, + "step": 32860 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018341677541819905, + "loss": 0.5784, + "step": 32870 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001833811840075928, + "loss": 0.6617, + "step": 32880 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018334559259698658, + "loss": 0.6192, + "step": 32890 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018331000118638034, + "loss": 0.6708, + "step": 32900 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018327440977577408, + "loss": 0.6606, + "step": 32910 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018323881836516788, + "loss": 0.6642, + "step": 32920 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018320322695456162, + "loss": 0.5525, + "step": 32930 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018316763554395538, + "loss": 0.5653, + "step": 32940 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018313204413334912, + "loss": 0.652, + "step": 32950 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018309645272274292, + "loss": 0.5572, + "step": 32960 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018306086131213666, + "loss": 0.5222, + "step": 32970 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001830252699015304, + "loss": 0.6703, + "step": 32980 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001829896784909242, + "loss": 0.6858, + "step": 32990 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018295408708031793, + "loss": 0.6153, + "step": 33000 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001829184956697117, + "loss": 0.5675, + "step": 33010 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018288290425910546, + "loss": 0.5905, + "step": 33020 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018284731284849923, + "loss": 0.6414, + "step": 33030 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018281172143789297, + "loss": 0.5974, + "step": 33040 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001827761300272867, + "loss": 0.6254, + "step": 33050 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001827405386166805, + "loss": 0.6567, + "step": 33060 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018270494720607424, + "loss": 0.6914, + "step": 33070 + }, + { + "epoch": 1.18, + "learning_rate": 0.000182669355795468, + "loss": 0.6322, + "step": 33080 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018263376438486177, + "loss": 0.5958, + "step": 33090 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018259817297425554, + "loss": 0.6443, + "step": 33100 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018256258156364928, + "loss": 0.5448, + "step": 33110 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018252699015304307, + "loss": 0.6193, + "step": 33120 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001824913987424368, + "loss": 0.703, + "step": 33130 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018245580733183055, + "loss": 0.6919, + "step": 33140 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018242021592122435, + "loss": 0.5957, + "step": 33150 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018238462451061809, + "loss": 0.5019, + "step": 33160 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018234903310001185, + "loss": 0.5574, + "step": 33170 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001823134416894056, + "loss": 0.6947, + "step": 33180 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018227785027879939, + "loss": 0.6355, + "step": 33190 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018224225886819312, + "loss": 0.5957, + "step": 33200 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018220666745758686, + "loss": 0.6044, + "step": 33210 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018217107604698066, + "loss": 0.5609, + "step": 33220 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001821354846363744, + "loss": 0.5992, + "step": 33230 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018209989322576816, + "loss": 0.6972, + "step": 33240 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018206430181516193, + "loss": 0.6488, + "step": 33250 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001820287104045557, + "loss": 0.7164, + "step": 33260 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018199311899394944, + "loss": 0.6044, + "step": 33270 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018195752758334318, + "loss": 0.5762, + "step": 33280 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018192193617273697, + "loss": 0.5715, + "step": 33290 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001818863447621307, + "loss": 0.5597, + "step": 33300 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018185075335152448, + "loss": 0.6488, + "step": 33310 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018181516194091824, + "loss": 0.5351, + "step": 33320 + }, + { + "epoch": 1.18, + "learning_rate": 0.000181779570530312, + "loss": 0.6402, + "step": 33330 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018174397911970575, + "loss": 0.6542, + "step": 33340 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018170838770909954, + "loss": 0.6288, + "step": 33350 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018167279629849328, + "loss": 0.594, + "step": 33360 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018163720488788702, + "loss": 0.5057, + "step": 33370 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018160161347728081, + "loss": 0.6633, + "step": 33380 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018156602206667455, + "loss": 0.6429, + "step": 33390 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018153043065606832, + "loss": 0.6355, + "step": 33400 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018149483924546206, + "loss": 0.5896, + "step": 33410 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018145924783485585, + "loss": 0.6558, + "step": 33420 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001814236564242496, + "loss": 0.6432, + "step": 33430 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018138806501364336, + "loss": 0.6185, + "step": 33440 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018135247360303713, + "loss": 0.6005, + "step": 33450 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018131688219243087, + "loss": 0.6265, + "step": 33460 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018128129078182463, + "loss": 0.618, + "step": 33470 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001812456993712184, + "loss": 0.5359, + "step": 33480 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018121366710167276, + "loss": 0.655, + "step": 33490 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018117807569106655, + "loss": 0.6602, + "step": 33500 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001811424842804603, + "loss": 0.5523, + "step": 33510 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018110689286985406, + "loss": 0.5736, + "step": 33520 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018107130145924782, + "loss": 0.5712, + "step": 33530 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001810357100486416, + "loss": 0.6814, + "step": 33540 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018100011863803533, + "loss": 0.7061, + "step": 33550 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018096452722742912, + "loss": 0.6695, + "step": 33560 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018092893581682286, + "loss": 0.579, + "step": 33570 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001808933444062166, + "loss": 0.6537, + "step": 33580 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018085775299561037, + "loss": 0.6494, + "step": 33590 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018082216158500414, + "loss": 0.5447, + "step": 33600 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001807865701743979, + "loss": 0.7083, + "step": 33610 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018075097876379164, + "loss": 0.6694, + "step": 33620 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018071538735318544, + "loss": 0.6047, + "step": 33630 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018067979594257918, + "loss": 0.586, + "step": 33640 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018064420453197292, + "loss": 0.5902, + "step": 33650 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001806086131213667, + "loss": 0.6112, + "step": 33660 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018057302171076045, + "loss": 0.6323, + "step": 33670 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018053743030015421, + "loss": 0.5426, + "step": 33680 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018050183888954798, + "loss": 0.5764, + "step": 33690 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018046624747894175, + "loss": 0.6374, + "step": 33700 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001804306560683355, + "loss": 0.6675, + "step": 33710 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018039506465772923, + "loss": 0.681, + "step": 33720 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018035947324712302, + "loss": 0.6083, + "step": 33730 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018032388183651676, + "loss": 0.5993, + "step": 33740 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018028829042591053, + "loss": 0.7208, + "step": 33750 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001802526990153043, + "loss": 0.6094, + "step": 33760 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018021710760469806, + "loss": 0.6612, + "step": 33770 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001801815161940918, + "loss": 0.6659, + "step": 33780 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001801459247834856, + "loss": 0.5837, + "step": 33790 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018011033337287933, + "loss": 0.5903, + "step": 33800 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018007474196227307, + "loss": 0.745, + "step": 33810 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018003915055166684, + "loss": 0.6172, + "step": 33820 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001800035591410606, + "loss": 0.6034, + "step": 33830 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017996796773045437, + "loss": 0.5568, + "step": 33840 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001799323763198481, + "loss": 0.7227, + "step": 33850 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001798967849092419, + "loss": 0.7274, + "step": 33860 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017986119349863564, + "loss": 0.6121, + "step": 33870 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017982560208802938, + "loss": 0.6195, + "step": 33880 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017979001067742318, + "loss": 0.6621, + "step": 33890 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017975441926681692, + "loss": 0.6413, + "step": 33900 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017971882785621068, + "loss": 0.6694, + "step": 33910 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017968323644560445, + "loss": 0.6867, + "step": 33920 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017964764503499822, + "loss": 0.7622, + "step": 33930 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017961205362439196, + "loss": 0.5562, + "step": 33940 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017957646221378572, + "loss": 0.6508, + "step": 33950 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001795408708031795, + "loss": 0.5687, + "step": 33960 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017950527939257326, + "loss": 0.6203, + "step": 33970 + }, + { + "epoch": 1.21, + "learning_rate": 0.000179469687981967, + "loss": 0.6242, + "step": 33980 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017943409657136076, + "loss": 0.5707, + "step": 33990 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017939850516075453, + "loss": 0.6166, + "step": 34000 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017936291375014827, + "loss": 0.5797, + "step": 34010 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017932732233954206, + "loss": 0.5798, + "step": 34020 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001792917309289358, + "loss": 0.5398, + "step": 34030 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017925613951832957, + "loss": 0.5489, + "step": 34040 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001792205481077233, + "loss": 0.5271, + "step": 34050 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001791849566971171, + "loss": 0.6222, + "step": 34060 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017914936528651084, + "loss": 0.6482, + "step": 34070 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017911377387590458, + "loss": 0.5883, + "step": 34080 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017907818246529837, + "loss": 0.5076, + "step": 34090 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001790425910546921, + "loss": 0.7126, + "step": 34100 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017900699964408588, + "loss": 0.6297, + "step": 34110 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001789785265156009, + "loss": 0.5615, + "step": 34120 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017894293510499465, + "loss": 0.6069, + "step": 34130 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001789073436943884, + "loss": 0.6186, + "step": 34140 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017887175228378219, + "loss": 0.5059, + "step": 34150 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017883616087317593, + "loss": 0.6379, + "step": 34160 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017880056946256967, + "loss": 0.6156, + "step": 34170 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017876497805196346, + "loss": 0.7217, + "step": 34180 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001787293866413572, + "loss": 0.5996, + "step": 34190 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017869379523075096, + "loss": 0.6325, + "step": 34200 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001786582038201447, + "loss": 0.6992, + "step": 34210 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001786226124095385, + "loss": 0.5919, + "step": 34220 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017858702099893224, + "loss": 0.5612, + "step": 34230 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017855142958832598, + "loss": 0.6702, + "step": 34240 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017851583817771977, + "loss": 0.5283, + "step": 34250 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001784802467671135, + "loss": 0.602, + "step": 34260 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017844465535650728, + "loss": 0.5963, + "step": 34270 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017840906394590104, + "loss": 0.7406, + "step": 34280 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001783734725352948, + "loss": 0.5287, + "step": 34290 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017833788112468855, + "loss": 0.6692, + "step": 34300 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017830228971408232, + "loss": 0.5996, + "step": 34310 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017826669830347608, + "loss": 0.5591, + "step": 34320 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017823110689286982, + "loss": 0.6203, + "step": 34330 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001781955154822636, + "loss": 0.6526, + "step": 34340 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017815992407165735, + "loss": 0.681, + "step": 34350 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017812433266105112, + "loss": 0.5821, + "step": 34360 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017808874125044486, + "loss": 0.6636, + "step": 34370 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017805314983983865, + "loss": 0.7057, + "step": 34380 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001780175584292324, + "loss": 0.5769, + "step": 34390 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017798196701862616, + "loss": 0.6173, + "step": 34400 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017794637560801993, + "loss": 0.6631, + "step": 34410 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001779107841974137, + "loss": 0.6114, + "step": 34420 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017787519278680743, + "loss": 0.5739, + "step": 34430 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017783960137620117, + "loss": 0.6282, + "step": 34440 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017780400996559497, + "loss": 0.6898, + "step": 34450 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001777684185549887, + "loss": 0.5748, + "step": 34460 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017773282714438247, + "loss": 0.6064, + "step": 34470 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017769723573377624, + "loss": 0.6415, + "step": 34480 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017766164432317, + "loss": 0.6279, + "step": 34490 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017762605291256374, + "loss": 0.6234, + "step": 34500 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017759046150195754, + "loss": 0.7118, + "step": 34510 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017755487009135128, + "loss": 0.5518, + "step": 34520 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017751927868074502, + "loss": 0.5394, + "step": 34530 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017748368727013878, + "loss": 0.516, + "step": 34540 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017744809585953255, + "loss": 0.6158, + "step": 34550 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017741250444892632, + "loss": 0.5519, + "step": 34560 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017737691303832006, + "loss": 0.6187, + "step": 34570 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017734132162771385, + "loss": 0.586, + "step": 34580 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001773057302171076, + "loss": 0.5116, + "step": 34590 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017727013880650133, + "loss": 0.5188, + "step": 34600 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017723454739589512, + "loss": 0.7858, + "step": 34610 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017719895598528886, + "loss": 0.5352, + "step": 34620 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017716336457468263, + "loss": 0.635, + "step": 34630 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001771277731640764, + "loss": 0.7007, + "step": 34640 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017709218175347016, + "loss": 0.6168, + "step": 34650 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001770565903428639, + "loss": 0.5854, + "step": 34660 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017702099893225764, + "loss": 0.6019, + "step": 34670 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017698540752165143, + "loss": 0.5687, + "step": 34680 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017694981611104517, + "loss": 0.5601, + "step": 34690 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017691422470043894, + "loss": 0.6441, + "step": 34700 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001768786332898327, + "loss": 0.6256, + "step": 34710 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017684304187922647, + "loss": 0.6179, + "step": 34720 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001768074504686202, + "loss": 0.5836, + "step": 34730 + }, + { + "epoch": 1.23, + "learning_rate": 0.000176771859058014, + "loss": 0.6985, + "step": 34740 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017673626764740775, + "loss": 0.5912, + "step": 34750 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017670067623680149, + "loss": 0.5808, + "step": 34760 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017666508482619525, + "loss": 0.5467, + "step": 34770 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017662949341558902, + "loss": 0.5888, + "step": 34780 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017659390200498279, + "loss": 0.6884, + "step": 34790 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017655831059437652, + "loss": 0.6265, + "step": 34800 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017652271918377032, + "loss": 0.6266, + "step": 34810 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017648712777316406, + "loss": 0.6485, + "step": 34820 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001764515363625578, + "loss": 0.592, + "step": 34830 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001764159449519516, + "loss": 0.5474, + "step": 34840 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017638035354134533, + "loss": 0.6784, + "step": 34850 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001763447621307391, + "loss": 0.5738, + "step": 34860 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017630917072013286, + "loss": 0.6016, + "step": 34870 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017627357930952663, + "loss": 0.6417, + "step": 34880 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017623798789892037, + "loss": 0.5677, + "step": 34890 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017620239648831414, + "loss": 0.6381, + "step": 34900 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001761668050777079, + "loss": 0.6315, + "step": 34910 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017613121366710167, + "loss": 0.7025, + "step": 34920 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001760956222564954, + "loss": 0.5007, + "step": 34930 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017606003084588918, + "loss": 0.6842, + "step": 34940 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017602443943528294, + "loss": 0.5468, + "step": 34950 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017598884802467668, + "loss": 0.5222, + "step": 34960 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017595325661407047, + "loss": 0.545, + "step": 34970 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017591766520346421, + "loss": 0.5539, + "step": 34980 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017588207379285798, + "loss": 0.6645, + "step": 34990 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017584648238225172, + "loss": 0.6729, + "step": 35000 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017581089097164551, + "loss": 0.564, + "step": 35010 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017577529956103925, + "loss": 0.6072, + "step": 35020 + }, + { + "epoch": 1.25, + "learning_rate": 0.000175739708150433, + "loss": 0.6382, + "step": 35030 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001757041167398268, + "loss": 0.5577, + "step": 35040 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017566852532922053, + "loss": 0.5998, + "step": 35050 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001756329339186143, + "loss": 0.6007, + "step": 35060 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017559734250800806, + "loss": 0.6123, + "step": 35070 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017556175109740183, + "loss": 0.5719, + "step": 35080 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017552615968679557, + "loss": 0.6146, + "step": 35090 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017549056827618936, + "loss": 0.5799, + "step": 35100 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001754549768655831, + "loss": 0.6302, + "step": 35110 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017541938545497684, + "loss": 0.6366, + "step": 35120 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001753837940443706, + "loss": 0.687, + "step": 35130 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017534820263376437, + "loss": 0.5285, + "step": 35140 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017531261122315814, + "loss": 0.6264, + "step": 35150 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017527701981255188, + "loss": 0.6108, + "step": 35160 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017524142840194567, + "loss": 0.5217, + "step": 35170 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001752058369913394, + "loss": 0.6241, + "step": 35180 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017517024558073315, + "loss": 0.5877, + "step": 35190 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017513465417012694, + "loss": 0.628, + "step": 35200 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017509906275952068, + "loss": 0.5322, + "step": 35210 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017506347134891445, + "loss": 0.5287, + "step": 35220 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001750278799383082, + "loss": 0.6168, + "step": 35230 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017499228852770198, + "loss": 0.5809, + "step": 35240 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017495669711709572, + "loss": 0.5863, + "step": 35250 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017492110570648946, + "loss": 0.6099, + "step": 35260 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017488551429588325, + "loss": 0.5736, + "step": 35270 + }, + { + "epoch": 1.25, + "learning_rate": 0.000174849922885277, + "loss": 0.5413, + "step": 35280 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017481433147467076, + "loss": 0.6549, + "step": 35290 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017477874006406453, + "loss": 0.6414, + "step": 35300 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001747431486534583, + "loss": 0.5341, + "step": 35310 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017470755724285203, + "loss": 0.7005, + "step": 35320 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017467196583224583, + "loss": 0.5184, + "step": 35330 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017463637442163957, + "loss": 0.5416, + "step": 35340 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001746007830110333, + "loss": 0.5534, + "step": 35350 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017456519160042707, + "loss": 0.6309, + "step": 35360 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017452960018982084, + "loss": 0.6217, + "step": 35370 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001744940087792146, + "loss": 0.6795, + "step": 35380 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017445841736860835, + "loss": 0.5418, + "step": 35390 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017442282595800214, + "loss": 0.6551, + "step": 35400 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017438723454739588, + "loss": 0.6562, + "step": 35410 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017435164313678964, + "loss": 0.5856, + "step": 35420 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001743160517261834, + "loss": 0.6489, + "step": 35430 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017428046031557715, + "loss": 0.5691, + "step": 35440 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017424486890497092, + "loss": 0.66, + "step": 35450 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017420927749436466, + "loss": 0.5538, + "step": 35460 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017417368608375845, + "loss": 0.6236, + "step": 35470 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001741380946731522, + "loss": 0.5936, + "step": 35480 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017410250326254596, + "loss": 0.5961, + "step": 35490 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017406691185193972, + "loss": 0.5615, + "step": 35500 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001740313204413335, + "loss": 0.64, + "step": 35510 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017399572903072723, + "loss": 0.6744, + "step": 35520 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017396013762012102, + "loss": 0.665, + "step": 35530 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017392454620951476, + "loss": 0.5765, + "step": 35540 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001738889547989085, + "loss": 0.6261, + "step": 35550 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017385336338830227, + "loss": 0.6324, + "step": 35560 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017381777197769603, + "loss": 0.5916, + "step": 35570 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001737821805670898, + "loss": 0.667, + "step": 35580 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017374658915648354, + "loss": 0.6032, + "step": 35590 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017371099774587733, + "loss": 0.5934, + "step": 35600 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017367540633527107, + "loss": 0.6625, + "step": 35610 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001736398149246648, + "loss": 0.5713, + "step": 35620 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001736042235140586, + "loss": 0.5756, + "step": 35630 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017356863210345235, + "loss": 0.5164, + "step": 35640 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001735330406928461, + "loss": 0.5321, + "step": 35650 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017349744928223988, + "loss": 0.6054, + "step": 35660 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017346185787163365, + "loss": 0.5949, + "step": 35670 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017342626646102739, + "loss": 0.6288, + "step": 35680 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017339067505042113, + "loss": 0.6339, + "step": 35690 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017335508363981492, + "loss": 0.6201, + "step": 35700 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017331949222920866, + "loss": 0.6531, + "step": 35710 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017328390081860242, + "loss": 0.599, + "step": 35720 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001732483094079962, + "loss": 0.521, + "step": 35730 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017321271799738996, + "loss": 0.6082, + "step": 35740 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001731771265867837, + "loss": 0.5898, + "step": 35750 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001731415351761775, + "loss": 0.7437, + "step": 35760 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017310594376557123, + "loss": 0.5672, + "step": 35770 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017307035235496497, + "loss": 0.5288, + "step": 35780 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017303476094435874, + "loss": 0.5916, + "step": 35790 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001729991695337525, + "loss": 0.5892, + "step": 35800 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017296357812314627, + "loss": 0.6411, + "step": 35810 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017292798671254, + "loss": 0.6004, + "step": 35820 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001728923953019338, + "loss": 0.6444, + "step": 35830 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017285680389132754, + "loss": 0.5217, + "step": 35840 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017282121248072128, + "loss": 0.6596, + "step": 35850 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017278562107011508, + "loss": 0.5288, + "step": 35860 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017275002965950881, + "loss": 0.5804, + "step": 35870 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017271443824890258, + "loss": 0.6687, + "step": 35880 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017267884683829635, + "loss": 0.6379, + "step": 35890 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017264325542769011, + "loss": 0.6385, + "step": 35900 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017260766401708385, + "loss": 0.6302, + "step": 35910 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017257207260647762, + "loss": 0.4826, + "step": 35920 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001725364811958714, + "loss": 0.5856, + "step": 35930 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017250088978526513, + "loss": 0.6481, + "step": 35940 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001724652983746589, + "loss": 0.5813, + "step": 35950 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017242970696405266, + "loss": 0.5414, + "step": 35960 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017239411555344643, + "loss": 0.5318, + "step": 35970 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017235852414284017, + "loss": 0.739, + "step": 35980 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017232293273223396, + "loss": 0.6323, + "step": 35990 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001722873413216277, + "loss": 0.6266, + "step": 36000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017225174991102147, + "loss": 0.6285, + "step": 36010 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001722161585004152, + "loss": 0.619, + "step": 36020 + }, + { + "epoch": 1.28, + "learning_rate": 0.000172180567089809, + "loss": 0.6538, + "step": 36030 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017214497567920274, + "loss": 0.6417, + "step": 36040 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017210938426859648, + "loss": 0.5667, + "step": 36050 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017207379285799027, + "loss": 0.5555, + "step": 36060 + }, + { + "epoch": 1.28, + "learning_rate": 0.000172038201447384, + "loss": 0.5365, + "step": 36070 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017200261003677778, + "loss": 0.5798, + "step": 36080 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017196701862617154, + "loss": 0.64, + "step": 36090 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001719314272155653, + "loss": 0.5595, + "step": 36100 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017189583580495905, + "loss": 0.633, + "step": 36110 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017186024439435284, + "loss": 0.6322, + "step": 36120 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017182465298374658, + "loss": 0.6041, + "step": 36130 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017178906157314032, + "loss": 0.5973, + "step": 36140 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001717534701625341, + "loss": 0.5494, + "step": 36150 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017171787875192786, + "loss": 0.6574, + "step": 36160 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017168228734132162, + "loss": 0.59, + "step": 36170 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017164669593071536, + "loss": 0.6757, + "step": 36180 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017161110452010915, + "loss": 0.5482, + "step": 36190 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001715755131095029, + "loss": 0.6004, + "step": 36200 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017153992169889663, + "loss": 0.6071, + "step": 36210 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017150433028829043, + "loss": 0.5847, + "step": 36220 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017146873887768417, + "loss": 0.5477, + "step": 36230 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017143314746707793, + "loss": 0.6186, + "step": 36240 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017139755605647167, + "loss": 0.5453, + "step": 36250 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017136196464586547, + "loss": 0.5113, + "step": 36260 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001713263732352592, + "loss": 0.5824, + "step": 36270 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017129078182465295, + "loss": 0.6613, + "step": 36280 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017125519041404674, + "loss": 0.6176, + "step": 36290 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017121959900344048, + "loss": 0.5936, + "step": 36300 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017118400759283425, + "loss": 0.6983, + "step": 36310 + }, + { + "epoch": 1.29, + "learning_rate": 0.000171148416182228, + "loss": 0.6346, + "step": 36320 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017111282477162178, + "loss": 0.6482, + "step": 36330 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017107723336101552, + "loss": 0.5746, + "step": 36340 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001710416419504093, + "loss": 0.6006, + "step": 36350 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017100605053980305, + "loss": 0.6274, + "step": 36360 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001709704591291968, + "loss": 0.693, + "step": 36370 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017093486771859056, + "loss": 0.6332, + "step": 36380 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017089927630798432, + "loss": 0.5767, + "step": 36390 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001708636848973781, + "loss": 0.5879, + "step": 36400 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017082809348677183, + "loss": 0.6397, + "step": 36410 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017079250207616562, + "loss": 0.5569, + "step": 36420 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017075691066555936, + "loss": 0.6219, + "step": 36430 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001707213192549531, + "loss": 0.7114, + "step": 36440 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001706857278443469, + "loss": 0.6532, + "step": 36450 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017065013643374064, + "loss": 0.6401, + "step": 36460 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001706145450231344, + "loss": 0.6031, + "step": 36470 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017057895361252814, + "loss": 0.5322, + "step": 36480 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017054336220192193, + "loss": 0.5923, + "step": 36490 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017050777079131567, + "loss": 0.6229, + "step": 36500 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017047217938070944, + "loss": 0.5956, + "step": 36510 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001704365879701032, + "loss": 0.7061, + "step": 36520 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017040099655949697, + "loss": 0.6813, + "step": 36530 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001703654051488907, + "loss": 0.5965, + "step": 36540 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017032981373828448, + "loss": 0.6617, + "step": 36550 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017029422232767825, + "loss": 0.6197, + "step": 36560 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017025863091707199, + "loss": 0.5923, + "step": 36570 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017022303950646575, + "loss": 0.6407, + "step": 36580 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017018744809585952, + "loss": 0.7491, + "step": 36590 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017015185668525329, + "loss": 0.5788, + "step": 36600 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017011626527464703, + "loss": 0.5912, + "step": 36610 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017008067386404082, + "loss": 0.6158, + "step": 36620 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017004508245343456, + "loss": 0.5639, + "step": 36630 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001700094910428283, + "loss": 0.6204, + "step": 36640 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001699738996322221, + "loss": 0.542, + "step": 36650 + }, + { + "epoch": 1.3, + "learning_rate": 0.00016993830822161583, + "loss": 0.5382, + "step": 36660 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001699027168110096, + "loss": 0.6179, + "step": 36670 + }, + { + "epoch": 1.3, + "learning_rate": 0.00016986712540040336, + "loss": 0.5101, + "step": 36680 + }, + { + "epoch": 1.3, + "learning_rate": 0.00016983153398979713, + "loss": 0.6837, + "step": 36690 + }, + { + "epoch": 1.3, + "learning_rate": 0.00016979594257919087, + "loss": 0.66, + "step": 36700 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001697603511685846, + "loss": 0.5087, + "step": 36710 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001697247597579784, + "loss": 0.662, + "step": 36720 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016968916834737214, + "loss": 0.6966, + "step": 36730 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001696535769367659, + "loss": 0.6055, + "step": 36740 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016961798552615968, + "loss": 0.5968, + "step": 36750 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016958239411555344, + "loss": 0.6237, + "step": 36760 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016954680270494718, + "loss": 0.5716, + "step": 36770 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016951121129434098, + "loss": 0.5689, + "step": 36780 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016947561988373471, + "loss": 0.6113, + "step": 36790 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016944002847312845, + "loss": 0.6078, + "step": 36800 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016940443706252222, + "loss": 0.5904, + "step": 36810 + }, + { + "epoch": 1.31, + "learning_rate": 0.000169368845651916, + "loss": 0.606, + "step": 36820 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016933325424130975, + "loss": 0.6208, + "step": 36830 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001692976628307035, + "loss": 0.723, + "step": 36840 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001692620714200973, + "loss": 0.638, + "step": 36850 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016922648000949103, + "loss": 0.7124, + "step": 36860 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016919088859888477, + "loss": 0.5899, + "step": 36870 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016915529718827856, + "loss": 0.5808, + "step": 36880 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001691197057776723, + "loss": 0.4925, + "step": 36890 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016908411436706607, + "loss": 0.5048, + "step": 36900 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016904852295645983, + "loss": 0.5432, + "step": 36910 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001690129315458536, + "loss": 0.6509, + "step": 36920 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016897734013524734, + "loss": 0.5877, + "step": 36930 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016894174872464108, + "loss": 0.5515, + "step": 36940 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016890615731403487, + "loss": 0.6036, + "step": 36950 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001688705659034286, + "loss": 0.66, + "step": 36960 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016883497449282238, + "loss": 0.5511, + "step": 36970 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016879938308221614, + "loss": 0.6044, + "step": 36980 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001687637916716099, + "loss": 0.7099, + "step": 36990 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016872820026100365, + "loss": 0.6027, + "step": 37000 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016869260885039744, + "loss": 0.5572, + "step": 37010 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016865701743979118, + "loss": 0.6017, + "step": 37020 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016862142602918495, + "loss": 0.6449, + "step": 37030 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001685858346185787, + "loss": 0.5116, + "step": 37040 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016855024320797246, + "loss": 0.653, + "step": 37050 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016851465179736622, + "loss": 0.5415, + "step": 37060 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016847906038675996, + "loss": 0.6105, + "step": 37070 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016844346897615376, + "loss": 0.6459, + "step": 37080 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001684078775655475, + "loss": 0.6447, + "step": 37090 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016837228615494126, + "loss": 0.6761, + "step": 37100 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016833669474433503, + "loss": 0.5986, + "step": 37110 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001683011033337288, + "loss": 0.6495, + "step": 37120 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016826551192312253, + "loss": 0.5321, + "step": 37130 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016822992051251633, + "loss": 0.5852, + "step": 37140 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016819432910191007, + "loss": 0.5463, + "step": 37150 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001681587376913038, + "loss": 0.676, + "step": 37160 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016812314628069757, + "loss": 0.6045, + "step": 37170 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016808755487009134, + "loss": 0.6173, + "step": 37180 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001680519634594851, + "loss": 0.6602, + "step": 37190 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016801637204887885, + "loss": 0.6244, + "step": 37200 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016798078063827264, + "loss": 0.6517, + "step": 37210 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016794518922766638, + "loss": 0.6033, + "step": 37220 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016790959781706012, + "loss": 0.594, + "step": 37230 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001678740064064539, + "loss": 0.6216, + "step": 37240 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016783841499584765, + "loss": 0.6204, + "step": 37250 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016780282358524142, + "loss": 0.564, + "step": 37260 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016776723217463516, + "loss": 0.6548, + "step": 37270 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016773164076402895, + "loss": 0.5397, + "step": 37280 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001676960493534227, + "loss": 0.666, + "step": 37290 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016766045794281643, + "loss": 0.7033, + "step": 37300 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016762486653221022, + "loss": 0.6451, + "step": 37310 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016758927512160396, + "loss": 0.6316, + "step": 37320 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016755368371099773, + "loss": 0.606, + "step": 37330 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001675180923003915, + "loss": 0.6093, + "step": 37340 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016748250088978526, + "loss": 0.5978, + "step": 37350 + }, + { + "epoch": 1.33, + "learning_rate": 0.000167446909479179, + "loss": 0.5997, + "step": 37360 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001674113180685728, + "loss": 0.5662, + "step": 37370 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016737572665796654, + "loss": 0.5696, + "step": 37380 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016734013524736027, + "loss": 0.5701, + "step": 37390 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016730454383675404, + "loss": 0.5345, + "step": 37400 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001672689524261478, + "loss": 0.6047, + "step": 37410 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016723336101554157, + "loss": 0.6684, + "step": 37420 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016719776960493531, + "loss": 0.6394, + "step": 37430 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001671621781943291, + "loss": 0.6469, + "step": 37440 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016712658678372285, + "loss": 0.526, + "step": 37450 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016709099537311659, + "loss": 0.5772, + "step": 37460 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016705540396251038, + "loss": 0.61, + "step": 37470 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016701981255190412, + "loss": 0.5937, + "step": 37480 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016698422114129789, + "loss": 0.6498, + "step": 37490 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016694862973069163, + "loss": 0.5828, + "step": 37500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016691303832008542, + "loss": 0.6154, + "step": 37510 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016687744690947916, + "loss": 0.5776, + "step": 37520 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016684185549887293, + "loss": 0.6514, + "step": 37530 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001668062640882667, + "loss": 0.6176, + "step": 37540 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016677067267766043, + "loss": 0.6143, + "step": 37550 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001667350812670542, + "loss": 0.5776, + "step": 37560 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016669948985644796, + "loss": 0.6282, + "step": 37570 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016666389844584173, + "loss": 0.7094, + "step": 37580 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016662830703523547, + "loss": 0.6112, + "step": 37590 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016659271562462924, + "loss": 0.6485, + "step": 37600 + }, + { + "epoch": 1.34, + "learning_rate": 0.000166557124214023, + "loss": 0.5092, + "step": 37610 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016652153280341677, + "loss": 0.648, + "step": 37620 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001664859413928105, + "loss": 0.669, + "step": 37630 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001664503499822043, + "loss": 0.5825, + "step": 37640 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016641475857159804, + "loss": 0.7019, + "step": 37650 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016637916716099178, + "loss": 0.6895, + "step": 37660 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016634357575038558, + "loss": 0.6605, + "step": 37670 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016630798433977932, + "loss": 0.6304, + "step": 37680 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016627239292917308, + "loss": 0.5878, + "step": 37690 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016623680151856685, + "loss": 0.5567, + "step": 37700 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016620121010796061, + "loss": 0.7252, + "step": 37710 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016616561869735435, + "loss": 0.5765, + "step": 37720 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001661300272867481, + "loss": 0.6325, + "step": 37730 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001660944358761419, + "loss": 0.6906, + "step": 37740 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016605884446553563, + "loss": 0.6197, + "step": 37750 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001660232530549294, + "loss": 0.6186, + "step": 37760 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016598766164432316, + "loss": 0.6073, + "step": 37770 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016595207023371693, + "loss": 0.7429, + "step": 37780 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016591647882311067, + "loss": 0.6595, + "step": 37790 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016588088741250446, + "loss": 0.6925, + "step": 37800 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001658452960018982, + "loss": 0.6715, + "step": 37810 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016580970459129194, + "loss": 0.6119, + "step": 37820 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001657741131806857, + "loss": 0.6941, + "step": 37830 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016573852177007947, + "loss": 0.5321, + "step": 37840 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016570293035947324, + "loss": 0.5437, + "step": 37850 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016566733894886698, + "loss": 0.6818, + "step": 37860 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016563174753826077, + "loss": 0.5539, + "step": 37870 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001655961561276545, + "loss": 0.5567, + "step": 37880 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016556056471704825, + "loss": 0.5346, + "step": 37890 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016552497330644204, + "loss": 0.6578, + "step": 37900 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016548938189583578, + "loss": 0.621, + "step": 37910 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016545379048522955, + "loss": 0.6001, + "step": 37920 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016541819907462332, + "loss": 0.6444, + "step": 37930 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016538260766401708, + "loss": 0.6457, + "step": 37940 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016534701625341082, + "loss": 0.6677, + "step": 37950 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016531142484280456, + "loss": 0.6791, + "step": 37960 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016527583343219836, + "loss": 0.5871, + "step": 37970 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001652402420215921, + "loss": 0.5761, + "step": 37980 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016520465061098586, + "loss": 0.5987, + "step": 37990 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016516905920037963, + "loss": 0.7028, + "step": 38000 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001651334677897734, + "loss": 0.4932, + "step": 38010 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016509787637916713, + "loss": 0.6094, + "step": 38020 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016506228496856093, + "loss": 0.5965, + "step": 38030 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016502669355795467, + "loss": 0.5805, + "step": 38040 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001649911021473484, + "loss": 0.5983, + "step": 38050 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016495551073674217, + "loss": 0.5337, + "step": 38060 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016491991932613594, + "loss": 0.605, + "step": 38070 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001648843279155297, + "loss": 0.6278, + "step": 38080 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016484873650492345, + "loss": 0.5465, + "step": 38090 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016481314509431724, + "loss": 0.609, + "step": 38100 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016477755368371098, + "loss": 0.5686, + "step": 38110 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016474196227310475, + "loss": 0.7291, + "step": 38120 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001647063708624985, + "loss": 0.6359, + "step": 38130 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016467077945189225, + "loss": 0.5942, + "step": 38140 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016463518804128602, + "loss": 0.6549, + "step": 38150 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016459959663067978, + "loss": 0.5157, + "step": 38160 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016456400522007355, + "loss": 0.5522, + "step": 38170 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001645284138094673, + "loss": 0.6147, + "step": 38180 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016449282239886106, + "loss": 0.6557, + "step": 38190 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016445723098825482, + "loss": 0.71, + "step": 38200 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001644216395776486, + "loss": 0.469, + "step": 38210 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016438604816704233, + "loss": 0.6635, + "step": 38220 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016435045675643612, + "loss": 0.6331, + "step": 38230 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016431486534582986, + "loss": 0.5411, + "step": 38240 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001642792739352236, + "loss": 0.5569, + "step": 38250 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001642436825246174, + "loss": 0.7296, + "step": 38260 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016420809111401114, + "loss": 0.5555, + "step": 38270 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001641724997034049, + "loss": 0.6592, + "step": 38280 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016413690829279864, + "loss": 0.5971, + "step": 38290 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016410131688219244, + "loss": 0.5677, + "step": 38300 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016406572547158617, + "loss": 0.6217, + "step": 38310 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016403013406097991, + "loss": 0.622, + "step": 38320 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001639945426503737, + "loss": 0.6465, + "step": 38330 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016395895123976745, + "loss": 0.5848, + "step": 38340 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016392335982916121, + "loss": 0.6005, + "step": 38350 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016388776841855498, + "loss": 0.6728, + "step": 38360 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016385217700794875, + "loss": 0.5593, + "step": 38370 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001638165855973425, + "loss": 0.5825, + "step": 38380 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016378099418673628, + "loss": 0.6818, + "step": 38390 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016374540277613002, + "loss": 0.6416, + "step": 38400 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016370981136552376, + "loss": 0.7466, + "step": 38410 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016367421995491753, + "loss": 0.596, + "step": 38420 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001636386285443113, + "loss": 0.574, + "step": 38430 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016360303713370506, + "loss": 0.6595, + "step": 38440 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001635674457230988, + "loss": 0.6545, + "step": 38450 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001635318543124926, + "loss": 0.6703, + "step": 38460 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016349626290188633, + "loss": 0.6218, + "step": 38470 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016346067149128007, + "loss": 0.6813, + "step": 38480 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016342508008067386, + "loss": 0.5914, + "step": 38490 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001633894886700676, + "loss": 0.5602, + "step": 38500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016335389725946137, + "loss": 0.5716, + "step": 38510 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001633183058488551, + "loss": 0.4835, + "step": 38520 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001632827144382489, + "loss": 0.5643, + "step": 38530 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016324712302764264, + "loss": 0.6415, + "step": 38540 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016321153161703638, + "loss": 0.5389, + "step": 38550 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016317594020643018, + "loss": 0.53, + "step": 38560 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016314034879582392, + "loss": 0.653, + "step": 38570 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016310475738521768, + "loss": 0.6952, + "step": 38580 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016306916597461145, + "loss": 0.587, + "step": 38590 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016303357456400522, + "loss": 0.6972, + "step": 38600 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016299798315339895, + "loss": 0.5588, + "step": 38610 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016296239174279275, + "loss": 0.5923, + "step": 38620 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001629268003321865, + "loss": 0.5814, + "step": 38630 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016289120892158023, + "loss": 0.5603, + "step": 38640 + }, + { + "epoch": 1.37, + "learning_rate": 0.000162855617510974, + "loss": 0.7134, + "step": 38650 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016282002610036776, + "loss": 0.6625, + "step": 38660 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016278443468976153, + "loss": 0.6164, + "step": 38670 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016274884327915527, + "loss": 0.5891, + "step": 38680 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016271325186854906, + "loss": 0.6687, + "step": 38690 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001626776604579428, + "loss": 0.6397, + "step": 38700 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016264206904733657, + "loss": 0.6173, + "step": 38710 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016260647763673033, + "loss": 0.6145, + "step": 38720 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001625708862261241, + "loss": 0.5998, + "step": 38730 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016253529481551784, + "loss": 0.6278, + "step": 38740 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016249970340491158, + "loss": 0.5276, + "step": 38750 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016246411199430537, + "loss": 0.5773, + "step": 38760 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001624285205836991, + "loss": 0.5389, + "step": 38770 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016239292917309288, + "loss": 0.5396, + "step": 38780 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016235733776248664, + "loss": 0.6357, + "step": 38790 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001623217463518804, + "loss": 0.6555, + "step": 38800 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016228615494127415, + "loss": 0.6004, + "step": 38810 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016225056353066794, + "loss": 0.6563, + "step": 38820 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016221497212006168, + "loss": 0.5752, + "step": 38830 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016217938070945542, + "loss": 0.5483, + "step": 38840 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001621437892988492, + "loss": 0.6046, + "step": 38850 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016210819788824296, + "loss": 0.6429, + "step": 38860 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016207260647763672, + "loss": 0.6216, + "step": 38870 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016203701506703046, + "loss": 0.6383, + "step": 38880 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016200142365642426, + "loss": 0.569, + "step": 38890 + }, + { + "epoch": 1.38, + "learning_rate": 0.000161965832245818, + "loss": 0.5757, + "step": 38900 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016193024083521173, + "loss": 0.562, + "step": 38910 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016189464942460553, + "loss": 0.6005, + "step": 38920 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016185905801399927, + "loss": 0.6711, + "step": 38930 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016182346660339303, + "loss": 0.545, + "step": 38940 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001617878751927868, + "loss": 0.5323, + "step": 38950 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016175228378218057, + "loss": 0.5466, + "step": 38960 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001617166923715743, + "loss": 0.6332, + "step": 38970 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016168110096096805, + "loss": 0.6207, + "step": 38980 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016164550955036184, + "loss": 0.7036, + "step": 38990 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016160991813975558, + "loss": 0.6836, + "step": 39000 + } + ], + "max_steps": 84390, + "num_train_epochs": 3, + "total_flos": 3.4788268014659174e+17, + "trial_name": null, + "trial_params": null +}