diff --git "a/training_graph.json" "b/training_graph.json" new file mode 100644--- /dev/null +++ "b/training_graph.json" @@ -0,0 +1,6458 @@ +[ + { + "current_steps": 1, + "loss": 2.243, + "learning_rate": 0.0, + "epoch": 0.0 + }, + { + "current_steps": 3, + "loss": 2.4906, + "learning_rate": 0.0, + "epoch": 0.0 + }, + { + "current_steps": 5, + "loss": 2.4031, + "learning_rate": 0.0, + "epoch": 0.0 + }, + { + "current_steps": 7, + "loss": 2.0465, + "learning_rate": 4.9999999999999996e-06, + "epoch": 0.0 + }, + { + "current_steps": 9, + "loss": 2.1139, + "learning_rate": 4.9999999999999996e-06, + "epoch": 0.0 + }, + { + "current_steps": 11, + "loss": 2.3079, + "learning_rate": 9.999999999999999e-06, + "epoch": 0.0 + }, + { + "current_steps": 13, + "loss": 2.4454, + "learning_rate": 9.999999999999999e-06, + "epoch": 0.0 + }, + { + "current_steps": 15, + "loss": 2.1482, + "learning_rate": 1.4999999999999999e-05, + "epoch": 0.0 + }, + { + "current_steps": 17, + "loss": 2.2773, + "learning_rate": 1.9999999999999998e-05, + "epoch": 0.0 + }, + { + "current_steps": 19, + "loss": 2.1211, + "learning_rate": 1.9999999999999998e-05, + "epoch": 0.0 + }, + { + "current_steps": 21, + "loss": 2.4949, + "learning_rate": 2.4999999999999998e-05, + "epoch": 0.0 + }, + { + "current_steps": 23, + "loss": 1.9488, + "learning_rate": 2.9999999999999997e-05, + "epoch": 0.0 + }, + { + "current_steps": 25, + "loss": 2.0129, + "learning_rate": 3.5e-05, + "epoch": 0.0 + }, + { + "current_steps": 27, + "loss": 2.3205, + "learning_rate": 3.9999999999999996e-05, + "epoch": 0.0 + }, + { + "current_steps": 29, + "loss": 1.7864, + "learning_rate": 4.4999999999999996e-05, + "epoch": 0.0 + }, + { + "current_steps": 31, + "loss": 2.161, + "learning_rate": 4.9999999999999996e-05, + "epoch": 0.0 + }, + { + "current_steps": 33, + "loss": 2.0796, + "learning_rate": 5.499999999999999e-05, + "epoch": 0.0 + }, + { + "current_steps": 35, + "loss": 2.0092, + "learning_rate": 5.9999999999999995e-05, + "epoch": 0.0 + }, + { + "current_steps": 37, + "loss": 1.9249, + "learning_rate": 6.5e-05, + "epoch": 0.0 + }, + { + "current_steps": 39, + "loss": 2.0162, + "learning_rate": 7e-05, + "epoch": 0.0 + }, + { + "current_steps": 41, + "loss": 1.9155, + "learning_rate": 7.5e-05, + "epoch": 0.0 + }, + { + "current_steps": 43, + "loss": 1.9234, + "learning_rate": 7.999999999999999e-05, + "epoch": 0.01 + }, + { + "current_steps": 45, + "loss": 1.8821, + "learning_rate": 8.499999999999999e-05, + "epoch": 0.01 + }, + { + "current_steps": 47, + "loss": 1.8072, + "learning_rate": 8.999999999999999e-05, + "epoch": 0.01 + }, + { + "current_steps": 49, + "loss": 1.9472, + "learning_rate": 9.499999999999999e-05, + "epoch": 0.01 + }, + { + "current_steps": 51, + "loss": 1.8368, + "learning_rate": 9.999999999999999e-05, + "epoch": 0.01 + }, + { + "current_steps": 53, + "loss": 1.6759, + "learning_rate": 0.00010499999999999999, + "epoch": 0.01 + }, + { + "current_steps": 55, + "loss": 1.9959, + "learning_rate": 0.00010999999999999998, + "epoch": 0.01 + }, + { + "current_steps": 57, + "loss": 1.7571, + "learning_rate": 0.000115, + "epoch": 0.01 + }, + { + "current_steps": 59, + "loss": 1.6964, + "learning_rate": 0.00011999999999999999, + "epoch": 0.01 + }, + { + "current_steps": 61, + "loss": 1.7602, + "learning_rate": 0.000125, + "epoch": 0.01 + }, + { + "current_steps": 63, + "loss": 1.6103, + "learning_rate": 0.00013, + "epoch": 0.01 + }, + { + "current_steps": 65, + "loss": 1.73, + "learning_rate": 0.000135, + "epoch": 0.01 + }, + { + "current_steps": 67, + "loss": 1.6363, + "learning_rate": 0.00014, + "epoch": 0.01 + }, + { + "current_steps": 69, + "loss": 1.6342, + "learning_rate": 0.000145, + "epoch": 0.01 + }, + { + "current_steps": 71, + "loss": 1.6227, + "learning_rate": 0.00015, + "epoch": 0.01 + }, + { + "current_steps": 73, + "loss": 1.7408, + "learning_rate": 0.000155, + "epoch": 0.01 + }, + { + "current_steps": 75, + "loss": 1.6787, + "learning_rate": 0.00015999999999999999, + "epoch": 0.01 + }, + { + "current_steps": 77, + "loss": 1.998, + "learning_rate": 0.000165, + "epoch": 0.01 + }, + { + "current_steps": 79, + "loss": 1.5218, + "learning_rate": 0.00016999999999999999, + "epoch": 0.01 + }, + { + "current_steps": 81, + "loss": 1.648, + "learning_rate": 0.000175, + "epoch": 0.01 + }, + { + "current_steps": 83, + "loss": 1.8393, + "learning_rate": 0.00017999999999999998, + "epoch": 0.01 + }, + { + "current_steps": 85, + "loss": 1.5411, + "learning_rate": 0.000185, + "epoch": 0.01 + }, + { + "current_steps": 87, + "loss": 1.5455, + "learning_rate": 0.00018999999999999998, + "epoch": 0.01 + }, + { + "current_steps": 89, + "loss": 1.8884, + "learning_rate": 0.000195, + "epoch": 0.01 + }, + { + "current_steps": 91, + "loss": 1.7582, + "learning_rate": 0.00019999999999999998, + "epoch": 0.01 + }, + { + "current_steps": 93, + "loss": 1.6639, + "learning_rate": 0.000205, + "epoch": 0.01 + }, + { + "current_steps": 95, + "loss": 1.5912, + "learning_rate": 0.00020999999999999998, + "epoch": 0.01 + }, + { + "current_steps": 97, + "loss": 1.8708, + "learning_rate": 0.000215, + "epoch": 0.01 + }, + { + "current_steps": 99, + "loss": 1.776, + "learning_rate": 0.00021999999999999995, + "epoch": 0.01 + }, + { + "current_steps": 101, + "loss": 1.726, + "learning_rate": 0.000225, + "epoch": 0.01 + }, + { + "current_steps": 103, + "loss": 1.7652, + "learning_rate": 0.00023, + "epoch": 0.01 + }, + { + "current_steps": 105, + "loss": 1.655, + "learning_rate": 0.00023499999999999997, + "epoch": 0.01 + }, + { + "current_steps": 107, + "loss": 1.552, + "learning_rate": 0.00023999999999999998, + "epoch": 0.01 + }, + { + "current_steps": 109, + "loss": 1.8468, + "learning_rate": 0.000245, + "epoch": 0.01 + }, + { + "current_steps": 111, + "loss": 1.7365, + "learning_rate": 0.00025, + "epoch": 0.01 + }, + { + "current_steps": 113, + "loss": 1.8068, + "learning_rate": 0.00025499999999999996, + "epoch": 0.01 + }, + { + "current_steps": 115, + "loss": 1.6334, + "learning_rate": 0.00026, + "epoch": 0.01 + }, + { + "current_steps": 117, + "loss": 1.7037, + "learning_rate": 0.000265, + "epoch": 0.01 + }, + { + "current_steps": 119, + "loss": 1.7575, + "learning_rate": 0.00027, + "epoch": 0.01 + }, + { + "current_steps": 121, + "loss": 1.5549, + "learning_rate": 0.00027499999999999996, + "epoch": 0.01 + }, + { + "current_steps": 123, + "loss": 1.7386, + "learning_rate": 0.00028, + "epoch": 0.01 + }, + { + "current_steps": 125, + "loss": 1.4847, + "learning_rate": 0.000285, + "epoch": 0.01 + }, + { + "current_steps": 127, + "loss": 1.912, + "learning_rate": 0.00029, + "epoch": 0.01 + }, + { + "current_steps": 129, + "loss": 1.4259, + "learning_rate": 0.00029499999999999996, + "epoch": 0.02 + }, + { + "current_steps": 131, + "loss": 1.7833, + "learning_rate": 0.0003, + "epoch": 0.02 + }, + { + "current_steps": 133, + "loss": 1.6208, + "learning_rate": 0.00029970443349753694, + "epoch": 0.02 + }, + { + "current_steps": 135, + "loss": 1.7041, + "learning_rate": 0.00029940886699507385, + "epoch": 0.02 + }, + { + "current_steps": 137, + "loss": 1.6153, + "learning_rate": 0.0002991133004926108, + "epoch": 0.02 + }, + { + "current_steps": 139, + "loss": 1.716, + "learning_rate": 0.00029881773399014774, + "epoch": 0.02 + }, + { + "current_steps": 141, + "loss": 1.5853, + "learning_rate": 0.0002985221674876847, + "epoch": 0.02 + }, + { + "current_steps": 143, + "loss": 1.728, + "learning_rate": 0.00029822660098522167, + "epoch": 0.02 + }, + { + "current_steps": 145, + "loss": 1.9886, + "learning_rate": 0.0002979310344827586, + "epoch": 0.02 + }, + { + "current_steps": 147, + "loss": 1.7207, + "learning_rate": 0.00029763546798029555, + "epoch": 0.02 + }, + { + "current_steps": 149, + "loss": 1.6006, + "learning_rate": 0.00029733990147783247, + "epoch": 0.02 + }, + { + "current_steps": 151, + "loss": 1.3388, + "learning_rate": 0.00029704433497536943, + "epoch": 0.02 + }, + { + "current_steps": 153, + "loss": 1.5003, + "learning_rate": 0.00029674876847290635, + "epoch": 0.02 + }, + { + "current_steps": 155, + "loss": 1.479, + "learning_rate": 0.0002964532019704433, + "epoch": 0.02 + }, + { + "current_steps": 157, + "loss": 1.58, + "learning_rate": 0.0002961576354679803, + "epoch": 0.02 + }, + { + "current_steps": 159, + "loss": 1.8409, + "learning_rate": 0.0002958620689655172, + "epoch": 0.02 + }, + { + "current_steps": 161, + "loss": 1.6689, + "learning_rate": 0.00029556650246305416, + "epoch": 0.02 + }, + { + "current_steps": 163, + "loss": 1.5511, + "learning_rate": 0.0002952709359605911, + "epoch": 0.02 + }, + { + "current_steps": 165, + "loss": 1.6765, + "learning_rate": 0.00029497536945812805, + "epoch": 0.02 + }, + { + "current_steps": 167, + "loss": 1.8226, + "learning_rate": 0.000294679802955665, + "epoch": 0.02 + }, + { + "current_steps": 169, + "loss": 1.7741, + "learning_rate": 0.0002943842364532019, + "epoch": 0.02 + }, + { + "current_steps": 171, + "loss": 1.59, + "learning_rate": 0.0002940886699507389, + "epoch": 0.02 + }, + { + "current_steps": 173, + "loss": 1.6031, + "learning_rate": 0.0002937931034482758, + "epoch": 0.02 + }, + { + "current_steps": 175, + "loss": 1.568, + "learning_rate": 0.0002934975369458128, + "epoch": 0.02 + }, + { + "current_steps": 177, + "loss": 1.2813, + "learning_rate": 0.00029320197044334974, + "epoch": 0.02 + }, + { + "current_steps": 179, + "loss": 1.6867, + "learning_rate": 0.00029290640394088666, + "epoch": 0.02 + }, + { + "current_steps": 181, + "loss": 1.7537, + "learning_rate": 0.0002926108374384236, + "epoch": 0.02 + }, + { + "current_steps": 183, + "loss": 1.6056, + "learning_rate": 0.00029231527093596054, + "epoch": 0.02 + }, + { + "current_steps": 185, + "loss": 1.6866, + "learning_rate": 0.0002920197044334975, + "epoch": 0.02 + }, + { + "current_steps": 187, + "loss": 1.7206, + "learning_rate": 0.0002917241379310344, + "epoch": 0.02 + }, + { + "current_steps": 189, + "loss": 1.6578, + "learning_rate": 0.0002914285714285714, + "epoch": 0.02 + }, + { + "current_steps": 191, + "loss": 1.5329, + "learning_rate": 0.00029113300492610836, + "epoch": 0.02 + }, + { + "current_steps": 193, + "loss": 1.6231, + "learning_rate": 0.00029083743842364527, + "epoch": 0.02 + }, + { + "current_steps": 195, + "loss": 1.4756, + "learning_rate": 0.00029054187192118224, + "epoch": 0.02 + }, + { + "current_steps": 197, + "loss": 1.4725, + "learning_rate": 0.00029024630541871915, + "epoch": 0.02 + }, + { + "current_steps": 199, + "loss": 1.7892, + "learning_rate": 0.0002899507389162561, + "epoch": 0.02 + }, + { + "current_steps": 201, + "loss": 1.7455, + "learning_rate": 0.0002896551724137931, + "epoch": 0.02 + }, + { + "current_steps": 203, + "loss": 1.6733, + "learning_rate": 0.00028935960591133, + "epoch": 0.02 + }, + { + "current_steps": 205, + "loss": 1.4954, + "learning_rate": 0.00028906403940886697, + "epoch": 0.02 + }, + { + "current_steps": 207, + "loss": 1.6448, + "learning_rate": 0.0002887684729064039, + "epoch": 0.02 + }, + { + "current_steps": 209, + "loss": 1.7882, + "learning_rate": 0.00028847290640394085, + "epoch": 0.02 + }, + { + "current_steps": 211, + "loss": 1.7796, + "learning_rate": 0.0002881773399014778, + "epoch": 0.02 + }, + { + "current_steps": 213, + "loss": 1.6399, + "learning_rate": 0.00028788177339901473, + "epoch": 0.02 + }, + { + "current_steps": 215, + "loss": 1.6645, + "learning_rate": 0.0002875862068965517, + "epoch": 0.03 + }, + { + "current_steps": 217, + "loss": 1.7824, + "learning_rate": 0.0002872906403940886, + "epoch": 0.03 + }, + { + "current_steps": 219, + "loss": 1.5053, + "learning_rate": 0.0002869950738916256, + "epoch": 0.03 + }, + { + "current_steps": 221, + "loss": 1.5483, + "learning_rate": 0.00028669950738916255, + "epoch": 0.03 + }, + { + "current_steps": 223, + "loss": 1.5874, + "learning_rate": 0.00028640394088669946, + "epoch": 0.03 + }, + { + "current_steps": 225, + "loss": 1.6807, + "learning_rate": 0.00028610837438423643, + "epoch": 0.03 + }, + { + "current_steps": 227, + "loss": 1.502, + "learning_rate": 0.00028581280788177334, + "epoch": 0.03 + }, + { + "current_steps": 229, + "loss": 1.6234, + "learning_rate": 0.0002855172413793103, + "epoch": 0.03 + }, + { + "current_steps": 231, + "loss": 1.8706, + "learning_rate": 0.0002852216748768473, + "epoch": 0.03 + }, + { + "current_steps": 233, + "loss": 1.456, + "learning_rate": 0.0002849261083743842, + "epoch": 0.03 + }, + { + "current_steps": 235, + "loss": 1.4441, + "learning_rate": 0.00028463054187192116, + "epoch": 0.03 + }, + { + "current_steps": 237, + "loss": 1.7222, + "learning_rate": 0.00028433497536945807, + "epoch": 0.03 + }, + { + "current_steps": 239, + "loss": 1.6667, + "learning_rate": 0.00028403940886699504, + "epoch": 0.03 + }, + { + "current_steps": 241, + "loss": 1.6506, + "learning_rate": 0.000283743842364532, + "epoch": 0.03 + }, + { + "current_steps": 243, + "loss": 1.4267, + "learning_rate": 0.0002834482758620689, + "epoch": 0.03 + }, + { + "current_steps": 245, + "loss": 1.6695, + "learning_rate": 0.0002831527093596059, + "epoch": 0.03 + }, + { + "current_steps": 247, + "loss": 1.6691, + "learning_rate": 0.0002828571428571428, + "epoch": 0.03 + }, + { + "current_steps": 249, + "loss": 1.5338, + "learning_rate": 0.00028256157635467977, + "epoch": 0.03 + }, + { + "current_steps": 251, + "loss": 1.6988, + "learning_rate": 0.00028226600985221674, + "epoch": 0.03 + }, + { + "current_steps": 253, + "loss": 1.4844, + "learning_rate": 0.00028197044334975365, + "epoch": 0.03 + }, + { + "current_steps": 255, + "loss": 1.7708, + "learning_rate": 0.0002816748768472906, + "epoch": 0.03 + }, + { + "current_steps": 257, + "loss": 1.6342, + "learning_rate": 0.00028137931034482753, + "epoch": 0.03 + }, + { + "current_steps": 259, + "loss": 1.4992, + "learning_rate": 0.0002810837438423645, + "epoch": 0.03 + }, + { + "current_steps": 261, + "loss": 1.5933, + "learning_rate": 0.00028078817733990147, + "epoch": 0.03 + }, + { + "current_steps": 263, + "loss": 1.5269, + "learning_rate": 0.0002804926108374384, + "epoch": 0.03 + }, + { + "current_steps": 265, + "loss": 1.7497, + "learning_rate": 0.00028019704433497535, + "epoch": 0.03 + }, + { + "current_steps": 267, + "loss": 1.3407, + "learning_rate": 0.00027990147783251226, + "epoch": 0.03 + }, + { + "current_steps": 269, + "loss": 1.611, + "learning_rate": 0.00027960591133004923, + "epoch": 0.03 + }, + { + "current_steps": 271, + "loss": 1.5756, + "learning_rate": 0.0002793103448275862, + "epoch": 0.03 + }, + { + "current_steps": 273, + "loss": 1.566, + "learning_rate": 0.0002790147783251231, + "epoch": 0.03 + }, + { + "current_steps": 275, + "loss": 1.5732, + "learning_rate": 0.0002787192118226601, + "epoch": 0.03 + }, + { + "current_steps": 277, + "loss": 1.5015, + "learning_rate": 0.00027842364532019705, + "epoch": 0.03 + }, + { + "current_steps": 279, + "loss": 1.5882, + "learning_rate": 0.00027812807881773396, + "epoch": 0.03 + }, + { + "current_steps": 281, + "loss": 1.7877, + "learning_rate": 0.00027783251231527093, + "epoch": 0.03 + }, + { + "current_steps": 283, + "loss": 1.4578, + "learning_rate": 0.00027753694581280784, + "epoch": 0.03 + }, + { + "current_steps": 285, + "loss": 1.4568, + "learning_rate": 0.0002772413793103448, + "epoch": 0.03 + }, + { + "current_steps": 287, + "loss": 1.4897, + "learning_rate": 0.0002769458128078818, + "epoch": 0.03 + }, + { + "current_steps": 289, + "loss": 1.642, + "learning_rate": 0.0002766502463054187, + "epoch": 0.03 + }, + { + "current_steps": 291, + "loss": 1.5697, + "learning_rate": 0.00027635467980295566, + "epoch": 0.03 + }, + { + "current_steps": 293, + "loss": 1.4063, + "learning_rate": 0.00027605911330049257, + "epoch": 0.03 + }, + { + "current_steps": 295, + "loss": 1.5415, + "learning_rate": 0.00027576354679802954, + "epoch": 0.03 + }, + { + "current_steps": 297, + "loss": 1.497, + "learning_rate": 0.0002754679802955665, + "epoch": 0.03 + }, + { + "current_steps": 299, + "loss": 1.561, + "learning_rate": 0.0002751724137931034, + "epoch": 0.03 + }, + { + "current_steps": 301, + "loss": 1.5017, + "learning_rate": 0.0002748768472906404, + "epoch": 0.04 + }, + { + "current_steps": 303, + "loss": 1.478, + "learning_rate": 0.0002745812807881773, + "epoch": 0.04 + }, + { + "current_steps": 305, + "loss": 1.4427, + "learning_rate": 0.00027428571428571427, + "epoch": 0.04 + }, + { + "current_steps": 307, + "loss": 1.6162, + "learning_rate": 0.00027399014778325124, + "epoch": 0.04 + }, + { + "current_steps": 309, + "loss": 1.5174, + "learning_rate": 0.00027369458128078815, + "epoch": 0.04 + }, + { + "current_steps": 311, + "loss": 1.6452, + "learning_rate": 0.0002733990147783251, + "epoch": 0.04 + }, + { + "current_steps": 313, + "loss": 1.6152, + "learning_rate": 0.00027310344827586203, + "epoch": 0.04 + }, + { + "current_steps": 315, + "loss": 1.5749, + "learning_rate": 0.000272807881773399, + "epoch": 0.04 + }, + { + "current_steps": 317, + "loss": 1.6, + "learning_rate": 0.00027251231527093597, + "epoch": 0.04 + }, + { + "current_steps": 319, + "loss": 1.4854, + "learning_rate": 0.0002722167487684729, + "epoch": 0.04 + }, + { + "current_steps": 321, + "loss": 1.5066, + "learning_rate": 0.00027192118226600985, + "epoch": 0.04 + }, + { + "current_steps": 323, + "loss": 1.6054, + "learning_rate": 0.00027162561576354676, + "epoch": 0.04 + }, + { + "current_steps": 325, + "loss": 1.5749, + "learning_rate": 0.00027133004926108373, + "epoch": 0.04 + }, + { + "current_steps": 327, + "loss": 1.6021, + "learning_rate": 0.0002710344827586207, + "epoch": 0.04 + }, + { + "current_steps": 329, + "loss": 1.4024, + "learning_rate": 0.0002707389162561576, + "epoch": 0.04 + }, + { + "current_steps": 331, + "loss": 1.6317, + "learning_rate": 0.0002704433497536946, + "epoch": 0.04 + }, + { + "current_steps": 333, + "loss": 1.4919, + "learning_rate": 0.00027014778325123155, + "epoch": 0.04 + }, + { + "current_steps": 335, + "loss": 1.4037, + "learning_rate": 0.00026985221674876846, + "epoch": 0.04 + }, + { + "current_steps": 337, + "loss": 1.5371, + "learning_rate": 0.00026955665024630543, + "epoch": 0.04 + }, + { + "current_steps": 339, + "loss": 1.5984, + "learning_rate": 0.00026926108374384234, + "epoch": 0.04 + }, + { + "current_steps": 341, + "loss": 1.5313, + "learning_rate": 0.0002689655172413793, + "epoch": 0.04 + }, + { + "current_steps": 343, + "loss": 1.586, + "learning_rate": 0.0002686699507389162, + "epoch": 0.04 + }, + { + "current_steps": 345, + "loss": 1.5473, + "learning_rate": 0.0002683743842364532, + "epoch": 0.04 + }, + { + "current_steps": 347, + "loss": 1.6482, + "learning_rate": 0.00026807881773399016, + "epoch": 0.04 + }, + { + "current_steps": 349, + "loss": 1.5501, + "learning_rate": 0.00026778325123152707, + "epoch": 0.04 + }, + { + "current_steps": 351, + "loss": 1.8194, + "learning_rate": 0.00026748768472906404, + "epoch": 0.04 + }, + { + "current_steps": 353, + "loss": 1.4082, + "learning_rate": 0.00026719211822660095, + "epoch": 0.04 + }, + { + "current_steps": 355, + "loss": 1.7386, + "learning_rate": 0.0002668965517241379, + "epoch": 0.04 + }, + { + "current_steps": 357, + "loss": 1.591, + "learning_rate": 0.0002666009852216749, + "epoch": 0.04 + }, + { + "current_steps": 359, + "loss": 1.6685, + "learning_rate": 0.0002663054187192118, + "epoch": 0.04 + }, + { + "current_steps": 361, + "loss": 1.7065, + "learning_rate": 0.00026600985221674877, + "epoch": 0.04 + }, + { + "current_steps": 363, + "loss": 1.6918, + "learning_rate": 0.0002657142857142857, + "epoch": 0.04 + }, + { + "current_steps": 365, + "loss": 1.5165, + "learning_rate": 0.00026541871921182265, + "epoch": 0.04 + }, + { + "current_steps": 367, + "loss": 1.535, + "learning_rate": 0.0002651231527093596, + "epoch": 0.04 + }, + { + "current_steps": 369, + "loss": 1.6364, + "learning_rate": 0.00026482758620689653, + "epoch": 0.04 + }, + { + "current_steps": 371, + "loss": 1.4763, + "learning_rate": 0.0002645320197044335, + "epoch": 0.04 + }, + { + "current_steps": 373, + "loss": 1.4425, + "learning_rate": 0.0002642364532019704, + "epoch": 0.04 + }, + { + "current_steps": 375, + "loss": 1.3717, + "learning_rate": 0.0002639408866995074, + "epoch": 0.04 + }, + { + "current_steps": 377, + "loss": 1.4793, + "learning_rate": 0.0002636453201970443, + "epoch": 0.04 + }, + { + "current_steps": 379, + "loss": 1.7218, + "learning_rate": 0.00026334975369458126, + "epoch": 0.04 + }, + { + "current_steps": 381, + "loss": 1.4009, + "learning_rate": 0.00026305418719211823, + "epoch": 0.04 + }, + { + "current_steps": 383, + "loss": 1.7674, + "learning_rate": 0.00026275862068965514, + "epoch": 0.04 + }, + { + "current_steps": 385, + "loss": 1.5999, + "learning_rate": 0.0002624630541871921, + "epoch": 0.04 + }, + { + "current_steps": 387, + "loss": 1.5977, + "learning_rate": 0.000262167487684729, + "epoch": 0.05 + }, + { + "current_steps": 389, + "loss": 1.5001, + "learning_rate": 0.000261871921182266, + "epoch": 0.05 + }, + { + "current_steps": 391, + "loss": 1.6639, + "learning_rate": 0.00026157635467980296, + "epoch": 0.05 + }, + { + "current_steps": 393, + "loss": 1.578, + "learning_rate": 0.0002612807881773399, + "epoch": 0.05 + }, + { + "current_steps": 395, + "loss": 1.7178, + "learning_rate": 0.00026098522167487684, + "epoch": 0.05 + }, + { + "current_steps": 397, + "loss": 1.6051, + "learning_rate": 0.00026068965517241376, + "epoch": 0.05 + }, + { + "current_steps": 399, + "loss": 1.7263, + "learning_rate": 0.0002603940886699507, + "epoch": 0.05 + }, + { + "current_steps": 401, + "loss": 1.4514, + "learning_rate": 0.0002600985221674877, + "epoch": 0.05 + }, + { + "current_steps": 403, + "loss": 1.5681, + "learning_rate": 0.0002598029556650246, + "epoch": 0.05 + }, + { + "current_steps": 405, + "loss": 1.3968, + "learning_rate": 0.00025950738916256157, + "epoch": 0.05 + }, + { + "current_steps": 407, + "loss": 1.5056, + "learning_rate": 0.0002592118226600985, + "epoch": 0.05 + }, + { + "current_steps": 409, + "loss": 1.4911, + "learning_rate": 0.00025891625615763545, + "epoch": 0.05 + }, + { + "current_steps": 411, + "loss": 1.1793, + "learning_rate": 0.00025862068965517237, + "epoch": 0.05 + }, + { + "current_steps": 413, + "loss": 1.6403, + "learning_rate": 0.00025832512315270933, + "epoch": 0.05 + }, + { + "current_steps": 415, + "loss": 1.4649, + "learning_rate": 0.0002580295566502463, + "epoch": 0.05 + }, + { + "current_steps": 417, + "loss": 1.4229, + "learning_rate": 0.0002577339901477832, + "epoch": 0.05 + }, + { + "current_steps": 419, + "loss": 1.5858, + "learning_rate": 0.0002574384236453202, + "epoch": 0.05 + }, + { + "current_steps": 421, + "loss": 1.5472, + "learning_rate": 0.0002571428571428571, + "epoch": 0.05 + }, + { + "current_steps": 423, + "loss": 1.521, + "learning_rate": 0.00025684729064039406, + "epoch": 0.05 + }, + { + "current_steps": 425, + "loss": 1.3612, + "learning_rate": 0.00025655172413793103, + "epoch": 0.05 + }, + { + "current_steps": 427, + "loss": 1.6238, + "learning_rate": 0.00025625615763546795, + "epoch": 0.05 + }, + { + "current_steps": 429, + "loss": 1.8308, + "learning_rate": 0.0002559605911330049, + "epoch": 0.05 + }, + { + "current_steps": 431, + "loss": 1.3374, + "learning_rate": 0.00025566502463054183, + "epoch": 0.05 + }, + { + "current_steps": 433, + "loss": 1.4902, + "learning_rate": 0.0002553694581280788, + "epoch": 0.05 + }, + { + "current_steps": 435, + "loss": 1.5373, + "learning_rate": 0.00025507389162561576, + "epoch": 0.05 + }, + { + "current_steps": 437, + "loss": 1.5823, + "learning_rate": 0.0002547783251231527, + "epoch": 0.05 + }, + { + "current_steps": 439, + "loss": 1.4721, + "learning_rate": 0.00025448275862068964, + "epoch": 0.05 + }, + { + "current_steps": 441, + "loss": 1.5371, + "learning_rate": 0.00025418719211822656, + "epoch": 0.05 + }, + { + "current_steps": 443, + "loss": 1.6405, + "learning_rate": 0.0002538916256157635, + "epoch": 0.05 + }, + { + "current_steps": 445, + "loss": 1.4804, + "learning_rate": 0.00025359605911330044, + "epoch": 0.05 + }, + { + "current_steps": 447, + "loss": 1.3803, + "learning_rate": 0.0002533004926108374, + "epoch": 0.05 + }, + { + "current_steps": 449, + "loss": 1.5504, + "learning_rate": 0.0002530049261083744, + "epoch": 0.05 + }, + { + "current_steps": 451, + "loss": 1.5377, + "learning_rate": 0.0002527093596059113, + "epoch": 0.05 + }, + { + "current_steps": 453, + "loss": 1.5253, + "learning_rate": 0.00025241379310344826, + "epoch": 0.05 + }, + { + "current_steps": 455, + "loss": 1.4337, + "learning_rate": 0.00025211822660098517, + "epoch": 0.05 + }, + { + "current_steps": 457, + "loss": 1.4687, + "learning_rate": 0.00025182266009852214, + "epoch": 0.05 + }, + { + "current_steps": 459, + "loss": 1.6957, + "learning_rate": 0.0002515270935960591, + "epoch": 0.05 + }, + { + "current_steps": 461, + "loss": 1.4894, + "learning_rate": 0.000251231527093596, + "epoch": 0.05 + }, + { + "current_steps": 463, + "loss": 1.5078, + "learning_rate": 0.000250935960591133, + "epoch": 0.05 + }, + { + "current_steps": 465, + "loss": 1.5807, + "learning_rate": 0.0002506403940886699, + "epoch": 0.05 + }, + { + "current_steps": 467, + "loss": 1.5709, + "learning_rate": 0.00025034482758620687, + "epoch": 0.05 + }, + { + "current_steps": 469, + "loss": 1.6079, + "learning_rate": 0.00025004926108374383, + "epoch": 0.05 + }, + { + "current_steps": 471, + "loss": 1.6427, + "learning_rate": 0.00024975369458128075, + "epoch": 0.05 + }, + { + "current_steps": 473, + "loss": 1.3753, + "learning_rate": 0.0002494581280788177, + "epoch": 0.06 + }, + { + "current_steps": 475, + "loss": 1.6384, + "learning_rate": 0.00024916256157635463, + "epoch": 0.06 + }, + { + "current_steps": 477, + "loss": 1.5394, + "learning_rate": 0.0002488669950738916, + "epoch": 0.06 + }, + { + "current_steps": 479, + "loss": 1.4127, + "learning_rate": 0.00024857142857142857, + "epoch": 0.06 + }, + { + "current_steps": 481, + "loss": 1.5786, + "learning_rate": 0.0002482758620689655, + "epoch": 0.06 + }, + { + "current_steps": 483, + "loss": 1.4848, + "learning_rate": 0.00024798029556650245, + "epoch": 0.06 + }, + { + "current_steps": 485, + "loss": 1.4502, + "learning_rate": 0.00024768472906403936, + "epoch": 0.06 + }, + { + "current_steps": 487, + "loss": 1.6508, + "learning_rate": 0.00024738916256157633, + "epoch": 0.06 + }, + { + "current_steps": 489, + "loss": 1.5376, + "learning_rate": 0.00024709359605911324, + "epoch": 0.06 + }, + { + "current_steps": 491, + "loss": 1.5825, + "learning_rate": 0.0002467980295566502, + "epoch": 0.06 + }, + { + "current_steps": 493, + "loss": 1.6356, + "learning_rate": 0.0002465024630541872, + "epoch": 0.06 + }, + { + "current_steps": 495, + "loss": 1.5132, + "learning_rate": 0.0002462068965517241, + "epoch": 0.06 + }, + { + "current_steps": 497, + "loss": 1.6001, + "learning_rate": 0.00024591133004926106, + "epoch": 0.06 + }, + { + "current_steps": 499, + "loss": 1.4645, + "learning_rate": 0.00024561576354679797, + "epoch": 0.06 + }, + { + "current_steps": 501, + "loss": 1.592, + "learning_rate": 0.00024532019704433494, + "epoch": 0.06 + }, + { + "current_steps": 503, + "loss": 1.4911, + "learning_rate": 0.0002450246305418719, + "epoch": 0.06 + }, + { + "current_steps": 505, + "loss": 1.6054, + "learning_rate": 0.0002447290640394088, + "epoch": 0.06 + }, + { + "current_steps": 507, + "loss": 1.6558, + "learning_rate": 0.0002444334975369458, + "epoch": 0.06 + }, + { + "current_steps": 509, + "loss": 1.4264, + "learning_rate": 0.00024413793103448273, + "epoch": 0.06 + }, + { + "current_steps": 511, + "loss": 1.7094, + "learning_rate": 0.0002438423645320197, + "epoch": 0.06 + }, + { + "current_steps": 513, + "loss": 1.4223, + "learning_rate": 0.00024354679802955664, + "epoch": 0.06 + }, + { + "current_steps": 515, + "loss": 1.3422, + "learning_rate": 0.00024325123152709358, + "epoch": 0.06 + }, + { + "current_steps": 517, + "loss": 1.6417, + "learning_rate": 0.00024295566502463052, + "epoch": 0.06 + }, + { + "current_steps": 519, + "loss": 1.4039, + "learning_rate": 0.00024266009852216746, + "epoch": 0.06 + }, + { + "current_steps": 521, + "loss": 1.4574, + "learning_rate": 0.00024236453201970443, + "epoch": 0.06 + }, + { + "current_steps": 523, + "loss": 1.4048, + "learning_rate": 0.00024206896551724134, + "epoch": 0.06 + }, + { + "current_steps": 525, + "loss": 1.5909, + "learning_rate": 0.0002417733990147783, + "epoch": 0.06 + }, + { + "current_steps": 527, + "loss": 1.3579, + "learning_rate": 0.00024147783251231525, + "epoch": 0.06 + }, + { + "current_steps": 529, + "loss": 1.4976, + "learning_rate": 0.0002411822660098522, + "epoch": 0.06 + }, + { + "current_steps": 531, + "loss": 1.5886, + "learning_rate": 0.00024088669950738916, + "epoch": 0.06 + }, + { + "current_steps": 533, + "loss": 1.6522, + "learning_rate": 0.00024059113300492607, + "epoch": 0.06 + }, + { + "current_steps": 535, + "loss": 1.4365, + "learning_rate": 0.00024029556650246304, + "epoch": 0.06 + }, + { + "current_steps": 537, + "loss": 1.5958, + "learning_rate": 0.00023999999999999998, + "epoch": 0.06 + }, + { + "current_steps": 539, + "loss": 1.5983, + "learning_rate": 0.00023970443349753692, + "epoch": 0.06 + }, + { + "current_steps": 541, + "loss": 1.5317, + "learning_rate": 0.0002394088669950739, + "epoch": 0.06 + }, + { + "current_steps": 543, + "loss": 1.6437, + "learning_rate": 0.0002391133004926108, + "epoch": 0.06 + }, + { + "current_steps": 545, + "loss": 1.3556, + "learning_rate": 0.00023881773399014777, + "epoch": 0.06 + }, + { + "current_steps": 547, + "loss": 1.3637, + "learning_rate": 0.00023852216748768474, + "epoch": 0.06 + }, + { + "current_steps": 549, + "loss": 1.3729, + "learning_rate": 0.00023822660098522165, + "epoch": 0.06 + }, + { + "current_steps": 551, + "loss": 1.656, + "learning_rate": 0.00023793103448275862, + "epoch": 0.06 + }, + { + "current_steps": 553, + "loss": 1.5467, + "learning_rate": 0.00023763546798029553, + "epoch": 0.06 + }, + { + "current_steps": 555, + "loss": 1.4409, + "learning_rate": 0.0002373399014778325, + "epoch": 0.06 + }, + { + "current_steps": 557, + "loss": 1.4597, + "learning_rate": 0.0002370443349753694, + "epoch": 0.06 + }, + { + "current_steps": 559, + "loss": 1.4803, + "learning_rate": 0.00023674876847290638, + "epoch": 0.07 + }, + { + "current_steps": 561, + "loss": 1.4419, + "learning_rate": 0.00023645320197044335, + "epoch": 0.07 + }, + { + "current_steps": 563, + "loss": 1.5701, + "learning_rate": 0.00023615763546798026, + "epoch": 0.07 + }, + { + "current_steps": 565, + "loss": 1.543, + "learning_rate": 0.00023586206896551723, + "epoch": 0.07 + }, + { + "current_steps": 567, + "loss": 1.625, + "learning_rate": 0.00023556650246305414, + "epoch": 0.07 + }, + { + "current_steps": 569, + "loss": 1.7143, + "learning_rate": 0.0002352709359605911, + "epoch": 0.07 + }, + { + "current_steps": 571, + "loss": 1.4128, + "learning_rate": 0.00023497536945812808, + "epoch": 0.07 + }, + { + "current_steps": 573, + "loss": 1.6889, + "learning_rate": 0.000234679802955665, + "epoch": 0.07 + }, + { + "current_steps": 575, + "loss": 1.5572, + "learning_rate": 0.00023438423645320196, + "epoch": 0.07 + }, + { + "current_steps": 577, + "loss": 1.3485, + "learning_rate": 0.00023408866995073887, + "epoch": 0.07 + }, + { + "current_steps": 579, + "loss": 1.6611, + "learning_rate": 0.00023379310344827584, + "epoch": 0.07 + }, + { + "current_steps": 581, + "loss": 1.503, + "learning_rate": 0.0002334975369458128, + "epoch": 0.07 + }, + { + "current_steps": 583, + "loss": 1.4661, + "learning_rate": 0.00023320197044334972, + "epoch": 0.07 + }, + { + "current_steps": 585, + "loss": 1.4585, + "learning_rate": 0.0002329064039408867, + "epoch": 0.07 + }, + { + "current_steps": 587, + "loss": 1.5381, + "learning_rate": 0.0002326108374384236, + "epoch": 0.07 + }, + { + "current_steps": 589, + "loss": 1.5061, + "learning_rate": 0.00023231527093596057, + "epoch": 0.07 + }, + { + "current_steps": 591, + "loss": 1.5201, + "learning_rate": 0.00023201970443349754, + "epoch": 0.07 + }, + { + "current_steps": 593, + "loss": 1.5733, + "learning_rate": 0.00023172413793103445, + "epoch": 0.07 + }, + { + "current_steps": 595, + "loss": 1.4034, + "learning_rate": 0.00023142857142857142, + "epoch": 0.07 + }, + { + "current_steps": 597, + "loss": 1.6211, + "learning_rate": 0.00023113300492610833, + "epoch": 0.07 + }, + { + "current_steps": 599, + "loss": 1.6118, + "learning_rate": 0.0002308374384236453, + "epoch": 0.07 + }, + { + "current_steps": 601, + "loss": 1.5431, + "learning_rate": 0.00023054187192118224, + "epoch": 0.07 + }, + { + "current_steps": 603, + "loss": 1.6332, + "learning_rate": 0.00023024630541871918, + "epoch": 0.07 + }, + { + "current_steps": 605, + "loss": 1.4212, + "learning_rate": 0.00022995073891625615, + "epoch": 0.07 + }, + { + "current_steps": 607, + "loss": 1.5234, + "learning_rate": 0.00022965517241379306, + "epoch": 0.07 + }, + { + "current_steps": 609, + "loss": 1.7482, + "learning_rate": 0.00022935960591133003, + "epoch": 0.07 + }, + { + "current_steps": 611, + "loss": 1.483, + "learning_rate": 0.00022906403940886697, + "epoch": 0.07 + }, + { + "current_steps": 613, + "loss": 1.3697, + "learning_rate": 0.0002287684729064039, + "epoch": 0.07 + }, + { + "current_steps": 615, + "loss": 1.484, + "learning_rate": 0.00022847290640394088, + "epoch": 0.07 + }, + { + "current_steps": 617, + "loss": 1.7224, + "learning_rate": 0.0002281773399014778, + "epoch": 0.07 + }, + { + "current_steps": 619, + "loss": 1.3295, + "learning_rate": 0.00022788177339901476, + "epoch": 0.07 + }, + { + "current_steps": 621, + "loss": 1.7212, + "learning_rate": 0.0002275862068965517, + "epoch": 0.07 + }, + { + "current_steps": 623, + "loss": 1.4627, + "learning_rate": 0.00022729064039408864, + "epoch": 0.07 + }, + { + "current_steps": 625, + "loss": 1.6296, + "learning_rate": 0.0002269950738916256, + "epoch": 0.07 + }, + { + "current_steps": 627, + "loss": 1.6696, + "learning_rate": 0.00022669950738916255, + "epoch": 0.07 + }, + { + "current_steps": 629, + "loss": 1.5944, + "learning_rate": 0.0002264039408866995, + "epoch": 0.07 + }, + { + "current_steps": 631, + "loss": 1.6079, + "learning_rate": 0.00022610837438423643, + "epoch": 0.07 + }, + { + "current_steps": 633, + "loss": 1.4051, + "learning_rate": 0.00022581280788177337, + "epoch": 0.07 + }, + { + "current_steps": 635, + "loss": 1.4022, + "learning_rate": 0.00022551724137931031, + "epoch": 0.07 + }, + { + "current_steps": 637, + "loss": 1.6161, + "learning_rate": 0.00022522167487684728, + "epoch": 0.07 + }, + { + "current_steps": 639, + "loss": 1.5758, + "learning_rate": 0.00022492610837438422, + "epoch": 0.07 + }, + { + "current_steps": 641, + "loss": 1.4163, + "learning_rate": 0.00022463054187192116, + "epoch": 0.07 + }, + { + "current_steps": 643, + "loss": 1.4388, + "learning_rate": 0.0002243349753694581, + "epoch": 0.07 + }, + { + "current_steps": 645, + "loss": 1.5208, + "learning_rate": 0.00022403940886699504, + "epoch": 0.08 + }, + { + "current_steps": 647, + "loss": 1.6124, + "learning_rate": 0.000223743842364532, + "epoch": 0.08 + }, + { + "current_steps": 649, + "loss": 1.6304, + "learning_rate": 0.00022344827586206895, + "epoch": 0.08 + }, + { + "current_steps": 651, + "loss": 1.2748, + "learning_rate": 0.0002231527093596059, + "epoch": 0.08 + }, + { + "current_steps": 653, + "loss": 1.4693, + "learning_rate": 0.00022285714285714283, + "epoch": 0.08 + }, + { + "current_steps": 655, + "loss": 1.4548, + "learning_rate": 0.00022256157635467977, + "epoch": 0.08 + }, + { + "current_steps": 657, + "loss": 1.5686, + "learning_rate": 0.00022226600985221674, + "epoch": 0.08 + }, + { + "current_steps": 659, + "loss": 1.467, + "learning_rate": 0.00022197044334975368, + "epoch": 0.08 + }, + { + "current_steps": 661, + "loss": 1.4681, + "learning_rate": 0.00022167487684729062, + "epoch": 0.08 + }, + { + "current_steps": 663, + "loss": 1.6515, + "learning_rate": 0.00022137931034482756, + "epoch": 0.08 + }, + { + "current_steps": 665, + "loss": 1.4428, + "learning_rate": 0.0002210837438423645, + "epoch": 0.08 + }, + { + "current_steps": 667, + "loss": 1.4966, + "learning_rate": 0.00022078817733990147, + "epoch": 0.08 + }, + { + "current_steps": 669, + "loss": 1.526, + "learning_rate": 0.00022049261083743839, + "epoch": 0.08 + }, + { + "current_steps": 671, + "loss": 1.5171, + "learning_rate": 0.00022019704433497535, + "epoch": 0.08 + }, + { + "current_steps": 673, + "loss": 1.478, + "learning_rate": 0.0002199014778325123, + "epoch": 0.08 + }, + { + "current_steps": 675, + "loss": 1.3102, + "learning_rate": 0.00021960591133004923, + "epoch": 0.08 + }, + { + "current_steps": 677, + "loss": 1.4725, + "learning_rate": 0.0002193103448275862, + "epoch": 0.08 + }, + { + "current_steps": 679, + "loss": 1.6475, + "learning_rate": 0.00021901477832512312, + "epoch": 0.08 + }, + { + "current_steps": 681, + "loss": 1.4398, + "learning_rate": 0.00021871921182266008, + "epoch": 0.08 + }, + { + "current_steps": 683, + "loss": 1.4224, + "learning_rate": 0.00021842364532019705, + "epoch": 0.08 + }, + { + "current_steps": 685, + "loss": 1.4644, + "learning_rate": 0.00021812807881773397, + "epoch": 0.08 + }, + { + "current_steps": 687, + "loss": 1.4869, + "learning_rate": 0.00021783251231527093, + "epoch": 0.08 + }, + { + "current_steps": 689, + "loss": 1.5006, + "learning_rate": 0.00021753694581280785, + "epoch": 0.08 + }, + { + "current_steps": 691, + "loss": 1.385, + "learning_rate": 0.00021724137931034481, + "epoch": 0.08 + }, + { + "current_steps": 693, + "loss": 1.4807, + "learning_rate": 0.00021694581280788178, + "epoch": 0.08 + }, + { + "current_steps": 695, + "loss": 1.4785, + "learning_rate": 0.0002166502463054187, + "epoch": 0.08 + }, + { + "current_steps": 697, + "loss": 1.3646, + "learning_rate": 0.00021635467980295566, + "epoch": 0.08 + }, + { + "current_steps": 699, + "loss": 1.5786, + "learning_rate": 0.00021605911330049258, + "epoch": 0.08 + }, + { + "current_steps": 701, + "loss": 1.5026, + "learning_rate": 0.00021576354679802954, + "epoch": 0.08 + }, + { + "current_steps": 703, + "loss": 1.5796, + "learning_rate": 0.0002154679802955665, + "epoch": 0.08 + }, + { + "current_steps": 705, + "loss": 1.3805, + "learning_rate": 0.00021517241379310343, + "epoch": 0.08 + }, + { + "current_steps": 707, + "loss": 1.3003, + "learning_rate": 0.0002148768472906404, + "epoch": 0.08 + }, + { + "current_steps": 709, + "loss": 1.6073, + "learning_rate": 0.0002145812807881773, + "epoch": 0.08 + }, + { + "current_steps": 711, + "loss": 1.6322, + "learning_rate": 0.00021428571428571427, + "epoch": 0.08 + }, + { + "current_steps": 713, + "loss": 1.4224, + "learning_rate": 0.0002139901477832512, + "epoch": 0.08 + }, + { + "current_steps": 715, + "loss": 1.5127, + "learning_rate": 0.00021369458128078816, + "epoch": 0.08 + }, + { + "current_steps": 717, + "loss": 1.5959, + "learning_rate": 0.00021339901477832512, + "epoch": 0.08 + }, + { + "current_steps": 719, + "loss": 1.5482, + "learning_rate": 0.00021310344827586204, + "epoch": 0.08 + }, + { + "current_steps": 721, + "loss": 1.4671, + "learning_rate": 0.000212807881773399, + "epoch": 0.08 + }, + { + "current_steps": 723, + "loss": 1.3553, + "learning_rate": 0.00021251231527093592, + "epoch": 0.08 + }, + { + "current_steps": 725, + "loss": 1.3771, + "learning_rate": 0.00021221674876847289, + "epoch": 0.08 + }, + { + "current_steps": 727, + "loss": 1.3838, + "learning_rate": 0.00021192118226600985, + "epoch": 0.08 + }, + { + "current_steps": 729, + "loss": 1.5396, + "learning_rate": 0.00021162561576354677, + "epoch": 0.08 + }, + { + "current_steps": 731, + "loss": 1.4486, + "learning_rate": 0.00021133004926108374, + "epoch": 0.09 + }, + { + "current_steps": 733, + "loss": 1.7393, + "learning_rate": 0.00021103448275862065, + "epoch": 0.09 + }, + { + "current_steps": 735, + "loss": 1.6453, + "learning_rate": 0.00021073891625615762, + "epoch": 0.09 + }, + { + "current_steps": 737, + "loss": 1.531, + "learning_rate": 0.00021044334975369458, + "epoch": 0.09 + }, + { + "current_steps": 739, + "loss": 1.4001, + "learning_rate": 0.0002101477832512315, + "epoch": 0.09 + }, + { + "current_steps": 741, + "loss": 1.715, + "learning_rate": 0.00020985221674876847, + "epoch": 0.09 + }, + { + "current_steps": 743, + "loss": 1.5752, + "learning_rate": 0.00020955665024630538, + "epoch": 0.09 + }, + { + "current_steps": 745, + "loss": 1.6571, + "learning_rate": 0.00020926108374384235, + "epoch": 0.09 + }, + { + "current_steps": 747, + "loss": 1.3944, + "learning_rate": 0.0002089655172413793, + "epoch": 0.09 + }, + { + "current_steps": 749, + "loss": 1.4435, + "learning_rate": 0.00020866995073891623, + "epoch": 0.09 + }, + { + "current_steps": 751, + "loss": 1.5315, + "learning_rate": 0.0002083743842364532, + "epoch": 0.09 + }, + { + "current_steps": 753, + "loss": 1.5234, + "learning_rate": 0.0002080788177339901, + "epoch": 0.09 + }, + { + "current_steps": 755, + "loss": 1.3887, + "learning_rate": 0.00020778325123152708, + "epoch": 0.09 + }, + { + "current_steps": 757, + "loss": 1.7222, + "learning_rate": 0.00020748768472906402, + "epoch": 0.09 + }, + { + "current_steps": 759, + "loss": 1.3696, + "learning_rate": 0.00020719211822660096, + "epoch": 0.09 + }, + { + "current_steps": 761, + "loss": 1.7286, + "learning_rate": 0.00020689655172413793, + "epoch": 0.09 + }, + { + "current_steps": 763, + "loss": 1.4161, + "learning_rate": 0.00020660098522167484, + "epoch": 0.09 + }, + { + "current_steps": 765, + "loss": 1.3924, + "learning_rate": 0.0002063054187192118, + "epoch": 0.09 + }, + { + "current_steps": 767, + "loss": 1.5465, + "learning_rate": 0.00020600985221674875, + "epoch": 0.09 + }, + { + "current_steps": 769, + "loss": 1.4817, + "learning_rate": 0.0002057142857142857, + "epoch": 0.09 + }, + { + "current_steps": 771, + "loss": 1.3681, + "learning_rate": 0.00020541871921182266, + "epoch": 0.09 + }, + { + "current_steps": 773, + "loss": 1.4576, + "learning_rate": 0.0002051231527093596, + "epoch": 0.09 + }, + { + "current_steps": 775, + "loss": 1.6408, + "learning_rate": 0.00020482758620689654, + "epoch": 0.09 + }, + { + "current_steps": 777, + "loss": 1.3061, + "learning_rate": 0.00020453201970443348, + "epoch": 0.09 + }, + { + "current_steps": 779, + "loss": 1.3361, + "learning_rate": 0.00020423645320197042, + "epoch": 0.09 + }, + { + "current_steps": 781, + "loss": 1.6419, + "learning_rate": 0.00020394088669950736, + "epoch": 0.09 + }, + { + "current_steps": 783, + "loss": 1.5729, + "learning_rate": 0.00020364532019704433, + "epoch": 0.09 + }, + { + "current_steps": 785, + "loss": 1.4599, + "learning_rate": 0.00020334975369458127, + "epoch": 0.09 + }, + { + "current_steps": 787, + "loss": 1.6963, + "learning_rate": 0.0002030541871921182, + "epoch": 0.09 + }, + { + "current_steps": 789, + "loss": 1.34, + "learning_rate": 0.00020275862068965515, + "epoch": 0.09 + }, + { + "current_steps": 791, + "loss": 1.5912, + "learning_rate": 0.0002024630541871921, + "epoch": 0.09 + }, + { + "current_steps": 793, + "loss": 1.2494, + "learning_rate": 0.00020216748768472906, + "epoch": 0.09 + }, + { + "current_steps": 795, + "loss": 1.6001, + "learning_rate": 0.000201871921182266, + "epoch": 0.09 + }, + { + "current_steps": 797, + "loss": 1.476, + "learning_rate": 0.00020157635467980294, + "epoch": 0.09 + }, + { + "current_steps": 799, + "loss": 1.4616, + "learning_rate": 0.00020128078817733988, + "epoch": 0.09 + }, + { + "current_steps": 801, + "loss": 1.4727, + "learning_rate": 0.00020098522167487682, + "epoch": 0.09 + }, + { + "current_steps": 803, + "loss": 1.6162, + "learning_rate": 0.0002006896551724138, + "epoch": 0.09 + }, + { + "current_steps": 805, + "loss": 1.4267, + "learning_rate": 0.00020039408866995073, + "epoch": 0.09 + }, + { + "current_steps": 807, + "loss": 1.3787, + "learning_rate": 0.00020009852216748767, + "epoch": 0.09 + }, + { + "current_steps": 809, + "loss": 1.6201, + "learning_rate": 0.0001998029556650246, + "epoch": 0.09 + }, + { + "current_steps": 811, + "loss": 1.5628, + "learning_rate": 0.00019950738916256155, + "epoch": 0.09 + }, + { + "current_steps": 813, + "loss": 1.5113, + "learning_rate": 0.00019921182266009852, + "epoch": 0.09 + }, + { + "current_steps": 815, + "loss": 1.5544, + "learning_rate": 0.00019891625615763543, + "epoch": 0.09 + }, + { + "current_steps": 817, + "loss": 1.4982, + "learning_rate": 0.0001986206896551724, + "epoch": 0.1 + }, + { + "current_steps": 819, + "loss": 1.4648, + "learning_rate": 0.00019832512315270934, + "epoch": 0.1 + }, + { + "current_steps": 821, + "loss": 1.3321, + "learning_rate": 0.00019802955665024628, + "epoch": 0.1 + }, + { + "current_steps": 823, + "loss": 1.3048, + "learning_rate": 0.00019773399014778325, + "epoch": 0.1 + }, + { + "current_steps": 825, + "loss": 1.4533, + "learning_rate": 0.00019743842364532016, + "epoch": 0.1 + }, + { + "current_steps": 827, + "loss": 1.6091, + "learning_rate": 0.00019714285714285713, + "epoch": 0.1 + }, + { + "current_steps": 829, + "loss": 1.5188, + "learning_rate": 0.0001968472906403941, + "epoch": 0.1 + }, + { + "current_steps": 831, + "loss": 1.612, + "learning_rate": 0.000196551724137931, + "epoch": 0.1 + }, + { + "current_steps": 833, + "loss": 1.3739, + "learning_rate": 0.00019625615763546798, + "epoch": 0.1 + }, + { + "current_steps": 835, + "loss": 1.5841, + "learning_rate": 0.0001959605911330049, + "epoch": 0.1 + }, + { + "current_steps": 837, + "loss": 1.382, + "learning_rate": 0.00019566502463054186, + "epoch": 0.1 + }, + { + "current_steps": 839, + "loss": 1.4158, + "learning_rate": 0.00019536945812807883, + "epoch": 0.1 + }, + { + "current_steps": 841, + "loss": 1.4126, + "learning_rate": 0.00019507389162561574, + "epoch": 0.1 + }, + { + "current_steps": 843, + "loss": 1.5635, + "learning_rate": 0.0001947783251231527, + "epoch": 0.1 + }, + { + "current_steps": 845, + "loss": 1.375, + "learning_rate": 0.00019448275862068962, + "epoch": 0.1 + }, + { + "current_steps": 847, + "loss": 1.6939, + "learning_rate": 0.0001941871921182266, + "epoch": 0.1 + }, + { + "current_steps": 849, + "loss": 1.6394, + "learning_rate": 0.00019389162561576356, + "epoch": 0.1 + }, + { + "current_steps": 851, + "loss": 1.6799, + "learning_rate": 0.00019359605911330047, + "epoch": 0.1 + }, + { + "current_steps": 853, + "loss": 1.393, + "learning_rate": 0.00019330049261083744, + "epoch": 0.1 + }, + { + "current_steps": 855, + "loss": 1.3931, + "learning_rate": 0.00019300492610837435, + "epoch": 0.1 + }, + { + "current_steps": 857, + "loss": 1.5691, + "learning_rate": 0.00019270935960591132, + "epoch": 0.1 + }, + { + "current_steps": 859, + "loss": 1.3421, + "learning_rate": 0.00019241379310344823, + "epoch": 0.1 + }, + { + "current_steps": 861, + "loss": 1.3255, + "learning_rate": 0.0001921182266009852, + "epoch": 0.1 + }, + { + "current_steps": 863, + "loss": 1.468, + "learning_rate": 0.00019182266009852217, + "epoch": 0.1 + }, + { + "current_steps": 865, + "loss": 1.6568, + "learning_rate": 0.00019152709359605908, + "epoch": 0.1 + }, + { + "current_steps": 867, + "loss": 1.6134, + "learning_rate": 0.00019123152709359605, + "epoch": 0.1 + }, + { + "current_steps": 869, + "loss": 1.5257, + "learning_rate": 0.00019093596059113296, + "epoch": 0.1 + }, + { + "current_steps": 871, + "loss": 1.3118, + "learning_rate": 0.00019064039408866993, + "epoch": 0.1 + }, + { + "current_steps": 873, + "loss": 1.449, + "learning_rate": 0.0001903448275862069, + "epoch": 0.1 + }, + { + "current_steps": 875, + "loss": 1.627, + "learning_rate": 0.0001900492610837438, + "epoch": 0.1 + }, + { + "current_steps": 877, + "loss": 1.4511, + "learning_rate": 0.00018975369458128078, + "epoch": 0.1 + }, + { + "current_steps": 879, + "loss": 1.6959, + "learning_rate": 0.0001894581280788177, + "epoch": 0.1 + }, + { + "current_steps": 881, + "loss": 1.5174, + "learning_rate": 0.00018916256157635466, + "epoch": 0.1 + }, + { + "current_steps": 883, + "loss": 1.5205, + "learning_rate": 0.00018886699507389163, + "epoch": 0.1 + }, + { + "current_steps": 885, + "loss": 1.4595, + "learning_rate": 0.00018857142857142854, + "epoch": 0.1 + }, + { + "current_steps": 887, + "loss": 1.5407, + "learning_rate": 0.0001882758620689655, + "epoch": 0.1 + }, + { + "current_steps": 889, + "loss": 1.8308, + "learning_rate": 0.00018798029556650242, + "epoch": 0.1 + }, + { + "current_steps": 891, + "loss": 1.3327, + "learning_rate": 0.0001876847290640394, + "epoch": 0.1 + }, + { + "current_steps": 893, + "loss": 1.5598, + "learning_rate": 0.00018738916256157633, + "epoch": 0.1 + }, + { + "current_steps": 895, + "loss": 1.5892, + "learning_rate": 0.00018709359605911327, + "epoch": 0.1 + }, + { + "current_steps": 897, + "loss": 1.5615, + "learning_rate": 0.00018679802955665024, + "epoch": 0.1 + }, + { + "current_steps": 899, + "loss": 1.3798, + "learning_rate": 0.00018650246305418715, + "epoch": 0.1 + }, + { + "current_steps": 901, + "loss": 1.7268, + "learning_rate": 0.00018620689655172412, + "epoch": 0.1 + }, + { + "current_steps": 903, + "loss": 1.3698, + "learning_rate": 0.00018591133004926106, + "epoch": 0.11 + }, + { + "current_steps": 905, + "loss": 1.6856, + "learning_rate": 0.000185615763546798, + "epoch": 0.11 + }, + { + "current_steps": 907, + "loss": 1.4168, + "learning_rate": 0.00018532019704433497, + "epoch": 0.11 + }, + { + "current_steps": 909, + "loss": 1.3779, + "learning_rate": 0.0001850246305418719, + "epoch": 0.11 + }, + { + "current_steps": 911, + "loss": 1.4477, + "learning_rate": 0.00018472906403940885, + "epoch": 0.11 + }, + { + "current_steps": 913, + "loss": 1.5091, + "learning_rate": 0.0001844334975369458, + "epoch": 0.11 + }, + { + "current_steps": 915, + "loss": 1.4676, + "learning_rate": 0.00018413793103448273, + "epoch": 0.11 + }, + { + "current_steps": 917, + "loss": 1.417, + "learning_rate": 0.0001838423645320197, + "epoch": 0.11 + }, + { + "current_steps": 919, + "loss": 1.537, + "learning_rate": 0.00018354679802955664, + "epoch": 0.11 + }, + { + "current_steps": 921, + "loss": 1.4578, + "learning_rate": 0.00018325123152709358, + "epoch": 0.11 + }, + { + "current_steps": 923, + "loss": 1.5152, + "learning_rate": 0.00018295566502463052, + "epoch": 0.11 + }, + { + "current_steps": 925, + "loss": 1.5826, + "learning_rate": 0.00018266009852216746, + "epoch": 0.11 + }, + { + "current_steps": 927, + "loss": 1.4088, + "learning_rate": 0.0001823645320197044, + "epoch": 0.11 + }, + { + "current_steps": 929, + "loss": 1.4405, + "learning_rate": 0.00018206896551724137, + "epoch": 0.11 + }, + { + "current_steps": 931, + "loss": 1.4493, + "learning_rate": 0.0001817733990147783, + "epoch": 0.11 + }, + { + "current_steps": 933, + "loss": 1.4781, + "learning_rate": 0.00018147783251231525, + "epoch": 0.11 + }, + { + "current_steps": 935, + "loss": 1.5156, + "learning_rate": 0.0001811822660098522, + "epoch": 0.11 + }, + { + "current_steps": 937, + "loss": 1.3937, + "learning_rate": 0.00018088669950738914, + "epoch": 0.11 + }, + { + "current_steps": 939, + "loss": 1.4797, + "learning_rate": 0.0001805911330049261, + "epoch": 0.11 + }, + { + "current_steps": 941, + "loss": 1.504, + "learning_rate": 0.00018029556650246304, + "epoch": 0.11 + }, + { + "current_steps": 943, + "loss": 1.5977, + "learning_rate": 0.00017999999999999998, + "epoch": 0.11 + }, + { + "current_steps": 945, + "loss": 1.6771, + "learning_rate": 0.00017970443349753692, + "epoch": 0.11 + }, + { + "current_steps": 947, + "loss": 1.5078, + "learning_rate": 0.00017940886699507387, + "epoch": 0.11 + }, + { + "current_steps": 949, + "loss": 1.3564, + "learning_rate": 0.00017911330049261083, + "epoch": 0.11 + }, + { + "current_steps": 951, + "loss": 1.4263, + "learning_rate": 0.00017881773399014777, + "epoch": 0.11 + }, + { + "current_steps": 953, + "loss": 1.4985, + "learning_rate": 0.00017852216748768471, + "epoch": 0.11 + }, + { + "current_steps": 955, + "loss": 1.3852, + "learning_rate": 0.00017822660098522166, + "epoch": 0.11 + }, + { + "current_steps": 957, + "loss": 1.2973, + "learning_rate": 0.0001779310344827586, + "epoch": 0.11 + }, + { + "current_steps": 959, + "loss": 1.3745, + "learning_rate": 0.00017763546798029556, + "epoch": 0.11 + }, + { + "current_steps": 961, + "loss": 1.5937, + "learning_rate": 0.0001773399014778325, + "epoch": 0.11 + }, + { + "current_steps": 963, + "loss": 1.5452, + "learning_rate": 0.00017704433497536944, + "epoch": 0.11 + }, + { + "current_steps": 965, + "loss": 1.5924, + "learning_rate": 0.0001767487684729064, + "epoch": 0.11 + }, + { + "current_steps": 967, + "loss": 1.5265, + "learning_rate": 0.00017645320197044333, + "epoch": 0.11 + }, + { + "current_steps": 969, + "loss": 1.4742, + "learning_rate": 0.0001761576354679803, + "epoch": 0.11 + }, + { + "current_steps": 971, + "loss": 1.45, + "learning_rate": 0.0001758620689655172, + "epoch": 0.11 + }, + { + "current_steps": 973, + "loss": 1.5054, + "learning_rate": 0.00017556650246305418, + "epoch": 0.11 + }, + { + "current_steps": 975, + "loss": 1.2477, + "learning_rate": 0.00017527093596059114, + "epoch": 0.11 + }, + { + "current_steps": 977, + "loss": 1.4494, + "learning_rate": 0.00017497536945812806, + "epoch": 0.11 + }, + { + "current_steps": 979, + "loss": 1.4671, + "learning_rate": 0.00017467980295566502, + "epoch": 0.11 + }, + { + "current_steps": 981, + "loss": 1.4439, + "learning_rate": 0.00017438423645320194, + "epoch": 0.11 + }, + { + "current_steps": 983, + "loss": 1.457, + "learning_rate": 0.0001740886699507389, + "epoch": 0.11 + }, + { + "current_steps": 985, + "loss": 1.3146, + "learning_rate": 0.00017379310344827587, + "epoch": 0.11 + }, + { + "current_steps": 987, + "loss": 1.6909, + "learning_rate": 0.0001734975369458128, + "epoch": 0.11 + }, + { + "current_steps": 989, + "loss": 1.4458, + "learning_rate": 0.00017320197044334975, + "epoch": 0.12 + }, + { + "current_steps": 991, + "loss": 1.4335, + "learning_rate": 0.00017290640394088667, + "epoch": 0.12 + }, + { + "current_steps": 993, + "loss": 1.4131, + "learning_rate": 0.00017261083743842364, + "epoch": 0.12 + }, + { + "current_steps": 995, + "loss": 1.6884, + "learning_rate": 0.0001723152709359606, + "epoch": 0.12 + }, + { + "current_steps": 997, + "loss": 1.4066, + "learning_rate": 0.00017201970443349752, + "epoch": 0.12 + }, + { + "current_steps": 999, + "loss": 1.4257, + "learning_rate": 0.00017172413793103448, + "epoch": 0.12 + }, + { + "current_steps": 1001, + "loss": 1.452, + "learning_rate": 0.0001714285714285714, + "epoch": 0.12 + }, + { + "current_steps": 1003, + "loss": 1.4184, + "learning_rate": 0.00017113300492610837, + "epoch": 0.12 + }, + { + "current_steps": 1005, + "loss": 1.3286, + "learning_rate": 0.00017083743842364528, + "epoch": 0.12 + }, + { + "current_steps": 1007, + "loss": 1.3791, + "learning_rate": 0.00017054187192118225, + "epoch": 0.12 + }, + { + "current_steps": 1009, + "loss": 1.6685, + "learning_rate": 0.00017024630541871921, + "epoch": 0.12 + }, + { + "current_steps": 1011, + "loss": 1.6164, + "learning_rate": 0.00016995073891625613, + "epoch": 0.12 + }, + { + "current_steps": 1013, + "loss": 1.4075, + "learning_rate": 0.0001696551724137931, + "epoch": 0.12 + }, + { + "current_steps": 1015, + "loss": 1.5294, + "learning_rate": 0.00016935960591133, + "epoch": 0.12 + }, + { + "current_steps": 1017, + "loss": 1.3351, + "learning_rate": 0.00016906403940886698, + "epoch": 0.12 + }, + { + "current_steps": 1019, + "loss": 1.6398, + "learning_rate": 0.00016876847290640395, + "epoch": 0.12 + }, + { + "current_steps": 1021, + "loss": 1.2795, + "learning_rate": 0.00016847290640394086, + "epoch": 0.12 + }, + { + "current_steps": 1023, + "loss": 1.3947, + "learning_rate": 0.00016817733990147783, + "epoch": 0.12 + }, + { + "current_steps": 1025, + "loss": 1.4945, + "learning_rate": 0.00016788177339901474, + "epoch": 0.12 + }, + { + "current_steps": 1027, + "loss": 1.4894, + "learning_rate": 0.0001675862068965517, + "epoch": 0.12 + }, + { + "current_steps": 1029, + "loss": 1.3364, + "learning_rate": 0.00016729064039408868, + "epoch": 0.12 + }, + { + "current_steps": 1031, + "loss": 1.4511, + "learning_rate": 0.0001669950738916256, + "epoch": 0.12 + }, + { + "current_steps": 1033, + "loss": 1.4264, + "learning_rate": 0.00016669950738916256, + "epoch": 0.12 + }, + { + "current_steps": 1035, + "loss": 1.455, + "learning_rate": 0.00016640394088669947, + "epoch": 0.12 + }, + { + "current_steps": 1037, + "loss": 1.4287, + "learning_rate": 0.00016610837438423644, + "epoch": 0.12 + }, + { + "current_steps": 1039, + "loss": 1.6167, + "learning_rate": 0.00016581280788177338, + "epoch": 0.12 + }, + { + "current_steps": 1041, + "loss": 1.6667, + "learning_rate": 0.00016551724137931032, + "epoch": 0.12 + }, + { + "current_steps": 1043, + "loss": 1.5287, + "learning_rate": 0.0001652216748768473, + "epoch": 0.12 + }, + { + "current_steps": 1045, + "loss": 1.5189, + "learning_rate": 0.0001649261083743842, + "epoch": 0.12 + }, + { + "current_steps": 1047, + "loss": 1.6377, + "learning_rate": 0.00016463054187192117, + "epoch": 0.12 + }, + { + "current_steps": 1049, + "loss": 1.6074, + "learning_rate": 0.0001643349753694581, + "epoch": 0.12 + }, + { + "current_steps": 1051, + "loss": 1.4497, + "learning_rate": 0.00016403940886699505, + "epoch": 0.12 + }, + { + "current_steps": 1053, + "loss": 1.5705, + "learning_rate": 0.00016374384236453202, + "epoch": 0.12 + }, + { + "current_steps": 1055, + "loss": 1.7055, + "learning_rate": 0.00016344827586206896, + "epoch": 0.12 + }, + { + "current_steps": 1057, + "loss": 1.4076, + "learning_rate": 0.0001631527093596059, + "epoch": 0.12 + }, + { + "current_steps": 1059, + "loss": 1.4607, + "learning_rate": 0.00016285714285714284, + "epoch": 0.12 + }, + { + "current_steps": 1061, + "loss": 1.4689, + "learning_rate": 0.00016256157635467978, + "epoch": 0.12 + }, + { + "current_steps": 1063, + "loss": 1.3713, + "learning_rate": 0.00016226600985221675, + "epoch": 0.12 + }, + { + "current_steps": 1065, + "loss": 1.407, + "learning_rate": 0.0001619704433497537, + "epoch": 0.12 + }, + { + "current_steps": 1067, + "loss": 1.5291, + "learning_rate": 0.00016167487684729063, + "epoch": 0.12 + }, + { + "current_steps": 1069, + "loss": 1.382, + "learning_rate": 0.00016137931034482757, + "epoch": 0.12 + }, + { + "current_steps": 1071, + "loss": 1.553, + "learning_rate": 0.0001610837438423645, + "epoch": 0.12 + }, + { + "current_steps": 1073, + "loss": 1.6119, + "learning_rate": 0.00016078817733990145, + "epoch": 0.12 + }, + { + "current_steps": 1075, + "loss": 1.5013, + "learning_rate": 0.00016049261083743842, + "epoch": 0.13 + }, + { + "current_steps": 1077, + "loss": 1.6357, + "learning_rate": 0.00016019704433497536, + "epoch": 0.13 + }, + { + "current_steps": 1079, + "loss": 1.4906, + "learning_rate": 0.0001599014778325123, + "epoch": 0.13 + }, + { + "current_steps": 1081, + "loss": 1.3798, + "learning_rate": 0.00015960591133004924, + "epoch": 0.13 + }, + { + "current_steps": 1083, + "loss": 1.4316, + "learning_rate": 0.00015931034482758618, + "epoch": 0.13 + }, + { + "current_steps": 1085, + "loss": 1.4263, + "learning_rate": 0.00015901477832512315, + "epoch": 0.13 + }, + { + "current_steps": 1087, + "loss": 1.2947, + "learning_rate": 0.0001587192118226601, + "epoch": 0.13 + }, + { + "current_steps": 1089, + "loss": 1.5948, + "learning_rate": 0.00015842364532019703, + "epoch": 0.13 + }, + { + "current_steps": 1091, + "loss": 1.4573, + "learning_rate": 0.00015812807881773397, + "epoch": 0.13 + }, + { + "current_steps": 1093, + "loss": 1.267, + "learning_rate": 0.0001578325123152709, + "epoch": 0.13 + }, + { + "current_steps": 1095, + "loss": 1.4615, + "learning_rate": 0.00015753694581280788, + "epoch": 0.13 + }, + { + "current_steps": 1097, + "loss": 1.4817, + "learning_rate": 0.00015724137931034482, + "epoch": 0.13 + }, + { + "current_steps": 1099, + "loss": 1.5427, + "learning_rate": 0.00015694581280788176, + "epoch": 0.13 + }, + { + "current_steps": 1101, + "loss": 1.5629, + "learning_rate": 0.0001566502463054187, + "epoch": 0.13 + }, + { + "current_steps": 1103, + "loss": 1.5958, + "learning_rate": 0.00015635467980295564, + "epoch": 0.13 + }, + { + "current_steps": 1105, + "loss": 1.5782, + "learning_rate": 0.0001560591133004926, + "epoch": 0.13 + }, + { + "current_steps": 1107, + "loss": 1.533, + "learning_rate": 0.00015576354679802955, + "epoch": 0.13 + }, + { + "current_steps": 1109, + "loss": 1.486, + "learning_rate": 0.0001554679802955665, + "epoch": 0.13 + }, + { + "current_steps": 1111, + "loss": 1.6014, + "learning_rate": 0.00015517241379310346, + "epoch": 0.13 + }, + { + "current_steps": 1113, + "loss": 1.5115, + "learning_rate": 0.00015487684729064037, + "epoch": 0.13 + }, + { + "current_steps": 1115, + "loss": 1.4503, + "learning_rate": 0.00015458128078817734, + "epoch": 0.13 + }, + { + "current_steps": 1117, + "loss": 1.4544, + "learning_rate": 0.00015428571428571425, + "epoch": 0.13 + }, + { + "current_steps": 1119, + "loss": 1.4324, + "learning_rate": 0.00015399014778325122, + "epoch": 0.13 + }, + { + "current_steps": 1121, + "loss": 1.6173, + "learning_rate": 0.0001536945812807882, + "epoch": 0.13 + }, + { + "current_steps": 1123, + "loss": 1.2486, + "learning_rate": 0.0001533990147783251, + "epoch": 0.13 + }, + { + "current_steps": 1125, + "loss": 1.3345, + "learning_rate": 0.00015310344827586207, + "epoch": 0.13 + }, + { + "current_steps": 1127, + "loss": 1.3289, + "learning_rate": 0.00015280788177339898, + "epoch": 0.13 + }, + { + "current_steps": 1129, + "loss": 1.4645, + "learning_rate": 0.00015251231527093595, + "epoch": 0.13 + }, + { + "current_steps": 1131, + "loss": 1.2622, + "learning_rate": 0.00015221674876847292, + "epoch": 0.13 + }, + { + "current_steps": 1133, + "loss": 1.6447, + "learning_rate": 0.00015192118226600983, + "epoch": 0.13 + }, + { + "current_steps": 1135, + "loss": 1.4101, + "learning_rate": 0.0001516256157635468, + "epoch": 0.13 + }, + { + "current_steps": 1137, + "loss": 1.2335, + "learning_rate": 0.0001513300492610837, + "epoch": 0.13 + }, + { + "current_steps": 1139, + "loss": 1.5197, + "learning_rate": 0.00015103448275862068, + "epoch": 0.13 + }, + { + "current_steps": 1141, + "loss": 1.4544, + "learning_rate": 0.00015073891625615765, + "epoch": 0.13 + }, + { + "current_steps": 1143, + "loss": 1.653, + "learning_rate": 0.00015044334975369456, + "epoch": 0.13 + }, + { + "current_steps": 1145, + "loss": 1.4697, + "learning_rate": 0.00015014778325123153, + "epoch": 0.13 + }, + { + "current_steps": 1147, + "loss": 1.5149, + "learning_rate": 0.00014985221674876847, + "epoch": 0.13 + }, + { + "current_steps": 1149, + "loss": 1.5345, + "learning_rate": 0.0001495566502463054, + "epoch": 0.13 + }, + { + "current_steps": 1151, + "loss": 1.4074, + "learning_rate": 0.00014926108374384235, + "epoch": 0.13 + }, + { + "current_steps": 1153, + "loss": 1.6013, + "learning_rate": 0.0001489655172413793, + "epoch": 0.13 + }, + { + "current_steps": 1155, + "loss": 1.4285, + "learning_rate": 0.00014866995073891623, + "epoch": 0.13 + }, + { + "current_steps": 1157, + "loss": 1.4755, + "learning_rate": 0.00014837438423645317, + "epoch": 0.13 + }, + { + "current_steps": 1159, + "loss": 1.3552, + "learning_rate": 0.00014807881773399014, + "epoch": 0.13 + }, + { + "current_steps": 1161, + "loss": 1.3876, + "learning_rate": 0.00014778325123152708, + "epoch": 0.14 + }, + { + "current_steps": 1163, + "loss": 1.3403, + "learning_rate": 0.00014748768472906402, + "epoch": 0.14 + }, + { + "current_steps": 1165, + "loss": 1.305, + "learning_rate": 0.00014719211822660096, + "epoch": 0.14 + }, + { + "current_steps": 1167, + "loss": 1.523, + "learning_rate": 0.0001468965517241379, + "epoch": 0.14 + }, + { + "current_steps": 1169, + "loss": 1.575, + "learning_rate": 0.00014660098522167487, + "epoch": 0.14 + }, + { + "current_steps": 1171, + "loss": 1.6639, + "learning_rate": 0.0001463054187192118, + "epoch": 0.14 + }, + { + "current_steps": 1173, + "loss": 1.4092, + "learning_rate": 0.00014600985221674875, + "epoch": 0.14 + }, + { + "current_steps": 1175, + "loss": 1.6079, + "learning_rate": 0.0001457142857142857, + "epoch": 0.14 + }, + { + "current_steps": 1177, + "loss": 1.6555, + "learning_rate": 0.00014541871921182263, + "epoch": 0.14 + }, + { + "current_steps": 1179, + "loss": 1.5001, + "learning_rate": 0.00014512315270935958, + "epoch": 0.14 + }, + { + "current_steps": 1181, + "loss": 1.4116, + "learning_rate": 0.00014482758620689654, + "epoch": 0.14 + }, + { + "current_steps": 1183, + "loss": 1.4401, + "learning_rate": 0.00014453201970443348, + "epoch": 0.14 + }, + { + "current_steps": 1185, + "loss": 1.6073, + "learning_rate": 0.00014423645320197042, + "epoch": 0.14 + }, + { + "current_steps": 1187, + "loss": 1.5916, + "learning_rate": 0.00014394088669950736, + "epoch": 0.14 + }, + { + "current_steps": 1189, + "loss": 1.4508, + "learning_rate": 0.0001436453201970443, + "epoch": 0.14 + }, + { + "current_steps": 1191, + "loss": 1.2755, + "learning_rate": 0.00014334975369458127, + "epoch": 0.14 + }, + { + "current_steps": 1193, + "loss": 1.4433, + "learning_rate": 0.00014305418719211821, + "epoch": 0.14 + }, + { + "current_steps": 1195, + "loss": 1.6638, + "learning_rate": 0.00014275862068965515, + "epoch": 0.14 + }, + { + "current_steps": 1197, + "loss": 1.5391, + "learning_rate": 0.0001424630541871921, + "epoch": 0.14 + }, + { + "current_steps": 1199, + "loss": 1.2966, + "learning_rate": 0.00014216748768472904, + "epoch": 0.14 + }, + { + "current_steps": 1201, + "loss": 1.6218, + "learning_rate": 0.000141871921182266, + "epoch": 0.14 + }, + { + "current_steps": 1203, + "loss": 1.3367, + "learning_rate": 0.00014157635467980294, + "epoch": 0.14 + }, + { + "current_steps": 1205, + "loss": 1.5205, + "learning_rate": 0.00014128078817733988, + "epoch": 0.14 + }, + { + "current_steps": 1207, + "loss": 1.2991, + "learning_rate": 0.00014098522167487683, + "epoch": 0.14 + }, + { + "current_steps": 1209, + "loss": 1.5471, + "learning_rate": 0.00014068965517241377, + "epoch": 0.14 + }, + { + "current_steps": 1211, + "loss": 1.3356, + "learning_rate": 0.00014039408866995073, + "epoch": 0.14 + }, + { + "current_steps": 1213, + "loss": 1.4511, + "learning_rate": 0.00014009852216748767, + "epoch": 0.14 + }, + { + "current_steps": 1215, + "loss": 1.3815, + "learning_rate": 0.00013980295566502461, + "epoch": 0.14 + }, + { + "current_steps": 1217, + "loss": 1.5201, + "learning_rate": 0.00013950738916256156, + "epoch": 0.14 + }, + { + "current_steps": 1219, + "loss": 1.5778, + "learning_rate": 0.00013921182266009852, + "epoch": 0.14 + }, + { + "current_steps": 1221, + "loss": 1.3516, + "learning_rate": 0.00013891625615763546, + "epoch": 0.14 + }, + { + "current_steps": 1223, + "loss": 1.4532, + "learning_rate": 0.0001386206896551724, + "epoch": 0.14 + }, + { + "current_steps": 1225, + "loss": 1.468, + "learning_rate": 0.00013832512315270935, + "epoch": 0.14 + }, + { + "current_steps": 1227, + "loss": 1.4443, + "learning_rate": 0.00013802955665024629, + "epoch": 0.14 + }, + { + "current_steps": 1229, + "loss": 1.2362, + "learning_rate": 0.00013773399014778325, + "epoch": 0.14 + }, + { + "current_steps": 1231, + "loss": 1.6585, + "learning_rate": 0.0001374384236453202, + "epoch": 0.14 + }, + { + "current_steps": 1233, + "loss": 1.7561, + "learning_rate": 0.00013714285714285713, + "epoch": 0.14 + }, + { + "current_steps": 1235, + "loss": 1.6997, + "learning_rate": 0.00013684729064039408, + "epoch": 0.14 + }, + { + "current_steps": 1237, + "loss": 1.4349, + "learning_rate": 0.00013655172413793102, + "epoch": 0.14 + }, + { + "current_steps": 1239, + "loss": 1.3657, + "learning_rate": 0.00013625615763546798, + "epoch": 0.14 + }, + { + "current_steps": 1241, + "loss": 1.6985, + "learning_rate": 0.00013596059113300492, + "epoch": 0.14 + }, + { + "current_steps": 1243, + "loss": 1.5181, + "learning_rate": 0.00013566502463054186, + "epoch": 0.14 + }, + { + "current_steps": 1245, + "loss": 1.4471, + "learning_rate": 0.0001353694581280788, + "epoch": 0.14 + }, + { + "current_steps": 1247, + "loss": 1.65, + "learning_rate": 0.00013507389162561577, + "epoch": 0.15 + }, + { + "current_steps": 1249, + "loss": 1.1517, + "learning_rate": 0.00013477832512315271, + "epoch": 0.15 + }, + { + "current_steps": 1251, + "loss": 1.4166, + "learning_rate": 0.00013448275862068965, + "epoch": 0.15 + }, + { + "current_steps": 1253, + "loss": 1.4719, + "learning_rate": 0.0001341871921182266, + "epoch": 0.15 + }, + { + "current_steps": 1255, + "loss": 1.4172, + "learning_rate": 0.00013389162561576354, + "epoch": 0.15 + }, + { + "current_steps": 1257, + "loss": 1.3626, + "learning_rate": 0.00013359605911330048, + "epoch": 0.15 + }, + { + "current_steps": 1259, + "loss": 1.4102, + "learning_rate": 0.00013330049261083744, + "epoch": 0.15 + }, + { + "current_steps": 1261, + "loss": 1.3181, + "learning_rate": 0.00013300492610837438, + "epoch": 0.15 + }, + { + "current_steps": 1263, + "loss": 1.4147, + "learning_rate": 0.00013270935960591133, + "epoch": 0.15 + }, + { + "current_steps": 1265, + "loss": 1.5692, + "learning_rate": 0.00013241379310344827, + "epoch": 0.15 + }, + { + "current_steps": 1267, + "loss": 1.3556, + "learning_rate": 0.0001321182266009852, + "epoch": 0.15 + }, + { + "current_steps": 1269, + "loss": 1.5808, + "learning_rate": 0.00013182266009852215, + "epoch": 0.15 + }, + { + "current_steps": 1271, + "loss": 1.2675, + "learning_rate": 0.00013152709359605912, + "epoch": 0.15 + }, + { + "current_steps": 1273, + "loss": 1.4857, + "learning_rate": 0.00013123152709359606, + "epoch": 0.15 + }, + { + "current_steps": 1275, + "loss": 1.445, + "learning_rate": 0.000130935960591133, + "epoch": 0.15 + }, + { + "current_steps": 1277, + "loss": 1.6075, + "learning_rate": 0.00013064039408866994, + "epoch": 0.15 + }, + { + "current_steps": 1279, + "loss": 1.3829, + "learning_rate": 0.00013034482758620688, + "epoch": 0.15 + }, + { + "current_steps": 1281, + "loss": 1.3487, + "learning_rate": 0.00013004926108374385, + "epoch": 0.15 + }, + { + "current_steps": 1283, + "loss": 1.3105, + "learning_rate": 0.00012975369458128079, + "epoch": 0.15 + }, + { + "current_steps": 1285, + "loss": 1.5299, + "learning_rate": 0.00012945812807881773, + "epoch": 0.15 + }, + { + "current_steps": 1287, + "loss": 1.5279, + "learning_rate": 0.00012916256157635467, + "epoch": 0.15 + }, + { + "current_steps": 1289, + "loss": 1.5722, + "learning_rate": 0.0001288669950738916, + "epoch": 0.15 + }, + { + "current_steps": 1291, + "loss": 1.4816, + "learning_rate": 0.00012857142857142855, + "epoch": 0.15 + }, + { + "current_steps": 1293, + "loss": 1.3207, + "learning_rate": 0.00012827586206896552, + "epoch": 0.15 + }, + { + "current_steps": 1295, + "loss": 1.566, + "learning_rate": 0.00012798029556650246, + "epoch": 0.15 + }, + { + "current_steps": 1297, + "loss": 1.5285, + "learning_rate": 0.0001276847290640394, + "epoch": 0.15 + }, + { + "current_steps": 1299, + "loss": 1.6871, + "learning_rate": 0.00012738916256157634, + "epoch": 0.15 + }, + { + "current_steps": 1301, + "loss": 1.3097, + "learning_rate": 0.00012709359605911328, + "epoch": 0.15 + }, + { + "current_steps": 1303, + "loss": 1.408, + "learning_rate": 0.00012679802955665022, + "epoch": 0.15 + }, + { + "current_steps": 1305, + "loss": 1.3767, + "learning_rate": 0.0001265024630541872, + "epoch": 0.15 + }, + { + "current_steps": 1307, + "loss": 1.3903, + "learning_rate": 0.00012620689655172413, + "epoch": 0.15 + }, + { + "current_steps": 1309, + "loss": 1.433, + "learning_rate": 0.00012591133004926107, + "epoch": 0.15 + }, + { + "current_steps": 1311, + "loss": 1.3245, + "learning_rate": 0.000125615763546798, + "epoch": 0.15 + }, + { + "current_steps": 1313, + "loss": 1.3325, + "learning_rate": 0.00012532019704433495, + "epoch": 0.15 + }, + { + "current_steps": 1315, + "loss": 1.4503, + "learning_rate": 0.00012502463054187192, + "epoch": 0.15 + }, + { + "current_steps": 1317, + "loss": 1.49, + "learning_rate": 0.00012472906403940886, + "epoch": 0.15 + }, + { + "current_steps": 1319, + "loss": 1.485, + "learning_rate": 0.0001244334975369458, + "epoch": 0.15 + }, + { + "current_steps": 1321, + "loss": 1.4404, + "learning_rate": 0.00012413793103448274, + "epoch": 0.15 + }, + { + "current_steps": 1323, + "loss": 1.3019, + "learning_rate": 0.00012384236453201968, + "epoch": 0.15 + }, + { + "current_steps": 1325, + "loss": 1.5872, + "learning_rate": 0.00012354679802955662, + "epoch": 0.15 + }, + { + "current_steps": 1327, + "loss": 1.5612, + "learning_rate": 0.0001232512315270936, + "epoch": 0.15 + }, + { + "current_steps": 1329, + "loss": 1.4557, + "learning_rate": 0.00012295566502463053, + "epoch": 0.15 + }, + { + "current_steps": 1331, + "loss": 1.5886, + "learning_rate": 0.00012266009852216747, + "epoch": 0.15 + }, + { + "current_steps": 1333, + "loss": 1.3797, + "learning_rate": 0.0001223645320197044, + "epoch": 0.16 + }, + { + "current_steps": 1335, + "loss": 1.3713, + "learning_rate": 0.00012206896551724136, + "epoch": 0.16 + }, + { + "current_steps": 1337, + "loss": 1.4158, + "learning_rate": 0.00012177339901477832, + "epoch": 0.16 + }, + { + "current_steps": 1339, + "loss": 1.4021, + "learning_rate": 0.00012147783251231526, + "epoch": 0.16 + }, + { + "current_steps": 1341, + "loss": 1.4421, + "learning_rate": 0.00012118226600985221, + "epoch": 0.16 + }, + { + "current_steps": 1343, + "loss": 1.2342, + "learning_rate": 0.00012088669950738915, + "epoch": 0.16 + }, + { + "current_steps": 1345, + "loss": 1.3491, + "learning_rate": 0.0001205911330049261, + "epoch": 0.16 + }, + { + "current_steps": 1347, + "loss": 1.3119, + "learning_rate": 0.00012029556650246304, + "epoch": 0.16 + }, + { + "current_steps": 1349, + "loss": 1.328, + "learning_rate": 0.00011999999999999999, + "epoch": 0.16 + }, + { + "current_steps": 1351, + "loss": 1.5423, + "learning_rate": 0.00011970443349753694, + "epoch": 0.16 + }, + { + "current_steps": 1353, + "loss": 1.4857, + "learning_rate": 0.00011940886699507388, + "epoch": 0.16 + }, + { + "current_steps": 1355, + "loss": 1.6532, + "learning_rate": 0.00011911330049261082, + "epoch": 0.16 + }, + { + "current_steps": 1357, + "loss": 1.3726, + "learning_rate": 0.00011881773399014777, + "epoch": 0.16 + }, + { + "current_steps": 1359, + "loss": 1.1621, + "learning_rate": 0.0001185221674876847, + "epoch": 0.16 + }, + { + "current_steps": 1361, + "loss": 1.4227, + "learning_rate": 0.00011822660098522167, + "epoch": 0.16 + }, + { + "current_steps": 1363, + "loss": 1.3642, + "learning_rate": 0.00011793103448275861, + "epoch": 0.16 + }, + { + "current_steps": 1365, + "loss": 1.5377, + "learning_rate": 0.00011763546798029556, + "epoch": 0.16 + }, + { + "current_steps": 1367, + "loss": 1.4044, + "learning_rate": 0.0001173399014778325, + "epoch": 0.16 + }, + { + "current_steps": 1369, + "loss": 1.4965, + "learning_rate": 0.00011704433497536944, + "epoch": 0.16 + }, + { + "current_steps": 1371, + "loss": 1.2706, + "learning_rate": 0.0001167487684729064, + "epoch": 0.16 + }, + { + "current_steps": 1373, + "loss": 1.5986, + "learning_rate": 0.00011645320197044334, + "epoch": 0.16 + }, + { + "current_steps": 1375, + "loss": 1.3399, + "learning_rate": 0.00011615763546798029, + "epoch": 0.16 + }, + { + "current_steps": 1377, + "loss": 1.5509, + "learning_rate": 0.00011586206896551723, + "epoch": 0.16 + }, + { + "current_steps": 1379, + "loss": 1.473, + "learning_rate": 0.00011556650246305417, + "epoch": 0.16 + }, + { + "current_steps": 1381, + "loss": 1.487, + "learning_rate": 0.00011527093596059112, + "epoch": 0.16 + }, + { + "current_steps": 1383, + "loss": 1.5032, + "learning_rate": 0.00011497536945812808, + "epoch": 0.16 + }, + { + "current_steps": 1385, + "loss": 1.4614, + "learning_rate": 0.00011467980295566502, + "epoch": 0.16 + }, + { + "current_steps": 1387, + "loss": 1.756, + "learning_rate": 0.00011438423645320196, + "epoch": 0.16 + }, + { + "current_steps": 1389, + "loss": 1.4472, + "learning_rate": 0.0001140886699507389, + "epoch": 0.16 + }, + { + "current_steps": 1391, + "loss": 1.3235, + "learning_rate": 0.00011379310344827585, + "epoch": 0.16 + }, + { + "current_steps": 1393, + "loss": 1.4611, + "learning_rate": 0.0001134975369458128, + "epoch": 0.16 + }, + { + "current_steps": 1395, + "loss": 1.5012, + "learning_rate": 0.00011320197044334975, + "epoch": 0.16 + }, + { + "current_steps": 1397, + "loss": 1.4022, + "learning_rate": 0.00011290640394088669, + "epoch": 0.16 + }, + { + "current_steps": 1399, + "loss": 1.6699, + "learning_rate": 0.00011261083743842364, + "epoch": 0.16 + }, + { + "current_steps": 1401, + "loss": 1.3621, + "learning_rate": 0.00011231527093596058, + "epoch": 0.16 + }, + { + "current_steps": 1403, + "loss": 1.414, + "learning_rate": 0.00011201970443349752, + "epoch": 0.16 + }, + { + "current_steps": 1405, + "loss": 1.2507, + "learning_rate": 0.00011172413793103448, + "epoch": 0.16 + }, + { + "current_steps": 1407, + "loss": 1.3533, + "learning_rate": 0.00011142857142857142, + "epoch": 0.16 + }, + { + "current_steps": 1409, + "loss": 1.3269, + "learning_rate": 0.00011113300492610837, + "epoch": 0.16 + }, + { + "current_steps": 1411, + "loss": 1.3367, + "learning_rate": 0.00011083743842364531, + "epoch": 0.16 + }, + { + "current_steps": 1413, + "loss": 1.354, + "learning_rate": 0.00011054187192118225, + "epoch": 0.16 + }, + { + "current_steps": 1415, + "loss": 1.4322, + "learning_rate": 0.00011024630541871919, + "epoch": 0.16 + }, + { + "current_steps": 1417, + "loss": 1.3051, + "learning_rate": 0.00010995073891625615, + "epoch": 0.16 + }, + { + "current_steps": 1419, + "loss": 1.4336, + "learning_rate": 0.0001096551724137931, + "epoch": 0.17 + }, + { + "current_steps": 1421, + "loss": 1.4927, + "learning_rate": 0.00010935960591133004, + "epoch": 0.17 + }, + { + "current_steps": 1423, + "loss": 1.2925, + "learning_rate": 0.00010906403940886698, + "epoch": 0.17 + }, + { + "current_steps": 1425, + "loss": 1.5501, + "learning_rate": 0.00010876847290640392, + "epoch": 0.17 + }, + { + "current_steps": 1427, + "loss": 1.4563, + "learning_rate": 0.00010847290640394089, + "epoch": 0.17 + }, + { + "current_steps": 1429, + "loss": 1.5805, + "learning_rate": 0.00010817733990147783, + "epoch": 0.17 + }, + { + "current_steps": 1431, + "loss": 1.603, + "learning_rate": 0.00010788177339901477, + "epoch": 0.17 + }, + { + "current_steps": 1433, + "loss": 1.5101, + "learning_rate": 0.00010758620689655171, + "epoch": 0.17 + }, + { + "current_steps": 1435, + "loss": 1.3574, + "learning_rate": 0.00010729064039408865, + "epoch": 0.17 + }, + { + "current_steps": 1437, + "loss": 1.527, + "learning_rate": 0.0001069950738916256, + "epoch": 0.17 + }, + { + "current_steps": 1439, + "loss": 1.6483, + "learning_rate": 0.00010669950738916256, + "epoch": 0.17 + }, + { + "current_steps": 1441, + "loss": 1.5076, + "learning_rate": 0.0001064039408866995, + "epoch": 0.17 + }, + { + "current_steps": 1443, + "loss": 1.1374, + "learning_rate": 0.00010610837438423644, + "epoch": 0.17 + }, + { + "current_steps": 1445, + "loss": 1.4851, + "learning_rate": 0.00010581280788177338, + "epoch": 0.17 + }, + { + "current_steps": 1447, + "loss": 1.4381, + "learning_rate": 0.00010551724137931032, + "epoch": 0.17 + }, + { + "current_steps": 1449, + "loss": 1.4604, + "learning_rate": 0.00010522167487684729, + "epoch": 0.17 + }, + { + "current_steps": 1451, + "loss": 1.3896, + "learning_rate": 0.00010492610837438423, + "epoch": 0.17 + }, + { + "current_steps": 1453, + "loss": 1.6003, + "learning_rate": 0.00010463054187192117, + "epoch": 0.17 + }, + { + "current_steps": 1455, + "loss": 1.4545, + "learning_rate": 0.00010433497536945811, + "epoch": 0.17 + }, + { + "current_steps": 1457, + "loss": 1.5914, + "learning_rate": 0.00010403940886699505, + "epoch": 0.17 + }, + { + "current_steps": 1459, + "loss": 1.4277, + "learning_rate": 0.00010374384236453201, + "epoch": 0.17 + }, + { + "current_steps": 1461, + "loss": 1.4904, + "learning_rate": 0.00010344827586206896, + "epoch": 0.17 + }, + { + "current_steps": 1463, + "loss": 1.2917, + "learning_rate": 0.0001031527093596059, + "epoch": 0.17 + }, + { + "current_steps": 1465, + "loss": 1.5209, + "learning_rate": 0.00010285714285714284, + "epoch": 0.17 + }, + { + "current_steps": 1467, + "loss": 1.6083, + "learning_rate": 0.0001025615763546798, + "epoch": 0.17 + }, + { + "current_steps": 1469, + "loss": 1.322, + "learning_rate": 0.00010226600985221674, + "epoch": 0.17 + }, + { + "current_steps": 1471, + "loss": 1.3146, + "learning_rate": 0.00010197044334975368, + "epoch": 0.17 + }, + { + "current_steps": 1473, + "loss": 1.4603, + "learning_rate": 0.00010167487684729063, + "epoch": 0.17 + }, + { + "current_steps": 1475, + "loss": 1.2714, + "learning_rate": 0.00010137931034482757, + "epoch": 0.17 + }, + { + "current_steps": 1477, + "loss": 1.3224, + "learning_rate": 0.00010108374384236453, + "epoch": 0.17 + }, + { + "current_steps": 1479, + "loss": 1.3709, + "learning_rate": 0.00010078817733990147, + "epoch": 0.17 + }, + { + "current_steps": 1481, + "loss": 1.6218, + "learning_rate": 0.00010049261083743841, + "epoch": 0.17 + }, + { + "current_steps": 1483, + "loss": 1.5694, + "learning_rate": 0.00010019704433497536, + "epoch": 0.17 + }, + { + "current_steps": 1485, + "loss": 1.4105, + "learning_rate": 9.99014778325123e-05, + "epoch": 0.17 + }, + { + "current_steps": 1487, + "loss": 1.5503, + "learning_rate": 9.960591133004926e-05, + "epoch": 0.17 + }, + { + "current_steps": 1489, + "loss": 1.4169, + "learning_rate": 9.93103448275862e-05, + "epoch": 0.17 + }, + { + "current_steps": 1491, + "loss": 1.5529, + "learning_rate": 9.901477832512314e-05, + "epoch": 0.17 + }, + { + "current_steps": 1493, + "loss": 1.339, + "learning_rate": 9.871921182266008e-05, + "epoch": 0.17 + }, + { + "current_steps": 1495, + "loss": 1.4177, + "learning_rate": 9.842364532019705e-05, + "epoch": 0.17 + }, + { + "current_steps": 1497, + "loss": 1.3418, + "learning_rate": 9.812807881773399e-05, + "epoch": 0.17 + }, + { + "current_steps": 1499, + "loss": 1.405, + "learning_rate": 9.783251231527093e-05, + "epoch": 0.17 + }, + { + "current_steps": 1501, + "loss": 1.3082, + "learning_rate": 9.753694581280787e-05, + "epoch": 0.17 + }, + { + "current_steps": 1503, + "loss": 1.5519, + "learning_rate": 9.724137931034481e-05, + "epoch": 0.18 + }, + { + "current_steps": 1505, + "loss": 1.4063, + "learning_rate": 9.694581280788178e-05, + "epoch": 0.18 + }, + { + "current_steps": 1507, + "loss": 1.4751, + "learning_rate": 9.694581280788178e-05, + "epoch": 0.18 + }, + { + "current_steps": 1509, + "loss": 1.6339, + "learning_rate": 9.665024630541872e-05, + "epoch": 0.18 + }, + { + "current_steps": 1511, + "loss": 1.4362, + "learning_rate": 9.635467980295566e-05, + "epoch": 0.18 + }, + { + "current_steps": 1513, + "loss": 1.5477, + "learning_rate": 9.60591133004926e-05, + "epoch": 0.18 + }, + { + "current_steps": 1515, + "loss": 1.3371, + "learning_rate": 9.576354679802954e-05, + "epoch": 0.18 + }, + { + "current_steps": 1517, + "loss": 1.2858, + "learning_rate": 9.546798029556648e-05, + "epoch": 0.18 + }, + { + "current_steps": 1519, + "loss": 1.2767, + "learning_rate": 9.517241379310345e-05, + "epoch": 0.18 + }, + { + "current_steps": 1521, + "loss": 1.374, + "learning_rate": 9.487684729064039e-05, + "epoch": 0.18 + }, + { + "current_steps": 1523, + "loss": 1.264, + "learning_rate": 9.458128078817733e-05, + "epoch": 0.18 + }, + { + "current_steps": 1525, + "loss": 1.3859, + "learning_rate": 9.428571428571427e-05, + "epoch": 0.18 + }, + { + "current_steps": 1527, + "loss": 1.538, + "learning_rate": 9.399014778325121e-05, + "epoch": 0.18 + }, + { + "current_steps": 1529, + "loss": 1.3722, + "learning_rate": 9.369458128078817e-05, + "epoch": 0.18 + }, + { + "current_steps": 1531, + "loss": 1.5568, + "learning_rate": 9.339901477832512e-05, + "epoch": 0.18 + }, + { + "current_steps": 1533, + "loss": 1.5843, + "learning_rate": 9.310344827586206e-05, + "epoch": 0.18 + }, + { + "current_steps": 1535, + "loss": 1.3413, + "learning_rate": 9.2807881773399e-05, + "epoch": 0.18 + }, + { + "current_steps": 1537, + "loss": 1.4297, + "learning_rate": 9.251231527093596e-05, + "epoch": 0.18 + }, + { + "current_steps": 1539, + "loss": 1.359, + "learning_rate": 9.22167487684729e-05, + "epoch": 0.18 + }, + { + "current_steps": 1541, + "loss": 1.3543, + "learning_rate": 9.192118226600985e-05, + "epoch": 0.18 + }, + { + "current_steps": 1543, + "loss": 1.5025, + "learning_rate": 9.162561576354679e-05, + "epoch": 0.18 + }, + { + "current_steps": 1545, + "loss": 1.4669, + "learning_rate": 9.133004926108373e-05, + "epoch": 0.18 + }, + { + "current_steps": 1547, + "loss": 1.3861, + "learning_rate": 9.103448275862069e-05, + "epoch": 0.18 + }, + { + "current_steps": 1549, + "loss": 1.2934, + "learning_rate": 9.073891625615763e-05, + "epoch": 0.18 + }, + { + "current_steps": 1551, + "loss": 1.3889, + "learning_rate": 9.044334975369457e-05, + "epoch": 0.18 + }, + { + "current_steps": 1553, + "loss": 1.4878, + "learning_rate": 9.014778325123152e-05, + "epoch": 0.18 + }, + { + "current_steps": 1555, + "loss": 1.6128, + "learning_rate": 8.985221674876846e-05, + "epoch": 0.18 + }, + { + "current_steps": 1557, + "loss": 1.3818, + "learning_rate": 8.955665024630542e-05, + "epoch": 0.18 + }, + { + "current_steps": 1559, + "loss": 1.5759, + "learning_rate": 8.926108374384236e-05, + "epoch": 0.18 + }, + { + "current_steps": 1561, + "loss": 1.3481, + "learning_rate": 8.89655172413793e-05, + "epoch": 0.18 + }, + { + "current_steps": 1563, + "loss": 1.402, + "learning_rate": 8.866995073891625e-05, + "epoch": 0.18 + }, + { + "current_steps": 1565, + "loss": 1.4652, + "learning_rate": 8.83743842364532e-05, + "epoch": 0.18 + }, + { + "current_steps": 1567, + "loss": 1.4362, + "learning_rate": 8.807881773399015e-05, + "epoch": 0.18 + }, + { + "current_steps": 1569, + "loss": 1.1903, + "learning_rate": 8.778325123152709e-05, + "epoch": 0.18 + }, + { + "current_steps": 1571, + "loss": 1.6198, + "learning_rate": 8.748768472906403e-05, + "epoch": 0.18 + }, + { + "current_steps": 1573, + "loss": 1.3341, + "learning_rate": 8.719211822660097e-05, + "epoch": 0.18 + }, + { + "current_steps": 1575, + "loss": 1.4117, + "learning_rate": 8.689655172413794e-05, + "epoch": 0.18 + }, + { + "current_steps": 1577, + "loss": 1.4784, + "learning_rate": 8.660098522167488e-05, + "epoch": 0.18 + }, + { + "current_steps": 1579, + "loss": 1.3749, + "learning_rate": 8.630541871921182e-05, + "epoch": 0.18 + }, + { + "current_steps": 1581, + "loss": 1.5399, + "learning_rate": 8.600985221674876e-05, + "epoch": 0.18 + }, + { + "current_steps": 1583, + "loss": 1.48, + "learning_rate": 8.57142857142857e-05, + "epoch": 0.18 + }, + { + "current_steps": 1585, + "loss": 1.5269, + "learning_rate": 8.541871921182264e-05, + "epoch": 0.18 + }, + { + "current_steps": 1587, + "loss": 1.6458, + "learning_rate": 8.512315270935961e-05, + "epoch": 0.18 + }, + { + "current_steps": 1589, + "loss": 1.4381, + "learning_rate": 8.482758620689655e-05, + "epoch": 0.19 + }, + { + "current_steps": 1591, + "loss": 1.5807, + "learning_rate": 8.453201970443349e-05, + "epoch": 0.19 + }, + { + "current_steps": 1593, + "loss": 1.1841, + "learning_rate": 8.423645320197043e-05, + "epoch": 0.19 + }, + { + "current_steps": 1595, + "loss": 1.4327, + "learning_rate": 8.394088669950737e-05, + "epoch": 0.19 + }, + { + "current_steps": 1597, + "loss": 1.3586, + "learning_rate": 8.364532019704434e-05, + "epoch": 0.19 + }, + { + "current_steps": 1599, + "loss": 1.4018, + "learning_rate": 8.334975369458128e-05, + "epoch": 0.19 + }, + { + "current_steps": 1601, + "loss": 1.3762, + "learning_rate": 8.305418719211822e-05, + "epoch": 0.19 + }, + { + "current_steps": 1603, + "loss": 1.4126, + "learning_rate": 8.275862068965516e-05, + "epoch": 0.19 + }, + { + "current_steps": 1605, + "loss": 1.2462, + "learning_rate": 8.24630541871921e-05, + "epoch": 0.19 + }, + { + "current_steps": 1607, + "loss": 1.4754, + "learning_rate": 8.216748768472905e-05, + "epoch": 0.19 + }, + { + "current_steps": 1609, + "loss": 1.5572, + "learning_rate": 8.187192118226601e-05, + "epoch": 0.19 + }, + { + "current_steps": 1611, + "loss": 1.5467, + "learning_rate": 8.157635467980295e-05, + "epoch": 0.19 + }, + { + "current_steps": 1613, + "loss": 1.5242, + "learning_rate": 8.128078817733989e-05, + "epoch": 0.19 + }, + { + "current_steps": 1615, + "loss": 1.5588, + "learning_rate": 8.098522167487684e-05, + "epoch": 0.19 + }, + { + "current_steps": 1617, + "loss": 1.6107, + "learning_rate": 8.068965517241378e-05, + "epoch": 0.19 + }, + { + "current_steps": 1619, + "loss": 1.3845, + "learning_rate": 8.039408866995073e-05, + "epoch": 0.19 + }, + { + "current_steps": 1621, + "loss": 1.391, + "learning_rate": 8.009852216748768e-05, + "epoch": 0.19 + }, + { + "current_steps": 1623, + "loss": 1.3375, + "learning_rate": 7.980295566502462e-05, + "epoch": 0.19 + }, + { + "current_steps": 1625, + "loss": 1.5187, + "learning_rate": 7.950738916256157e-05, + "epoch": 0.19 + }, + { + "current_steps": 1627, + "loss": 1.4744, + "learning_rate": 7.921182266009851e-05, + "epoch": 0.19 + }, + { + "current_steps": 1629, + "loss": 1.4449, + "learning_rate": 7.891625615763546e-05, + "epoch": 0.19 + }, + { + "current_steps": 1631, + "loss": 1.4237, + "learning_rate": 7.862068965517241e-05, + "epoch": 0.19 + }, + { + "current_steps": 1633, + "loss": 1.3786, + "learning_rate": 7.832512315270935e-05, + "epoch": 0.19 + }, + { + "current_steps": 1635, + "loss": 1.6633, + "learning_rate": 7.80295566502463e-05, + "epoch": 0.19 + }, + { + "current_steps": 1637, + "loss": 1.3197, + "learning_rate": 7.773399014778325e-05, + "epoch": 0.19 + }, + { + "current_steps": 1639, + "loss": 1.2572, + "learning_rate": 7.743842364532019e-05, + "epoch": 0.19 + }, + { + "current_steps": 1641, + "loss": 1.6133, + "learning_rate": 7.714285714285713e-05, + "epoch": 0.19 + }, + { + "current_steps": 1643, + "loss": 1.3526, + "learning_rate": 7.68472906403941e-05, + "epoch": 0.19 + }, + { + "current_steps": 1645, + "loss": 1.2797, + "learning_rate": 7.655172413793103e-05, + "epoch": 0.19 + }, + { + "current_steps": 1647, + "loss": 1.3872, + "learning_rate": 7.625615763546798e-05, + "epoch": 0.19 + }, + { + "current_steps": 1649, + "loss": 1.2822, + "learning_rate": 7.596059113300492e-05, + "epoch": 0.19 + }, + { + "current_steps": 1651, + "loss": 1.3557, + "learning_rate": 7.566502463054186e-05, + "epoch": 0.19 + }, + { + "current_steps": 1653, + "loss": 1.1847, + "learning_rate": 7.536945812807882e-05, + "epoch": 0.19 + }, + { + "current_steps": 1655, + "loss": 1.4428, + "learning_rate": 7.507389162561577e-05, + "epoch": 0.19 + }, + { + "current_steps": 1657, + "loss": 1.443, + "learning_rate": 7.47783251231527e-05, + "epoch": 0.19 + }, + { + "current_steps": 1659, + "loss": 1.4633, + "learning_rate": 7.448275862068965e-05, + "epoch": 0.19 + }, + { + "current_steps": 1661, + "loss": 1.5413, + "learning_rate": 7.418719211822659e-05, + "epoch": 0.19 + }, + { + "current_steps": 1663, + "loss": 1.3113, + "learning_rate": 7.389162561576354e-05, + "epoch": 0.19 + }, + { + "current_steps": 1665, + "loss": 1.4703, + "learning_rate": 7.359605911330048e-05, + "epoch": 0.19 + }, + { + "current_steps": 1667, + "loss": 1.3975, + "learning_rate": 7.330049261083744e-05, + "epoch": 0.19 + }, + { + "current_steps": 1669, + "loss": 1.3937, + "learning_rate": 7.300492610837438e-05, + "epoch": 0.19 + }, + { + "current_steps": 1671, + "loss": 1.4717, + "learning_rate": 7.270935960591132e-05, + "epoch": 0.19 + }, + { + "current_steps": 1673, + "loss": 1.3512, + "learning_rate": 7.241379310344827e-05, + "epoch": 0.19 + }, + { + "current_steps": 1675, + "loss": 1.4515, + "learning_rate": 7.211822660098521e-05, + "epoch": 0.2 + }, + { + "current_steps": 1677, + "loss": 1.6275, + "learning_rate": 7.182266009852215e-05, + "epoch": 0.2 + }, + { + "current_steps": 1679, + "loss": 1.3561, + "learning_rate": 7.152709359605911e-05, + "epoch": 0.2 + }, + { + "current_steps": 1681, + "loss": 1.2888, + "learning_rate": 7.123152709359605e-05, + "epoch": 0.2 + }, + { + "current_steps": 1683, + "loss": 1.351, + "learning_rate": 7.0935960591133e-05, + "epoch": 0.2 + }, + { + "current_steps": 1685, + "loss": 1.4117, + "learning_rate": 7.064039408866994e-05, + "epoch": 0.2 + }, + { + "current_steps": 1687, + "loss": 1.2991, + "learning_rate": 7.034482758620688e-05, + "epoch": 0.2 + }, + { + "current_steps": 1689, + "loss": 1.413, + "learning_rate": 7.004926108374384e-05, + "epoch": 0.2 + }, + { + "current_steps": 1691, + "loss": 1.3917, + "learning_rate": 6.975369458128078e-05, + "epoch": 0.2 + }, + { + "current_steps": 1693, + "loss": 1.3873, + "learning_rate": 6.945812807881773e-05, + "epoch": 0.2 + }, + { + "current_steps": 1695, + "loss": 1.4783, + "learning_rate": 6.916256157635467e-05, + "epoch": 0.2 + }, + { + "current_steps": 1697, + "loss": 1.5639, + "learning_rate": 6.886699507389163e-05, + "epoch": 0.2 + }, + { + "current_steps": 1699, + "loss": 1.5239, + "learning_rate": 6.857142857142857e-05, + "epoch": 0.2 + }, + { + "current_steps": 1701, + "loss": 1.5281, + "learning_rate": 6.827586206896551e-05, + "epoch": 0.2 + }, + { + "current_steps": 1703, + "loss": 1.2794, + "learning_rate": 6.798029556650246e-05, + "epoch": 0.2 + }, + { + "current_steps": 1705, + "loss": 1.2762, + "learning_rate": 6.76847290640394e-05, + "epoch": 0.2 + }, + { + "current_steps": 1707, + "loss": 1.2824, + "learning_rate": 6.738916256157636e-05, + "epoch": 0.2 + }, + { + "current_steps": 1709, + "loss": 1.2033, + "learning_rate": 6.70935960591133e-05, + "epoch": 0.2 + }, + { + "current_steps": 1711, + "loss": 1.4473, + "learning_rate": 6.679802955665024e-05, + "epoch": 0.2 + }, + { + "current_steps": 1713, + "loss": 1.6409, + "learning_rate": 6.650246305418719e-05, + "epoch": 0.2 + }, + { + "current_steps": 1715, + "loss": 1.6363, + "learning_rate": 6.620689655172413e-05, + "epoch": 0.2 + }, + { + "current_steps": 1717, + "loss": 1.3944, + "learning_rate": 6.591133004926107e-05, + "epoch": 0.2 + }, + { + "current_steps": 1719, + "loss": 1.5966, + "learning_rate": 6.561576354679803e-05, + "epoch": 0.2 + }, + { + "current_steps": 1721, + "loss": 1.4439, + "learning_rate": 6.532019704433497e-05, + "epoch": 0.2 + }, + { + "current_steps": 1723, + "loss": 1.4548, + "learning_rate": 6.502463054187192e-05, + "epoch": 0.2 + }, + { + "current_steps": 1725, + "loss": 1.2948, + "learning_rate": 6.472906403940886e-05, + "epoch": 0.2 + }, + { + "current_steps": 1727, + "loss": 1.5007, + "learning_rate": 6.44334975369458e-05, + "epoch": 0.2 + }, + { + "current_steps": 1729, + "loss": 1.3214, + "learning_rate": 6.413793103448276e-05, + "epoch": 0.2 + }, + { + "current_steps": 1731, + "loss": 1.4571, + "learning_rate": 6.38423645320197e-05, + "epoch": 0.2 + }, + { + "current_steps": 1733, + "loss": 1.5113, + "learning_rate": 6.354679802955664e-05, + "epoch": 0.2 + }, + { + "current_steps": 1735, + "loss": 1.4404, + "learning_rate": 6.32512315270936e-05, + "epoch": 0.2 + }, + { + "current_steps": 1737, + "loss": 1.3533, + "learning_rate": 6.295566502463053e-05, + "epoch": 0.2 + }, + { + "current_steps": 1739, + "loss": 1.4039, + "learning_rate": 6.266009852216747e-05, + "epoch": 0.2 + }, + { + "current_steps": 1741, + "loss": 1.4558, + "learning_rate": 6.236453201970443e-05, + "epoch": 0.2 + }, + { + "current_steps": 1743, + "loss": 1.4169, + "learning_rate": 6.206896551724137e-05, + "epoch": 0.2 + }, + { + "current_steps": 1745, + "loss": 1.5056, + "learning_rate": 6.177339901477831e-05, + "epoch": 0.2 + }, + { + "current_steps": 1747, + "loss": 1.4671, + "learning_rate": 6.147783251231526e-05, + "epoch": 0.2 + }, + { + "current_steps": 1749, + "loss": 1.3122, + "learning_rate": 6.11822660098522e-05, + "epoch": 0.2 + }, + { + "current_steps": 1751, + "loss": 1.3384, + "learning_rate": 6.088669950738916e-05, + "epoch": 0.2 + }, + { + "current_steps": 1753, + "loss": 1.3222, + "learning_rate": 6.059113300492611e-05, + "epoch": 0.2 + }, + { + "current_steps": 1755, + "loss": 1.4684, + "learning_rate": 6.029556650246305e-05, + "epoch": 0.2 + }, + { + "current_steps": 1757, + "loss": 1.2302, + "learning_rate": 5.9999999999999995e-05, + "epoch": 0.2 + }, + { + "current_steps": 1759, + "loss": 1.3662, + "learning_rate": 5.970443349753694e-05, + "epoch": 0.2 + }, + { + "current_steps": 1761, + "loss": 1.4275, + "learning_rate": 5.940886699507388e-05, + "epoch": 0.21 + }, + { + "current_steps": 1763, + "loss": 1.2949, + "learning_rate": 5.911330049261084e-05, + "epoch": 0.21 + }, + { + "current_steps": 1765, + "loss": 1.4499, + "learning_rate": 5.881773399014778e-05, + "epoch": 0.21 + }, + { + "current_steps": 1767, + "loss": 1.3698, + "learning_rate": 5.852216748768472e-05, + "epoch": 0.21 + }, + { + "current_steps": 1769, + "loss": 1.217, + "learning_rate": 5.822660098522167e-05, + "epoch": 0.21 + }, + { + "current_steps": 1771, + "loss": 1.5283, + "learning_rate": 5.793103448275861e-05, + "epoch": 0.21 + }, + { + "current_steps": 1773, + "loss": 1.4191, + "learning_rate": 5.763546798029556e-05, + "epoch": 0.21 + }, + { + "current_steps": 1775, + "loss": 1.4649, + "learning_rate": 5.733990147783251e-05, + "epoch": 0.21 + }, + { + "current_steps": 1777, + "loss": 1.3919, + "learning_rate": 5.704433497536945e-05, + "epoch": 0.21 + }, + { + "current_steps": 1779, + "loss": 1.5556, + "learning_rate": 5.67487684729064e-05, + "epoch": 0.21 + }, + { + "current_steps": 1781, + "loss": 1.5268, + "learning_rate": 5.645320197044334e-05, + "epoch": 0.21 + }, + { + "current_steps": 1783, + "loss": 1.3228, + "learning_rate": 5.615763546798029e-05, + "epoch": 0.21 + }, + { + "current_steps": 1785, + "loss": 1.5109, + "learning_rate": 5.586206896551724e-05, + "epoch": 0.21 + }, + { + "current_steps": 1787, + "loss": 1.4711, + "learning_rate": 5.5566502463054186e-05, + "epoch": 0.21 + }, + { + "current_steps": 1789, + "loss": 1.5531, + "learning_rate": 5.5270935960591126e-05, + "epoch": 0.21 + }, + { + "current_steps": 1791, + "loss": 1.4627, + "learning_rate": 5.4975369458128074e-05, + "epoch": 0.21 + }, + { + "current_steps": 1793, + "loss": 1.375, + "learning_rate": 5.467980295566502e-05, + "epoch": 0.21 + }, + { + "current_steps": 1795, + "loss": 1.3208, + "learning_rate": 5.438423645320196e-05, + "epoch": 0.21 + }, + { + "current_steps": 1797, + "loss": 1.5061, + "learning_rate": 5.4088669950738916e-05, + "epoch": 0.21 + }, + { + "current_steps": 1799, + "loss": 1.4171, + "learning_rate": 5.3793103448275856e-05, + "epoch": 0.21 + }, + { + "current_steps": 1801, + "loss": 1.2523, + "learning_rate": 5.34975369458128e-05, + "epoch": 0.21 + }, + { + "current_steps": 1803, + "loss": 1.1953, + "learning_rate": 5.320197044334975e-05, + "epoch": 0.21 + }, + { + "current_steps": 1805, + "loss": 1.5136, + "learning_rate": 5.290640394088669e-05, + "epoch": 0.21 + }, + { + "current_steps": 1807, + "loss": 1.2321, + "learning_rate": 5.2610837438423646e-05, + "epoch": 0.21 + }, + { + "current_steps": 1809, + "loss": 1.2477, + "learning_rate": 5.231527093596059e-05, + "epoch": 0.21 + }, + { + "current_steps": 1811, + "loss": 1.4657, + "learning_rate": 5.201970443349753e-05, + "epoch": 0.21 + }, + { + "current_steps": 1813, + "loss": 1.6217, + "learning_rate": 5.172413793103448e-05, + "epoch": 0.21 + }, + { + "current_steps": 1815, + "loss": 1.4777, + "learning_rate": 5.142857142857142e-05, + "epoch": 0.21 + }, + { + "current_steps": 1817, + "loss": 1.2844, + "learning_rate": 5.113300492610837e-05, + "epoch": 0.21 + }, + { + "current_steps": 1819, + "loss": 1.5179, + "learning_rate": 5.083743842364532e-05, + "epoch": 0.21 + }, + { + "current_steps": 1821, + "loss": 1.5915, + "learning_rate": 5.0541871921182264e-05, + "epoch": 0.21 + }, + { + "current_steps": 1823, + "loss": 1.2204, + "learning_rate": 5.0246305418719205e-05, + "epoch": 0.21 + }, + { + "current_steps": 1825, + "loss": 1.3171, + "learning_rate": 4.995073891625615e-05, + "epoch": 0.21 + }, + { + "current_steps": 1827, + "loss": 1.4406, + "learning_rate": 4.96551724137931e-05, + "epoch": 0.21 + }, + { + "current_steps": 1829, + "loss": 1.4661, + "learning_rate": 4.935960591133004e-05, + "epoch": 0.21 + }, + { + "current_steps": 1831, + "loss": 1.4711, + "learning_rate": 4.9064039408866995e-05, + "epoch": 0.21 + }, + { + "current_steps": 1833, + "loss": 1.3958, + "learning_rate": 4.8768472906403935e-05, + "epoch": 0.21 + }, + { + "current_steps": 1835, + "loss": 1.3915, + "learning_rate": 4.847290640394089e-05, + "epoch": 0.21 + }, + { + "current_steps": 1837, + "loss": 1.412, + "learning_rate": 4.817733990147783e-05, + "epoch": 0.21 + }, + { + "current_steps": 1839, + "loss": 1.3662, + "learning_rate": 4.788177339901477e-05, + "epoch": 0.21 + }, + { + "current_steps": 1841, + "loss": 1.3259, + "learning_rate": 4.788177339901477e-05, + "epoch": 0.21 + }, + { + "current_steps": 1843, + "loss": 1.499, + "learning_rate": 4.7586206896551725e-05, + "epoch": 0.21 + }, + { + "current_steps": 1845, + "loss": 1.4059, + "learning_rate": 4.7290640394088666e-05, + "epoch": 0.21 + }, + { + "current_steps": 1847, + "loss": 1.4343, + "learning_rate": 4.6995073891625606e-05, + "epoch": 0.22 + }, + { + "current_steps": 1849, + "loss": 1.3018, + "learning_rate": 4.669950738916256e-05, + "epoch": 0.22 + }, + { + "current_steps": 1851, + "loss": 1.3917, + "learning_rate": 4.64039408866995e-05, + "epoch": 0.22 + }, + { + "current_steps": 1853, + "loss": 1.2198, + "learning_rate": 4.610837438423645e-05, + "epoch": 0.22 + }, + { + "current_steps": 1855, + "loss": 1.3396, + "learning_rate": 4.5812807881773396e-05, + "epoch": 0.22 + }, + { + "current_steps": 1857, + "loss": 1.515, + "learning_rate": 4.551724137931034e-05, + "epoch": 0.22 + }, + { + "current_steps": 1859, + "loss": 1.2641, + "learning_rate": 4.5221674876847284e-05, + "epoch": 0.22 + }, + { + "current_steps": 1861, + "loss": 1.3899, + "learning_rate": 4.492610837438423e-05, + "epoch": 0.22 + }, + { + "current_steps": 1863, + "loss": 1.5666, + "learning_rate": 4.463054187192118e-05, + "epoch": 0.22 + }, + { + "current_steps": 1865, + "loss": 1.2917, + "learning_rate": 4.4334975369458126e-05, + "epoch": 0.22 + }, + { + "current_steps": 1867, + "loss": 1.4541, + "learning_rate": 4.4039408866995073e-05, + "epoch": 0.22 + }, + { + "current_steps": 1869, + "loss": 1.2695, + "learning_rate": 4.3743842364532014e-05, + "epoch": 0.22 + }, + { + "current_steps": 1871, + "loss": 1.4244, + "learning_rate": 4.344827586206897e-05, + "epoch": 0.22 + }, + { + "current_steps": 1873, + "loss": 1.325, + "learning_rate": 4.315270935960591e-05, + "epoch": 0.22 + }, + { + "current_steps": 1875, + "loss": 1.3828, + "learning_rate": 4.285714285714285e-05, + "epoch": 0.22 + }, + { + "current_steps": 1877, + "loss": 1.4658, + "learning_rate": 4.2561576354679804e-05, + "epoch": 0.22 + }, + { + "current_steps": 1879, + "loss": 1.4219, + "learning_rate": 4.2266009852216744e-05, + "epoch": 0.22 + }, + { + "current_steps": 1881, + "loss": 1.3448, + "learning_rate": 4.1970443349753685e-05, + "epoch": 0.22 + }, + { + "current_steps": 1883, + "loss": 1.3195, + "learning_rate": 4.167487684729064e-05, + "epoch": 0.22 + }, + { + "current_steps": 1885, + "loss": 1.3979, + "learning_rate": 4.137931034482758e-05, + "epoch": 0.22 + }, + { + "current_steps": 1887, + "loss": 1.4834, + "learning_rate": 4.108374384236453e-05, + "epoch": 0.22 + }, + { + "current_steps": 1889, + "loss": 1.4958, + "learning_rate": 4.0788177339901475e-05, + "epoch": 0.22 + }, + { + "current_steps": 1891, + "loss": 1.5403, + "learning_rate": 4.049261083743842e-05, + "epoch": 0.22 + }, + { + "current_steps": 1893, + "loss": 1.2163, + "learning_rate": 4.019704433497536e-05, + "epoch": 0.22 + }, + { + "current_steps": 1895, + "loss": 1.2908, + "learning_rate": 3.990147783251231e-05, + "epoch": 0.22 + }, + { + "current_steps": 1897, + "loss": 1.4287, + "learning_rate": 3.960591133004926e-05, + "epoch": 0.22 + }, + { + "current_steps": 1899, + "loss": 1.409, + "learning_rate": 3.9310344827586205e-05, + "epoch": 0.22 + }, + { + "current_steps": 1901, + "loss": 1.3897, + "learning_rate": 3.901477832512315e-05, + "epoch": 0.22 + }, + { + "current_steps": 1903, + "loss": 1.3014, + "learning_rate": 3.871921182266009e-05, + "epoch": 0.22 + }, + { + "current_steps": 1905, + "loss": 1.3687, + "learning_rate": 3.842364532019705e-05, + "epoch": 0.22 + }, + { + "current_steps": 1907, + "loss": 1.4001, + "learning_rate": 3.812807881773399e-05, + "epoch": 0.22 + }, + { + "current_steps": 1909, + "loss": 1.3124, + "learning_rate": 3.783251231527093e-05, + "epoch": 0.22 + }, + { + "current_steps": 1911, + "loss": 1.3053, + "learning_rate": 3.753694581280788e-05, + "epoch": 0.22 + }, + { + "current_steps": 1913, + "loss": 1.435, + "learning_rate": 3.724137931034482e-05, + "epoch": 0.22 + }, + { + "current_steps": 1915, + "loss": 1.2448, + "learning_rate": 3.694581280788177e-05, + "epoch": 0.22 + }, + { + "current_steps": 1917, + "loss": 1.3858, + "learning_rate": 3.665024630541872e-05, + "epoch": 0.22 + }, + { + "current_steps": 1919, + "loss": 1.3794, + "learning_rate": 3.635467980295566e-05, + "epoch": 0.22 + }, + { + "current_steps": 1921, + "loss": 1.3404, + "learning_rate": 3.6059113300492606e-05, + "epoch": 0.22 + }, + { + "current_steps": 1923, + "loss": 1.5008, + "learning_rate": 3.5763546798029553e-05, + "epoch": 0.22 + }, + { + "current_steps": 1925, + "loss": 1.5155, + "learning_rate": 3.54679802955665e-05, + "epoch": 0.22 + }, + { + "current_steps": 1927, + "loss": 1.2666, + "learning_rate": 3.517241379310344e-05, + "epoch": 0.22 + }, + { + "current_steps": 1929, + "loss": 1.457, + "learning_rate": 3.487684729064039e-05, + "epoch": 0.22 + }, + { + "current_steps": 1931, + "loss": 1.5565, + "learning_rate": 3.4581280788177336e-05, + "epoch": 0.22 + }, + { + "current_steps": 1933, + "loss": 1.3359, + "learning_rate": 3.4285714285714284e-05, + "epoch": 0.23 + }, + { + "current_steps": 1935, + "loss": 1.4157, + "learning_rate": 3.399014778325123e-05, + "epoch": 0.23 + }, + { + "current_steps": 1937, + "loss": 1.322, + "learning_rate": 3.369458128078818e-05, + "epoch": 0.23 + }, + { + "current_steps": 1939, + "loss": 1.6225, + "learning_rate": 3.339901477832512e-05, + "epoch": 0.23 + }, + { + "current_steps": 1941, + "loss": 1.4268, + "learning_rate": 3.3103448275862067e-05, + "epoch": 0.23 + }, + { + "current_steps": 1943, + "loss": 1.3425, + "learning_rate": 3.2807881773399014e-05, + "epoch": 0.23 + }, + { + "current_steps": 1945, + "loss": 1.3537, + "learning_rate": 3.251231527093596e-05, + "epoch": 0.23 + }, + { + "current_steps": 1947, + "loss": 1.3513, + "learning_rate": 3.22167487684729e-05, + "epoch": 0.23 + }, + { + "current_steps": 1949, + "loss": 1.4673, + "learning_rate": 3.192118226600985e-05, + "epoch": 0.23 + }, + { + "current_steps": 1951, + "loss": 1.4394, + "learning_rate": 3.16256157635468e-05, + "epoch": 0.23 + }, + { + "current_steps": 1953, + "loss": 1.4502, + "learning_rate": 3.133004926108374e-05, + "epoch": 0.23 + }, + { + "current_steps": 1955, + "loss": 1.415, + "learning_rate": 3.1034482758620685e-05, + "epoch": 0.23 + }, + { + "current_steps": 1957, + "loss": 1.2713, + "learning_rate": 3.073891625615763e-05, + "epoch": 0.23 + }, + { + "current_steps": 1959, + "loss": 1.2928, + "learning_rate": 3.044334975369458e-05, + "epoch": 0.23 + }, + { + "current_steps": 1961, + "loss": 1.3281, + "learning_rate": 3.0147783251231524e-05, + "epoch": 0.23 + }, + { + "current_steps": 1963, + "loss": 1.3303, + "learning_rate": 2.985221674876847e-05, + "epoch": 0.23 + }, + { + "current_steps": 1965, + "loss": 1.3034, + "learning_rate": 2.955665024630542e-05, + "epoch": 0.23 + }, + { + "current_steps": 1967, + "loss": 1.5266, + "learning_rate": 2.926108374384236e-05, + "epoch": 0.23 + }, + { + "current_steps": 1969, + "loss": 1.6226, + "learning_rate": 2.8965517241379307e-05, + "epoch": 0.23 + }, + { + "current_steps": 1971, + "loss": 1.4254, + "learning_rate": 2.8669950738916254e-05, + "epoch": 0.23 + }, + { + "current_steps": 1973, + "loss": 1.4346, + "learning_rate": 2.83743842364532e-05, + "epoch": 0.23 + }, + { + "current_steps": 1975, + "loss": 1.4151, + "learning_rate": 2.8078817733990145e-05, + "epoch": 0.23 + }, + { + "current_steps": 1977, + "loss": 1.5342, + "learning_rate": 2.7783251231527093e-05, + "epoch": 0.23 + }, + { + "current_steps": 1979, + "loss": 1.4544, + "learning_rate": 2.7487684729064037e-05, + "epoch": 0.23 + }, + { + "current_steps": 1981, + "loss": 1.2191, + "learning_rate": 2.719211822660098e-05, + "epoch": 0.23 + }, + { + "current_steps": 1983, + "loss": 1.4886, + "learning_rate": 2.6896551724137928e-05, + "epoch": 0.23 + }, + { + "current_steps": 1985, + "loss": 1.4416, + "learning_rate": 2.6600985221674876e-05, + "epoch": 0.23 + }, + { + "current_steps": 1987, + "loss": 1.3084, + "learning_rate": 2.6305418719211823e-05, + "epoch": 0.23 + }, + { + "current_steps": 1989, + "loss": 1.4037, + "learning_rate": 2.6009852216748764e-05, + "epoch": 0.23 + }, + { + "current_steps": 1991, + "loss": 1.4999, + "learning_rate": 2.571428571428571e-05, + "epoch": 0.23 + }, + { + "current_steps": 1993, + "loss": 1.3526, + "learning_rate": 2.541871921182266e-05, + "epoch": 0.23 + }, + { + "current_steps": 1995, + "loss": 1.2452, + "learning_rate": 2.5123152709359602e-05, + "epoch": 0.23 + }, + { + "current_steps": 1997, + "loss": 1.4491, + "learning_rate": 2.482758620689655e-05, + "epoch": 0.23 + }, + { + "current_steps": 1999, + "loss": 1.5146, + "learning_rate": 2.4532019704433497e-05, + "epoch": 0.23 + }, + { + "current_steps": 2001, + "loss": 1.3113, + "learning_rate": 2.4236453201970445e-05, + "epoch": 0.23 + }, + { + "current_steps": 2003, + "loss": 1.4204, + "learning_rate": 2.3940886699507385e-05, + "epoch": 0.23 + }, + { + "current_steps": 2005, + "loss": 1.4282, + "learning_rate": 2.3645320197044333e-05, + "epoch": 0.23 + }, + { + "current_steps": 2007, + "loss": 1.2543, + "learning_rate": 2.334975369458128e-05, + "epoch": 0.23 + }, + { + "current_steps": 2009, + "loss": 1.4168, + "learning_rate": 2.3054187192118224e-05, + "epoch": 0.23 + }, + { + "current_steps": 2011, + "loss": 1.3244, + "learning_rate": 2.275862068965517e-05, + "epoch": 0.23 + }, + { + "current_steps": 2013, + "loss": 1.4299, + "learning_rate": 2.2463054187192116e-05, + "epoch": 0.23 + }, + { + "current_steps": 2015, + "loss": 1.6615, + "learning_rate": 2.2167487684729063e-05, + "epoch": 0.23 + }, + { + "current_steps": 2017, + "loss": 1.494, + "learning_rate": 2.1871921182266007e-05, + "epoch": 0.23 + }, + { + "current_steps": 2019, + "loss": 1.4014, + "learning_rate": 2.1576354679802954e-05, + "epoch": 0.24 + }, + { + "current_steps": 2021, + "loss": 1.6533, + "learning_rate": 2.1280788177339902e-05, + "epoch": 0.24 + }, + { + "current_steps": 2023, + "loss": 1.3424, + "learning_rate": 2.0985221674876842e-05, + "epoch": 0.24 + }, + { + "current_steps": 2025, + "loss": 1.2958, + "learning_rate": 2.068965517241379e-05, + "epoch": 0.24 + }, + { + "current_steps": 2027, + "loss": 1.4598, + "learning_rate": 2.0394088669950737e-05, + "epoch": 0.24 + }, + { + "current_steps": 2029, + "loss": 1.2353, + "learning_rate": 2.009852216748768e-05, + "epoch": 0.24 + }, + { + "current_steps": 2031, + "loss": 1.3546, + "learning_rate": 1.980295566502463e-05, + "epoch": 0.24 + }, + { + "current_steps": 2033, + "loss": 1.3564, + "learning_rate": 1.9507389162561576e-05, + "epoch": 0.24 + }, + { + "current_steps": 2035, + "loss": 1.374, + "learning_rate": 1.9211822660098524e-05, + "epoch": 0.24 + }, + { + "current_steps": 2037, + "loss": 1.3581, + "learning_rate": 1.8916256157635464e-05, + "epoch": 0.24 + }, + { + "current_steps": 2039, + "loss": 1.5073, + "learning_rate": 1.862068965517241e-05, + "epoch": 0.24 + }, + { + "current_steps": 2041, + "loss": 1.4059, + "learning_rate": 1.832512315270936e-05, + "epoch": 0.24 + }, + { + "current_steps": 2043, + "loss": 1.3828, + "learning_rate": 1.8029556650246303e-05, + "epoch": 0.24 + }, + { + "current_steps": 2045, + "loss": 1.4461, + "learning_rate": 1.773399014778325e-05, + "epoch": 0.24 + }, + { + "current_steps": 2047, + "loss": 1.3237, + "learning_rate": 1.7438423645320194e-05, + "epoch": 0.24 + }, + { + "current_steps": 2049, + "loss": 1.3452, + "learning_rate": 1.7142857142857142e-05, + "epoch": 0.24 + }, + { + "current_steps": 2051, + "loss": 1.3552, + "learning_rate": 1.684729064039409e-05, + "epoch": 0.24 + }, + { + "current_steps": 2053, + "loss": 1.3517, + "learning_rate": 1.6551724137931033e-05, + "epoch": 0.24 + }, + { + "current_steps": 2055, + "loss": 1.4637, + "learning_rate": 1.625615763546798e-05, + "epoch": 0.24 + }, + { + "current_steps": 2057, + "loss": 1.4506, + "learning_rate": 1.5960591133004925e-05, + "epoch": 0.24 + }, + { + "current_steps": 2059, + "loss": 1.2913, + "learning_rate": 1.566502463054187e-05, + "epoch": 0.24 + }, + { + "current_steps": 2061, + "loss": 1.4415, + "learning_rate": 1.5369458128078816e-05, + "epoch": 0.24 + }, + { + "current_steps": 2063, + "loss": 1.3982, + "learning_rate": 1.5073891625615762e-05, + "epoch": 0.24 + }, + { + "current_steps": 2065, + "loss": 1.476, + "learning_rate": 1.477832512315271e-05, + "epoch": 0.24 + }, + { + "current_steps": 2067, + "loss": 1.4763, + "learning_rate": 1.4482758620689653e-05, + "epoch": 0.24 + }, + { + "current_steps": 2069, + "loss": 1.3806, + "learning_rate": 1.41871921182266e-05, + "epoch": 0.24 + }, + { + "current_steps": 2071, + "loss": 1.6809, + "learning_rate": 1.3891625615763546e-05, + "epoch": 0.24 + }, + { + "current_steps": 2073, + "loss": 1.3225, + "learning_rate": 1.359605911330049e-05, + "epoch": 0.24 + }, + { + "current_steps": 2075, + "loss": 1.5042, + "learning_rate": 1.3300492610837438e-05, + "epoch": 0.24 + }, + { + "current_steps": 2077, + "loss": 1.3123, + "learning_rate": 1.3004926108374382e-05, + "epoch": 0.24 + }, + { + "current_steps": 2079, + "loss": 1.2716, + "learning_rate": 1.270935960591133e-05, + "epoch": 0.24 + }, + { + "current_steps": 2081, + "loss": 1.4565, + "learning_rate": 1.2413793103448275e-05, + "epoch": 0.24 + }, + { + "current_steps": 2083, + "loss": 1.268, + "learning_rate": 1.2118226600985222e-05, + "epoch": 0.24 + }, + { + "current_steps": 2085, + "loss": 1.3143, + "learning_rate": 1.1822660098522166e-05, + "epoch": 0.24 + }, + { + "current_steps": 2087, + "loss": 1.4247, + "learning_rate": 1.1527093596059112e-05, + "epoch": 0.24 + }, + { + "current_steps": 2089, + "loss": 1.3091, + "learning_rate": 1.1231527093596058e-05, + "epoch": 0.24 + }, + { + "current_steps": 2091, + "loss": 1.4655, + "learning_rate": 1.0935960591133004e-05, + "epoch": 0.24 + }, + { + "current_steps": 2093, + "loss": 1.4689, + "learning_rate": 1.0640394088669951e-05, + "epoch": 0.24 + }, + { + "current_steps": 2095, + "loss": 1.3892, + "learning_rate": 1.0344827586206895e-05, + "epoch": 0.24 + }, + { + "current_steps": 2097, + "loss": 1.2808, + "learning_rate": 1.004926108374384e-05, + "epoch": 0.24 + }, + { + "current_steps": 2099, + "loss": 1.5469, + "learning_rate": 9.753694581280788e-06, + "epoch": 0.24 + }, + { + "current_steps": 2101, + "loss": 1.3735, + "learning_rate": 9.458128078817732e-06, + "epoch": 0.24 + }, + { + "current_steps": 2103, + "loss": 1.4516, + "learning_rate": 9.16256157635468e-06, + "epoch": 0.24 + }, + { + "current_steps": 2105, + "loss": 1.2961, + "learning_rate": 8.866995073891625e-06, + "epoch": 0.25 + }, + { + "current_steps": 2107, + "loss": 1.4416, + "learning_rate": 8.571428571428571e-06, + "epoch": 0.25 + }, + { + "current_steps": 2109, + "loss": 1.309, + "learning_rate": 8.275862068965517e-06, + "epoch": 0.25 + }, + { + "current_steps": 2111, + "loss": 1.4846, + "learning_rate": 7.980295566502462e-06, + "epoch": 0.25 + }, + { + "current_steps": 2113, + "loss": 1.4353, + "learning_rate": 7.684729064039408e-06, + "epoch": 0.25 + }, + { + "current_steps": 2115, + "loss": 1.3507, + "learning_rate": 7.389162561576355e-06, + "epoch": 0.25 + }, + { + "current_steps": 2117, + "loss": 1.1822, + "learning_rate": 7.0935960591133e-06, + "epoch": 0.25 + }, + { + "current_steps": 2119, + "loss": 1.3353, + "learning_rate": 6.798029556650245e-06, + "epoch": 0.25 + }, + { + "current_steps": 2121, + "loss": 1.3504, + "learning_rate": 6.502463054187191e-06, + "epoch": 0.25 + }, + { + "current_steps": 2123, + "loss": 1.3049, + "learning_rate": 6.2068965517241375e-06, + "epoch": 0.25 + }, + { + "current_steps": 2125, + "loss": 1.4379, + "learning_rate": 5.911330049261083e-06, + "epoch": 0.25 + }, + { + "current_steps": 2127, + "loss": 1.4737, + "learning_rate": 5.615763546798029e-06, + "epoch": 0.25 + }, + { + "current_steps": 2129, + "loss": 1.4593, + "learning_rate": 5.3201970443349755e-06, + "epoch": 0.25 + }, + { + "current_steps": 2131, + "loss": 1.5299, + "learning_rate": 5.02463054187192e-06, + "epoch": 0.25 + }, + { + "current_steps": 2133, + "loss": 1.2848, + "learning_rate": 4.729064039408866e-06, + "epoch": 0.25 + }, + { + "current_steps": 2135, + "loss": 1.1764, + "learning_rate": 4.433497536945813e-06, + "epoch": 0.25 + }, + { + "current_steps": 2137, + "loss": 1.3565, + "learning_rate": 4.137931034482758e-06, + "epoch": 0.25 + }, + { + "current_steps": 2139, + "loss": 1.47, + "learning_rate": 3.842364532019704e-06, + "epoch": 0.25 + }, + { + "current_steps": 2141, + "loss": 1.4827, + "learning_rate": 3.54679802955665e-06, + "epoch": 0.25 + }, + { + "current_steps": 2143, + "loss": 1.4833, + "learning_rate": 3.2512315270935955e-06, + "epoch": 0.25 + }, + { + "current_steps": 2145, + "loss": 1.3252, + "learning_rate": 2.9556650246305416e-06, + "epoch": 0.25 + }, + { + "current_steps": 2147, + "loss": 1.4242, + "learning_rate": 2.6600985221674877e-06, + "epoch": 0.25 + }, + { + "current_steps": 2149, + "loss": 1.4473, + "learning_rate": 2.364532019704433e-06, + "epoch": 0.25 + }, + { + "current_steps": 2149, + "loss": 1.4473, + "learning_rate": 2.364532019704433e-06, + "epoch": 0.25 + } +] \ No newline at end of file