diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,67624 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.999989348784696, + "eval_steps": 500, + "global_step": 1126620, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0002, + "loss": 31.0471, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999999611149485, + "loss": 20.7537, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999998444597962, + "loss": 18.0646, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999996500345523, + "loss": 16.8945, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999993778392324, + "loss": 16.206, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999999027873857, + "loss": 15.8136, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999998600138454, + "loss": 15.382, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999998094633056, + "loss": 15.1542, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999997511357703, + "loss": 14.9009, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999968503124396, + "loss": 14.6485, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999961114973178, + "loss": 14.3772, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999952949123948, + "loss": 14.3108, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999944005577342, + "loss": 14.1975, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999934284334057, + "loss": 14.0688, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999923785394845, + "loss": 13.9874, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999912508760528, + "loss": 13.8713, + "step": 1600 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999900454431978, + "loss": 13.7619, + "step": 1700 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999988762241013, + "loss": 13.7297, + "step": 1800 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999874012695994, + "loss": 13.5458, + "step": 1900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999859625290616, + "loss": 13.4896, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999984446019512, + "loss": 13.4535, + "step": 2100 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999828517410685, + "loss": 13.4017, + "step": 2200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001999981179693855, + "loss": 13.3113, + "step": 2300 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999979429878002, + "loss": 13.2931, + "step": 2400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999776022936448, + "loss": 13.1937, + "step": 2500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999975696940926, + "loss": 13.0655, + "step": 2600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999737138199936, + "loss": 13.1311, + "step": 2700 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999716529310021, + "loss": 12.9677, + "step": 2800 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999695142741117, + "loss": 12.9102, + "step": 2900 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999672978494886, + "loss": 12.8818, + "step": 3000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999965003657305, + "loss": 12.8942, + "step": 3100 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999626316977395, + "loss": 12.7616, + "step": 3200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999960181970977, + "loss": 12.7554, + "step": 3300 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999576544772074, + "loss": 12.72, + "step": 3400 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999550492166273, + "loss": 12.7146, + "step": 3500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999523661894396, + "loss": 12.645, + "step": 3600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999949605395853, + "loss": 12.5612, + "step": 3700 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999467668360818, + "loss": 12.564, + "step": 3800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999438505103473, + "loss": 12.5116, + "step": 3900 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999408564188757, + "loss": 12.4324, + "step": 4000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999377845619003, + "loss": 12.4035, + "step": 4100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999346349396598, + "loss": 12.3447, + "step": 4200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999314075523994, + "loss": 12.4371, + "step": 4300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999281024003698, + "loss": 12.3681, + "step": 4400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999924719483828, + "loss": 12.3628, + "step": 4500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999212588030373, + "loss": 12.1989, + "step": 4600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999177203582668, + "loss": 12.2067, + "step": 4700 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999141041497916, + "loss": 12.1742, + "step": 4800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999104101778932, + "loss": 12.1978, + "step": 4900 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999066384428584, + "loss": 12.1128, + "step": 5000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999027889449811, + "loss": 12.2079, + "step": 5100 + }, + { + "epoch": 0.06, + "learning_rate": 0.000199989886168456, + "loss": 12.1338, + "step": 5200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999894856661901, + "loss": 12.1586, + "step": 5300 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998907738773156, + "loss": 12.0933, + "step": 5400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998866133311212, + "loss": 12.0715, + "step": 5500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999882375023641, + "loss": 12.1072, + "step": 5600 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998780589552056, + "loss": 12.0337, + "step": 5700 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998736651261493, + "loss": 11.9432, + "step": 5800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998691935368151, + "loss": 11.9196, + "step": 5900 + }, + { + "epoch": 0.06, + "learning_rate": 0.000199986464418755, + "loss": 11.9342, + "step": 6000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998600170787078, + "loss": 11.9232, + "step": 6100 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998553122106488, + "loss": 11.8887, + "step": 6200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998505295837387, + "loss": 11.9098, + "step": 6300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998456691983492, + "loss": 11.8077, + "step": 6400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998407310548583, + "loss": 11.8481, + "step": 6500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998357151536508, + "loss": 11.7697, + "step": 6600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998306214951157, + "loss": 11.8407, + "step": 6700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998254500796497, + "loss": 11.8612, + "step": 6800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999820200907655, + "loss": 11.7922, + "step": 6900 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998148739795398, + "loss": 11.7509, + "step": 7000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019998094692957183, + "loss": 11.7071, + "step": 7100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019998039868566108, + "loss": 11.6312, + "step": 7200 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997984266626438, + "loss": 11.6964, + "step": 7300 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997927887142495, + "loss": 11.6597, + "step": 7400 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997870730118668, + "loss": 11.6117, + "step": 7500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997812795559397, + "loss": 11.659, + "step": 7600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999775408346919, + "loss": 11.5898, + "step": 7700 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997694593852612, + "loss": 11.4657, + "step": 7800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999763432671429, + "loss": 11.5958, + "step": 7900 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997573282058912, + "loss": 11.5253, + "step": 8000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997511459891225, + "loss": 11.5289, + "step": 8100 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997448860216034, + "loss": 11.4902, + "step": 8200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997385483038214, + "loss": 11.4736, + "step": 8300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997321328362692, + "loss": 11.5187, + "step": 8400 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997256396194448, + "loss": 11.576, + "step": 8500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997190686538543, + "loss": 11.5031, + "step": 8600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001999712419940008, + "loss": 11.4626, + "step": 8700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997056934784236, + "loss": 11.5009, + "step": 8800 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996988892696238, + "loss": 11.3693, + "step": 8900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999692007314138, + "loss": 11.4287, + "step": 9000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999685047612501, + "loss": 11.3802, + "step": 9100 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999678010165255, + "loss": 11.5035, + "step": 9200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999670894972946, + "loss": 11.3498, + "step": 9300 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999663702036128, + "loss": 11.3369, + "step": 9400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996564313553604, + "loss": 11.3852, + "step": 9500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996490829312088, + "loss": 11.3234, + "step": 9600 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996416567642447, + "loss": 11.313, + "step": 9700 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999634152855045, + "loss": 11.317, + "step": 9800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999626571204194, + "loss": 11.2926, + "step": 9900 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019996189118122812, + "loss": 11.3351, + "step": 10000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999611174679902, + "loss": 11.1711, + "step": 10100 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999603359807658, + "loss": 11.2414, + "step": 10200 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995954671961572, + "loss": 11.2358, + "step": 10300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995874968460137, + "loss": 11.2353, + "step": 10400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999579448757847, + "loss": 11.3189, + "step": 10500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999571322932283, + "loss": 11.2248, + "step": 10600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995631193699538, + "loss": 11.255, + "step": 10700 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019995548380714973, + "loss": 11.3468, + "step": 10800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019995464790375573, + "loss": 11.2192, + "step": 10900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019995380422687843, + "loss": 11.1862, + "step": 11000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019995295277658345, + "loss": 11.1727, + "step": 11100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019995209355293695, + "loss": 11.1103, + "step": 11200 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019995122655600583, + "loss": 11.2029, + "step": 11300 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019995035178585742, + "loss": 11.1489, + "step": 11400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999494692425598, + "loss": 11.1043, + "step": 11500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994857892618163, + "loss": 11.156, + "step": 11600 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994768083679215, + "loss": 11.1204, + "step": 11700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994677497446115, + "loss": 11.0168, + "step": 11800 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994586133925912, + "loss": 11.0952, + "step": 11900 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994493993125712, + "loss": 11.0757, + "step": 12000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994401075052677, + "loss": 11.0533, + "step": 12100 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994307379714036, + "loss": 11.0473, + "step": 12200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994212907117076, + "loss": 11.131, + "step": 12300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994117657269145, + "loss": 11.0459, + "step": 12400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019994021630177647, + "loss": 11.0304, + "step": 12500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993924825850053, + "loss": 11.0631, + "step": 12600 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019993827244293888, + "loss": 10.9818, + "step": 12700 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019993728885516745, + "loss": 11.0063, + "step": 12800 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019993629749526272, + "loss": 10.9952, + "step": 12900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019993529836330177, + "loss": 10.9415, + "step": 13000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019993429145936234, + "loss": 10.9471, + "step": 13100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999332767835227, + "loss": 10.9866, + "step": 13200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999322543358618, + "loss": 10.9669, + "step": 13300 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019993122411645912, + "loss": 11.0177, + "step": 13400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999301861253948, + "loss": 11.0276, + "step": 13500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019992914036274956, + "loss": 11.0605, + "step": 13600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001999280868286047, + "loss": 10.8661, + "step": 13700 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001999270255230422, + "loss": 10.931, + "step": 13800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019992595644614458, + "loss": 10.9676, + "step": 13900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019992487959799498, + "loss": 10.9092, + "step": 14000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019992379497867714, + "loss": 10.9714, + "step": 14100 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001999227025882754, + "loss": 10.9594, + "step": 14200 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019992160242687478, + "loss": 10.9789, + "step": 14300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019992049449456077, + "loss": 10.954, + "step": 14400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991937879141954, + "loss": 10.9189, + "step": 14500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001999182553175379, + "loss": 10.8849, + "step": 14600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001999171240730032, + "loss": 10.8396, + "step": 14700 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001999159850579034, + "loss": 10.8158, + "step": 14800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001999148382723271, + "loss": 10.8047, + "step": 14900 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001999136837163635, + "loss": 10.8237, + "step": 15000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019991252139010235, + "loss": 10.8259, + "step": 15100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019991135129363407, + "loss": 10.8655, + "step": 15200 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019991017342704966, + "loss": 10.8968, + "step": 15300 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019990898779044072, + "loss": 10.8595, + "step": 15400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019990779438389944, + "loss": 10.838, + "step": 15500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019990659320751866, + "loss": 10.8, + "step": 15600 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019990538426139178, + "loss": 10.7413, + "step": 15700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019990416754561282, + "loss": 10.7885, + "step": 15800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001999029430602764, + "loss": 10.6433, + "step": 15900 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019990171080547776, + "loss": 10.753, + "step": 16000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001999004707813127, + "loss": 10.8136, + "step": 16100 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019989922298787772, + "loss": 10.7348, + "step": 16200 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019989796742526982, + "loss": 10.6999, + "step": 16300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019989670409358663, + "loss": 10.7208, + "step": 16400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019989543299292644, + "loss": 10.7513, + "step": 16500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998941541233881, + "loss": 10.7394, + "step": 16600 + }, + { + "epoch": 0.18, + "learning_rate": 0.000199892867485071, + "loss": 10.7219, + "step": 16700 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019989157307807526, + "loss": 10.6858, + "step": 16800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998902709025016, + "loss": 10.7832, + "step": 16900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019988896095845117, + "loss": 10.7431, + "step": 17000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998876432460259, + "loss": 10.7464, + "step": 17100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998863177653283, + "loss": 10.7604, + "step": 17200 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019988498451646144, + "loss": 10.72, + "step": 17300 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019988364349952896, + "loss": 10.6117, + "step": 17400 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019988229471463518, + "loss": 10.6402, + "step": 17500 + }, + { + "epoch": 0.19, + "learning_rate": 0.000199880938161885, + "loss": 10.623, + "step": 17600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019987957384138394, + "loss": 10.7233, + "step": 17700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019987820175323807, + "loss": 10.6463, + "step": 17800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998768218975541, + "loss": 10.7418, + "step": 17900 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019987543427443935, + "loss": 10.6172, + "step": 18000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019987403888400174, + "loss": 10.6089, + "step": 18100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998726357263498, + "loss": 10.7055, + "step": 18200 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019987122480159258, + "loss": 10.588, + "step": 18300 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998698061098399, + "loss": 10.6248, + "step": 18400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998683796512021, + "loss": 10.5568, + "step": 18500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019986694542579002, + "loss": 10.7144, + "step": 18600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998655034337153, + "loss": 10.6502, + "step": 18700 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019986405367509, + "loss": 10.5789, + "step": 18800 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019986259615002692, + "loss": 10.5791, + "step": 18900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019986113085863937, + "loss": 10.6031, + "step": 19000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019985965780104138, + "loss": 10.5837, + "step": 19100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019985817697734743, + "loss": 10.5955, + "step": 19200 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019985668838767273, + "loss": 10.6035, + "step": 19300 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019985519203213304, + "loss": 10.548, + "step": 19400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019985368791084471, + "loss": 10.5317, + "step": 19500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019985217602392475, + "loss": 10.5493, + "step": 19600 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019985065637149072, + "loss": 10.5723, + "step": 19700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019984912895366078, + "loss": 10.599, + "step": 19800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001998475937705538, + "loss": 10.5421, + "step": 19900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019984605082228903, + "loss": 10.508, + "step": 20000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019984450010898658, + "loss": 10.4812, + "step": 20100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019984294163076705, + "loss": 10.5839, + "step": 20200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001998413753877516, + "loss": 10.5132, + "step": 20300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019983980138006202, + "loss": 10.4647, + "step": 20400 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019983821960782079, + "loss": 10.5499, + "step": 20500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019983663007115085, + "loss": 10.5803, + "step": 20600 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019983503277017588, + "loss": 10.5661, + "step": 20700 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019983342770502004, + "loss": 10.6515, + "step": 20800 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019983181487580822, + "loss": 10.5345, + "step": 20900 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019983019428266582, + "loss": 10.5207, + "step": 21000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019982856592571887, + "loss": 10.5126, + "step": 21100 + }, + { + "epoch": 0.23, + "learning_rate": 0.000199826929805094, + "loss": 10.5475, + "step": 21200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019982528592091847, + "loss": 10.4911, + "step": 21300 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001998236342733201, + "loss": 10.4779, + "step": 21400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001998219748624274, + "loss": 10.4376, + "step": 21500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019982030768836935, + "loss": 10.5337, + "step": 21600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001998186327512756, + "loss": 10.4757, + "step": 21700 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019981695005127649, + "loss": 10.4933, + "step": 21800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019981525958850287, + "loss": 10.3856, + "step": 21900 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019981356136308614, + "loss": 10.5325, + "step": 22000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019981185537515841, + "loss": 10.5507, + "step": 22100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019981014162485233, + "loss": 10.5375, + "step": 22200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001998084201123012, + "loss": 10.4644, + "step": 22300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019980669083763893, + "loss": 10.4069, + "step": 22400 + }, + { + "epoch": 0.24, + "learning_rate": 0.000199804953801, + "loss": 10.4931, + "step": 22500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019980320900251944, + "loss": 10.3751, + "step": 22600 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019980145644233298, + "loss": 10.388, + "step": 22700 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019979969612057694, + "loss": 10.4941, + "step": 22800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997979280373882, + "loss": 10.3828, + "step": 22900 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019979615219290427, + "loss": 10.3678, + "step": 23000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019979436858726324, + "loss": 10.4373, + "step": 23100 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019979257722060382, + "loss": 10.3924, + "step": 23200 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019979077809306536, + "loss": 10.4101, + "step": 23300 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001997889712047877, + "loss": 10.4652, + "step": 23400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019978715655591148, + "loss": 10.4567, + "step": 23500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019978533414657774, + "loss": 10.3139, + "step": 23600 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019978350397692823, + "loss": 10.3567, + "step": 23700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019978166604710528, + "loss": 10.4204, + "step": 23800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019977982035725186, + "loss": 10.4107, + "step": 23900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019977796690751144, + "loss": 10.4131, + "step": 24000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019977610569802822, + "loss": 10.4296, + "step": 24100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019977423672894694, + "loss": 10.3413, + "step": 24200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019977236000041291, + "loss": 10.5409, + "step": 24300 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019977047551257213, + "loss": 10.3408, + "step": 24400 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019976858326557114, + "loss": 10.4239, + "step": 24500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019976668325955707, + "loss": 10.4088, + "step": 24600 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019976477549467773, + "loss": 10.3837, + "step": 24700 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019976285997108152, + "loss": 10.3859, + "step": 24800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001997609366889173, + "loss": 10.2984, + "step": 24900 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019975900564833473, + "loss": 10.4014, + "step": 25000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001997570668494839, + "loss": 10.4103, + "step": 25100 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019975512029251572, + "loss": 10.3347, + "step": 25200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001997531659775815, + "loss": 10.3077, + "step": 25300 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019975120390483325, + "loss": 10.3327, + "step": 25400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001997492340744235, + "loss": 10.2565, + "step": 25500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001997472564865055, + "loss": 10.3999, + "step": 25600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019974527114123306, + "loss": 10.3328, + "step": 25700 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019974327803876053, + "loss": 10.2816, + "step": 25800 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019974127717924293, + "loss": 10.2603, + "step": 25900 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019973926856283592, + "loss": 10.3163, + "step": 26000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019973725218969563, + "loss": 10.3801, + "step": 26100 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001997352280599789, + "loss": 10.3455, + "step": 26200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019973319617384318, + "loss": 10.4132, + "step": 26300 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019973115653144647, + "loss": 10.3055, + "step": 26400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001997291091329474, + "loss": 10.3018, + "step": 26500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019972705397850514, + "loss": 10.2734, + "step": 26600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001997249910682796, + "loss": 10.3807, + "step": 26700 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019972292040243116, + "loss": 10.2753, + "step": 26800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001997208419811209, + "loss": 10.2918, + "step": 26900 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019971875580451044, + "loss": 10.2821, + "step": 27000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019971666187276198, + "loss": 10.2867, + "step": 27100 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019971456018603846, + "loss": 10.3578, + "step": 27200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001997124507445032, + "loss": 10.2707, + "step": 27300 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019971033354832036, + "loss": 10.2711, + "step": 27400 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019970820859765452, + "loss": 10.3376, + "step": 27500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019970607589267104, + "loss": 10.252, + "step": 27600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019970393543353564, + "loss": 10.3103, + "step": 27700 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001997017872204149, + "loss": 10.1373, + "step": 27800 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019969963125347583, + "loss": 10.3313, + "step": 27900 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001996974675328861, + "loss": 10.2393, + "step": 28000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019969529605881405, + "loss": 10.2477, + "step": 28100 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019969311683142846, + "loss": 10.2344, + "step": 28200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019969092985089887, + "loss": 10.2996, + "step": 28300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019968873511739532, + "loss": 10.2366, + "step": 28400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019968653263108854, + "loss": 10.25, + "step": 28500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001996843223921498, + "loss": 10.2351, + "step": 28600 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019968210440075097, + "loss": 10.27, + "step": 28700 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019967987865706456, + "loss": 10.2026, + "step": 28800 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019967764516126368, + "loss": 10.2296, + "step": 28900 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019967540391352197, + "loss": 10.2811, + "step": 29000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019967315491401384, + "loss": 10.2503, + "step": 29100 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019967089816291412, + "loss": 10.1604, + "step": 29200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001996686336603983, + "loss": 10.2826, + "step": 29300 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019966636140664252, + "loss": 10.3007, + "step": 29400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019966408140182352, + "loss": 10.2024, + "step": 29500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019966179364611856, + "loss": 10.2677, + "step": 29600 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019965949813970563, + "loss": 10.1955, + "step": 29700 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019965719488276314, + "loss": 10.2343, + "step": 29800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019965488387547036, + "loss": 10.2955, + "step": 29900 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019965256511800692, + "loss": 10.1927, + "step": 30000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019965023861055316, + "loss": 10.3404, + "step": 30100 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019964790435329004, + "loss": 10.2021, + "step": 30200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019964556234639905, + "loss": 10.2708, + "step": 30300 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001996432125900624, + "loss": 10.2472, + "step": 30400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019964085508446274, + "loss": 10.2798, + "step": 30500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019963848982978353, + "loss": 10.1737, + "step": 30600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001996361168262086, + "loss": 10.2347, + "step": 30700 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019963373607392258, + "loss": 10.1883, + "step": 30800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019963134757311054, + "loss": 10.1404, + "step": 30900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019962895132395834, + "loss": 10.1811, + "step": 31000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019962654732665224, + "loss": 10.1203, + "step": 31100 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019962413558137924, + "loss": 10.2284, + "step": 31200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001996217160883269, + "loss": 10.2811, + "step": 31300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019961928884768343, + "loss": 10.219, + "step": 31400 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019961685385963752, + "loss": 10.1644, + "step": 31500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019961441112437855, + "loss": 10.2105, + "step": 31600 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019961196064209653, + "loss": 10.1999, + "step": 31700 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019960950241298202, + "loss": 10.0921, + "step": 31800 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019960703643722618, + "loss": 10.1773, + "step": 31900 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019960456271502083, + "loss": 10.1025, + "step": 32000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001996020812465583, + "loss": 10.0942, + "step": 32100 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019959959203203162, + "loss": 10.1094, + "step": 32200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019959709507163433, + "loss": 10.1029, + "step": 32300 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019959459036556068, + "loss": 10.2163, + "step": 32400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001995920779140054, + "loss": 10.1424, + "step": 32500 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001995895577171639, + "loss": 10.1192, + "step": 32600 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019958702977523218, + "loss": 10.1044, + "step": 32700 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019958449408840684, + "loss": 10.0973, + "step": 32800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001995819506568851, + "loss": 10.1485, + "step": 32900 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019957939948086478, + "loss": 10.0912, + "step": 33000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001995768405605442, + "loss": 10.1289, + "step": 33100 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019957427389612244, + "loss": 10.1974, + "step": 33200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001995716994877991, + "loss": 10.1298, + "step": 33300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019956911733577437, + "loss": 10.0156, + "step": 33400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019956652744024906, + "loss": 10.1539, + "step": 33500 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001995639298014246, + "loss": 10.0955, + "step": 33600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019956132441950304, + "loss": 10.0798, + "step": 33700 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019955871129468696, + "loss": 10.0804, + "step": 33800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001995560904271796, + "loss": 10.0944, + "step": 33900 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019955346181718475, + "loss": 10.1492, + "step": 34000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019955082546490688, + "loss": 10.0808, + "step": 34100 + }, + { + "epoch": 0.36, + "learning_rate": 0.000199548181370551, + "loss": 10.1054, + "step": 34200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019954552953432278, + "loss": 10.0621, + "step": 34300 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019954286995642838, + "loss": 10.0861, + "step": 34400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001995402026370747, + "loss": 10.1371, + "step": 34500 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019953752757646918, + "loss": 10.0815, + "step": 34600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019953484477481978, + "loss": 10.0745, + "step": 34700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019953215423233522, + "loss": 10.0454, + "step": 34800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001995294559492247, + "loss": 10.0741, + "step": 34900 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019952674992569812, + "loss": 10.0653, + "step": 35000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001995240361619659, + "loss": 10.0455, + "step": 35100 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019952131465823906, + "loss": 10.0667, + "step": 35200 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019951858541472926, + "loss": 10.0858, + "step": 35300 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001995158484316488, + "loss": 10.1146, + "step": 35400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019951310370921048, + "loss": 10.0985, + "step": 35500 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001995103512476278, + "loss": 10.0451, + "step": 35600 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019950759104711476, + "loss": 10.0269, + "step": 35700 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001995048231078861, + "loss": 10.0519, + "step": 35800 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019950204743015703, + "loss": 10.1344, + "step": 35900 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019949926401414343, + "loss": 10.0772, + "step": 36000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019949647286006178, + "loss": 10.1051, + "step": 36100 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001994936739681291, + "loss": 10.0019, + "step": 36200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019949086733856312, + "loss": 10.1141, + "step": 36300 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001994880529715821, + "loss": 10.0941, + "step": 36400 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019948523086740484, + "loss": 10.0442, + "step": 36500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001994824010262509, + "loss": 9.9917, + "step": 36600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019947956344834036, + "loss": 10.0871, + "step": 36700 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019947671813389385, + "loss": 10.152, + "step": 36800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019947386508313268, + "loss": 10.0063, + "step": 36900 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019947100429627872, + "loss": 10.0998, + "step": 37000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019946813577355446, + "loss": 10.0607, + "step": 37100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019946525951518296, + "loss": 10.0062, + "step": 37200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019946237552138793, + "loss": 10.0572, + "step": 37300 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019945948379239368, + "loss": 10.0558, + "step": 37400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019945658432842507, + "loss": 10.0863, + "step": 37500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019945367712970763, + "loss": 9.998, + "step": 37600 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019945076219646737, + "loss": 9.9899, + "step": 37700 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019944783952893105, + "loss": 10.0608, + "step": 37800 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019944490912732598, + "loss": 9.9962, + "step": 37900 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019944197099188, + "loss": 10.0176, + "step": 38000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019943902512282161, + "loss": 10.004, + "step": 38100 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019943607152038, + "loss": 9.956, + "step": 38200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001994331101847848, + "loss": 9.9463, + "step": 38300 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001994301411162663, + "loss": 9.9767, + "step": 38400 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019942716431505547, + "loss": 9.9262, + "step": 38500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019942417978138376, + "loss": 10.0205, + "step": 38600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019942118751548326, + "loss": 9.9822, + "step": 38700 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019941818751758675, + "loss": 10.0197, + "step": 38800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019941517978792748, + "loss": 9.9771, + "step": 38900 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019941216432673938, + "loss": 9.9758, + "step": 39000 + }, + { + "epoch": 0.42, + "learning_rate": 0.000199409141134257, + "loss": 10.0243, + "step": 39100 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001994061102107154, + "loss": 9.9633, + "step": 39200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019940307155635032, + "loss": 10.0104, + "step": 39300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019940002517139806, + "loss": 9.9586, + "step": 39400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019939697105609557, + "loss": 10.0043, + "step": 39500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019939390921068034, + "loss": 10.0483, + "step": 39600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001993908396353905, + "loss": 9.9968, + "step": 39700 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019938776233046477, + "loss": 10.0115, + "step": 39800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019938467729614247, + "loss": 9.9776, + "step": 39900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019938158453266353, + "loss": 10.0006, + "step": 40000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019937848404026845, + "loss": 10.0248, + "step": 40100 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019937537581919844, + "loss": 9.893, + "step": 40200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001993722598696951, + "loss": 9.969, + "step": 40300 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019936913619200084, + "loss": 10.0055, + "step": 40400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001993660047863586, + "loss": 9.9756, + "step": 40500 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001993628656530118, + "loss": 10.0363, + "step": 40600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001993597187922047, + "loss": 10.0008, + "step": 40700 + }, + { + "epoch": 0.43, + "learning_rate": 0.000199356564204182, + "loss": 9.8862, + "step": 40800 + }, + { + "epoch": 0.44, + "learning_rate": 0.000199353401889189, + "loss": 9.9434, + "step": 40900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019935023184747163, + "loss": 9.8759, + "step": 41000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019934705407927646, + "loss": 9.8991, + "step": 41100 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001993438685848506, + "loss": 9.9954, + "step": 41200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019934067536444178, + "loss": 9.9411, + "step": 41300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019933747441829838, + "loss": 9.9673, + "step": 41400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019933426574666928, + "loss": 9.9237, + "step": 41500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019933104934980408, + "loss": 10.0384, + "step": 41600 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019932782522795285, + "loss": 9.9686, + "step": 41700 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001993245933813664, + "loss": 9.9799, + "step": 41800 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019932135381029603, + "loss": 9.9338, + "step": 41900 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001993181065149937, + "loss": 9.9911, + "step": 42000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001993148514957119, + "loss": 9.914, + "step": 42100 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019931158875270385, + "loss": 9.8745, + "step": 42200 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019930831828622326, + "loss": 9.9327, + "step": 42300 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019930504009652448, + "loss": 9.993, + "step": 42400 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019930175418386242, + "loss": 9.9314, + "step": 42500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001992984605484927, + "loss": 9.9066, + "step": 42600 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019929515919067136, + "loss": 9.9097, + "step": 42700 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019929185011065528, + "loss": 9.8948, + "step": 42800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001992885333087017, + "loss": 9.9347, + "step": 42900 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001992852087850686, + "loss": 9.9528, + "step": 43000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019928187654001453, + "loss": 10.0051, + "step": 43100 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019927853657379863, + "loss": 9.9972, + "step": 43200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001992751888866807, + "loss": 9.9091, + "step": 43300 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019927183347892102, + "loss": 9.857, + "step": 43400 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019926847035078057, + "loss": 9.9834, + "step": 43500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019926509950252093, + "loss": 9.9047, + "step": 43600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001992617209344042, + "loss": 10.0088, + "step": 43700 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019925833464669315, + "loss": 9.8949, + "step": 43800 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019925494063965116, + "loss": 9.895, + "step": 43900 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019925153891354214, + "loss": 10.0217, + "step": 44000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019924812946863068, + "loss": 9.9424, + "step": 44100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019924471230518188, + "loss": 9.8019, + "step": 44200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019924128742346156, + "loss": 9.9388, + "step": 44300 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019923785482373605, + "loss": 9.8154, + "step": 44400 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019923441450627228, + "loss": 10.0039, + "step": 44500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019923096647133783, + "loss": 9.9578, + "step": 44600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001992275107192008, + "loss": 9.9351, + "step": 44700 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019922404725013, + "loss": 9.9032, + "step": 44800 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019922057606439481, + "loss": 9.8314, + "step": 44900 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019921709716226515, + "loss": 9.8617, + "step": 45000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019921361054401154, + "loss": 9.9342, + "step": 45100 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019921011620990518, + "loss": 9.8491, + "step": 45200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001992066141602178, + "loss": 9.8882, + "step": 45300 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019920310439522175, + "loss": 9.9177, + "step": 45400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019919958691519, + "loss": 9.8844, + "step": 45500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019919606172039616, + "loss": 9.8362, + "step": 45600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019919252881111428, + "loss": 9.9393, + "step": 45700 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019918898818761918, + "loss": 9.8758, + "step": 45800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001991854398501862, + "loss": 9.8643, + "step": 45900 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019918188379909127, + "loss": 9.9308, + "step": 46000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019917832003461098, + "loss": 9.9221, + "step": 46100 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019917474855702248, + "loss": 9.8291, + "step": 46200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001991711693666035, + "loss": 9.7886, + "step": 46300 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001991675824636324, + "loss": 9.8876, + "step": 46400 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019916398784838816, + "loss": 9.8634, + "step": 46500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019916038552115033, + "loss": 9.8741, + "step": 46600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019915677548219902, + "loss": 9.8517, + "step": 46700 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019915315773181503, + "loss": 9.8552, + "step": 46800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019914953227027972, + "loss": 9.8375, + "step": 46900 + }, + { + "epoch": 0.5, + "learning_rate": 0.000199145899097875, + "loss": 9.8573, + "step": 47000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019914225821488345, + "loss": 9.8893, + "step": 47100 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001991386096215882, + "loss": 9.8902, + "step": 47200 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019913495331827302, + "loss": 9.8109, + "step": 47300 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019913128930522224, + "loss": 9.8751, + "step": 47400 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019912761758272084, + "loss": 9.7455, + "step": 47500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019912393815105437, + "loss": 9.8544, + "step": 47600 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019912025101050896, + "loss": 9.8662, + "step": 47700 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019911655616137135, + "loss": 9.7524, + "step": 47800 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019911285360392893, + "loss": 9.8187, + "step": 47900 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001991091433384696, + "loss": 9.8238, + "step": 48000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019910542536528194, + "loss": 9.8804, + "step": 48100 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019910169968465508, + "loss": 9.9069, + "step": 48200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019909796629687877, + "loss": 9.9228, + "step": 48300 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019909422520224336, + "loss": 9.8329, + "step": 48400 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001990904764010398, + "loss": 9.9184, + "step": 48500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019908671989355968, + "loss": 9.8659, + "step": 48600 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019908295568009504, + "loss": 9.8845, + "step": 48700 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019907918376093869, + "loss": 9.7798, + "step": 48800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019907540413638395, + "loss": 9.8361, + "step": 48900 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019907161680672477, + "loss": 9.7878, + "step": 49000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001990678217722557, + "loss": 9.8215, + "step": 49100 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001990640190332719, + "loss": 9.8335, + "step": 49200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019906020859006905, + "loss": 9.8143, + "step": 49300 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019905639044294352, + "loss": 9.7487, + "step": 49400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019905256459219227, + "loss": 9.8336, + "step": 49500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019904873103811278, + "loss": 9.7964, + "step": 49600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019904488978100324, + "loss": 9.8271, + "step": 49700 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001990410408211624, + "loss": 9.8213, + "step": 49800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019903718415888953, + "loss": 9.8547, + "step": 49900 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019903331979448456, + "loss": 9.8669, + "step": 50000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019902944772824812, + "loss": 9.8096, + "step": 50100 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019902556796048124, + "loss": 9.8573, + "step": 50200 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001990216804914857, + "loss": 9.8697, + "step": 50300 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019901778532156378, + "loss": 9.8225, + "step": 50400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019901388245101848, + "loss": 9.7901, + "step": 50500 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019900997188015328, + "loss": 9.846, + "step": 50600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019900605360927235, + "loss": 9.7747, + "step": 50700 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019900212763868032, + "loss": 9.8159, + "step": 50800 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001989981939686826, + "loss": 9.8698, + "step": 50900 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019899425259958507, + "loss": 9.8044, + "step": 51000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001989903035316943, + "loss": 9.9548, + "step": 51100 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019898634676531734, + "loss": 9.8655, + "step": 51200 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019898238230076193, + "loss": 9.8672, + "step": 51300 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019897841013833645, + "loss": 9.831, + "step": 51400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019897443027834974, + "loss": 9.82, + "step": 51500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019897044272111135, + "loss": 9.7631, + "step": 51600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019896644746693137, + "loss": 9.8126, + "step": 51700 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019896244451612048, + "loss": 9.953, + "step": 51800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019895843386899008, + "loss": 9.7073, + "step": 51900 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019895441552585205, + "loss": 9.8157, + "step": 52000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019895038948701887, + "loss": 9.8505, + "step": 52100 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019894635575280363, + "loss": 9.6862, + "step": 52200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019894231432352006, + "loss": 9.8285, + "step": 52300 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019893826519948248, + "loss": 9.847, + "step": 52400 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019893420838100576, + "loss": 9.8107, + "step": 52500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019893014386840544, + "loss": 9.8643, + "step": 52600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019892607166199753, + "loss": 9.7064, + "step": 52700 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001989219917620988, + "loss": 9.8509, + "step": 52800 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019891790416902655, + "loss": 9.7911, + "step": 52900 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019891380888309863, + "loss": 9.7603, + "step": 53000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001989097059046336, + "loss": 9.759, + "step": 53100 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019890559523395043, + "loss": 9.8208, + "step": 53200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019890147687136894, + "loss": 9.8173, + "step": 53300 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001988973508172093, + "loss": 9.7646, + "step": 53400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019889321707179246, + "loss": 9.8205, + "step": 53500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019888907563543991, + "loss": 9.7749, + "step": 53600 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001988849265084737, + "loss": 9.7467, + "step": 53700 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019888076969121652, + "loss": 9.7965, + "step": 53800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019887660518399164, + "loss": 9.7353, + "step": 53900 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019887243298712294, + "loss": 9.788, + "step": 54000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019886825310093487, + "loss": 9.7294, + "step": 54100 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019886406552575253, + "loss": 9.7488, + "step": 54200 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001988598702619016, + "loss": 9.7447, + "step": 54300 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019885566730970828, + "loss": 9.7567, + "step": 54400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001988514566694995, + "loss": 9.7825, + "step": 54500 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001988472383416027, + "loss": 9.7692, + "step": 54600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019884301232634595, + "loss": 9.6836, + "step": 54700 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001988387786240579, + "loss": 9.8356, + "step": 54800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019883453723506777, + "loss": 9.7626, + "step": 54900 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019883028815970545, + "loss": 9.7195, + "step": 55000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019882603139830142, + "loss": 9.7177, + "step": 55100 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019882176695118665, + "loss": 9.8917, + "step": 55200 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001988174948186929, + "loss": 9.734, + "step": 55300 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001988132150011523, + "loss": 9.675, + "step": 55400 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019880892749889775, + "loss": 9.8575, + "step": 55500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019880463231226265, + "loss": 9.7011, + "step": 55600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001988003294415811, + "loss": 9.7278, + "step": 55700 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019879601888718766, + "loss": 9.7295, + "step": 55800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019879170064941764, + "loss": 9.7427, + "step": 55900 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019878737472860683, + "loss": 9.7965, + "step": 56000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019878304112509162, + "loss": 9.759, + "step": 56100 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001987786998392091, + "loss": 9.9015, + "step": 56200 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019877435087129687, + "loss": 9.7249, + "step": 56300 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019876999422169313, + "loss": 9.6978, + "step": 56400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019876562989073674, + "loss": 9.7255, + "step": 56500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019876125787876708, + "loss": 9.7836, + "step": 56600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019875687818612415, + "loss": 9.7311, + "step": 56700 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019875249081314857, + "loss": 9.7167, + "step": 56800 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019874809576018158, + "loss": 9.8079, + "step": 56900 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019874369302756497, + "loss": 9.7035, + "step": 57000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001987392826156411, + "loss": 9.6932, + "step": 57100 + }, + { + "epoch": 0.61, + "learning_rate": 0.000198734864524753, + "loss": 9.7365, + "step": 57200 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019873043875524426, + "loss": 9.7659, + "step": 57300 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001987260053074591, + "loss": 9.766, + "step": 57400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019872156418174227, + "loss": 9.7714, + "step": 57500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019871711537843914, + "loss": 9.727, + "step": 57600 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019871265889789578, + "loss": 9.6792, + "step": 57700 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019870819474045867, + "loss": 9.7587, + "step": 57800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019870372290647508, + "loss": 9.6687, + "step": 57900 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019869924339629272, + "loss": 9.7317, + "step": 58000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019869475621025996, + "loss": 9.7632, + "step": 58100 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019869026134872582, + "loss": 9.7515, + "step": 58200 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001986857588120398, + "loss": 9.815, + "step": 58300 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019868124860055212, + "loss": 9.7749, + "step": 58400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019867673071461355, + "loss": 9.7163, + "step": 58500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019867220515457537, + "loss": 9.652, + "step": 58600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001986676719207896, + "loss": 9.7391, + "step": 58700 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019866313101360875, + "loss": 9.7611, + "step": 58800 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019865858243338602, + "loss": 9.6897, + "step": 58900 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019865402618047512, + "loss": 9.688, + "step": 59000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019864946225523037, + "loss": 9.7416, + "step": 59100 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019864489065800667, + "loss": 9.7727, + "step": 59200 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001986403113891597, + "loss": 9.7675, + "step": 59300 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019863572444904547, + "loss": 9.7196, + "step": 59400 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001986311298380207, + "loss": 9.6778, + "step": 59500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019862652755644276, + "loss": 9.724, + "step": 59600 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001986219176046696, + "loss": 9.656, + "step": 59700 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001986172999830597, + "loss": 9.7138, + "step": 59800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019861267469197213, + "loss": 9.6985, + "step": 59900 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019860804173176666, + "loss": 9.6366, + "step": 60000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019860340110280355, + "loss": 9.7447, + "step": 60100 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019859875280544374, + "loss": 9.6863, + "step": 60200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019859409684004872, + "loss": 9.8047, + "step": 60300 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019858943320698062, + "loss": 9.6221, + "step": 60400 + }, + { + "epoch": 0.64, + "learning_rate": 0.000198584761906602, + "loss": 9.6547, + "step": 60500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001985800829392763, + "loss": 9.6265, + "step": 60600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019857539630536734, + "loss": 9.7293, + "step": 60700 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001985707020052396, + "loss": 9.7447, + "step": 60800 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019856600003925813, + "loss": 9.7466, + "step": 60900 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019856129040778865, + "loss": 9.6719, + "step": 61000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019855657311119743, + "loss": 9.6534, + "step": 61100 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001985518481498513, + "loss": 9.6513, + "step": 61200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019854711552411773, + "loss": 9.6562, + "step": 61300 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019854237523436478, + "loss": 9.7206, + "step": 61400 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019853762728096113, + "loss": 9.7123, + "step": 61500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019853287166427599, + "loss": 9.6824, + "step": 61600 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001985281083846792, + "loss": 9.5953, + "step": 61700 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001985233374425412, + "loss": 9.7035, + "step": 61800 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019851855883823308, + "loss": 9.7413, + "step": 61900 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019851377257212643, + "loss": 9.6828, + "step": 62000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019850897864459352, + "loss": 9.6211, + "step": 62100 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019850417705600713, + "loss": 9.7709, + "step": 62200 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019849936780674064, + "loss": 9.6846, + "step": 62300 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019849455089716815, + "loss": 9.6211, + "step": 62400 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019848972632766426, + "loss": 9.7107, + "step": 62500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019848489409860412, + "loss": 9.6783, + "step": 62600 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019848005421036359, + "loss": 9.6483, + "step": 62700 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019847520666331904, + "loss": 9.7016, + "step": 62800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019847035145784746, + "loss": 9.6695, + "step": 62900 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019846548859432647, + "loss": 9.6738, + "step": 63000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019846061807313423, + "loss": 9.7383, + "step": 63100 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001984557398946495, + "loss": 9.6857, + "step": 63200 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019845085405925173, + "loss": 9.654, + "step": 63300 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019844596056732084, + "loss": 9.6176, + "step": 63400 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019844105941923737, + "loss": 9.6247, + "step": 63500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019843615061538255, + "loss": 9.7676, + "step": 63600 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001984312341561381, + "loss": 9.7118, + "step": 63700 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019842631004188637, + "loss": 9.7131, + "step": 63800 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001984213782730103, + "loss": 9.6386, + "step": 63900 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019841643884989347, + "loss": 9.6429, + "step": 64000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019841149177292, + "loss": 9.6835, + "step": 64100 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019840653704247463, + "loss": 9.7867, + "step": 64200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019840157465894268, + "loss": 9.6481, + "step": 64300 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019839660462271007, + "loss": 9.6355, + "step": 64400 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019839162693416332, + "loss": 9.6678, + "step": 64500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019838664159368957, + "loss": 9.6124, + "step": 64600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019838164860167654, + "loss": 9.6488, + "step": 64700 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019837664795851247, + "loss": 9.6478, + "step": 64800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019837163966458633, + "loss": 9.6505, + "step": 64900 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001983666237202876, + "loss": 9.6577, + "step": 65000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019836160012600632, + "loss": 9.6612, + "step": 65100 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019835656888213322, + "loss": 9.5703, + "step": 65200 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019835152998905962, + "loss": 9.674, + "step": 65300 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001983464834471773, + "loss": 9.6131, + "step": 65400 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019834142925687878, + "loss": 9.6051, + "step": 65500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019833636741855713, + "loss": 9.6743, + "step": 65600 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019833129793260602, + "loss": 9.6139, + "step": 65700 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019832622079941968, + "loss": 9.684, + "step": 65800 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019832113601939296, + "loss": 9.6819, + "step": 65900 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019831604359292135, + "loss": 9.5982, + "step": 66000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001983109435204008, + "loss": 9.6193, + "step": 66100 + }, + { + "epoch": 0.71, + "learning_rate": 0.000198305835802228, + "loss": 9.7239, + "step": 66200 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001983007204388002, + "loss": 9.5838, + "step": 66300 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019829559743051519, + "loss": 9.65, + "step": 66400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019829046677777136, + "loss": 9.5691, + "step": 66500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019828532848096777, + "loss": 9.5914, + "step": 66600 + }, + { + "epoch": 0.71, + "learning_rate": 0.000198280182540504, + "loss": 9.6823, + "step": 66700 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019827502895678026, + "loss": 9.685, + "step": 66800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019826986773019734, + "loss": 9.6394, + "step": 66900 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019826469886115663, + "loss": 9.6145, + "step": 67000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019825952235006012, + "loss": 9.5927, + "step": 67100 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001982543381973104, + "loss": 9.6681, + "step": 67200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019824914640331062, + "loss": 9.6144, + "step": 67300 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019824394696846455, + "loss": 9.6173, + "step": 67400 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001982387398931765, + "loss": 9.6461, + "step": 67500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019823352517785154, + "loss": 9.6673, + "step": 67600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019822830282289515, + "loss": 9.647, + "step": 67700 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019822307282871347, + "loss": 9.6291, + "step": 67800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019821783519571322, + "loss": 9.603, + "step": 67900 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019821258992430178, + "loss": 9.6827, + "step": 68000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019820733701488705, + "loss": 9.6193, + "step": 68100 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019820207646787755, + "loss": 9.5848, + "step": 68200 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001981968082836824, + "loss": 9.6207, + "step": 68300 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019819153246271128, + "loss": 9.5816, + "step": 68400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019818624900537452, + "loss": 9.6011, + "step": 68500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019818095791208302, + "loss": 9.6176, + "step": 68600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019817565918324828, + "loss": 9.582, + "step": 68700 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019817035281928234, + "loss": 9.6085, + "step": 68800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019816503882059787, + "loss": 9.6201, + "step": 68900 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001981597171876082, + "loss": 9.5671, + "step": 69000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019815438792072714, + "loss": 9.6507, + "step": 69100 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001981490510203692, + "loss": 9.5708, + "step": 69200 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019814370648694936, + "loss": 9.6185, + "step": 69300 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019813835432088334, + "loss": 9.6214, + "step": 69400 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001981329945225873, + "loss": 9.7018, + "step": 69500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019812762709247816, + "loss": 9.6996, + "step": 69600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019812225203097328, + "loss": 9.5686, + "step": 69700 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001981168693384907, + "loss": 9.5987, + "step": 69800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019811147901544903, + "loss": 9.6123, + "step": 69900 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019810608106226747, + "loss": 9.561, + "step": 70000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019810067547936587, + "loss": 9.6567, + "step": 70100 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019809526226716452, + "loss": 9.557, + "step": 70200 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019808984142608452, + "loss": 9.5874, + "step": 70300 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019808441295654733, + "loss": 9.5983, + "step": 70400 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019807897685897524, + "loss": 9.6106, + "step": 70500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019807353313379097, + "loss": 9.5327, + "step": 70600 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019806808178141788, + "loss": 9.6321, + "step": 70700 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019806262280227984, + "loss": 9.6009, + "step": 70800 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019805715619680155, + "loss": 9.617, + "step": 70900 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019805168196540803, + "loss": 9.6198, + "step": 71000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019804620010852508, + "loss": 9.6165, + "step": 71100 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019804071062657895, + "loss": 9.6374, + "step": 71200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019803521351999668, + "loss": 9.5579, + "step": 71300 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019802970878920564, + "loss": 9.582, + "step": 71400 + }, + { + "epoch": 0.76, + "learning_rate": 0.000198024196434634, + "loss": 9.5548, + "step": 71500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019801867645671047, + "loss": 9.5614, + "step": 71600 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019801314885586432, + "loss": 9.5571, + "step": 71700 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019800761363252545, + "loss": 9.6367, + "step": 71800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001980020707871243, + "loss": 9.6038, + "step": 71900 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019799652032009198, + "loss": 9.6156, + "step": 72000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001979909622318601, + "loss": 9.6857, + "step": 72100 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019798539652286096, + "loss": 9.6555, + "step": 72200 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019797982319352738, + "loss": 9.6086, + "step": 72300 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001979742422442928, + "loss": 9.5374, + "step": 72400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019796865367559125, + "loss": 9.558, + "step": 72500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019796305748785739, + "loss": 9.5419, + "step": 72600 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001979574536815264, + "loss": 9.629, + "step": 72700 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019795184225703406, + "loss": 9.5649, + "step": 72800 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001979462232148168, + "loss": 9.6099, + "step": 72900 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019794059655531165, + "loss": 9.5932, + "step": 73000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019793496227895618, + "loss": 9.5793, + "step": 73100 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001979293203861885, + "loss": 9.7042, + "step": 73200 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001979236708774475, + "loss": 9.5571, + "step": 73300 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019791801375317243, + "loss": 9.6708, + "step": 73400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019791234901380328, + "loss": 9.6414, + "step": 73500 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019790667665978067, + "loss": 9.5526, + "step": 73600 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019790099669154563, + "loss": 9.5138, + "step": 73700 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019789530910953995, + "loss": 9.551, + "step": 73800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019788961391420596, + "loss": 9.6449, + "step": 73900 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019788391110598654, + "loss": 9.5773, + "step": 74000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001978782006853252, + "loss": 9.5658, + "step": 74100 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001978724826526661, + "loss": 9.6814, + "step": 74200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019786675700845388, + "loss": 9.6353, + "step": 74300 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019786102375313381, + "loss": 9.6756, + "step": 74400 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019785528288715184, + "loss": 9.614, + "step": 74500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019784953441095434, + "loss": 9.5881, + "step": 74600 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019784377832498843, + "loss": 9.5989, + "step": 74700 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019783801462970176, + "loss": 9.5213, + "step": 74800 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019783224332554252, + "loss": 9.5548, + "step": 74900 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019782646441295965, + "loss": 9.5805, + "step": 75000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019782067789240248, + "loss": 9.4886, + "step": 75100 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019781488376432107, + "loss": 9.4853, + "step": 75200 + }, + { + "epoch": 0.8, + "learning_rate": 0.000197809082029166, + "loss": 9.6053, + "step": 75300 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019780327268738851, + "loss": 9.5694, + "step": 75400 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019779745573944037, + "loss": 9.5213, + "step": 75500 + }, + { + "epoch": 0.81, + "learning_rate": 0.000197791631185774, + "loss": 9.5512, + "step": 75600 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019778579902684232, + "loss": 9.5413, + "step": 75700 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019777995926309894, + "loss": 9.5185, + "step": 75800 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019777411189499797, + "loss": 9.6322, + "step": 75900 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001977682569229942, + "loss": 9.5463, + "step": 76000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019776239434754302, + "loss": 9.4634, + "step": 76100 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019775652416910026, + "loss": 9.5728, + "step": 76200 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001977506463881225, + "loss": 9.5803, + "step": 76300 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019774476100506685, + "loss": 9.63, + "step": 76400 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019773886802039103, + "loss": 9.5538, + "step": 76500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001977329674345533, + "loss": 9.5859, + "step": 76600 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001977270592480126, + "loss": 9.447, + "step": 76700 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019772114346122837, + "loss": 9.6822, + "step": 76800 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019771522007466067, + "loss": 9.476, + "step": 76900 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019770928908877022, + "loss": 9.5865, + "step": 77000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019770335050401825, + "loss": 9.5016, + "step": 77100 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001976974043208666, + "loss": 9.5134, + "step": 77200 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019769145053977763, + "loss": 9.5434, + "step": 77300 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001976854891612145, + "loss": 9.6158, + "step": 77400 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019767952018564077, + "loss": 9.488, + "step": 77500 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001976735436135206, + "loss": 9.5248, + "step": 77600 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019766755944531885, + "loss": 9.4993, + "step": 77700 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001976615676815009, + "loss": 9.6055, + "step": 77800 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001976555683225327, + "loss": 9.5024, + "step": 77900 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019764956136888084, + "loss": 9.4729, + "step": 78000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001976435468210125, + "loss": 9.5829, + "step": 78100 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019763752467939538, + "loss": 9.5417, + "step": 78200 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019763149494449788, + "loss": 9.6157, + "step": 78300 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001976254576167889, + "loss": 9.5935, + "step": 78400 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019761941269673798, + "loss": 9.552, + "step": 78500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019761336018481524, + "loss": 9.5358, + "step": 78600 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019760730008149135, + "loss": 9.542, + "step": 78700 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019760123238723765, + "loss": 9.5262, + "step": 78800 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019759515710252598, + "loss": 9.6101, + "step": 78900 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019758907422782885, + "loss": 9.5132, + "step": 79000 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001975829837636193, + "loss": 9.5032, + "step": 79100 + }, + { + "epoch": 0.84, + "learning_rate": 0.000197576885710371, + "loss": 9.5075, + "step": 79200 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001975707800685582, + "loss": 9.5714, + "step": 79300 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019756466683865573, + "loss": 9.5298, + "step": 79400 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019755854602113902, + "loss": 9.6249, + "step": 79500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019755241761648407, + "loss": 9.5536, + "step": 79600 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019754628162516752, + "loss": 9.4452, + "step": 79700 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019754013804766657, + "loss": 9.4937, + "step": 79800 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019753398688445893, + "loss": 9.551, + "step": 79900 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019752782813602308, + "loss": 9.4343, + "step": 80000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019752166180283788, + "loss": 9.4267, + "step": 80100 + }, + { + "epoch": 0.85, + "learning_rate": 0.000197515487885383, + "loss": 9.4619, + "step": 80200 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001975093063841385, + "loss": 9.5252, + "step": 80300 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019750311729958516, + "loss": 9.5447, + "step": 80400 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019749692063220428, + "loss": 9.3643, + "step": 80500 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001974907163824778, + "loss": 9.5425, + "step": 80600 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019748450455088818, + "loss": 9.5105, + "step": 80700 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019747828513791858, + "loss": 9.4647, + "step": 80800 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019747205814405267, + "loss": 9.5641, + "step": 80900 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019746582356977465, + "loss": 9.5371, + "step": 81000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019745958141556948, + "loss": 9.4877, + "step": 81100 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019745333168192256, + "loss": 9.5087, + "step": 81200 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019744707436931996, + "loss": 9.4341, + "step": 81300 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019744080947824827, + "loss": 9.4888, + "step": 81400 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019743453700919476, + "loss": 9.5575, + "step": 81500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019742825696264719, + "loss": 9.5966, + "step": 81600 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019742196933909402, + "loss": 9.5137, + "step": 81700 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001974156741390242, + "loss": 9.5889, + "step": 81800 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001974093713629273, + "loss": 9.5028, + "step": 81900 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001974030610112935, + "loss": 9.4625, + "step": 82000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019739674308461358, + "loss": 9.5907, + "step": 82100 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019739041758337885, + "loss": 9.4754, + "step": 82200 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001973840845080813, + "loss": 9.5144, + "step": 82300 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019737774385921337, + "loss": 9.5873, + "step": 82400 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019737139563726823, + "loss": 9.4574, + "step": 82500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019736503984273959, + "loss": 9.4976, + "step": 82600 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019735867647612166, + "loss": 9.5217, + "step": 82700 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019735230553790943, + "loss": 9.4256, + "step": 82800 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001973459270285983, + "loss": 9.4681, + "step": 82900 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019733954094868435, + "loss": 9.4204, + "step": 83000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019733314729866424, + "loss": 9.4586, + "step": 83100 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019732674607903517, + "loss": 9.5945, + "step": 83200 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019732033729029497, + "loss": 9.4553, + "step": 83300 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001973139209329421, + "loss": 9.5523, + "step": 83400 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019730749700747549, + "loss": 9.4975, + "step": 83500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019730106551439476, + "loss": 9.4948, + "step": 83600 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001972946264542001, + "loss": 9.5866, + "step": 83700 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019728817982739226, + "loss": 9.4686, + "step": 83800 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001972817256344726, + "loss": 9.5562, + "step": 83900 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019727526387594308, + "loss": 9.5085, + "step": 84000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001972687945523062, + "loss": 9.504, + "step": 84100 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001972623176640651, + "loss": 9.5074, + "step": 84200 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001972558332117235, + "loss": 9.4899, + "step": 84300 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019724934119578566, + "loss": 9.4187, + "step": 84400 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001972428416167565, + "loss": 9.4417, + "step": 84500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019723633447514148, + "loss": 9.5616, + "step": 84600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019722981977144665, + "loss": 9.5119, + "step": 84700 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019722329750617866, + "loss": 9.529, + "step": 84800 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019721676767984477, + "loss": 9.4776, + "step": 84900 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019721023029295278, + "loss": 9.4764, + "step": 85000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019720368534601113, + "loss": 9.502, + "step": 85100 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019719713283952882, + "loss": 9.5238, + "step": 85200 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001971905727740154, + "loss": 9.5389, + "step": 85300 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019718400514998107, + "loss": 9.5451, + "step": 85400 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019717742996793662, + "loss": 9.461, + "step": 85500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019717084722839333, + "loss": 9.4839, + "step": 85600 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019716425693186323, + "loss": 9.4726, + "step": 85700 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019715765907885881, + "loss": 9.4735, + "step": 85800 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001971510536698932, + "loss": 9.4365, + "step": 85900 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019714444070548004, + "loss": 9.4469, + "step": 86000 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001971378201861337, + "loss": 9.6134, + "step": 86100 + }, + { + "epoch": 0.92, + "learning_rate": 0.000197131192112369, + "loss": 9.5192, + "step": 86200 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019712455648470144, + "loss": 9.5977, + "step": 86300 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019711791330364708, + "loss": 9.4784, + "step": 86400 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019711126256972253, + "loss": 9.4749, + "step": 86500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019710460428344506, + "loss": 9.5812, + "step": 86600 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019709793844533243, + "loss": 9.4644, + "step": 86700 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001970912650559031, + "loss": 9.4229, + "step": 86800 + }, + { + "epoch": 0.93, + "learning_rate": 0.000197084584115676, + "loss": 9.4989, + "step": 86900 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019707789562517077, + "loss": 9.4093, + "step": 87000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019707119958490754, + "loss": 9.4435, + "step": 87100 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001970644959954071, + "loss": 9.3781, + "step": 87200 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001970577848571907, + "loss": 9.5007, + "step": 87300 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019705106617078034, + "loss": 9.4824, + "step": 87400 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019704433993669853, + "loss": 9.4531, + "step": 87500 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019703760615546833, + "loss": 9.4226, + "step": 87600 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019703086482761347, + "loss": 9.4673, + "step": 87700 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001970241159536582, + "loss": 9.3922, + "step": 87800 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001970173595341274, + "loss": 9.4277, + "step": 87900 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001970105955695465, + "loss": 9.4192, + "step": 88000 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001970038240604415, + "loss": 9.3967, + "step": 88100 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001969970450073391, + "loss": 9.4854, + "step": 88200 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019699025841076644, + "loss": 9.5432, + "step": 88300 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019698346427125135, + "loss": 9.4179, + "step": 88400 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019697666258932221, + "loss": 9.4498, + "step": 88500 + }, + { + "epoch": 0.94, + "learning_rate": 0.000196969853365508, + "loss": 9.4505, + "step": 88600 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019696303660033822, + "loss": 9.5295, + "step": 88700 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019695621229434305, + "loss": 9.4605, + "step": 88800 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001969493804480532, + "loss": 9.4899, + "step": 88900 + }, + { + "epoch": 0.95, + "learning_rate": 0.000196942541062, + "loss": 9.4436, + "step": 89000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019693569413671536, + "loss": 9.5828, + "step": 89100 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019692883967273176, + "loss": 9.4466, + "step": 89200 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019692197767058224, + "loss": 9.3656, + "step": 89300 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019691510813080046, + "loss": 9.5702, + "step": 89400 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019690823105392074, + "loss": 9.4494, + "step": 89500 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019690134644047782, + "loss": 9.4883, + "step": 89600 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019689445429100716, + "loss": 9.3975, + "step": 89700 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019688755460604476, + "loss": 9.5076, + "step": 89800 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019688064738612718, + "loss": 9.4956, + "step": 89900 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019687373263179168, + "loss": 9.5313, + "step": 90000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019686681034357593, + "loss": 9.4493, + "step": 90100 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001968598805220183, + "loss": 9.4066, + "step": 90200 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019685294316765774, + "loss": 9.4379, + "step": 90300 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019684599828103375, + "loss": 9.458, + "step": 90400 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019683904586268646, + "loss": 9.4147, + "step": 90500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019683208591315653, + "loss": 9.5689, + "step": 90600 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001968251184329853, + "loss": 9.4881, + "step": 90700 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019681814342271454, + "loss": 9.4159, + "step": 90800 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019681116088288676, + "loss": 9.4748, + "step": 90900 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019680417081404495, + "loss": 9.4686, + "step": 91000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019679717321673278, + "loss": 9.5847, + "step": 91100 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001967901680914944, + "loss": 9.4081, + "step": 91200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019678315543887465, + "loss": 9.4672, + "step": 91300 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001967761352594189, + "loss": 9.501, + "step": 91400 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019676910755367304, + "loss": 9.3997, + "step": 91500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019676207232218368, + "loss": 9.4048, + "step": 91600 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019675502956549793, + "loss": 9.4855, + "step": 91700 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019674797928416354, + "loss": 9.4287, + "step": 91800 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001967409214787288, + "loss": 9.4476, + "step": 91900 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019673385614974253, + "loss": 9.4766, + "step": 92000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019672678329775428, + "loss": 9.5159, + "step": 92100 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019671970292331405, + "loss": 9.3848, + "step": 92200 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019671261502697253, + "loss": 9.3589, + "step": 92300 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019670551960928093, + "loss": 9.4098, + "step": 92400 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019669841667079103, + "loss": 9.4376, + "step": 92500 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019669130621205528, + "loss": 9.4696, + "step": 92600 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019668418823362663, + "loss": 9.4274, + "step": 92700 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019667706273605867, + "loss": 9.4423, + "step": 92800 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001966699297199055, + "loss": 9.4662, + "step": 92900 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019666278918572188, + "loss": 9.5297, + "step": 93000 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019665564113406316, + "loss": 9.473, + "step": 93100 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001966484855654852, + "loss": 9.4398, + "step": 93200 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019664132248054452, + "loss": 9.4403, + "step": 93300 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019663415187979818, + "loss": 9.4588, + "step": 93400 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019662697376380387, + "loss": 9.4023, + "step": 93500 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019661978813311976, + "loss": 9.3887, + "step": 93600 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019661259498830478, + "loss": 9.5287, + "step": 93700 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019660539432991825, + "loss": 9.5037, + "step": 93800 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001965981861585202, + "loss": 9.3924, + "step": 93900 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001965909704746712, + "loss": 9.3248, + "step": 94000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019658374727893243, + "loss": 9.3434, + "step": 94100 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019657651657186564, + "loss": 9.4726, + "step": 94200 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019656927835403316, + "loss": 9.3503, + "step": 94300 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001965620326259979, + "loss": 9.4065, + "step": 94400 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019655477938832337, + "loss": 9.3417, + "step": 94500 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019654751864157363, + "loss": 9.4946, + "step": 94600 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019654025038631342, + "loss": 9.4344, + "step": 94700 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019653297462310788, + "loss": 9.4037, + "step": 94800 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019652569135252294, + "loss": 9.4572, + "step": 94900 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019651840057512495, + "loss": 9.4511, + "step": 95000 + }, + { + "epoch": 1.01, + "learning_rate": 0.000196511102291481, + "loss": 9.4815, + "step": 95100 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019650379650215863, + "loss": 9.4694, + "step": 95200 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019649648320772597, + "loss": 9.4208, + "step": 95300 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001964891624087519, + "loss": 9.4257, + "step": 95400 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019648183410580562, + "loss": 9.3898, + "step": 95500 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001964744982994571, + "loss": 9.4102, + "step": 95600 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019646715499027688, + "loss": 9.4436, + "step": 95700 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019645980417883603, + "loss": 9.4429, + "step": 95800 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001964524458657062, + "loss": 9.4637, + "step": 95900 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001964450800514597, + "loss": 9.3822, + "step": 96000 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001964377067366693, + "loss": 9.3342, + "step": 96100 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001964303259219085, + "loss": 9.4341, + "step": 96200 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019642293760775126, + "loss": 9.383, + "step": 96300 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019641554179477218, + "loss": 9.3835, + "step": 96400 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019640813848354643, + "loss": 9.4911, + "step": 96500 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019640072767464973, + "loss": 9.4076, + "step": 96600 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019639330936865851, + "loss": 9.381, + "step": 96700 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019638588356614962, + "loss": 9.4126, + "step": 96800 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019637845026770058, + "loss": 9.3556, + "step": 96900 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019637100947388948, + "loss": 9.3622, + "step": 97000 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019636356118529503, + "loss": 9.5078, + "step": 97100 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001963561054024964, + "loss": 9.4245, + "step": 97200 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001963486421260735, + "loss": 9.404, + "step": 97300 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001963411713566067, + "loss": 9.4605, + "step": 97400 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019633369309467706, + "loss": 9.3828, + "step": 97500 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019632620734086613, + "loss": 9.4131, + "step": 97600 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001963187140957561, + "loss": 9.3533, + "step": 97700 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019631121335992964, + "loss": 9.3736, + "step": 97800 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001963037051339702, + "loss": 9.4449, + "step": 97900 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019629618941846162, + "loss": 9.5733, + "step": 98000 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001962886662139884, + "loss": 9.3786, + "step": 98100 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019628113552113568, + "loss": 9.4287, + "step": 98200 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019627359734048904, + "loss": 9.3929, + "step": 98300 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001962660516726348, + "loss": 9.4058, + "step": 98400 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019625849851815974, + "loss": 9.3536, + "step": 98500 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019625093787765127, + "loss": 9.4876, + "step": 98600 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001962433697516974, + "loss": 9.3595, + "step": 98700 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019623579414088674, + "loss": 9.3437, + "step": 98800 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019622821104580836, + "loss": 9.3532, + "step": 98900 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019622062046705202, + "loss": 9.4056, + "step": 99000 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001962130224052081, + "loss": 9.4234, + "step": 99100 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001962054168608675, + "loss": 9.4055, + "step": 99200 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019619780383462162, + "loss": 9.3919, + "step": 99300 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001961901833270626, + "loss": 9.3763, + "step": 99400 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019618255533878306, + "loss": 9.4005, + "step": 99500 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001961749198703762, + "loss": 9.3817, + "step": 99600 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001961672769224359, + "loss": 9.4243, + "step": 99700 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001961596264955565, + "loss": 9.3505, + "step": 99800 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019615196859033298, + "loss": 9.3593, + "step": 99900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019614430320736092, + "loss": 9.4034, + "step": 100000 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019613663034723645, + "loss": 9.3574, + "step": 100100 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019612895001055628, + "loss": 9.4205, + "step": 100200 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019612126219791773, + "loss": 9.4182, + "step": 100300 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019611356690991865, + "loss": 9.3651, + "step": 100400 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001961058641471575, + "loss": 9.3815, + "step": 100500 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019609815391023337, + "loss": 9.3941, + "step": 100600 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019609043619974584, + "loss": 9.4093, + "step": 100700 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001960827110162952, + "loss": 9.3315, + "step": 100800 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019607497836048209, + "loss": 9.4095, + "step": 100900 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019606723823290803, + "loss": 9.4818, + "step": 101000 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019605949063417486, + "loss": 9.4138, + "step": 101100 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019605173556488516, + "loss": 9.3873, + "step": 101200 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019604397302564207, + "loss": 9.4278, + "step": 101300 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019603620301704924, + "loss": 9.4231, + "step": 101400 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019602842553971095, + "loss": 9.4631, + "step": 101500 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019602064059423208, + "loss": 9.4619, + "step": 101600 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019601284818121804, + "loss": 9.5028, + "step": 101700 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019600504830127484, + "loss": 9.3645, + "step": 101800 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019599724095500912, + "loss": 9.365, + "step": 101900 + }, + { + "epoch": 1.09, + "learning_rate": 0.000195989426143028, + "loss": 9.4206, + "step": 102000 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001959816038659393, + "loss": 9.4081, + "step": 102100 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001959737741243513, + "loss": 9.3695, + "step": 102200 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019596593691887298, + "loss": 9.374, + "step": 102300 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019595809225011378, + "loss": 9.4265, + "step": 102400 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019595024011868383, + "loss": 9.3455, + "step": 102500 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019594238052519377, + "loss": 9.428, + "step": 102600 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019593451347025482, + "loss": 9.3812, + "step": 102700 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019592663895447884, + "loss": 9.4424, + "step": 102800 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019591875697847822, + "loss": 9.395, + "step": 102900 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019591086754286593, + "loss": 9.4553, + "step": 103000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019590297064825556, + "loss": 9.3938, + "step": 103100 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001958950662952612, + "loss": 9.4616, + "step": 103200 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019588715448449763, + "loss": 9.3819, + "step": 103300 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019587923521658014, + "loss": 9.423, + "step": 103400 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019587130849212455, + "loss": 9.4149, + "step": 103500 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019586337431174743, + "loss": 9.3194, + "step": 103600 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019585543267606573, + "loss": 9.4697, + "step": 103700 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019584748358569712, + "loss": 9.3921, + "step": 103800 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019583952704125976, + "loss": 9.3935, + "step": 103900 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001958315630433725, + "loss": 9.3816, + "step": 104000 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001958235915926546, + "loss": 9.3312, + "step": 104100 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019581561268972614, + "loss": 9.2337, + "step": 104200 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001958076263352075, + "loss": 9.3622, + "step": 104300 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019579963252971987, + "loss": 9.3099, + "step": 104400 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019579163127388487, + "loss": 9.3975, + "step": 104500 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019578362256832482, + "loss": 9.4539, + "step": 104600 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019577560641366248, + "loss": 9.3476, + "step": 104700 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019576758281052134, + "loss": 9.3422, + "step": 104800 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019575955175952538, + "loss": 9.3834, + "step": 104900 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019575151326129916, + "loss": 9.2476, + "step": 105000 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019574346731646784, + "loss": 9.2196, + "step": 105100 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019573541392565712, + "loss": 9.4011, + "step": 105200 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019572735308949336, + "loss": 9.4452, + "step": 105300 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019571928480860347, + "loss": 9.4477, + "step": 105400 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001957112090836149, + "loss": 9.3857, + "step": 105500 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019570312591515564, + "loss": 9.3364, + "step": 105600 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001956950353038544, + "loss": 9.416, + "step": 105700 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019568693725034034, + "loss": 9.3818, + "step": 105800 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019567883175524323, + "loss": 9.4215, + "step": 105900 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019567071881919351, + "loss": 9.3541, + "step": 106000 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001956625984428221, + "loss": 9.3069, + "step": 106100 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019565447062676048, + "loss": 9.4461, + "step": 106200 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019564633537164075, + "loss": 9.344, + "step": 106300 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019563819267809563, + "loss": 9.4323, + "step": 106400 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001956300425467584, + "loss": 9.362, + "step": 106500 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001956218849782628, + "loss": 9.4185, + "step": 106600 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019561371997324338, + "loss": 9.423, + "step": 106700 + }, + { + "epoch": 1.14, + "learning_rate": 0.000195605547532335, + "loss": 9.4545, + "step": 106800 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019559736765617332, + "loss": 9.3586, + "step": 106900 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001955891803453944, + "loss": 9.3306, + "step": 107000 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001955809856006351, + "loss": 9.3287, + "step": 107100 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019557278342253263, + "loss": 9.3677, + "step": 107200 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019556457381172487, + "loss": 9.2761, + "step": 107300 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019555635676885033, + "loss": 9.3152, + "step": 107400 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019554813229454802, + "loss": 9.3795, + "step": 107500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019553990038945757, + "loss": 9.3835, + "step": 107600 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019553166105421919, + "loss": 9.3242, + "step": 107700 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019552341428947362, + "loss": 9.4807, + "step": 107800 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019551516009586224, + "loss": 9.3232, + "step": 107900 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019550689847402697, + "loss": 9.3058, + "step": 108000 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019549862942461034, + "loss": 9.3803, + "step": 108100 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019549035294825538, + "loss": 9.3845, + "step": 108200 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001954820690456058, + "loss": 9.3251, + "step": 108300 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001954737777173058, + "loss": 9.3101, + "step": 108400 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019546547896400022, + "loss": 9.3327, + "step": 108500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019545717278633448, + "loss": 9.3347, + "step": 108600 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019544885918495452, + "loss": 9.4025, + "step": 108700 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001954405381605069, + "loss": 9.3257, + "step": 108800 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019543220971363873, + "loss": 9.369, + "step": 108900 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019542387384499776, + "loss": 9.3553, + "step": 109000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019541553055523223, + "loss": 9.3475, + "step": 109100 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019540717984499099, + "loss": 9.308, + "step": 109200 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001953988217149235, + "loss": 9.3449, + "step": 109300 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019539045616567976, + "loss": 9.2908, + "step": 109400 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019538208319791041, + "loss": 9.4382, + "step": 109500 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019537370281226654, + "loss": 9.3325, + "step": 109600 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019536531500939994, + "loss": 9.293, + "step": 109700 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001953569197899629, + "loss": 9.3353, + "step": 109800 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019534851715460834, + "loss": 9.4071, + "step": 109900 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019534010710398975, + "loss": 9.3276, + "step": 110000 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019533168963876114, + "loss": 9.3759, + "step": 110100 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019532326475957715, + "loss": 9.3717, + "step": 110200 + }, + { + "epoch": 1.17, + "learning_rate": 0.000195314832467093, + "loss": 9.306, + "step": 110300 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019530639276196446, + "loss": 9.3948, + "step": 110400 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019529794564484787, + "loss": 9.3491, + "step": 110500 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001952894911164002, + "loss": 9.3269, + "step": 110600 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019528102917727892, + "loss": 9.3262, + "step": 110700 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019527255982814217, + "loss": 9.2975, + "step": 110800 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019526408306964855, + "loss": 9.3255, + "step": 110900 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019525559890245733, + "loss": 9.2798, + "step": 111000 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001952471073272283, + "loss": 9.2469, + "step": 111100 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019523860834462191, + "loss": 9.3253, + "step": 111200 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019523010195529906, + "loss": 9.3312, + "step": 111300 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019522158815992135, + "loss": 9.2441, + "step": 111400 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019521306695915086, + "loss": 9.3646, + "step": 111500 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019520453835365027, + "loss": 9.2768, + "step": 111600 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019519600234408287, + "loss": 9.3744, + "step": 111700 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019518745893111255, + "loss": 9.3859, + "step": 111800 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019517890811540366, + "loss": 9.3456, + "step": 111900 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019517034989762126, + "loss": 9.2291, + "step": 112000 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001951617842784309, + "loss": 9.3475, + "step": 112100 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019515321125849867, + "loss": 9.2629, + "step": 112200 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001951446308384914, + "loss": 9.3358, + "step": 112300 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019513604301907628, + "loss": 9.301, + "step": 112400 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019512744780092128, + "loss": 9.33, + "step": 112500 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019511884518469484, + "loss": 9.3271, + "step": 112600 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019511023517106594, + "loss": 9.3457, + "step": 112700 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019510161776070418, + "loss": 9.3779, + "step": 112800 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001950929929542798, + "loss": 9.3716, + "step": 112900 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019508436075246348, + "loss": 9.3597, + "step": 113000 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019507572115592657, + "loss": 9.2559, + "step": 113100 + }, + { + "epoch": 1.21, + "learning_rate": 0.000195067074165341, + "loss": 9.3197, + "step": 113200 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019505841978137924, + "loss": 9.3228, + "step": 113300 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019504975800471432, + "loss": 9.2939, + "step": 113400 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001950410888360199, + "loss": 9.3689, + "step": 113500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019503241227597013, + "loss": 9.3529, + "step": 113600 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001950237283252398, + "loss": 9.3608, + "step": 113700 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019501503698450434, + "loss": 9.2911, + "step": 113800 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019500633825443958, + "loss": 9.4173, + "step": 113900 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019499763213572205, + "loss": 9.2975, + "step": 114000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019498891862902883, + "loss": 9.2801, + "step": 114100 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001949801977350376, + "loss": 9.3396, + "step": 114200 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019497146945442656, + "loss": 9.3647, + "step": 114300 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019496273378787445, + "loss": 9.3771, + "step": 114400 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019495399073606075, + "loss": 9.3314, + "step": 114500 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019494524029966534, + "loss": 9.2943, + "step": 114600 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019493648247936879, + "loss": 9.2616, + "step": 114700 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019492771727585212, + "loss": 9.3531, + "step": 114800 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019491894468979707, + "loss": 9.3714, + "step": 114900 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001949101647218859, + "loss": 9.2707, + "step": 115000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019490137737280137, + "loss": 9.3456, + "step": 115100 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019489258264322685, + "loss": 9.2603, + "step": 115200 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019488378053384643, + "loss": 9.1962, + "step": 115300 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001948749710453445, + "loss": 9.3411, + "step": 115400 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001948661541784063, + "loss": 9.4184, + "step": 115500 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019485732993371744, + "loss": 9.3172, + "step": 115600 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019484849831196422, + "loss": 9.3644, + "step": 115700 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019483965931383346, + "loss": 9.3326, + "step": 115800 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019483081294001256, + "loss": 9.3099, + "step": 115900 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019482195919118955, + "loss": 9.3192, + "step": 116000 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019481309806805293, + "loss": 9.2645, + "step": 116100 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019480422957129188, + "loss": 9.3033, + "step": 116200 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019479535370159608, + "loss": 9.3324, + "step": 116300 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019478647045965576, + "loss": 9.3124, + "step": 116400 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019477757984616187, + "loss": 9.3112, + "step": 116500 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019476868186180574, + "loss": 9.2721, + "step": 116600 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019475977650727945, + "loss": 9.3204, + "step": 116700 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019475086378327553, + "loss": 9.2632, + "step": 116800 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001947419436904871, + "loss": 9.2863, + "step": 116900 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001947330162296079, + "loss": 9.304, + "step": 117000 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001947240814013322, + "loss": 9.2448, + "step": 117100 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019471513920635492, + "loss": 9.2903, + "step": 117200 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019470618964537143, + "loss": 9.3705, + "step": 117300 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019469723271907778, + "loss": 9.3106, + "step": 117400 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019468826842817055, + "loss": 9.3315, + "step": 117500 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019467929677334685, + "loss": 9.3977, + "step": 117600 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019467031775530447, + "loss": 9.2033, + "step": 117700 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019466133137474164, + "loss": 9.2825, + "step": 117800 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019465233763235729, + "loss": 9.3418, + "step": 117900 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019464333652885083, + "loss": 9.3297, + "step": 118000 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019463432806492227, + "loss": 9.3645, + "step": 118100 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019462531224127228, + "loss": 9.1967, + "step": 118200 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001946162890586019, + "loss": 9.3424, + "step": 118300 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019460725851761296, + "loss": 9.279, + "step": 118400 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001945982206190077, + "loss": 9.3211, + "step": 118500 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019458917536348903, + "loss": 9.3202, + "step": 118600 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019458012275176044, + "loss": 9.3559, + "step": 118700 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019457106278452586, + "loss": 9.2719, + "step": 118800 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019456199546248994, + "loss": 9.2645, + "step": 118900 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019455292078635788, + "loss": 9.3207, + "step": 119000 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019454383875683532, + "loss": 9.275, + "step": 119100 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001945347493746287, + "loss": 9.3154, + "step": 119200 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019452565264044478, + "loss": 9.2395, + "step": 119300 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019451654855499108, + "loss": 9.3098, + "step": 119400 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019450743711897563, + "loss": 9.3264, + "step": 119500 + }, + { + "epoch": 1.27, + "learning_rate": 0.000194498318333107, + "loss": 9.3253, + "step": 119600 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019448919219809438, + "loss": 9.351, + "step": 119700 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019448005871464748, + "loss": 9.3485, + "step": 119800 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001944709178834766, + "loss": 9.2532, + "step": 119900 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019446176970529271, + "loss": 9.288, + "step": 120000 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001944526141808072, + "loss": 9.4799, + "step": 120100 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001944434513107321, + "loss": 9.3038, + "step": 120200 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019443428109577998, + "loss": 9.3026, + "step": 120300 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001944251035366641, + "loss": 9.3188, + "step": 120400 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001944159186340981, + "loss": 9.2763, + "step": 120500 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019440672638879636, + "loss": 9.296, + "step": 120600 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019439752680147372, + "loss": 9.3537, + "step": 120700 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019438831987284561, + "loss": 9.309, + "step": 120800 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019437910560362814, + "loss": 9.2144, + "step": 120900 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019436988399453784, + "loss": 9.3245, + "step": 121000 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001943606550462919, + "loss": 9.2782, + "step": 121100 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019435141875960801, + "loss": 9.3064, + "step": 121200 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019434217513520453, + "loss": 9.3335, + "step": 121300 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019433292417380034, + "loss": 9.3318, + "step": 121400 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019432366587611487, + "loss": 9.2998, + "step": 121500 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001943144002428681, + "loss": 9.2864, + "step": 121600 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019430512727478073, + "loss": 9.2584, + "step": 121700 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001942958469725738, + "loss": 9.2936, + "step": 121800 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019428655933696908, + "loss": 9.3321, + "step": 121900 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019427726436868888, + "loss": 9.345, + "step": 122000 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019426796206845607, + "loss": 9.2586, + "step": 122100 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019425865243699408, + "loss": 9.343, + "step": 122200 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019424933547502695, + "loss": 9.2783, + "step": 122300 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019424001118327925, + "loss": 9.274, + "step": 122400 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001942306795624761, + "loss": 9.2764, + "step": 122500 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019422134061334326, + "loss": 9.34, + "step": 122600 + }, + { + "epoch": 1.31, + "learning_rate": 0.000194211994336607, + "loss": 9.2889, + "step": 122700 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019420264073299418, + "loss": 9.2698, + "step": 122800 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019419327980323226, + "loss": 9.2631, + "step": 122900 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019418391154804918, + "loss": 9.3466, + "step": 123000 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019417453596817357, + "loss": 9.227, + "step": 123100 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001941651530643345, + "loss": 9.27, + "step": 123200 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001941557628372618, + "loss": 9.2429, + "step": 123300 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019414636528768565, + "loss": 9.3001, + "step": 123400 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001941369604163369, + "loss": 9.2736, + "step": 123500 + }, + { + "epoch": 1.32, + "learning_rate": 0.000194127548223947, + "loss": 9.2941, + "step": 123600 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019411812871124792, + "loss": 9.2733, + "step": 123700 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019410870187897223, + "loss": 9.2313, + "step": 123800 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019409926772785308, + "loss": 9.3435, + "step": 123900 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019408982625862412, + "loss": 9.2744, + "step": 124000 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001940803774720196, + "loss": 9.1923, + "step": 124100 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001940709213687744, + "loss": 9.273, + "step": 124200 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019406145794962393, + "loss": 9.1952, + "step": 124300 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019405198721530412, + "loss": 9.2881, + "step": 124400 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019404250916655151, + "loss": 9.1882, + "step": 124500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019403302380410328, + "loss": 9.3308, + "step": 124600 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019402353112869697, + "loss": 9.3049, + "step": 124700 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019401403114107095, + "loss": 9.3545, + "step": 124800 + }, + { + "epoch": 1.33, + "learning_rate": 0.000194004523841964, + "loss": 9.3408, + "step": 124900 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019399500923211548, + "loss": 9.2751, + "step": 125000 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001939854873122654, + "loss": 9.2797, + "step": 125100 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001939759580831542, + "loss": 9.2413, + "step": 125200 + }, + { + "epoch": 1.33, + "learning_rate": 0.000193966421545523, + "loss": 9.2761, + "step": 125300 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001939568777001135, + "loss": 9.2561, + "step": 125400 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019394732654766787, + "loss": 9.288, + "step": 125500 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019393776808892895, + "loss": 9.19, + "step": 125600 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019392820232464004, + "loss": 9.2844, + "step": 125700 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019391862925554514, + "loss": 9.2612, + "step": 125800 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001939090488823887, + "loss": 9.2815, + "step": 125900 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019389946120591583, + "loss": 9.2582, + "step": 126000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019388986622687212, + "loss": 9.3157, + "step": 126100 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019388026394600381, + "loss": 9.2597, + "step": 126200 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019387065436405764, + "loss": 9.2249, + "step": 126300 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019386103748178095, + "loss": 9.2928, + "step": 126400 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019385141329992167, + "loss": 9.3433, + "step": 126500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019384178181922827, + "loss": 9.3216, + "step": 126600 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019383214304044978, + "loss": 9.2839, + "step": 126700 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019382249696433577, + "loss": 9.3007, + "step": 126800 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001938128435916365, + "loss": 9.3043, + "step": 126900 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019380318292310268, + "loss": 9.2443, + "step": 127000 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019379351495948556, + "loss": 9.2627, + "step": 127100 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019378383970153711, + "loss": 9.297, + "step": 127200 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001937741571500097, + "loss": 9.2983, + "step": 127300 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001937644673056564, + "loss": 9.2336, + "step": 127400 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019375477016923077, + "loss": 9.2669, + "step": 127500 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019374506574148698, + "loss": 9.1947, + "step": 127600 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001937353540231797, + "loss": 9.2839, + "step": 127700 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019372563501506422, + "loss": 9.3007, + "step": 127800 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001937159087178964, + "loss": 9.2656, + "step": 127900 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019370617513243268, + "loss": 9.2807, + "step": 128000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019369643425943, + "loss": 9.2938, + "step": 128100 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019368668609964594, + "loss": 9.2408, + "step": 128200 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019367693065383857, + "loss": 9.3026, + "step": 128300 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019366716792276664, + "loss": 9.3325, + "step": 128400 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019365739790718934, + "loss": 9.3053, + "step": 128500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019364762060786653, + "loss": 9.3031, + "step": 128600 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019363783602555854, + "loss": 9.2816, + "step": 128700 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019362804416102637, + "loss": 9.2392, + "step": 128800 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019361824501503149, + "loss": 9.2443, + "step": 128900 + }, + { + "epoch": 1.37, + "learning_rate": 0.000193608438588336, + "loss": 9.2757, + "step": 129000 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019359862488170257, + "loss": 9.2008, + "step": 129100 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019358880389589437, + "loss": 9.1587, + "step": 129200 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019357897563167522, + "loss": 9.2997, + "step": 129300 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019356914008980945, + "loss": 9.3704, + "step": 129400 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019355929727106195, + "loss": 9.3117, + "step": 129500 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019354944717619822, + "loss": 9.3125, + "step": 129600 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019353958980598428, + "loss": 9.222, + "step": 129700 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019352972516118678, + "loss": 9.2631, + "step": 129800 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019351985324257286, + "loss": 9.2089, + "step": 129900 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019350997405091026, + "loss": 9.2704, + "step": 130000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019350008758696732, + "loss": 9.2922, + "step": 130100 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019349019385151288, + "loss": 9.254, + "step": 130200 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019348029284531638, + "loss": 9.3446, + "step": 130300 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019347038456914782, + "loss": 9.1929, + "step": 130400 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019346046902377778, + "loss": 9.2877, + "step": 130500 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019345054620997738, + "loss": 9.2966, + "step": 130600 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001934406161285183, + "loss": 9.3047, + "step": 130700 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019343067878017288, + "loss": 9.2964, + "step": 130800 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001934207341657139, + "loss": 9.2106, + "step": 130900 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001934107822859147, + "loss": 9.2356, + "step": 131000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019340082314154932, + "loss": 9.2258, + "step": 131100 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019339085673339225, + "loss": 9.2671, + "step": 131200 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019338088306221859, + "loss": 9.3013, + "step": 131300 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019337090212880398, + "loss": 9.2607, + "step": 131400 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019336091393392464, + "loss": 9.1956, + "step": 131500 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019335091847835735, + "loss": 9.2809, + "step": 131600 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019334091576287947, + "loss": 9.2359, + "step": 131700 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001933309057882689, + "loss": 9.289, + "step": 131800 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019332088855530415, + "loss": 9.1894, + "step": 131900 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001933108640647642, + "loss": 9.1921, + "step": 132000 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001933008323174287, + "loss": 9.2665, + "step": 132100 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019329079331407783, + "loss": 9.1931, + "step": 132200 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019328074705549228, + "loss": 9.2332, + "step": 132300 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001932706935424534, + "loss": 9.2612, + "step": 132400 + }, + { + "epoch": 1.41, + "learning_rate": 0.000193260632775743, + "loss": 9.328, + "step": 132500 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019325056475614352, + "loss": 9.3163, + "step": 132600 + }, + { + "epoch": 1.41, + "learning_rate": 0.000193240489484438, + "loss": 9.2339, + "step": 132700 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019323040696140996, + "loss": 9.2777, + "step": 132800 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001932203171878435, + "loss": 9.2755, + "step": 132900 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001932102201645233, + "loss": 9.1908, + "step": 133000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019320011589223465, + "loss": 9.2088, + "step": 133100 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019319000437176333, + "loss": 9.1855, + "step": 133200 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019317988560389572, + "loss": 9.2732, + "step": 133300 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019316975958941877, + "loss": 9.2465, + "step": 133400 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019315962632911998, + "loss": 9.2247, + "step": 133500 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019314948582378739, + "loss": 9.2125, + "step": 133600 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019313933807420963, + "loss": 9.1628, + "step": 133700 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019312918308117593, + "loss": 9.1691, + "step": 133800 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019311902084547599, + "loss": 9.2715, + "step": 133900 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019310885136790016, + "loss": 9.2209, + "step": 134000 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001930986746492393, + "loss": 9.2369, + "step": 134100 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001930884906902849, + "loss": 9.2323, + "step": 134200 + }, + { + "epoch": 1.43, + "learning_rate": 0.000193078299491829, + "loss": 9.2648, + "step": 134300 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019306810105466403, + "loss": 9.3134, + "step": 134400 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019305789537958323, + "loss": 9.2365, + "step": 134500 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019304768246738028, + "loss": 9.2621, + "step": 134600 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019303746231884937, + "loss": 9.2052, + "step": 134700 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019302723493478546, + "loss": 9.2685, + "step": 134800 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001930170003159838, + "loss": 9.2159, + "step": 134900 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019300675846324044, + "loss": 9.2499, + "step": 135000 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019299650937735184, + "loss": 9.1574, + "step": 135100 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019298625305911506, + "loss": 9.2841, + "step": 135200 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019297598950932776, + "loss": 9.2507, + "step": 135300 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001929657187287881, + "loss": 9.1817, + "step": 135400 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001929554407182949, + "loss": 9.2453, + "step": 135500 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019294515547864746, + "loss": 9.2422, + "step": 135600 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001929348630106456, + "loss": 9.2887, + "step": 135700 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019292456331508988, + "loss": 9.2751, + "step": 135800 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019291425639278122, + "loss": 9.287, + "step": 135900 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019290394224452122, + "loss": 9.2641, + "step": 136000 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019289362087111203, + "loss": 9.2152, + "step": 136100 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019288329227335632, + "loss": 9.31, + "step": 136200 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019287295645205733, + "loss": 9.2928, + "step": 136300 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019286261340801893, + "loss": 9.2249, + "step": 136400 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019285226314204546, + "loss": 9.246, + "step": 136500 + }, + { + "epoch": 1.45, + "learning_rate": 0.00019284190565494185, + "loss": 9.2647, + "step": 136600 + }, + { + "epoch": 1.46, + "learning_rate": 0.00019283154094751367, + "loss": 9.2166, + "step": 136700 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001928211690205669, + "loss": 9.1956, + "step": 136800 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001928107898749082, + "loss": 9.2086, + "step": 136900 + }, + { + "epoch": 1.46, + "learning_rate": 0.00019280040351134478, + "loss": 9.2829, + "step": 137000 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001927900099306844, + "loss": 9.2275, + "step": 137100 + }, + { + "epoch": 1.46, + "learning_rate": 0.00019277960913373528, + "loss": 9.1999, + "step": 137200 + }, + { + "epoch": 1.46, + "learning_rate": 0.00019276920112130637, + "loss": 9.2681, + "step": 137300 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001927587858942071, + "loss": 9.2515, + "step": 137400 + }, + { + "epoch": 1.46, + "learning_rate": 0.00019274836345324742, + "loss": 9.2348, + "step": 137500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00019273793379923795, + "loss": 9.1734, + "step": 137600 + }, + { + "epoch": 1.47, + "learning_rate": 0.00019272749693298975, + "loss": 9.2201, + "step": 137700 + }, + { + "epoch": 1.47, + "learning_rate": 0.00019271705285531452, + "loss": 9.1794, + "step": 137800 + }, + { + "epoch": 1.47, + "learning_rate": 0.00019270660156702447, + "loss": 9.2083, + "step": 137900 + }, + { + "epoch": 1.47, + "learning_rate": 0.00019269614306893242, + "loss": 9.2232, + "step": 138000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00019268567736185172, + "loss": 9.1929, + "step": 138100 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001926752044465963, + "loss": 9.2169, + "step": 138200 + }, + { + "epoch": 1.47, + "learning_rate": 0.00019266472432398067, + "loss": 9.2201, + "step": 138300 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001926542369948198, + "loss": 9.215, + "step": 138400 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001926437424599293, + "loss": 9.1581, + "step": 138500 + }, + { + "epoch": 1.48, + "learning_rate": 0.00019263324072012542, + "loss": 9.2894, + "step": 138600 + }, + { + "epoch": 1.48, + "learning_rate": 0.00019262273177622475, + "loss": 9.1528, + "step": 138700 + }, + { + "epoch": 1.48, + "learning_rate": 0.00019261221562904467, + "loss": 9.1888, + "step": 138800 + }, + { + "epoch": 1.48, + "learning_rate": 0.000192601692279403, + "loss": 9.2173, + "step": 138900 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001925911617281181, + "loss": 9.2572, + "step": 139000 + }, + { + "epoch": 1.48, + "learning_rate": 0.00019258062397600902, + "loss": 9.1563, + "step": 139100 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001925700790238952, + "loss": 9.1999, + "step": 139200 + }, + { + "epoch": 1.48, + "learning_rate": 0.00019255952687259674, + "loss": 9.2211, + "step": 139300 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001925489675229343, + "loss": 9.2655, + "step": 139400 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001925384009757291, + "loss": 9.3033, + "step": 139500 + }, + { + "epoch": 1.49, + "learning_rate": 0.00019252782723180281, + "loss": 9.2836, + "step": 139600 + }, + { + "epoch": 1.49, + "learning_rate": 0.00019251724629197785, + "loss": 9.2844, + "step": 139700 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001925066581570771, + "loss": 9.2113, + "step": 139800 + }, + { + "epoch": 1.49, + "learning_rate": 0.00019249606282792392, + "loss": 9.2401, + "step": 139900 + }, + { + "epoch": 1.49, + "learning_rate": 0.00019248546030534237, + "loss": 9.2845, + "step": 140000 + }, + { + "epoch": 1.49, + "learning_rate": 0.000192474850590157, + "loss": 9.194, + "step": 140100 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001924642336831929, + "loss": 9.2228, + "step": 140200 + }, + { + "epoch": 1.49, + "learning_rate": 0.00019245360958527582, + "loss": 9.2805, + "step": 140300 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019244297829723193, + "loss": 9.2568, + "step": 140400 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019243233981988805, + "loss": 9.2393, + "step": 140500 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001924216941540715, + "loss": 9.2413, + "step": 140600 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019241104130061026, + "loss": 9.3528, + "step": 140700 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019240038126033275, + "loss": 9.2566, + "step": 140800 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019238971403406801, + "loss": 9.2224, + "step": 140900 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019237903962264567, + "loss": 9.1451, + "step": 141000 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019236835802689586, + "loss": 9.2272, + "step": 141100 + }, + { + "epoch": 1.5, + "learning_rate": 0.00019235766924764926, + "loss": 9.2047, + "step": 141200 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019234697328573717, + "loss": 9.2651, + "step": 141300 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019233627014199141, + "loss": 9.2141, + "step": 141400 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019232555981724435, + "loss": 9.2544, + "step": 141500 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019231484231232895, + "loss": 9.1772, + "step": 141600 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019230411762807869, + "loss": 9.2349, + "step": 141700 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019229338576532768, + "loss": 9.194, + "step": 141800 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019228264672491044, + "loss": 9.1984, + "step": 141900 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001922719005076623, + "loss": 9.1912, + "step": 142000 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019226114711441884, + "loss": 9.2207, + "step": 142100 + }, + { + "epoch": 1.51, + "learning_rate": 0.00019225038654601642, + "loss": 9.1997, + "step": 142200 + }, + { + "epoch": 1.52, + "learning_rate": 0.00019223961880329192, + "loss": 9.1824, + "step": 142300 + }, + { + "epoch": 1.52, + "learning_rate": 0.00019222884388708265, + "loss": 9.1473, + "step": 142400 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001922180617982267, + "loss": 9.1345, + "step": 142500 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001922072725375625, + "loss": 9.2014, + "step": 142600 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001921964761059292, + "loss": 9.2317, + "step": 142700 + }, + { + "epoch": 1.52, + "learning_rate": 0.00019218567250416639, + "loss": 9.1546, + "step": 142800 + }, + { + "epoch": 1.52, + "learning_rate": 0.00019217486173311426, + "loss": 9.2714, + "step": 142900 + }, + { + "epoch": 1.52, + "learning_rate": 0.00019216404379361363, + "loss": 9.2407, + "step": 143000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00019215321868650578, + "loss": 9.2299, + "step": 143100 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019214238641263254, + "loss": 9.1851, + "step": 143200 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019213154697283636, + "loss": 9.3089, + "step": 143300 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019212070036796024, + "loss": 9.2084, + "step": 143400 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019210984659884773, + "loss": 9.1195, + "step": 143500 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019209898566634293, + "loss": 9.1995, + "step": 143600 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019208811757129044, + "loss": 9.2166, + "step": 143700 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001920772423145355, + "loss": 9.1633, + "step": 143800 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019206635989692394, + "loss": 9.2845, + "step": 143900 + }, + { + "epoch": 1.53, + "learning_rate": 0.000192055470319302, + "loss": 9.2898, + "step": 144000 + }, + { + "epoch": 1.53, + "learning_rate": 0.00019204457358251663, + "loss": 9.1636, + "step": 144100 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001920336696874152, + "loss": 9.1687, + "step": 144200 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001920227586348458, + "loss": 9.2174, + "step": 144300 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001920118404256569, + "loss": 9.1873, + "step": 144400 + }, + { + "epoch": 1.54, + "learning_rate": 0.00019200091506069763, + "loss": 9.2164, + "step": 144500 + }, + { + "epoch": 1.54, + "learning_rate": 0.00019198998254081772, + "loss": 9.261, + "step": 144600 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001919790428668673, + "loss": 9.2058, + "step": 144700 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001919680960396972, + "loss": 9.2192, + "step": 144800 + }, + { + "epoch": 1.54, + "learning_rate": 0.00019195714206015878, + "loss": 9.1892, + "step": 144900 + }, + { + "epoch": 1.54, + "learning_rate": 0.00019194618092910386, + "loss": 9.2218, + "step": 145000 + }, + { + "epoch": 1.55, + "learning_rate": 0.00019193521264738496, + "loss": 9.2483, + "step": 145100 + }, + { + "epoch": 1.55, + "learning_rate": 0.000191924237215855, + "loss": 9.2156, + "step": 145200 + }, + { + "epoch": 1.55, + "learning_rate": 0.00019191325463536763, + "loss": 9.1744, + "step": 145300 + }, + { + "epoch": 1.55, + "learning_rate": 0.00019190226490677694, + "loss": 9.181, + "step": 145400 + }, + { + "epoch": 1.55, + "learning_rate": 0.00019189126803093758, + "loss": 9.164, + "step": 145500 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001918802640087048, + "loss": 9.1643, + "step": 145600 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001918692528409344, + "loss": 9.2215, + "step": 145700 + }, + { + "epoch": 1.55, + "learning_rate": 0.00019185823452848264, + "loss": 9.2219, + "step": 145800 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001918472090722065, + "loss": 9.1803, + "step": 145900 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019183617647296338, + "loss": 9.1764, + "step": 146000 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019182513673161134, + "loss": 9.2564, + "step": 146100 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019181408984900888, + "loss": 9.207, + "step": 146200 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019180303582601513, + "loss": 9.13, + "step": 146300 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001917919746634898, + "loss": 9.2213, + "step": 146400 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019178090636229312, + "loss": 9.251, + "step": 146500 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019176983092328582, + "loss": 9.2466, + "step": 146600 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019175874834732927, + "loss": 9.2254, + "step": 146700 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019174765863528537, + "loss": 9.1645, + "step": 146800 + }, + { + "epoch": 1.56, + "learning_rate": 0.00019173656178801655, + "loss": 9.2097, + "step": 146900 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019172545780638583, + "loss": 9.1132, + "step": 147000 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019171434669125673, + "loss": 9.2148, + "step": 147100 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019170322844349341, + "loss": 9.1861, + "step": 147200 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019169210306396056, + "loss": 9.1431, + "step": 147300 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001916809705535233, + "loss": 9.2127, + "step": 147400 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019166983091304748, + "loss": 9.2828, + "step": 147500 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019165868414339942, + "loss": 9.2589, + "step": 147600 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019164753024544605, + "loss": 9.2054, + "step": 147700 + }, + { + "epoch": 1.57, + "learning_rate": 0.00019163636922005472, + "loss": 9.1916, + "step": 147800 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019162520106809345, + "loss": 9.2397, + "step": 147900 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019161402579043085, + "loss": 9.2059, + "step": 148000 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019160284338793596, + "loss": 9.2292, + "step": 148100 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019159165386147843, + "loss": 9.1898, + "step": 148200 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019158045721192853, + "loss": 9.1775, + "step": 148300 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019156925344015696, + "loss": 9.2688, + "step": 148400 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019155804254703508, + "loss": 9.2079, + "step": 148500 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001915468245334348, + "loss": 9.2323, + "step": 148600 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019153559940022843, + "loss": 9.2077, + "step": 148700 + }, + { + "epoch": 1.58, + "learning_rate": 0.00019152436714828906, + "loss": 9.2747, + "step": 148800 + }, + { + "epoch": 1.59, + "learning_rate": 0.00019151312777849016, + "loss": 9.2461, + "step": 148900 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001915018812917058, + "loss": 9.2495, + "step": 149000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00019149062768881073, + "loss": 9.1922, + "step": 149100 + }, + { + "epoch": 1.59, + "learning_rate": 0.00019147936697068004, + "loss": 9.1634, + "step": 149200 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001914680991381895, + "loss": 9.2505, + "step": 149300 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001914568241922154, + "loss": 9.2331, + "step": 149400 + }, + { + "epoch": 1.59, + "learning_rate": 0.00019144554213363466, + "loss": 9.2046, + "step": 149500 + }, + { + "epoch": 1.59, + "learning_rate": 0.00019143425296332461, + "loss": 9.1451, + "step": 149600 + }, + { + "epoch": 1.59, + "learning_rate": 0.00019142295668216327, + "loss": 9.242, + "step": 149700 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019141165329102908, + "loss": 9.1182, + "step": 149800 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019140034279080116, + "loss": 9.1915, + "step": 149900 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019138902518235913, + "loss": 9.1837, + "step": 150000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019137770046658315, + "loss": 9.2855, + "step": 150100 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019136636864435395, + "loss": 9.2226, + "step": 150200 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019135502971655277, + "loss": 9.1355, + "step": 150300 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001913436836840615, + "loss": 9.2515, + "step": 150400 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019133233054776248, + "loss": 9.1942, + "step": 150500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00019132097030853865, + "loss": 9.2233, + "step": 150600 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019130960296727352, + "loss": 9.2331, + "step": 150700 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001912982285248511, + "loss": 9.2144, + "step": 150800 + }, + { + "epoch": 1.61, + "learning_rate": 0.000191286846982156, + "loss": 9.1434, + "step": 150900 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019127545834007336, + "loss": 9.154, + "step": 151000 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019126406259948887, + "loss": 9.262, + "step": 151100 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019125265976128878, + "loss": 9.1999, + "step": 151200 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019124124982635988, + "loss": 9.158, + "step": 151300 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019122983279558958, + "loss": 9.1562, + "step": 151400 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019121840866986566, + "loss": 9.1829, + "step": 151500 + }, + { + "epoch": 1.61, + "learning_rate": 0.00019120697745007671, + "loss": 9.2308, + "step": 151600 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019119553913711168, + "loss": 9.2134, + "step": 151700 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019118409373186011, + "loss": 9.1193, + "step": 151800 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019117264123521212, + "loss": 9.2709, + "step": 151900 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001911611816480584, + "loss": 9.2727, + "step": 152000 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019114971497129015, + "loss": 9.3019, + "step": 152100 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001911382412057991, + "loss": 9.2419, + "step": 152200 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019112676035247763, + "loss": 9.1431, + "step": 152300 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019111527241221857, + "loss": 9.1435, + "step": 152400 + }, + { + "epoch": 1.62, + "learning_rate": 0.00019110377738591533, + "loss": 9.2461, + "step": 152500 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019109227527446185, + "loss": 9.1988, + "step": 152600 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019108076607875275, + "loss": 9.2145, + "step": 152700 + }, + { + "epoch": 1.63, + "learning_rate": 0.000191069249799683, + "loss": 9.1934, + "step": 152800 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001910577264381483, + "loss": 9.2267, + "step": 152900 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019104619599504474, + "loss": 9.095, + "step": 153000 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019103465847126913, + "loss": 9.2327, + "step": 153100 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019102311386771868, + "loss": 9.1902, + "step": 153200 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019101156218529126, + "loss": 9.1722, + "step": 153300 + }, + { + "epoch": 1.63, + "learning_rate": 0.00019100000342488522, + "loss": 9.1728, + "step": 153400 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001909884375873995, + "loss": 9.1844, + "step": 153500 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019097686467373356, + "loss": 9.1276, + "step": 153600 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001909652846847874, + "loss": 9.1911, + "step": 153700 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019095369762146168, + "loss": 9.0729, + "step": 153800 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019094210348465746, + "loss": 9.0662, + "step": 153900 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019093050227527645, + "loss": 9.1688, + "step": 154000 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019091889399422082, + "loss": 9.0877, + "step": 154100 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019090727864239343, + "loss": 9.1497, + "step": 154200 + }, + { + "epoch": 1.64, + "learning_rate": 0.00019089565622069755, + "loss": 9.198, + "step": 154300 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001908840267300371, + "loss": 9.1873, + "step": 154400 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019087239017131645, + "loss": 9.1804, + "step": 154500 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019086074654544062, + "loss": 9.1927, + "step": 154600 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019084909585331513, + "loss": 9.16, + "step": 154700 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019083743809584602, + "loss": 9.2385, + "step": 154800 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019082577327394, + "loss": 9.1368, + "step": 154900 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019081410138850413, + "loss": 9.1206, + "step": 155000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019080242244044618, + "loss": 9.2212, + "step": 155100 + }, + { + "epoch": 1.65, + "learning_rate": 0.00019079073643067448, + "loss": 9.2291, + "step": 155200 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001907790433600978, + "loss": 9.1397, + "step": 155300 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019076734322962552, + "loss": 9.1465, + "step": 155400 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019075563604016755, + "loss": 9.1455, + "step": 155500 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019074392179263436, + "loss": 9.1805, + "step": 155600 + }, + { + "epoch": 1.66, + "learning_rate": 0.000190732200487937, + "loss": 9.1323, + "step": 155700 + }, + { + "epoch": 1.66, + "learning_rate": 0.000190720472126987, + "loss": 9.1676, + "step": 155800 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019070873671069648, + "loss": 9.1487, + "step": 155900 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019069699423997813, + "loss": 9.2359, + "step": 156000 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019068524471574513, + "loss": 9.1559, + "step": 156100 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019067348813891127, + "loss": 9.1226, + "step": 156200 + }, + { + "epoch": 1.66, + "learning_rate": 0.00019066172451039087, + "loss": 9.1891, + "step": 156300 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019064995383109872, + "loss": 9.1099, + "step": 156400 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019063817610195032, + "loss": 9.1542, + "step": 156500 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019062639132386155, + "loss": 9.1481, + "step": 156600 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019061459949774894, + "loss": 9.166, + "step": 156700 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019060280062452957, + "loss": 9.1529, + "step": 156800 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019059099470512098, + "loss": 9.1805, + "step": 156900 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019057918174044137, + "loss": 9.2102, + "step": 157000 + }, + { + "epoch": 1.67, + "learning_rate": 0.00019056736173140944, + "loss": 9.1822, + "step": 157100 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001905555346789444, + "loss": 9.1906, + "step": 157200 + }, + { + "epoch": 1.68, + "learning_rate": 0.00019054370058396605, + "loss": 9.1508, + "step": 157300 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001905318594473947, + "loss": 9.2155, + "step": 157400 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001905200112701513, + "loss": 9.2158, + "step": 157500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00019050815605315723, + "loss": 9.1504, + "step": 157600 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001904962937973345, + "loss": 9.1406, + "step": 157700 + }, + { + "epoch": 1.68, + "learning_rate": 0.00019048442450360565, + "loss": 9.1925, + "step": 157800 + }, + { + "epoch": 1.68, + "learning_rate": 0.00019047254817289373, + "loss": 9.1348, + "step": 157900 + }, + { + "epoch": 1.68, + "learning_rate": 0.00019046066480612236, + "loss": 9.134, + "step": 158000 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001904487744042157, + "loss": 9.0975, + "step": 158100 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019043687696809853, + "loss": 9.1963, + "step": 158200 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019042497249869604, + "loss": 9.0766, + "step": 158300 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019041306099693413, + "loss": 9.0891, + "step": 158400 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019040114246373903, + "loss": 9.2311, + "step": 158500 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019038921690003776, + "loss": 9.2391, + "step": 158600 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019037728430675775, + "loss": 9.194, + "step": 158700 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019036534468482694, + "loss": 9.2109, + "step": 158800 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019035339803517395, + "loss": 9.1743, + "step": 158900 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019034144435872783, + "loss": 9.2158, + "step": 159000 + }, + { + "epoch": 1.69, + "learning_rate": 0.00019032948365641823, + "loss": 9.1839, + "step": 159100 + }, + { + "epoch": 1.7, + "learning_rate": 0.00019031751592917532, + "loss": 9.1961, + "step": 159200 + }, + { + "epoch": 1.7, + "learning_rate": 0.00019030554117792985, + "loss": 9.2253, + "step": 159300 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001902935594036131, + "loss": 9.1937, + "step": 159400 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001902815706071569, + "loss": 9.1053, + "step": 159500 + }, + { + "epoch": 1.7, + "learning_rate": 0.00019026957478949358, + "loss": 9.1567, + "step": 159600 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001902575719515561, + "loss": 9.1301, + "step": 159700 + }, + { + "epoch": 1.7, + "learning_rate": 0.00019024556209427788, + "loss": 9.2587, + "step": 159800 + }, + { + "epoch": 1.7, + "learning_rate": 0.00019023354521859295, + "loss": 9.1593, + "step": 159900 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001902215213254359, + "loss": 9.1822, + "step": 160000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019020949041574178, + "loss": 9.2405, + "step": 160100 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019019745249044626, + "loss": 9.1795, + "step": 160200 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019018540755048552, + "loss": 9.1398, + "step": 160300 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019017335559679625, + "loss": 9.1535, + "step": 160400 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019016129663031582, + "loss": 9.1646, + "step": 160500 + }, + { + "epoch": 1.71, + "learning_rate": 0.000190149230651982, + "loss": 9.13, + "step": 160600 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019013715766273318, + "loss": 9.1286, + "step": 160700 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001901250776635083, + "loss": 9.1498, + "step": 160800 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019011299065524676, + "loss": 9.176, + "step": 160900 + }, + { + "epoch": 1.71, + "learning_rate": 0.00019010089663888864, + "loss": 9.1498, + "step": 161000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00019008879561537443, + "loss": 9.1192, + "step": 161100 + }, + { + "epoch": 1.72, + "learning_rate": 0.00019007668758564528, + "loss": 9.2045, + "step": 161200 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001900645725506428, + "loss": 9.1249, + "step": 161300 + }, + { + "epoch": 1.72, + "learning_rate": 0.00019005245051130918, + "loss": 9.1121, + "step": 161400 + }, + { + "epoch": 1.72, + "learning_rate": 0.00019004032146858717, + "loss": 9.1724, + "step": 161500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00019002818542342004, + "loss": 9.1648, + "step": 161600 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001900160423767516, + "loss": 9.1413, + "step": 161700 + }, + { + "epoch": 1.72, + "learning_rate": 0.00019000389232952622, + "loss": 9.2193, + "step": 161800 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001899917352826888, + "loss": 9.1364, + "step": 161900 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018997957123718484, + "loss": 9.146, + "step": 162000 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001899674001939603, + "loss": 9.0989, + "step": 162100 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018995522215396174, + "loss": 9.2133, + "step": 162200 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018994303711813624, + "loss": 9.2196, + "step": 162300 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001899308450874314, + "loss": 9.1908, + "step": 162400 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018991864606279543, + "loss": 9.1318, + "step": 162500 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018990644004517705, + "loss": 9.1749, + "step": 162600 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018989422703552555, + "loss": 9.1333, + "step": 162700 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001898820070347907, + "loss": 9.1344, + "step": 162800 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018986978004392283, + "loss": 9.1017, + "step": 162900 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018985754606387287, + "loss": 9.2017, + "step": 163000 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018984530509559225, + "loss": 9.1503, + "step": 163100 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018983305714003295, + "loss": 9.2238, + "step": 163200 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018982080219814748, + "loss": 9.0557, + "step": 163300 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018980854027088895, + "loss": 9.1285, + "step": 163400 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018979627135921092, + "loss": 9.1594, + "step": 163500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018978399546406758, + "loss": 9.1574, + "step": 163600 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018977171258641363, + "loss": 9.1591, + "step": 163700 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018975942272720427, + "loss": 9.1908, + "step": 163800 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018974712588739533, + "loss": 9.1904, + "step": 163900 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001897348220679431, + "loss": 9.0989, + "step": 164000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018972251126980446, + "loss": 9.1292, + "step": 164100 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018971019349393685, + "loss": 9.1208, + "step": 164200 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001896978687412982, + "loss": 9.151, + "step": 164300 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018968553701284702, + "loss": 9.0635, + "step": 164400 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018967319830954233, + "loss": 9.2055, + "step": 164500 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001896608526323437, + "loss": 9.2053, + "step": 164600 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018964849998221134, + "loss": 9.1248, + "step": 164700 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018963614036010578, + "loss": 9.1596, + "step": 164800 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018962377376698835, + "loss": 9.1306, + "step": 164900 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018961140020382072, + "loss": 9.175, + "step": 165000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018959901967156522, + "loss": 9.1572, + "step": 165100 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018958663217118473, + "loss": 9.196, + "step": 165200 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018957423770364255, + "loss": 9.1575, + "step": 165300 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018956183626990262, + "loss": 9.241, + "step": 165400 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018954942787092943, + "loss": 9.1454, + "step": 165500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018953701250768793, + "loss": 9.1385, + "step": 165600 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018952459018114373, + "loss": 9.1462, + "step": 165700 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018951216089226286, + "loss": 9.2226, + "step": 165800 + }, + { + "epoch": 1.77, + "learning_rate": 0.000189499724642012, + "loss": 9.1304, + "step": 165900 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018948728143135824, + "loss": 9.1205, + "step": 166000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018947483126126937, + "loss": 9.1196, + "step": 166100 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001894623741327136, + "loss": 9.1775, + "step": 166200 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018944991004665973, + "loss": 9.1318, + "step": 166300 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018943743900407708, + "loss": 9.0922, + "step": 166400 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018942496100593558, + "loss": 9.1432, + "step": 166500 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018941247605320559, + "loss": 9.2018, + "step": 166600 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018939998414685804, + "loss": 9.1556, + "step": 166700 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001893874852878645, + "loss": 9.1165, + "step": 166800 + }, + { + "epoch": 1.78, + "learning_rate": 0.000189374979477197, + "loss": 9.1665, + "step": 166900 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018936246671582803, + "loss": 9.0888, + "step": 167000 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001893499470047308, + "loss": 9.1661, + "step": 167100 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018933742034487895, + "loss": 9.1278, + "step": 167200 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018932488673724664, + "loss": 9.0987, + "step": 167300 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018931234618280868, + "loss": 9.0935, + "step": 167400 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018929979868254027, + "loss": 9.1878, + "step": 167500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018928724423741727, + "loss": 9.177, + "step": 167600 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018927468284841607, + "loss": 9.1462, + "step": 167700 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018926211451651353, + "loss": 9.0518, + "step": 167800 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001892495392426871, + "loss": 9.1411, + "step": 167900 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018923695702791478, + "loss": 9.1325, + "step": 168000 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018922436787317505, + "loss": 9.1411, + "step": 168100 + }, + { + "epoch": 1.79, + "learning_rate": 0.000189211771779447, + "loss": 9.185, + "step": 168200 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001891991687477102, + "loss": 9.0569, + "step": 168300 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018918655877894484, + "loss": 9.0878, + "step": 168400 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018917394187413156, + "loss": 9.1232, + "step": 168500 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018916131803425158, + "loss": 9.1181, + "step": 168600 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018914868726028667, + "loss": 9.232, + "step": 168700 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001891360495532191, + "loss": 9.1871, + "step": 168800 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018912340491403177, + "loss": 9.1903, + "step": 168900 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018911075334370798, + "loss": 9.1721, + "step": 169000 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018909809484323172, + "loss": 9.2003, + "step": 169100 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018908542941358737, + "loss": 9.1717, + "step": 169200 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018907275705575991, + "loss": 9.1267, + "step": 169300 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018906007777073496, + "loss": 9.1102, + "step": 169400 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018904739155949852, + "loss": 9.1592, + "step": 169500 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018903469842303724, + "loss": 9.1311, + "step": 169600 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001890219983623382, + "loss": 9.1568, + "step": 169700 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018900929137838918, + "loss": 9.1715, + "step": 169800 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018899657747217834, + "loss": 9.0627, + "step": 169900 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018898385664469443, + "loss": 9.1047, + "step": 170000 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001889711288969268, + "loss": 9.0115, + "step": 170100 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018895839422986525, + "loss": 9.1236, + "step": 170200 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018894565264450017, + "loss": 9.1673, + "step": 170300 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018893290414182247, + "loss": 9.1755, + "step": 170400 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018892014872282363, + "loss": 9.1462, + "step": 170500 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001889073863884956, + "loss": 9.1549, + "step": 170600 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001888946171398309, + "loss": 9.0767, + "step": 170700 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018888184097782265, + "loss": 9.1676, + "step": 170800 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018886905790346445, + "loss": 9.0572, + "step": 170900 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001888562679177504, + "loss": 9.1657, + "step": 171000 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018884347102167518, + "loss": 9.1149, + "step": 171100 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018883066721623402, + "loss": 9.1589, + "step": 171200 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018881785650242265, + "loss": 9.15, + "step": 171300 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018880503888123743, + "loss": 9.1856, + "step": 171400 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001887922143536751, + "loss": 9.001, + "step": 171500 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001887793829207331, + "loss": 9.1903, + "step": 171600 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018876654458340925, + "loss": 9.0402, + "step": 171700 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001887536993427021, + "loss": 9.1351, + "step": 171800 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018874084719961053, + "loss": 9.1436, + "step": 171900 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018872798815513408, + "loss": 9.1166, + "step": 172000 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001887151222102728, + "loss": 9.1972, + "step": 172100 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001887022493660273, + "loss": 9.1033, + "step": 172200 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018868936962339867, + "loss": 9.2539, + "step": 172300 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001886764829833886, + "loss": 9.1322, + "step": 172400 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018866358944699923, + "loss": 9.1669, + "step": 172500 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018865068901523336, + "loss": 9.1709, + "step": 172600 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001886377816890942, + "loss": 9.1704, + "step": 172700 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018862486746958559, + "loss": 9.0733, + "step": 172800 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018861194635771188, + "loss": 9.1179, + "step": 172900 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018859901835447792, + "loss": 9.1713, + "step": 173000 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001885860834608891, + "loss": 9.1308, + "step": 173100 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018857314167795145, + "loss": 9.1248, + "step": 173200 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018856019300667132, + "loss": 9.1197, + "step": 173300 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018854723744805586, + "loss": 9.0444, + "step": 173400 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001885342750031126, + "loss": 9.0698, + "step": 173500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018852130567284956, + "loss": 9.1714, + "step": 173600 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018850832945827543, + "loss": 9.145, + "step": 173700 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018849534636039935, + "loss": 9.0327, + "step": 173800 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018848235638023103, + "loss": 9.0952, + "step": 173900 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018846935951878071, + "loss": 9.1063, + "step": 174000 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001884563557770591, + "loss": 9.1439, + "step": 174100 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018844334515607755, + "loss": 9.1117, + "step": 174200 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018843032765684793, + "loss": 9.1398, + "step": 174300 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018841730328038252, + "loss": 9.1262, + "step": 174400 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001884042720276943, + "loss": 9.076, + "step": 174500 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018839123389979669, + "loss": 9.0873, + "step": 174600 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001883781888977037, + "loss": 9.1043, + "step": 174700 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018836513702242974, + "loss": 9.1025, + "step": 174800 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018835207827498998, + "loss": 9.1255, + "step": 174900 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018833901265639992, + "loss": 9.0882, + "step": 175000 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001883259401676757, + "loss": 9.1123, + "step": 175100 + }, + { + "epoch": 1.87, + "learning_rate": 0.000188312860809834, + "loss": 9.082, + "step": 175200 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001882997745838919, + "loss": 9.1171, + "step": 175300 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018828668149086726, + "loss": 9.0766, + "step": 175400 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018827358153177822, + "loss": 9.1312, + "step": 175500 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001882604747076436, + "loss": 9.0933, + "step": 175600 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018824736101948276, + "loss": 9.1577, + "step": 175700 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001882342404683155, + "loss": 9.121, + "step": 175800 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018822111305516218, + "loss": 9.0757, + "step": 175900 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001882079787810438, + "loss": 9.0875, + "step": 176000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018819483764698181, + "loss": 9.0441, + "step": 176100 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018818168965399813, + "loss": 9.0911, + "step": 176200 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001881685348031153, + "loss": 9.15, + "step": 176300 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001881553730953564, + "loss": 9.1436, + "step": 176400 + }, + { + "epoch": 1.88, + "learning_rate": 0.000188142204531745, + "loss": 9.194, + "step": 176500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018812902911330526, + "loss": 9.1552, + "step": 176600 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018811584684106175, + "loss": 9.0843, + "step": 176700 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018810265771603978, + "loss": 9.0564, + "step": 176800 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018808946173926494, + "loss": 9.1383, + "step": 176900 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018807625891176355, + "loss": 9.1081, + "step": 177000 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001880630492345624, + "loss": 9.0439, + "step": 177100 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018804983270868876, + "loss": 9.1555, + "step": 177200 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018803660933517055, + "loss": 9.0664, + "step": 177300 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001880233791150361, + "loss": 9.0827, + "step": 177400 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018801014204931433, + "loss": 9.1387, + "step": 177500 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018799689813903472, + "loss": 9.1359, + "step": 177600 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018798364738522723, + "loss": 9.1015, + "step": 177700 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018797038978892236, + "loss": 9.0144, + "step": 177800 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001879571253511512, + "loss": 9.1726, + "step": 177900 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018794385407294525, + "loss": 9.1201, + "step": 178000 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018793057595533664, + "loss": 9.1104, + "step": 178100 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001879172909993581, + "loss": 9.1695, + "step": 178200 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001879039992060427, + "loss": 9.0277, + "step": 178300 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001878907005764242, + "loss": 9.0884, + "step": 178400 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001878773951115368, + "loss": 9.1421, + "step": 178500 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018786408281241528, + "loss": 9.12, + "step": 178600 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018785076368009495, + "loss": 9.1529, + "step": 178700 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018783743771561164, + "loss": 9.1086, + "step": 178800 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001878241049200017, + "loss": 9.1452, + "step": 178900 + }, + { + "epoch": 1.91, + "learning_rate": 0.000187810765294302, + "loss": 9.1038, + "step": 179000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018779741883955003, + "loss": 9.1826, + "step": 179100 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001877840655567837, + "loss": 9.1532, + "step": 179200 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001877707054470415, + "loss": 9.0952, + "step": 179300 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018775733851136243, + "loss": 9.0865, + "step": 179400 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018774396475078605, + "loss": 9.0835, + "step": 179500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018773058416635246, + "loss": 9.0861, + "step": 179600 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018771719675910226, + "loss": 9.1069, + "step": 179700 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018770380253007663, + "loss": 9.1194, + "step": 179800 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018769040148031713, + "loss": 9.1425, + "step": 179900 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018767699361086604, + "loss": 9.0962, + "step": 180000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018766357892276609, + "loss": 9.0267, + "step": 180100 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018765015741706054, + "loss": 9.1044, + "step": 180200 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018763672909479314, + "loss": 9.1402, + "step": 180300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018762329395700827, + "loss": 9.1602, + "step": 180400 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018760985200475075, + "loss": 9.1229, + "step": 180500 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018759640323906597, + "loss": 9.1722, + "step": 180600 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018758294766099985, + "loss": 9.0454, + "step": 180700 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018756948527159882, + "loss": 9.0794, + "step": 180800 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018755601607190983, + "loss": 9.0821, + "step": 180900 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001875425400629804, + "loss": 9.2338, + "step": 181000 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001875290572458586, + "loss": 9.0849, + "step": 181100 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018751556762159295, + "loss": 9.1392, + "step": 181200 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018750207119123252, + "loss": 9.1497, + "step": 181300 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018748856795582696, + "loss": 9.004, + "step": 181400 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018747505791642645, + "loss": 9.146, + "step": 181500 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018746154107408158, + "loss": 9.0926, + "step": 181600 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018744801742984365, + "loss": 9.1021, + "step": 181700 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001874344869847643, + "loss": 9.0526, + "step": 181800 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018742094973989585, + "loss": 9.1334, + "step": 181900 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018740740569629113, + "loss": 9.1752, + "step": 182000 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018739385485500338, + "loss": 9.2061, + "step": 182100 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018738029721708648, + "loss": 9.1071, + "step": 182200 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018736673278359485, + "loss": 9.1583, + "step": 182300 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018735316155558333, + "loss": 9.0803, + "step": 182400 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018733958353410744, + "loss": 9.145, + "step": 182500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018732599872022307, + "loss": 9.091, + "step": 182600 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001873124071149867, + "loss": 9.0832, + "step": 182700 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018729880871945547, + "loss": 9.1639, + "step": 182800 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018728520353468678, + "loss": 9.03, + "step": 182900 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018727159156173878, + "loss": 9.1201, + "step": 183000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018725797280167008, + "loss": 9.202, + "step": 183100 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018724434725553982, + "loss": 9.0792, + "step": 183200 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018723071492440763, + "loss": 9.2023, + "step": 183300 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018721707580933368, + "loss": 9.0379, + "step": 183400 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018720342991137873, + "loss": 9.1347, + "step": 183500 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018718977723160401, + "loss": 9.0437, + "step": 183600 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018717611777107132, + "loss": 9.0495, + "step": 183700 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018716245153084287, + "loss": 9.0067, + "step": 183800 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018714877851198157, + "loss": 9.0759, + "step": 183900 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018713509871555074, + "loss": 9.1023, + "step": 184000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018712141214261425, + "loss": 9.1307, + "step": 184100 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018710771879423653, + "loss": 9.0587, + "step": 184200 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018709401867148247, + "loss": 9.0876, + "step": 184300 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018708031177541758, + "loss": 9.1103, + "step": 184400 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018706659810710784, + "loss": 9.1864, + "step": 184500 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018705287766761973, + "loss": 9.1094, + "step": 184600 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018703915045802032, + "loss": 9.0665, + "step": 184700 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018702541647937717, + "loss": 9.1126, + "step": 184800 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018701167573275838, + "loss": 9.0574, + "step": 184900 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018699792821923251, + "loss": 9.0779, + "step": 185000 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001869841739398688, + "loss": 9.1004, + "step": 185100 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018697041289573687, + "loss": 9.0701, + "step": 185200 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018695664508790692, + "loss": 9.075, + "step": 185300 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018694287051744968, + "loss": 9.1037, + "step": 185400 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001869290891854364, + "loss": 9.0495, + "step": 185500 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018691530109293884, + "loss": 9.0411, + "step": 185600 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001869015062410293, + "loss": 9.1423, + "step": 185700 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018688770463078065, + "loss": 9.0384, + "step": 185800 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001868738962632662, + "loss": 9.0632, + "step": 185900 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018686008113955987, + "loss": 9.0866, + "step": 186000 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018684625926073601, + "loss": 9.1251, + "step": 186100 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018683243062786957, + "loss": 9.0553, + "step": 186200 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018681859524203605, + "loss": 9.1005, + "step": 186300 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018680475310431133, + "loss": 9.0743, + "step": 186400 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018679090421577202, + "loss": 9.1147, + "step": 186500 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018677704857749509, + "loss": 9.0435, + "step": 186600 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001867631861905581, + "loss": 9.1564, + "step": 186700 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018674931705603913, + "loss": 9.1399, + "step": 186800 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018673544117501681, + "loss": 9.0335, + "step": 186900 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001867215585485702, + "loss": 9.0469, + "step": 187000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018670766917777907, + "loss": 9.0829, + "step": 187100 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018669377306372348, + "loss": 9.1005, + "step": 187200 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001866798702074842, + "loss": 9.0546, + "step": 187300 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018666596061014246, + "loss": 9.1273, + "step": 187400 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018665204427277993, + "loss": 9.1391, + "step": 187500 + }, + { + "epoch": 2.0, + "learning_rate": 0.000186638121196479, + "loss": 9.1007, + "step": 187600 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018662419138232238, + "loss": 9.1299, + "step": 187700 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018661025483139346, + "loss": 9.1318, + "step": 187800 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018659631154477606, + "loss": 9.0361, + "step": 187900 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001865823615235545, + "loss": 9.0805, + "step": 188000 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018656840476881376, + "loss": 9.1156, + "step": 188100 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001865544412816392, + "loss": 9.0904, + "step": 188200 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018654047106311678, + "loss": 9.038, + "step": 188300 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018652649411433296, + "loss": 9.0755, + "step": 188400 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018651251043637473, + "loss": 9.0806, + "step": 188500 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018649852003032962, + "loss": 9.0397, + "step": 188600 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018648452289728566, + "loss": 9.1623, + "step": 188700 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018647051903833138, + "loss": 8.9964, + "step": 188800 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001864565084545559, + "loss": 9.0934, + "step": 188900 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018644249114704877, + "loss": 9.0901, + "step": 189000 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018642846711690018, + "loss": 9.0424, + "step": 189100 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018641443636520076, + "loss": 9.0725, + "step": 189200 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018640039889304165, + "loss": 9.0892, + "step": 189300 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001863863547015146, + "loss": 9.1131, + "step": 189400 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018637230379171178, + "loss": 9.1148, + "step": 189500 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018635824616472594, + "loss": 9.04, + "step": 189600 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001863441818216504, + "loss": 9.0514, + "step": 189700 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018633011076357888, + "loss": 9.0545, + "step": 189800 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018631603299160566, + "loss": 9.077, + "step": 189900 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018630194850682567, + "loss": 9.107, + "step": 190000 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001862878573103342, + "loss": 9.0824, + "step": 190100 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001862737594032271, + "loss": 9.0207, + "step": 190200 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018625965478660084, + "loss": 9.0777, + "step": 190300 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018624554346155227, + "loss": 9.0374, + "step": 190400 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018623142542917886, + "loss": 9.034, + "step": 190500 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018621730069057858, + "loss": 9.0841, + "step": 190600 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001862031692468499, + "loss": 9.1886, + "step": 190700 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001861890310990918, + "loss": 9.0773, + "step": 190800 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018617488624840384, + "loss": 9.1442, + "step": 190900 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018616073469588607, + "loss": 9.0265, + "step": 191000 + }, + { + "epoch": 2.04, + "learning_rate": 0.000186146576442639, + "loss": 9.0532, + "step": 191100 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001861324114897638, + "loss": 9.0351, + "step": 191200 + }, + { + "epoch": 2.04, + "learning_rate": 0.00018611823983836202, + "loss": 9.1037, + "step": 191300 + }, + { + "epoch": 2.04, + "learning_rate": 0.00018610406148953577, + "loss": 9.1077, + "step": 191400 + }, + { + "epoch": 2.04, + "learning_rate": 0.00018608987644438784, + "loss": 9.0743, + "step": 191500 + }, + { + "epoch": 2.04, + "learning_rate": 0.00018607568470402122, + "loss": 9.06, + "step": 191600 + }, + { + "epoch": 2.04, + "learning_rate": 0.00018606148626953972, + "loss": 9.0224, + "step": 191700 + }, + { + "epoch": 2.04, + "learning_rate": 0.00018604728114204752, + "loss": 9.0657, + "step": 191800 + }, + { + "epoch": 2.04, + "learning_rate": 0.00018603306932264937, + "loss": 9.0676, + "step": 191900 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001860188508124505, + "loss": 9.0153, + "step": 192000 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001860046256125567, + "loss": 9.046, + "step": 192100 + }, + { + "epoch": 2.05, + "learning_rate": 0.00018599039372407426, + "loss": 9.0563, + "step": 192200 + }, + { + "epoch": 2.05, + "learning_rate": 0.00018597615514810999, + "loss": 9.0144, + "step": 192300 + }, + { + "epoch": 2.05, + "learning_rate": 0.00018596190988577122, + "loss": 8.9947, + "step": 192400 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001859476579381658, + "loss": 9.0341, + "step": 192500 + }, + { + "epoch": 2.05, + "learning_rate": 0.00018593339930640218, + "loss": 9.0551, + "step": 192600 + }, + { + "epoch": 2.05, + "learning_rate": 0.00018591913399158917, + "loss": 9.0906, + "step": 192700 + }, + { + "epoch": 2.05, + "learning_rate": 0.00018590486199483621, + "loss": 9.1057, + "step": 192800 + }, + { + "epoch": 2.05, + "learning_rate": 0.00018589058331725325, + "loss": 9.1243, + "step": 192900 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001858762979599507, + "loss": 9.0537, + "step": 193000 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001858620059240396, + "loss": 8.9789, + "step": 193100 + }, + { + "epoch": 2.06, + "learning_rate": 0.00018584770721063142, + "loss": 9.0434, + "step": 193200 + }, + { + "epoch": 2.06, + "learning_rate": 0.00018583340182083813, + "loss": 8.9735, + "step": 193300 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001858190897557723, + "loss": 9.0947, + "step": 193400 + }, + { + "epoch": 2.06, + "learning_rate": 0.000185804771016547, + "loss": 9.0639, + "step": 193500 + }, + { + "epoch": 2.06, + "learning_rate": 0.00018579044560427575, + "loss": 9.027, + "step": 193600 + }, + { + "epoch": 2.06, + "learning_rate": 0.00018577611352007265, + "loss": 9.0806, + "step": 193700 + }, + { + "epoch": 2.06, + "learning_rate": 0.00018576177476505232, + "loss": 9.1146, + "step": 193800 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001857474293403299, + "loss": 9.1001, + "step": 193900 + }, + { + "epoch": 2.07, + "learning_rate": 0.00018573307724702103, + "loss": 9.0516, + "step": 194000 + }, + { + "epoch": 2.07, + "learning_rate": 0.00018571871848624186, + "loss": 9.0974, + "step": 194100 + }, + { + "epoch": 2.07, + "learning_rate": 0.00018570435305910907, + "loss": 9.0248, + "step": 194200 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001856899809667399, + "loss": 8.957, + "step": 194300 + }, + { + "epoch": 2.07, + "learning_rate": 0.000185675602210252, + "loss": 9.0908, + "step": 194400 + }, + { + "epoch": 2.07, + "learning_rate": 0.00018566121679076364, + "loss": 9.033, + "step": 194500 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001856468247093936, + "loss": 9.0694, + "step": 194600 + }, + { + "epoch": 2.07, + "learning_rate": 0.00018563242596726114, + "loss": 9.0322, + "step": 194700 + }, + { + "epoch": 2.07, + "learning_rate": 0.00018561802056548602, + "loss": 9.0703, + "step": 194800 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001856036085051886, + "loss": 9.0578, + "step": 194900 + }, + { + "epoch": 2.08, + "learning_rate": 0.00018558918978748968, + "loss": 9.122, + "step": 195000 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001855747644135106, + "loss": 9.0533, + "step": 195100 + }, + { + "epoch": 2.08, + "learning_rate": 0.00018556033238437323, + "loss": 9.0608, + "step": 195200 + }, + { + "epoch": 2.08, + "learning_rate": 0.00018554589370119997, + "loss": 8.9978, + "step": 195300 + }, + { + "epoch": 2.08, + "learning_rate": 0.00018553144836511368, + "loss": 8.9844, + "step": 195400 + }, + { + "epoch": 2.08, + "learning_rate": 0.00018551699637723781, + "loss": 9.0916, + "step": 195500 + }, + { + "epoch": 2.08, + "learning_rate": 0.00018550253773869625, + "loss": 9.0631, + "step": 195600 + }, + { + "epoch": 2.08, + "learning_rate": 0.00018548807245061347, + "loss": 9.1133, + "step": 195700 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001854736005141145, + "loss": 9.0627, + "step": 195800 + }, + { + "epoch": 2.09, + "learning_rate": 0.00018545912193032474, + "loss": 9.0639, + "step": 195900 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001854446367003702, + "loss": 9.091, + "step": 196000 + }, + { + "epoch": 2.09, + "learning_rate": 0.00018543014482537743, + "loss": 9.0558, + "step": 196100 + }, + { + "epoch": 2.09, + "learning_rate": 0.00018541564630647345, + "loss": 9.0436, + "step": 196200 + }, + { + "epoch": 2.09, + "learning_rate": 0.00018540114114478582, + "loss": 9.0564, + "step": 196300 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001853866293414426, + "loss": 9.0963, + "step": 196400 + }, + { + "epoch": 2.09, + "learning_rate": 0.00018537211089757238, + "loss": 9.0298, + "step": 196500 + }, + { + "epoch": 2.09, + "learning_rate": 0.00018535758581430422, + "loss": 9.0885, + "step": 196600 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001853430540927678, + "loss": 9.0837, + "step": 196700 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018532851573409325, + "loss": 8.9962, + "step": 196800 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018531397073941116, + "loss": 9.1237, + "step": 196900 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018529941910985274, + "loss": 9.0634, + "step": 197000 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018528486084654967, + "loss": 9.0702, + "step": 197100 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018527029595063414, + "loss": 9.043, + "step": 197200 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018525572442323887, + "loss": 9.0656, + "step": 197300 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018524114626549708, + "loss": 9.0682, + "step": 197400 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018522656147854248, + "loss": 9.0869, + "step": 197500 + }, + { + "epoch": 2.1, + "learning_rate": 0.00018521197006350943, + "loss": 9.0807, + "step": 197600 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018519737202153262, + "loss": 9.0042, + "step": 197700 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018518276735374735, + "loss": 9.0961, + "step": 197800 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018516815606128947, + "loss": 8.9478, + "step": 197900 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018515353814529525, + "loss": 9.0755, + "step": 198000 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018513891360690159, + "loss": 9.0415, + "step": 198100 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018512428244724577, + "loss": 9.0864, + "step": 198200 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001851096446674657, + "loss": 8.9876, + "step": 198300 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018509500026869975, + "loss": 9.1571, + "step": 198400 + }, + { + "epoch": 2.11, + "learning_rate": 0.00018508034925208682, + "loss": 9.1207, + "step": 198500 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018506569161876632, + "loss": 9.0226, + "step": 198600 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001850510273698782, + "loss": 9.0617, + "step": 198700 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018503635650656285, + "loss": 9.0288, + "step": 198800 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018502167902996122, + "loss": 9.0285, + "step": 198900 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018500699494121485, + "loss": 9.0174, + "step": 199000 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018499230424146569, + "loss": 9.0784, + "step": 199100 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018497760693185616, + "loss": 9.0509, + "step": 199200 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001849629030135294, + "loss": 9.0232, + "step": 199300 + }, + { + "epoch": 2.12, + "learning_rate": 0.00018494819248762886, + "loss": 9.0862, + "step": 199400 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001849334753552986, + "loss": 9.0237, + "step": 199500 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018491875161768317, + "loss": 9.0221, + "step": 199600 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018490402127592764, + "loss": 9.0536, + "step": 199700 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018488928433117754, + "loss": 8.9925, + "step": 199800 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018487454078457906, + "loss": 9.0284, + "step": 199900 + }, + { + "epoch": 2.13, + "learning_rate": 0.00018485979063727874, + "loss": 9.01, + "step": 200000 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001848450338904237, + "loss": 9.0113, + "step": 200100 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001848302705451616, + "loss": 9.0514, + "step": 200200 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001848155006026406, + "loss": 9.0937, + "step": 200300 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001848007240640093, + "loss": 9.0665, + "step": 200400 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001847859409304169, + "loss": 9.0667, + "step": 200500 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018477115120301313, + "loss": 8.9775, + "step": 200600 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018475635488294815, + "loss": 9.0599, + "step": 200700 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018474155197137267, + "loss": 9.0191, + "step": 200800 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001847267424694379, + "loss": 9.0395, + "step": 200900 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001847119263782956, + "loss": 9.0814, + "step": 201000 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018469710369909803, + "loss": 9.0794, + "step": 201100 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018468227443299792, + "loss": 9.1301, + "step": 201200 + }, + { + "epoch": 2.14, + "learning_rate": 0.00018466743858114857, + "loss": 9.0768, + "step": 201300 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018465259614470377, + "loss": 9.0294, + "step": 201400 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001846377471248178, + "loss": 9.0807, + "step": 201500 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018462289152264543, + "loss": 9.0755, + "step": 201600 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018460802933934205, + "loss": 9.0339, + "step": 201700 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018459316057606347, + "loss": 9.0461, + "step": 201800 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018457828523396605, + "loss": 9.0434, + "step": 201900 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001845634033142066, + "loss": 9.0326, + "step": 202000 + }, + { + "epoch": 2.15, + "learning_rate": 0.00018454851481794252, + "loss": 9.0274, + "step": 202100 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001845336197463317, + "loss": 9.048, + "step": 202200 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001845187181005325, + "loss": 9.0262, + "step": 202300 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001845038098817039, + "loss": 9.1042, + "step": 202400 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001844888950910052, + "loss": 9.1432, + "step": 202500 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001844739737295964, + "loss": 9.0574, + "step": 202600 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018445904579863793, + "loss": 9.0025, + "step": 202700 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001844441112992907, + "loss": 8.9958, + "step": 202800 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001844291702327162, + "loss": 9.0976, + "step": 202900 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018441422260007638, + "loss": 9.0575, + "step": 203000 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018439926840253375, + "loss": 9.0566, + "step": 203100 + }, + { + "epoch": 2.16, + "learning_rate": 0.00018438430764125126, + "loss": 9.1034, + "step": 203200 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001843693403173924, + "loss": 8.9753, + "step": 203300 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018435436643212126, + "loss": 8.9516, + "step": 203400 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018433938598660227, + "loss": 8.9312, + "step": 203500 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018432439898200055, + "loss": 9.0279, + "step": 203600 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018430940541948153, + "loss": 9.1025, + "step": 203700 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018429440530021136, + "loss": 9.0378, + "step": 203800 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018427939862535652, + "loss": 9.1263, + "step": 203900 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018426438539608414, + "loss": 9.0882, + "step": 204000 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001842493656135618, + "loss": 8.988, + "step": 204100 + }, + { + "epoch": 2.17, + "learning_rate": 0.00018423433927895757, + "loss": 9.0134, + "step": 204200 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018421930639344006, + "loss": 9.0507, + "step": 204300 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018420426695817837, + "loss": 9.1358, + "step": 204400 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001841892209743421, + "loss": 9.0135, + "step": 204500 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001841741684431014, + "loss": 8.995, + "step": 204600 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018415910936562692, + "loss": 8.9723, + "step": 204700 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001841440437430898, + "loss": 9.0061, + "step": 204800 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018412897157666164, + "loss": 9.1036, + "step": 204900 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018411389286751467, + "loss": 9.0239, + "step": 205000 + }, + { + "epoch": 2.18, + "learning_rate": 0.00018409880761682153, + "loss": 9.0181, + "step": 205100 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018408371582575545, + "loss": 9.0687, + "step": 205200 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018406861749549006, + "loss": 9.0625, + "step": 205300 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001840535126271996, + "loss": 9.1, + "step": 205400 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018403840122205875, + "loss": 8.936, + "step": 205500 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018402328328124274, + "loss": 9.0775, + "step": 205600 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001840081588059273, + "loss": 8.9591, + "step": 205700 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001839930277972886, + "loss": 9.0189, + "step": 205800 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018397789025650346, + "loss": 9.0379, + "step": 205900 + }, + { + "epoch": 2.19, + "learning_rate": 0.00018396274618474913, + "loss": 8.9525, + "step": 206000 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001839475955832033, + "loss": 9.003, + "step": 206100 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018393243845304428, + "loss": 9.0737, + "step": 206200 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018391727479545085, + "loss": 9.0594, + "step": 206300 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018390210461160223, + "loss": 9.0163, + "step": 206400 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001838869279026783, + "loss": 9.0808, + "step": 206500 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018387174466985925, + "loss": 9.0046, + "step": 206600 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018385655491432595, + "loss": 9.1375, + "step": 206700 + }, + { + "epoch": 2.2, + "learning_rate": 0.00018384135863725972, + "loss": 9.0388, + "step": 206800 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001838261558398423, + "loss": 9.0166, + "step": 206900 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001838109465232561, + "loss": 9.1212, + "step": 207000 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001837957306886839, + "loss": 9.008, + "step": 207100 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018378050833730902, + "loss": 8.9999, + "step": 207200 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018376527947031534, + "loss": 9.0493, + "step": 207300 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018375004408888724, + "loss": 9.0645, + "step": 207400 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001837348021942095, + "loss": 9.0016, + "step": 207500 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018371955378746754, + "loss": 8.9775, + "step": 207600 + }, + { + "epoch": 2.21, + "learning_rate": 0.00018370429886984725, + "loss": 9.072, + "step": 207700 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001836890374425349, + "loss": 8.9995, + "step": 207800 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001836737695067175, + "loss": 9.0573, + "step": 207900 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018365849506358231, + "loss": 8.9878, + "step": 208000 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018364321411431736, + "loss": 9.0843, + "step": 208100 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018362792666011099, + "loss": 9.0816, + "step": 208200 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018361263270215208, + "loss": 9.141, + "step": 208300 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018359733224163007, + "loss": 9.1048, + "step": 208400 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018358202527973488, + "loss": 9.1237, + "step": 208500 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018356671181765693, + "loss": 9.0832, + "step": 208600 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018355139185658714, + "loss": 9.0726, + "step": 208700 + }, + { + "epoch": 2.22, + "learning_rate": 0.00018353606539771696, + "loss": 9.0936, + "step": 208800 + }, + { + "epoch": 2.23, + "learning_rate": 0.00018352073244223832, + "loss": 9.0294, + "step": 208900 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001835053929913437, + "loss": 9.1129, + "step": 209000 + }, + { + "epoch": 2.23, + "learning_rate": 0.00018349004704622596, + "loss": 9.0514, + "step": 209100 + }, + { + "epoch": 2.23, + "learning_rate": 0.00018347469460807867, + "loss": 9.0335, + "step": 209200 + }, + { + "epoch": 2.23, + "learning_rate": 0.00018345933567809572, + "loss": 8.9734, + "step": 209300 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001834439702574716, + "loss": 9.0146, + "step": 209400 + }, + { + "epoch": 2.23, + "learning_rate": 0.00018342859834740124, + "loss": 9.0115, + "step": 209500 + }, + { + "epoch": 2.23, + "learning_rate": 0.00018341321994908022, + "loss": 9.0388, + "step": 209600 + }, + { + "epoch": 2.23, + "learning_rate": 0.00018339783506370438, + "loss": 9.0324, + "step": 209700 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001833824436924703, + "loss": 9.0434, + "step": 209800 + }, + { + "epoch": 2.24, + "learning_rate": 0.00018336704583657491, + "loss": 9.0993, + "step": 209900 + }, + { + "epoch": 2.24, + "learning_rate": 0.00018335164149721578, + "loss": 9.0646, + "step": 210000 + }, + { + "epoch": 2.24, + "learning_rate": 0.00018333623067559086, + "loss": 8.9706, + "step": 210100 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001833208133728986, + "loss": 9.0812, + "step": 210200 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001833053895903381, + "loss": 9.0648, + "step": 210300 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001832899593291088, + "loss": 9.0992, + "step": 210400 + }, + { + "epoch": 2.24, + "learning_rate": 0.00018327452259041076, + "loss": 8.9943, + "step": 210500 + }, + { + "epoch": 2.24, + "learning_rate": 0.00018325907937544446, + "loss": 9.0456, + "step": 210600 + }, + { + "epoch": 2.24, + "learning_rate": 0.00018324362968541094, + "loss": 9.0184, + "step": 210700 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001832281735215117, + "loss": 9.0313, + "step": 210800 + }, + { + "epoch": 2.25, + "learning_rate": 0.00018321271088494884, + "loss": 9.0699, + "step": 210900 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001831972417769248, + "loss": 8.9835, + "step": 211000 + }, + { + "epoch": 2.25, + "learning_rate": 0.00018318176619864268, + "loss": 9.0244, + "step": 211100 + }, + { + "epoch": 2.25, + "learning_rate": 0.00018316628415130593, + "loss": 9.0359, + "step": 211200 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001831507956361187, + "loss": 9.0116, + "step": 211300 + }, + { + "epoch": 2.25, + "learning_rate": 0.00018313530065428547, + "loss": 9.0968, + "step": 211400 + }, + { + "epoch": 2.25, + "learning_rate": 0.00018311979920701131, + "loss": 9.0157, + "step": 211500 + }, + { + "epoch": 2.25, + "learning_rate": 0.00018310429129550176, + "loss": 8.9904, + "step": 211600 + }, + { + "epoch": 2.25, + "learning_rate": 0.00018308877692096287, + "loss": 8.972, + "step": 211700 + }, + { + "epoch": 2.26, + "learning_rate": 0.00018307325608460118, + "loss": 9.0645, + "step": 211800 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001830577287876238, + "loss": 9.0632, + "step": 211900 + }, + { + "epoch": 2.26, + "learning_rate": 0.00018304219503123822, + "loss": 9.0678, + "step": 212000 + }, + { + "epoch": 2.26, + "learning_rate": 0.00018302665481665254, + "loss": 9.0069, + "step": 212100 + }, + { + "epoch": 2.26, + "learning_rate": 0.00018301110814507532, + "loss": 9.003, + "step": 212200 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001829955550177156, + "loss": 9.052, + "step": 212300 + }, + { + "epoch": 2.26, + "learning_rate": 0.000182979995435783, + "loss": 9.1205, + "step": 212400 + }, + { + "epoch": 2.26, + "learning_rate": 0.00018296442940048756, + "loss": 9.0304, + "step": 212500 + }, + { + "epoch": 2.26, + "learning_rate": 0.00018294885691303985, + "loss": 9.0817, + "step": 212600 + }, + { + "epoch": 2.27, + "learning_rate": 0.00018293327797465095, + "loss": 9.0798, + "step": 212700 + }, + { + "epoch": 2.27, + "learning_rate": 0.00018291769258653245, + "loss": 8.9945, + "step": 212800 + }, + { + "epoch": 2.27, + "learning_rate": 0.00018290210074989639, + "loss": 8.9899, + "step": 212900 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001828865024659554, + "loss": 9.0035, + "step": 213000 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001828708977359225, + "loss": 9.0623, + "step": 213100 + }, + { + "epoch": 2.27, + "learning_rate": 0.00018285528656101133, + "loss": 9.0026, + "step": 213200 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001828396689424359, + "loss": 8.9956, + "step": 213300 + }, + { + "epoch": 2.27, + "learning_rate": 0.00018282404488141087, + "loss": 9.0695, + "step": 213400 + }, + { + "epoch": 2.27, + "learning_rate": 0.00018280841437915131, + "loss": 9.0896, + "step": 213500 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018279277743687278, + "loss": 9.056, + "step": 213600 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018277713405579136, + "loss": 9.0705, + "step": 213700 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018276148423712366, + "loss": 9.0187, + "step": 213800 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018274582798208676, + "loss": 9.0982, + "step": 213900 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018273016529189823, + "loss": 8.9909, + "step": 214000 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018271449616777622, + "loss": 9.0602, + "step": 214100 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018269882061093927, + "loss": 9.009, + "step": 214200 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018268313862260645, + "loss": 9.0646, + "step": 214300 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001826674502039974, + "loss": 9.0518, + "step": 214400 + }, + { + "epoch": 2.28, + "learning_rate": 0.00018265175535633218, + "loss": 8.9937, + "step": 214500 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001826360540808314, + "loss": 8.984, + "step": 214600 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001826203463787161, + "loss": 9.0001, + "step": 214700 + }, + { + "epoch": 2.29, + "learning_rate": 0.00018260463225120796, + "loss": 9.1137, + "step": 214800 + }, + { + "epoch": 2.29, + "learning_rate": 0.00018258891169952902, + "loss": 8.9672, + "step": 214900 + }, + { + "epoch": 2.29, + "learning_rate": 0.00018257318472490183, + "loss": 9.0247, + "step": 215000 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001825574513285495, + "loss": 8.9751, + "step": 215100 + }, + { + "epoch": 2.29, + "learning_rate": 0.00018254171151169568, + "loss": 9.0836, + "step": 215200 + }, + { + "epoch": 2.29, + "learning_rate": 0.00018252596527556437, + "loss": 9.0591, + "step": 215300 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001825102126213802, + "loss": 9.0263, + "step": 215400 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018249445355036827, + "loss": 9.0564, + "step": 215500 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018247868806375414, + "loss": 9.0681, + "step": 215600 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001824629161627639, + "loss": 9.0457, + "step": 215700 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018244713784862414, + "loss": 9.0344, + "step": 215800 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018243135312256191, + "loss": 9.0157, + "step": 215900 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018241556198580482, + "loss": 8.9688, + "step": 216000 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018239976443958094, + "loss": 9.0565, + "step": 216100 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018238396048511887, + "loss": 9.0746, + "step": 216200 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018236815012364764, + "loss": 8.9914, + "step": 216300 + }, + { + "epoch": 2.3, + "learning_rate": 0.00018235233335639687, + "loss": 8.9725, + "step": 216400 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001823365101845966, + "loss": 9.0081, + "step": 216500 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018232068060947736, + "loss": 9.0237, + "step": 216600 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001823048446322703, + "loss": 8.9644, + "step": 216700 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018228900225420694, + "loss": 9.0773, + "step": 216800 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018227315347651938, + "loss": 9.0652, + "step": 216900 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018225729830044016, + "loss": 9.0316, + "step": 217000 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001822414367272023, + "loss": 9.0582, + "step": 217100 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018222556875803942, + "loss": 8.9918, + "step": 217200 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001822096943941855, + "loss": 9.0734, + "step": 217300 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018219381363687517, + "loss": 9.0242, + "step": 217400 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018217792648734347, + "loss": 9.0317, + "step": 217500 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018216203294682589, + "loss": 9.0083, + "step": 217600 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018214613301655846, + "loss": 9.0274, + "step": 217700 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001821302266977778, + "loss": 8.9526, + "step": 217800 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018211431399172093, + "loss": 9.0753, + "step": 217900 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018209839489962532, + "loss": 9.0146, + "step": 218000 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018208246942272904, + "loss": 9.0196, + "step": 218100 + }, + { + "epoch": 2.32, + "learning_rate": 0.00018206653756227065, + "loss": 8.9761, + "step": 218200 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001820505993194891, + "loss": 8.9926, + "step": 218300 + }, + { + "epoch": 2.33, + "learning_rate": 0.00018203465469562394, + "loss": 8.9958, + "step": 218400 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001820187036919152, + "loss": 9.1148, + "step": 218500 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001820027463096034, + "loss": 9.0125, + "step": 218600 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001819867825499295, + "loss": 9.0877, + "step": 218700 + }, + { + "epoch": 2.33, + "learning_rate": 0.00018197081241413504, + "loss": 9.1242, + "step": 218800 + }, + { + "epoch": 2.33, + "learning_rate": 0.00018195483590346205, + "loss": 9.0773, + "step": 218900 + }, + { + "epoch": 2.33, + "learning_rate": 0.00018193885301915293, + "loss": 8.9999, + "step": 219000 + }, + { + "epoch": 2.33, + "learning_rate": 0.00018192286376245075, + "loss": 8.9843, + "step": 219100 + }, + { + "epoch": 2.33, + "learning_rate": 0.00018190686813459897, + "loss": 8.9305, + "step": 219200 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001818908661368416, + "loss": 9.0152, + "step": 219300 + }, + { + "epoch": 2.34, + "learning_rate": 0.00018187485777042305, + "loss": 8.9341, + "step": 219400 + }, + { + "epoch": 2.34, + "learning_rate": 0.00018185884303658837, + "loss": 8.9283, + "step": 219500 + }, + { + "epoch": 2.34, + "learning_rate": 0.00018184282193658296, + "loss": 9.0401, + "step": 219600 + }, + { + "epoch": 2.34, + "learning_rate": 0.00018182679447165283, + "loss": 9.003, + "step": 219700 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001818107606430444, + "loss": 9.064, + "step": 219800 + }, + { + "epoch": 2.34, + "learning_rate": 0.00018179472045200468, + "loss": 8.9277, + "step": 219900 + }, + { + "epoch": 2.34, + "learning_rate": 0.00018177867389978104, + "loss": 9.0493, + "step": 220000 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001817626209876215, + "loss": 9.1271, + "step": 220100 + }, + { + "epoch": 2.35, + "learning_rate": 0.00018174656171677443, + "loss": 9.1035, + "step": 220200 + }, + { + "epoch": 2.35, + "learning_rate": 0.00018173049608848885, + "loss": 8.9526, + "step": 220300 + }, + { + "epoch": 2.35, + "learning_rate": 0.00018171442410401406, + "loss": 9.0167, + "step": 220400 + }, + { + "epoch": 2.35, + "learning_rate": 0.00018169834576460008, + "loss": 9.0515, + "step": 220500 + }, + { + "epoch": 2.35, + "learning_rate": 0.00018168226107149728, + "loss": 9.0868, + "step": 220600 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001816661700259566, + "loss": 9.0466, + "step": 220700 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001816500726292294, + "loss": 8.9584, + "step": 220800 + }, + { + "epoch": 2.35, + "learning_rate": 0.00018163396888256757, + "loss": 9.0466, + "step": 220900 + }, + { + "epoch": 2.35, + "learning_rate": 0.00018161785878722356, + "loss": 9.0957, + "step": 221000 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018160174234445022, + "loss": 9.1248, + "step": 221100 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018158561955550093, + "loss": 9.0141, + "step": 221200 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018156949042162957, + "loss": 9.0383, + "step": 221300 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018155335494409046, + "loss": 9.0845, + "step": 221400 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018153721312413854, + "loss": 9.0356, + "step": 221500 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018152106496302904, + "loss": 9.0927, + "step": 221600 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018150491046201793, + "loss": 9.0491, + "step": 221700 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018148874962236152, + "loss": 8.9652, + "step": 221800 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018147258244531655, + "loss": 8.9909, + "step": 221900 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018145640893214043, + "loss": 9.0484, + "step": 222000 + }, + { + "epoch": 2.37, + "learning_rate": 0.000181440229084091, + "loss": 8.9756, + "step": 222100 + }, + { + "epoch": 2.37, + "learning_rate": 0.00018142404290242645, + "loss": 8.9684, + "step": 222200 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001814078503884057, + "loss": 8.9948, + "step": 222300 + }, + { + "epoch": 2.37, + "learning_rate": 0.00018139165154328798, + "loss": 9.1269, + "step": 222400 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001813754463683331, + "loss": 9.0485, + "step": 222500 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001813592348648013, + "loss": 9.0652, + "step": 222600 + }, + { + "epoch": 2.37, + "learning_rate": 0.00018134301703395342, + "loss": 9.0138, + "step": 222700 + }, + { + "epoch": 2.37, + "learning_rate": 0.00018132679287705065, + "loss": 8.9932, + "step": 222800 + }, + { + "epoch": 2.37, + "learning_rate": 0.00018131056239535482, + "loss": 9.0117, + "step": 222900 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018129432559012808, + "loss": 9.0057, + "step": 223000 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018127808246263326, + "loss": 9.0649, + "step": 223100 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018126183301413353, + "loss": 9.0071, + "step": 223200 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018124557724589265, + "loss": 8.922, + "step": 223300 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001812293151591748, + "loss": 8.9331, + "step": 223400 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018121304675524472, + "loss": 9.0288, + "step": 223500 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018119677203536755, + "loss": 8.9544, + "step": 223600 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018118049100080905, + "loss": 9.0862, + "step": 223700 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018116420365283533, + "loss": 9.056, + "step": 223800 + }, + { + "epoch": 2.38, + "learning_rate": 0.00018114790999271312, + "loss": 9.0827, + "step": 223900 + }, + { + "epoch": 2.39, + "learning_rate": 0.00018113161002170954, + "loss": 8.9339, + "step": 224000 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001811153037410922, + "loss": 8.9882, + "step": 224100 + }, + { + "epoch": 2.39, + "learning_rate": 0.00018109899115212934, + "loss": 9.0736, + "step": 224200 + }, + { + "epoch": 2.39, + "learning_rate": 0.00018108267225608953, + "loss": 9.0003, + "step": 224300 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001810663470542419, + "loss": 9.0168, + "step": 224400 + }, + { + "epoch": 2.39, + "learning_rate": 0.00018105001554785606, + "loss": 9.0447, + "step": 224500 + }, + { + "epoch": 2.39, + "learning_rate": 0.00018103367773820213, + "loss": 9.0279, + "step": 224600 + }, + { + "epoch": 2.39, + "learning_rate": 0.00018101733362655068, + "loss": 9.0693, + "step": 224700 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001810009832141728, + "loss": 9.0063, + "step": 224800 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001809846265023401, + "loss": 9.032, + "step": 224900 + }, + { + "epoch": 2.4, + "learning_rate": 0.00018096826349232455, + "loss": 9.0819, + "step": 225000 + }, + { + "epoch": 2.4, + "learning_rate": 0.00018095189418539884, + "loss": 9.0004, + "step": 225100 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001809355185828359, + "loss": 9.105, + "step": 225200 + }, + { + "epoch": 2.4, + "learning_rate": 0.00018091913668590926, + "loss": 9.0469, + "step": 225300 + }, + { + "epoch": 2.4, + "learning_rate": 0.00018090274849589304, + "loss": 9.0234, + "step": 225400 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001808863540140617, + "loss": 8.9568, + "step": 225500 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001808699532416902, + "loss": 9.0313, + "step": 225600 + }, + { + "epoch": 2.4, + "learning_rate": 0.00018085354618005406, + "loss": 9.0245, + "step": 225700 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001808371328304293, + "loss": 9.062, + "step": 225800 + }, + { + "epoch": 2.41, + "learning_rate": 0.00018082071319409232, + "loss": 9.0529, + "step": 225900 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001808042872723201, + "loss": 8.9451, + "step": 226000 + }, + { + "epoch": 2.41, + "learning_rate": 0.00018078785506639008, + "loss": 9.0105, + "step": 226100 + }, + { + "epoch": 2.41, + "learning_rate": 0.00018077141657758025, + "loss": 9.0455, + "step": 226200 + }, + { + "epoch": 2.41, + "learning_rate": 0.00018075497180716895, + "loss": 8.9498, + "step": 226300 + }, + { + "epoch": 2.41, + "learning_rate": 0.00018073852075643516, + "loss": 9.087, + "step": 226400 + }, + { + "epoch": 2.41, + "learning_rate": 0.00018072206342665826, + "loss": 9.0282, + "step": 226500 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001807055998191181, + "loss": 8.8933, + "step": 226600 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001806891299350951, + "loss": 8.974, + "step": 226700 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018067265377587008, + "loss": 8.9945, + "step": 226800 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018065617134272445, + "loss": 9.0351, + "step": 226900 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018063968263694002, + "loss": 8.983, + "step": 227000 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018062318765979914, + "loss": 8.9085, + "step": 227100 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018060668641258458, + "loss": 9.0434, + "step": 227200 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018059017889657968, + "loss": 8.9996, + "step": 227300 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018057366511306823, + "loss": 8.9967, + "step": 227400 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001805571450633345, + "loss": 9.0237, + "step": 227500 + }, + { + "epoch": 2.42, + "learning_rate": 0.00018054061874866323, + "loss": 8.9821, + "step": 227600 + }, + { + "epoch": 2.43, + "learning_rate": 0.00018052408617033974, + "loss": 8.9712, + "step": 227700 + }, + { + "epoch": 2.43, + "learning_rate": 0.00018050754732964973, + "loss": 8.8961, + "step": 227800 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001804910022278794, + "loss": 9.0262, + "step": 227900 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001804744508663155, + "loss": 8.9848, + "step": 228000 + }, + { + "epoch": 2.43, + "learning_rate": 0.00018045789324624522, + "loss": 8.9764, + "step": 228100 + }, + { + "epoch": 2.43, + "learning_rate": 0.00018044132936895625, + "loss": 8.9937, + "step": 228200 + }, + { + "epoch": 2.43, + "learning_rate": 0.00018042475923573677, + "loss": 9.0757, + "step": 228300 + }, + { + "epoch": 2.43, + "learning_rate": 0.00018040818284787547, + "loss": 8.9889, + "step": 228400 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001803916002066614, + "loss": 8.9482, + "step": 228500 + }, + { + "epoch": 2.43, + "learning_rate": 0.00018037501131338432, + "loss": 9.0511, + "step": 228600 + }, + { + "epoch": 2.44, + "learning_rate": 0.00018035841616933427, + "loss": 8.9874, + "step": 228700 + }, + { + "epoch": 2.44, + "learning_rate": 0.00018034181477580185, + "loss": 8.9758, + "step": 228800 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001803252071340782, + "loss": 9.0269, + "step": 228900 + }, + { + "epoch": 2.44, + "learning_rate": 0.00018030859324545486, + "loss": 9.0034, + "step": 229000 + }, + { + "epoch": 2.44, + "learning_rate": 0.00018029197311122394, + "loss": 9.0862, + "step": 229100 + }, + { + "epoch": 2.44, + "learning_rate": 0.00018027534673267792, + "loss": 8.9631, + "step": 229200 + }, + { + "epoch": 2.44, + "learning_rate": 0.00018025871411110988, + "loss": 9.0016, + "step": 229300 + }, + { + "epoch": 2.44, + "learning_rate": 0.00018024207524781332, + "loss": 8.9814, + "step": 229400 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001802254301440823, + "loss": 8.9851, + "step": 229500 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018020877880121124, + "loss": 8.9458, + "step": 229600 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018019212122049515, + "loss": 9.0367, + "step": 229700 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018017545740322953, + "loss": 8.932, + "step": 229800 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018015878735071024, + "loss": 9.0184, + "step": 229900 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018014211106423376, + "loss": 8.9249, + "step": 230000 + }, + { + "epoch": 2.45, + "learning_rate": 0.000180125428545097, + "loss": 8.9847, + "step": 230100 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018010873979459739, + "loss": 8.9156, + "step": 230200 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001800920448140328, + "loss": 9.0328, + "step": 230300 + }, + { + "epoch": 2.45, + "learning_rate": 0.00018007534360470158, + "loss": 8.9383, + "step": 230400 + }, + { + "epoch": 2.46, + "learning_rate": 0.00018005863616790257, + "loss": 8.9882, + "step": 230500 + }, + { + "epoch": 2.46, + "learning_rate": 0.00018004192250493513, + "loss": 8.9413, + "step": 230600 + }, + { + "epoch": 2.46, + "learning_rate": 0.00018002520261709915, + "loss": 9.1052, + "step": 230700 + }, + { + "epoch": 2.46, + "learning_rate": 0.00018000847650569482, + "loss": 9.0456, + "step": 230800 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017999174417202303, + "loss": 8.9984, + "step": 230900 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017997500561738497, + "loss": 9.0676, + "step": 231000 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017995826084308249, + "loss": 8.9715, + "step": 231100 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017994150985041775, + "loss": 9.0023, + "step": 231200 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017992475264069354, + "loss": 8.8997, + "step": 231300 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017990798921521298, + "loss": 9.0132, + "step": 231400 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001798912195752799, + "loss": 9.0338, + "step": 231500 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017987444372219837, + "loss": 9.0081, + "step": 231600 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017985766165727306, + "loss": 9.0621, + "step": 231700 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017984087338180916, + "loss": 9.0275, + "step": 231800 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017982407889711226, + "loss": 8.9959, + "step": 231900 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017980727820448848, + "loss": 9.0693, + "step": 232000 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017979047130524444, + "loss": 8.996, + "step": 232100 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017977365820068717, + "loss": 8.967, + "step": 232200 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017975683889212421, + "loss": 9.0099, + "step": 232300 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017974001338086365, + "loss": 8.9557, + "step": 232400 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017972318166821401, + "loss": 9.0425, + "step": 232500 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017970634375548425, + "loss": 8.9797, + "step": 232600 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001796894996439839, + "loss": 8.9579, + "step": 232700 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001796726493350229, + "loss": 9.0408, + "step": 232800 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017965579282991172, + "loss": 8.8956, + "step": 232900 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017963893012996128, + "loss": 9.0176, + "step": 233000 + }, + { + "epoch": 2.48, + "learning_rate": 0.000179622061236483, + "loss": 8.926, + "step": 233100 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001796051861507888, + "loss": 9.0264, + "step": 233200 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017958830487419097, + "loss": 8.9533, + "step": 233300 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017957141740800248, + "loss": 9.0312, + "step": 233400 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001795545237535366, + "loss": 8.9952, + "step": 233500 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017953762391210716, + "loss": 8.984, + "step": 233600 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017952071788502848, + "loss": 8.9888, + "step": 233700 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017950380567361535, + "loss": 9.0563, + "step": 233800 + }, + { + "epoch": 2.49, + "learning_rate": 0.000179486887279183, + "loss": 9.0163, + "step": 233900 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017946996270304724, + "loss": 8.9417, + "step": 234000 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017945303194652422, + "loss": 8.9554, + "step": 234100 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017943609501093065, + "loss": 9.012, + "step": 234200 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001794191518975838, + "loss": 9.0193, + "step": 234300 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017940220260780128, + "loss": 9.0566, + "step": 234400 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001793852471429012, + "loss": 9.0254, + "step": 234500 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017936828550420224, + "loss": 8.9888, + "step": 234600 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001793513176930235, + "loss": 8.994, + "step": 234700 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017933434371068458, + "loss": 9.0078, + "step": 234800 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017931736355850553, + "loss": 9.0199, + "step": 234900 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017930037723780693, + "loss": 9.0082, + "step": 235000 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017928338474990975, + "loss": 8.9717, + "step": 235100 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017926638609613553, + "loss": 8.986, + "step": 235200 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017924938127780626, + "loss": 8.9616, + "step": 235300 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001792323702962444, + "loss": 8.9094, + "step": 235400 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017921535315277288, + "loss": 9.0385, + "step": 235500 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001791983298487152, + "loss": 8.9876, + "step": 235600 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017918130038539515, + "loss": 8.9576, + "step": 235700 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001791642647641372, + "loss": 8.9366, + "step": 235800 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017914722298626617, + "loss": 8.9642, + "step": 235900 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001791301750531074, + "loss": 9.0173, + "step": 236000 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017911312096598677, + "loss": 8.8534, + "step": 236100 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017909606072623048, + "loss": 9.1026, + "step": 236200 + }, + { + "epoch": 2.52, + "learning_rate": 0.0001790789943351654, + "loss": 9.0192, + "step": 236300 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017906192179411868, + "loss": 8.9376, + "step": 236400 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017904484310441818, + "loss": 9.0049, + "step": 236500 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017902775826739202, + "loss": 9.0113, + "step": 236600 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017901066728436894, + "loss": 9.0122, + "step": 236700 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017899357015667807, + "loss": 8.9689, + "step": 236800 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017897646688564905, + "loss": 8.9865, + "step": 236900 + }, + { + "epoch": 2.52, + "learning_rate": 0.00017895935747261202, + "loss": 9.038, + "step": 237000 + }, + { + "epoch": 2.53, + "learning_rate": 0.00017894224191889762, + "loss": 8.9527, + "step": 237100 + }, + { + "epoch": 2.53, + "learning_rate": 0.00017892512022583688, + "loss": 9.0151, + "step": 237200 + }, + { + "epoch": 2.53, + "learning_rate": 0.00017890799239476135, + "loss": 8.9613, + "step": 237300 + }, + { + "epoch": 2.53, + "learning_rate": 0.00017889085842700308, + "loss": 8.9334, + "step": 237400 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001788737183238946, + "loss": 8.9318, + "step": 237500 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001788565720867689, + "loss": 8.9868, + "step": 237600 + }, + { + "epoch": 2.53, + "learning_rate": 0.00017883941971695938, + "loss": 8.9898, + "step": 237700 + }, + { + "epoch": 2.53, + "learning_rate": 0.00017882226121580005, + "loss": 8.9896, + "step": 237800 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001788050965846253, + "loss": 9.0053, + "step": 237900 + }, + { + "epoch": 2.54, + "learning_rate": 0.00017878792582477, + "loss": 9.0776, + "step": 238000 + }, + { + "epoch": 2.54, + "learning_rate": 0.00017877074893756957, + "loss": 9.0531, + "step": 238100 + }, + { + "epoch": 2.54, + "learning_rate": 0.00017875356592435987, + "loss": 8.9578, + "step": 238200 + }, + { + "epoch": 2.54, + "learning_rate": 0.00017873637678647716, + "loss": 9.0499, + "step": 238300 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001787191815252583, + "loss": 8.984, + "step": 238400 + }, + { + "epoch": 2.54, + "learning_rate": 0.00017870198014204052, + "loss": 8.948, + "step": 238500 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001786847726381616, + "loss": 9.0011, + "step": 238600 + }, + { + "epoch": 2.54, + "learning_rate": 0.00017866755901495976, + "loss": 8.9566, + "step": 238700 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001786503392737737, + "loss": 9.0594, + "step": 238800 + }, + { + "epoch": 2.54, + "learning_rate": 0.00017863311341594263, + "loss": 9.0408, + "step": 238900 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001786158814428062, + "loss": 8.9133, + "step": 239000 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001785986433557045, + "loss": 8.9362, + "step": 239100 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001785813991559782, + "loss": 8.9637, + "step": 239200 + }, + { + "epoch": 2.55, + "learning_rate": 0.00017856414884496834, + "loss": 9.0058, + "step": 239300 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001785468924240165, + "loss": 9.0535, + "step": 239400 + }, + { + "epoch": 2.55, + "learning_rate": 0.00017852962989446466, + "loss": 8.9891, + "step": 239500 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001785123612576554, + "loss": 8.9299, + "step": 239600 + }, + { + "epoch": 2.55, + "learning_rate": 0.00017849508651493164, + "loss": 9.0406, + "step": 239700 + }, + { + "epoch": 2.55, + "learning_rate": 0.00017847780566763693, + "loss": 9.0083, + "step": 239800 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001784605187171151, + "loss": 9.0706, + "step": 239900 + }, + { + "epoch": 2.56, + "learning_rate": 0.00017844322566471066, + "loss": 8.9877, + "step": 240000 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001784259265117684, + "loss": 8.9968, + "step": 240100 + }, + { + "epoch": 2.56, + "learning_rate": 0.00017840862125963373, + "loss": 8.9917, + "step": 240200 + }, + { + "epoch": 2.56, + "learning_rate": 0.00017839130990965246, + "loss": 9.0561, + "step": 240300 + }, + { + "epoch": 2.56, + "learning_rate": 0.00017837399246317087, + "loss": 8.9813, + "step": 240400 + }, + { + "epoch": 2.56, + "learning_rate": 0.00017835666892153582, + "loss": 8.9812, + "step": 240500 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001783393392860945, + "loss": 8.9138, + "step": 240600 + }, + { + "epoch": 2.56, + "learning_rate": 0.00017832200355819466, + "loss": 9.0441, + "step": 240700 + }, + { + "epoch": 2.56, + "learning_rate": 0.00017830466173918448, + "loss": 9.0023, + "step": 240800 + }, + { + "epoch": 2.57, + "learning_rate": 0.00017828731383041265, + "loss": 8.9152, + "step": 240900 + }, + { + "epoch": 2.57, + "learning_rate": 0.00017826995983322833, + "loss": 8.9627, + "step": 241000 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001782525997489811, + "loss": 8.9837, + "step": 241100 + }, + { + "epoch": 2.57, + "learning_rate": 0.00017823523357902112, + "loss": 8.9975, + "step": 241200 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001782178613246989, + "loss": 9.0357, + "step": 241300 + }, + { + "epoch": 2.57, + "learning_rate": 0.00017820048298736548, + "loss": 9.0839, + "step": 241400 + }, + { + "epoch": 2.57, + "learning_rate": 0.00017818309856837248, + "loss": 8.989, + "step": 241500 + }, + { + "epoch": 2.57, + "learning_rate": 0.00017816570806907173, + "loss": 8.9682, + "step": 241600 + }, + { + "epoch": 2.57, + "learning_rate": 0.00017814831149081582, + "loss": 8.9141, + "step": 241700 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001781309088349576, + "loss": 9.0119, + "step": 241800 + }, + { + "epoch": 2.58, + "learning_rate": 0.00017811350010285052, + "loss": 8.8881, + "step": 241900 + }, + { + "epoch": 2.58, + "learning_rate": 0.00017809608529584847, + "loss": 9.0199, + "step": 242000 + }, + { + "epoch": 2.58, + "learning_rate": 0.00017807866441530576, + "loss": 8.9558, + "step": 242100 + }, + { + "epoch": 2.58, + "learning_rate": 0.00017806123746257725, + "loss": 9.016, + "step": 242200 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001780438044390182, + "loss": 8.9634, + "step": 242300 + }, + { + "epoch": 2.58, + "learning_rate": 0.00017802636534598442, + "loss": 9.0079, + "step": 242400 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001780089201848321, + "loss": 8.9477, + "step": 242500 + }, + { + "epoch": 2.58, + "learning_rate": 0.00017799146895691801, + "loss": 8.956, + "step": 242600 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017797401166359927, + "loss": 9.0139, + "step": 242700 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017795654830623362, + "loss": 8.9594, + "step": 242800 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001779390788861791, + "loss": 8.9543, + "step": 242900 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017792160340479433, + "loss": 8.9353, + "step": 243000 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017790412186343844, + "loss": 8.9452, + "step": 243100 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017788663426347088, + "loss": 9.0115, + "step": 243200 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017786914060625173, + "loss": 8.9568, + "step": 243300 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017785164089314146, + "loss": 8.9664, + "step": 243400 + }, + { + "epoch": 2.59, + "learning_rate": 0.00017783413512550102, + "loss": 9.0013, + "step": 243500 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001778166233046918, + "loss": 9.0053, + "step": 243600 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017779910543207577, + "loss": 8.9488, + "step": 243700 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017778158150901521, + "loss": 9.0135, + "step": 243800 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017776405153687304, + "loss": 9.0008, + "step": 243900 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017774651551701252, + "loss": 8.9717, + "step": 244000 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017772897345079742, + "loss": 9.0279, + "step": 244100 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017771142533959205, + "loss": 9.048, + "step": 244200 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017769387118476103, + "loss": 8.9288, + "step": 244300 + }, + { + "epoch": 2.6, + "learning_rate": 0.00017767631098766963, + "loss": 9.0056, + "step": 244400 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001776587447496835, + "loss": 8.9302, + "step": 244500 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001776411724721687, + "loss": 8.9618, + "step": 244600 + }, + { + "epoch": 2.61, + "learning_rate": 0.00017762359415649193, + "loss": 8.9716, + "step": 244700 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001776060098040202, + "loss": 9.0517, + "step": 244800 + }, + { + "epoch": 2.61, + "learning_rate": 0.00017758841941612104, + "loss": 8.9922, + "step": 244900 + }, + { + "epoch": 2.61, + "learning_rate": 0.00017757082299416243, + "loss": 9.0091, + "step": 245000 + }, + { + "epoch": 2.61, + "learning_rate": 0.00017755322053951292, + "loss": 8.9973, + "step": 245100 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001775356120535414, + "loss": 8.9712, + "step": 245200 + }, + { + "epoch": 2.61, + "learning_rate": 0.00017751799753761734, + "loss": 9.004, + "step": 245300 + }, + { + "epoch": 2.61, + "learning_rate": 0.00017750037699311056, + "loss": 8.9259, + "step": 245400 + }, + { + "epoch": 2.61, + "learning_rate": 0.00017748275042139144, + "loss": 8.9173, + "step": 245500 + }, + { + "epoch": 2.62, + "learning_rate": 0.00017746511782383082, + "loss": 9.0404, + "step": 245600 + }, + { + "epoch": 2.62, + "learning_rate": 0.00017744747920179994, + "loss": 8.8637, + "step": 245700 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001774298345566706, + "loss": 8.9282, + "step": 245800 + }, + { + "epoch": 2.62, + "learning_rate": 0.00017741218388981497, + "loss": 8.9994, + "step": 245900 + }, + { + "epoch": 2.62, + "learning_rate": 0.0001773945272026058, + "loss": 8.9139, + "step": 246000 + }, + { + "epoch": 2.62, + "learning_rate": 0.00017737686449641627, + "loss": 8.9915, + "step": 246100 + }, + { + "epoch": 2.62, + "learning_rate": 0.00017735919577261994, + "loss": 9.043, + "step": 246200 + }, + { + "epoch": 2.62, + "learning_rate": 0.00017734152103259095, + "loss": 9.0605, + "step": 246300 + }, + { + "epoch": 2.62, + "learning_rate": 0.00017732384027770387, + "loss": 8.9385, + "step": 246400 + }, + { + "epoch": 2.63, + "learning_rate": 0.00017730615350933376, + "loss": 9.0488, + "step": 246500 + }, + { + "epoch": 2.63, + "learning_rate": 0.00017728846072885603, + "loss": 9.0303, + "step": 246600 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001772707619376467, + "loss": 8.9641, + "step": 246700 + }, + { + "epoch": 2.63, + "learning_rate": 0.00017725305713708228, + "loss": 8.9321, + "step": 246800 + }, + { + "epoch": 2.63, + "learning_rate": 0.00017723534632853956, + "loss": 8.975, + "step": 246900 + }, + { + "epoch": 2.63, + "learning_rate": 0.00017721762951339597, + "loss": 9.0382, + "step": 247000 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001771999066930293, + "loss": 8.9338, + "step": 247100 + }, + { + "epoch": 2.63, + "learning_rate": 0.00017718217786881794, + "loss": 8.9887, + "step": 247200 + }, + { + "epoch": 2.63, + "learning_rate": 0.00017716444304214063, + "loss": 8.9251, + "step": 247300 + }, + { + "epoch": 2.64, + "learning_rate": 0.00017714670221437656, + "loss": 8.9513, + "step": 247400 + }, + { + "epoch": 2.64, + "learning_rate": 0.00017712895538690547, + "loss": 8.9797, + "step": 247500 + }, + { + "epoch": 2.64, + "learning_rate": 0.00017711120256110754, + "loss": 8.932, + "step": 247600 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001770934437383634, + "loss": 8.9234, + "step": 247700 + }, + { + "epoch": 2.64, + "learning_rate": 0.00017707567892005416, + "loss": 9.0391, + "step": 247800 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001770579081075614, + "loss": 8.9909, + "step": 247900 + }, + { + "epoch": 2.64, + "learning_rate": 0.00017704013130226713, + "loss": 8.9147, + "step": 248000 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001770223485055539, + "loss": 8.9645, + "step": 248100 + }, + { + "epoch": 2.64, + "learning_rate": 0.00017700455971880462, + "loss": 8.8275, + "step": 248200 + }, + { + "epoch": 2.64, + "learning_rate": 0.00017698676494340277, + "loss": 8.9759, + "step": 248300 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017696896418073222, + "loss": 8.9454, + "step": 248400 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017695115743217738, + "loss": 8.9626, + "step": 248500 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017693334469912308, + "loss": 8.9747, + "step": 248600 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017691552598295455, + "loss": 8.9045, + "step": 248700 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017689770128505764, + "loss": 8.9448, + "step": 248800 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017687987060681853, + "loss": 8.9506, + "step": 248900 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017686203394962392, + "loss": 8.9454, + "step": 249000 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017684419131486093, + "loss": 8.9703, + "step": 249100 + }, + { + "epoch": 2.65, + "learning_rate": 0.00017682634270391726, + "loss": 8.9793, + "step": 249200 + }, + { + "epoch": 2.66, + "learning_rate": 0.00017680848811818097, + "loss": 9.0042, + "step": 249300 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001767906275590406, + "loss": 8.9762, + "step": 249400 + }, + { + "epoch": 2.66, + "learning_rate": 0.00017677276102788517, + "loss": 8.9424, + "step": 249500 + }, + { + "epoch": 2.66, + "learning_rate": 0.00017675488852610417, + "loss": 8.852, + "step": 249600 + }, + { + "epoch": 2.66, + "learning_rate": 0.00017673701005508754, + "loss": 8.8896, + "step": 249700 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001767191256162257, + "loss": 8.9113, + "step": 249800 + }, + { + "epoch": 2.66, + "learning_rate": 0.00017670123521090948, + "loss": 8.9871, + "step": 249900 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001766833388405303, + "loss": 9.0373, + "step": 250000 + }, + { + "epoch": 2.66, + "learning_rate": 0.00017666543650647992, + "loss": 8.9158, + "step": 250100 + }, + { + "epoch": 2.66, + "learning_rate": 0.00017664752821015055, + "loss": 8.919, + "step": 250200 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017662961395293502, + "loss": 8.9563, + "step": 250300 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017661169373622647, + "loss": 9.0025, + "step": 250400 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017659376756141857, + "loss": 8.9836, + "step": 250500 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017657583542990543, + "loss": 9.0069, + "step": 250600 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017655789734308166, + "loss": 8.9648, + "step": 250700 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017653995330234228, + "loss": 8.9767, + "step": 250800 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017652200330908277, + "loss": 8.9507, + "step": 250900 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017650404736469917, + "loss": 9.028, + "step": 251000 + }, + { + "epoch": 2.67, + "learning_rate": 0.00017648608547058786, + "loss": 8.8685, + "step": 251100 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001764681176281458, + "loss": 8.9771, + "step": 251200 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001764501438387703, + "loss": 9.0844, + "step": 251300 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017643216410385918, + "loss": 8.9713, + "step": 251400 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001764141784248108, + "loss": 9.0016, + "step": 251500 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017639618680302378, + "loss": 8.9975, + "step": 251600 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017637818923989745, + "loss": 8.9719, + "step": 251700 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017636018573683143, + "loss": 8.9181, + "step": 251800 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017634217629522583, + "loss": 8.9611, + "step": 251900 + }, + { + "epoch": 2.68, + "learning_rate": 0.00017632416091648129, + "loss": 8.8874, + "step": 252000 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017630613960199884, + "loss": 8.9654, + "step": 252100 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017628811235318004, + "loss": 8.9891, + "step": 252200 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001762700791714268, + "loss": 8.9001, + "step": 252300 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017625204005814165, + "loss": 8.9429, + "step": 252400 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017623399501472744, + "loss": 8.8998, + "step": 252500 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017621594404258753, + "loss": 9.0265, + "step": 252600 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001761978871431258, + "loss": 8.9344, + "step": 252700 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017617982431774644, + "loss": 9.0004, + "step": 252800 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001761617555678543, + "loss": 8.9864, + "step": 252900 + }, + { + "epoch": 2.69, + "learning_rate": 0.00017614368089485452, + "loss": 9.013, + "step": 253000 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001761256003001528, + "loss": 8.9821, + "step": 253100 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017610751378515524, + "loss": 8.9165, + "step": 253200 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017608942135126848, + "loss": 8.9903, + "step": 253300 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017607132299989953, + "loss": 8.9693, + "step": 253400 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001760532187324559, + "loss": 8.9395, + "step": 253500 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017603510855034558, + "loss": 8.958, + "step": 253600 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017601699245497704, + "loss": 8.9414, + "step": 253700 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017599887044775908, + "loss": 8.9102, + "step": 253800 + }, + { + "epoch": 2.7, + "learning_rate": 0.00017598074253010113, + "loss": 8.8994, + "step": 253900 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017596260870341294, + "loss": 8.9628, + "step": 254000 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017594446896910482, + "loss": 9.0717, + "step": 254100 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017592632332858748, + "loss": 8.932, + "step": 254200 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001759081717832721, + "loss": 8.9224, + "step": 254300 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017589001433457034, + "loss": 8.9806, + "step": 254400 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017587185098389432, + "loss": 8.9448, + "step": 254500 + }, + { + "epoch": 2.71, + "learning_rate": 0.00017585368173265659, + "loss": 8.9449, + "step": 254600 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001758355065822702, + "loss": 9.0166, + "step": 254700 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001758173255341486, + "loss": 8.9605, + "step": 254800 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001757991385897057, + "loss": 8.9765, + "step": 254900 + }, + { + "epoch": 2.72, + "learning_rate": 0.000175780945750356, + "loss": 8.922, + "step": 255000 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017576274701751427, + "loss": 8.9235, + "step": 255100 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017574454239259585, + "loss": 8.943, + "step": 255200 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017572633187701655, + "loss": 8.9625, + "step": 255300 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001757081154721926, + "loss": 9.0197, + "step": 255400 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017568989317954064, + "loss": 9.027, + "step": 255500 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017567166500047785, + "loss": 9.0078, + "step": 255600 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001756534309364218, + "loss": 8.9845, + "step": 255700 + }, + { + "epoch": 2.72, + "learning_rate": 0.00017563519098879064, + "loss": 9.0329, + "step": 255800 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017561694515900284, + "loss": 9.0249, + "step": 255900 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017559869344847741, + "loss": 8.8926, + "step": 256000 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017558043585863374, + "loss": 8.9714, + "step": 256100 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017556217239089175, + "loss": 8.9411, + "step": 256200 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017554390304667177, + "loss": 8.8691, + "step": 256300 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017552562782739464, + "loss": 8.8395, + "step": 256400 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001755073467344816, + "loss": 8.9803, + "step": 256500 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017548905976935442, + "loss": 8.9569, + "step": 256600 + }, + { + "epoch": 2.73, + "learning_rate": 0.00017547076693343521, + "loss": 8.973, + "step": 256700 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017545246822814667, + "loss": 8.9689, + "step": 256800 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017543416365491186, + "loss": 8.9267, + "step": 256900 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017541585321515432, + "loss": 8.909, + "step": 257000 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017539753691029808, + "loss": 8.9222, + "step": 257100 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017537921474176758, + "loss": 8.9514, + "step": 257200 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017536088671098773, + "loss": 8.9511, + "step": 257300 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017534255281938395, + "loss": 8.9192, + "step": 257400 + }, + { + "epoch": 2.74, + "learning_rate": 0.000175324213068382, + "loss": 9.0063, + "step": 257500 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017530586745940824, + "loss": 8.9085, + "step": 257600 + }, + { + "epoch": 2.74, + "learning_rate": 0.00017528751599388932, + "loss": 8.933, + "step": 257700 + }, + { + "epoch": 2.75, + "learning_rate": 0.00017526915867325252, + "loss": 8.9768, + "step": 257800 + }, + { + "epoch": 2.75, + "learning_rate": 0.00017525079549892544, + "loss": 8.9929, + "step": 257900 + }, + { + "epoch": 2.75, + "learning_rate": 0.00017523242647233622, + "loss": 8.9177, + "step": 258000 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001752140515949134, + "loss": 8.8856, + "step": 258100 + }, + { + "epoch": 2.75, + "learning_rate": 0.000175195670868086, + "loss": 8.9662, + "step": 258200 + }, + { + "epoch": 2.75, + "learning_rate": 0.00017517728429328346, + "loss": 8.9415, + "step": 258300 + }, + { + "epoch": 2.75, + "learning_rate": 0.00017515889187193578, + "loss": 8.9625, + "step": 258400 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001751404936054733, + "loss": 8.8996, + "step": 258500 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001751220894953268, + "loss": 9.0123, + "step": 258600 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017510367954292765, + "loss": 8.9661, + "step": 258700 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017508526374970758, + "loss": 8.8847, + "step": 258800 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017506684211709877, + "loss": 8.8645, + "step": 258900 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017504841464653385, + "loss": 8.8753, + "step": 259000 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017502998133944598, + "loss": 8.9587, + "step": 259100 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017501154219726867, + "loss": 8.9751, + "step": 259200 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017499309722143598, + "loss": 8.9833, + "step": 259300 + }, + { + "epoch": 2.76, + "learning_rate": 0.00017497464641338234, + "loss": 8.9087, + "step": 259400 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001749561897745427, + "loss": 8.9397, + "step": 259500 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001749377273063524, + "loss": 8.9047, + "step": 259600 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001749192590102473, + "loss": 8.9878, + "step": 259700 + }, + { + "epoch": 2.77, + "learning_rate": 0.00017490078488766365, + "loss": 9.0019, + "step": 259800 + }, + { + "epoch": 2.77, + "learning_rate": 0.00017488230494003824, + "loss": 8.948, + "step": 259900 + }, + { + "epoch": 2.77, + "learning_rate": 0.00017486381916880818, + "loss": 8.9107, + "step": 260000 + }, + { + "epoch": 2.77, + "learning_rate": 0.00017484532757541116, + "loss": 8.9556, + "step": 260100 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001748268301612853, + "loss": 8.929, + "step": 260200 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001748083269278691, + "loss": 8.9624, + "step": 260300 + }, + { + "epoch": 2.77, + "learning_rate": 0.00017478981787660155, + "loss": 8.9792, + "step": 260400 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001747713030089221, + "loss": 8.9191, + "step": 260500 + }, + { + "epoch": 2.78, + "learning_rate": 0.00017475278232627074, + "loss": 8.9754, + "step": 260600 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001747342558300877, + "loss": 8.8545, + "step": 260700 + }, + { + "epoch": 2.78, + "learning_rate": 0.00017471572352181384, + "loss": 8.9311, + "step": 260800 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001746971854028905, + "loss": 8.9463, + "step": 260900 + }, + { + "epoch": 2.78, + "learning_rate": 0.00017467864147475924, + "loss": 8.8983, + "step": 261000 + }, + { + "epoch": 2.78, + "learning_rate": 0.00017466009173886234, + "loss": 8.9522, + "step": 261100 + }, + { + "epoch": 2.78, + "learning_rate": 0.00017464153619664235, + "loss": 9.0215, + "step": 261200 + }, + { + "epoch": 2.78, + "learning_rate": 0.00017462297484954235, + "loss": 8.9982, + "step": 261300 + }, + { + "epoch": 2.78, + "learning_rate": 0.00017460440769900593, + "loss": 8.9512, + "step": 261400 + }, + { + "epoch": 2.79, + "learning_rate": 0.00017458583474647693, + "loss": 8.9549, + "step": 261500 + }, + { + "epoch": 2.79, + "learning_rate": 0.00017456725599339988, + "loss": 8.9553, + "step": 261600 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001745486714412196, + "loss": 9.0025, + "step": 261700 + }, + { + "epoch": 2.79, + "learning_rate": 0.00017453008109138144, + "loss": 8.9597, + "step": 261800 + }, + { + "epoch": 2.79, + "learning_rate": 0.00017451148494533115, + "loss": 8.9457, + "step": 261900 + }, + { + "epoch": 2.79, + "learning_rate": 0.00017449288300451493, + "loss": 8.9031, + "step": 262000 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001744742752703795, + "loss": 8.8891, + "step": 262100 + }, + { + "epoch": 2.79, + "learning_rate": 0.00017445566174437198, + "loss": 8.9886, + "step": 262200 + }, + { + "epoch": 2.79, + "learning_rate": 0.00017443704242793993, + "loss": 9.0055, + "step": 262300 + }, + { + "epoch": 2.79, + "learning_rate": 0.0001744184173225314, + "loss": 8.9144, + "step": 262400 + }, + { + "epoch": 2.8, + "learning_rate": 0.00017439978642959483, + "loss": 8.9942, + "step": 262500 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001743811497505792, + "loss": 8.8742, + "step": 262600 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001743625072869338, + "loss": 8.9657, + "step": 262700 + }, + { + "epoch": 2.8, + "learning_rate": 0.00017434385904010853, + "loss": 8.9, + "step": 262800 + }, + { + "epoch": 2.8, + "learning_rate": 0.00017432520501155363, + "loss": 8.9832, + "step": 262900 + }, + { + "epoch": 2.8, + "learning_rate": 0.00017430654520271985, + "loss": 8.9842, + "step": 263000 + }, + { + "epoch": 2.8, + "learning_rate": 0.00017428787961505838, + "loss": 8.9224, + "step": 263100 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001742692082500208, + "loss": 8.9885, + "step": 263200 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001742505311090592, + "loss": 9.0455, + "step": 263300 + }, + { + "epoch": 2.81, + "learning_rate": 0.00017423184819362613, + "loss": 9.0014, + "step": 263400 + }, + { + "epoch": 2.81, + "learning_rate": 0.00017421315950517455, + "loss": 8.9636, + "step": 263500 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001741944650451578, + "loss": 8.9, + "step": 263600 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001741757648150299, + "loss": 8.9688, + "step": 263700 + }, + { + "epoch": 2.81, + "learning_rate": 0.00017415705881624507, + "loss": 8.9756, + "step": 263800 + }, + { + "epoch": 2.81, + "learning_rate": 0.00017413834705025806, + "loss": 9.0131, + "step": 263900 + }, + { + "epoch": 2.81, + "learning_rate": 0.00017411962951852417, + "loss": 8.8775, + "step": 264000 + }, + { + "epoch": 2.81, + "learning_rate": 0.00017410090622249898, + "loss": 8.9091, + "step": 264100 + }, + { + "epoch": 2.81, + "learning_rate": 0.00017408217716363865, + "loss": 8.939, + "step": 264200 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017406344234339972, + "loss": 8.9407, + "step": 264300 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017404470176323925, + "loss": 8.8857, + "step": 264400 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017402595542461463, + "loss": 8.9844, + "step": 264500 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001740072033289838, + "loss": 8.9623, + "step": 264600 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017398844547780512, + "loss": 8.8565, + "step": 264700 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017396968187253736, + "loss": 8.8913, + "step": 264800 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017395091251463978, + "loss": 8.9024, + "step": 264900 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017393213740557207, + "loss": 8.9521, + "step": 265000 + }, + { + "epoch": 2.82, + "learning_rate": 0.00017391335654679438, + "loss": 8.8992, + "step": 265100 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001738945699397673, + "loss": 8.9777, + "step": 265200 + }, + { + "epoch": 2.83, + "learning_rate": 0.00017387577758595186, + "loss": 8.8901, + "step": 265300 + }, + { + "epoch": 2.83, + "learning_rate": 0.00017385697948680954, + "loss": 8.8653, + "step": 265400 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001738381756438023, + "loss": 8.8726, + "step": 265500 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001738193660583925, + "loss": 8.8983, + "step": 265600 + }, + { + "epoch": 2.83, + "learning_rate": 0.00017380055073204292, + "loss": 8.8447, + "step": 265700 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001737817296662169, + "loss": 8.8827, + "step": 265800 + }, + { + "epoch": 2.83, + "learning_rate": 0.00017376290286237812, + "loss": 8.9537, + "step": 265900 + }, + { + "epoch": 2.83, + "learning_rate": 0.00017374407032199075, + "loss": 8.9574, + "step": 266000 + }, + { + "epoch": 2.83, + "learning_rate": 0.00017372523204651938, + "loss": 8.9101, + "step": 266100 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001737063880374291, + "loss": 8.9037, + "step": 266200 + }, + { + "epoch": 2.84, + "learning_rate": 0.00017368753829618538, + "loss": 8.9936, + "step": 266300 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001736686828242542, + "loss": 8.9124, + "step": 266400 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001736498216231019, + "loss": 8.9783, + "step": 266500 + }, + { + "epoch": 2.84, + "learning_rate": 0.00017363095469419534, + "loss": 8.9667, + "step": 266600 + }, + { + "epoch": 2.84, + "learning_rate": 0.00017361208203900183, + "loss": 8.9815, + "step": 266700 + }, + { + "epoch": 2.84, + "learning_rate": 0.00017359320365898905, + "loss": 8.9511, + "step": 266800 + }, + { + "epoch": 2.84, + "learning_rate": 0.00017357431955562525, + "loss": 9.0009, + "step": 266900 + }, + { + "epoch": 2.84, + "learning_rate": 0.00017355542973037895, + "loss": 8.8952, + "step": 267000 + }, + { + "epoch": 2.84, + "learning_rate": 0.00017353653418471929, + "loss": 8.9527, + "step": 267100 + }, + { + "epoch": 2.85, + "learning_rate": 0.00017351763292011574, + "loss": 8.936, + "step": 267200 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001734987259380383, + "loss": 9.0016, + "step": 267300 + }, + { + "epoch": 2.85, + "learning_rate": 0.00017347981323995727, + "loss": 8.9115, + "step": 267400 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001734608948273436, + "loss": 8.8959, + "step": 267500 + }, + { + "epoch": 2.85, + "learning_rate": 0.00017344197070166852, + "loss": 8.9212, + "step": 267600 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001734230408644038, + "loss": 8.9316, + "step": 267700 + }, + { + "epoch": 2.85, + "learning_rate": 0.00017340410531702156, + "loss": 8.8712, + "step": 267800 + }, + { + "epoch": 2.85, + "learning_rate": 0.00017338516406099447, + "loss": 8.8838, + "step": 267900 + }, + { + "epoch": 2.85, + "learning_rate": 0.00017336621709779554, + "loss": 8.9771, + "step": 268000 + }, + { + "epoch": 2.86, + "learning_rate": 0.00017334726442889834, + "loss": 8.9568, + "step": 268100 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001733283060557768, + "loss": 8.9458, + "step": 268200 + }, + { + "epoch": 2.86, + "learning_rate": 0.00017330934197990527, + "loss": 8.9517, + "step": 268300 + }, + { + "epoch": 2.86, + "learning_rate": 0.00017329037220275862, + "loss": 8.9658, + "step": 268400 + }, + { + "epoch": 2.86, + "learning_rate": 0.00017327139672581216, + "loss": 8.9565, + "step": 268500 + }, + { + "epoch": 2.86, + "learning_rate": 0.00017325241555054158, + "loss": 8.9412, + "step": 268600 + }, + { + "epoch": 2.86, + "learning_rate": 0.00017323342867842307, + "loss": 8.8065, + "step": 268700 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001732144361109332, + "loss": 8.8451, + "step": 268800 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001731954378495491, + "loss": 8.917, + "step": 268900 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001731764338957482, + "loss": 8.8892, + "step": 269000 + }, + { + "epoch": 2.87, + "learning_rate": 0.00017315742425100847, + "loss": 8.8897, + "step": 269100 + }, + { + "epoch": 2.87, + "learning_rate": 0.00017313840891680825, + "loss": 8.9245, + "step": 269200 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001731193878946264, + "loss": 8.9287, + "step": 269300 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001731003611859422, + "loss": 8.8802, + "step": 269400 + }, + { + "epoch": 2.87, + "learning_rate": 0.00017308132879223533, + "loss": 8.9032, + "step": 269500 + }, + { + "epoch": 2.87, + "learning_rate": 0.00017306229071498597, + "loss": 8.9203, + "step": 269600 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001730432469556747, + "loss": 8.9732, + "step": 269700 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001730241975157825, + "loss": 8.9446, + "step": 269800 + }, + { + "epoch": 2.87, + "learning_rate": 0.00017300514239679092, + "loss": 9.0112, + "step": 269900 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001729860816001819, + "loss": 8.8536, + "step": 270000 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001729670151274377, + "loss": 8.9583, + "step": 270100 + }, + { + "epoch": 2.88, + "learning_rate": 0.00017294794298004123, + "loss": 8.9816, + "step": 270200 + }, + { + "epoch": 2.88, + "learning_rate": 0.00017292886515947564, + "loss": 8.9209, + "step": 270300 + }, + { + "epoch": 2.88, + "learning_rate": 0.00017290978166722466, + "loss": 8.9154, + "step": 270400 + }, + { + "epoch": 2.88, + "learning_rate": 0.00017289069250477245, + "loss": 8.9527, + "step": 270500 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001728715976736035, + "loss": 9.0254, + "step": 270600 + }, + { + "epoch": 2.88, + "learning_rate": 0.00017285249717520284, + "loss": 8.9509, + "step": 270700 + }, + { + "epoch": 2.88, + "learning_rate": 0.00017283339101105595, + "loss": 8.9001, + "step": 270800 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001728142791826487, + "loss": 8.9519, + "step": 270900 + }, + { + "epoch": 2.89, + "learning_rate": 0.00017279516169146745, + "loss": 9.0014, + "step": 271000 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001727760385389989, + "loss": 8.9635, + "step": 271100 + }, + { + "epoch": 2.89, + "learning_rate": 0.00017275690972673032, + "loss": 8.9549, + "step": 271200 + }, + { + "epoch": 2.89, + "learning_rate": 0.00017273777525614933, + "loss": 8.8711, + "step": 271300 + }, + { + "epoch": 2.89, + "learning_rate": 0.000172718635128744, + "loss": 8.8529, + "step": 271400 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001726994893460029, + "loss": 8.9201, + "step": 271500 + }, + { + "epoch": 2.89, + "learning_rate": 0.000172680337909415, + "loss": 8.9045, + "step": 271600 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001726611808204697, + "loss": 8.8497, + "step": 271700 + }, + { + "epoch": 2.9, + "learning_rate": 0.00017264201808065682, + "loss": 8.8581, + "step": 271800 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001726228496914667, + "loss": 8.9443, + "step": 271900 + }, + { + "epoch": 2.9, + "learning_rate": 0.00017260367565439, + "loss": 8.9193, + "step": 272000 + }, + { + "epoch": 2.9, + "learning_rate": 0.00017258449597091797, + "loss": 8.881, + "step": 272100 + }, + { + "epoch": 2.9, + "learning_rate": 0.00017256531064254217, + "loss": 8.9874, + "step": 272200 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001725461196707546, + "loss": 8.8933, + "step": 272300 + }, + { + "epoch": 2.9, + "learning_rate": 0.00017252692305704784, + "loss": 8.9976, + "step": 272400 + }, + { + "epoch": 2.9, + "learning_rate": 0.00017250772080291474, + "loss": 8.8656, + "step": 272500 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001724885129098487, + "loss": 8.9491, + "step": 272600 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001724692993793435, + "loss": 8.9194, + "step": 272700 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001724500802128934, + "loss": 8.9272, + "step": 272800 + }, + { + "epoch": 2.91, + "learning_rate": 0.00017243085541199303, + "loss": 8.8759, + "step": 272900 + }, + { + "epoch": 2.91, + "learning_rate": 0.00017241162497813755, + "loss": 8.9244, + "step": 273000 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001723923889128225, + "loss": 8.8069, + "step": 273100 + }, + { + "epoch": 2.91, + "learning_rate": 0.00017237314721754384, + "loss": 8.9837, + "step": 273200 + }, + { + "epoch": 2.91, + "learning_rate": 0.00017235389989379808, + "loss": 8.9349, + "step": 273300 + }, + { + "epoch": 2.91, + "learning_rate": 0.00017233464694308198, + "loss": 8.9206, + "step": 273400 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001723153883668929, + "loss": 8.9406, + "step": 273500 + }, + { + "epoch": 2.91, + "learning_rate": 0.00017229612416672863, + "loss": 8.9123, + "step": 273600 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017227685434408727, + "loss": 8.9349, + "step": 273700 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017225757890046745, + "loss": 8.9765, + "step": 273800 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017223829783736828, + "loss": 8.9098, + "step": 273900 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017221901115628912, + "loss": 8.885, + "step": 274000 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017219971885873008, + "loss": 8.9512, + "step": 274100 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001721804209461914, + "loss": 8.9377, + "step": 274200 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001721611174201739, + "loss": 8.8934, + "step": 274300 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001721418082821788, + "loss": 8.9182, + "step": 274400 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017212249353370783, + "loss": 8.8682, + "step": 274500 + }, + { + "epoch": 2.92, + "learning_rate": 0.00017210317317626307, + "loss": 8.8184, + "step": 274600 + }, + { + "epoch": 2.93, + "learning_rate": 0.00017208384721134707, + "loss": 8.888, + "step": 274700 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001720645156404628, + "loss": 8.8648, + "step": 274800 + }, + { + "epoch": 2.93, + "learning_rate": 0.00017204517846511368, + "loss": 8.8186, + "step": 274900 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001720258356868036, + "loss": 8.9285, + "step": 275000 + }, + { + "epoch": 2.93, + "learning_rate": 0.00017200648730703678, + "loss": 8.8787, + "step": 275100 + }, + { + "epoch": 2.93, + "learning_rate": 0.000171987133327318, + "loss": 8.9236, + "step": 275200 + }, + { + "epoch": 2.93, + "learning_rate": 0.00017196777374915243, + "loss": 8.9164, + "step": 275300 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001719484085740456, + "loss": 8.8974, + "step": 275400 + }, + { + "epoch": 2.93, + "learning_rate": 0.00017192903780350363, + "loss": 8.8836, + "step": 275500 + }, + { + "epoch": 2.94, + "learning_rate": 0.00017190966143903292, + "loss": 8.9866, + "step": 275600 + }, + { + "epoch": 2.94, + "learning_rate": 0.00017189027948214043, + "loss": 8.8716, + "step": 275700 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001718708919343334, + "loss": 8.9524, + "step": 275800 + }, + { + "epoch": 2.94, + "learning_rate": 0.00017185149879711973, + "loss": 8.88, + "step": 275900 + }, + { + "epoch": 2.94, + "learning_rate": 0.00017183210007200753, + "loss": 8.9094, + "step": 276000 + }, + { + "epoch": 2.94, + "learning_rate": 0.00017181269576050542, + "loss": 8.8641, + "step": 276100 + }, + { + "epoch": 2.94, + "learning_rate": 0.00017179328586412258, + "loss": 8.9116, + "step": 276200 + }, + { + "epoch": 2.94, + "learning_rate": 0.00017177387038436846, + "loss": 8.9618, + "step": 276300 + }, + { + "epoch": 2.94, + "learning_rate": 0.000171754449322753, + "loss": 8.9288, + "step": 276400 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001717350226807866, + "loss": 8.9354, + "step": 276500 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017171559045998, + "loss": 8.9362, + "step": 276600 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017169615266184458, + "loss": 8.9101, + "step": 276700 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017167670928789193, + "loss": 8.9366, + "step": 276800 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017165726033963413, + "loss": 8.9519, + "step": 276900 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017163780581858383, + "loss": 8.9374, + "step": 277000 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001716183457262539, + "loss": 8.889, + "step": 277100 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017159888006415785, + "loss": 8.9146, + "step": 277200 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017157940883380946, + "loss": 8.9435, + "step": 277300 + }, + { + "epoch": 2.95, + "learning_rate": 0.00017155993203672306, + "loss": 8.9732, + "step": 277400 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001715404496744133, + "loss": 8.9504, + "step": 277500 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001715209617483954, + "loss": 8.8781, + "step": 277600 + }, + { + "epoch": 2.96, + "learning_rate": 0.00017150146826018484, + "loss": 8.8843, + "step": 277700 + }, + { + "epoch": 2.96, + "learning_rate": 0.00017148196921129775, + "loss": 8.9286, + "step": 277800 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001714624646032505, + "loss": 8.958, + "step": 277900 + }, + { + "epoch": 2.96, + "learning_rate": 0.00017144295443755997, + "loss": 8.882, + "step": 278000 + }, + { + "epoch": 2.96, + "learning_rate": 0.00017142343871574348, + "loss": 8.975, + "step": 278100 + }, + { + "epoch": 2.96, + "learning_rate": 0.00017140391743931875, + "loss": 8.8864, + "step": 278200 + }, + { + "epoch": 2.96, + "learning_rate": 0.000171384390609804, + "loss": 8.8394, + "step": 278300 + }, + { + "epoch": 2.97, + "learning_rate": 0.00017136485822871777, + "loss": 8.819, + "step": 278400 + }, + { + "epoch": 2.97, + "learning_rate": 0.00017134532029757914, + "loss": 8.927, + "step": 278500 + }, + { + "epoch": 2.97, + "learning_rate": 0.00017132577681790756, + "loss": 8.9393, + "step": 278600 + }, + { + "epoch": 2.97, + "learning_rate": 0.00017130622779122293, + "loss": 8.8611, + "step": 278700 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001712866732190456, + "loss": 8.8771, + "step": 278800 + }, + { + "epoch": 2.97, + "learning_rate": 0.00017126711310289628, + "loss": 8.8931, + "step": 278900 + }, + { + "epoch": 2.97, + "learning_rate": 0.00017124754744429622, + "loss": 8.9703, + "step": 279000 + }, + { + "epoch": 2.97, + "learning_rate": 0.000171227976244767, + "loss": 8.8179, + "step": 279100 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001712083995058307, + "loss": 8.9376, + "step": 279200 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001711888172290098, + "loss": 8.9023, + "step": 279300 + }, + { + "epoch": 2.98, + "learning_rate": 0.00017116922941582722, + "loss": 8.9119, + "step": 279400 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001711496360678063, + "loss": 8.8717, + "step": 279500 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001711300371864708, + "loss": 8.9021, + "step": 279600 + }, + { + "epoch": 2.98, + "learning_rate": 0.00017111043277334494, + "loss": 8.8933, + "step": 279700 + }, + { + "epoch": 2.98, + "learning_rate": 0.00017109082282995336, + "loss": 8.7801, + "step": 279800 + }, + { + "epoch": 2.98, + "learning_rate": 0.00017107120735782117, + "loss": 8.9186, + "step": 279900 + }, + { + "epoch": 2.98, + "learning_rate": 0.00017105158635847376, + "loss": 8.8796, + "step": 280000 + }, + { + "epoch": 2.98, + "learning_rate": 0.00017103195983343716, + "loss": 8.9189, + "step": 280100 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001710123277842377, + "loss": 8.8969, + "step": 280200 + }, + { + "epoch": 2.99, + "learning_rate": 0.00017099269021240212, + "loss": 8.9221, + "step": 280300 + }, + { + "epoch": 2.99, + "learning_rate": 0.0001709730471194577, + "loss": 8.8817, + "step": 280400 + }, + { + "epoch": 2.99, + "learning_rate": 0.00017095339850693204, + "loss": 8.9323, + "step": 280500 + }, + { + "epoch": 2.99, + "learning_rate": 0.00017093374437635327, + "loss": 8.8186, + "step": 280600 + }, + { + "epoch": 2.99, + "learning_rate": 0.0001709140847292498, + "loss": 8.8642, + "step": 280700 + }, + { + "epoch": 2.99, + "learning_rate": 0.00017089441956715066, + "loss": 8.9177, + "step": 280800 + }, + { + "epoch": 2.99, + "learning_rate": 0.00017087474889158512, + "loss": 8.8801, + "step": 280900 + }, + { + "epoch": 2.99, + "learning_rate": 0.00017085507270408307, + "loss": 8.9598, + "step": 281000 + }, + { + "epoch": 2.99, + "learning_rate": 0.00017083539100617466, + "loss": 8.9144, + "step": 281100 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017081570379939056, + "loss": 8.8356, + "step": 281200 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017079601108526185, + "loss": 8.8304, + "step": 281300 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017077631286532, + "loss": 8.86, + "step": 281400 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017075660914109698, + "loss": 8.9017, + "step": 281500 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017073689991412518, + "loss": 9.0032, + "step": 281600 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001707171851859373, + "loss": 8.9352, + "step": 281700 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017069746495806663, + "loss": 8.8118, + "step": 281800 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001706777392320468, + "loss": 8.9098, + "step": 281900 + }, + { + "epoch": 3.0, + "learning_rate": 0.00017065800800941184, + "loss": 8.9336, + "step": 282000 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001706382712916963, + "loss": 8.9269, + "step": 282100 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001706185290804351, + "loss": 8.9362, + "step": 282200 + }, + { + "epoch": 3.01, + "learning_rate": 0.00017059878137716354, + "loss": 8.9821, + "step": 282300 + }, + { + "epoch": 3.01, + "learning_rate": 0.00017057902818341748, + "loss": 8.8858, + "step": 282400 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001705592695007331, + "loss": 8.9324, + "step": 282500 + }, + { + "epoch": 3.01, + "learning_rate": 0.00017053950533064697, + "loss": 8.8959, + "step": 282600 + }, + { + "epoch": 3.01, + "learning_rate": 0.00017051973567469626, + "loss": 8.8696, + "step": 282700 + }, + { + "epoch": 3.01, + "learning_rate": 0.00017049996053441837, + "loss": 8.9197, + "step": 282800 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001704801799113513, + "loss": 8.8827, + "step": 282900 + }, + { + "epoch": 3.01, + "learning_rate": 0.00017046039380703328, + "loss": 8.948, + "step": 283000 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017044060222300318, + "loss": 8.8769, + "step": 283100 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017042080516080013, + "loss": 8.9017, + "step": 283200 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017040100262196377, + "loss": 8.8998, + "step": 283300 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017038119460803416, + "loss": 8.9212, + "step": 283400 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017036138112055173, + "loss": 8.8666, + "step": 283500 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017034156216105744, + "loss": 8.9426, + "step": 283600 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017032173773109258, + "loss": 8.9078, + "step": 283700 + }, + { + "epoch": 3.02, + "learning_rate": 0.00017030190783219885, + "loss": 8.8866, + "step": 283800 + }, + { + "epoch": 3.02, + "learning_rate": 0.0001702820724659185, + "loss": 8.9188, + "step": 283900 + }, + { + "epoch": 3.02, + "learning_rate": 0.0001702622316337941, + "loss": 8.9284, + "step": 284000 + }, + { + "epoch": 3.03, + "learning_rate": 0.00017024238533736865, + "loss": 8.8781, + "step": 284100 + }, + { + "epoch": 3.03, + "learning_rate": 0.00017022253357818566, + "loss": 8.8829, + "step": 284200 + }, + { + "epoch": 3.03, + "learning_rate": 0.00017020267635778892, + "loss": 8.9222, + "step": 284300 + }, + { + "epoch": 3.03, + "learning_rate": 0.00017018281367772278, + "loss": 8.9894, + "step": 284400 + }, + { + "epoch": 3.03, + "learning_rate": 0.000170162945539532, + "loss": 8.8855, + "step": 284500 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001701430719447616, + "loss": 8.8644, + "step": 284600 + }, + { + "epoch": 3.03, + "learning_rate": 0.00017012319289495727, + "loss": 8.9142, + "step": 284700 + }, + { + "epoch": 3.03, + "learning_rate": 0.000170103308391665, + "loss": 8.9464, + "step": 284800 + }, + { + "epoch": 3.03, + "learning_rate": 0.00017008341843643114, + "loss": 8.9217, + "step": 284900 + }, + { + "epoch": 3.04, + "learning_rate": 0.00017006352303080256, + "loss": 8.8993, + "step": 285000 + }, + { + "epoch": 3.04, + "learning_rate": 0.00017004362217632658, + "loss": 8.8958, + "step": 285100 + }, + { + "epoch": 3.04, + "learning_rate": 0.00017002371587455085, + "loss": 8.8842, + "step": 285200 + }, + { + "epoch": 3.04, + "learning_rate": 0.00017000380412702346, + "loss": 8.9488, + "step": 285300 + }, + { + "epoch": 3.04, + "learning_rate": 0.00016998388693529297, + "loss": 8.9244, + "step": 285400 + }, + { + "epoch": 3.04, + "learning_rate": 0.00016996396430090838, + "loss": 8.9235, + "step": 285500 + }, + { + "epoch": 3.04, + "learning_rate": 0.00016994403622541898, + "loss": 8.8933, + "step": 285600 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001699241027103747, + "loss": 9.0091, + "step": 285700 + }, + { + "epoch": 3.04, + "learning_rate": 0.00016990416375732567, + "loss": 8.9172, + "step": 285800 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016988421936782263, + "loss": 8.9355, + "step": 285900 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001698642695434166, + "loss": 8.9431, + "step": 286000 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016984431428565907, + "loss": 8.868, + "step": 286100 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016982435359610203, + "loss": 8.8454, + "step": 286200 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016980438747629775, + "loss": 8.9214, + "step": 286300 + }, + { + "epoch": 3.05, + "learning_rate": 0.000169784415927799, + "loss": 8.8799, + "step": 286400 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016976443895215905, + "loss": 8.9022, + "step": 286500 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016974445655093144, + "loss": 8.9055, + "step": 286600 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016972446872567023, + "loss": 8.8324, + "step": 286700 + }, + { + "epoch": 3.05, + "learning_rate": 0.00016970447547792986, + "loss": 8.928, + "step": 286800 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001696844768092652, + "loss": 8.9485, + "step": 286900 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016966447272123158, + "loss": 8.9039, + "step": 287000 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001696444632153847, + "loss": 8.846, + "step": 287100 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001696244482932807, + "loss": 8.8699, + "step": 287200 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016960442795647615, + "loss": 8.9411, + "step": 287300 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016958440220652806, + "loss": 8.8621, + "step": 287400 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016956437104499375, + "loss": 8.9386, + "step": 287500 + }, + { + "epoch": 3.06, + "learning_rate": 0.00016954433447343114, + "loss": 8.9246, + "step": 287600 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001695242924933984, + "loss": 8.8651, + "step": 287700 + }, + { + "epoch": 3.07, + "learning_rate": 0.00016950424510645427, + "loss": 8.9414, + "step": 287800 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001694841923141578, + "loss": 8.8703, + "step": 287900 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001694641341180685, + "loss": 8.8533, + "step": 288000 + }, + { + "epoch": 3.07, + "learning_rate": 0.00016944407051974628, + "loss": 8.8922, + "step": 288100 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001694240015207515, + "loss": 8.8697, + "step": 288200 + }, + { + "epoch": 3.07, + "learning_rate": 0.00016940392712264495, + "loss": 8.8795, + "step": 288300 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001693838473269878, + "loss": 8.9759, + "step": 288400 + }, + { + "epoch": 3.07, + "learning_rate": 0.00016936376213534166, + "loss": 8.8686, + "step": 288500 + }, + { + "epoch": 3.07, + "learning_rate": 0.00016934367154926855, + "loss": 8.8617, + "step": 288600 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016932357557033093, + "loss": 8.8092, + "step": 288700 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016930347420009167, + "loss": 8.8871, + "step": 288800 + }, + { + "epoch": 3.08, + "learning_rate": 0.000169283367440114, + "loss": 8.8764, + "step": 288900 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016926325529196173, + "loss": 8.9017, + "step": 289000 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016924313775719892, + "loss": 8.8472, + "step": 289100 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016922301483739008, + "loss": 8.9263, + "step": 289200 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016920288653410022, + "loss": 8.939, + "step": 289300 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016918275284889472, + "loss": 8.8499, + "step": 289400 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016916261378333936, + "loss": 8.9908, + "step": 289500 + }, + { + "epoch": 3.08, + "learning_rate": 0.00016914246933900036, + "loss": 8.8932, + "step": 289600 + }, + { + "epoch": 3.09, + "learning_rate": 0.00016912231951744436, + "loss": 8.8728, + "step": 289700 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001691021643202384, + "loss": 8.8685, + "step": 289800 + }, + { + "epoch": 3.09, + "learning_rate": 0.00016908200374895, + "loss": 8.9162, + "step": 289900 + }, + { + "epoch": 3.09, + "learning_rate": 0.00016906183780514698, + "loss": 8.7858, + "step": 290000 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001690416664903977, + "loss": 8.8802, + "step": 290100 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001690214898062709, + "loss": 8.8971, + "step": 290200 + }, + { + "epoch": 3.09, + "learning_rate": 0.00016900130775433566, + "loss": 8.9348, + "step": 290300 + }, + { + "epoch": 3.09, + "learning_rate": 0.00016898112033616157, + "loss": 8.9205, + "step": 290400 + }, + { + "epoch": 3.09, + "learning_rate": 0.00016896092755331864, + "loss": 8.8464, + "step": 290500 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016894072940737718, + "loss": 8.8812, + "step": 290600 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016892052589990807, + "loss": 8.844, + "step": 290700 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016890031703248255, + "loss": 8.8703, + "step": 290800 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016888010280667224, + "loss": 8.9505, + "step": 290900 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001688598832240492, + "loss": 8.9742, + "step": 291000 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001688396582861859, + "loss": 8.9243, + "step": 291100 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016881942799465524, + "loss": 8.905, + "step": 291200 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016879919235103055, + "loss": 8.8743, + "step": 291300 + }, + { + "epoch": 3.1, + "learning_rate": 0.00016877895135688554, + "loss": 8.9128, + "step": 291400 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001687587050137944, + "loss": 8.8546, + "step": 291500 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001687384533233316, + "loss": 8.8468, + "step": 291600 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016871819628707217, + "loss": 8.8242, + "step": 291700 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016869793390659153, + "loss": 8.9047, + "step": 291800 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016867766618346543, + "loss": 8.8636, + "step": 291900 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016865739311927014, + "loss": 8.9261, + "step": 292000 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016863711471558227, + "loss": 8.9802, + "step": 292100 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016861683097397888, + "loss": 8.8888, + "step": 292200 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016859654189603745, + "loss": 8.8124, + "step": 292300 + }, + { + "epoch": 3.11, + "learning_rate": 0.00016857624748333586, + "loss": 8.8448, + "step": 292400 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016855594773745235, + "loss": 8.8565, + "step": 292500 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016853564265996576, + "loss": 8.8444, + "step": 292600 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001685153322524551, + "loss": 9.0277, + "step": 292700 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016849501651649995, + "loss": 8.9035, + "step": 292800 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016847469545368032, + "loss": 8.8548, + "step": 292900 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016845436906557648, + "loss": 8.8712, + "step": 293000 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001684340373537693, + "loss": 8.9075, + "step": 293100 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016841370031983995, + "loss": 8.9144, + "step": 293200 + }, + { + "epoch": 3.12, + "learning_rate": 0.00016839335796537005, + "loss": 8.9328, + "step": 293300 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001683730102919416, + "loss": 8.8397, + "step": 293400 + }, + { + "epoch": 3.13, + "learning_rate": 0.00016835265730113705, + "loss": 8.9189, + "step": 293500 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001683322989945393, + "loss": 8.8776, + "step": 293600 + }, + { + "epoch": 3.13, + "learning_rate": 0.00016831193537373152, + "loss": 8.8885, + "step": 293700 + }, + { + "epoch": 3.13, + "learning_rate": 0.00016829156644029752, + "loss": 8.8295, + "step": 293800 + }, + { + "epoch": 3.13, + "learning_rate": 0.00016827119219582127, + "loss": 8.8619, + "step": 293900 + }, + { + "epoch": 3.13, + "learning_rate": 0.00016825081264188735, + "loss": 8.8181, + "step": 294000 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001682304277800807, + "loss": 8.9626, + "step": 294100 + }, + { + "epoch": 3.13, + "learning_rate": 0.00016821003761198662, + "loss": 8.9288, + "step": 294200 + }, + { + "epoch": 3.13, + "learning_rate": 0.00016818964213919082, + "loss": 8.8504, + "step": 294300 + }, + { + "epoch": 3.14, + "learning_rate": 0.00016816924136327948, + "loss": 8.8629, + "step": 294400 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001681488352858392, + "loss": 8.9242, + "step": 294500 + }, + { + "epoch": 3.14, + "learning_rate": 0.00016812842390845692, + "loss": 8.8748, + "step": 294600 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001681080072327201, + "loss": 8.9207, + "step": 294700 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001680875852602165, + "loss": 8.9582, + "step": 294800 + }, + { + "epoch": 3.14, + "learning_rate": 0.00016806715799253433, + "loss": 8.8838, + "step": 294900 + }, + { + "epoch": 3.14, + "learning_rate": 0.00016804672543126224, + "loss": 8.8979, + "step": 295000 + }, + { + "epoch": 3.14, + "learning_rate": 0.00016802628757798925, + "loss": 8.871, + "step": 295100 + }, + { + "epoch": 3.14, + "learning_rate": 0.00016800584443430486, + "loss": 8.8415, + "step": 295200 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016798539600179887, + "loss": 8.9833, + "step": 295300 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016796494228206164, + "loss": 8.824, + "step": 295400 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016794448327668378, + "loss": 8.8348, + "step": 295500 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016792401898725642, + "loss": 8.8741, + "step": 295600 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016790354941537106, + "loss": 8.9638, + "step": 295700 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016788307456261966, + "loss": 8.8481, + "step": 295800 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016786259443059452, + "loss": 8.9451, + "step": 295900 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016784210902088837, + "loss": 8.9034, + "step": 296000 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016782161833509438, + "loss": 8.7941, + "step": 296100 + }, + { + "epoch": 3.15, + "learning_rate": 0.00016780112237480612, + "loss": 8.9949, + "step": 296200 + }, + { + "epoch": 3.16, + "learning_rate": 0.00016778062114161752, + "loss": 8.8263, + "step": 296300 + }, + { + "epoch": 3.16, + "learning_rate": 0.00016776011463712304, + "loss": 8.8967, + "step": 296400 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001677396028629174, + "loss": 8.9105, + "step": 296500 + }, + { + "epoch": 3.16, + "learning_rate": 0.00016771908582059584, + "loss": 8.9428, + "step": 296600 + }, + { + "epoch": 3.16, + "learning_rate": 0.00016769856351175398, + "loss": 8.8651, + "step": 296700 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001676780359379878, + "loss": 8.9337, + "step": 296800 + }, + { + "epoch": 3.16, + "learning_rate": 0.00016765750310089377, + "loss": 8.8074, + "step": 296900 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001676369650020687, + "loss": 8.9385, + "step": 297000 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001676164216431099, + "loss": 8.9569, + "step": 297100 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016759587302561495, + "loss": 8.9236, + "step": 297200 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016757531915118196, + "loss": 8.9351, + "step": 297300 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016755476002140943, + "loss": 8.858, + "step": 297400 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016753419563789618, + "loss": 8.8819, + "step": 297500 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016751362600224157, + "loss": 8.8933, + "step": 297600 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016749305111604527, + "loss": 8.8626, + "step": 297700 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001674724709809074, + "loss": 8.7971, + "step": 297800 + }, + { + "epoch": 3.17, + "learning_rate": 0.00016745188559842846, + "loss": 8.9559, + "step": 297900 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001674312949702094, + "loss": 8.8404, + "step": 298000 + }, + { + "epoch": 3.18, + "learning_rate": 0.00016741069909785155, + "loss": 8.8084, + "step": 298100 + }, + { + "epoch": 3.18, + "learning_rate": 0.00016739009798295664, + "loss": 8.8532, + "step": 298200 + }, + { + "epoch": 3.18, + "learning_rate": 0.00016736949162712684, + "loss": 8.8602, + "step": 298300 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001673488800319647, + "loss": 8.9834, + "step": 298400 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001673282631990732, + "loss": 8.8454, + "step": 298500 + }, + { + "epoch": 3.18, + "learning_rate": 0.00016730764113005568, + "loss": 8.9243, + "step": 298600 + }, + { + "epoch": 3.18, + "learning_rate": 0.00016728701382651595, + "loss": 8.9056, + "step": 298700 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001672663812900582, + "loss": 8.8982, + "step": 298800 + }, + { + "epoch": 3.18, + "learning_rate": 0.00016724574352228698, + "loss": 8.8392, + "step": 298900 + }, + { + "epoch": 3.18, + "learning_rate": 0.00016722510052480735, + "loss": 8.8603, + "step": 299000 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001672044522992247, + "loss": 8.8935, + "step": 299100 + }, + { + "epoch": 3.19, + "learning_rate": 0.00016718379884714482, + "loss": 8.8215, + "step": 299200 + }, + { + "epoch": 3.19, + "learning_rate": 0.00016716314017017393, + "loss": 8.8014, + "step": 299300 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001671424762699187, + "loss": 8.8379, + "step": 299400 + }, + { + "epoch": 3.19, + "learning_rate": 0.00016712180714798614, + "loss": 8.8318, + "step": 299500 + }, + { + "epoch": 3.19, + "learning_rate": 0.00016710113280598367, + "loss": 8.8614, + "step": 299600 + }, + { + "epoch": 3.19, + "learning_rate": 0.00016708045324551917, + "loss": 8.8288, + "step": 299700 + }, + { + "epoch": 3.19, + "learning_rate": 0.00016705976846820086, + "loss": 8.8342, + "step": 299800 + }, + { + "epoch": 3.19, + "learning_rate": 0.00016703907847563746, + "loss": 8.8155, + "step": 299900 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001670183832694379, + "loss": 8.9157, + "step": 300000 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001669976828512118, + "loss": 8.8243, + "step": 300100 + }, + { + "epoch": 3.2, + "learning_rate": 0.00016697697722256895, + "loss": 8.9397, + "step": 300200 + }, + { + "epoch": 3.2, + "learning_rate": 0.00016695626638511964, + "loss": 8.8923, + "step": 300300 + }, + { + "epoch": 3.2, + "learning_rate": 0.00016693555034047455, + "loss": 8.8594, + "step": 300400 + }, + { + "epoch": 3.2, + "learning_rate": 0.00016691482909024478, + "loss": 8.8345, + "step": 300500 + }, + { + "epoch": 3.2, + "learning_rate": 0.00016689410263604187, + "loss": 8.9996, + "step": 300600 + }, + { + "epoch": 3.2, + "learning_rate": 0.00016687337097947763, + "loss": 8.8269, + "step": 300700 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001668526341221644, + "loss": 8.8797, + "step": 300800 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001668318920657149, + "loss": 8.8778, + "step": 300900 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016681114481174224, + "loss": 8.8588, + "step": 301000 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016679039236185993, + "loss": 8.8876, + "step": 301100 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001667696347176819, + "loss": 8.8451, + "step": 301200 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016674887188082246, + "loss": 8.8142, + "step": 301300 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016672810385289636, + "loss": 8.8014, + "step": 301400 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016670733063551868, + "loss": 8.9007, + "step": 301500 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016668655223030498, + "loss": 8.8692, + "step": 301600 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016666576863887125, + "loss": 8.9013, + "step": 301700 + }, + { + "epoch": 3.21, + "learning_rate": 0.00016664497986283375, + "loss": 8.8695, + "step": 301800 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001666241859038093, + "loss": 8.8634, + "step": 301900 + }, + { + "epoch": 3.22, + "learning_rate": 0.000166603386763415, + "loss": 8.8922, + "step": 302000 + }, + { + "epoch": 3.22, + "learning_rate": 0.00016658258244326842, + "loss": 8.8432, + "step": 302100 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001665617729449875, + "loss": 8.8945, + "step": 302200 + }, + { + "epoch": 3.22, + "learning_rate": 0.00016654095827019062, + "loss": 8.9507, + "step": 302300 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001665201384204965, + "loss": 8.8219, + "step": 302400 + }, + { + "epoch": 3.22, + "learning_rate": 0.00016649931339752434, + "loss": 8.8339, + "step": 302500 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001664784832028937, + "loss": 8.8542, + "step": 302600 + }, + { + "epoch": 3.22, + "learning_rate": 0.00016645764783822454, + "loss": 8.8212, + "step": 302700 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016643680730513719, + "loss": 8.9278, + "step": 302800 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016641596160525248, + "loss": 8.969, + "step": 302900 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016639511074019158, + "loss": 8.8932, + "step": 303000 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016637425471157603, + "loss": 8.7918, + "step": 303100 + }, + { + "epoch": 3.23, + "learning_rate": 0.0001663533935210278, + "loss": 8.7991, + "step": 303200 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016633252717016933, + "loss": 8.8806, + "step": 303300 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016631165566062333, + "loss": 8.776, + "step": 303400 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016629077899401297, + "loss": 8.8823, + "step": 303500 + }, + { + "epoch": 3.23, + "learning_rate": 0.0001662698971719619, + "loss": 8.8288, + "step": 303600 + }, + { + "epoch": 3.23, + "learning_rate": 0.00016624901019609406, + "loss": 8.8365, + "step": 303700 + }, + { + "epoch": 3.24, + "learning_rate": 0.00016622811806803382, + "loss": 8.8313, + "step": 303800 + }, + { + "epoch": 3.24, + "learning_rate": 0.000166207220789406, + "loss": 8.8813, + "step": 303900 + }, + { + "epoch": 3.24, + "learning_rate": 0.00016618631836183578, + "loss": 8.8893, + "step": 304000 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001661654107869487, + "loss": 8.8544, + "step": 304100 + }, + { + "epoch": 3.24, + "learning_rate": 0.00016614449806637077, + "loss": 8.8635, + "step": 304200 + }, + { + "epoch": 3.24, + "learning_rate": 0.00016612358020172839, + "loss": 8.9248, + "step": 304300 + }, + { + "epoch": 3.24, + "learning_rate": 0.00016610265719464831, + "loss": 8.8184, + "step": 304400 + }, + { + "epoch": 3.24, + "learning_rate": 0.00016608172904675778, + "loss": 8.8668, + "step": 304500 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001660607957596843, + "loss": 8.8512, + "step": 304600 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016603985733505588, + "loss": 8.8757, + "step": 304700 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016601891377450095, + "loss": 8.8534, + "step": 304800 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016599796507964825, + "loss": 8.9527, + "step": 304900 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016597701125212698, + "loss": 8.9505, + "step": 305000 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016595605229356673, + "loss": 8.9393, + "step": 305100 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016593508820559742, + "loss": 8.8415, + "step": 305200 + }, + { + "epoch": 3.25, + "learning_rate": 0.0001659141189898495, + "loss": 8.9315, + "step": 305300 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016589314464795374, + "loss": 8.8932, + "step": 305400 + }, + { + "epoch": 3.25, + "learning_rate": 0.00016587216518154126, + "loss": 8.9088, + "step": 305500 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016585118059224367, + "loss": 8.8397, + "step": 305600 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016583019088169297, + "loss": 8.8646, + "step": 305700 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016580919605152153, + "loss": 8.8741, + "step": 305800 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016578819610336206, + "loss": 8.9116, + "step": 305900 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016576719103884776, + "loss": 8.9094, + "step": 306000 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016574618085961223, + "loss": 8.8791, + "step": 306100 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001657251655672894, + "loss": 8.8175, + "step": 306200 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016570414516351364, + "loss": 8.9112, + "step": 306300 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016568311964991968, + "loss": 8.8867, + "step": 306400 + }, + { + "epoch": 3.26, + "learning_rate": 0.00016566208902814276, + "loss": 8.935, + "step": 306500 + }, + { + "epoch": 3.27, + "learning_rate": 0.00016564105329981837, + "loss": 8.8282, + "step": 306600 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001656200124665824, + "loss": 8.8696, + "step": 306700 + }, + { + "epoch": 3.27, + "learning_rate": 0.00016559896653007132, + "loss": 8.8342, + "step": 306800 + }, + { + "epoch": 3.27, + "learning_rate": 0.00016557791549192183, + "loss": 8.8299, + "step": 306900 + }, + { + "epoch": 3.27, + "learning_rate": 0.00016555685935377103, + "loss": 8.9126, + "step": 307000 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001655357981172565, + "loss": 8.8309, + "step": 307100 + }, + { + "epoch": 3.27, + "learning_rate": 0.00016551473178401618, + "loss": 8.9544, + "step": 307200 + }, + { + "epoch": 3.27, + "learning_rate": 0.00016549366035568838, + "loss": 8.8615, + "step": 307300 + }, + { + "epoch": 3.27, + "learning_rate": 0.00016547258383391183, + "loss": 8.8673, + "step": 307400 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016545150222032563, + "loss": 8.8326, + "step": 307500 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016543041551656936, + "loss": 8.8818, + "step": 307600 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016540932372428288, + "loss": 8.807, + "step": 307700 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016538822684510654, + "loss": 8.8328, + "step": 307800 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016536712488068102, + "loss": 8.804, + "step": 307900 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016534601783264744, + "loss": 8.7601, + "step": 308000 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016532490570264727, + "loss": 8.8995, + "step": 308100 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016530378849232243, + "loss": 8.8031, + "step": 308200 + }, + { + "epoch": 3.28, + "learning_rate": 0.0001652826662033152, + "loss": 8.9032, + "step": 308300 + }, + { + "epoch": 3.28, + "learning_rate": 0.00016526153883726825, + "loss": 8.7545, + "step": 308400 + }, + { + "epoch": 3.29, + "learning_rate": 0.00016524040639582466, + "loss": 8.9056, + "step": 308500 + }, + { + "epoch": 3.29, + "learning_rate": 0.00016521926888062792, + "loss": 8.8039, + "step": 308600 + }, + { + "epoch": 3.29, + "learning_rate": 0.00016519812629332188, + "loss": 8.8814, + "step": 308700 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001651769786355508, + "loss": 8.9495, + "step": 308800 + }, + { + "epoch": 3.29, + "learning_rate": 0.00016515582590895936, + "loss": 8.8503, + "step": 308900 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001651346681151926, + "loss": 8.889, + "step": 309000 + }, + { + "epoch": 3.29, + "learning_rate": 0.00016511350525589592, + "loss": 8.8396, + "step": 309100 + }, + { + "epoch": 3.29, + "learning_rate": 0.00016509233733271524, + "loss": 8.9425, + "step": 309200 + }, + { + "epoch": 3.29, + "learning_rate": 0.00016507116434729674, + "loss": 8.8979, + "step": 309300 + }, + { + "epoch": 3.3, + "learning_rate": 0.00016504998630128702, + "loss": 8.8985, + "step": 309400 + }, + { + "epoch": 3.3, + "learning_rate": 0.00016502880319633313, + "loss": 8.8973, + "step": 309500 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001650076150340825, + "loss": 8.8845, + "step": 309600 + }, + { + "epoch": 3.3, + "learning_rate": 0.00016498642181618292, + "loss": 8.8878, + "step": 309700 + }, + { + "epoch": 3.3, + "learning_rate": 0.00016496522354428258, + "loss": 8.872, + "step": 309800 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001649440202200301, + "loss": 8.8983, + "step": 309900 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001649228118450744, + "loss": 8.8325, + "step": 310000 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001649015984210649, + "loss": 8.8496, + "step": 310100 + }, + { + "epoch": 3.3, + "learning_rate": 0.00016488037994965142, + "loss": 8.8447, + "step": 310200 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016485915643248405, + "loss": 8.9117, + "step": 310300 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016483792787121338, + "loss": 8.8934, + "step": 310400 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016481669426749032, + "loss": 8.9058, + "step": 310500 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016479545562296625, + "loss": 8.9166, + "step": 310600 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016477421193929288, + "loss": 8.8728, + "step": 310700 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016475296321812234, + "loss": 8.8514, + "step": 310800 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016473170946110715, + "loss": 8.869, + "step": 310900 + }, + { + "epoch": 3.31, + "learning_rate": 0.0001647104506699002, + "loss": 8.8177, + "step": 311000 + }, + { + "epoch": 3.31, + "learning_rate": 0.00016468918684615482, + "loss": 8.785, + "step": 311100 + }, + { + "epoch": 3.31, + "learning_rate": 0.0001646679179915247, + "loss": 8.908, + "step": 311200 + }, + { + "epoch": 3.32, + "learning_rate": 0.00016464664410766386, + "loss": 8.8311, + "step": 311300 + }, + { + "epoch": 3.32, + "learning_rate": 0.00016462536519622683, + "loss": 8.8482, + "step": 311400 + }, + { + "epoch": 3.32, + "learning_rate": 0.00016460408125886845, + "loss": 8.8471, + "step": 311500 + }, + { + "epoch": 3.32, + "learning_rate": 0.000164582792297244, + "loss": 8.8361, + "step": 311600 + }, + { + "epoch": 3.32, + "learning_rate": 0.0001645614983130091, + "loss": 8.7845, + "step": 311700 + }, + { + "epoch": 3.32, + "learning_rate": 0.00016454019930781975, + "loss": 8.8361, + "step": 311800 + }, + { + "epoch": 3.32, + "learning_rate": 0.0001645188952833325, + "loss": 8.86, + "step": 311900 + }, + { + "epoch": 3.32, + "learning_rate": 0.000164497586241204, + "loss": 8.8472, + "step": 312000 + }, + { + "epoch": 3.32, + "learning_rate": 0.00016447627218309162, + "loss": 8.8096, + "step": 312100 + }, + { + "epoch": 3.33, + "learning_rate": 0.00016445495311065281, + "loss": 8.8513, + "step": 312200 + }, + { + "epoch": 3.33, + "learning_rate": 0.00016443362902554567, + "loss": 8.8329, + "step": 312300 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001644122999294285, + "loss": 8.964, + "step": 312400 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001643909658239601, + "loss": 8.9141, + "step": 312500 + }, + { + "epoch": 3.33, + "learning_rate": 0.00016436962671079961, + "loss": 8.8478, + "step": 312600 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001643482825916066, + "loss": 8.8271, + "step": 312700 + }, + { + "epoch": 3.33, + "learning_rate": 0.00016432693346804097, + "loss": 8.8762, + "step": 312800 + }, + { + "epoch": 3.33, + "learning_rate": 0.00016430557934176313, + "loss": 8.9101, + "step": 312900 + }, + { + "epoch": 3.33, + "learning_rate": 0.00016428422021443363, + "loss": 8.8378, + "step": 313000 + }, + { + "epoch": 3.33, + "learning_rate": 0.00016426285608771372, + "loss": 8.9024, + "step": 313100 + }, + { + "epoch": 3.34, + "learning_rate": 0.00016424148696326485, + "loss": 8.8365, + "step": 313200 + }, + { + "epoch": 3.34, + "learning_rate": 0.00016422011284274887, + "loss": 8.7968, + "step": 313300 + }, + { + "epoch": 3.34, + "learning_rate": 0.00016419873372782806, + "loss": 8.9143, + "step": 313400 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001641773496201651, + "loss": 8.8139, + "step": 313500 + }, + { + "epoch": 3.34, + "learning_rate": 0.00016415596052142298, + "loss": 8.817, + "step": 313600 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001641345664332652, + "loss": 8.7739, + "step": 313700 + }, + { + "epoch": 3.34, + "learning_rate": 0.00016411316735735553, + "loss": 8.9066, + "step": 313800 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001640917632953582, + "loss": 8.9058, + "step": 313900 + }, + { + "epoch": 3.34, + "learning_rate": 0.00016407035424893782, + "loss": 8.9351, + "step": 314000 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001640489402197593, + "loss": 8.8297, + "step": 314100 + }, + { + "epoch": 3.35, + "learning_rate": 0.00016402752120948813, + "loss": 8.9225, + "step": 314200 + }, + { + "epoch": 3.35, + "learning_rate": 0.00016400609721978999, + "loss": 8.7641, + "step": 314300 + }, + { + "epoch": 3.35, + "learning_rate": 0.00016398466825233104, + "loss": 8.8751, + "step": 314400 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001639632343087778, + "loss": 8.8796, + "step": 314500 + }, + { + "epoch": 3.35, + "learning_rate": 0.00016394179539079722, + "loss": 8.8335, + "step": 314600 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001639203515000566, + "loss": 8.8226, + "step": 314700 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001638989026382236, + "loss": 8.8491, + "step": 314800 + }, + { + "epoch": 3.35, + "learning_rate": 0.00016387744880696635, + "loss": 8.8421, + "step": 314900 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016385599000795327, + "loss": 8.8603, + "step": 315000 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016383452624285324, + "loss": 8.9275, + "step": 315100 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001638130575133355, + "loss": 8.8467, + "step": 315200 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016379158382106966, + "loss": 8.9356, + "step": 315300 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016377010516772575, + "loss": 8.8629, + "step": 315400 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016374862155497417, + "loss": 8.8885, + "step": 315500 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001637271329844857, + "loss": 8.8025, + "step": 315600 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016370563945793145, + "loss": 8.8273, + "step": 315700 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016368414097698306, + "loss": 8.8153, + "step": 315800 + }, + { + "epoch": 3.36, + "learning_rate": 0.00016366263754331243, + "loss": 8.8415, + "step": 315900 + }, + { + "epoch": 3.37, + "learning_rate": 0.00016364112915859188, + "loss": 8.9096, + "step": 316000 + }, + { + "epoch": 3.37, + "learning_rate": 0.00016361961582449412, + "loss": 8.8118, + "step": 316100 + }, + { + "epoch": 3.37, + "learning_rate": 0.00016359809754269225, + "loss": 8.8581, + "step": 316200 + }, + { + "epoch": 3.37, + "learning_rate": 0.00016357657431485977, + "loss": 8.8342, + "step": 316300 + }, + { + "epoch": 3.37, + "learning_rate": 0.0001635550461426705, + "loss": 8.774, + "step": 316400 + }, + { + "epoch": 3.37, + "learning_rate": 0.0001635335130277987, + "loss": 8.8394, + "step": 316500 + }, + { + "epoch": 3.37, + "learning_rate": 0.00016351197497191906, + "loss": 8.7936, + "step": 316600 + }, + { + "epoch": 3.37, + "learning_rate": 0.00016349043197670652, + "loss": 8.8635, + "step": 316700 + }, + { + "epoch": 3.37, + "learning_rate": 0.00016346888404383654, + "loss": 8.8644, + "step": 316800 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016344733117498484, + "loss": 8.9004, + "step": 316900 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016342577337182763, + "loss": 8.8511, + "step": 317000 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016340421063604146, + "loss": 8.807, + "step": 317100 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016338264296930326, + "loss": 8.8355, + "step": 317200 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016336107037329036, + "loss": 8.891, + "step": 317300 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016333949284968045, + "loss": 8.812, + "step": 317400 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016331791040015162, + "loss": 8.8547, + "step": 317500 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001632963230263824, + "loss": 8.8529, + "step": 317600 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001632747307300515, + "loss": 8.7918, + "step": 317700 + }, + { + "epoch": 3.38, + "learning_rate": 0.00016325313351283827, + "loss": 8.8366, + "step": 317800 + }, + { + "epoch": 3.39, + "learning_rate": 0.0001632315313764223, + "loss": 8.7922, + "step": 317900 + }, + { + "epoch": 3.39, + "learning_rate": 0.0001632099243224836, + "loss": 8.8495, + "step": 318000 + }, + { + "epoch": 3.39, + "learning_rate": 0.00016318831235270252, + "loss": 8.8635, + "step": 318100 + }, + { + "epoch": 3.39, + "learning_rate": 0.00016316669546875985, + "loss": 8.8716, + "step": 318200 + }, + { + "epoch": 3.39, + "learning_rate": 0.00016314507367233677, + "loss": 8.8005, + "step": 318300 + }, + { + "epoch": 3.39, + "learning_rate": 0.00016312344696511476, + "loss": 8.9005, + "step": 318400 + }, + { + "epoch": 3.39, + "learning_rate": 0.0001631018153487757, + "loss": 8.9588, + "step": 318500 + }, + { + "epoch": 3.39, + "learning_rate": 0.00016308017882500194, + "loss": 8.7969, + "step": 318600 + }, + { + "epoch": 3.39, + "learning_rate": 0.00016305853739547618, + "loss": 8.8721, + "step": 318700 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001630368910618814, + "loss": 8.8004, + "step": 318800 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001630152398259011, + "loss": 8.7804, + "step": 318900 + }, + { + "epoch": 3.4, + "learning_rate": 0.00016299358368921903, + "loss": 8.9114, + "step": 319000 + }, + { + "epoch": 3.4, + "learning_rate": 0.00016297192265351949, + "loss": 8.7883, + "step": 319100 + }, + { + "epoch": 3.4, + "learning_rate": 0.000162950256720487, + "loss": 8.8704, + "step": 319200 + }, + { + "epoch": 3.4, + "learning_rate": 0.00016292858589180646, + "loss": 8.8476, + "step": 319300 + }, + { + "epoch": 3.4, + "learning_rate": 0.00016290691016916333, + "loss": 8.8775, + "step": 319400 + }, + { + "epoch": 3.4, + "learning_rate": 0.00016288522955424327, + "loss": 8.9047, + "step": 319500 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001628635440487324, + "loss": 8.9268, + "step": 319600 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001628418536543172, + "loss": 8.8271, + "step": 319700 + }, + { + "epoch": 3.41, + "learning_rate": 0.00016282015837268456, + "loss": 8.8366, + "step": 319800 + }, + { + "epoch": 3.41, + "learning_rate": 0.00016279845820552167, + "loss": 8.7672, + "step": 319900 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001627767531545162, + "loss": 8.8677, + "step": 320000 + }, + { + "epoch": 3.41, + "learning_rate": 0.00016275504322135612, + "loss": 8.8583, + "step": 320100 + }, + { + "epoch": 3.41, + "learning_rate": 0.00016273332840772983, + "loss": 8.7989, + "step": 320200 + }, + { + "epoch": 3.41, + "learning_rate": 0.00016271160871532612, + "loss": 8.9044, + "step": 320300 + }, + { + "epoch": 3.41, + "learning_rate": 0.00016268988414583412, + "loss": 8.7894, + "step": 320400 + }, + { + "epoch": 3.41, + "learning_rate": 0.00016266815470094328, + "loss": 8.8726, + "step": 320500 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001626464203823436, + "loss": 8.8633, + "step": 320600 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016262468119172528, + "loss": 8.8091, + "step": 320700 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016260293713077906, + "loss": 8.9145, + "step": 320800 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016258118820119591, + "loss": 8.8588, + "step": 320900 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001625594344046673, + "loss": 8.8703, + "step": 321000 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016253767574288502, + "loss": 8.771, + "step": 321100 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016251591221754115, + "loss": 8.7819, + "step": 321200 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016249414383032838, + "loss": 8.872, + "step": 321300 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016247237058293954, + "loss": 8.8432, + "step": 321400 + }, + { + "epoch": 3.42, + "learning_rate": 0.00016245059247706797, + "loss": 8.8634, + "step": 321500 + }, + { + "epoch": 3.43, + "learning_rate": 0.00016242880951440735, + "loss": 8.8914, + "step": 321600 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001624070216966518, + "loss": 8.8192, + "step": 321700 + }, + { + "epoch": 3.43, + "learning_rate": 0.00016238522902549564, + "loss": 8.9342, + "step": 321800 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001623634315026338, + "loss": 8.848, + "step": 321900 + }, + { + "epoch": 3.43, + "learning_rate": 0.00016234162912976143, + "loss": 8.9287, + "step": 322000 + }, + { + "epoch": 3.43, + "learning_rate": 0.00016231982190857412, + "loss": 8.8312, + "step": 322100 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001622980098407678, + "loss": 8.8721, + "step": 322200 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001622761929280388, + "loss": 8.7652, + "step": 322300 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001622543711720838, + "loss": 8.827, + "step": 322400 + }, + { + "epoch": 3.44, + "learning_rate": 0.0001622325445746, + "loss": 8.8409, + "step": 322500 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016221071313728466, + "loss": 8.8458, + "step": 322600 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016218887686183576, + "loss": 8.7498, + "step": 322700 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016216703574995148, + "loss": 8.8714, + "step": 322800 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016214518980333037, + "loss": 8.8048, + "step": 322900 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016212333902367143, + "loss": 8.8655, + "step": 323000 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016210148341267398, + "loss": 8.8664, + "step": 323100 + }, + { + "epoch": 3.44, + "learning_rate": 0.0001620796229720377, + "loss": 8.8481, + "step": 323200 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016205775770346275, + "loss": 8.785, + "step": 323300 + }, + { + "epoch": 3.44, + "learning_rate": 0.00016203588760864957, + "loss": 8.8145, + "step": 323400 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016201401268929895, + "loss": 8.8889, + "step": 323500 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016199213294711216, + "loss": 8.7844, + "step": 323600 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016197024838379077, + "loss": 8.8415, + "step": 323700 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016194835900103675, + "loss": 8.812, + "step": 323800 + }, + { + "epoch": 3.45, + "learning_rate": 0.0001619264648005524, + "loss": 8.824, + "step": 323900 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016190456578404054, + "loss": 8.8469, + "step": 324000 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016188266195320416, + "loss": 8.8818, + "step": 324100 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016186075330974675, + "loss": 8.8142, + "step": 324200 + }, + { + "epoch": 3.45, + "learning_rate": 0.00016183883985537213, + "loss": 8.8315, + "step": 324300 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016181692159178453, + "loss": 8.9224, + "step": 324400 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016179499852068857, + "loss": 8.8194, + "step": 324500 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016177307064378917, + "loss": 8.8242, + "step": 324600 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016175113796279165, + "loss": 8.8843, + "step": 324700 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016172920047940177, + "loss": 8.8034, + "step": 324800 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016170725819532555, + "loss": 8.7634, + "step": 324900 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016168531111226947, + "loss": 8.9174, + "step": 325000 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016166335923194035, + "loss": 8.8254, + "step": 325100 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016164140255604543, + "loss": 8.8592, + "step": 325200 + }, + { + "epoch": 3.46, + "learning_rate": 0.00016161944108629225, + "loss": 8.8166, + "step": 325300 + }, + { + "epoch": 3.47, + "learning_rate": 0.00016159747482438872, + "loss": 8.8608, + "step": 325400 + }, + { + "epoch": 3.47, + "learning_rate": 0.00016157550377204322, + "loss": 8.8917, + "step": 325500 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001615535279309644, + "loss": 8.8638, + "step": 325600 + }, + { + "epoch": 3.47, + "learning_rate": 0.00016153154730286137, + "loss": 8.8742, + "step": 325700 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001615095618894435, + "loss": 8.8238, + "step": 325800 + }, + { + "epoch": 3.47, + "learning_rate": 0.00016148757169242062, + "loss": 8.8172, + "step": 325900 + }, + { + "epoch": 3.47, + "learning_rate": 0.00016146557671350298, + "loss": 8.776, + "step": 326000 + }, + { + "epoch": 3.47, + "learning_rate": 0.00016144357695440102, + "loss": 8.8274, + "step": 326100 + }, + { + "epoch": 3.47, + "learning_rate": 0.00016142157241682577, + "loss": 8.8697, + "step": 326200 + }, + { + "epoch": 3.48, + "learning_rate": 0.00016139956310248842, + "loss": 8.8137, + "step": 326300 + }, + { + "epoch": 3.48, + "learning_rate": 0.00016137754901310073, + "loss": 8.8269, + "step": 326400 + }, + { + "epoch": 3.48, + "learning_rate": 0.00016135553015037468, + "loss": 8.8872, + "step": 326500 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001613335065160227, + "loss": 8.8784, + "step": 326600 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001613114781117576, + "loss": 8.7493, + "step": 326700 + }, + { + "epoch": 3.48, + "learning_rate": 0.00016128944493929245, + "loss": 8.8708, + "step": 326800 + }, + { + "epoch": 3.48, + "learning_rate": 0.00016126740700034083, + "loss": 8.7749, + "step": 326900 + }, + { + "epoch": 3.48, + "learning_rate": 0.00016124536429661665, + "loss": 8.7913, + "step": 327000 + }, + { + "epoch": 3.48, + "learning_rate": 0.00016122331682983415, + "loss": 8.8539, + "step": 327100 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001612012646017079, + "loss": 8.8501, + "step": 327200 + }, + { + "epoch": 3.49, + "learning_rate": 0.00016117920761395303, + "loss": 8.8217, + "step": 327300 + }, + { + "epoch": 3.49, + "learning_rate": 0.00016115714586828483, + "loss": 8.7911, + "step": 327400 + }, + { + "epoch": 3.49, + "learning_rate": 0.00016113507936641906, + "loss": 8.8302, + "step": 327500 + }, + { + "epoch": 3.49, + "learning_rate": 0.00016111300811007183, + "loss": 8.8339, + "step": 327600 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001610909321009596, + "loss": 8.9134, + "step": 327700 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001610688513407993, + "loss": 8.8141, + "step": 327800 + }, + { + "epoch": 3.49, + "learning_rate": 0.00016104676583130812, + "loss": 8.8283, + "step": 327900 + }, + { + "epoch": 3.49, + "learning_rate": 0.00016102467557420363, + "loss": 8.8596, + "step": 328000 + }, + { + "epoch": 3.49, + "learning_rate": 0.00016100258057120378, + "loss": 8.833, + "step": 328100 + }, + { + "epoch": 3.5, + "learning_rate": 0.00016098048082402694, + "loss": 8.8038, + "step": 328200 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001609583763343918, + "loss": 8.9334, + "step": 328300 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001609362671040174, + "loss": 8.6813, + "step": 328400 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001609141531346232, + "loss": 8.8347, + "step": 328500 + }, + { + "epoch": 3.5, + "learning_rate": 0.000160892034427929, + "loss": 8.8028, + "step": 328600 + }, + { + "epoch": 3.5, + "learning_rate": 0.000160869910985655, + "loss": 8.7974, + "step": 328700 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001608477828095217, + "loss": 8.9062, + "step": 328800 + }, + { + "epoch": 3.5, + "learning_rate": 0.00016082564990125008, + "loss": 8.8358, + "step": 328900 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001608035122625613, + "loss": 8.7716, + "step": 329000 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001607813698951771, + "loss": 8.7402, + "step": 329100 + }, + { + "epoch": 3.51, + "learning_rate": 0.00016075922280081947, + "loss": 8.8229, + "step": 329200 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001607370709812108, + "loss": 8.8703, + "step": 329300 + }, + { + "epoch": 3.51, + "learning_rate": 0.00016071491443807383, + "loss": 8.8267, + "step": 329400 + }, + { + "epoch": 3.51, + "learning_rate": 0.00016069275317313165, + "loss": 8.8735, + "step": 329500 + }, + { + "epoch": 3.51, + "learning_rate": 0.00016067058718810783, + "loss": 8.8592, + "step": 329600 + }, + { + "epoch": 3.51, + "learning_rate": 0.00016064841648472613, + "loss": 8.8177, + "step": 329700 + }, + { + "epoch": 3.51, + "learning_rate": 0.00016062624106471076, + "loss": 8.9204, + "step": 329800 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001606040609297864, + "loss": 8.8927, + "step": 329900 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001605818760816779, + "loss": 8.8842, + "step": 330000 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016055968652211068, + "loss": 8.8987, + "step": 330100 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016053749225281033, + "loss": 8.8613, + "step": 330200 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016051529327550297, + "loss": 8.8361, + "step": 330300 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016049308959191496, + "loss": 8.8699, + "step": 330400 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001604708812037731, + "loss": 8.8068, + "step": 330500 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016044866811280457, + "loss": 8.8031, + "step": 330600 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016042645032073686, + "loss": 8.8496, + "step": 330700 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016040422782929786, + "loss": 8.8256, + "step": 330800 + }, + { + "epoch": 3.52, + "learning_rate": 0.00016038200064021578, + "loss": 8.8414, + "step": 330900 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001603597687552193, + "loss": 8.8435, + "step": 331000 + }, + { + "epoch": 3.53, + "learning_rate": 0.00016033753217603732, + "loss": 8.8762, + "step": 331100 + }, + { + "epoch": 3.53, + "learning_rate": 0.00016031529090439921, + "loss": 8.8422, + "step": 331200 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001602930449420347, + "loss": 8.7924, + "step": 331300 + }, + { + "epoch": 3.53, + "learning_rate": 0.00016027079429067382, + "loss": 8.7515, + "step": 331400 + }, + { + "epoch": 3.53, + "learning_rate": 0.00016024853895204704, + "loss": 8.8623, + "step": 331500 + }, + { + "epoch": 3.53, + "learning_rate": 0.00016022627892788515, + "loss": 8.8583, + "step": 331600 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001602040142199193, + "loss": 8.8838, + "step": 331700 + }, + { + "epoch": 3.53, + "learning_rate": 0.00016018174482988106, + "loss": 8.8254, + "step": 331800 + }, + { + "epoch": 3.54, + "learning_rate": 0.00016015947075950226, + "loss": 8.8392, + "step": 331900 + }, + { + "epoch": 3.54, + "learning_rate": 0.0001601371920105152, + "loss": 8.8825, + "step": 332000 + }, + { + "epoch": 3.54, + "learning_rate": 0.0001601149085846525, + "loss": 8.8602, + "step": 332100 + }, + { + "epoch": 3.54, + "learning_rate": 0.00016009262048364713, + "loss": 8.7105, + "step": 332200 + }, + { + "epoch": 3.54, + "learning_rate": 0.00016007032770923245, + "loss": 8.8744, + "step": 332300 + }, + { + "epoch": 3.54, + "learning_rate": 0.00016004803026314215, + "loss": 8.8508, + "step": 332400 + }, + { + "epoch": 3.54, + "learning_rate": 0.00016002572814711034, + "loss": 8.8154, + "step": 332500 + }, + { + "epoch": 3.54, + "learning_rate": 0.00016000342136287144, + "loss": 8.8198, + "step": 332600 + }, + { + "epoch": 3.54, + "learning_rate": 0.0001599811099121602, + "loss": 8.7725, + "step": 332700 + }, + { + "epoch": 3.54, + "learning_rate": 0.00015995879379671188, + "loss": 8.7143, + "step": 332800 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015993647301826194, + "loss": 8.8654, + "step": 332900 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015991414757854627, + "loss": 8.9016, + "step": 333000 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015989181747930116, + "loss": 8.8549, + "step": 333100 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015986948272226321, + "loss": 8.861, + "step": 333200 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015984714330916937, + "loss": 8.8253, + "step": 333300 + }, + { + "epoch": 3.55, + "learning_rate": 0.000159824799241757, + "loss": 8.8724, + "step": 333400 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015980245052176382, + "loss": 8.8608, + "step": 333500 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015978009715092786, + "loss": 8.8278, + "step": 333600 + }, + { + "epoch": 3.55, + "learning_rate": 0.00015975773913098754, + "loss": 8.8414, + "step": 333700 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001597353764636817, + "loss": 8.8037, + "step": 333800 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001597130091507494, + "loss": 8.8772, + "step": 333900 + }, + { + "epoch": 3.56, + "learning_rate": 0.00015969063719393023, + "loss": 8.8659, + "step": 334000 + }, + { + "epoch": 3.56, + "learning_rate": 0.00015966826059496397, + "loss": 8.9268, + "step": 334100 + }, + { + "epoch": 3.56, + "learning_rate": 0.00015964587935559095, + "loss": 8.8329, + "step": 334200 + }, + { + "epoch": 3.56, + "learning_rate": 0.00015962349347755175, + "loss": 8.8112, + "step": 334300 + }, + { + "epoch": 3.56, + "learning_rate": 0.00015960110296258723, + "loss": 8.8808, + "step": 334400 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001595787078124388, + "loss": 8.8263, + "step": 334500 + }, + { + "epoch": 3.56, + "learning_rate": 0.00015955630802884808, + "loss": 8.8331, + "step": 334600 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001595339036135571, + "loss": 8.7711, + "step": 334700 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001595114945683083, + "loss": 8.7296, + "step": 334800 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001594890808948444, + "loss": 8.8117, + "step": 334900 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001594666625949085, + "loss": 8.7724, + "step": 335000 + }, + { + "epoch": 3.57, + "learning_rate": 0.00015944423967024411, + "loss": 8.908, + "step": 335100 + }, + { + "epoch": 3.57, + "learning_rate": 0.00015942181212259503, + "loss": 8.8009, + "step": 335200 + }, + { + "epoch": 3.57, + "learning_rate": 0.00015939937995370547, + "loss": 8.7553, + "step": 335300 + }, + { + "epoch": 3.57, + "learning_rate": 0.00015937694316532, + "loss": 8.8445, + "step": 335400 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001593545017591835, + "loss": 8.8232, + "step": 335500 + }, + { + "epoch": 3.57, + "learning_rate": 0.00015933205573704124, + "loss": 8.8788, + "step": 335600 + }, + { + "epoch": 3.58, + "learning_rate": 0.0001593096051006389, + "loss": 8.7842, + "step": 335700 + }, + { + "epoch": 3.58, + "learning_rate": 0.0001592871498517224, + "loss": 8.7617, + "step": 335800 + }, + { + "epoch": 3.58, + "learning_rate": 0.0001592646899920381, + "loss": 8.8858, + "step": 335900 + }, + { + "epoch": 3.58, + "learning_rate": 0.00015924222552333276, + "loss": 8.8421, + "step": 336000 + }, + { + "epoch": 3.58, + "learning_rate": 0.00015921975644735337, + "loss": 8.7263, + "step": 336100 + }, + { + "epoch": 3.58, + "learning_rate": 0.00015919728276584744, + "loss": 8.8489, + "step": 336200 + }, + { + "epoch": 3.58, + "learning_rate": 0.00015917480448056267, + "loss": 8.8309, + "step": 336300 + }, + { + "epoch": 3.58, + "learning_rate": 0.00015915232159324723, + "loss": 8.7476, + "step": 336400 + }, + { + "epoch": 3.58, + "learning_rate": 0.0001591298341056496, + "loss": 8.8439, + "step": 336500 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015910734201951866, + "loss": 8.8549, + "step": 336600 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015908484533660362, + "loss": 8.6809, + "step": 336700 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015906234405865406, + "loss": 8.8895, + "step": 336800 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015903983818741984, + "loss": 8.8627, + "step": 336900 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015901732772465133, + "loss": 8.7655, + "step": 337000 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001589948126720991, + "loss": 8.8692, + "step": 337100 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001589722930315142, + "loss": 8.7777, + "step": 337200 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015894976880464795, + "loss": 8.8005, + "step": 337300 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001589272399932521, + "loss": 8.8294, + "step": 337400 + }, + { + "epoch": 3.59, + "learning_rate": 0.00015890470659907867, + "loss": 8.7806, + "step": 337500 + }, + { + "epoch": 3.6, + "learning_rate": 0.00015888216862388013, + "loss": 8.7413, + "step": 337600 + }, + { + "epoch": 3.6, + "learning_rate": 0.00015885962606940925, + "loss": 8.8273, + "step": 337700 + }, + { + "epoch": 3.6, + "learning_rate": 0.00015883707893741913, + "loss": 8.8289, + "step": 337800 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001588145272296633, + "loss": 8.8462, + "step": 337900 + }, + { + "epoch": 3.6, + "learning_rate": 0.00015879197094789558, + "loss": 8.8588, + "step": 338000 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001587694100938702, + "loss": 8.8417, + "step": 338100 + }, + { + "epoch": 3.6, + "learning_rate": 0.00015874684466934174, + "loss": 8.7962, + "step": 338200 + }, + { + "epoch": 3.6, + "learning_rate": 0.00015872427467606502, + "loss": 8.818, + "step": 338300 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001587017001157954, + "loss": 8.7353, + "step": 338400 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001586791209902885, + "loss": 8.8433, + "step": 338500 + }, + { + "epoch": 3.61, + "learning_rate": 0.00015865653730130031, + "loss": 8.8131, + "step": 338600 + }, + { + "epoch": 3.61, + "learning_rate": 0.00015863394905058712, + "loss": 8.7508, + "step": 338700 + }, + { + "epoch": 3.61, + "learning_rate": 0.00015861135623990567, + "loss": 8.8329, + "step": 338800 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001585887588710129, + "loss": 8.7581, + "step": 338900 + }, + { + "epoch": 3.61, + "learning_rate": 0.00015856615694566636, + "loss": 8.8585, + "step": 339000 + }, + { + "epoch": 3.61, + "learning_rate": 0.00015854355046562372, + "loss": 8.7996, + "step": 339100 + }, + { + "epoch": 3.61, + "learning_rate": 0.00015852093943264308, + "loss": 8.8807, + "step": 339200 + }, + { + "epoch": 3.61, + "learning_rate": 0.00015849832384848291, + "loss": 8.8583, + "step": 339300 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015847570371490207, + "loss": 8.8223, + "step": 339400 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015845307903365966, + "loss": 8.8431, + "step": 339500 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015843044980651526, + "loss": 8.8003, + "step": 339600 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015840781603522873, + "loss": 8.8752, + "step": 339700 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015838517772156032, + "loss": 8.752, + "step": 339800 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015836253486727056, + "loss": 8.914, + "step": 339900 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015833988747412044, + "loss": 8.7908, + "step": 340000 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015831723554387121, + "loss": 8.7905, + "step": 340100 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015829457907828455, + "loss": 8.7981, + "step": 340200 + }, + { + "epoch": 3.62, + "learning_rate": 0.00015827191807912244, + "loss": 8.8325, + "step": 340300 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001582492525481472, + "loss": 8.7974, + "step": 340400 + }, + { + "epoch": 3.63, + "learning_rate": 0.00015822658248712158, + "loss": 8.8586, + "step": 340500 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001582039078978086, + "loss": 8.8437, + "step": 340600 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001581812287819717, + "loss": 8.7443, + "step": 340700 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001581585451413746, + "loss": 8.7585, + "step": 340800 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001581358569777814, + "loss": 8.896, + "step": 340900 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001581131642929566, + "loss": 8.8495, + "step": 341000 + }, + { + "epoch": 3.63, + "learning_rate": 0.00015809046708866503, + "loss": 8.7829, + "step": 341100 + }, + { + "epoch": 3.63, + "learning_rate": 0.00015806776536667178, + "loss": 8.8781, + "step": 341200 + }, + { + "epoch": 3.64, + "learning_rate": 0.00015804505912874243, + "loss": 8.7541, + "step": 341300 + }, + { + "epoch": 3.64, + "learning_rate": 0.0001580223483766428, + "loss": 8.7986, + "step": 341400 + }, + { + "epoch": 3.64, + "learning_rate": 0.00015799963311213913, + "loss": 8.7963, + "step": 341500 + }, + { + "epoch": 3.64, + "learning_rate": 0.000157976913336998, + "loss": 8.8106, + "step": 341600 + }, + { + "epoch": 3.64, + "learning_rate": 0.00015795418905298632, + "loss": 8.8739, + "step": 341700 + }, + { + "epoch": 3.64, + "learning_rate": 0.00015793146026187133, + "loss": 8.7188, + "step": 341800 + }, + { + "epoch": 3.64, + "learning_rate": 0.0001579087269654207, + "loss": 8.8487, + "step": 341900 + }, + { + "epoch": 3.64, + "learning_rate": 0.0001578859891654024, + "loss": 8.7577, + "step": 342000 + }, + { + "epoch": 3.64, + "learning_rate": 0.0001578632468635847, + "loss": 8.8199, + "step": 342100 + }, + { + "epoch": 3.64, + "learning_rate": 0.00015784050006173626, + "loss": 8.8172, + "step": 342200 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001578177487616262, + "loss": 8.808, + "step": 342300 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015779499296502382, + "loss": 8.8138, + "step": 342400 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015777223267369884, + "loss": 8.7688, + "step": 342500 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015774946788942135, + "loss": 8.8189, + "step": 342600 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015772669861396175, + "loss": 8.7327, + "step": 342700 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015770392484909084, + "loss": 8.9121, + "step": 342800 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015768114659657972, + "loss": 8.8576, + "step": 342900 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015765836385819986, + "loss": 8.7381, + "step": 343000 + }, + { + "epoch": 3.65, + "learning_rate": 0.00015763557663572305, + "loss": 8.7666, + "step": 343100 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001576127849309215, + "loss": 8.8821, + "step": 343200 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001575899887455677, + "loss": 8.7618, + "step": 343300 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001575671880814345, + "loss": 8.7463, + "step": 343400 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001575443829402951, + "loss": 8.823, + "step": 343500 + }, + { + "epoch": 3.66, + "learning_rate": 0.00015752157332392312, + "loss": 8.8199, + "step": 343600 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001574987592340924, + "loss": 8.8773, + "step": 343700 + }, + { + "epoch": 3.66, + "learning_rate": 0.00015747594067257726, + "loss": 8.8867, + "step": 343800 + }, + { + "epoch": 3.66, + "learning_rate": 0.00015745311764115222, + "loss": 8.7913, + "step": 343900 + }, + { + "epoch": 3.66, + "learning_rate": 0.00015743029014159233, + "loss": 8.8127, + "step": 344000 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015740745817567278, + "loss": 8.8051, + "step": 344100 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015738462174516926, + "loss": 8.8353, + "step": 344200 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015736178085185777, + "loss": 8.8307, + "step": 344300 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015733893549751466, + "loss": 8.773, + "step": 344400 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001573160856839166, + "loss": 8.7724, + "step": 344500 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015729323141284058, + "loss": 8.8896, + "step": 344600 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015727037268606405, + "loss": 8.8395, + "step": 344700 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015724750950536472, + "loss": 8.8564, + "step": 344800 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015722464187252063, + "loss": 8.7894, + "step": 344900 + }, + { + "epoch": 3.67, + "learning_rate": 0.00015720176978931022, + "loss": 8.7971, + "step": 345000 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015717889325751226, + "loss": 8.7546, + "step": 345100 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015715601227890585, + "loss": 8.8477, + "step": 345200 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015713312685527043, + "loss": 8.6833, + "step": 345300 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015711023698838586, + "loss": 8.7846, + "step": 345400 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001570873426800322, + "loss": 8.8653, + "step": 345500 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015706444393199002, + "loss": 8.7762, + "step": 345600 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001570415407460401, + "loss": 8.8121, + "step": 345700 + }, + { + "epoch": 3.68, + "learning_rate": 0.00015701863312396366, + "loss": 8.7938, + "step": 345800 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001569957210675422, + "loss": 8.779, + "step": 345900 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015697280457855766, + "loss": 8.8201, + "step": 346000 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015694988365879217, + "loss": 8.8128, + "step": 346100 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015692695831002836, + "loss": 8.8314, + "step": 346200 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015690402853404907, + "loss": 8.79, + "step": 346300 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001568810943326376, + "loss": 8.8233, + "step": 346400 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015685815570757756, + "loss": 8.8668, + "step": 346500 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015683521266065284, + "loss": 8.9163, + "step": 346600 + }, + { + "epoch": 3.69, + "learning_rate": 0.00015681226519364776, + "loss": 8.794, + "step": 346700 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001567893133083469, + "loss": 8.7783, + "step": 346800 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001567663570065353, + "loss": 8.7487, + "step": 346900 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015674339628999823, + "loss": 8.8421, + "step": 347000 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015672043116052135, + "loss": 8.8096, + "step": 347100 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015669746161989066, + "loss": 8.9053, + "step": 347200 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015667448766989254, + "loss": 8.791, + "step": 347300 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015665150931231363, + "loss": 8.8226, + "step": 347400 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015662852654894097, + "loss": 8.7575, + "step": 347500 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015660553938156194, + "loss": 8.7938, + "step": 347600 + }, + { + "epoch": 3.7, + "learning_rate": 0.00015658254781196422, + "loss": 8.8051, + "step": 347700 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001565595518419359, + "loss": 8.7581, + "step": 347800 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015653655147326542, + "loss": 8.8594, + "step": 347900 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015651354670774147, + "loss": 8.7451, + "step": 348000 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015649053754715313, + "loss": 8.7484, + "step": 348100 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015646752399328984, + "loss": 8.8128, + "step": 348200 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015644450604794137, + "loss": 8.8463, + "step": 348300 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001564214837128978, + "loss": 8.7961, + "step": 348400 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015639845698994963, + "loss": 8.8124, + "step": 348500 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001563754258808876, + "loss": 8.851, + "step": 348600 + }, + { + "epoch": 3.71, + "learning_rate": 0.00015635239038750282, + "loss": 8.8557, + "step": 348700 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015632935051158685, + "loss": 8.8308, + "step": 348800 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015630630625493143, + "loss": 8.7889, + "step": 348900 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001562832576193288, + "loss": 8.6978, + "step": 349000 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015626020460657135, + "loss": 8.727, + "step": 349100 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015623714721845196, + "loss": 8.8095, + "step": 349200 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015621408545676385, + "loss": 8.8017, + "step": 349300 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015619101932330044, + "loss": 8.7143, + "step": 349400 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015616794881985572, + "loss": 8.8755, + "step": 349500 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015614487394822373, + "loss": 8.7983, + "step": 349600 + }, + { + "epoch": 3.72, + "learning_rate": 0.00015612179471019907, + "loss": 8.811, + "step": 349700 + }, + { + "epoch": 3.73, + "learning_rate": 0.00015609871110757668, + "loss": 8.8465, + "step": 349800 + }, + { + "epoch": 3.73, + "learning_rate": 0.00015607562314215166, + "loss": 8.8213, + "step": 349900 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001560525308157197, + "loss": 8.8391, + "step": 350000 + }, + { + "epoch": 3.73, + "learning_rate": 0.00015602943413007653, + "loss": 8.7946, + "step": 350100 + }, + { + "epoch": 3.73, + "learning_rate": 0.00015600633308701854, + "loss": 8.8644, + "step": 350200 + }, + { + "epoch": 3.73, + "learning_rate": 0.00015598322768834217, + "loss": 8.8087, + "step": 350300 + }, + { + "epoch": 3.73, + "learning_rate": 0.00015596011793584443, + "loss": 8.7416, + "step": 350400 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001559370038313225, + "loss": 8.8248, + "step": 350500 + }, + { + "epoch": 3.73, + "learning_rate": 0.00015591388537657399, + "loss": 8.8538, + "step": 350600 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015589076257339684, + "loss": 8.8375, + "step": 350700 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015586763542358925, + "loss": 8.8043, + "step": 350800 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015584450392894993, + "loss": 8.8588, + "step": 350900 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015582136809127772, + "loss": 8.7364, + "step": 351000 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015579822791237193, + "loss": 8.6966, + "step": 351100 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001557750833940322, + "loss": 8.7664, + "step": 351200 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015575193453805844, + "loss": 8.8026, + "step": 351300 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015572878134625096, + "loss": 8.7778, + "step": 351400 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001557056238204104, + "loss": 8.9069, + "step": 351500 + }, + { + "epoch": 3.74, + "learning_rate": 0.00015568246196233772, + "loss": 8.8172, + "step": 351600 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015565929577383418, + "loss": 8.8251, + "step": 351700 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015563612525670145, + "loss": 8.8538, + "step": 351800 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015561295041274152, + "loss": 8.7601, + "step": 351900 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015558977124375663, + "loss": 8.8528, + "step": 352000 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015556658775154952, + "loss": 8.7399, + "step": 352100 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015554339993792312, + "loss": 8.805, + "step": 352200 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015552020780468072, + "loss": 8.8398, + "step": 352300 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015549701135362607, + "loss": 8.7817, + "step": 352400 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015547381058656306, + "loss": 8.7342, + "step": 352500 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001554506055052961, + "loss": 8.8, + "step": 352600 + }, + { + "epoch": 3.76, + "learning_rate": 0.00015542739611162974, + "loss": 8.6936, + "step": 352700 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001554041824073691, + "loss": 8.7817, + "step": 352800 + }, + { + "epoch": 3.76, + "learning_rate": 0.00015538096439431941, + "loss": 8.7224, + "step": 352900 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001553577420742864, + "loss": 8.8444, + "step": 353000 + }, + { + "epoch": 3.76, + "learning_rate": 0.00015533451544907605, + "loss": 8.7332, + "step": 353100 + }, + { + "epoch": 3.76, + "learning_rate": 0.00015531128452049475, + "loss": 8.7289, + "step": 353200 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001552880492903491, + "loss": 8.8064, + "step": 353300 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001552648097604461, + "loss": 8.7928, + "step": 353400 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001552415659325932, + "loss": 8.9137, + "step": 353500 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015521831780859792, + "loss": 8.8206, + "step": 353600 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015519506539026838, + "loss": 8.7694, + "step": 353700 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001551718086794129, + "loss": 8.7908, + "step": 353800 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015514854767784012, + "loss": 8.7815, + "step": 353900 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015512528238735912, + "loss": 8.7962, + "step": 354000 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015510201280977915, + "loss": 8.9141, + "step": 354100 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015507873894690996, + "loss": 8.8699, + "step": 354200 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015505546080056153, + "loss": 8.7353, + "step": 354300 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015503217837254424, + "loss": 8.8022, + "step": 354400 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001550088916646687, + "loss": 8.8301, + "step": 354500 + }, + { + "epoch": 3.78, + "learning_rate": 0.00015498560067874598, + "loss": 8.7804, + "step": 354600 + }, + { + "epoch": 3.78, + "learning_rate": 0.00015496230541658742, + "loss": 8.8191, + "step": 354700 + }, + { + "epoch": 3.78, + "learning_rate": 0.00015493900588000464, + "loss": 8.8028, + "step": 354800 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001549157020708097, + "loss": 8.8434, + "step": 354900 + }, + { + "epoch": 3.78, + "learning_rate": 0.00015489239399081494, + "loss": 8.8251, + "step": 355000 + }, + { + "epoch": 3.78, + "learning_rate": 0.000154869081641833, + "loss": 8.7733, + "step": 355100 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001548457650256769, + "loss": 8.7896, + "step": 355200 + }, + { + "epoch": 3.78, + "learning_rate": 0.00015482244414415997, + "loss": 8.7338, + "step": 355300 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001547991189990959, + "loss": 8.7329, + "step": 355400 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015477578959229864, + "loss": 8.7516, + "step": 355500 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015475245592558258, + "loss": 8.8049, + "step": 355600 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015472911800076236, + "loss": 8.765, + "step": 355700 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015470577581965292, + "loss": 8.8133, + "step": 355800 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015468242938406968, + "loss": 8.7739, + "step": 355900 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015465907869582823, + "loss": 8.8426, + "step": 356000 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015463572375674459, + "loss": 8.748, + "step": 356100 + }, + { + "epoch": 3.79, + "learning_rate": 0.000154612364568635, + "loss": 8.7769, + "step": 356200 + }, + { + "epoch": 3.8, + "learning_rate": 0.0001545890011333162, + "loss": 8.7511, + "step": 356300 + }, + { + "epoch": 3.8, + "learning_rate": 0.00015456563345260516, + "loss": 8.7977, + "step": 356400 + }, + { + "epoch": 3.8, + "learning_rate": 0.0001545422615283191, + "loss": 8.8053, + "step": 356500 + }, + { + "epoch": 3.8, + "learning_rate": 0.00015451888536227574, + "loss": 8.7655, + "step": 356600 + }, + { + "epoch": 3.8, + "learning_rate": 0.00015449550495629302, + "loss": 8.7893, + "step": 356700 + }, + { + "epoch": 3.8, + "learning_rate": 0.00015447212031218923, + "loss": 8.7606, + "step": 356800 + }, + { + "epoch": 3.8, + "learning_rate": 0.00015444873143178302, + "loss": 8.8284, + "step": 356900 + }, + { + "epoch": 3.8, + "learning_rate": 0.0001544253383168933, + "loss": 8.7675, + "step": 357000 + }, + { + "epoch": 3.8, + "learning_rate": 0.0001544019409693394, + "loss": 8.7711, + "step": 357100 + }, + { + "epoch": 3.8, + "learning_rate": 0.00015437853939094092, + "loss": 8.8141, + "step": 357200 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001543551335835178, + "loss": 8.7533, + "step": 357300 + }, + { + "epoch": 3.81, + "learning_rate": 0.00015433172354889033, + "loss": 8.7777, + "step": 357400 + }, + { + "epoch": 3.81, + "learning_rate": 0.00015430830928887904, + "loss": 8.8338, + "step": 357500 + }, + { + "epoch": 3.81, + "learning_rate": 0.00015428489080530496, + "loss": 8.7993, + "step": 357600 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001542614680999893, + "loss": 8.8598, + "step": 357700 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001542380411747536, + "loss": 8.776, + "step": 357800 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001542146100314199, + "loss": 8.7512, + "step": 357900 + }, + { + "epoch": 3.81, + "learning_rate": 0.00015419117467181033, + "loss": 8.7399, + "step": 358000 + }, + { + "epoch": 3.81, + "learning_rate": 0.00015416773509774746, + "loss": 8.8542, + "step": 358100 + }, + { + "epoch": 3.82, + "learning_rate": 0.00015414429131105427, + "loss": 8.8344, + "step": 358200 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001541208433135539, + "loss": 8.7726, + "step": 358300 + }, + { + "epoch": 3.82, + "learning_rate": 0.00015409739110706997, + "loss": 8.7971, + "step": 358400 + }, + { + "epoch": 3.82, + "learning_rate": 0.00015407393469342632, + "loss": 8.7877, + "step": 358500 + }, + { + "epoch": 3.82, + "learning_rate": 0.00015405047407444718, + "loss": 8.7961, + "step": 358600 + }, + { + "epoch": 3.82, + "learning_rate": 0.00015402700925195708, + "loss": 8.7702, + "step": 358700 + }, + { + "epoch": 3.82, + "learning_rate": 0.00015400354022778085, + "loss": 8.8963, + "step": 358800 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001539800670037437, + "loss": 8.7944, + "step": 358900 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001539565895816712, + "loss": 8.8151, + "step": 359000 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001539331079633891, + "loss": 8.8228, + "step": 359100 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015390962215072363, + "loss": 8.7734, + "step": 359200 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015388613214550122, + "loss": 8.7039, + "step": 359300 + }, + { + "epoch": 3.83, + "learning_rate": 0.0001538626379495488, + "loss": 8.7386, + "step": 359400 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015383913956469342, + "loss": 8.835, + "step": 359500 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015381563699276256, + "loss": 8.7587, + "step": 359600 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015379213023558407, + "loss": 8.8079, + "step": 359700 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015376861929498602, + "loss": 8.7264, + "step": 359800 + }, + { + "epoch": 3.83, + "learning_rate": 0.0001537451041727969, + "loss": 8.8375, + "step": 359900 + }, + { + "epoch": 3.83, + "learning_rate": 0.00015372158487084544, + "loss": 8.7863, + "step": 360000 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001536980613909608, + "loss": 8.7268, + "step": 360100 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001536745337349723, + "loss": 8.7323, + "step": 360200 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001536510019047098, + "loss": 8.7829, + "step": 360300 + }, + { + "epoch": 3.84, + "learning_rate": 0.00015362746590200335, + "loss": 8.8188, + "step": 360400 + }, + { + "epoch": 3.84, + "learning_rate": 0.00015360392572868328, + "loss": 8.8168, + "step": 360500 + }, + { + "epoch": 3.84, + "learning_rate": 0.00015358038138658036, + "loss": 8.7688, + "step": 360600 + }, + { + "epoch": 3.84, + "learning_rate": 0.00015355683287752566, + "loss": 8.8192, + "step": 360700 + }, + { + "epoch": 3.84, + "learning_rate": 0.00015353328020335048, + "loss": 8.7491, + "step": 360800 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001535097233658866, + "loss": 8.8169, + "step": 360900 + }, + { + "epoch": 3.85, + "learning_rate": 0.00015348616236696597, + "loss": 8.7479, + "step": 361000 + }, + { + "epoch": 3.85, + "learning_rate": 0.00015346259720842094, + "loss": 8.8774, + "step": 361100 + }, + { + "epoch": 3.85, + "learning_rate": 0.0001534390278920842, + "loss": 8.6881, + "step": 361200 + }, + { + "epoch": 3.85, + "learning_rate": 0.00015341545441978878, + "loss": 8.8338, + "step": 361300 + }, + { + "epoch": 3.85, + "learning_rate": 0.00015339187679336787, + "loss": 8.8075, + "step": 361400 + }, + { + "epoch": 3.85, + "learning_rate": 0.0001533682950146552, + "loss": 8.7839, + "step": 361500 + }, + { + "epoch": 3.85, + "learning_rate": 0.0001533447090854847, + "loss": 8.8578, + "step": 361600 + }, + { + "epoch": 3.85, + "learning_rate": 0.00015332111900769063, + "loss": 8.8354, + "step": 361700 + }, + { + "epoch": 3.85, + "learning_rate": 0.00015329752478310764, + "loss": 8.8921, + "step": 361800 + }, + { + "epoch": 3.85, + "learning_rate": 0.0001532739264135706, + "loss": 8.8199, + "step": 361900 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015325032390091484, + "loss": 8.844, + "step": 362000 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015322671724697583, + "loss": 8.8705, + "step": 362100 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015320310645358952, + "loss": 8.8233, + "step": 362200 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015317949152259214, + "loss": 8.8973, + "step": 362300 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015315587245582016, + "loss": 8.7869, + "step": 362400 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001531322492551105, + "loss": 8.7722, + "step": 362500 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015310862192230028, + "loss": 8.8077, + "step": 362600 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015308499045922709, + "loss": 8.8453, + "step": 362700 + }, + { + "epoch": 3.86, + "learning_rate": 0.00015306135486772864, + "loss": 8.7911, + "step": 362800 + }, + { + "epoch": 3.87, + "learning_rate": 0.00015303771514964316, + "loss": 8.8691, + "step": 362900 + }, + { + "epoch": 3.87, + "learning_rate": 0.00015301407130680907, + "loss": 8.766, + "step": 363000 + }, + { + "epoch": 3.87, + "learning_rate": 0.00015299042334106518, + "loss": 8.8194, + "step": 363100 + }, + { + "epoch": 3.87, + "learning_rate": 0.00015296677125425055, + "loss": 8.8141, + "step": 363200 + }, + { + "epoch": 3.87, + "learning_rate": 0.00015294311504820466, + "loss": 8.7333, + "step": 363300 + }, + { + "epoch": 3.87, + "learning_rate": 0.0001529194547247672, + "loss": 8.8332, + "step": 363400 + }, + { + "epoch": 3.87, + "learning_rate": 0.00015289579028577828, + "loss": 8.7512, + "step": 363500 + }, + { + "epoch": 3.87, + "learning_rate": 0.0001528721217330783, + "loss": 8.8402, + "step": 363600 + }, + { + "epoch": 3.87, + "learning_rate": 0.0001528484490685079, + "loss": 8.7548, + "step": 363700 + }, + { + "epoch": 3.87, + "learning_rate": 0.00015282477229390819, + "loss": 8.8196, + "step": 363800 + }, + { + "epoch": 3.88, + "learning_rate": 0.00015280109141112042, + "loss": 8.6734, + "step": 363900 + }, + { + "epoch": 3.88, + "learning_rate": 0.00015277740642198633, + "loss": 8.815, + "step": 364000 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001527537173283479, + "loss": 8.7787, + "step": 364100 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001527300241320474, + "loss": 8.8431, + "step": 364200 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001527063268349274, + "loss": 8.7571, + "step": 364300 + }, + { + "epoch": 3.88, + "learning_rate": 0.00015268262543883097, + "loss": 8.7344, + "step": 364400 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001526589199456013, + "loss": 8.7589, + "step": 364500 + }, + { + "epoch": 3.88, + "learning_rate": 0.00015263521035708196, + "loss": 8.8339, + "step": 364600 + }, + { + "epoch": 3.88, + "learning_rate": 0.00015261149667511684, + "loss": 8.7424, + "step": 364700 + }, + { + "epoch": 3.89, + "learning_rate": 0.00015258777890155023, + "loss": 8.8471, + "step": 364800 + }, + { + "epoch": 3.89, + "learning_rate": 0.00015256405703822657, + "loss": 8.7772, + "step": 364900 + }, + { + "epoch": 3.89, + "learning_rate": 0.00015254033108699075, + "loss": 8.7625, + "step": 365000 + }, + { + "epoch": 3.89, + "learning_rate": 0.00015251660104968794, + "loss": 8.7699, + "step": 365100 + }, + { + "epoch": 3.89, + "learning_rate": 0.00015249286692816365, + "loss": 8.8759, + "step": 365200 + }, + { + "epoch": 3.89, + "learning_rate": 0.00015246912872426366, + "loss": 8.7017, + "step": 365300 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001524453864398341, + "loss": 8.8131, + "step": 365400 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001524216400767214, + "loss": 8.8399, + "step": 365500 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001523978896367723, + "loss": 8.7111, + "step": 365600 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001523741351218339, + "loss": 8.7408, + "step": 365700 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001523503765337536, + "loss": 8.7066, + "step": 365800 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001523266138743791, + "loss": 8.8381, + "step": 365900 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015230284714555845, + "loss": 8.8991, + "step": 366000 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015227907634913994, + "loss": 8.8352, + "step": 366100 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015225530148697223, + "loss": 8.7335, + "step": 366200 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015223152256090434, + "loss": 8.6815, + "step": 366300 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015220773957278554, + "loss": 8.7098, + "step": 366400 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015218395252446538, + "loss": 8.8032, + "step": 366500 + }, + { + "epoch": 3.9, + "learning_rate": 0.00015216016141779388, + "loss": 8.8152, + "step": 366600 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015213636625462118, + "loss": 8.7637, + "step": 366700 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001521125670367979, + "loss": 8.7589, + "step": 366800 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015208876376617488, + "loss": 8.8547, + "step": 366900 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015206495644460333, + "loss": 8.7155, + "step": 367000 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001520411450739347, + "loss": 8.7925, + "step": 367100 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015201732965602086, + "loss": 8.8253, + "step": 367200 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015199351019271387, + "loss": 8.7885, + "step": 367300 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015196968668586624, + "loss": 8.8603, + "step": 367400 + }, + { + "epoch": 3.91, + "learning_rate": 0.00015194585913733065, + "loss": 8.7897, + "step": 367500 + }, + { + "epoch": 3.92, + "learning_rate": 0.00015192202754896026, + "loss": 8.8019, + "step": 367600 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001518981919226084, + "loss": 8.7146, + "step": 367700 + }, + { + "epoch": 3.92, + "learning_rate": 0.00015187435226012876, + "loss": 8.742, + "step": 367800 + }, + { + "epoch": 3.92, + "learning_rate": 0.00015185050856337542, + "loss": 8.7665, + "step": 367900 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001518266608342026, + "loss": 8.77, + "step": 368000 + }, + { + "epoch": 3.92, + "learning_rate": 0.00015180280907446504, + "loss": 8.8253, + "step": 368100 + }, + { + "epoch": 3.92, + "learning_rate": 0.00015177895328601765, + "loss": 8.7968, + "step": 368200 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001517550934707157, + "loss": 8.8201, + "step": 368300 + }, + { + "epoch": 3.92, + "learning_rate": 0.00015173122963041478, + "loss": 8.7492, + "step": 368400 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015170736176697077, + "loss": 8.719, + "step": 368500 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015168348988223987, + "loss": 8.7588, + "step": 368600 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015165961397807862, + "loss": 8.7138, + "step": 368700 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015163573405634382, + "loss": 8.813, + "step": 368800 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015161185011889266, + "loss": 8.7865, + "step": 368900 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015158796216758257, + "loss": 8.7746, + "step": 369000 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015156407020427133, + "loss": 8.8233, + "step": 369100 + }, + { + "epoch": 3.93, + "learning_rate": 0.000151540174230817, + "loss": 8.6673, + "step": 369200 + }, + { + "epoch": 3.93, + "learning_rate": 0.00015151627424907796, + "loss": 8.8217, + "step": 369300 + }, + { + "epoch": 3.93, + "learning_rate": 0.000151492370260913, + "loss": 8.8191, + "step": 369400 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015146846226818102, + "loss": 8.7782, + "step": 369500 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015144455027274142, + "loss": 8.7868, + "step": 369600 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001514206342764538, + "loss": 8.7291, + "step": 369700 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015139671428117816, + "loss": 8.7515, + "step": 369800 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015137279028877467, + "loss": 8.756, + "step": 369900 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015134886230110404, + "loss": 8.7745, + "step": 370000 + }, + { + "epoch": 3.94, + "learning_rate": 0.000151324930320027, + "loss": 8.8526, + "step": 370100 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015130099434740486, + "loss": 8.7989, + "step": 370200 + }, + { + "epoch": 3.94, + "learning_rate": 0.00015127705438509908, + "loss": 8.708, + "step": 370300 + }, + { + "epoch": 3.95, + "learning_rate": 0.00015125311043497143, + "loss": 8.8573, + "step": 370400 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001512291624988841, + "loss": 8.6882, + "step": 370500 + }, + { + "epoch": 3.95, + "learning_rate": 0.00015120521057869948, + "loss": 8.7515, + "step": 370600 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001511812546762804, + "loss": 8.7603, + "step": 370700 + }, + { + "epoch": 3.95, + "learning_rate": 0.00015115729479348977, + "loss": 8.7988, + "step": 370800 + }, + { + "epoch": 3.95, + "learning_rate": 0.00015113333093219104, + "loss": 8.7712, + "step": 370900 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001511093630942479, + "loss": 8.6666, + "step": 371000 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001510853912815243, + "loss": 8.7819, + "step": 371100 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001510614154958845, + "loss": 8.8205, + "step": 371200 + }, + { + "epoch": 3.95, + "learning_rate": 0.00015103743573919312, + "loss": 8.719, + "step": 371300 + }, + { + "epoch": 3.96, + "learning_rate": 0.0001510134520133151, + "loss": 8.7494, + "step": 371400 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015098946432011564, + "loss": 8.7874, + "step": 371500 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015096547266146024, + "loss": 8.8516, + "step": 371600 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015094147703921477, + "loss": 8.8068, + "step": 371700 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015091747745524535, + "loss": 8.7698, + "step": 371800 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015089347391141847, + "loss": 8.7673, + "step": 371900 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015086946640960078, + "loss": 8.7176, + "step": 372000 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015084545495165947, + "loss": 8.768, + "step": 372100 + }, + { + "epoch": 3.96, + "learning_rate": 0.00015082143953946186, + "loss": 8.7448, + "step": 372200 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015079742017487565, + "loss": 8.7793, + "step": 372300 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015077339685976878, + "loss": 8.7268, + "step": 372400 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001507493695960096, + "loss": 8.7955, + "step": 372500 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015072533838546667, + "loss": 8.8322, + "step": 372600 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015070130323000894, + "loss": 8.7648, + "step": 372700 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015067726413150562, + "loss": 8.7743, + "step": 372800 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001506532210918262, + "loss": 8.8245, + "step": 372900 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015062917411284054, + "loss": 8.8257, + "step": 373000 + }, + { + "epoch": 3.97, + "learning_rate": 0.00015060512319641878, + "loss": 8.7505, + "step": 373100 + }, + { + "epoch": 3.98, + "learning_rate": 0.0001505810683444313, + "loss": 8.7908, + "step": 373200 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015055700955874895, + "loss": 8.7712, + "step": 373300 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015053294684124274, + "loss": 8.7092, + "step": 373400 + }, + { + "epoch": 3.98, + "learning_rate": 0.000150508880193784, + "loss": 8.7832, + "step": 373500 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015048480961824437, + "loss": 8.7934, + "step": 373600 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015046073511649593, + "loss": 8.8941, + "step": 373700 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015043665669041087, + "loss": 8.846, + "step": 373800 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015041257434186183, + "loss": 8.7362, + "step": 373900 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015038848807272164, + "loss": 8.8195, + "step": 374000 + }, + { + "epoch": 3.98, + "learning_rate": 0.00015036439788486348, + "loss": 8.8158, + "step": 374100 + }, + { + "epoch": 3.99, + "learning_rate": 0.0001503403037801609, + "loss": 8.8013, + "step": 374200 + }, + { + "epoch": 3.99, + "learning_rate": 0.0001503162057604877, + "loss": 8.8347, + "step": 374300 + }, + { + "epoch": 3.99, + "learning_rate": 0.000150292103827718, + "loss": 8.7542, + "step": 374400 + }, + { + "epoch": 3.99, + "learning_rate": 0.00015026799798372614, + "loss": 8.7484, + "step": 374500 + }, + { + "epoch": 3.99, + "learning_rate": 0.00015024388823038686, + "loss": 8.7191, + "step": 374600 + }, + { + "epoch": 3.99, + "learning_rate": 0.00015021977456957522, + "loss": 8.7484, + "step": 374700 + }, + { + "epoch": 3.99, + "learning_rate": 0.0001501956570031665, + "loss": 8.7636, + "step": 374800 + }, + { + "epoch": 3.99, + "learning_rate": 0.00015017153553303632, + "loss": 8.8289, + "step": 374900 + }, + { + "epoch": 3.99, + "learning_rate": 0.00015014741016106063, + "loss": 8.7534, + "step": 375000 + }, + { + "epoch": 4.0, + "learning_rate": 0.00015012328088911568, + "loss": 8.8313, + "step": 375100 + }, + { + "epoch": 4.0, + "learning_rate": 0.000150099147719078, + "loss": 8.7978, + "step": 375200 + }, + { + "epoch": 4.0, + "learning_rate": 0.00015007501065282436, + "loss": 8.7299, + "step": 375300 + }, + { + "epoch": 4.0, + "learning_rate": 0.00015005086969223198, + "loss": 8.7271, + "step": 375400 + }, + { + "epoch": 4.0, + "learning_rate": 0.0001500267248391783, + "loss": 8.7294, + "step": 375500 + }, + { + "epoch": 4.0, + "learning_rate": 0.00015000257609554105, + "loss": 8.8167, + "step": 375600 + }, + { + "epoch": 4.0, + "learning_rate": 0.00014997842346319826, + "loss": 8.7167, + "step": 375700 + }, + { + "epoch": 4.0, + "learning_rate": 0.00014995426694402832, + "loss": 8.7883, + "step": 375800 + }, + { + "epoch": 4.0, + "learning_rate": 0.00014993010653990984, + "loss": 8.6511, + "step": 375900 + }, + { + "epoch": 4.0, + "learning_rate": 0.00014990594225272182, + "loss": 8.7537, + "step": 376000 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001498817740843435, + "loss": 8.7335, + "step": 376100 + }, + { + "epoch": 4.01, + "learning_rate": 0.00014985760203665445, + "loss": 8.7847, + "step": 376200 + }, + { + "epoch": 4.01, + "learning_rate": 0.00014983342611153453, + "loss": 8.6918, + "step": 376300 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001498092463108639, + "loss": 8.7556, + "step": 376400 + }, + { + "epoch": 4.01, + "learning_rate": 0.00014978506263652302, + "loss": 8.7422, + "step": 376500 + }, + { + "epoch": 4.01, + "learning_rate": 0.00014976087509039268, + "loss": 8.8314, + "step": 376600 + }, + { + "epoch": 4.01, + "learning_rate": 0.00014973668367435392, + "loss": 8.7912, + "step": 376700 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001497124883902881, + "loss": 8.878, + "step": 376800 + }, + { + "epoch": 4.01, + "learning_rate": 0.00014968828924007697, + "loss": 8.711, + "step": 376900 + }, + { + "epoch": 4.02, + "learning_rate": 0.00014966408622560238, + "loss": 8.7847, + "step": 377000 + }, + { + "epoch": 4.02, + "learning_rate": 0.0001496398793487467, + "loss": 8.7744, + "step": 377100 + }, + { + "epoch": 4.02, + "learning_rate": 0.00014961566861139246, + "loss": 8.8013, + "step": 377200 + }, + { + "epoch": 4.02, + "learning_rate": 0.0001495914540154225, + "loss": 8.7572, + "step": 377300 + }, + { + "epoch": 4.02, + "learning_rate": 0.00014956723556272005, + "loss": 8.7429, + "step": 377400 + }, + { + "epoch": 4.02, + "learning_rate": 0.00014954301325516856, + "loss": 8.7699, + "step": 377500 + }, + { + "epoch": 4.02, + "learning_rate": 0.0001495187870946518, + "loss": 8.8787, + "step": 377600 + }, + { + "epoch": 4.02, + "learning_rate": 0.00014949455708305386, + "loss": 8.7483, + "step": 377700 + }, + { + "epoch": 4.02, + "learning_rate": 0.00014947032322225903, + "loss": 8.7249, + "step": 377800 + }, + { + "epoch": 4.03, + "learning_rate": 0.0001494460855141521, + "loss": 8.7458, + "step": 377900 + }, + { + "epoch": 4.03, + "learning_rate": 0.00014942184396061795, + "loss": 8.7392, + "step": 378000 + }, + { + "epoch": 4.03, + "learning_rate": 0.0001493975985635419, + "loss": 8.7443, + "step": 378100 + }, + { + "epoch": 4.03, + "learning_rate": 0.00014937334932480948, + "loss": 8.7785, + "step": 378200 + }, + { + "epoch": 4.03, + "learning_rate": 0.00014934909624630656, + "loss": 8.7747, + "step": 378300 + }, + { + "epoch": 4.03, + "learning_rate": 0.0001493248393299193, + "loss": 8.7083, + "step": 378400 + }, + { + "epoch": 4.03, + "learning_rate": 0.00014930057857753424, + "loss": 8.8142, + "step": 378500 + }, + { + "epoch": 4.03, + "learning_rate": 0.00014927631399103803, + "loss": 8.722, + "step": 378600 + }, + { + "epoch": 4.03, + "learning_rate": 0.0001492520455723178, + "loss": 8.7737, + "step": 378700 + }, + { + "epoch": 4.03, + "learning_rate": 0.00014922777332326088, + "loss": 8.7385, + "step": 378800 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014920349724575492, + "loss": 8.8544, + "step": 378900 + }, + { + "epoch": 4.04, + "learning_rate": 0.0001491792173416879, + "loss": 8.7835, + "step": 379000 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014915493361294807, + "loss": 8.7839, + "step": 379100 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014913064606142393, + "loss": 8.7335, + "step": 379200 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014910635468900438, + "loss": 8.7351, + "step": 379300 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014908205949757853, + "loss": 8.7432, + "step": 379400 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014905776048903583, + "loss": 8.7576, + "step": 379500 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014903345766526603, + "loss": 8.7377, + "step": 379600 + }, + { + "epoch": 4.04, + "learning_rate": 0.00014900915102815916, + "loss": 8.7917, + "step": 379700 + }, + { + "epoch": 4.05, + "learning_rate": 0.0001489848405796055, + "loss": 8.8103, + "step": 379800 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014896052632149576, + "loss": 8.765, + "step": 379900 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014893620825572076, + "loss": 8.7955, + "step": 380000 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014891188638417184, + "loss": 8.8483, + "step": 380100 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014888756070874042, + "loss": 8.7179, + "step": 380200 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014886323123131835, + "loss": 8.7511, + "step": 380300 + }, + { + "epoch": 4.05, + "learning_rate": 0.0001488388979537977, + "loss": 8.7755, + "step": 380400 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014881456087807092, + "loss": 8.6909, + "step": 380500 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014879022000603072, + "loss": 8.8049, + "step": 380600 + }, + { + "epoch": 4.05, + "learning_rate": 0.00014876587533957004, + "loss": 8.7371, + "step": 380700 + }, + { + "epoch": 4.06, + "learning_rate": 0.00014874152688058216, + "loss": 8.7611, + "step": 380800 + }, + { + "epoch": 4.06, + "learning_rate": 0.00014871717463096069, + "loss": 8.7872, + "step": 380900 + }, + { + "epoch": 4.06, + "learning_rate": 0.00014869281859259954, + "loss": 8.7589, + "step": 381000 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001486684587673928, + "loss": 8.6995, + "step": 381100 + }, + { + "epoch": 4.06, + "learning_rate": 0.000148644095157235, + "loss": 8.8481, + "step": 381200 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001486197277640209, + "loss": 8.7561, + "step": 381300 + }, + { + "epoch": 4.06, + "learning_rate": 0.00014859535658964554, + "loss": 8.7706, + "step": 381400 + }, + { + "epoch": 4.06, + "learning_rate": 0.00014857098163600425, + "loss": 8.8484, + "step": 381500 + }, + { + "epoch": 4.06, + "learning_rate": 0.00014854660290499266, + "loss": 8.7628, + "step": 381600 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014852222039850682, + "loss": 8.7184, + "step": 381700 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014849783411844278, + "loss": 8.7551, + "step": 381800 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014847344406669722, + "loss": 8.7492, + "step": 381900 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014844905024516687, + "loss": 8.7958, + "step": 382000 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014842465265574886, + "loss": 8.6799, + "step": 382100 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014840025130034062, + "loss": 8.7966, + "step": 382200 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014837584618083985, + "loss": 8.7865, + "step": 382300 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014835143729914445, + "loss": 8.7535, + "step": 382400 + }, + { + "epoch": 4.07, + "learning_rate": 0.00014832702465715278, + "loss": 8.7662, + "step": 382500 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014830260825676343, + "loss": 8.7131, + "step": 382600 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014827818809987523, + "loss": 8.8125, + "step": 382700 + }, + { + "epoch": 4.08, + "learning_rate": 0.0001482537641883873, + "loss": 8.7707, + "step": 382800 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014822933652419918, + "loss": 8.7533, + "step": 382900 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014820490510921054, + "loss": 8.774, + "step": 383000 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014818046994532143, + "loss": 8.8086, + "step": 383100 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014815603103443222, + "loss": 8.7406, + "step": 383200 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014813158837844348, + "loss": 8.6995, + "step": 383300 + }, + { + "epoch": 4.08, + "learning_rate": 0.00014810714197925612, + "loss": 8.885, + "step": 383400 + }, + { + "epoch": 4.08, + "learning_rate": 0.0001480826918387713, + "loss": 8.8406, + "step": 383500 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014805823795889064, + "loss": 8.7703, + "step": 383600 + }, + { + "epoch": 4.09, + "learning_rate": 0.0001480337803415158, + "loss": 8.7461, + "step": 383700 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014800931898854887, + "loss": 8.7982, + "step": 383800 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014798485390189227, + "loss": 8.7316, + "step": 383900 + }, + { + "epoch": 4.09, + "learning_rate": 0.0001479603850834486, + "loss": 8.8264, + "step": 384000 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014793591253512082, + "loss": 8.7422, + "step": 384100 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014791143625881214, + "loss": 8.7171, + "step": 384200 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014788695625642613, + "loss": 8.7895, + "step": 384300 + }, + { + "epoch": 4.09, + "learning_rate": 0.00014786247252986654, + "loss": 8.7343, + "step": 384400 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014783798508103752, + "loss": 8.7497, + "step": 384500 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014781349391184342, + "loss": 8.7825, + "step": 384600 + }, + { + "epoch": 4.1, + "learning_rate": 0.000147788999024189, + "loss": 8.6788, + "step": 384700 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014776450041997913, + "loss": 8.7372, + "step": 384800 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014773999810111917, + "loss": 8.6982, + "step": 384900 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014771549206951456, + "loss": 8.8127, + "step": 385000 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001476909823270712, + "loss": 8.837, + "step": 385100 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014766646887569523, + "loss": 8.7586, + "step": 385200 + }, + { + "epoch": 4.1, + "learning_rate": 0.00014764195171729306, + "loss": 8.7796, + "step": 385300 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014761743085377134, + "loss": 8.7884, + "step": 385400 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014759290628703708, + "loss": 8.7886, + "step": 385500 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014756837801899758, + "loss": 8.7119, + "step": 385600 + }, + { + "epoch": 4.11, + "learning_rate": 0.0001475438460515604, + "loss": 8.6953, + "step": 385700 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014751931038663342, + "loss": 8.7258, + "step": 385800 + }, + { + "epoch": 4.11, + "learning_rate": 0.0001474947710261247, + "loss": 8.7772, + "step": 385900 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014747022797194275, + "loss": 8.6818, + "step": 386000 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014744568122599625, + "loss": 8.6791, + "step": 386100 + }, + { + "epoch": 4.11, + "learning_rate": 0.00014742113079019422, + "loss": 8.8339, + "step": 386200 + }, + { + "epoch": 4.11, + "learning_rate": 0.0001473965766664459, + "loss": 8.7941, + "step": 386300 + }, + { + "epoch": 4.12, + "learning_rate": 0.00014737201885666094, + "loss": 8.8127, + "step": 386400 + }, + { + "epoch": 4.12, + "learning_rate": 0.00014734745736274917, + "loss": 8.6502, + "step": 386500 + }, + { + "epoch": 4.12, + "learning_rate": 0.00014732289218662072, + "loss": 8.7087, + "step": 386600 + }, + { + "epoch": 4.12, + "learning_rate": 0.00014729832333018603, + "loss": 8.7788, + "step": 386700 + }, + { + "epoch": 4.12, + "learning_rate": 0.00014727375079535586, + "loss": 8.7376, + "step": 386800 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001472491745840412, + "loss": 8.7109, + "step": 386900 + }, + { + "epoch": 4.12, + "learning_rate": 0.00014722459469815332, + "loss": 8.7599, + "step": 387000 + }, + { + "epoch": 4.12, + "learning_rate": 0.00014720001113960383, + "loss": 8.722, + "step": 387100 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001471754239103046, + "loss": 8.7402, + "step": 387200 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014715083301216776, + "loss": 8.7022, + "step": 387300 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014712623844710576, + "loss": 8.739, + "step": 387400 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001471016402170313, + "loss": 8.7488, + "step": 387500 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014707703832385742, + "loss": 8.6934, + "step": 387600 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001470524327694974, + "loss": 8.7852, + "step": 387700 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014702782355586477, + "loss": 8.7739, + "step": 387800 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014700321068487348, + "loss": 8.7246, + "step": 387900 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014697859415843762, + "loss": 8.7082, + "step": 388000 + }, + { + "epoch": 4.13, + "learning_rate": 0.00014695397397847162, + "loss": 8.825, + "step": 388100 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001469293501468902, + "loss": 8.675, + "step": 388200 + }, + { + "epoch": 4.14, + "learning_rate": 0.00014690472266560835, + "loss": 8.7704, + "step": 388300 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001468800915365414, + "loss": 8.778, + "step": 388400 + }, + { + "epoch": 4.14, + "learning_rate": 0.00014685545676160485, + "loss": 8.6891, + "step": 388500 + }, + { + "epoch": 4.14, + "learning_rate": 0.00014683081834271456, + "loss": 8.759, + "step": 388600 + }, + { + "epoch": 4.14, + "learning_rate": 0.00014680617628178667, + "loss": 8.7725, + "step": 388700 + }, + { + "epoch": 4.14, + "learning_rate": 0.00014678153058073762, + "loss": 8.7959, + "step": 388800 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001467568812414841, + "loss": 8.8305, + "step": 388900 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001467322282659431, + "loss": 8.7618, + "step": 389000 + }, + { + "epoch": 4.14, + "learning_rate": 0.00014670757165603185, + "loss": 8.7906, + "step": 389100 + }, + { + "epoch": 4.15, + "learning_rate": 0.00014668291141366793, + "loss": 8.6714, + "step": 389200 + }, + { + "epoch": 4.15, + "learning_rate": 0.00014665824754076916, + "loss": 8.6809, + "step": 389300 + }, + { + "epoch": 4.15, + "learning_rate": 0.00014663358003925362, + "loss": 8.834, + "step": 389400 + }, + { + "epoch": 4.15, + "learning_rate": 0.00014660890891103975, + "loss": 8.7504, + "step": 389500 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001465842341580462, + "loss": 8.7452, + "step": 389600 + }, + { + "epoch": 4.15, + "learning_rate": 0.00014655955578219193, + "loss": 8.8116, + "step": 389700 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001465348737853962, + "loss": 8.7696, + "step": 389800 + }, + { + "epoch": 4.15, + "learning_rate": 0.00014651018816957853, + "loss": 8.7476, + "step": 389900 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001464854989366587, + "loss": 8.7863, + "step": 390000 + }, + { + "epoch": 4.16, + "learning_rate": 0.0001464608060885568, + "loss": 8.6433, + "step": 390100 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014643610962719315, + "loss": 8.7914, + "step": 390200 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014641140955448854, + "loss": 8.6901, + "step": 390300 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014638670587236374, + "loss": 8.7713, + "step": 390400 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014636199858274003, + "loss": 8.6776, + "step": 390500 + }, + { + "epoch": 4.16, + "learning_rate": 0.0001463372876875389, + "loss": 8.6914, + "step": 390600 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014631257318868212, + "loss": 8.6987, + "step": 390700 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014628785508809171, + "loss": 8.6709, + "step": 390800 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014626313338769, + "loss": 8.8769, + "step": 390900 + }, + { + "epoch": 4.16, + "learning_rate": 0.00014623840808939965, + "loss": 8.8109, + "step": 391000 + }, + { + "epoch": 4.17, + "learning_rate": 0.0001462136791951435, + "loss": 8.7367, + "step": 391100 + }, + { + "epoch": 4.17, + "learning_rate": 0.00014618894670684476, + "loss": 8.749, + "step": 391200 + }, + { + "epoch": 4.17, + "learning_rate": 0.00014616421062642681, + "loss": 8.8037, + "step": 391300 + }, + { + "epoch": 4.17, + "learning_rate": 0.00014613947095581343, + "loss": 8.7598, + "step": 391400 + }, + { + "epoch": 4.17, + "learning_rate": 0.00014611472769692863, + "loss": 8.7868, + "step": 391500 + }, + { + "epoch": 4.17, + "learning_rate": 0.0001460899808516967, + "loss": 8.6603, + "step": 391600 + }, + { + "epoch": 4.17, + "learning_rate": 0.00014606523042204214, + "loss": 8.8115, + "step": 391700 + }, + { + "epoch": 4.17, + "learning_rate": 0.00014604047640988987, + "loss": 8.7685, + "step": 391800 + }, + { + "epoch": 4.17, + "learning_rate": 0.000146015718817165, + "loss": 8.7664, + "step": 391900 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014599095764579288, + "loss": 8.7421, + "step": 392000 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014596619289769923, + "loss": 8.8426, + "step": 392100 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014594142457481, + "loss": 8.7327, + "step": 392200 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014591665267905143, + "loss": 8.7414, + "step": 392300 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014589187721235, + "loss": 8.802, + "step": 392400 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014586709817663259, + "loss": 8.7566, + "step": 392500 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014584231557382613, + "loss": 8.7035, + "step": 392600 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001458175294058581, + "loss": 8.7488, + "step": 392700 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014579273967465603, + "loss": 8.7291, + "step": 392800 + }, + { + "epoch": 4.18, + "learning_rate": 0.00014576794638214788, + "loss": 8.6828, + "step": 392900 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014574314953026173, + "loss": 8.6853, + "step": 393000 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014571834912092618, + "loss": 8.7324, + "step": 393100 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014569354515606988, + "loss": 8.7372, + "step": 393200 + }, + { + "epoch": 4.19, + "learning_rate": 0.0001456687376376218, + "loss": 8.7628, + "step": 393300 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014564392656751132, + "loss": 8.78, + "step": 393400 + }, + { + "epoch": 4.19, + "learning_rate": 0.0001456191119476679, + "loss": 8.7421, + "step": 393500 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014559429378002144, + "loss": 8.7306, + "step": 393600 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014556947206650203, + "loss": 8.7756, + "step": 393700 + }, + { + "epoch": 4.19, + "learning_rate": 0.00014554464680904006, + "loss": 8.7759, + "step": 393800 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014551981800956618, + "loss": 8.7544, + "step": 393900 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014549498567001138, + "loss": 8.6484, + "step": 394000 + }, + { + "epoch": 4.2, + "learning_rate": 0.0001454701497923068, + "loss": 8.7412, + "step": 394100 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014544531037838395, + "loss": 8.7507, + "step": 394200 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014542046743017462, + "loss": 8.7346, + "step": 394300 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014539562094961084, + "loss": 8.6961, + "step": 394400 + }, + { + "epoch": 4.2, + "learning_rate": 0.0001453707709386249, + "loss": 8.7081, + "step": 394500 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014534591739914944, + "loss": 8.7133, + "step": 394600 + }, + { + "epoch": 4.2, + "learning_rate": 0.00014532106033311726, + "loss": 8.7808, + "step": 394700 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014529619974246156, + "loss": 8.7151, + "step": 394800 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014527133562911569, + "loss": 8.775, + "step": 394900 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014524646799501335, + "loss": 8.7484, + "step": 395000 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001452215968420885, + "loss": 8.7531, + "step": 395100 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001451967221722754, + "loss": 8.7424, + "step": 395200 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014517184398750855, + "loss": 8.75, + "step": 395300 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001451469622897227, + "loss": 8.7824, + "step": 395400 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014512207708085293, + "loss": 8.8173, + "step": 395500 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014509718836283454, + "loss": 8.6954, + "step": 395600 + }, + { + "epoch": 4.21, + "learning_rate": 0.00014507229613760315, + "loss": 8.708, + "step": 395700 + }, + { + "epoch": 4.22, + "learning_rate": 0.00014504740040709462, + "loss": 8.6666, + "step": 395800 + }, + { + "epoch": 4.22, + "learning_rate": 0.00014502250117324512, + "loss": 8.7723, + "step": 395900 + }, + { + "epoch": 4.22, + "learning_rate": 0.00014499759843799104, + "loss": 8.6953, + "step": 396000 + }, + { + "epoch": 4.22, + "learning_rate": 0.00014497269220326904, + "loss": 8.7308, + "step": 396100 + }, + { + "epoch": 4.22, + "learning_rate": 0.0001449477824710161, + "loss": 8.8029, + "step": 396200 + }, + { + "epoch": 4.22, + "learning_rate": 0.0001449228692431695, + "loss": 8.6063, + "step": 396300 + }, + { + "epoch": 4.22, + "learning_rate": 0.0001448979525216667, + "loss": 8.7625, + "step": 396400 + }, + { + "epoch": 4.22, + "learning_rate": 0.00014487303230844545, + "loss": 8.6348, + "step": 396500 + }, + { + "epoch": 4.22, + "learning_rate": 0.00014484810860544386, + "loss": 8.7507, + "step": 396600 + }, + { + "epoch": 4.23, + "learning_rate": 0.0001448231814146002, + "loss": 8.6592, + "step": 396700 + }, + { + "epoch": 4.23, + "learning_rate": 0.0001447982507378531, + "loss": 8.7541, + "step": 396800 + }, + { + "epoch": 4.23, + "learning_rate": 0.0001447733165771414, + "loss": 8.8388, + "step": 396900 + }, + { + "epoch": 4.23, + "learning_rate": 0.00014474837893440424, + "loss": 8.7402, + "step": 397000 + }, + { + "epoch": 4.23, + "learning_rate": 0.000144723437811581, + "loss": 8.6946, + "step": 397100 + }, + { + "epoch": 4.23, + "learning_rate": 0.00014469849321061137, + "loss": 8.8094, + "step": 397200 + }, + { + "epoch": 4.23, + "learning_rate": 0.00014467354513343533, + "loss": 8.7344, + "step": 397300 + }, + { + "epoch": 4.23, + "learning_rate": 0.000144648593581993, + "loss": 8.7992, + "step": 397400 + }, + { + "epoch": 4.23, + "learning_rate": 0.000144623638558225, + "loss": 8.7473, + "step": 397500 + }, + { + "epoch": 4.23, + "learning_rate": 0.00014459868006407194, + "loss": 8.8354, + "step": 397600 + }, + { + "epoch": 4.24, + "learning_rate": 0.00014457371810147493, + "loss": 8.7254, + "step": 397700 + }, + { + "epoch": 4.24, + "learning_rate": 0.00014454875267237526, + "loss": 8.772, + "step": 397800 + }, + { + "epoch": 4.24, + "learning_rate": 0.00014452378377871448, + "loss": 8.7334, + "step": 397900 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001444988114224344, + "loss": 8.7306, + "step": 398000 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001444738356054772, + "loss": 8.7441, + "step": 398100 + }, + { + "epoch": 4.24, + "learning_rate": 0.00014444885632978517, + "loss": 8.7414, + "step": 398200 + }, + { + "epoch": 4.24, + "learning_rate": 0.000144423873597301, + "loss": 8.7938, + "step": 398300 + }, + { + "epoch": 4.24, + "learning_rate": 0.00014439888740996754, + "loss": 8.7484, + "step": 398400 + }, + { + "epoch": 4.24, + "learning_rate": 0.00014437389776972805, + "loss": 8.6872, + "step": 398500 + }, + { + "epoch": 4.25, + "learning_rate": 0.00014434890467852592, + "loss": 8.7452, + "step": 398600 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001443239081383049, + "loss": 8.8336, + "step": 398700 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001442989081510089, + "loss": 8.7651, + "step": 398800 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001442739047185823, + "loss": 8.787, + "step": 398900 + }, + { + "epoch": 4.25, + "learning_rate": 0.00014424889784296952, + "loss": 8.8022, + "step": 399000 + }, + { + "epoch": 4.25, + "learning_rate": 0.00014422388752611534, + "loss": 8.7374, + "step": 399100 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001441988737699649, + "loss": 8.7278, + "step": 399200 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001441738565764634, + "loss": 8.7864, + "step": 399300 + }, + { + "epoch": 4.25, + "learning_rate": 0.00014414883594755651, + "loss": 8.7278, + "step": 399400 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014412381188519012, + "loss": 8.7477, + "step": 399500 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014409878439131032, + "loss": 8.8015, + "step": 399600 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014407375346786342, + "loss": 8.665, + "step": 399700 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014404871911679622, + "loss": 8.72, + "step": 399800 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014402368134005554, + "loss": 8.7124, + "step": 399900 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014399864013958863, + "loss": 8.7382, + "step": 400000 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014397359551734292, + "loss": 8.8097, + "step": 400100 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014394854747526613, + "loss": 8.7069, + "step": 400200 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014392349601530623, + "loss": 8.6482, + "step": 400300 + }, + { + "epoch": 4.26, + "learning_rate": 0.00014389844113941148, + "loss": 8.7546, + "step": 400400 + }, + { + "epoch": 4.27, + "learning_rate": 0.00014387338284953046, + "loss": 8.8027, + "step": 400500 + }, + { + "epoch": 4.27, + "learning_rate": 0.00014384832114761192, + "loss": 8.6851, + "step": 400600 + }, + { + "epoch": 4.27, + "learning_rate": 0.0001438232560356049, + "loss": 8.71, + "step": 400700 + }, + { + "epoch": 4.27, + "learning_rate": 0.0001437981875154587, + "loss": 8.6673, + "step": 400800 + }, + { + "epoch": 4.27, + "learning_rate": 0.00014377311558912292, + "loss": 8.634, + "step": 400900 + }, + { + "epoch": 4.27, + "learning_rate": 0.0001437480402585474, + "loss": 8.7671, + "step": 401000 + }, + { + "epoch": 4.27, + "learning_rate": 0.00014372296152568228, + "loss": 8.6996, + "step": 401100 + }, + { + "epoch": 4.27, + "learning_rate": 0.0001436978793924779, + "loss": 8.7745, + "step": 401200 + }, + { + "epoch": 4.27, + "learning_rate": 0.00014367279386088492, + "loss": 8.6867, + "step": 401300 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014364770493285425, + "loss": 8.7945, + "step": 401400 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014362261261033704, + "loss": 8.7168, + "step": 401500 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014359751689528474, + "loss": 8.7867, + "step": 401600 + }, + { + "epoch": 4.28, + "learning_rate": 0.000143572417789649, + "loss": 8.7969, + "step": 401700 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014354731529538184, + "loss": 8.6974, + "step": 401800 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014352220941443546, + "loss": 8.69, + "step": 401900 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014349710014876238, + "loss": 8.795, + "step": 402000 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014347198750031527, + "loss": 8.814, + "step": 402100 + }, + { + "epoch": 4.28, + "learning_rate": 0.00014344687147104722, + "loss": 8.7248, + "step": 402200 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014342175206291145, + "loss": 8.7165, + "step": 402300 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014339662927786157, + "loss": 8.7283, + "step": 402400 + }, + { + "epoch": 4.29, + "learning_rate": 0.0001433715031178513, + "loss": 8.6628, + "step": 402500 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014334637358483476, + "loss": 8.674, + "step": 402600 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014332124068076624, + "loss": 8.7194, + "step": 402700 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014329610440760038, + "loss": 8.7785, + "step": 402800 + }, + { + "epoch": 4.29, + "learning_rate": 0.000143270964767292, + "loss": 8.8013, + "step": 402900 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014324582176179616, + "loss": 8.658, + "step": 403000 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014322067539306832, + "loss": 8.7081, + "step": 403100 + }, + { + "epoch": 4.29, + "learning_rate": 0.00014319552566306406, + "loss": 8.7314, + "step": 403200 + }, + { + "epoch": 4.3, + "learning_rate": 0.0001431703725737393, + "loss": 8.7522, + "step": 403300 + }, + { + "epoch": 4.3, + "learning_rate": 0.00014314521612705022, + "loss": 8.7748, + "step": 403400 + }, + { + "epoch": 4.3, + "learning_rate": 0.0001431200563249532, + "loss": 8.7482, + "step": 403500 + }, + { + "epoch": 4.3, + "learning_rate": 0.0001430948931694049, + "loss": 8.7824, + "step": 403600 + }, + { + "epoch": 4.3, + "learning_rate": 0.00014306972666236232, + "loss": 8.6622, + "step": 403700 + }, + { + "epoch": 4.3, + "learning_rate": 0.00014304455680578263, + "loss": 8.7742, + "step": 403800 + }, + { + "epoch": 4.3, + "learning_rate": 0.00014301938360162332, + "loss": 8.6135, + "step": 403900 + }, + { + "epoch": 4.3, + "learning_rate": 0.0001429942070518421, + "loss": 8.7627, + "step": 404000 + }, + { + "epoch": 4.3, + "learning_rate": 0.00014296902715839693, + "loss": 8.7709, + "step": 404100 + }, + { + "epoch": 4.31, + "learning_rate": 0.0001429438439232461, + "loss": 8.7006, + "step": 404200 + }, + { + "epoch": 4.31, + "learning_rate": 0.00014291865734834805, + "loss": 8.7088, + "step": 404300 + }, + { + "epoch": 4.31, + "learning_rate": 0.00014289346743566158, + "loss": 8.6441, + "step": 404400 + }, + { + "epoch": 4.31, + "learning_rate": 0.0001428682741871457, + "loss": 8.7381, + "step": 404500 + }, + { + "epoch": 4.31, + "learning_rate": 0.0001428430776047597, + "loss": 8.6838, + "step": 404600 + }, + { + "epoch": 4.31, + "learning_rate": 0.00014281787769046309, + "loss": 8.7299, + "step": 404700 + }, + { + "epoch": 4.31, + "learning_rate": 0.00014279267444621577, + "loss": 8.671, + "step": 404800 + }, + { + "epoch": 4.31, + "learning_rate": 0.00014276746787397765, + "loss": 8.7405, + "step": 404900 + }, + { + "epoch": 4.31, + "learning_rate": 0.00014274225797570916, + "loss": 8.6298, + "step": 405000 + }, + { + "epoch": 4.31, + "learning_rate": 0.00014271704475337085, + "loss": 8.7434, + "step": 405100 + }, + { + "epoch": 4.32, + "learning_rate": 0.00014269182820892355, + "loss": 8.7398, + "step": 405200 + }, + { + "epoch": 4.32, + "learning_rate": 0.00014266660834432831, + "loss": 8.7565, + "step": 405300 + }, + { + "epoch": 4.32, + "learning_rate": 0.00014264138516154658, + "loss": 8.7111, + "step": 405400 + }, + { + "epoch": 4.32, + "learning_rate": 0.00014261615866253984, + "loss": 8.8154, + "step": 405500 + }, + { + "epoch": 4.32, + "learning_rate": 0.0001425909288492701, + "loss": 8.7876, + "step": 405600 + }, + { + "epoch": 4.32, + "learning_rate": 0.00014256569572369937, + "loss": 8.7099, + "step": 405700 + }, + { + "epoch": 4.32, + "learning_rate": 0.0001425404592877901, + "loss": 8.7479, + "step": 405800 + }, + { + "epoch": 4.32, + "learning_rate": 0.0001425152195435049, + "loss": 8.7033, + "step": 405900 + }, + { + "epoch": 4.32, + "learning_rate": 0.00014248997649280666, + "loss": 8.6822, + "step": 406000 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014246473013765858, + "loss": 8.6669, + "step": 406100 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014243948048002397, + "loss": 8.73, + "step": 406200 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014241422752186663, + "loss": 8.7322, + "step": 406300 + }, + { + "epoch": 4.33, + "learning_rate": 0.0001423889712651504, + "loss": 8.75, + "step": 406400 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014236371171183953, + "loss": 8.8091, + "step": 406500 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014233844886389835, + "loss": 8.7555, + "step": 406600 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014231318272329166, + "loss": 8.7153, + "step": 406700 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014228791329198439, + "loss": 8.6937, + "step": 406800 + }, + { + "epoch": 4.33, + "learning_rate": 0.00014226264057194167, + "loss": 8.766, + "step": 406900 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014223736456512906, + "loss": 8.7583, + "step": 407000 + }, + { + "epoch": 4.34, + "learning_rate": 0.0001422120852735122, + "loss": 8.7242, + "step": 407100 + }, + { + "epoch": 4.34, + "learning_rate": 0.0001421868026990571, + "loss": 8.7035, + "step": 407200 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014216151684373, + "loss": 8.7601, + "step": 407300 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014213622770949737, + "loss": 8.6412, + "step": 407400 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014211093529832594, + "loss": 8.777, + "step": 407500 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014208563961218274, + "loss": 8.6787, + "step": 407600 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014206034065303492, + "loss": 8.6641, + "step": 407700 + }, + { + "epoch": 4.34, + "learning_rate": 0.0001420350384228501, + "loss": 8.7002, + "step": 407800 + }, + { + "epoch": 4.34, + "learning_rate": 0.00014200973292359602, + "loss": 8.64, + "step": 407900 + }, + { + "epoch": 4.35, + "learning_rate": 0.0001419844241572406, + "loss": 8.7028, + "step": 408000 + }, + { + "epoch": 4.35, + "learning_rate": 0.0001419591121257522, + "loss": 8.6801, + "step": 408100 + }, + { + "epoch": 4.35, + "learning_rate": 0.0001419337968310993, + "loss": 8.7466, + "step": 408200 + }, + { + "epoch": 4.35, + "learning_rate": 0.0001419084782752507, + "loss": 8.7682, + "step": 408300 + }, + { + "epoch": 4.35, + "learning_rate": 0.00014188315646017537, + "loss": 8.7528, + "step": 408400 + }, + { + "epoch": 4.35, + "learning_rate": 0.00014185783138784264, + "loss": 8.7488, + "step": 408500 + }, + { + "epoch": 4.35, + "learning_rate": 0.00014183250306022203, + "loss": 8.7204, + "step": 408600 + }, + { + "epoch": 4.35, + "learning_rate": 0.0001418071714792833, + "loss": 8.7317, + "step": 408700 + }, + { + "epoch": 4.35, + "learning_rate": 0.00014178183664699653, + "loss": 8.7512, + "step": 408800 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014175649856533202, + "loss": 8.7478, + "step": 408900 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014173115723626028, + "loss": 8.7577, + "step": 409000 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014170581266175213, + "loss": 8.6999, + "step": 409100 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014168046484377858, + "loss": 8.6793, + "step": 409200 + }, + { + "epoch": 4.36, + "learning_rate": 0.000141655113784311, + "loss": 8.6452, + "step": 409300 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014162975948532087, + "loss": 8.7217, + "step": 409400 + }, + { + "epoch": 4.36, + "learning_rate": 0.0001416044019487801, + "loss": 8.7448, + "step": 409500 + }, + { + "epoch": 4.36, + "learning_rate": 0.0001415790411766606, + "loss": 8.6733, + "step": 409600 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014155367717093482, + "loss": 8.6687, + "step": 409700 + }, + { + "epoch": 4.36, + "learning_rate": 0.00014152830993357526, + "loss": 8.679, + "step": 409800 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014150293946655468, + "loss": 8.7147, + "step": 409900 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014147756577184624, + "loss": 8.7292, + "step": 410000 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014145218885142322, + "loss": 8.6794, + "step": 410100 + }, + { + "epoch": 4.37, + "learning_rate": 0.0001414268087072592, + "loss": 8.7394, + "step": 410200 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014140142534132795, + "loss": 8.6776, + "step": 410300 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014137603875560357, + "loss": 8.7311, + "step": 410400 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014135064895206035, + "loss": 8.6413, + "step": 410500 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014132525593267293, + "loss": 8.6119, + "step": 410600 + }, + { + "epoch": 4.37, + "learning_rate": 0.00014129985969941604, + "loss": 8.8189, + "step": 410700 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014127446025426478, + "loss": 8.6761, + "step": 410800 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014124905759919451, + "loss": 8.7531, + "step": 410900 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014122365173618073, + "loss": 8.6942, + "step": 411000 + }, + { + "epoch": 4.38, + "learning_rate": 0.0001411982426671993, + "loss": 8.7264, + "step": 411100 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014117283039422624, + "loss": 8.6969, + "step": 411200 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014114741491923793, + "loss": 8.6689, + "step": 411300 + }, + { + "epoch": 4.38, + "learning_rate": 0.0001411219962442109, + "loss": 8.6462, + "step": 411400 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014109657437112196, + "loss": 8.7247, + "step": 411500 + }, + { + "epoch": 4.38, + "learning_rate": 0.00014107114930194816, + "loss": 8.6709, + "step": 411600 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014104572103866683, + "loss": 8.709, + "step": 411700 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014102028958325552, + "loss": 8.7175, + "step": 411800 + }, + { + "epoch": 4.39, + "learning_rate": 0.0001409948549376921, + "loss": 8.7289, + "step": 411900 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014096941710395447, + "loss": 8.6513, + "step": 412000 + }, + { + "epoch": 4.39, + "learning_rate": 0.0001409439760840211, + "loss": 8.6931, + "step": 412100 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014091853187987046, + "loss": 8.6852, + "step": 412200 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014089308449348134, + "loss": 8.782, + "step": 412300 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014086763392683283, + "loss": 8.7775, + "step": 412400 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014084218018190416, + "loss": 8.6834, + "step": 412500 + }, + { + "epoch": 4.39, + "learning_rate": 0.00014081672326067494, + "loss": 8.854, + "step": 412600 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014079126316512492, + "loss": 8.768, + "step": 412700 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014076579989723414, + "loss": 8.7178, + "step": 412800 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014074033345898288, + "loss": 8.7832, + "step": 412900 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014071486385235167, + "loss": 8.7066, + "step": 413000 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014068939107932126, + "loss": 8.7867, + "step": 413100 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014066391514187273, + "loss": 8.7645, + "step": 413200 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014063843604198727, + "loss": 8.6238, + "step": 413300 + }, + { + "epoch": 4.4, + "learning_rate": 0.00014061295378164646, + "loss": 8.65, + "step": 413400 + }, + { + "epoch": 4.4, + "learning_rate": 0.000140587468362832, + "loss": 8.6794, + "step": 413500 + }, + { + "epoch": 4.41, + "learning_rate": 0.000140561979787526, + "loss": 8.7251, + "step": 413600 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014053648805771056, + "loss": 8.751, + "step": 413700 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014051099317536828, + "loss": 8.7017, + "step": 413800 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014048549514248185, + "loss": 8.6813, + "step": 413900 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014045999396103425, + "loss": 8.7501, + "step": 414000 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014043448963300874, + "loss": 8.7054, + "step": 414100 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014040898216038879, + "loss": 8.6838, + "step": 414200 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014038347154515812, + "loss": 8.7231, + "step": 414300 + }, + { + "epoch": 4.41, + "learning_rate": 0.00014035795778930067, + "loss": 8.6826, + "step": 414400 + }, + { + "epoch": 4.41, + "learning_rate": 0.0001403324408948007, + "loss": 8.7641, + "step": 414500 + }, + { + "epoch": 4.42, + "learning_rate": 0.0001403069208636426, + "loss": 8.6836, + "step": 414600 + }, + { + "epoch": 4.42, + "learning_rate": 0.0001402813976978111, + "loss": 8.8353, + "step": 414700 + }, + { + "epoch": 4.42, + "learning_rate": 0.00014025587139929112, + "loss": 8.6606, + "step": 414800 + }, + { + "epoch": 4.42, + "learning_rate": 0.00014023034197006785, + "loss": 8.7689, + "step": 414900 + }, + { + "epoch": 4.42, + "learning_rate": 0.00014020480941212673, + "loss": 8.7038, + "step": 415000 + }, + { + "epoch": 4.42, + "learning_rate": 0.0001401792737274534, + "loss": 8.7108, + "step": 415100 + }, + { + "epoch": 4.42, + "learning_rate": 0.00014015373491803383, + "loss": 8.6715, + "step": 415200 + }, + { + "epoch": 4.42, + "learning_rate": 0.0001401281929858541, + "loss": 8.7156, + "step": 415300 + }, + { + "epoch": 4.42, + "learning_rate": 0.0001401026479329007, + "loss": 8.6068, + "step": 415400 + }, + { + "epoch": 4.43, + "learning_rate": 0.00014007709976116014, + "loss": 8.8547, + "step": 415500 + }, + { + "epoch": 4.43, + "learning_rate": 0.00014005154847261946, + "loss": 8.5942, + "step": 415600 + }, + { + "epoch": 4.43, + "learning_rate": 0.00014002599406926565, + "loss": 8.6637, + "step": 415700 + }, + { + "epoch": 4.43, + "learning_rate": 0.00014000043655308618, + "loss": 8.7994, + "step": 415800 + }, + { + "epoch": 4.43, + "learning_rate": 0.00013997487592606856, + "loss": 8.6776, + "step": 415900 + }, + { + "epoch": 4.43, + "learning_rate": 0.00013994931219020074, + "loss": 8.6372, + "step": 416000 + }, + { + "epoch": 4.43, + "learning_rate": 0.00013992374534747078, + "loss": 8.7512, + "step": 416100 + }, + { + "epoch": 4.43, + "learning_rate": 0.00013989817539986697, + "loss": 8.7217, + "step": 416200 + }, + { + "epoch": 4.43, + "learning_rate": 0.00013987260234937798, + "loss": 8.6435, + "step": 416300 + }, + { + "epoch": 4.44, + "learning_rate": 0.00013984702619799255, + "loss": 8.7559, + "step": 416400 + }, + { + "epoch": 4.44, + "learning_rate": 0.00013982144694769979, + "loss": 8.6317, + "step": 416500 + }, + { + "epoch": 4.44, + "learning_rate": 0.00013979586460048895, + "loss": 8.6981, + "step": 416600 + }, + { + "epoch": 4.44, + "learning_rate": 0.00013977027915834962, + "loss": 8.6685, + "step": 416700 + }, + { + "epoch": 4.44, + "learning_rate": 0.00013974469062327154, + "loss": 8.6485, + "step": 416800 + }, + { + "epoch": 4.44, + "learning_rate": 0.0001397190989972448, + "loss": 8.7616, + "step": 416900 + }, + { + "epoch": 4.44, + "learning_rate": 0.00013969350428225954, + "loss": 8.7101, + "step": 417000 + }, + { + "epoch": 4.44, + "learning_rate": 0.0001396679064803064, + "loss": 8.7375, + "step": 417100 + }, + { + "epoch": 4.44, + "learning_rate": 0.00013964230559337603, + "loss": 8.7378, + "step": 417200 + }, + { + "epoch": 4.44, + "learning_rate": 0.0001396167016234595, + "loss": 8.7182, + "step": 417300 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013959109457254793, + "loss": 8.6974, + "step": 417400 + }, + { + "epoch": 4.45, + "learning_rate": 0.0001395654844426329, + "loss": 8.7892, + "step": 417500 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013953987123570597, + "loss": 8.6849, + "step": 417600 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013951425495375923, + "loss": 8.7437, + "step": 417700 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013948863559878476, + "loss": 8.7089, + "step": 417800 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013946301317277498, + "loss": 8.714, + "step": 417900 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013943738767772262, + "loss": 8.6958, + "step": 418000 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013941175911562053, + "loss": 8.7432, + "step": 418100 + }, + { + "epoch": 4.45, + "learning_rate": 0.00013938612748846184, + "loss": 8.6565, + "step": 418200 + }, + { + "epoch": 4.46, + "learning_rate": 0.00013936049279823993, + "loss": 8.7864, + "step": 418300 + }, + { + "epoch": 4.46, + "learning_rate": 0.00013933485504694846, + "loss": 8.7149, + "step": 418400 + }, + { + "epoch": 4.46, + "learning_rate": 0.0001393092142365812, + "loss": 8.7531, + "step": 418500 + }, + { + "epoch": 4.46, + "learning_rate": 0.0001392835703691323, + "loss": 8.7268, + "step": 418600 + }, + { + "epoch": 4.46, + "learning_rate": 0.00013925792344659604, + "loss": 8.6573, + "step": 418700 + }, + { + "epoch": 4.46, + "learning_rate": 0.00013923227347096705, + "loss": 8.6895, + "step": 418800 + }, + { + "epoch": 4.46, + "learning_rate": 0.00013920662044424007, + "loss": 8.7445, + "step": 418900 + }, + { + "epoch": 4.46, + "learning_rate": 0.00013918096436841014, + "loss": 8.6716, + "step": 419000 + }, + { + "epoch": 4.46, + "learning_rate": 0.0001391553052454726, + "loss": 8.6521, + "step": 419100 + }, + { + "epoch": 4.47, + "learning_rate": 0.00013912964307742287, + "loss": 8.7822, + "step": 419200 + }, + { + "epoch": 4.47, + "learning_rate": 0.00013910397786625679, + "loss": 8.677, + "step": 419300 + }, + { + "epoch": 4.47, + "learning_rate": 0.00013907830961397023, + "loss": 8.7763, + "step": 419400 + }, + { + "epoch": 4.47, + "learning_rate": 0.00013905263832255957, + "loss": 8.6849, + "step": 419500 + }, + { + "epoch": 4.47, + "learning_rate": 0.0001390269639940211, + "loss": 8.6985, + "step": 419600 + }, + { + "epoch": 4.47, + "learning_rate": 0.00013900128663035168, + "loss": 8.6367, + "step": 419700 + }, + { + "epoch": 4.47, + "learning_rate": 0.0001389756062335481, + "loss": 8.712, + "step": 419800 + }, + { + "epoch": 4.47, + "learning_rate": 0.00013894992280560765, + "loss": 8.7225, + "step": 419900 + }, + { + "epoch": 4.47, + "learning_rate": 0.0001389242363485276, + "loss": 8.7023, + "step": 420000 + }, + { + "epoch": 4.47, + "learning_rate": 0.0001388985468643057, + "loss": 8.7156, + "step": 420100 + }, + { + "epoch": 4.48, + "learning_rate": 0.00013887285435493978, + "loss": 8.6507, + "step": 420200 + }, + { + "epoch": 4.48, + "learning_rate": 0.00013884715882242797, + "loss": 8.6994, + "step": 420300 + }, + { + "epoch": 4.48, + "learning_rate": 0.00013882146026876854, + "loss": 8.6918, + "step": 420400 + }, + { + "epoch": 4.48, + "learning_rate": 0.00013879575869596016, + "loss": 8.7041, + "step": 420500 + }, + { + "epoch": 4.48, + "learning_rate": 0.0001387700541060016, + "loss": 8.7694, + "step": 420600 + }, + { + "epoch": 4.48, + "learning_rate": 0.00013874434650089192, + "loss": 8.6939, + "step": 420700 + }, + { + "epoch": 4.48, + "learning_rate": 0.0001387186358826304, + "loss": 8.664, + "step": 420800 + }, + { + "epoch": 4.48, + "learning_rate": 0.00013869292225321658, + "loss": 8.6917, + "step": 420900 + }, + { + "epoch": 4.48, + "learning_rate": 0.00013866720561465018, + "loss": 8.7328, + "step": 421000 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013864148596893117, + "loss": 8.7181, + "step": 421100 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013861576331805983, + "loss": 8.661, + "step": 421200 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013859003766403656, + "loss": 8.5957, + "step": 421300 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013856430900886205, + "loss": 8.7683, + "step": 421400 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013853857735453726, + "loss": 8.682, + "step": 421500 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013851284270306334, + "loss": 8.7608, + "step": 421600 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013848710505644162, + "loss": 8.7286, + "step": 421700 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013846136441667375, + "loss": 8.7232, + "step": 421800 + }, + { + "epoch": 4.49, + "learning_rate": 0.0001384356207857616, + "loss": 8.7714, + "step": 421900 + }, + { + "epoch": 4.49, + "learning_rate": 0.00013840987416570725, + "loss": 8.6999, + "step": 422000 + }, + { + "epoch": 4.5, + "learning_rate": 0.000138384124558513, + "loss": 8.6985, + "step": 422100 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001383583719661814, + "loss": 8.6643, + "step": 422200 + }, + { + "epoch": 4.5, + "learning_rate": 0.00013833261639071526, + "loss": 8.7499, + "step": 422300 + }, + { + "epoch": 4.5, + "learning_rate": 0.00013830685783411756, + "loss": 8.7492, + "step": 422400 + }, + { + "epoch": 4.5, + "learning_rate": 0.00013828109629839154, + "loss": 8.7182, + "step": 422500 + }, + { + "epoch": 4.5, + "learning_rate": 0.00013825533178554072, + "loss": 8.7054, + "step": 422600 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001382295642975688, + "loss": 8.6815, + "step": 422700 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001382037938364797, + "loss": 8.7182, + "step": 422800 + }, + { + "epoch": 4.5, + "learning_rate": 0.00013817802040427756, + "loss": 8.6887, + "step": 422900 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013815224400296687, + "loss": 8.7763, + "step": 423000 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013812646463455219, + "loss": 8.6407, + "step": 423100 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013810068230103842, + "loss": 8.6498, + "step": 423200 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013807489700443063, + "loss": 8.6364, + "step": 423300 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013804910874673414, + "loss": 8.6941, + "step": 423400 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013802331752995456, + "loss": 8.7638, + "step": 423500 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013799752335609764, + "loss": 8.656, + "step": 423600 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013797172622716935, + "loss": 8.6915, + "step": 423700 + }, + { + "epoch": 4.51, + "learning_rate": 0.00013794592614517603, + "loss": 8.7478, + "step": 423800 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013792012311212405, + "loss": 8.6434, + "step": 423900 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013789431713002025, + "loss": 8.7139, + "step": 424000 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013786850820087142, + "loss": 8.6479, + "step": 424100 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001378426963266848, + "loss": 8.6681, + "step": 424200 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013781688150946774, + "loss": 8.687, + "step": 424300 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013779106375122794, + "loss": 8.689, + "step": 424400 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013776524305397317, + "loss": 8.6652, + "step": 424500 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013773941941971153, + "loss": 8.7472, + "step": 424600 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013771359285045135, + "loss": 8.6734, + "step": 424700 + }, + { + "epoch": 4.52, + "learning_rate": 0.00013768776334820114, + "loss": 8.7114, + "step": 424800 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013766193091496967, + "loss": 8.7017, + "step": 424900 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013763609555276596, + "loss": 8.686, + "step": 425000 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013761025726359918, + "loss": 8.6917, + "step": 425100 + }, + { + "epoch": 4.53, + "learning_rate": 0.0001375844160494788, + "loss": 8.6859, + "step": 425200 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013755857191241453, + "loss": 8.7364, + "step": 425300 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013753272485441618, + "loss": 8.6769, + "step": 425400 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013750687487749396, + "loss": 8.717, + "step": 425500 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013748102198365817, + "loss": 8.585, + "step": 425600 + }, + { + "epoch": 4.53, + "learning_rate": 0.00013745516617491945, + "loss": 8.6706, + "step": 425700 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013742930745328858, + "loss": 8.706, + "step": 425800 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013740344582077656, + "loss": 8.6708, + "step": 425900 + }, + { + "epoch": 4.54, + "learning_rate": 0.0001373775812793947, + "loss": 8.6906, + "step": 426000 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013735171383115447, + "loss": 8.7357, + "step": 426100 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013732584347806763, + "loss": 8.6359, + "step": 426200 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013729997022214602, + "loss": 8.7372, + "step": 426300 + }, + { + "epoch": 4.54, + "learning_rate": 0.0001372740940654019, + "loss": 8.6842, + "step": 426400 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013724821500984762, + "loss": 8.6463, + "step": 426500 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013722233305749578, + "loss": 8.6714, + "step": 426600 + }, + { + "epoch": 4.54, + "learning_rate": 0.00013719644821035925, + "loss": 8.6289, + "step": 426700 + }, + { + "epoch": 4.55, + "learning_rate": 0.0001371705604704511, + "loss": 8.7016, + "step": 426800 + }, + { + "epoch": 4.55, + "learning_rate": 0.00013714466983978462, + "loss": 8.6542, + "step": 426900 + }, + { + "epoch": 4.55, + "learning_rate": 0.0001371187763203733, + "loss": 8.6677, + "step": 427000 + }, + { + "epoch": 4.55, + "learning_rate": 0.00013709287991423095, + "loss": 8.7297, + "step": 427100 + }, + { + "epoch": 4.55, + "learning_rate": 0.00013706698062337144, + "loss": 8.6993, + "step": 427200 + }, + { + "epoch": 4.55, + "learning_rate": 0.00013704107844980903, + "loss": 8.7267, + "step": 427300 + }, + { + "epoch": 4.55, + "learning_rate": 0.0001370151733955581, + "loss": 8.76, + "step": 427400 + }, + { + "epoch": 4.55, + "learning_rate": 0.0001369892654626333, + "loss": 8.5837, + "step": 427500 + }, + { + "epoch": 4.55, + "learning_rate": 0.0001369633546530495, + "loss": 8.7575, + "step": 427600 + }, + { + "epoch": 4.56, + "learning_rate": 0.00013693744096882182, + "loss": 8.6222, + "step": 427700 + }, + { + "epoch": 4.56, + "learning_rate": 0.00013691152441196546, + "loss": 8.7573, + "step": 427800 + }, + { + "epoch": 4.56, + "learning_rate": 0.00013688560498449607, + "loss": 8.7192, + "step": 427900 + }, + { + "epoch": 4.56, + "learning_rate": 0.00013685968268842936, + "loss": 8.7428, + "step": 428000 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001368337575257813, + "loss": 8.7482, + "step": 428100 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001368078294985681, + "loss": 8.7187, + "step": 428200 + }, + { + "epoch": 4.56, + "learning_rate": 0.00013678189860880623, + "loss": 8.6596, + "step": 428300 + }, + { + "epoch": 4.56, + "learning_rate": 0.00013675596485851225, + "loss": 8.7009, + "step": 428400 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001367300282497031, + "loss": 8.6592, + "step": 428500 + }, + { + "epoch": 4.57, + "learning_rate": 0.00013670408878439585, + "loss": 8.6039, + "step": 428600 + }, + { + "epoch": 4.57, + "learning_rate": 0.0001366781464646078, + "loss": 8.7369, + "step": 428700 + }, + { + "epoch": 4.57, + "learning_rate": 0.00013665220129235647, + "loss": 8.7044, + "step": 428800 + }, + { + "epoch": 4.57, + "learning_rate": 0.0001366262532696597, + "loss": 8.7216, + "step": 428900 + }, + { + "epoch": 4.57, + "learning_rate": 0.0001366003023985354, + "loss": 8.653, + "step": 429000 + }, + { + "epoch": 4.57, + "learning_rate": 0.00013657434868100178, + "loss": 8.7042, + "step": 429100 + }, + { + "epoch": 4.57, + "learning_rate": 0.00013654839211907734, + "loss": 8.6737, + "step": 429200 + }, + { + "epoch": 4.57, + "learning_rate": 0.00013652243271478057, + "loss": 8.7116, + "step": 429300 + }, + { + "epoch": 4.57, + "learning_rate": 0.0001364964704701305, + "loss": 8.7641, + "step": 429400 + }, + { + "epoch": 4.57, + "learning_rate": 0.00013647050538714609, + "loss": 8.7082, + "step": 429500 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013644453746784676, + "loss": 8.7369, + "step": 429600 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013641856671425196, + "loss": 8.7461, + "step": 429700 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013639259312838142, + "loss": 8.7246, + "step": 429800 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013636661671225517, + "loss": 8.7108, + "step": 429900 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013634063746789337, + "loss": 8.5821, + "step": 430000 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013631465539731642, + "loss": 8.7039, + "step": 430100 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013628867050254496, + "loss": 8.705, + "step": 430200 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013626268278559987, + "loss": 8.7553, + "step": 430300 + }, + { + "epoch": 4.58, + "learning_rate": 0.00013623669224850212, + "loss": 8.7093, + "step": 430400 + }, + { + "epoch": 4.59, + "learning_rate": 0.00013621069889327314, + "loss": 8.6218, + "step": 430500 + }, + { + "epoch": 4.59, + "learning_rate": 0.0001361847027219343, + "loss": 8.6991, + "step": 430600 + }, + { + "epoch": 4.59, + "learning_rate": 0.0001361587037365074, + "loss": 8.6372, + "step": 430700 + }, + { + "epoch": 4.59, + "learning_rate": 0.00013613270193901436, + "loss": 8.7829, + "step": 430800 + }, + { + "epoch": 4.59, + "learning_rate": 0.00013610669733147738, + "loss": 8.726, + "step": 430900 + }, + { + "epoch": 4.59, + "learning_rate": 0.00013608068991591879, + "loss": 8.662, + "step": 431000 + }, + { + "epoch": 4.59, + "learning_rate": 0.0001360546796943612, + "loss": 8.6354, + "step": 431100 + }, + { + "epoch": 4.59, + "learning_rate": 0.00013602866666882743, + "loss": 8.6808, + "step": 431200 + }, + { + "epoch": 4.59, + "learning_rate": 0.00013600265084134051, + "loss": 8.6403, + "step": 431300 + }, + { + "epoch": 4.59, + "learning_rate": 0.00013597663221392376, + "loss": 8.6489, + "step": 431400 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013595061078860058, + "loss": 8.6041, + "step": 431500 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013592458656739467, + "loss": 8.643, + "step": 431600 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013589855955232992, + "loss": 8.6543, + "step": 431700 + }, + { + "epoch": 4.6, + "learning_rate": 0.0001358725297454305, + "loss": 8.6367, + "step": 431800 + }, + { + "epoch": 4.6, + "learning_rate": 0.0001358464971487207, + "loss": 8.7222, + "step": 431900 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013582046176422515, + "loss": 8.6482, + "step": 432000 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013579442359396856, + "loss": 8.7105, + "step": 432100 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013576838263997594, + "loss": 8.6948, + "step": 432200 + }, + { + "epoch": 4.6, + "learning_rate": 0.00013574233890427251, + "loss": 8.6586, + "step": 432300 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013571629238888367, + "loss": 8.7153, + "step": 432400 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013569024309583507, + "loss": 8.7663, + "step": 432500 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013566419102715256, + "loss": 8.6856, + "step": 432600 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013563813618486226, + "loss": 8.6653, + "step": 432700 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013561207857099036, + "loss": 8.6845, + "step": 432800 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013558601818756347, + "loss": 8.7472, + "step": 432900 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013555995503660822, + "loss": 8.6883, + "step": 433000 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013553388912015163, + "loss": 8.6863, + "step": 433100 + }, + { + "epoch": 4.61, + "learning_rate": 0.00013550782044022077, + "loss": 8.5969, + "step": 433200 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013548174899884305, + "loss": 8.6478, + "step": 433300 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013545567479804604, + "loss": 8.6775, + "step": 433400 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013542959783985754, + "loss": 8.6404, + "step": 433500 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013540351812630554, + "loss": 8.6695, + "step": 433600 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013537743565941824, + "loss": 8.5857, + "step": 433700 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013535135044122416, + "loss": 8.7694, + "step": 433800 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013532526247375185, + "loss": 8.6878, + "step": 433900 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013529917175903025, + "loss": 8.6533, + "step": 434000 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013527307829908837, + "loss": 8.7287, + "step": 434100 + }, + { + "epoch": 4.62, + "learning_rate": 0.00013524698209595556, + "loss": 8.6564, + "step": 434200 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001352208831516613, + "loss": 8.7178, + "step": 434300 + }, + { + "epoch": 4.63, + "learning_rate": 0.00013519478146823533, + "loss": 8.6649, + "step": 434400 + }, + { + "epoch": 4.63, + "learning_rate": 0.00013516867704770755, + "loss": 8.7133, + "step": 434500 + }, + { + "epoch": 4.63, + "learning_rate": 0.00013514256989210813, + "loss": 8.6512, + "step": 434600 + }, + { + "epoch": 4.63, + "learning_rate": 0.00013511646000346735, + "loss": 8.6983, + "step": 434700 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001350903473838159, + "loss": 8.7628, + "step": 434800 + }, + { + "epoch": 4.63, + "learning_rate": 0.00013506423203518448, + "loss": 8.6477, + "step": 434900 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001350381139596041, + "loss": 8.6548, + "step": 435000 + }, + { + "epoch": 4.63, + "learning_rate": 0.00013501199315910597, + "loss": 8.7342, + "step": 435100 + }, + { + "epoch": 4.64, + "learning_rate": 0.00013498586963572152, + "loss": 8.6523, + "step": 435200 + }, + { + "epoch": 4.64, + "learning_rate": 0.00013495974339148236, + "loss": 8.6789, + "step": 435300 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001349336144284203, + "loss": 8.6512, + "step": 435400 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001349074827485675, + "loss": 8.6475, + "step": 435500 + }, + { + "epoch": 4.64, + "learning_rate": 0.00013488134835395609, + "loss": 8.6317, + "step": 435600 + }, + { + "epoch": 4.64, + "learning_rate": 0.00013485521124661864, + "loss": 8.6971, + "step": 435700 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001348290714285878, + "loss": 8.7406, + "step": 435800 + }, + { + "epoch": 4.64, + "learning_rate": 0.00013480292890189648, + "loss": 8.7145, + "step": 435900 + }, + { + "epoch": 4.64, + "learning_rate": 0.00013477678366857773, + "loss": 8.7399, + "step": 436000 + }, + { + "epoch": 4.65, + "learning_rate": 0.00013475063573066497, + "loss": 8.6595, + "step": 436100 + }, + { + "epoch": 4.65, + "learning_rate": 0.00013472448509019161, + "loss": 8.7065, + "step": 436200 + }, + { + "epoch": 4.65, + "learning_rate": 0.0001346983317491915, + "loss": 8.7297, + "step": 436300 + }, + { + "epoch": 4.65, + "learning_rate": 0.00013467217570969851, + "loss": 8.6082, + "step": 436400 + }, + { + "epoch": 4.65, + "learning_rate": 0.00013464601697374686, + "loss": 8.6914, + "step": 436500 + }, + { + "epoch": 4.65, + "learning_rate": 0.00013461985554337087, + "loss": 8.7473, + "step": 436600 + }, + { + "epoch": 4.65, + "learning_rate": 0.0001345936914206051, + "loss": 8.7084, + "step": 436700 + }, + { + "epoch": 4.65, + "learning_rate": 0.0001345675246074844, + "loss": 8.7014, + "step": 436800 + }, + { + "epoch": 4.65, + "learning_rate": 0.00013454135510604368, + "loss": 8.6319, + "step": 436900 + }, + { + "epoch": 4.65, + "learning_rate": 0.00013451518291831825, + "loss": 8.6675, + "step": 437000 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013448900804634344, + "loss": 8.6828, + "step": 437100 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013446283049215493, + "loss": 8.6202, + "step": 437200 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013443665025778846, + "loss": 8.7234, + "step": 437300 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013441046734528021, + "loss": 8.6713, + "step": 437400 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013438428175666632, + "loss": 8.7172, + "step": 437500 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013435809349398328, + "loss": 8.6464, + "step": 437600 + }, + { + "epoch": 4.66, + "learning_rate": 0.0001343319025592678, + "loss": 8.7326, + "step": 437700 + }, + { + "epoch": 4.66, + "learning_rate": 0.0001343057089545566, + "loss": 8.6928, + "step": 437800 + }, + { + "epoch": 4.66, + "learning_rate": 0.00013427951268188694, + "loss": 8.6686, + "step": 437900 + }, + { + "epoch": 4.67, + "learning_rate": 0.00013425331374329602, + "loss": 8.6542, + "step": 438000 + }, + { + "epoch": 4.67, + "learning_rate": 0.00013422711214082134, + "loss": 8.7078, + "step": 438100 + }, + { + "epoch": 4.67, + "learning_rate": 0.00013420090787650058, + "loss": 8.7437, + "step": 438200 + }, + { + "epoch": 4.67, + "learning_rate": 0.0001341747009523717, + "loss": 8.6458, + "step": 438300 + }, + { + "epoch": 4.67, + "learning_rate": 0.00013414849137047277, + "loss": 8.7386, + "step": 438400 + }, + { + "epoch": 4.67, + "learning_rate": 0.00013412227913284214, + "loss": 8.6853, + "step": 438500 + }, + { + "epoch": 4.67, + "learning_rate": 0.00013409606424151833, + "loss": 8.7797, + "step": 438600 + }, + { + "epoch": 4.67, + "learning_rate": 0.0001340698466985401, + "loss": 8.6758, + "step": 438700 + }, + { + "epoch": 4.67, + "learning_rate": 0.0001340436265059463, + "loss": 8.6902, + "step": 438800 + }, + { + "epoch": 4.67, + "learning_rate": 0.0001340174036657762, + "loss": 8.6261, + "step": 438900 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013399117818006906, + "loss": 8.7246, + "step": 439000 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013396495005086448, + "loss": 8.5702, + "step": 439100 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013393871928020222, + "loss": 8.6844, + "step": 439200 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013391248587012227, + "loss": 8.7315, + "step": 439300 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013388624982266475, + "loss": 8.7408, + "step": 439400 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013386001113987008, + "loss": 8.5877, + "step": 439500 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013383376982377886, + "loss": 8.6458, + "step": 439600 + }, + { + "epoch": 4.68, + "learning_rate": 0.0001338075258764318, + "loss": 8.7653, + "step": 439700 + }, + { + "epoch": 4.68, + "learning_rate": 0.00013378127929987004, + "loss": 8.7303, + "step": 439800 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013375503009613462, + "loss": 8.6847, + "step": 439900 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013372877826726707, + "loss": 8.66, + "step": 440000 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013370252381530887, + "loss": 8.6867, + "step": 440100 + }, + { + "epoch": 4.69, + "learning_rate": 0.000133676266742302, + "loss": 8.6833, + "step": 440200 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013365000705028827, + "loss": 8.743, + "step": 440300 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013362374474131008, + "loss": 8.6252, + "step": 440400 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013359747981740976, + "loss": 8.68, + "step": 440500 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013357121228062995, + "loss": 8.6574, + "step": 440600 + }, + { + "epoch": 4.69, + "learning_rate": 0.00013354494213301348, + "loss": 8.6804, + "step": 440700 + }, + { + "epoch": 4.7, + "learning_rate": 0.0001335186693766034, + "loss": 8.6704, + "step": 440800 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013349239401344297, + "loss": 8.6634, + "step": 440900 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013346611604557552, + "loss": 8.6001, + "step": 441000 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013343983547504479, + "loss": 8.7114, + "step": 441100 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013341355230389457, + "loss": 8.7539, + "step": 441200 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013338726653416898, + "loss": 8.6807, + "step": 441300 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013336097816791219, + "loss": 8.6907, + "step": 441400 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013333468720716865, + "loss": 8.5849, + "step": 441500 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013330839365398306, + "loss": 8.6622, + "step": 441600 + }, + { + "epoch": 4.7, + "learning_rate": 0.00013328209751040023, + "loss": 8.6906, + "step": 441700 + }, + { + "epoch": 4.71, + "learning_rate": 0.00013325579877846526, + "loss": 8.6419, + "step": 441800 + }, + { + "epoch": 4.71, + "learning_rate": 0.00013322949746022333, + "loss": 8.6871, + "step": 441900 + }, + { + "epoch": 4.71, + "learning_rate": 0.00013320319355772, + "loss": 8.6406, + "step": 442000 + }, + { + "epoch": 4.71, + "learning_rate": 0.0001331768870730008, + "loss": 8.5895, + "step": 442100 + }, + { + "epoch": 4.71, + "learning_rate": 0.0001331505780081117, + "loss": 8.7237, + "step": 442200 + }, + { + "epoch": 4.71, + "learning_rate": 0.00013312426636509871, + "loss": 8.7143, + "step": 442300 + }, + { + "epoch": 4.71, + "learning_rate": 0.0001330979521460081, + "loss": 8.6556, + "step": 442400 + }, + { + "epoch": 4.71, + "learning_rate": 0.0001330716353528863, + "loss": 8.674, + "step": 442500 + }, + { + "epoch": 4.71, + "learning_rate": 0.00013304531598778003, + "loss": 8.7472, + "step": 442600 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001330189940527361, + "loss": 8.753, + "step": 442700 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013299266954980158, + "loss": 8.6215, + "step": 442800 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013296634248102373, + "loss": 8.6842, + "step": 442900 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013294001284845002, + "loss": 8.6099, + "step": 443000 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013291368065412808, + "loss": 8.7121, + "step": 443100 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013288734590010583, + "loss": 8.7227, + "step": 443200 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013286100858843125, + "loss": 8.6447, + "step": 443300 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001328346687211526, + "loss": 8.6375, + "step": 443400 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013280832630031846, + "loss": 8.7078, + "step": 443500 + }, + { + "epoch": 4.72, + "learning_rate": 0.00013278198132797727, + "loss": 8.6439, + "step": 443600 + }, + { + "epoch": 4.73, + "learning_rate": 0.00013275563380617809, + "loss": 8.6797, + "step": 443700 + }, + { + "epoch": 4.73, + "learning_rate": 0.00013272928373696981, + "loss": 8.5402, + "step": 443800 + }, + { + "epoch": 4.73, + "learning_rate": 0.0001327029311224018, + "loss": 8.6718, + "step": 443900 + }, + { + "epoch": 4.73, + "learning_rate": 0.00013267657596452342, + "loss": 8.6351, + "step": 444000 + }, + { + "epoch": 4.73, + "learning_rate": 0.00013265021826538436, + "loss": 8.6366, + "step": 444100 + }, + { + "epoch": 4.73, + "learning_rate": 0.00013262385802703444, + "loss": 8.6156, + "step": 444200 + }, + { + "epoch": 4.73, + "learning_rate": 0.0001325974952515237, + "loss": 8.604, + "step": 444300 + }, + { + "epoch": 4.73, + "learning_rate": 0.00013257112994090236, + "loss": 8.759, + "step": 444400 + }, + { + "epoch": 4.73, + "learning_rate": 0.0001325447620972209, + "loss": 8.6781, + "step": 444500 + }, + { + "epoch": 4.74, + "learning_rate": 0.00013251839172252994, + "loss": 8.6539, + "step": 444600 + }, + { + "epoch": 4.74, + "learning_rate": 0.00013249201881888022, + "loss": 8.6305, + "step": 444700 + }, + { + "epoch": 4.74, + "learning_rate": 0.0001324656433883229, + "loss": 8.6443, + "step": 444800 + }, + { + "epoch": 4.74, + "learning_rate": 0.0001324392654329091, + "loss": 8.6715, + "step": 444900 + }, + { + "epoch": 4.74, + "learning_rate": 0.00013241288495469028, + "loss": 8.5667, + "step": 445000 + }, + { + "epoch": 4.74, + "learning_rate": 0.00013238650195571803, + "loss": 8.7046, + "step": 445100 + }, + { + "epoch": 4.74, + "learning_rate": 0.00013236011643804417, + "loss": 8.6405, + "step": 445200 + }, + { + "epoch": 4.74, + "learning_rate": 0.00013233372840372072, + "loss": 8.6918, + "step": 445300 + }, + { + "epoch": 4.74, + "learning_rate": 0.00013230733785479985, + "loss": 8.7232, + "step": 445400 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013228094479333395, + "loss": 8.6141, + "step": 445500 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013225454922137568, + "loss": 8.7148, + "step": 445600 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013222815114097774, + "loss": 8.6652, + "step": 445700 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013220175055419313, + "loss": 8.6564, + "step": 445800 + }, + { + "epoch": 4.75, + "learning_rate": 0.0001321753474630751, + "loss": 8.5814, + "step": 445900 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013214894186967693, + "loss": 8.6772, + "step": 446000 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013212253377605223, + "loss": 8.7201, + "step": 446100 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013209612318425474, + "loss": 8.6887, + "step": 446200 + }, + { + "epoch": 4.75, + "learning_rate": 0.00013206971009633846, + "loss": 8.6393, + "step": 446300 + }, + { + "epoch": 4.75, + "learning_rate": 0.0001320432945143575, + "loss": 8.6791, + "step": 446400 + }, + { + "epoch": 4.76, + "learning_rate": 0.0001320168764403662, + "loss": 8.7203, + "step": 446500 + }, + { + "epoch": 4.76, + "learning_rate": 0.00013199045587641907, + "loss": 8.624, + "step": 446600 + }, + { + "epoch": 4.76, + "learning_rate": 0.0001319640328245709, + "loss": 8.6511, + "step": 446700 + }, + { + "epoch": 4.76, + "learning_rate": 0.0001319376072868766, + "loss": 8.6827, + "step": 446800 + }, + { + "epoch": 4.76, + "learning_rate": 0.00013191117926539126, + "loss": 8.5908, + "step": 446900 + }, + { + "epoch": 4.76, + "learning_rate": 0.0001318847487621702, + "loss": 8.6547, + "step": 447000 + }, + { + "epoch": 4.76, + "learning_rate": 0.00013185831577926894, + "loss": 8.6745, + "step": 447100 + }, + { + "epoch": 4.76, + "learning_rate": 0.00013183188031874317, + "loss": 8.7081, + "step": 447200 + }, + { + "epoch": 4.76, + "learning_rate": 0.00013180544238264877, + "loss": 8.6436, + "step": 447300 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013177900197304182, + "loss": 8.6324, + "step": 447400 + }, + { + "epoch": 4.77, + "learning_rate": 0.0001317525590919786, + "loss": 8.6772, + "step": 447500 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013172611374151556, + "loss": 8.7012, + "step": 447600 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013169966592370934, + "loss": 8.7295, + "step": 447700 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013167321564061687, + "loss": 8.6531, + "step": 447800 + }, + { + "epoch": 4.77, + "learning_rate": 0.0001316467628942951, + "loss": 8.6602, + "step": 447900 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013162030768680135, + "loss": 8.6292, + "step": 448000 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013159385002019293, + "loss": 8.6696, + "step": 448100 + }, + { + "epoch": 4.77, + "learning_rate": 0.0001315673898965276, + "loss": 8.7786, + "step": 448200 + }, + { + "epoch": 4.77, + "learning_rate": 0.00013154092731786302, + "loss": 8.6553, + "step": 448300 + }, + { + "epoch": 4.78, + "learning_rate": 0.0001315144622862573, + "loss": 8.7195, + "step": 448400 + }, + { + "epoch": 4.78, + "learning_rate": 0.0001314879948037686, + "loss": 8.6262, + "step": 448500 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013146152487245524, + "loss": 8.6295, + "step": 448600 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013143505249437585, + "loss": 8.6805, + "step": 448700 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013140857767158915, + "loss": 8.6464, + "step": 448800 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013138210040615419, + "loss": 8.677, + "step": 448900 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013135562070012996, + "loss": 8.694, + "step": 449000 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013132913855557588, + "loss": 8.7106, + "step": 449100 + }, + { + "epoch": 4.78, + "learning_rate": 0.00013130265397455143, + "loss": 8.7082, + "step": 449200 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013127616695911634, + "loss": 8.6467, + "step": 449300 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013124967751133054, + "loss": 8.7179, + "step": 449400 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013122318563325408, + "loss": 8.6559, + "step": 449500 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013119669132694722, + "loss": 8.6813, + "step": 449600 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013117019459447045, + "loss": 8.7213, + "step": 449700 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013114369543788443, + "loss": 8.6462, + "step": 449800 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013111719385924995, + "loss": 8.6001, + "step": 449900 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013109068986062817, + "loss": 8.5937, + "step": 450000 + }, + { + "epoch": 4.79, + "learning_rate": 0.00013106418344408015, + "loss": 8.6684, + "step": 450100 + }, + { + "epoch": 4.8, + "learning_rate": 0.0001310376746116674, + "loss": 8.6593, + "step": 450200 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013101116336545145, + "loss": 8.7048, + "step": 450300 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013098464970749414, + "loss": 8.6725, + "step": 450400 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013095813363985737, + "loss": 8.6562, + "step": 450500 + }, + { + "epoch": 4.8, + "learning_rate": 0.0001309316151646034, + "loss": 8.627, + "step": 450600 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013090509428379448, + "loss": 8.7338, + "step": 450700 + }, + { + "epoch": 4.8, + "learning_rate": 0.0001308785709994932, + "loss": 8.5993, + "step": 450800 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013085204531376225, + "loss": 8.6895, + "step": 450900 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013082551722866454, + "loss": 8.5996, + "step": 451000 + }, + { + "epoch": 4.8, + "learning_rate": 0.00013079898674626315, + "loss": 8.7312, + "step": 451100 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013077245386862137, + "loss": 8.6287, + "step": 451200 + }, + { + "epoch": 4.81, + "learning_rate": 0.0001307459185978027, + "loss": 8.6602, + "step": 451300 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013071938093587074, + "loss": 8.6048, + "step": 451400 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013069284088488935, + "loss": 8.68, + "step": 451500 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013066629844692254, + "loss": 8.6718, + "step": 451600 + }, + { + "epoch": 4.81, + "learning_rate": 0.0001306397536240345, + "loss": 8.6248, + "step": 451700 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013061320641828968, + "loss": 8.6596, + "step": 451800 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013058665683175262, + "loss": 8.6217, + "step": 451900 + }, + { + "epoch": 4.81, + "learning_rate": 0.00013056010486648805, + "loss": 8.6618, + "step": 452000 + }, + { + "epoch": 4.82, + "learning_rate": 0.00013053355052456103, + "loss": 8.6665, + "step": 452100 + }, + { + "epoch": 4.82, + "learning_rate": 0.00013050699380803658, + "loss": 8.6802, + "step": 452200 + }, + { + "epoch": 4.82, + "learning_rate": 0.0001304804347189801, + "loss": 8.7164, + "step": 452300 + }, + { + "epoch": 4.82, + "learning_rate": 0.000130453873259457, + "loss": 8.6673, + "step": 452400 + }, + { + "epoch": 4.82, + "learning_rate": 0.00013042730943153304, + "loss": 8.6993, + "step": 452500 + }, + { + "epoch": 4.82, + "learning_rate": 0.0001304007432372741, + "loss": 8.584, + "step": 452600 + }, + { + "epoch": 4.82, + "learning_rate": 0.0001303741746787462, + "loss": 8.6595, + "step": 452700 + }, + { + "epoch": 4.82, + "learning_rate": 0.0001303476037580156, + "loss": 8.6721, + "step": 452800 + }, + { + "epoch": 4.82, + "learning_rate": 0.0001303210304771487, + "loss": 8.7283, + "step": 452900 + }, + { + "epoch": 4.83, + "learning_rate": 0.00013029445483821213, + "loss": 8.672, + "step": 453000 + }, + { + "epoch": 4.83, + "learning_rate": 0.00013026787684327265, + "loss": 8.6558, + "step": 453100 + }, + { + "epoch": 4.83, + "learning_rate": 0.00013024129649439727, + "loss": 8.6994, + "step": 453200 + }, + { + "epoch": 4.83, + "learning_rate": 0.00013021471379365314, + "loss": 8.607, + "step": 453300 + }, + { + "epoch": 4.83, + "learning_rate": 0.00013018812874310759, + "loss": 8.607, + "step": 453400 + }, + { + "epoch": 4.83, + "learning_rate": 0.0001301615413448281, + "loss": 8.5984, + "step": 453500 + }, + { + "epoch": 4.83, + "learning_rate": 0.00013013495160088244, + "loss": 8.6559, + "step": 453600 + }, + { + "epoch": 4.83, + "learning_rate": 0.0001301083595133385, + "loss": 8.5914, + "step": 453700 + }, + { + "epoch": 4.83, + "learning_rate": 0.0001300817650842643, + "loss": 8.6822, + "step": 453800 + }, + { + "epoch": 4.83, + "learning_rate": 0.0001300551683157281, + "loss": 8.6826, + "step": 453900 + }, + { + "epoch": 4.84, + "learning_rate": 0.00013002856920979838, + "loss": 8.703, + "step": 454000 + }, + { + "epoch": 4.84, + "learning_rate": 0.0001300019677685437, + "loss": 8.6225, + "step": 454100 + }, + { + "epoch": 4.84, + "learning_rate": 0.00012997536399403286, + "loss": 8.6328, + "step": 454200 + }, + { + "epoch": 4.84, + "learning_rate": 0.00012994875788833492, + "loss": 8.6575, + "step": 454300 + }, + { + "epoch": 4.84, + "learning_rate": 0.00012992214945351893, + "loss": 8.6601, + "step": 454400 + }, + { + "epoch": 4.84, + "learning_rate": 0.00012989553869165432, + "loss": 8.6177, + "step": 454500 + }, + { + "epoch": 4.84, + "learning_rate": 0.00012986892560481051, + "loss": 8.7317, + "step": 454600 + }, + { + "epoch": 4.84, + "learning_rate": 0.00012984231019505732, + "loss": 8.6673, + "step": 454700 + }, + { + "epoch": 4.84, + "learning_rate": 0.00012981569246446454, + "loss": 8.6431, + "step": 454800 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001297890724151023, + "loss": 8.6172, + "step": 454900 + }, + { + "epoch": 4.85, + "learning_rate": 0.00012976245004904077, + "loss": 8.6859, + "step": 455000 + }, + { + "epoch": 4.85, + "learning_rate": 0.00012973582536835048, + "loss": 8.629, + "step": 455100 + }, + { + "epoch": 4.85, + "learning_rate": 0.00012970919837510192, + "loss": 8.743, + "step": 455200 + }, + { + "epoch": 4.85, + "learning_rate": 0.00012968256907136595, + "loss": 8.6734, + "step": 455300 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001296559374592135, + "loss": 8.6681, + "step": 455400 + }, + { + "epoch": 4.85, + "learning_rate": 0.00012962930354071573, + "loss": 8.556, + "step": 455500 + }, + { + "epoch": 4.85, + "learning_rate": 0.00012960266731794397, + "loss": 8.6006, + "step": 455600 + }, + { + "epoch": 4.85, + "learning_rate": 0.00012957602879296967, + "loss": 8.6222, + "step": 455700 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001295493879678646, + "loss": 8.6805, + "step": 455800 + }, + { + "epoch": 4.86, + "learning_rate": 0.0001295227448447005, + "loss": 8.6639, + "step": 455900 + }, + { + "epoch": 4.86, + "learning_rate": 0.00012949609942554955, + "loss": 8.5644, + "step": 456000 + }, + { + "epoch": 4.86, + "learning_rate": 0.00012946945171248383, + "loss": 8.7454, + "step": 456100 + }, + { + "epoch": 4.86, + "learning_rate": 0.0001294428017075758, + "loss": 8.6576, + "step": 456200 + }, + { + "epoch": 4.86, + "learning_rate": 0.00012941614941289808, + "loss": 8.6452, + "step": 456300 + }, + { + "epoch": 4.86, + "learning_rate": 0.00012938949483052334, + "loss": 8.7498, + "step": 456400 + }, + { + "epoch": 4.86, + "learning_rate": 0.00012936283796252456, + "loss": 8.6262, + "step": 456500 + }, + { + "epoch": 4.86, + "learning_rate": 0.00012933617881097484, + "loss": 8.614, + "step": 456600 + }, + { + "epoch": 4.86, + "learning_rate": 0.00012930951737794746, + "loss": 8.5952, + "step": 456700 + }, + { + "epoch": 4.87, + "learning_rate": 0.00012928285366551584, + "loss": 8.6992, + "step": 456800 + }, + { + "epoch": 4.87, + "learning_rate": 0.0001292561876757537, + "loss": 8.6923, + "step": 456900 + }, + { + "epoch": 4.87, + "learning_rate": 0.00012922951941073477, + "loss": 8.6807, + "step": 457000 + }, + { + "epoch": 4.87, + "learning_rate": 0.00012920284887253314, + "loss": 8.7229, + "step": 457100 + }, + { + "epoch": 4.87, + "learning_rate": 0.0001291761760632229, + "loss": 8.6145, + "step": 457200 + }, + { + "epoch": 4.87, + "learning_rate": 0.00012914950098487843, + "loss": 8.6644, + "step": 457300 + }, + { + "epoch": 4.87, + "learning_rate": 0.00012912282363957423, + "loss": 8.6596, + "step": 457400 + }, + { + "epoch": 4.87, + "learning_rate": 0.00012909614402938508, + "loss": 8.6228, + "step": 457500 + }, + { + "epoch": 4.87, + "learning_rate": 0.00012906946215638575, + "loss": 8.5926, + "step": 457600 + }, + { + "epoch": 4.88, + "learning_rate": 0.00012904277802265135, + "loss": 8.5833, + "step": 457700 + }, + { + "epoch": 4.88, + "learning_rate": 0.00012901609163025707, + "loss": 8.6997, + "step": 457800 + }, + { + "epoch": 4.88, + "learning_rate": 0.00012898940298127837, + "loss": 8.6351, + "step": 457900 + }, + { + "epoch": 4.88, + "learning_rate": 0.0001289627120777908, + "loss": 8.6754, + "step": 458000 + }, + { + "epoch": 4.88, + "learning_rate": 0.0001289360189218701, + "loss": 8.689, + "step": 458100 + }, + { + "epoch": 4.88, + "learning_rate": 0.00012890932351559224, + "loss": 8.6534, + "step": 458200 + }, + { + "epoch": 4.88, + "learning_rate": 0.00012888262586103322, + "loss": 8.6197, + "step": 458300 + }, + { + "epoch": 4.88, + "learning_rate": 0.00012885592596026946, + "loss": 8.6718, + "step": 458400 + }, + { + "epoch": 4.88, + "learning_rate": 0.00012882922381537735, + "loss": 8.6748, + "step": 458500 + }, + { + "epoch": 4.88, + "learning_rate": 0.0001288025194284335, + "loss": 8.7142, + "step": 458600 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012877581280151473, + "loss": 8.659, + "step": 458700 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012874910393669806, + "loss": 8.687, + "step": 458800 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012872239283606053, + "loss": 8.5978, + "step": 458900 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001286956795016796, + "loss": 8.6133, + "step": 459000 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012866896393563267, + "loss": 8.6104, + "step": 459100 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012864224613999746, + "loss": 8.6649, + "step": 459200 + }, + { + "epoch": 4.89, + "learning_rate": 0.00012861552611685183, + "loss": 8.7475, + "step": 459300 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001285888038682737, + "loss": 8.6462, + "step": 459400 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001285620793963414, + "loss": 8.5911, + "step": 459500 + }, + { + "epoch": 4.9, + "learning_rate": 0.00012853535270313318, + "loss": 8.7623, + "step": 459600 + }, + { + "epoch": 4.9, + "learning_rate": 0.00012850862379072765, + "loss": 8.6328, + "step": 459700 + }, + { + "epoch": 4.9, + "learning_rate": 0.00012848189266120348, + "loss": 8.6915, + "step": 459800 + }, + { + "epoch": 4.9, + "learning_rate": 0.00012845515931663958, + "loss": 8.7009, + "step": 459900 + }, + { + "epoch": 4.9, + "learning_rate": 0.000128428423759115, + "loss": 8.6888, + "step": 460000 + }, + { + "epoch": 4.9, + "learning_rate": 0.00012840168599070895, + "loss": 8.6626, + "step": 460100 + }, + { + "epoch": 4.9, + "learning_rate": 0.00012837494601350085, + "loss": 8.7216, + "step": 460200 + }, + { + "epoch": 4.9, + "learning_rate": 0.00012834820382957025, + "loss": 8.6818, + "step": 460300 + }, + { + "epoch": 4.9, + "learning_rate": 0.0001283214594409969, + "loss": 8.5964, + "step": 460400 + }, + { + "epoch": 4.9, + "learning_rate": 0.0001282947128498607, + "loss": 8.6759, + "step": 460500 + }, + { + "epoch": 4.91, + "learning_rate": 0.0001282679640582418, + "loss": 8.6929, + "step": 460600 + }, + { + "epoch": 4.91, + "learning_rate": 0.00012824121306822037, + "loss": 8.6573, + "step": 460700 + }, + { + "epoch": 4.91, + "learning_rate": 0.0001282144598818769, + "loss": 8.623, + "step": 460800 + }, + { + "epoch": 4.91, + "learning_rate": 0.00012818770450129195, + "loss": 8.5876, + "step": 460900 + }, + { + "epoch": 4.91, + "learning_rate": 0.0001281609469285463, + "loss": 8.6245, + "step": 461000 + }, + { + "epoch": 4.91, + "learning_rate": 0.00012813418716572087, + "loss": 8.64, + "step": 461100 + }, + { + "epoch": 4.91, + "learning_rate": 0.00012810742521489682, + "loss": 8.671, + "step": 461200 + }, + { + "epoch": 4.91, + "learning_rate": 0.00012808066107815539, + "loss": 8.7127, + "step": 461300 + }, + { + "epoch": 4.91, + "learning_rate": 0.00012805389475757808, + "loss": 8.6625, + "step": 461400 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012802712625524642, + "loss": 8.6856, + "step": 461500 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012800035557324229, + "loss": 8.5683, + "step": 461600 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012797358271364757, + "loss": 8.572, + "step": 461700 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012794680767854445, + "loss": 8.7105, + "step": 461800 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012792003047001516, + "loss": 8.6965, + "step": 461900 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012789325109014227, + "loss": 8.6666, + "step": 462000 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012786646954100831, + "loss": 8.6546, + "step": 462100 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012783968582469617, + "loss": 8.5924, + "step": 462200 + }, + { + "epoch": 4.92, + "learning_rate": 0.00012781289994328874, + "loss": 8.6702, + "step": 462300 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012778611189886925, + "loss": 8.6303, + "step": 462400 + }, + { + "epoch": 4.93, + "learning_rate": 0.0001277593216935209, + "loss": 8.6607, + "step": 462500 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012773252932932725, + "loss": 8.6817, + "step": 462600 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012770573480837193, + "loss": 8.629, + "step": 462700 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012767893813273875, + "loss": 8.6142, + "step": 462800 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012765213930451168, + "loss": 8.6747, + "step": 462900 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012762533832577487, + "loss": 8.683, + "step": 463000 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012759853519861265, + "loss": 8.6181, + "step": 463100 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012757172992510947, + "loss": 8.647, + "step": 463200 + }, + { + "epoch": 4.93, + "learning_rate": 0.00012754492250735002, + "loss": 8.6689, + "step": 463300 + }, + { + "epoch": 4.94, + "learning_rate": 0.0001275181129474191, + "loss": 8.6324, + "step": 463400 + }, + { + "epoch": 4.94, + "learning_rate": 0.00012749130124740165, + "loss": 8.6439, + "step": 463500 + }, + { + "epoch": 4.94, + "learning_rate": 0.00012746448740938289, + "loss": 8.7475, + "step": 463600 + }, + { + "epoch": 4.94, + "learning_rate": 0.00012743767143544806, + "loss": 8.6256, + "step": 463700 + }, + { + "epoch": 4.94, + "learning_rate": 0.00012741085332768274, + "loss": 8.6616, + "step": 463800 + }, + { + "epoch": 4.94, + "learning_rate": 0.00012738403308817247, + "loss": 8.6574, + "step": 463900 + }, + { + "epoch": 4.94, + "learning_rate": 0.00012735721071900315, + "loss": 8.7184, + "step": 464000 + }, + { + "epoch": 4.94, + "learning_rate": 0.00012733038622226068, + "loss": 8.6314, + "step": 464100 + }, + { + "epoch": 4.94, + "learning_rate": 0.0001273035596000313, + "loss": 8.6973, + "step": 464200 + }, + { + "epoch": 4.95, + "learning_rate": 0.0001272767308544012, + "loss": 8.5774, + "step": 464300 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012724989998745694, + "loss": 8.6374, + "step": 464400 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012722306700128512, + "loss": 8.6839, + "step": 464500 + }, + { + "epoch": 4.95, + "learning_rate": 0.0001271962318979726, + "loss": 8.6733, + "step": 464600 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012716939467960627, + "loss": 8.7056, + "step": 464700 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012714255534827332, + "loss": 8.6365, + "step": 464800 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012711571390606102, + "loss": 8.7412, + "step": 464900 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012708887035505685, + "loss": 8.7121, + "step": 465000 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012706202469734844, + "loss": 8.5874, + "step": 465100 + }, + { + "epoch": 4.95, + "learning_rate": 0.00012703517693502355, + "loss": 8.6865, + "step": 465200 + }, + { + "epoch": 4.96, + "learning_rate": 0.00012700832707017017, + "loss": 8.7009, + "step": 465300 + }, + { + "epoch": 4.96, + "learning_rate": 0.0001269814751048764, + "loss": 8.64, + "step": 465400 + }, + { + "epoch": 4.96, + "learning_rate": 0.0001269546210412305, + "loss": 8.6359, + "step": 465500 + }, + { + "epoch": 4.96, + "learning_rate": 0.00012692776488132092, + "loss": 8.6478, + "step": 465600 + }, + { + "epoch": 4.96, + "learning_rate": 0.0001269009066272363, + "loss": 8.6409, + "step": 465700 + }, + { + "epoch": 4.96, + "learning_rate": 0.00012687404628106537, + "loss": 8.6387, + "step": 465800 + }, + { + "epoch": 4.96, + "learning_rate": 0.0001268471838448971, + "loss": 8.5902, + "step": 465900 + }, + { + "epoch": 4.96, + "learning_rate": 0.00012682031932082053, + "loss": 8.6444, + "step": 466000 + }, + { + "epoch": 4.96, + "learning_rate": 0.00012679345271092497, + "loss": 8.6726, + "step": 466100 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012676658401729985, + "loss": 8.6177, + "step": 466200 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012673971324203467, + "loss": 8.5909, + "step": 466300 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012671284038721922, + "loss": 8.6811, + "step": 466400 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012668596545494342, + "loss": 8.677, + "step": 466500 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012665908844729736, + "loss": 8.6492, + "step": 466600 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012663220936637117, + "loss": 8.6038, + "step": 466700 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012660532821425532, + "loss": 8.648, + "step": 466800 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012657844499304035, + "loss": 8.7073, + "step": 466900 + }, + { + "epoch": 4.97, + "learning_rate": 0.00012655155970481695, + "loss": 8.667, + "step": 467000 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012652467235167603, + "loss": 8.6003, + "step": 467100 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012649778293570858, + "loss": 8.6487, + "step": 467200 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012647089145900582, + "loss": 8.6726, + "step": 467300 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012644399792365905, + "loss": 8.5793, + "step": 467400 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012641710233175985, + "loss": 8.7389, + "step": 467500 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012639020468539986, + "loss": 8.7092, + "step": 467600 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012636330498667095, + "loss": 8.6097, + "step": 467700 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012633640323766505, + "loss": 8.6392, + "step": 467800 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012630949944047434, + "loss": 8.6286, + "step": 467900 + }, + { + "epoch": 4.98, + "learning_rate": 0.00012628259359719114, + "loss": 8.6716, + "step": 468000 + }, + { + "epoch": 4.99, + "learning_rate": 0.00012625568570990794, + "loss": 8.6469, + "step": 468100 + }, + { + "epoch": 4.99, + "learning_rate": 0.0001262287757807173, + "loss": 8.6353, + "step": 468200 + }, + { + "epoch": 4.99, + "learning_rate": 0.0001262018638117121, + "loss": 8.553, + "step": 468300 + }, + { + "epoch": 4.99, + "learning_rate": 0.00012617494980498515, + "loss": 8.6476, + "step": 468400 + }, + { + "epoch": 4.99, + "learning_rate": 0.00012614803376262974, + "loss": 8.647, + "step": 468500 + }, + { + "epoch": 4.99, + "learning_rate": 0.00012612111568673898, + "loss": 8.7589, + "step": 468600 + }, + { + "epoch": 4.99, + "learning_rate": 0.0001260941955794064, + "loss": 8.5698, + "step": 468700 + }, + { + "epoch": 4.99, + "learning_rate": 0.0001260672734427255, + "loss": 8.6316, + "step": 468800 + }, + { + "epoch": 4.99, + "learning_rate": 0.00012604034927879, + "loss": 8.7102, + "step": 468900 + }, + { + "epoch": 5.0, + "learning_rate": 0.0001260134230896939, + "loss": 8.5382, + "step": 469000 + }, + { + "epoch": 5.0, + "learning_rate": 0.00012598649487753118, + "loss": 8.683, + "step": 469100 + }, + { + "epoch": 5.0, + "learning_rate": 0.00012595956464439608, + "loss": 8.6113, + "step": 469200 + }, + { + "epoch": 5.0, + "learning_rate": 0.0001259326323923829, + "loss": 8.7009, + "step": 469300 + }, + { + "epoch": 5.0, + "learning_rate": 0.00012590569812358625, + "loss": 8.624, + "step": 469400 + }, + { + "epoch": 5.0, + "learning_rate": 0.00012587876184010077, + "loss": 8.6623, + "step": 469500 + }, + { + "epoch": 5.0, + "learning_rate": 0.0001258518235440213, + "loss": 8.6151, + "step": 469600 + }, + { + "epoch": 5.0, + "learning_rate": 0.0001258248832374428, + "loss": 8.6626, + "step": 469700 + }, + { + "epoch": 5.0, + "learning_rate": 0.00012579794092246053, + "loss": 8.697, + "step": 469800 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012577099660116966, + "loss": 8.651, + "step": 469900 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012574405027566577, + "loss": 8.7128, + "step": 470000 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012571710194804437, + "loss": 8.7282, + "step": 470100 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012569015162040134, + "loss": 8.6923, + "step": 470200 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012566319929483254, + "loss": 8.6688, + "step": 470300 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012563624497343405, + "loss": 8.712, + "step": 470400 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012560928865830217, + "loss": 8.6393, + "step": 470500 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012558233035153322, + "loss": 8.5656, + "step": 470600 + }, + { + "epoch": 5.01, + "learning_rate": 0.0001255553700552238, + "loss": 8.6113, + "step": 470700 + }, + { + "epoch": 5.01, + "learning_rate": 0.00012552840777147064, + "loss": 8.6454, + "step": 470800 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012550144350237055, + "loss": 8.562, + "step": 470900 + }, + { + "epoch": 5.02, + "learning_rate": 0.0001254744772500205, + "loss": 8.6416, + "step": 471000 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012544750901651779, + "loss": 8.6147, + "step": 471100 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012542053880395962, + "loss": 8.6192, + "step": 471200 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012539356661444355, + "loss": 8.5812, + "step": 471300 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012536659245006716, + "loss": 8.6073, + "step": 471400 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012533961631292826, + "loss": 8.5993, + "step": 471500 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012531263820512474, + "loss": 8.5635, + "step": 471600 + }, + { + "epoch": 5.02, + "learning_rate": 0.00012528565812875477, + "loss": 8.5815, + "step": 471700 + }, + { + "epoch": 5.03, + "learning_rate": 0.00012525867608591655, + "loss": 8.7462, + "step": 471800 + }, + { + "epoch": 5.03, + "learning_rate": 0.00012523169207870845, + "loss": 8.5659, + "step": 471900 + }, + { + "epoch": 5.03, + "learning_rate": 0.00012520470610922905, + "loss": 8.5946, + "step": 472000 + }, + { + "epoch": 5.03, + "learning_rate": 0.00012517771817957703, + "loss": 8.6666, + "step": 472100 + }, + { + "epoch": 5.03, + "learning_rate": 0.0001251507282918513, + "loss": 8.5675, + "step": 472200 + }, + { + "epoch": 5.03, + "learning_rate": 0.0001251237364481508, + "loss": 8.6315, + "step": 472300 + }, + { + "epoch": 5.03, + "learning_rate": 0.0001250967426505747, + "loss": 8.6516, + "step": 472400 + }, + { + "epoch": 5.03, + "learning_rate": 0.00012506974690122235, + "loss": 8.5964, + "step": 472500 + }, + { + "epoch": 5.03, + "learning_rate": 0.0001250427492021932, + "loss": 8.616, + "step": 472600 + }, + { + "epoch": 5.03, + "learning_rate": 0.0001250157495555868, + "loss": 8.6674, + "step": 472700 + }, + { + "epoch": 5.04, + "learning_rate": 0.00012498874796350304, + "loss": 8.5774, + "step": 472800 + }, + { + "epoch": 5.04, + "learning_rate": 0.0001249617444280417, + "loss": 8.6512, + "step": 472900 + }, + { + "epoch": 5.04, + "learning_rate": 0.00012493473895130297, + "loss": 8.6232, + "step": 473000 + }, + { + "epoch": 5.04, + "learning_rate": 0.00012490773153538696, + "loss": 8.6816, + "step": 473100 + }, + { + "epoch": 5.04, + "learning_rate": 0.00012488072218239408, + "loss": 8.6267, + "step": 473200 + }, + { + "epoch": 5.04, + "learning_rate": 0.0001248537108944249, + "loss": 8.673, + "step": 473300 + }, + { + "epoch": 5.04, + "learning_rate": 0.00012482669767358004, + "loss": 8.642, + "step": 473400 + }, + { + "epoch": 5.04, + "learning_rate": 0.00012479968252196032, + "loss": 8.6582, + "step": 473500 + }, + { + "epoch": 5.04, + "learning_rate": 0.0001247726654416667, + "loss": 8.6881, + "step": 473600 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012474564643480038, + "loss": 8.6648, + "step": 473700 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012471862550346254, + "loss": 8.6286, + "step": 473800 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012469160264975466, + "loss": 8.6541, + "step": 473900 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012466457787577826, + "loss": 8.6569, + "step": 474000 + }, + { + "epoch": 5.05, + "learning_rate": 0.0001246375511836351, + "loss": 8.5916, + "step": 474100 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012461052257542704, + "loss": 8.6337, + "step": 474200 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012458349205325605, + "loss": 8.643, + "step": 474300 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012455645961922439, + "loss": 8.5945, + "step": 474400 + }, + { + "epoch": 5.05, + "learning_rate": 0.00012452942527543427, + "loss": 8.6754, + "step": 474500 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012450238902398826, + "loss": 8.6129, + "step": 474600 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012447535086698887, + "loss": 8.5897, + "step": 474700 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012444831080653896, + "loss": 8.6259, + "step": 474800 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012442126884474136, + "loss": 8.664, + "step": 474900 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012439422498369915, + "loss": 8.5912, + "step": 475000 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012436717922551552, + "loss": 8.6629, + "step": 475100 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012434013157229385, + "loss": 8.6405, + "step": 475200 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012431308202613765, + "loss": 8.6185, + "step": 475300 + }, + { + "epoch": 5.06, + "learning_rate": 0.00012428603058915053, + "loss": 8.6368, + "step": 475400 + }, + { + "epoch": 5.06, + "learning_rate": 0.0001242589772634363, + "loss": 8.5612, + "step": 475500 + }, + { + "epoch": 5.07, + "learning_rate": 0.00012423192205109887, + "loss": 8.6352, + "step": 475600 + }, + { + "epoch": 5.07, + "learning_rate": 0.00012420486495424236, + "loss": 8.5956, + "step": 475700 + }, + { + "epoch": 5.07, + "learning_rate": 0.000124177805974971, + "loss": 8.6332, + "step": 475800 + }, + { + "epoch": 5.07, + "learning_rate": 0.0001241507451153892, + "loss": 8.6193, + "step": 475900 + }, + { + "epoch": 5.07, + "learning_rate": 0.00012412368237760144, + "loss": 8.5371, + "step": 476000 + }, + { + "epoch": 5.07, + "learning_rate": 0.0001240966177637124, + "loss": 8.6573, + "step": 476100 + }, + { + "epoch": 5.07, + "learning_rate": 0.00012406955127582686, + "loss": 8.646, + "step": 476200 + }, + { + "epoch": 5.07, + "learning_rate": 0.00012404248291604988, + "loss": 8.6684, + "step": 476300 + }, + { + "epoch": 5.07, + "learning_rate": 0.0001240154126864865, + "loss": 8.6365, + "step": 476400 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012398834058924198, + "loss": 8.6788, + "step": 476500 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012396126662642172, + "loss": 8.6437, + "step": 476600 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012393419080013128, + "loss": 8.6745, + "step": 476700 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012390711311247634, + "loss": 8.6564, + "step": 476800 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012388003356556278, + "loss": 8.5859, + "step": 476900 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012385295216149646, + "loss": 8.6127, + "step": 477000 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012382586890238363, + "loss": 8.7183, + "step": 477100 + }, + { + "epoch": 5.08, + "learning_rate": 0.0001237987837903305, + "loss": 8.6002, + "step": 477200 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012377169682744346, + "loss": 8.6373, + "step": 477300 + }, + { + "epoch": 5.08, + "learning_rate": 0.00012374460801582916, + "loss": 8.5768, + "step": 477400 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012371751735759418, + "loss": 8.6374, + "step": 477500 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012369042485484548, + "loss": 8.5788, + "step": 477600 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012366333050968991, + "loss": 8.5527, + "step": 477700 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012363623432423473, + "loss": 8.6456, + "step": 477800 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012360913630058714, + "loss": 8.6739, + "step": 477900 + }, + { + "epoch": 5.09, + "learning_rate": 0.0001235820364408546, + "loss": 8.6803, + "step": 478000 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012355493474714463, + "loss": 8.5792, + "step": 478100 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012352783122156492, + "loss": 8.6385, + "step": 478200 + }, + { + "epoch": 5.09, + "learning_rate": 0.00012350072586622338, + "loss": 8.6877, + "step": 478300 + }, + { + "epoch": 5.1, + "learning_rate": 0.00012347361868322794, + "loss": 8.6881, + "step": 478400 + }, + { + "epoch": 5.1, + "learning_rate": 0.00012344650967468674, + "loss": 8.5815, + "step": 478500 + }, + { + "epoch": 5.1, + "learning_rate": 0.00012341939884270804, + "loss": 8.6461, + "step": 478600 + }, + { + "epoch": 5.1, + "learning_rate": 0.00012339228618940028, + "loss": 8.6738, + "step": 478700 + }, + { + "epoch": 5.1, + "learning_rate": 0.000123365171716872, + "loss": 8.5605, + "step": 478800 + }, + { + "epoch": 5.1, + "learning_rate": 0.0001233380554272319, + "loss": 8.6855, + "step": 478900 + }, + { + "epoch": 5.1, + "learning_rate": 0.0001233109373225888, + "loss": 8.5776, + "step": 479000 + }, + { + "epoch": 5.1, + "learning_rate": 0.00012328381740505173, + "loss": 8.7573, + "step": 479100 + }, + { + "epoch": 5.1, + "learning_rate": 0.00012325669567672974, + "loss": 8.6138, + "step": 479200 + }, + { + "epoch": 5.11, + "learning_rate": 0.0001232295721397321, + "loss": 8.6954, + "step": 479300 + }, + { + "epoch": 5.11, + "learning_rate": 0.00012320244679616825, + "loss": 8.5549, + "step": 479400 + }, + { + "epoch": 5.11, + "learning_rate": 0.0001231753196481477, + "loss": 8.6059, + "step": 479500 + }, + { + "epoch": 5.11, + "learning_rate": 0.00012314819069778017, + "loss": 8.6543, + "step": 479600 + }, + { + "epoch": 5.11, + "learning_rate": 0.0001231210599471754, + "loss": 8.5689, + "step": 479700 + }, + { + "epoch": 5.11, + "learning_rate": 0.00012309392739844348, + "loss": 8.6011, + "step": 479800 + }, + { + "epoch": 5.11, + "learning_rate": 0.00012306679305369437, + "loss": 8.696, + "step": 479900 + }, + { + "epoch": 5.11, + "learning_rate": 0.00012303965691503842, + "loss": 8.7434, + "step": 480000 + }, + { + "epoch": 5.11, + "learning_rate": 0.00012301251898458592, + "loss": 8.6261, + "step": 480100 + }, + { + "epoch": 5.11, + "learning_rate": 0.00012298537926444747, + "loss": 8.6675, + "step": 480200 + }, + { + "epoch": 5.12, + "learning_rate": 0.0001229582377567337, + "loss": 8.5841, + "step": 480300 + }, + { + "epoch": 5.12, + "learning_rate": 0.0001229310944635554, + "loss": 8.6291, + "step": 480400 + }, + { + "epoch": 5.12, + "learning_rate": 0.00012290394938702348, + "loss": 8.6233, + "step": 480500 + }, + { + "epoch": 5.12, + "learning_rate": 0.00012287680252924906, + "loss": 8.6898, + "step": 480600 + }, + { + "epoch": 5.12, + "learning_rate": 0.00012284965389234334, + "loss": 8.6368, + "step": 480700 + }, + { + "epoch": 5.12, + "learning_rate": 0.00012282250347841767, + "loss": 8.66, + "step": 480800 + }, + { + "epoch": 5.12, + "learning_rate": 0.00012279535128958356, + "loss": 8.6045, + "step": 480900 + }, + { + "epoch": 5.12, + "learning_rate": 0.0001227681973279526, + "loss": 8.6507, + "step": 481000 + }, + { + "epoch": 5.12, + "learning_rate": 0.0001227410415956366, + "loss": 8.6886, + "step": 481100 + }, + { + "epoch": 5.13, + "learning_rate": 0.0001227138840947474, + "loss": 8.5769, + "step": 481200 + }, + { + "epoch": 5.13, + "learning_rate": 0.0001226867248273971, + "loss": 8.652, + "step": 481300 + }, + { + "epoch": 5.13, + "learning_rate": 0.00012265956379569788, + "loss": 8.6534, + "step": 481400 + }, + { + "epoch": 5.13, + "learning_rate": 0.00012263240100176198, + "loss": 8.6213, + "step": 481500 + }, + { + "epoch": 5.13, + "learning_rate": 0.00012260523644770196, + "loss": 8.5805, + "step": 481600 + }, + { + "epoch": 5.13, + "learning_rate": 0.00012257807013563035, + "loss": 8.5515, + "step": 481700 + }, + { + "epoch": 5.13, + "learning_rate": 0.00012255090206765988, + "loss": 8.6288, + "step": 481800 + }, + { + "epoch": 5.13, + "learning_rate": 0.00012252373224590346, + "loss": 8.6531, + "step": 481900 + }, + { + "epoch": 5.13, + "learning_rate": 0.00012249656067247398, + "loss": 8.6806, + "step": 482000 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012246938734948473, + "loss": 8.5822, + "step": 482100 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012244221227904886, + "loss": 8.6463, + "step": 482200 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012241503546327981, + "loss": 8.7077, + "step": 482300 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012238785690429116, + "loss": 8.6507, + "step": 482400 + }, + { + "epoch": 5.14, + "learning_rate": 0.0001223606766041965, + "loss": 8.6023, + "step": 482500 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012233349456510975, + "loss": 8.6388, + "step": 482600 + }, + { + "epoch": 5.14, + "learning_rate": 0.0001223063107891448, + "loss": 8.5932, + "step": 482700 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012227912527841577, + "loss": 8.5526, + "step": 482800 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012225193803503686, + "loss": 8.6142, + "step": 482900 + }, + { + "epoch": 5.14, + "learning_rate": 0.00012222474906112242, + "loss": 8.4713, + "step": 483000 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012219755835878694, + "loss": 8.6286, + "step": 483100 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012217036593014505, + "loss": 8.7256, + "step": 483200 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012214317177731146, + "loss": 8.6603, + "step": 483300 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012211597590240119, + "loss": 8.6599, + "step": 483400 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012208877830752912, + "loss": 8.6037, + "step": 483500 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012206157899481049, + "loss": 8.552, + "step": 483600 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012203437796636059, + "loss": 8.6496, + "step": 483700 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012200717522429483, + "loss": 8.5669, + "step": 483800 + }, + { + "epoch": 5.15, + "learning_rate": 0.00012197997077072878, + "loss": 8.5969, + "step": 483900 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012195276460777811, + "loss": 8.5967, + "step": 484000 + }, + { + "epoch": 5.16, + "learning_rate": 0.0001219255567375587, + "loss": 8.5443, + "step": 484100 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012189834716218643, + "loss": 8.6976, + "step": 484200 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012187113588377746, + "loss": 8.5307, + "step": 484300 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012184392290444797, + "loss": 8.6069, + "step": 484400 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012181670822631435, + "loss": 8.5897, + "step": 484500 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012178949185149304, + "loss": 8.6776, + "step": 484600 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012176227378210074, + "loss": 8.6032, + "step": 484700 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012173505402025412, + "loss": 8.668, + "step": 484800 + }, + { + "epoch": 5.16, + "learning_rate": 0.00012170783256807013, + "loss": 8.6563, + "step": 484900 + }, + { + "epoch": 5.17, + "learning_rate": 0.00012168060942766572, + "loss": 8.6571, + "step": 485000 + }, + { + "epoch": 5.17, + "learning_rate": 0.0001216533846011581, + "loss": 8.5537, + "step": 485100 + }, + { + "epoch": 5.17, + "learning_rate": 0.00012162615809066449, + "loss": 8.6301, + "step": 485200 + }, + { + "epoch": 5.17, + "learning_rate": 0.00012159892989830233, + "loss": 8.57, + "step": 485300 + }, + { + "epoch": 5.17, + "learning_rate": 0.00012157170002618919, + "loss": 8.6743, + "step": 485400 + }, + { + "epoch": 5.17, + "learning_rate": 0.00012154446847644266, + "loss": 8.6202, + "step": 485500 + }, + { + "epoch": 5.17, + "learning_rate": 0.00012151723525118059, + "loss": 8.6026, + "step": 485600 + }, + { + "epoch": 5.17, + "learning_rate": 0.00012149000035252091, + "loss": 8.5762, + "step": 485700 + }, + { + "epoch": 5.17, + "learning_rate": 0.0001214627637825817, + "loss": 8.5651, + "step": 485800 + }, + { + "epoch": 5.18, + "learning_rate": 0.0001214355255434811, + "loss": 8.631, + "step": 485900 + }, + { + "epoch": 5.18, + "learning_rate": 0.00012140828563733749, + "loss": 8.6734, + "step": 486000 + }, + { + "epoch": 5.18, + "learning_rate": 0.00012138104406626927, + "loss": 8.6246, + "step": 486100 + }, + { + "epoch": 5.18, + "learning_rate": 0.00012135380083239504, + "loss": 8.6306, + "step": 486200 + }, + { + "epoch": 5.18, + "learning_rate": 0.00012132655593783353, + "loss": 8.5956, + "step": 486300 + }, + { + "epoch": 5.18, + "learning_rate": 0.0001212993093847035, + "loss": 8.6468, + "step": 486400 + }, + { + "epoch": 5.18, + "learning_rate": 0.00012127206117512404, + "loss": 8.6103, + "step": 486500 + }, + { + "epoch": 5.18, + "learning_rate": 0.00012124481131121414, + "loss": 8.6011, + "step": 486600 + }, + { + "epoch": 5.18, + "learning_rate": 0.00012121755979509308, + "loss": 8.6358, + "step": 486700 + }, + { + "epoch": 5.19, + "learning_rate": 0.0001211903066288802, + "loss": 8.5875, + "step": 486800 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012116305181469498, + "loss": 8.5241, + "step": 486900 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012113579535465702, + "loss": 8.6396, + "step": 487000 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012110853725088607, + "loss": 8.5347, + "step": 487100 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012108127750550198, + "loss": 8.6325, + "step": 487200 + }, + { + "epoch": 5.19, + "learning_rate": 0.0001210540161206248, + "loss": 8.5518, + "step": 487300 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012102675309837454, + "loss": 8.5238, + "step": 487400 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012099948844087157, + "loss": 8.5798, + "step": 487500 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012097222215023616, + "loss": 8.7137, + "step": 487600 + }, + { + "epoch": 5.19, + "learning_rate": 0.00012094495422858891, + "loss": 8.5289, + "step": 487700 + }, + { + "epoch": 5.2, + "learning_rate": 0.00012091768467805036, + "loss": 8.5658, + "step": 487800 + }, + { + "epoch": 5.2, + "learning_rate": 0.00012089041350074133, + "loss": 8.5446, + "step": 487900 + }, + { + "epoch": 5.2, + "learning_rate": 0.00012086314069878269, + "loss": 8.6184, + "step": 488000 + }, + { + "epoch": 5.2, + "learning_rate": 0.0001208358662742954, + "loss": 8.6675, + "step": 488100 + }, + { + "epoch": 5.2, + "learning_rate": 0.00012080859022940067, + "loss": 8.6121, + "step": 488200 + }, + { + "epoch": 5.2, + "learning_rate": 0.0001207813125662197, + "loss": 8.6151, + "step": 488300 + }, + { + "epoch": 5.2, + "learning_rate": 0.00012075403328687391, + "loss": 8.5027, + "step": 488400 + }, + { + "epoch": 5.2, + "learning_rate": 0.0001207267523934848, + "loss": 8.5561, + "step": 488500 + }, + { + "epoch": 5.2, + "learning_rate": 0.00012069946988817402, + "loss": 8.6449, + "step": 488600 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012067218577306331, + "loss": 8.5879, + "step": 488700 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012064490005027458, + "loss": 8.7064, + "step": 488800 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012061761272192983, + "loss": 8.6776, + "step": 488900 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012059032379015121, + "loss": 8.6386, + "step": 489000 + }, + { + "epoch": 5.21, + "learning_rate": 0.000120563033257061, + "loss": 8.6197, + "step": 489100 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012053574112478152, + "loss": 8.6031, + "step": 489200 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012050844739543535, + "loss": 8.5165, + "step": 489300 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012048115207114509, + "loss": 8.5469, + "step": 489400 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012045385515403353, + "loss": 8.6332, + "step": 489500 + }, + { + "epoch": 5.21, + "learning_rate": 0.00012042655664622352, + "loss": 8.641, + "step": 489600 + }, + { + "epoch": 5.22, + "learning_rate": 0.0001203992565498381, + "loss": 8.6453, + "step": 489700 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012037195486700037, + "loss": 8.6269, + "step": 489800 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012034465159983359, + "loss": 8.6234, + "step": 489900 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012031734675046118, + "loss": 8.5625, + "step": 490000 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012029004032100659, + "loss": 8.6715, + "step": 490100 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012026273231359347, + "loss": 8.6358, + "step": 490200 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012023542273034556, + "loss": 8.6555, + "step": 490300 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012020811157338674, + "loss": 8.6598, + "step": 490400 + }, + { + "epoch": 5.22, + "learning_rate": 0.00012018079884484098, + "loss": 8.5531, + "step": 490500 + }, + { + "epoch": 5.23, + "learning_rate": 0.0001201534845468324, + "loss": 8.5616, + "step": 490600 + }, + { + "epoch": 5.23, + "learning_rate": 0.00012012616868148522, + "loss": 8.5506, + "step": 490700 + }, + { + "epoch": 5.23, + "learning_rate": 0.00012009885125092386, + "loss": 8.5304, + "step": 490800 + }, + { + "epoch": 5.23, + "learning_rate": 0.00012007153225727272, + "loss": 8.5407, + "step": 490900 + }, + { + "epoch": 5.23, + "learning_rate": 0.00012004421170265647, + "loss": 8.5713, + "step": 491000 + }, + { + "epoch": 5.23, + "learning_rate": 0.00012001688958919976, + "loss": 8.5942, + "step": 491100 + }, + { + "epoch": 5.23, + "learning_rate": 0.00011998956591902752, + "loss": 8.6831, + "step": 491200 + }, + { + "epoch": 5.23, + "learning_rate": 0.00011996224069426465, + "loss": 8.5924, + "step": 491300 + }, + { + "epoch": 5.23, + "learning_rate": 0.00011993491391703627, + "loss": 8.7262, + "step": 491400 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011990758558946753, + "loss": 8.5432, + "step": 491500 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011988025571368384, + "loss": 8.5249, + "step": 491600 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011985292429181056, + "loss": 8.5854, + "step": 491700 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011982559132597335, + "loss": 8.6217, + "step": 491800 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011979825681829784, + "loss": 8.6028, + "step": 491900 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011977092077090981, + "loss": 8.5861, + "step": 492000 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011974358318593526, + "loss": 8.6637, + "step": 492100 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011971624406550017, + "loss": 8.6015, + "step": 492200 + }, + { + "epoch": 5.24, + "learning_rate": 0.00011968890341173078, + "loss": 8.5298, + "step": 492300 + }, + { + "epoch": 5.24, + "learning_rate": 0.0001196615612267533, + "loss": 8.5929, + "step": 492400 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011963421751269419, + "loss": 8.6494, + "step": 492500 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011960687227167993, + "loss": 8.6476, + "step": 492600 + }, + { + "epoch": 5.25, + "learning_rate": 0.0001195795255058372, + "loss": 8.5995, + "step": 492700 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011955217721729273, + "loss": 8.5177, + "step": 492800 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011952482740817343, + "loss": 8.6109, + "step": 492900 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011949747608060629, + "loss": 8.6523, + "step": 493000 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011947012323671838, + "loss": 8.6714, + "step": 493100 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011944276887863699, + "loss": 8.6082, + "step": 493200 + }, + { + "epoch": 5.25, + "learning_rate": 0.00011941541300848944, + "loss": 8.5689, + "step": 493300 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011938805562840323, + "loss": 8.6105, + "step": 493400 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011936069674050592, + "loss": 8.5607, + "step": 493500 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011933333634692522, + "loss": 8.6002, + "step": 493600 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011930597444978892, + "loss": 8.5617, + "step": 493700 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011927861105122505, + "loss": 8.555, + "step": 493800 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011925124615336154, + "loss": 8.6018, + "step": 493900 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011922387975832667, + "loss": 8.6479, + "step": 494000 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011919651186824865, + "loss": 8.6214, + "step": 494100 + }, + { + "epoch": 5.26, + "learning_rate": 0.0001191691424852559, + "loss": 8.6361, + "step": 494200 + }, + { + "epoch": 5.26, + "learning_rate": 0.00011914177161147699, + "loss": 8.6385, + "step": 494300 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011911439924904051, + "loss": 8.5818, + "step": 494400 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011908702540007524, + "loss": 8.6002, + "step": 494500 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011905965006670999, + "loss": 8.6119, + "step": 494600 + }, + { + "epoch": 5.27, + "learning_rate": 0.0001190322732510738, + "loss": 8.655, + "step": 494700 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011900489495529575, + "loss": 8.5886, + "step": 494800 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011897751518150506, + "loss": 8.5782, + "step": 494900 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011895013393183103, + "loss": 8.5946, + "step": 495000 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011892275120840312, + "loss": 8.6736, + "step": 495100 + }, + { + "epoch": 5.27, + "learning_rate": 0.00011889536701335091, + "loss": 8.6123, + "step": 495200 + }, + { + "epoch": 5.28, + "learning_rate": 0.00011886798134880403, + "loss": 8.6256, + "step": 495300 + }, + { + "epoch": 5.28, + "learning_rate": 0.0001188405942168923, + "loss": 8.6785, + "step": 495400 + }, + { + "epoch": 5.28, + "learning_rate": 0.0001188132056197456, + "loss": 8.6425, + "step": 495500 + }, + { + "epoch": 5.28, + "learning_rate": 0.00011878581555949396, + "loss": 8.6101, + "step": 495600 + }, + { + "epoch": 5.28, + "learning_rate": 0.00011875842403826752, + "loss": 8.5835, + "step": 495700 + }, + { + "epoch": 5.28, + "learning_rate": 0.00011873103105819645, + "loss": 8.6049, + "step": 495800 + }, + { + "epoch": 5.28, + "learning_rate": 0.00011870363662141119, + "loss": 8.6794, + "step": 495900 + }, + { + "epoch": 5.28, + "learning_rate": 0.00011867624073004218, + "loss": 8.5501, + "step": 496000 + }, + { + "epoch": 5.28, + "learning_rate": 0.00011864884338621996, + "loss": 8.5533, + "step": 496100 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011862144459207529, + "loss": 8.6319, + "step": 496200 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011859404434973891, + "loss": 8.577, + "step": 496300 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011856664266134179, + "loss": 8.6302, + "step": 496400 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011853923952901496, + "loss": 8.6198, + "step": 496500 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011851183495488956, + "loss": 8.6335, + "step": 496600 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011848442894109682, + "loss": 8.606, + "step": 496700 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011845702148976814, + "loss": 8.6606, + "step": 496800 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011842961260303498, + "loss": 8.6117, + "step": 496900 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011840220228302895, + "loss": 8.6277, + "step": 497000 + }, + { + "epoch": 5.29, + "learning_rate": 0.00011837479053188174, + "loss": 8.635, + "step": 497100 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011834737735172514, + "loss": 8.5724, + "step": 497200 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011831996274469116, + "loss": 8.6165, + "step": 497300 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011829254671291172, + "loss": 8.627, + "step": 497400 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011826512925851907, + "loss": 8.5374, + "step": 497500 + }, + { + "epoch": 5.3, + "learning_rate": 0.0001182377103836454, + "loss": 8.5877, + "step": 497600 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011821029009042313, + "loss": 8.6493, + "step": 497700 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011818286838098469, + "loss": 8.5928, + "step": 497800 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011815544525746269, + "loss": 8.6689, + "step": 497900 + }, + { + "epoch": 5.3, + "learning_rate": 0.00011812802072198986, + "loss": 8.6076, + "step": 498000 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011810059477669897, + "loss": 8.6146, + "step": 498100 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011807316742372294, + "loss": 8.6121, + "step": 498200 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011804573866519482, + "loss": 8.6454, + "step": 498300 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011801830850324771, + "loss": 8.4458, + "step": 498400 + }, + { + "epoch": 5.31, + "learning_rate": 0.0001179908769400149, + "loss": 8.7108, + "step": 498500 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011796344397762975, + "loss": 8.5527, + "step": 498600 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011793600961822567, + "loss": 8.6439, + "step": 498700 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011790857386393628, + "loss": 8.6663, + "step": 498800 + }, + { + "epoch": 5.31, + "learning_rate": 0.00011788113671689524, + "loss": 8.5261, + "step": 498900 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011785369817923635, + "loss": 8.5908, + "step": 499000 + }, + { + "epoch": 5.32, + "learning_rate": 0.0001178262582530935, + "loss": 8.6008, + "step": 499100 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011779881694060071, + "loss": 8.5982, + "step": 499200 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011777137424389207, + "loss": 8.6222, + "step": 499300 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011774393016510183, + "loss": 8.6486, + "step": 499400 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011771648470636429, + "loss": 8.6003, + "step": 499500 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011768903786981391, + "loss": 8.6418, + "step": 499600 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011766158965758522, + "loss": 8.6609, + "step": 499700 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011763414007181288, + "loss": 8.6197, + "step": 499800 + }, + { + "epoch": 5.32, + "learning_rate": 0.00011760668911463164, + "loss": 8.6423, + "step": 499900 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011757923678817635, + "loss": 8.597, + "step": 500000 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011755178309458202, + "loss": 8.6048, + "step": 500100 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011752432803598368, + "loss": 8.5829, + "step": 500200 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011749687161451653, + "loss": 8.5914, + "step": 500300 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011746941383231586, + "loss": 8.5822, + "step": 500400 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011744195469151709, + "loss": 8.5781, + "step": 500500 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011741449419425565, + "loss": 8.6571, + "step": 500600 + }, + { + "epoch": 5.33, + "learning_rate": 0.00011738703234266726, + "loss": 8.557, + "step": 500700 + }, + { + "epoch": 5.33, + "learning_rate": 0.0001173595691388875, + "loss": 8.5924, + "step": 500800 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011733210458505231, + "loss": 8.5993, + "step": 500900 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011730463868329753, + "loss": 8.6787, + "step": 501000 + }, + { + "epoch": 5.34, + "learning_rate": 0.0001172771714357592, + "loss": 8.5717, + "step": 501100 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011724970284457346, + "loss": 8.6292, + "step": 501200 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011722223291187655, + "loss": 8.6195, + "step": 501300 + }, + { + "epoch": 5.34, + "learning_rate": 0.0001171947616398048, + "loss": 8.5627, + "step": 501400 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011716728903049466, + "loss": 8.5988, + "step": 501500 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011713981508608267, + "loss": 8.5941, + "step": 501600 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011711233980870548, + "loss": 8.6163, + "step": 501700 + }, + { + "epoch": 5.34, + "learning_rate": 0.00011708486320049987, + "loss": 8.6322, + "step": 501800 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011705738526360266, + "loss": 8.6877, + "step": 501900 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011702990600015085, + "loss": 8.5848, + "step": 502000 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011700242541228145, + "loss": 8.655, + "step": 502100 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011697494350213172, + "loss": 8.5638, + "step": 502200 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011694746027183885, + "loss": 8.4942, + "step": 502300 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011691997572354024, + "loss": 8.601, + "step": 502400 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011689248985937335, + "loss": 8.6164, + "step": 502500 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011686500268147578, + "loss": 8.6319, + "step": 502600 + }, + { + "epoch": 5.35, + "learning_rate": 0.00011683751419198523, + "loss": 8.5933, + "step": 502700 + }, + { + "epoch": 5.36, + "learning_rate": 0.00011681002439303943, + "loss": 8.4989, + "step": 502800 + }, + { + "epoch": 5.36, + "learning_rate": 0.0001167825332867763, + "loss": 8.5887, + "step": 502900 + }, + { + "epoch": 5.36, + "learning_rate": 0.00011675504087533381, + "loss": 8.6232, + "step": 503000 + }, + { + "epoch": 5.36, + "learning_rate": 0.0001167275471608501, + "loss": 8.6396, + "step": 503100 + }, + { + "epoch": 5.36, + "learning_rate": 0.00011670005214546327, + "loss": 8.597, + "step": 503200 + }, + { + "epoch": 5.36, + "learning_rate": 0.00011667255583131169, + "loss": 8.6166, + "step": 503300 + }, + { + "epoch": 5.36, + "learning_rate": 0.00011664505822053369, + "loss": 8.5821, + "step": 503400 + }, + { + "epoch": 5.36, + "learning_rate": 0.00011661755931526779, + "loss": 8.485, + "step": 503500 + }, + { + "epoch": 5.36, + "learning_rate": 0.00011659005911765257, + "loss": 8.6586, + "step": 503600 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011656255762982677, + "loss": 8.5479, + "step": 503700 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011653505485392915, + "loss": 8.5689, + "step": 503800 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011650755079209856, + "loss": 8.543, + "step": 503900 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011648004544647407, + "loss": 8.6066, + "step": 504000 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011645253881919471, + "loss": 8.6488, + "step": 504100 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011642503091239974, + "loss": 8.6272, + "step": 504200 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011639752172822837, + "loss": 8.6165, + "step": 504300 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011637001126882008, + "loss": 8.7553, + "step": 504400 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011634249953631431, + "loss": 8.4915, + "step": 504500 + }, + { + "epoch": 5.37, + "learning_rate": 0.00011631498653285065, + "loss": 8.5766, + "step": 504600 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011628747226056875, + "loss": 8.6109, + "step": 504700 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011625995672160852, + "loss": 8.5678, + "step": 504800 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011623243991810975, + "loss": 8.6166, + "step": 504900 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011620492185221243, + "loss": 8.5051, + "step": 505000 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011617740252605669, + "loss": 8.645, + "step": 505100 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011614988194178264, + "loss": 8.5927, + "step": 505200 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011612236010153064, + "loss": 8.5159, + "step": 505300 + }, + { + "epoch": 5.38, + "learning_rate": 0.000116094837007441, + "loss": 8.6013, + "step": 505400 + }, + { + "epoch": 5.38, + "learning_rate": 0.00011606731266165423, + "loss": 8.5945, + "step": 505500 + }, + { + "epoch": 5.39, + "learning_rate": 0.0001160397870663109, + "loss": 8.6168, + "step": 505600 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011601226022355165, + "loss": 8.5955, + "step": 505700 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011598473213551726, + "loss": 8.6104, + "step": 505800 + }, + { + "epoch": 5.39, + "learning_rate": 0.0001159572028043486, + "loss": 8.6297, + "step": 505900 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011592967223218664, + "loss": 8.664, + "step": 506000 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011590214042117243, + "loss": 8.6012, + "step": 506100 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011587460737344709, + "loss": 8.5137, + "step": 506200 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011584707309115191, + "loss": 8.5496, + "step": 506300 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011581953757642818, + "loss": 8.5428, + "step": 506400 + }, + { + "epoch": 5.39, + "learning_rate": 0.00011579200083141742, + "loss": 8.6569, + "step": 506500 + }, + { + "epoch": 5.4, + "learning_rate": 0.00011576446285826108, + "loss": 8.6375, + "step": 506600 + }, + { + "epoch": 5.4, + "learning_rate": 0.00011573692365910086, + "loss": 8.634, + "step": 506700 + }, + { + "epoch": 5.4, + "learning_rate": 0.00011570938323607842, + "loss": 8.5523, + "step": 506800 + }, + { + "epoch": 5.4, + "learning_rate": 0.00011568184159133566, + "loss": 8.6621, + "step": 506900 + }, + { + "epoch": 5.4, + "learning_rate": 0.00011565429872701443, + "loss": 8.6475, + "step": 507000 + }, + { + "epoch": 5.4, + "learning_rate": 0.00011562675464525679, + "loss": 8.6032, + "step": 507100 + }, + { + "epoch": 5.4, + "learning_rate": 0.0001155992093482048, + "loss": 8.5718, + "step": 507200 + }, + { + "epoch": 5.4, + "learning_rate": 0.0001155716628380007, + "loss": 8.5959, + "step": 507300 + }, + { + "epoch": 5.4, + "learning_rate": 0.00011554411511678678, + "loss": 8.558, + "step": 507400 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011551656618670539, + "loss": 8.6403, + "step": 507500 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011548901604989906, + "loss": 8.6294, + "step": 507600 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011546146470851033, + "loss": 8.6146, + "step": 507700 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011543391216468191, + "loss": 8.5887, + "step": 507800 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011540635842055654, + "loss": 8.5533, + "step": 507900 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011537880347827705, + "loss": 8.6183, + "step": 508000 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011535124733998648, + "loss": 8.6019, + "step": 508100 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011532369000782776, + "loss": 8.583, + "step": 508200 + }, + { + "epoch": 5.41, + "learning_rate": 0.00011529613148394411, + "loss": 8.6296, + "step": 508300 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011526857177047872, + "loss": 8.6695, + "step": 508400 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011524101086957492, + "loss": 8.597, + "step": 508500 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011521344878337613, + "loss": 8.5984, + "step": 508600 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011518588551402585, + "loss": 8.5465, + "step": 508700 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011515832106366769, + "loss": 8.6685, + "step": 508800 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011513075543444531, + "loss": 8.5788, + "step": 508900 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011510318862850253, + "loss": 8.6073, + "step": 509000 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011507562064798321, + "loss": 8.62, + "step": 509100 + }, + { + "epoch": 5.42, + "learning_rate": 0.0001150480514950313, + "loss": 8.6089, + "step": 509200 + }, + { + "epoch": 5.42, + "learning_rate": 0.00011502048117179087, + "loss": 8.584, + "step": 509300 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011499290968040608, + "loss": 8.6068, + "step": 509400 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011496533702302113, + "loss": 8.6683, + "step": 509500 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011493776320178041, + "loss": 8.6276, + "step": 509600 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011491018821882824, + "loss": 8.6223, + "step": 509700 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011488261207630924, + "loss": 8.6489, + "step": 509800 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011485503477636793, + "loss": 8.589, + "step": 509900 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011482745632114902, + "loss": 8.4884, + "step": 510000 + }, + { + "epoch": 5.43, + "learning_rate": 0.0001147998767127973, + "loss": 8.6326, + "step": 510100 + }, + { + "epoch": 5.43, + "learning_rate": 0.00011477229595345767, + "loss": 8.6089, + "step": 510200 + }, + { + "epoch": 5.44, + "learning_rate": 0.000114744714045275, + "loss": 8.5655, + "step": 510300 + }, + { + "epoch": 5.44, + "learning_rate": 0.00011471713099039445, + "loss": 8.6093, + "step": 510400 + }, + { + "epoch": 5.44, + "learning_rate": 0.00011468954679096105, + "loss": 8.5612, + "step": 510500 + }, + { + "epoch": 5.44, + "learning_rate": 0.00011466196144912011, + "loss": 8.536, + "step": 510600 + }, + { + "epoch": 5.44, + "learning_rate": 0.0001146343749670169, + "loss": 8.6442, + "step": 510700 + }, + { + "epoch": 5.44, + "learning_rate": 0.00011460678734679682, + "loss": 8.4866, + "step": 510800 + }, + { + "epoch": 5.44, + "learning_rate": 0.0001145791985906054, + "loss": 8.6043, + "step": 510900 + }, + { + "epoch": 5.44, + "learning_rate": 0.00011455160870058814, + "loss": 8.5273, + "step": 511000 + }, + { + "epoch": 5.44, + "learning_rate": 0.00011452401767889083, + "loss": 8.5008, + "step": 511100 + }, + { + "epoch": 5.44, + "learning_rate": 0.00011449642552765911, + "loss": 8.5924, + "step": 511200 + }, + { + "epoch": 5.45, + "learning_rate": 0.0001144688322490389, + "loss": 8.6188, + "step": 511300 + }, + { + "epoch": 5.45, + "learning_rate": 0.00011444123784517611, + "loss": 8.5661, + "step": 511400 + }, + { + "epoch": 5.45, + "learning_rate": 0.00011441364231821677, + "loss": 8.526, + "step": 511500 + }, + { + "epoch": 5.45, + "learning_rate": 0.00011438604567030696, + "loss": 8.5537, + "step": 511600 + }, + { + "epoch": 5.45, + "learning_rate": 0.0001143584479035929, + "loss": 8.6089, + "step": 511700 + }, + { + "epoch": 5.45, + "learning_rate": 0.00011433084902022087, + "loss": 8.5562, + "step": 511800 + }, + { + "epoch": 5.45, + "learning_rate": 0.00011430324902233718, + "loss": 8.6272, + "step": 511900 + }, + { + "epoch": 5.45, + "learning_rate": 0.00011427564791208837, + "loss": 8.6369, + "step": 512000 + }, + { + "epoch": 5.45, + "learning_rate": 0.00011424804569162093, + "loss": 8.6289, + "step": 512100 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011422044236308151, + "loss": 8.5537, + "step": 512200 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011419283792861679, + "loss": 8.5957, + "step": 512300 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011416523239037364, + "loss": 8.5644, + "step": 512400 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011413762575049887, + "loss": 8.5157, + "step": 512500 + }, + { + "epoch": 5.46, + "learning_rate": 0.0001141100180111395, + "loss": 8.5527, + "step": 512600 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011408240917444253, + "loss": 8.6572, + "step": 512700 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011405479924255517, + "loss": 8.6175, + "step": 512800 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011402718821762462, + "loss": 8.5576, + "step": 512900 + }, + { + "epoch": 5.46, + "learning_rate": 0.00011399957610179819, + "loss": 8.5555, + "step": 513000 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011397196289722326, + "loss": 8.5721, + "step": 513100 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011394434860604733, + "loss": 8.608, + "step": 513200 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011391673323041798, + "loss": 8.5685, + "step": 513300 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011388911677248284, + "loss": 8.6255, + "step": 513400 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011386149923438964, + "loss": 8.5377, + "step": 513500 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011383388061828621, + "loss": 8.6112, + "step": 513600 + }, + { + "epoch": 5.47, + "learning_rate": 0.0001138062609263205, + "loss": 8.5563, + "step": 513700 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011377864016064039, + "loss": 8.6563, + "step": 513800 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011375101832339403, + "loss": 8.5394, + "step": 513900 + }, + { + "epoch": 5.47, + "learning_rate": 0.00011372339541672953, + "loss": 8.5919, + "step": 514000 + }, + { + "epoch": 5.48, + "learning_rate": 0.0001136957714427952, + "loss": 8.6154, + "step": 514100 + }, + { + "epoch": 5.48, + "learning_rate": 0.00011366814640373924, + "loss": 8.5631, + "step": 514200 + }, + { + "epoch": 5.48, + "learning_rate": 0.00011364052030171018, + "loss": 8.5815, + "step": 514300 + }, + { + "epoch": 5.48, + "learning_rate": 0.0001136128931388564, + "loss": 8.5858, + "step": 514400 + }, + { + "epoch": 5.48, + "learning_rate": 0.00011358526491732652, + "loss": 8.5566, + "step": 514500 + }, + { + "epoch": 5.48, + "learning_rate": 0.0001135576356392692, + "loss": 8.5543, + "step": 514600 + }, + { + "epoch": 5.48, + "learning_rate": 0.00011353000530683316, + "loss": 8.6558, + "step": 514700 + }, + { + "epoch": 5.48, + "learning_rate": 0.00011350237392216718, + "loss": 8.6189, + "step": 514800 + }, + { + "epoch": 5.48, + "learning_rate": 0.00011347474148742019, + "loss": 8.6244, + "step": 514900 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011344710800474116, + "loss": 8.6043, + "step": 515000 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011341947347627913, + "loss": 8.5383, + "step": 515100 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011339183790418327, + "loss": 8.5972, + "step": 515200 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011336420129060279, + "loss": 8.5206, + "step": 515300 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011333656363768699, + "loss": 8.5692, + "step": 515400 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011330892494758525, + "loss": 8.6296, + "step": 515500 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011328128522244708, + "loss": 8.5925, + "step": 515600 + }, + { + "epoch": 5.49, + "learning_rate": 0.00011325364446442195, + "loss": 8.6379, + "step": 515700 + }, + { + "epoch": 5.49, + "learning_rate": 0.0001132260026756595, + "loss": 8.5826, + "step": 515800 + }, + { + "epoch": 5.5, + "learning_rate": 0.00011319835985830949, + "loss": 8.5793, + "step": 515900 + }, + { + "epoch": 5.5, + "learning_rate": 0.00011317071601452166, + "loss": 8.5443, + "step": 516000 + }, + { + "epoch": 5.5, + "learning_rate": 0.00011314307114644587, + "loss": 8.5712, + "step": 516100 + }, + { + "epoch": 5.5, + "learning_rate": 0.00011311542525623206, + "loss": 8.5994, + "step": 516200 + }, + { + "epoch": 5.5, + "learning_rate": 0.0001130877783460303, + "loss": 8.6708, + "step": 516300 + }, + { + "epoch": 5.5, + "learning_rate": 0.00011306013041799063, + "loss": 8.5542, + "step": 516400 + }, + { + "epoch": 5.5, + "learning_rate": 0.0001130324814742633, + "loss": 8.6051, + "step": 516500 + }, + { + "epoch": 5.5, + "learning_rate": 0.00011300483151699852, + "loss": 8.5905, + "step": 516600 + }, + { + "epoch": 5.5, + "learning_rate": 0.00011297718054834664, + "loss": 8.5778, + "step": 516700 + }, + { + "epoch": 5.5, + "learning_rate": 0.0001129495285704581, + "loss": 8.5755, + "step": 516800 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011292187558548339, + "loss": 8.6306, + "step": 516900 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011289422159557306, + "loss": 8.5319, + "step": 517000 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011286656660287778, + "loss": 8.6152, + "step": 517100 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011283891060954828, + "loss": 8.5563, + "step": 517200 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011281125361773536, + "loss": 8.5719, + "step": 517300 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011278359562958996, + "loss": 8.6021, + "step": 517400 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011275593664726298, + "loss": 8.5263, + "step": 517500 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011272827667290552, + "loss": 8.6072, + "step": 517600 + }, + { + "epoch": 5.51, + "learning_rate": 0.00011270061570866861, + "loss": 8.5817, + "step": 517700 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011267295375670352, + "loss": 8.567, + "step": 517800 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011264529081916151, + "loss": 8.5714, + "step": 517900 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011261762689819396, + "loss": 8.5686, + "step": 518000 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011258996199595219, + "loss": 8.5692, + "step": 518100 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011256229611458782, + "loss": 8.5858, + "step": 518200 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011253462925625236, + "loss": 8.5558, + "step": 518300 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011250696142309749, + "loss": 8.5997, + "step": 518400 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011247929261727494, + "loss": 8.5244, + "step": 518500 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011245162284093651, + "loss": 8.5886, + "step": 518600 + }, + { + "epoch": 5.52, + "learning_rate": 0.00011242395209623409, + "loss": 8.4468, + "step": 518700 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011239628038531962, + "loss": 8.5429, + "step": 518800 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011236860771034515, + "loss": 8.5888, + "step": 518900 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011234093407346275, + "loss": 8.5901, + "step": 519000 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011231325947682468, + "loss": 8.6178, + "step": 519100 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011228558392258308, + "loss": 8.4962, + "step": 519200 + }, + { + "epoch": 5.53, + "learning_rate": 0.0001122579074128904, + "loss": 8.6164, + "step": 519300 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011223022994989894, + "loss": 8.5276, + "step": 519400 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011220255153576127, + "loss": 8.6205, + "step": 519500 + }, + { + "epoch": 5.53, + "learning_rate": 0.00011217487217262988, + "loss": 8.6096, + "step": 519600 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011214719186265746, + "loss": 8.657, + "step": 519700 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011211951060799664, + "loss": 8.6299, + "step": 519800 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011209182841080021, + "loss": 8.6175, + "step": 519900 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011206414527322107, + "loss": 8.5322, + "step": 520000 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011203646119741208, + "loss": 8.6974, + "step": 520100 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011200877618552628, + "loss": 8.5435, + "step": 520200 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011198109023971668, + "loss": 8.6212, + "step": 520300 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011195340336213646, + "loss": 8.5506, + "step": 520400 + }, + { + "epoch": 5.54, + "learning_rate": 0.00011192571555493883, + "loss": 8.5507, + "step": 520500 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011189802682027708, + "loss": 8.5364, + "step": 520600 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011187033716030453, + "loss": 8.5619, + "step": 520700 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011184264657717464, + "loss": 8.5156, + "step": 520800 + }, + { + "epoch": 5.55, + "learning_rate": 0.0001118149550730409, + "loss": 8.559, + "step": 520900 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011178726265005687, + "loss": 8.6755, + "step": 521000 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011175956931037621, + "loss": 8.6455, + "step": 521100 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011173187505615265, + "loss": 8.6074, + "step": 521200 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011170417988953992, + "loss": 8.5176, + "step": 521300 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011167648381269193, + "loss": 8.5966, + "step": 521400 + }, + { + "epoch": 5.55, + "learning_rate": 0.00011164878682776256, + "loss": 8.5559, + "step": 521500 + }, + { + "epoch": 5.56, + "learning_rate": 0.00011162108893690583, + "loss": 8.569, + "step": 521600 + }, + { + "epoch": 5.56, + "learning_rate": 0.00011159339014227585, + "loss": 8.5953, + "step": 521700 + }, + { + "epoch": 5.56, + "learning_rate": 0.00011156569044602667, + "loss": 8.6457, + "step": 521800 + }, + { + "epoch": 5.56, + "learning_rate": 0.00011153798985031258, + "loss": 8.5609, + "step": 521900 + }, + { + "epoch": 5.56, + "learning_rate": 0.0001115102883572878, + "loss": 8.6024, + "step": 522000 + }, + { + "epoch": 5.56, + "learning_rate": 0.00011148258596910675, + "loss": 8.6467, + "step": 522100 + }, + { + "epoch": 5.56, + "learning_rate": 0.00011145488268792375, + "loss": 8.5679, + "step": 522200 + }, + { + "epoch": 5.56, + "learning_rate": 0.00011142717851589337, + "loss": 8.5524, + "step": 522300 + }, + { + "epoch": 5.56, + "learning_rate": 0.0001113994734551701, + "loss": 8.5929, + "step": 522400 + }, + { + "epoch": 5.57, + "learning_rate": 0.00011137176750790863, + "loss": 8.5195, + "step": 522500 + }, + { + "epoch": 5.57, + "learning_rate": 0.00011134406067626364, + "loss": 8.5756, + "step": 522600 + }, + { + "epoch": 5.57, + "learning_rate": 0.00011131635296238983, + "loss": 8.4927, + "step": 522700 + }, + { + "epoch": 5.57, + "learning_rate": 0.0001112886443684421, + "loss": 8.5785, + "step": 522800 + }, + { + "epoch": 5.57, + "learning_rate": 0.00011126093489657532, + "loss": 8.5208, + "step": 522900 + }, + { + "epoch": 5.57, + "learning_rate": 0.0001112332245489445, + "loss": 8.5415, + "step": 523000 + }, + { + "epoch": 5.57, + "learning_rate": 0.00011120551332770459, + "loss": 8.542, + "step": 523100 + }, + { + "epoch": 5.57, + "learning_rate": 0.00011117780123501078, + "loss": 8.6077, + "step": 523200 + }, + { + "epoch": 5.57, + "learning_rate": 0.0001111500882730182, + "loss": 8.5162, + "step": 523300 + }, + { + "epoch": 5.57, + "learning_rate": 0.00011112237444388212, + "loss": 8.518, + "step": 523400 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011109465974975781, + "loss": 8.6116, + "step": 523500 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011106694419280066, + "loss": 8.5618, + "step": 523600 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011103922777516614, + "loss": 8.5044, + "step": 523700 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011101151049900968, + "loss": 8.565, + "step": 523800 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011098379236648695, + "loss": 8.5809, + "step": 523900 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011095607337975354, + "loss": 8.5396, + "step": 524000 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011092835354096519, + "loss": 8.5642, + "step": 524100 + }, + { + "epoch": 5.58, + "learning_rate": 0.0001109006328522776, + "loss": 8.5101, + "step": 524200 + }, + { + "epoch": 5.58, + "learning_rate": 0.00011087291131584671, + "loss": 8.562, + "step": 524300 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011084518893382838, + "loss": 8.552, + "step": 524400 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011081746570837859, + "loss": 8.5933, + "step": 524500 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011078974164165332, + "loss": 8.6776, + "step": 524600 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011076201673580879, + "loss": 8.632, + "step": 524700 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011073429099300106, + "loss": 8.4993, + "step": 524800 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011070656441538642, + "loss": 8.4952, + "step": 524900 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011067883700512116, + "loss": 8.476, + "step": 525000 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011065110876436165, + "loss": 8.5479, + "step": 525100 + }, + { + "epoch": 5.59, + "learning_rate": 0.00011062337969526429, + "loss": 8.6555, + "step": 525200 + }, + { + "epoch": 5.6, + "learning_rate": 0.0001105956497999856, + "loss": 8.6625, + "step": 525300 + }, + { + "epoch": 5.6, + "learning_rate": 0.00011056791908068214, + "loss": 8.5323, + "step": 525400 + }, + { + "epoch": 5.6, + "learning_rate": 0.0001105401875395105, + "loss": 8.5907, + "step": 525500 + }, + { + "epoch": 5.6, + "learning_rate": 0.0001105124551786274, + "loss": 8.5602, + "step": 525600 + }, + { + "epoch": 5.6, + "learning_rate": 0.00011048472200018953, + "loss": 8.5779, + "step": 525700 + }, + { + "epoch": 5.6, + "learning_rate": 0.0001104569880063538, + "loss": 8.6586, + "step": 525800 + }, + { + "epoch": 5.6, + "learning_rate": 0.000110429253199277, + "loss": 8.5431, + "step": 525900 + }, + { + "epoch": 5.6, + "learning_rate": 0.00011040151758111612, + "loss": 8.5403, + "step": 526000 + }, + { + "epoch": 5.6, + "learning_rate": 0.0001103737811540281, + "loss": 8.5755, + "step": 526100 + }, + { + "epoch": 5.6, + "learning_rate": 0.00011034604392017009, + "loss": 8.5626, + "step": 526200 + }, + { + "epoch": 5.61, + "learning_rate": 0.00011031830588169913, + "loss": 8.5576, + "step": 526300 + }, + { + "epoch": 5.61, + "learning_rate": 0.0001102905670407725, + "loss": 8.6225, + "step": 526400 + }, + { + "epoch": 5.61, + "learning_rate": 0.00011026282739954736, + "loss": 8.5163, + "step": 526500 + }, + { + "epoch": 5.61, + "learning_rate": 0.00011023508696018113, + "loss": 8.6354, + "step": 526600 + }, + { + "epoch": 5.61, + "learning_rate": 0.00011020734572483108, + "loss": 8.5856, + "step": 526700 + }, + { + "epoch": 5.61, + "learning_rate": 0.00011017960369565472, + "loss": 8.5388, + "step": 526800 + }, + { + "epoch": 5.61, + "learning_rate": 0.0001101518608748095, + "loss": 8.5649, + "step": 526900 + }, + { + "epoch": 5.61, + "learning_rate": 0.00011012411726445302, + "loss": 8.5446, + "step": 527000 + }, + { + "epoch": 5.61, + "learning_rate": 0.00011009637286674293, + "loss": 8.5912, + "step": 527100 + }, + { + "epoch": 5.62, + "learning_rate": 0.00011006862768383681, + "loss": 8.5878, + "step": 527200 + }, + { + "epoch": 5.62, + "learning_rate": 0.00011004088171789253, + "loss": 8.5955, + "step": 527300 + }, + { + "epoch": 5.62, + "learning_rate": 0.0001100131349710678, + "loss": 8.5406, + "step": 527400 + }, + { + "epoch": 5.62, + "learning_rate": 0.00010998538744552054, + "loss": 8.5765, + "step": 527500 + }, + { + "epoch": 5.62, + "learning_rate": 0.00010995763914340867, + "loss": 8.4787, + "step": 527600 + }, + { + "epoch": 5.62, + "learning_rate": 0.00010992989006689016, + "loss": 8.6036, + "step": 527700 + }, + { + "epoch": 5.62, + "learning_rate": 0.00010990214021812309, + "loss": 8.5898, + "step": 527800 + }, + { + "epoch": 5.62, + "learning_rate": 0.0001098743895992655, + "loss": 8.5593, + "step": 527900 + }, + { + "epoch": 5.62, + "learning_rate": 0.00010984663821247567, + "loss": 8.5508, + "step": 528000 + }, + { + "epoch": 5.62, + "learning_rate": 0.00010981888605991173, + "loss": 8.5086, + "step": 528100 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010979113314373204, + "loss": 8.568, + "step": 528200 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010976337946609487, + "loss": 8.6119, + "step": 528300 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010973562502915871, + "loss": 8.5923, + "step": 528400 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010970786983508194, + "loss": 8.5886, + "step": 528500 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010968011388602318, + "loss": 8.5686, + "step": 528600 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010965235718414091, + "loss": 8.5724, + "step": 528700 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010962459973159385, + "loss": 8.6187, + "step": 528800 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010959684153054066, + "loss": 8.5502, + "step": 528900 + }, + { + "epoch": 5.63, + "learning_rate": 0.00010956908258314011, + "loss": 8.5288, + "step": 529000 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010954132289155101, + "loss": 8.6025, + "step": 529100 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010951356245793227, + "loss": 8.5908, + "step": 529200 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010948580128444277, + "loss": 8.4752, + "step": 529300 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010945803937324153, + "loss": 8.5379, + "step": 529400 + }, + { + "epoch": 5.64, + "learning_rate": 0.0001094302767264876, + "loss": 8.5647, + "step": 529500 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010940251334634004, + "loss": 8.6037, + "step": 529600 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010937474923495808, + "loss": 8.5561, + "step": 529700 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010934698439450088, + "loss": 8.5203, + "step": 529800 + }, + { + "epoch": 5.64, + "learning_rate": 0.00010931921882712774, + "loss": 8.6464, + "step": 529900 + }, + { + "epoch": 5.65, + "learning_rate": 0.00010929145253499795, + "loss": 8.4227, + "step": 530000 + }, + { + "epoch": 5.65, + "learning_rate": 0.000109263685520271, + "loss": 8.602, + "step": 530100 + }, + { + "epoch": 5.65, + "learning_rate": 0.00010923591778510622, + "loss": 8.5601, + "step": 530200 + }, + { + "epoch": 5.65, + "learning_rate": 0.0001092081493316632, + "loss": 8.6653, + "step": 530300 + }, + { + "epoch": 5.65, + "learning_rate": 0.00010918038016210139, + "loss": 8.5775, + "step": 530400 + }, + { + "epoch": 5.65, + "learning_rate": 0.00010915261027858052, + "loss": 8.6495, + "step": 530500 + }, + { + "epoch": 5.65, + "learning_rate": 0.0001091248396832602, + "loss": 8.5314, + "step": 530600 + }, + { + "epoch": 5.65, + "learning_rate": 0.00010909706837830012, + "loss": 8.4847, + "step": 530700 + }, + { + "epoch": 5.65, + "learning_rate": 0.00010906929636586013, + "loss": 8.5063, + "step": 530800 + }, + { + "epoch": 5.65, + "learning_rate": 0.00010904152364809998, + "loss": 8.5391, + "step": 530900 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010901375022717964, + "loss": 8.5388, + "step": 531000 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010898597610525899, + "loss": 8.6267, + "step": 531100 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010895820128449803, + "loss": 8.531, + "step": 531200 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010893042576705685, + "loss": 8.6173, + "step": 531300 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010890264955509555, + "loss": 8.6125, + "step": 531400 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010887487265077423, + "loss": 8.6393, + "step": 531500 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010884709505625319, + "loss": 8.488, + "step": 531600 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010881931677369262, + "loss": 8.5472, + "step": 531700 + }, + { + "epoch": 5.66, + "learning_rate": 0.00010879153780525288, + "loss": 8.6138, + "step": 531800 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010876375815309432, + "loss": 8.4956, + "step": 531900 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010873597781937741, + "loss": 8.5628, + "step": 532000 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010870819680626259, + "loss": 8.5038, + "step": 532100 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010868041511591038, + "loss": 8.5353, + "step": 532200 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010865263275048141, + "loss": 8.4691, + "step": 532300 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010862484971213629, + "loss": 8.6581, + "step": 532400 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010859706600303572, + "loss": 8.5744, + "step": 532500 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010856928162534044, + "loss": 8.5426, + "step": 532600 + }, + { + "epoch": 5.67, + "learning_rate": 0.00010854149658121123, + "loss": 8.5267, + "step": 532700 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010851371087280895, + "loss": 8.595, + "step": 532800 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010848592450229449, + "loss": 8.4798, + "step": 532900 + }, + { + "epoch": 5.68, + "learning_rate": 0.0001084581374718288, + "loss": 8.5075, + "step": 533000 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010843034978357292, + "loss": 8.5263, + "step": 533100 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010840256143968781, + "loss": 8.6327, + "step": 533200 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010837477244233467, + "loss": 8.5682, + "step": 533300 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010834698279367456, + "loss": 8.5889, + "step": 533400 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010831919249586879, + "loss": 8.5969, + "step": 533500 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010829140155107856, + "loss": 8.5186, + "step": 533600 + }, + { + "epoch": 5.68, + "learning_rate": 0.00010826360996146512, + "loss": 8.5265, + "step": 533700 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010823581772918991, + "loss": 8.558, + "step": 533800 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010820802485641431, + "loss": 8.5667, + "step": 533900 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010818023134529975, + "loss": 8.5531, + "step": 534000 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010815243719800774, + "loss": 8.6445, + "step": 534100 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010812464241669986, + "loss": 8.5534, + "step": 534200 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010809684700353768, + "loss": 8.6529, + "step": 534300 + }, + { + "epoch": 5.69, + "learning_rate": 0.0001080690509606829, + "loss": 8.4898, + "step": 534400 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010804125429029718, + "loss": 8.5484, + "step": 534500 + }, + { + "epoch": 5.69, + "learning_rate": 0.00010801345699454229, + "loss": 8.5518, + "step": 534600 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010798565907558003, + "loss": 8.4891, + "step": 534700 + }, + { + "epoch": 5.7, + "learning_rate": 0.0001079578605355722, + "loss": 8.5684, + "step": 534800 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010793006137668076, + "loss": 8.5733, + "step": 534900 + }, + { + "epoch": 5.7, + "learning_rate": 0.0001079022616010676, + "loss": 8.5522, + "step": 535000 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010787446121089476, + "loss": 8.5198, + "step": 535100 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010784666020832423, + "loss": 8.569, + "step": 535200 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010781885859551817, + "loss": 8.6105, + "step": 535300 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010779105637463863, + "loss": 8.544, + "step": 535400 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010776325354784784, + "loss": 8.5004, + "step": 535500 + }, + { + "epoch": 5.7, + "learning_rate": 0.00010773545011730801, + "loss": 8.6066, + "step": 535600 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010770764608518145, + "loss": 8.642, + "step": 535700 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010767984145363042, + "loss": 8.5904, + "step": 535800 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010765203622481734, + "loss": 8.5511, + "step": 535900 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010762423040090461, + "loss": 8.6147, + "step": 536000 + }, + { + "epoch": 5.71, + "learning_rate": 0.0001075964239840547, + "loss": 8.5604, + "step": 536100 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010756861697643008, + "loss": 8.5243, + "step": 536200 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010754080938019335, + "loss": 8.4797, + "step": 536300 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010751300119750711, + "loss": 8.6234, + "step": 536400 + }, + { + "epoch": 5.71, + "learning_rate": 0.00010748519243053398, + "loss": 8.4941, + "step": 536500 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010745738308143667, + "loss": 8.6094, + "step": 536600 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010742957315237787, + "loss": 8.535, + "step": 536700 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010740176264552044, + "loss": 8.5629, + "step": 536800 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010737395156302715, + "loss": 8.5038, + "step": 536900 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010734613990706088, + "loss": 8.6247, + "step": 537000 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010731832767978454, + "loss": 8.4595, + "step": 537100 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010729051488336111, + "loss": 8.5149, + "step": 537200 + }, + { + "epoch": 5.72, + "learning_rate": 0.00010726270151995357, + "loss": 8.6072, + "step": 537300 + }, + { + "epoch": 5.72, + "learning_rate": 0.000107234887591725, + "loss": 8.5838, + "step": 537400 + }, + { + "epoch": 5.73, + "learning_rate": 0.00010720707310083846, + "loss": 8.5672, + "step": 537500 + }, + { + "epoch": 5.73, + "learning_rate": 0.0001071792580494571, + "loss": 8.5394, + "step": 537600 + }, + { + "epoch": 5.73, + "learning_rate": 0.00010715144243974411, + "loss": 8.6341, + "step": 537700 + }, + { + "epoch": 5.73, + "learning_rate": 0.00010712362627386268, + "loss": 8.5938, + "step": 537800 + }, + { + "epoch": 5.73, + "learning_rate": 0.0001070958095539761, + "loss": 8.5615, + "step": 537900 + }, + { + "epoch": 5.73, + "learning_rate": 0.00010706799228224769, + "loss": 8.5312, + "step": 538000 + }, + { + "epoch": 5.73, + "learning_rate": 0.0001070401744608408, + "loss": 8.5275, + "step": 538100 + }, + { + "epoch": 5.73, + "learning_rate": 0.00010701235609191878, + "loss": 8.5395, + "step": 538200 + }, + { + "epoch": 5.73, + "learning_rate": 0.00010698453717764512, + "loss": 8.5871, + "step": 538300 + }, + { + "epoch": 5.73, + "learning_rate": 0.00010695671772018325, + "loss": 8.6252, + "step": 538400 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010692889772169676, + "loss": 8.585, + "step": 538500 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010690107718434916, + "loss": 8.5215, + "step": 538600 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010687325611030407, + "loss": 8.494, + "step": 538700 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010684543450172518, + "loss": 8.5809, + "step": 538800 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010681761236077608, + "loss": 8.5313, + "step": 538900 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010678978968962062, + "loss": 8.5318, + "step": 539000 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010676196649042245, + "loss": 8.5851, + "step": 539100 + }, + { + "epoch": 5.74, + "learning_rate": 0.0001067341427653455, + "loss": 8.5844, + "step": 539200 + }, + { + "epoch": 5.74, + "learning_rate": 0.00010670631851655352, + "loss": 8.5454, + "step": 539300 + }, + { + "epoch": 5.75, + "learning_rate": 0.0001066784937462105, + "loss": 8.6251, + "step": 539400 + }, + { + "epoch": 5.75, + "learning_rate": 0.0001066506684564803, + "loss": 8.5501, + "step": 539500 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010662284264952696, + "loss": 8.6819, + "step": 539600 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010659501632751443, + "loss": 8.5177, + "step": 539700 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010656718949260683, + "loss": 8.5143, + "step": 539800 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010653936214696818, + "loss": 8.5337, + "step": 539900 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010651153429276269, + "loss": 8.5235, + "step": 540000 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010648370593215446, + "loss": 8.6422, + "step": 540100 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010645587706730782, + "loss": 8.4896, + "step": 540200 + }, + { + "epoch": 5.75, + "learning_rate": 0.00010642804770038689, + "loss": 8.5517, + "step": 540300 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010640021783355605, + "loss": 8.5041, + "step": 540400 + }, + { + "epoch": 5.76, + "learning_rate": 0.0001063723874689796, + "loss": 8.5429, + "step": 540500 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010634455660882192, + "loss": 8.4744, + "step": 540600 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010631672525524741, + "loss": 8.5738, + "step": 540700 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010628889341042052, + "loss": 8.5874, + "step": 540800 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010626106107650576, + "loss": 8.5334, + "step": 540900 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010623322825566762, + "loss": 8.5468, + "step": 541000 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010620539495007067, + "loss": 8.6567, + "step": 541100 + }, + { + "epoch": 5.76, + "learning_rate": 0.00010617756116187951, + "loss": 8.5695, + "step": 541200 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010614972689325879, + "loss": 8.5611, + "step": 541300 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010612189214637318, + "loss": 8.5753, + "step": 541400 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010609405692338737, + "loss": 8.5996, + "step": 541500 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010606622122646613, + "loss": 8.4975, + "step": 541600 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010603838505777422, + "loss": 8.6378, + "step": 541700 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010601054841947648, + "loss": 8.6102, + "step": 541800 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010598271131373779, + "loss": 8.57, + "step": 541900 + }, + { + "epoch": 5.77, + "learning_rate": 0.000105954873742723, + "loss": 8.6004, + "step": 542000 + }, + { + "epoch": 5.77, + "learning_rate": 0.00010592703570859705, + "loss": 8.557, + "step": 542100 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010589919721352497, + "loss": 8.5366, + "step": 542200 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010587135825967165, + "loss": 8.5752, + "step": 542300 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010584351884920225, + "loss": 8.5944, + "step": 542400 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010581567898428172, + "loss": 8.6032, + "step": 542500 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010578783866707526, + "loss": 8.5583, + "step": 542600 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010575999789974802, + "loss": 8.5204, + "step": 542700 + }, + { + "epoch": 5.78, + "learning_rate": 0.0001057321566844651, + "loss": 8.5366, + "step": 542800 + }, + { + "epoch": 5.78, + "learning_rate": 0.0001057043150233918, + "loss": 8.582, + "step": 542900 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010567647291869331, + "loss": 8.5414, + "step": 543000 + }, + { + "epoch": 5.78, + "learning_rate": 0.00010564863037253493, + "loss": 8.6211, + "step": 543100 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010562078738708199, + "loss": 8.5379, + "step": 543200 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010559294396449983, + "loss": 8.5867, + "step": 543300 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010556510010695384, + "loss": 8.6273, + "step": 543400 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010553725581660945, + "loss": 8.6178, + "step": 543500 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010550941109563209, + "loss": 8.5501, + "step": 543600 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010548156594618726, + "loss": 8.5281, + "step": 543700 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010545372037044046, + "loss": 8.6019, + "step": 543800 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010542587437055728, + "loss": 8.5379, + "step": 543900 + }, + { + "epoch": 5.79, + "learning_rate": 0.00010539802794870328, + "loss": 8.5763, + "step": 544000 + }, + { + "epoch": 5.8, + "learning_rate": 0.0001053701811070441, + "loss": 8.4557, + "step": 544100 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010534233384774538, + "loss": 8.5428, + "step": 544200 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010531448617297281, + "loss": 8.4776, + "step": 544300 + }, + { + "epoch": 5.8, + "learning_rate": 0.0001052866380848921, + "loss": 8.5997, + "step": 544400 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010525878958566897, + "loss": 8.6119, + "step": 544500 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010523094067746926, + "loss": 8.4848, + "step": 544600 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010520309136245871, + "loss": 8.5184, + "step": 544700 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010517524164280323, + "loss": 8.5777, + "step": 544800 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010514739152066868, + "loss": 8.5529, + "step": 544900 + }, + { + "epoch": 5.8, + "learning_rate": 0.00010511954099822096, + "loss": 8.4821, + "step": 545000 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010509169007762598, + "loss": 8.5073, + "step": 545100 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010506383876104977, + "loss": 8.5086, + "step": 545200 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010503598705065828, + "loss": 8.6025, + "step": 545300 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010500813494861757, + "loss": 8.5708, + "step": 545400 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010498028245709369, + "loss": 8.4899, + "step": 545500 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010495242957825268, + "loss": 8.5898, + "step": 545600 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010492457631426076, + "loss": 8.5012, + "step": 545700 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010489672266728401, + "loss": 8.5887, + "step": 545800 + }, + { + "epoch": 5.81, + "learning_rate": 0.00010486886863948864, + "loss": 8.6169, + "step": 545900 + }, + { + "epoch": 5.82, + "learning_rate": 0.00010484101423304082, + "loss": 8.588, + "step": 546000 + }, + { + "epoch": 5.82, + "learning_rate": 0.00010481315945010686, + "loss": 8.5467, + "step": 546100 + }, + { + "epoch": 5.82, + "learning_rate": 0.00010478530429285295, + "loss": 8.5537, + "step": 546200 + }, + { + "epoch": 5.82, + "learning_rate": 0.00010475744876344549, + "loss": 8.6253, + "step": 546300 + }, + { + "epoch": 5.82, + "learning_rate": 0.00010472959286405068, + "loss": 8.5755, + "step": 546400 + }, + { + "epoch": 5.82, + "learning_rate": 0.000104701736596835, + "loss": 8.6039, + "step": 546500 + }, + { + "epoch": 5.82, + "learning_rate": 0.00010467387996396474, + "loss": 8.6681, + "step": 546600 + }, + { + "epoch": 5.82, + "learning_rate": 0.00010464602296760637, + "loss": 8.4822, + "step": 546700 + }, + { + "epoch": 5.82, + "learning_rate": 0.0001046181656099263, + "loss": 8.519, + "step": 546800 + }, + { + "epoch": 5.83, + "learning_rate": 0.00010459030789309101, + "loss": 8.5853, + "step": 546900 + }, + { + "epoch": 5.83, + "learning_rate": 0.00010456244981926703, + "loss": 8.5415, + "step": 547000 + }, + { + "epoch": 5.83, + "learning_rate": 0.00010453459139062083, + "loss": 8.555, + "step": 547100 + }, + { + "epoch": 5.83, + "learning_rate": 0.000104506732609319, + "loss": 8.6129, + "step": 547200 + }, + { + "epoch": 5.83, + "learning_rate": 0.0001044788734775281, + "loss": 8.4719, + "step": 547300 + }, + { + "epoch": 5.83, + "learning_rate": 0.00010445101399741475, + "loss": 8.4976, + "step": 547400 + }, + { + "epoch": 5.83, + "learning_rate": 0.00010442315417114556, + "loss": 8.5676, + "step": 547500 + }, + { + "epoch": 5.83, + "learning_rate": 0.00010439529400088725, + "loss": 8.6269, + "step": 547600 + }, + { + "epoch": 5.83, + "learning_rate": 0.00010436743348880644, + "loss": 8.5861, + "step": 547700 + }, + { + "epoch": 5.83, + "learning_rate": 0.0001043395726370699, + "loss": 8.5345, + "step": 547800 + }, + { + "epoch": 5.84, + "learning_rate": 0.0001043117114478443, + "loss": 8.5639, + "step": 547900 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010428384992329651, + "loss": 8.5216, + "step": 548000 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010425598806559325, + "loss": 8.5006, + "step": 548100 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010422812587690136, + "loss": 8.6382, + "step": 548200 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010420026335938768, + "loss": 8.5107, + "step": 548300 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010417240051521906, + "loss": 8.5173, + "step": 548400 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010414453734656247, + "loss": 8.6414, + "step": 548500 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010411667385558473, + "loss": 8.6333, + "step": 548600 + }, + { + "epoch": 5.84, + "learning_rate": 0.00010408881004445286, + "loss": 8.58, + "step": 548700 + }, + { + "epoch": 5.85, + "learning_rate": 0.0001040609459153338, + "loss": 8.5604, + "step": 548800 + }, + { + "epoch": 5.85, + "learning_rate": 0.00010403308147039454, + "loss": 8.5345, + "step": 548900 + }, + { + "epoch": 5.85, + "learning_rate": 0.00010400521671180214, + "loss": 8.528, + "step": 549000 + }, + { + "epoch": 5.85, + "learning_rate": 0.00010397735164172364, + "loss": 8.5325, + "step": 549100 + }, + { + "epoch": 5.85, + "learning_rate": 0.00010394948626232606, + "loss": 8.5253, + "step": 549200 + }, + { + "epoch": 5.85, + "learning_rate": 0.00010392162057577654, + "loss": 8.6327, + "step": 549300 + }, + { + "epoch": 5.85, + "learning_rate": 0.0001038937545842422, + "loss": 8.588, + "step": 549400 + }, + { + "epoch": 5.85, + "learning_rate": 0.00010386588828989013, + "loss": 8.5639, + "step": 549500 + }, + { + "epoch": 5.85, + "learning_rate": 0.00010383802169488755, + "loss": 8.4542, + "step": 549600 + }, + { + "epoch": 5.86, + "learning_rate": 0.0001038101548014016, + "loss": 8.5626, + "step": 549700 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010378228761159954, + "loss": 8.5438, + "step": 549800 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010375442012764855, + "loss": 8.5288, + "step": 549900 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010372655235171595, + "loss": 8.6014, + "step": 550000 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010369868428596899, + "loss": 8.56, + "step": 550100 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010367081593257495, + "loss": 8.5436, + "step": 550200 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010364294729370119, + "loss": 8.4828, + "step": 550300 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010361507837151502, + "loss": 8.6114, + "step": 550400 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010358720916818385, + "loss": 8.5544, + "step": 550500 + }, + { + "epoch": 5.86, + "learning_rate": 0.00010355933968587505, + "loss": 8.5926, + "step": 550600 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010353146992675602, + "loss": 8.4256, + "step": 550700 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010350359989299424, + "loss": 8.557, + "step": 550800 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010347572958675709, + "loss": 8.5857, + "step": 550900 + }, + { + "epoch": 5.87, + "learning_rate": 0.0001034478590102121, + "loss": 8.5897, + "step": 551000 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010341998816552679, + "loss": 8.5551, + "step": 551100 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010339211705486862, + "loss": 8.5056, + "step": 551200 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010336424568040517, + "loss": 8.5623, + "step": 551300 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010333637404430397, + "loss": 8.5826, + "step": 551400 + }, + { + "epoch": 5.87, + "learning_rate": 0.00010330850214873262, + "loss": 8.5998, + "step": 551500 + }, + { + "epoch": 5.88, + "learning_rate": 0.0001032806299958587, + "loss": 8.5781, + "step": 551600 + }, + { + "epoch": 5.88, + "learning_rate": 0.0001032527575878499, + "loss": 8.5285, + "step": 551700 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010322488492687375, + "loss": 8.5566, + "step": 551800 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010319701201509799, + "loss": 8.599, + "step": 551900 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010316913885469027, + "loss": 8.6, + "step": 552000 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010314126544781827, + "loss": 8.5538, + "step": 552100 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010311339179664976, + "loss": 8.5206, + "step": 552200 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010308551790335244, + "loss": 8.5504, + "step": 552300 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010305764377009408, + "loss": 8.6553, + "step": 552400 + }, + { + "epoch": 5.88, + "learning_rate": 0.00010302976939904244, + "loss": 8.5057, + "step": 552500 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010300189479236532, + "loss": 8.5045, + "step": 552600 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010297401995223052, + "loss": 8.532, + "step": 552700 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010294614488080592, + "loss": 8.505, + "step": 552800 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010291826958025927, + "loss": 8.4947, + "step": 552900 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010289039405275855, + "loss": 8.4374, + "step": 553000 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010286251830047153, + "loss": 8.6161, + "step": 553100 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010283464232556619, + "loss": 8.5952, + "step": 553200 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010280676613021042, + "loss": 8.4213, + "step": 553300 + }, + { + "epoch": 5.89, + "learning_rate": 0.00010277888971657217, + "loss": 8.4787, + "step": 553400 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010275101308681938, + "loss": 8.5799, + "step": 553500 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010272313624312, + "loss": 8.5996, + "step": 553600 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010269525918764207, + "loss": 8.4795, + "step": 553700 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010266738192255349, + "loss": 8.4708, + "step": 553800 + }, + { + "epoch": 5.9, + "learning_rate": 0.0001026395044500224, + "loss": 8.5779, + "step": 553900 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010261162677221674, + "loss": 8.5724, + "step": 554000 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010258374889130462, + "loss": 8.5326, + "step": 554100 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010255587080945406, + "loss": 8.448, + "step": 554200 + }, + { + "epoch": 5.9, + "learning_rate": 0.00010252799252883319, + "loss": 8.5504, + "step": 554300 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010250011405161005, + "loss": 8.4411, + "step": 554400 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010247223537995281, + "loss": 8.5737, + "step": 554500 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010244435651602953, + "loss": 8.5264, + "step": 554600 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010241647746200842, + "loss": 8.513, + "step": 554700 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010238859822005759, + "loss": 8.5527, + "step": 554800 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010236071879234525, + "loss": 8.4996, + "step": 554900 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010233283918103955, + "loss": 8.5453, + "step": 555000 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010230495938830871, + "loss": 8.5832, + "step": 555100 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010227707941632094, + "loss": 8.5322, + "step": 555200 + }, + { + "epoch": 5.91, + "learning_rate": 0.00010224919926724446, + "loss": 8.4822, + "step": 555300 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010222131894324754, + "loss": 8.5658, + "step": 555400 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010219343844649838, + "loss": 8.5537, + "step": 555500 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010216555777916533, + "loss": 8.5002, + "step": 555600 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010213767694341658, + "loss": 8.5448, + "step": 555700 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010210979594142051, + "loss": 8.5478, + "step": 555800 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010208191477534537, + "loss": 8.5049, + "step": 555900 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010205403344735953, + "loss": 8.5248, + "step": 556000 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010202615195963125, + "loss": 8.5784, + "step": 556100 + }, + { + "epoch": 5.92, + "learning_rate": 0.00010199827031432898, + "loss": 8.4669, + "step": 556200 + }, + { + "epoch": 5.93, + "learning_rate": 0.000101970388513621, + "loss": 8.57, + "step": 556300 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010194250655967568, + "loss": 8.4714, + "step": 556400 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010191462445466144, + "loss": 8.4846, + "step": 556500 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010188674220074666, + "loss": 8.4856, + "step": 556600 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010185885980009973, + "loss": 8.5285, + "step": 556700 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010183097725488908, + "loss": 8.4725, + "step": 556800 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010180309456728316, + "loss": 8.5, + "step": 556900 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010177521173945038, + "loss": 8.4715, + "step": 557000 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010174732877355919, + "loss": 8.519, + "step": 557100 + }, + { + "epoch": 5.93, + "learning_rate": 0.00010171944567177805, + "loss": 8.5378, + "step": 557200 + }, + { + "epoch": 5.94, + "learning_rate": 0.00010169156243627545, + "loss": 8.5435, + "step": 557300 + }, + { + "epoch": 5.94, + "learning_rate": 0.00010166367906921987, + "loss": 8.4903, + "step": 557400 + }, + { + "epoch": 5.94, + "learning_rate": 0.0001016357955727798, + "loss": 8.5132, + "step": 557500 + }, + { + "epoch": 5.94, + "learning_rate": 0.00010160791194912375, + "loss": 8.6053, + "step": 557600 + }, + { + "epoch": 5.94, + "learning_rate": 0.0001015800282004202, + "loss": 8.5602, + "step": 557700 + }, + { + "epoch": 5.94, + "learning_rate": 0.0001015521443288377, + "loss": 8.5327, + "step": 557800 + }, + { + "epoch": 5.94, + "learning_rate": 0.00010152426033654479, + "loss": 8.5683, + "step": 557900 + }, + { + "epoch": 5.94, + "learning_rate": 0.00010149637622570997, + "loss": 8.5174, + "step": 558000 + }, + { + "epoch": 5.94, + "learning_rate": 0.00010146849199850183, + "loss": 8.5769, + "step": 558100 + }, + { + "epoch": 5.95, + "learning_rate": 0.00010144060765708892, + "loss": 8.5337, + "step": 558200 + }, + { + "epoch": 5.95, + "learning_rate": 0.0001014127232036398, + "loss": 8.5369, + "step": 558300 + }, + { + "epoch": 5.95, + "learning_rate": 0.00010138483864032304, + "loss": 8.5367, + "step": 558400 + }, + { + "epoch": 5.95, + "learning_rate": 0.00010135695396930725, + "loss": 8.567, + "step": 558500 + }, + { + "epoch": 5.95, + "learning_rate": 0.000101329069192761, + "loss": 8.5882, + "step": 558600 + }, + { + "epoch": 5.95, + "learning_rate": 0.00010130118431285289, + "loss": 8.4507, + "step": 558700 + }, + { + "epoch": 5.95, + "learning_rate": 0.00010127329933175156, + "loss": 8.5189, + "step": 558800 + }, + { + "epoch": 5.95, + "learning_rate": 0.0001012454142516256, + "loss": 8.4558, + "step": 558900 + }, + { + "epoch": 5.95, + "learning_rate": 0.00010121752907464366, + "loss": 8.5669, + "step": 559000 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010118964380297433, + "loss": 8.4719, + "step": 559100 + }, + { + "epoch": 5.96, + "learning_rate": 0.0001011617584387863, + "loss": 8.5817, + "step": 559200 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010113387298424821, + "loss": 8.588, + "step": 559300 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010110598744152868, + "loss": 8.5053, + "step": 559400 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010107810181279638, + "loss": 8.4873, + "step": 559500 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010105021610021998, + "loss": 8.5193, + "step": 559600 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010102233030596821, + "loss": 8.5564, + "step": 559700 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010099444443220966, + "loss": 8.4954, + "step": 559800 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010096655848111312, + "loss": 8.5508, + "step": 559900 + }, + { + "epoch": 5.96, + "learning_rate": 0.00010093867245484718, + "loss": 8.5209, + "step": 560000 + }, + { + "epoch": 5.97, + "learning_rate": 0.0001009107863555806, + "loss": 8.5964, + "step": 560100 + }, + { + "epoch": 5.97, + "learning_rate": 0.0001008829001854821, + "loss": 8.5104, + "step": 560200 + }, + { + "epoch": 5.97, + "learning_rate": 0.00010085501394672032, + "loss": 8.4949, + "step": 560300 + }, + { + "epoch": 5.97, + "learning_rate": 0.00010082712764146402, + "loss": 8.5544, + "step": 560400 + }, + { + "epoch": 5.97, + "learning_rate": 0.00010079924127188194, + "loss": 8.5023, + "step": 560500 + }, + { + "epoch": 5.97, + "learning_rate": 0.00010077135484014277, + "loss": 8.5487, + "step": 560600 + }, + { + "epoch": 5.97, + "learning_rate": 0.00010074346834841526, + "loss": 8.5676, + "step": 560700 + }, + { + "epoch": 5.97, + "learning_rate": 0.00010071558179886813, + "loss": 8.5802, + "step": 560800 + }, + { + "epoch": 5.97, + "learning_rate": 0.00010068769519367013, + "loss": 8.5051, + "step": 560900 + }, + { + "epoch": 5.98, + "learning_rate": 0.00010065980853499002, + "loss": 8.5966, + "step": 561000 + }, + { + "epoch": 5.98, + "learning_rate": 0.0001006319218249965, + "loss": 8.4811, + "step": 561100 + }, + { + "epoch": 5.98, + "learning_rate": 0.00010060403506585841, + "loss": 8.508, + "step": 561200 + }, + { + "epoch": 5.98, + "learning_rate": 0.0001005761482597444, + "loss": 8.4965, + "step": 561300 + }, + { + "epoch": 5.98, + "learning_rate": 0.0001005482614088233, + "loss": 8.5874, + "step": 561400 + }, + { + "epoch": 5.98, + "learning_rate": 0.00010052037451526383, + "loss": 8.5284, + "step": 561500 + }, + { + "epoch": 5.98, + "learning_rate": 0.0001004924875812348, + "loss": 8.5755, + "step": 561600 + }, + { + "epoch": 5.98, + "learning_rate": 0.00010046460060890497, + "loss": 8.5806, + "step": 561700 + }, + { + "epoch": 5.98, + "learning_rate": 0.00010043671360044307, + "loss": 8.4593, + "step": 561800 + }, + { + "epoch": 5.98, + "learning_rate": 0.00010040882655801793, + "loss": 8.5434, + "step": 561900 + }, + { + "epoch": 5.99, + "learning_rate": 0.0001003809394837983, + "loss": 8.5234, + "step": 562000 + }, + { + "epoch": 5.99, + "learning_rate": 0.00010035305237995299, + "loss": 8.6089, + "step": 562100 + }, + { + "epoch": 5.99, + "learning_rate": 0.00010032516524865073, + "loss": 8.5755, + "step": 562200 + }, + { + "epoch": 5.99, + "learning_rate": 0.00010029727809206036, + "loss": 8.5514, + "step": 562300 + }, + { + "epoch": 5.99, + "learning_rate": 0.00010026939091235061, + "loss": 8.53, + "step": 562400 + }, + { + "epoch": 5.99, + "learning_rate": 0.00010024150371169034, + "loss": 8.5916, + "step": 562500 + }, + { + "epoch": 5.99, + "learning_rate": 0.00010021361649224825, + "loss": 8.487, + "step": 562600 + }, + { + "epoch": 5.99, + "learning_rate": 0.00010018572925619322, + "loss": 8.5384, + "step": 562700 + }, + { + "epoch": 5.99, + "learning_rate": 0.000100157842005694, + "loss": 8.5072, + "step": 562800 + }, + { + "epoch": 6.0, + "learning_rate": 0.00010012995474291942, + "loss": 8.556, + "step": 562900 + }, + { + "epoch": 6.0, + "learning_rate": 0.00010010206747003821, + "loss": 8.4648, + "step": 563000 + }, + { + "epoch": 6.0, + "learning_rate": 0.00010007418018921921, + "loss": 8.5767, + "step": 563100 + }, + { + "epoch": 6.0, + "learning_rate": 0.00010004629290263121, + "loss": 8.501, + "step": 563200 + }, + { + "epoch": 6.0, + "learning_rate": 0.000100018405612443, + "loss": 8.4751, + "step": 563300 + }, + { + "epoch": 6.0, + "learning_rate": 9.99905183208234e-05, + "loss": 8.5722, + "step": 563400 + }, + { + "epoch": 6.0, + "learning_rate": 9.996263102994114e-05, + "loss": 8.5418, + "step": 563500 + }, + { + "epoch": 6.0, + "learning_rate": 9.993474374196516e-05, + "loss": 8.5778, + "step": 563600 + }, + { + "epoch": 6.0, + "learning_rate": 9.990685645906407e-05, + "loss": 8.501, + "step": 563700 + }, + { + "epoch": 6.01, + "learning_rate": 9.987896918340685e-05, + "loss": 8.5239, + "step": 563800 + }, + { + "epoch": 6.01, + "learning_rate": 9.985108191716213e-05, + "loss": 8.604, + "step": 563900 + }, + { + "epoch": 6.01, + "learning_rate": 9.982319466249886e-05, + "loss": 8.4869, + "step": 564000 + }, + { + "epoch": 6.01, + "learning_rate": 9.979530742158568e-05, + "loss": 8.652, + "step": 564100 + }, + { + "epoch": 6.01, + "learning_rate": 9.976742019659153e-05, + "loss": 8.5728, + "step": 564200 + }, + { + "epoch": 6.01, + "learning_rate": 9.97395329896851e-05, + "loss": 8.4674, + "step": 564300 + }, + { + "epoch": 6.01, + "learning_rate": 9.971164580303524e-05, + "loss": 8.5342, + "step": 564400 + }, + { + "epoch": 6.01, + "learning_rate": 9.96837586388107e-05, + "loss": 8.5184, + "step": 564500 + }, + { + "epoch": 6.01, + "learning_rate": 9.965587149918028e-05, + "loss": 8.5237, + "step": 564600 + }, + { + "epoch": 6.01, + "learning_rate": 9.962798438631277e-05, + "loss": 8.5275, + "step": 564700 + }, + { + "epoch": 6.02, + "learning_rate": 9.960009730237693e-05, + "loss": 8.5166, + "step": 564800 + }, + { + "epoch": 6.02, + "learning_rate": 9.957221024954161e-05, + "loss": 8.5717, + "step": 564900 + }, + { + "epoch": 6.02, + "learning_rate": 9.95443232299755e-05, + "loss": 8.5712, + "step": 565000 + }, + { + "epoch": 6.02, + "learning_rate": 9.951643624584747e-05, + "loss": 8.6126, + "step": 565100 + }, + { + "epoch": 6.02, + "learning_rate": 9.948854929932619e-05, + "loss": 8.5461, + "step": 565200 + }, + { + "epoch": 6.02, + "learning_rate": 9.946066239258054e-05, + "loss": 8.5258, + "step": 565300 + }, + { + "epoch": 6.02, + "learning_rate": 9.94327755277792e-05, + "loss": 8.4516, + "step": 565400 + }, + { + "epoch": 6.02, + "learning_rate": 9.940488870709098e-05, + "loss": 8.4244, + "step": 565500 + }, + { + "epoch": 6.02, + "learning_rate": 9.93770019326846e-05, + "loss": 8.5311, + "step": 565600 + }, + { + "epoch": 6.03, + "learning_rate": 9.934911520672887e-05, + "loss": 8.5485, + "step": 565700 + }, + { + "epoch": 6.03, + "learning_rate": 9.932122853139249e-05, + "loss": 8.5492, + "step": 565800 + }, + { + "epoch": 6.03, + "learning_rate": 9.929334190884427e-05, + "loss": 8.4396, + "step": 565900 + }, + { + "epoch": 6.03, + "learning_rate": 9.92654553412529e-05, + "loss": 8.5508, + "step": 566000 + }, + { + "epoch": 6.03, + "learning_rate": 9.923756883078716e-05, + "loss": 8.5613, + "step": 566100 + }, + { + "epoch": 6.03, + "learning_rate": 9.920968237961575e-05, + "loss": 8.5793, + "step": 566200 + }, + { + "epoch": 6.03, + "learning_rate": 9.918179598990745e-05, + "loss": 8.5341, + "step": 566300 + }, + { + "epoch": 6.03, + "learning_rate": 9.915390966383093e-05, + "loss": 8.4923, + "step": 566400 + }, + { + "epoch": 6.03, + "learning_rate": 9.912602340355499e-05, + "loss": 8.5228, + "step": 566500 + }, + { + "epoch": 6.04, + "learning_rate": 9.909813721124824e-05, + "loss": 8.4863, + "step": 566600 + }, + { + "epoch": 6.04, + "learning_rate": 9.90702510890795e-05, + "loss": 8.5225, + "step": 566700 + }, + { + "epoch": 6.04, + "learning_rate": 9.90423650392174e-05, + "loss": 8.4793, + "step": 566800 + }, + { + "epoch": 6.04, + "learning_rate": 9.901447906383066e-05, + "loss": 8.485, + "step": 566900 + }, + { + "epoch": 6.04, + "learning_rate": 9.898659316508803e-05, + "loss": 8.5709, + "step": 567000 + }, + { + "epoch": 6.04, + "learning_rate": 9.895870734515812e-05, + "loss": 8.5055, + "step": 567100 + }, + { + "epoch": 6.04, + "learning_rate": 9.893082160620966e-05, + "loss": 8.4978, + "step": 567200 + }, + { + "epoch": 6.04, + "learning_rate": 9.890293595041131e-05, + "loss": 8.5188, + "step": 567300 + }, + { + "epoch": 6.04, + "learning_rate": 9.887505037993176e-05, + "loss": 8.628, + "step": 567400 + }, + { + "epoch": 6.04, + "learning_rate": 9.884716489693963e-05, + "loss": 8.46, + "step": 567500 + }, + { + "epoch": 6.05, + "learning_rate": 9.881927950360363e-05, + "loss": 8.5151, + "step": 567600 + }, + { + "epoch": 6.05, + "learning_rate": 9.879139420209237e-05, + "loss": 8.5803, + "step": 567700 + }, + { + "epoch": 6.05, + "learning_rate": 9.876350899457453e-05, + "loss": 8.5582, + "step": 567800 + }, + { + "epoch": 6.05, + "learning_rate": 9.87356238832187e-05, + "loss": 8.4531, + "step": 567900 + }, + { + "epoch": 6.05, + "learning_rate": 9.870773887019355e-05, + "loss": 8.5174, + "step": 568000 + }, + { + "epoch": 6.05, + "learning_rate": 9.867985395766766e-05, + "loss": 8.5339, + "step": 568100 + }, + { + "epoch": 6.05, + "learning_rate": 9.865196914780968e-05, + "loss": 8.5099, + "step": 568200 + }, + { + "epoch": 6.05, + "learning_rate": 9.86240844427882e-05, + "loss": 8.5618, + "step": 568300 + }, + { + "epoch": 6.05, + "learning_rate": 9.859619984477181e-05, + "loss": 8.5566, + "step": 568400 + }, + { + "epoch": 6.06, + "learning_rate": 9.85683153559291e-05, + "loss": 8.5937, + "step": 568500 + }, + { + "epoch": 6.06, + "learning_rate": 9.854043097842866e-05, + "loss": 8.5102, + "step": 568600 + }, + { + "epoch": 6.06, + "learning_rate": 9.851254671443904e-05, + "loss": 8.494, + "step": 568700 + }, + { + "epoch": 6.06, + "learning_rate": 9.848466256612883e-05, + "loss": 8.4991, + "step": 568800 + }, + { + "epoch": 6.06, + "learning_rate": 9.845677853566657e-05, + "loss": 8.4675, + "step": 568900 + }, + { + "epoch": 6.06, + "learning_rate": 9.842889462522077e-05, + "loss": 8.4867, + "step": 569000 + }, + { + "epoch": 6.06, + "learning_rate": 9.840101083696005e-05, + "loss": 8.5785, + "step": 569100 + }, + { + "epoch": 6.06, + "learning_rate": 9.837312717305285e-05, + "loss": 8.4526, + "step": 569200 + }, + { + "epoch": 6.06, + "learning_rate": 9.834524363566773e-05, + "loss": 8.5616, + "step": 569300 + }, + { + "epoch": 6.06, + "learning_rate": 9.831736022697317e-05, + "loss": 8.445, + "step": 569400 + }, + { + "epoch": 6.07, + "learning_rate": 9.828947694913772e-05, + "loss": 8.5979, + "step": 569500 + }, + { + "epoch": 6.07, + "learning_rate": 9.826159380432978e-05, + "loss": 8.4784, + "step": 569600 + }, + { + "epoch": 6.07, + "learning_rate": 9.823371079471789e-05, + "loss": 8.5428, + "step": 569700 + }, + { + "epoch": 6.07, + "learning_rate": 9.820582792247049e-05, + "loss": 8.5617, + "step": 569800 + }, + { + "epoch": 6.07, + "learning_rate": 9.817794518975603e-05, + "loss": 8.5175, + "step": 569900 + }, + { + "epoch": 6.07, + "learning_rate": 9.815006259874296e-05, + "loss": 8.5768, + "step": 570000 + }, + { + "epoch": 6.07, + "learning_rate": 9.812218015159977e-05, + "loss": 8.5546, + "step": 570100 + }, + { + "epoch": 6.07, + "learning_rate": 9.809429785049474e-05, + "loss": 8.5234, + "step": 570200 + }, + { + "epoch": 6.07, + "learning_rate": 9.806641569759644e-05, + "loss": 8.5633, + "step": 570300 + }, + { + "epoch": 6.08, + "learning_rate": 9.80385336950731e-05, + "loss": 8.5458, + "step": 570400 + }, + { + "epoch": 6.08, + "learning_rate": 9.801065184509329e-05, + "loss": 8.5046, + "step": 570500 + }, + { + "epoch": 6.08, + "learning_rate": 9.798277014982522e-05, + "loss": 8.5552, + "step": 570600 + }, + { + "epoch": 6.08, + "learning_rate": 9.79548886114374e-05, + "loss": 8.54, + "step": 570700 + }, + { + "epoch": 6.08, + "learning_rate": 9.792700723209802e-05, + "loss": 8.4892, + "step": 570800 + }, + { + "epoch": 6.08, + "learning_rate": 9.789912601397557e-05, + "loss": 8.4428, + "step": 570900 + }, + { + "epoch": 6.08, + "learning_rate": 9.787124495923825e-05, + "loss": 8.5254, + "step": 571000 + }, + { + "epoch": 6.08, + "learning_rate": 9.784336407005445e-05, + "loss": 8.4707, + "step": 571100 + }, + { + "epoch": 6.08, + "learning_rate": 9.781548334859249e-05, + "loss": 8.452, + "step": 571200 + }, + { + "epoch": 6.09, + "learning_rate": 9.778760279702056e-05, + "loss": 8.5397, + "step": 571300 + }, + { + "epoch": 6.09, + "learning_rate": 9.775972241750704e-05, + "loss": 8.5849, + "step": 571400 + }, + { + "epoch": 6.09, + "learning_rate": 9.773184221222008e-05, + "loss": 8.4681, + "step": 571500 + }, + { + "epoch": 6.09, + "learning_rate": 9.770396218332806e-05, + "loss": 8.5496, + "step": 571600 + }, + { + "epoch": 6.09, + "learning_rate": 9.767608233299907e-05, + "loss": 8.5017, + "step": 571700 + }, + { + "epoch": 6.09, + "learning_rate": 9.764820266340145e-05, + "loss": 8.5101, + "step": 571800 + }, + { + "epoch": 6.09, + "learning_rate": 9.76203231767033e-05, + "loss": 8.5256, + "step": 571900 + }, + { + "epoch": 6.09, + "learning_rate": 9.759244387507292e-05, + "loss": 8.4828, + "step": 572000 + }, + { + "epoch": 6.09, + "learning_rate": 9.756456476067837e-05, + "loss": 8.4744, + "step": 572100 + }, + { + "epoch": 6.09, + "learning_rate": 9.753668583568793e-05, + "loss": 8.4446, + "step": 572200 + }, + { + "epoch": 6.1, + "learning_rate": 9.750880710226965e-05, + "loss": 8.4423, + "step": 572300 + }, + { + "epoch": 6.1, + "learning_rate": 9.748092856259172e-05, + "loss": 8.5426, + "step": 572400 + }, + { + "epoch": 6.1, + "learning_rate": 9.745305021882221e-05, + "loss": 8.5889, + "step": 572500 + }, + { + "epoch": 6.1, + "learning_rate": 9.742517207312929e-05, + "loss": 8.4782, + "step": 572600 + }, + { + "epoch": 6.1, + "learning_rate": 9.739729412768095e-05, + "loss": 8.5241, + "step": 572700 + }, + { + "epoch": 6.1, + "learning_rate": 9.736941638464536e-05, + "loss": 8.5345, + "step": 572800 + }, + { + "epoch": 6.1, + "learning_rate": 9.73415388461905e-05, + "loss": 8.4636, + "step": 572900 + }, + { + "epoch": 6.1, + "learning_rate": 9.731366151448447e-05, + "loss": 8.5277, + "step": 573000 + }, + { + "epoch": 6.1, + "learning_rate": 9.728578439169523e-05, + "loss": 8.526, + "step": 573100 + }, + { + "epoch": 6.11, + "learning_rate": 9.725790747999083e-05, + "loss": 8.5394, + "step": 573200 + }, + { + "epoch": 6.11, + "learning_rate": 9.723003078153927e-05, + "loss": 8.5449, + "step": 573300 + }, + { + "epoch": 6.11, + "learning_rate": 9.720215429850847e-05, + "loss": 8.4931, + "step": 573400 + }, + { + "epoch": 6.11, + "learning_rate": 9.717427803306645e-05, + "loss": 8.5136, + "step": 573500 + }, + { + "epoch": 6.11, + "learning_rate": 9.714640198738108e-05, + "loss": 8.4581, + "step": 573600 + }, + { + "epoch": 6.11, + "learning_rate": 9.711852616362036e-05, + "loss": 8.6564, + "step": 573700 + }, + { + "epoch": 6.11, + "learning_rate": 9.709065056395215e-05, + "loss": 8.4493, + "step": 573800 + }, + { + "epoch": 6.11, + "learning_rate": 9.706277519054435e-05, + "loss": 8.5169, + "step": 573900 + }, + { + "epoch": 6.11, + "learning_rate": 9.703490004556482e-05, + "loss": 8.5241, + "step": 574000 + }, + { + "epoch": 6.11, + "learning_rate": 9.700702513118143e-05, + "loss": 8.4493, + "step": 574100 + }, + { + "epoch": 6.12, + "learning_rate": 9.697915044956198e-05, + "loss": 8.5261, + "step": 574200 + }, + { + "epoch": 6.12, + "learning_rate": 9.695127600287435e-05, + "loss": 8.4313, + "step": 574300 + }, + { + "epoch": 6.12, + "learning_rate": 9.692340179328626e-05, + "loss": 8.483, + "step": 574400 + }, + { + "epoch": 6.12, + "learning_rate": 9.689552782296556e-05, + "loss": 8.521, + "step": 574500 + }, + { + "epoch": 6.12, + "learning_rate": 9.686765409407997e-05, + "loss": 8.4772, + "step": 574600 + }, + { + "epoch": 6.12, + "learning_rate": 9.683978060879726e-05, + "loss": 8.5102, + "step": 574700 + }, + { + "epoch": 6.12, + "learning_rate": 9.681190736928513e-05, + "loss": 8.54, + "step": 574800 + }, + { + "epoch": 6.12, + "learning_rate": 9.67840343777113e-05, + "loss": 8.503, + "step": 574900 + }, + { + "epoch": 6.12, + "learning_rate": 9.675616163624343e-05, + "loss": 8.4948, + "step": 575000 + }, + { + "epoch": 6.13, + "learning_rate": 9.672828914704924e-05, + "loss": 8.4985, + "step": 575100 + }, + { + "epoch": 6.13, + "learning_rate": 9.67004169122963e-05, + "loss": 8.4757, + "step": 575200 + }, + { + "epoch": 6.13, + "learning_rate": 9.667254493415229e-05, + "loss": 8.4969, + "step": 575300 + }, + { + "epoch": 6.13, + "learning_rate": 9.664467321478481e-05, + "loss": 8.5406, + "step": 575400 + }, + { + "epoch": 6.13, + "learning_rate": 9.661680175636144e-05, + "loss": 8.5664, + "step": 575500 + }, + { + "epoch": 6.13, + "learning_rate": 9.658893056104976e-05, + "loss": 8.4155, + "step": 575600 + }, + { + "epoch": 6.13, + "learning_rate": 9.656105963101729e-05, + "loss": 8.4821, + "step": 575700 + }, + { + "epoch": 6.13, + "learning_rate": 9.653318896843159e-05, + "loss": 8.4672, + "step": 575800 + }, + { + "epoch": 6.13, + "learning_rate": 9.650531857546012e-05, + "loss": 8.5579, + "step": 575900 + }, + { + "epoch": 6.14, + "learning_rate": 9.64774484542704e-05, + "loss": 8.5411, + "step": 576000 + }, + { + "epoch": 6.14, + "learning_rate": 9.644957860702987e-05, + "loss": 8.4996, + "step": 576100 + }, + { + "epoch": 6.14, + "learning_rate": 9.642170903590599e-05, + "loss": 8.5612, + "step": 576200 + }, + { + "epoch": 6.14, + "learning_rate": 9.639383974306615e-05, + "loss": 8.501, + "step": 576300 + }, + { + "epoch": 6.14, + "learning_rate": 9.636597073067778e-05, + "loss": 8.6227, + "step": 576400 + }, + { + "epoch": 6.14, + "learning_rate": 9.633810200090822e-05, + "loss": 8.5261, + "step": 576500 + }, + { + "epoch": 6.14, + "learning_rate": 9.631023355592489e-05, + "loss": 8.5239, + "step": 576600 + }, + { + "epoch": 6.14, + "learning_rate": 9.628236539789504e-05, + "loss": 8.5055, + "step": 576700 + }, + { + "epoch": 6.14, + "learning_rate": 9.625449752898604e-05, + "loss": 8.4365, + "step": 576800 + }, + { + "epoch": 6.14, + "learning_rate": 9.622662995136514e-05, + "loss": 8.4983, + "step": 576900 + }, + { + "epoch": 6.15, + "learning_rate": 9.619876266719966e-05, + "loss": 8.541, + "step": 577000 + }, + { + "epoch": 6.15, + "learning_rate": 9.617089567865674e-05, + "loss": 8.415, + "step": 577100 + }, + { + "epoch": 6.15, + "learning_rate": 9.614302898790372e-05, + "loss": 8.4677, + "step": 577200 + }, + { + "epoch": 6.15, + "learning_rate": 9.611516259710766e-05, + "loss": 8.557, + "step": 577300 + }, + { + "epoch": 6.15, + "learning_rate": 9.608729650843584e-05, + "loss": 8.4651, + "step": 577400 + }, + { + "epoch": 6.15, + "learning_rate": 9.60594307240554e-05, + "loss": 8.5404, + "step": 577500 + }, + { + "epoch": 6.15, + "learning_rate": 9.603156524613342e-05, + "loss": 8.4382, + "step": 577600 + }, + { + "epoch": 6.15, + "learning_rate": 9.600370007683703e-05, + "loss": 8.5007, + "step": 577700 + }, + { + "epoch": 6.15, + "learning_rate": 9.597583521833329e-05, + "loss": 8.5073, + "step": 577800 + }, + { + "epoch": 6.16, + "learning_rate": 9.594797067278928e-05, + "loss": 8.5401, + "step": 577900 + }, + { + "epoch": 6.16, + "learning_rate": 9.592010644237198e-05, + "loss": 8.4374, + "step": 578000 + }, + { + "epoch": 6.16, + "learning_rate": 9.589224252924847e-05, + "loss": 8.4422, + "step": 578100 + }, + { + "epoch": 6.16, + "learning_rate": 9.586437893558564e-05, + "loss": 8.4467, + "step": 578200 + }, + { + "epoch": 6.16, + "learning_rate": 9.583651566355052e-05, + "loss": 8.5061, + "step": 578300 + }, + { + "epoch": 6.16, + "learning_rate": 9.580865271530997e-05, + "loss": 8.474, + "step": 578400 + }, + { + "epoch": 6.16, + "learning_rate": 9.578079009303099e-05, + "loss": 8.5522, + "step": 578500 + }, + { + "epoch": 6.16, + "learning_rate": 9.575292779888033e-05, + "loss": 8.6526, + "step": 578600 + }, + { + "epoch": 6.16, + "learning_rate": 9.572506583502498e-05, + "loss": 8.4548, + "step": 578700 + }, + { + "epoch": 6.16, + "learning_rate": 9.569720420363165e-05, + "loss": 8.5234, + "step": 578800 + }, + { + "epoch": 6.17, + "learning_rate": 9.566934290686726e-05, + "loss": 8.5635, + "step": 578900 + }, + { + "epoch": 6.17, + "learning_rate": 9.564148194689843e-05, + "loss": 8.4211, + "step": 579000 + }, + { + "epoch": 6.17, + "learning_rate": 9.561362132589208e-05, + "loss": 8.5385, + "step": 579100 + }, + { + "epoch": 6.17, + "learning_rate": 9.558576104601482e-05, + "loss": 8.4974, + "step": 579200 + }, + { + "epoch": 6.17, + "learning_rate": 9.55579011094334e-05, + "loss": 8.47, + "step": 579300 + }, + { + "epoch": 6.17, + "learning_rate": 9.553004151831444e-05, + "loss": 8.5051, + "step": 579400 + }, + { + "epoch": 6.17, + "learning_rate": 9.55021822748246e-05, + "loss": 8.4726, + "step": 579500 + }, + { + "epoch": 6.17, + "learning_rate": 9.547432338113058e-05, + "loss": 8.4631, + "step": 579600 + }, + { + "epoch": 6.17, + "learning_rate": 9.54464648393988e-05, + "loss": 8.4759, + "step": 579700 + }, + { + "epoch": 6.18, + "learning_rate": 9.541860665179603e-05, + "loss": 8.5052, + "step": 579800 + }, + { + "epoch": 6.18, + "learning_rate": 9.53907488204886e-05, + "loss": 8.4818, + "step": 579900 + }, + { + "epoch": 6.18, + "learning_rate": 9.53628913476432e-05, + "loss": 8.4775, + "step": 580000 + }, + { + "epoch": 6.18, + "learning_rate": 9.533503423542616e-05, + "loss": 8.5214, + "step": 580100 + }, + { + "epoch": 6.18, + "learning_rate": 9.530717748600402e-05, + "loss": 8.6203, + "step": 580200 + }, + { + "epoch": 6.18, + "learning_rate": 9.527932110154313e-05, + "loss": 8.5026, + "step": 580300 + }, + { + "epoch": 6.18, + "learning_rate": 9.525146508420998e-05, + "loss": 8.5378, + "step": 580400 + }, + { + "epoch": 6.18, + "learning_rate": 9.522360943617085e-05, + "loss": 8.4831, + "step": 580500 + }, + { + "epoch": 6.18, + "learning_rate": 9.519575415959212e-05, + "loss": 8.4541, + "step": 580600 + }, + { + "epoch": 6.19, + "learning_rate": 9.516789925664006e-05, + "loss": 8.5221, + "step": 580700 + }, + { + "epoch": 6.19, + "learning_rate": 9.514004472948102e-05, + "loss": 8.4785, + "step": 580800 + }, + { + "epoch": 6.19, + "learning_rate": 9.511219058028118e-05, + "loss": 8.4873, + "step": 580900 + }, + { + "epoch": 6.19, + "learning_rate": 9.508433681120679e-05, + "loss": 8.4778, + "step": 581000 + }, + { + "epoch": 6.19, + "learning_rate": 9.505648342442402e-05, + "loss": 8.4506, + "step": 581100 + }, + { + "epoch": 6.19, + "learning_rate": 9.502863042209907e-05, + "loss": 8.4299, + "step": 581200 + }, + { + "epoch": 6.19, + "learning_rate": 9.500077780639802e-05, + "loss": 8.4892, + "step": 581300 + }, + { + "epoch": 6.19, + "learning_rate": 9.497292557948702e-05, + "loss": 8.5376, + "step": 581400 + }, + { + "epoch": 6.19, + "learning_rate": 9.494507374353211e-05, + "loss": 8.536, + "step": 581500 + }, + { + "epoch": 6.19, + "learning_rate": 9.491722230069933e-05, + "loss": 8.5869, + "step": 581600 + }, + { + "epoch": 6.2, + "learning_rate": 9.488937125315472e-05, + "loss": 8.5101, + "step": 581700 + }, + { + "epoch": 6.2, + "learning_rate": 9.486152060306421e-05, + "loss": 8.5017, + "step": 581800 + }, + { + "epoch": 6.2, + "learning_rate": 9.483367035259379e-05, + "loss": 8.4706, + "step": 581900 + }, + { + "epoch": 6.2, + "learning_rate": 9.480582050390936e-05, + "loss": 8.5025, + "step": 582000 + }, + { + "epoch": 6.2, + "learning_rate": 9.477797105917681e-05, + "loss": 8.5918, + "step": 582100 + }, + { + "epoch": 6.2, + "learning_rate": 9.475012202056199e-05, + "loss": 8.5499, + "step": 582200 + }, + { + "epoch": 6.2, + "learning_rate": 9.472227339023073e-05, + "loss": 8.4582, + "step": 582300 + }, + { + "epoch": 6.2, + "learning_rate": 9.46944251703488e-05, + "loss": 8.4931, + "step": 582400 + }, + { + "epoch": 6.2, + "learning_rate": 9.466657736308199e-05, + "loss": 8.4801, + "step": 582500 + }, + { + "epoch": 6.21, + "learning_rate": 9.463872997059598e-05, + "loss": 8.5439, + "step": 582600 + }, + { + "epoch": 6.21, + "learning_rate": 9.461088299505654e-05, + "loss": 8.4918, + "step": 582700 + }, + { + "epoch": 6.21, + "learning_rate": 9.458303643862925e-05, + "loss": 8.4593, + "step": 582800 + }, + { + "epoch": 6.21, + "learning_rate": 9.455519030347981e-05, + "loss": 8.5589, + "step": 582900 + }, + { + "epoch": 6.21, + "learning_rate": 9.452734459177375e-05, + "loss": 8.5167, + "step": 583000 + }, + { + "epoch": 6.21, + "learning_rate": 9.44994993056767e-05, + "loss": 8.5241, + "step": 583100 + }, + { + "epoch": 6.21, + "learning_rate": 9.447165444735414e-05, + "loss": 8.4288, + "step": 583200 + }, + { + "epoch": 6.21, + "learning_rate": 9.444381001897159e-05, + "loss": 8.5654, + "step": 583300 + }, + { + "epoch": 6.21, + "learning_rate": 9.441596602269449e-05, + "loss": 8.5318, + "step": 583400 + }, + { + "epoch": 6.22, + "learning_rate": 9.438812246068832e-05, + "loss": 8.5325, + "step": 583500 + }, + { + "epoch": 6.22, + "learning_rate": 9.436027933511843e-05, + "loss": 8.4606, + "step": 583600 + }, + { + "epoch": 6.22, + "learning_rate": 9.433243664815019e-05, + "loss": 8.4975, + "step": 583700 + }, + { + "epoch": 6.22, + "learning_rate": 9.430459440194898e-05, + "loss": 8.4846, + "step": 583800 + }, + { + "epoch": 6.22, + "learning_rate": 9.427675259868001e-05, + "loss": 8.4856, + "step": 583900 + }, + { + "epoch": 6.22, + "learning_rate": 9.424891124050861e-05, + "loss": 8.4815, + "step": 584000 + }, + { + "epoch": 6.22, + "learning_rate": 9.422107032959996e-05, + "loss": 8.4918, + "step": 584100 + }, + { + "epoch": 6.22, + "learning_rate": 9.419322986811929e-05, + "loss": 8.4701, + "step": 584200 + }, + { + "epoch": 6.22, + "learning_rate": 9.41653898582317e-05, + "loss": 8.5292, + "step": 584300 + }, + { + "epoch": 6.22, + "learning_rate": 9.413755030210239e-05, + "loss": 8.4996, + "step": 584400 + }, + { + "epoch": 6.23, + "learning_rate": 9.410971120189637e-05, + "loss": 8.4754, + "step": 584500 + }, + { + "epoch": 6.23, + "learning_rate": 9.408187255977874e-05, + "loss": 8.4594, + "step": 584600 + }, + { + "epoch": 6.23, + "learning_rate": 9.405403437791447e-05, + "loss": 8.6064, + "step": 584700 + }, + { + "epoch": 6.23, + "learning_rate": 9.402619665846857e-05, + "loss": 8.5121, + "step": 584800 + }, + { + "epoch": 6.23, + "learning_rate": 9.399835940360597e-05, + "loss": 8.4949, + "step": 584900 + }, + { + "epoch": 6.23, + "learning_rate": 9.397052261549162e-05, + "loss": 8.4893, + "step": 585000 + }, + { + "epoch": 6.23, + "learning_rate": 9.39426862962903e-05, + "loss": 8.4697, + "step": 585100 + }, + { + "epoch": 6.23, + "learning_rate": 9.391485044816694e-05, + "loss": 8.4848, + "step": 585200 + }, + { + "epoch": 6.23, + "learning_rate": 9.388701507328624e-05, + "loss": 8.5371, + "step": 585300 + }, + { + "epoch": 6.24, + "learning_rate": 9.385918017381305e-05, + "loss": 8.5883, + "step": 585400 + }, + { + "epoch": 6.24, + "learning_rate": 9.383134575191201e-05, + "loss": 8.5422, + "step": 585500 + }, + { + "epoch": 6.24, + "learning_rate": 9.380351180974791e-05, + "loss": 8.4044, + "step": 585600 + }, + { + "epoch": 6.24, + "learning_rate": 9.377567834948527e-05, + "loss": 8.5191, + "step": 585700 + }, + { + "epoch": 6.24, + "learning_rate": 9.37478453732888e-05, + "loss": 8.4791, + "step": 585800 + }, + { + "epoch": 6.24, + "learning_rate": 9.372001288332309e-05, + "loss": 8.5084, + "step": 585900 + }, + { + "epoch": 6.24, + "learning_rate": 9.369218088175254e-05, + "loss": 8.454, + "step": 586000 + }, + { + "epoch": 6.24, + "learning_rate": 9.366434937074183e-05, + "loss": 8.4518, + "step": 586100 + }, + { + "epoch": 6.24, + "learning_rate": 9.363651835245523e-05, + "loss": 8.5338, + "step": 586200 + }, + { + "epoch": 6.24, + "learning_rate": 9.360868782905734e-05, + "loss": 8.4214, + "step": 586300 + }, + { + "epoch": 6.25, + "learning_rate": 9.358085780271239e-05, + "loss": 8.4831, + "step": 586400 + }, + { + "epoch": 6.25, + "learning_rate": 9.355302827558485e-05, + "loss": 8.4647, + "step": 586500 + }, + { + "epoch": 6.25, + "learning_rate": 9.352519924983893e-05, + "loss": 8.5162, + "step": 586600 + }, + { + "epoch": 6.25, + "learning_rate": 9.349737072763898e-05, + "loss": 8.4317, + "step": 586700 + }, + { + "epoch": 6.25, + "learning_rate": 9.346954271114911e-05, + "loss": 8.5411, + "step": 586800 + }, + { + "epoch": 6.25, + "learning_rate": 9.344171520253366e-05, + "loss": 8.4653, + "step": 586900 + }, + { + "epoch": 6.25, + "learning_rate": 9.341388820395664e-05, + "loss": 8.5616, + "step": 587000 + }, + { + "epoch": 6.25, + "learning_rate": 9.338606171758225e-05, + "loss": 8.5073, + "step": 587100 + }, + { + "epoch": 6.25, + "learning_rate": 9.335823574557449e-05, + "loss": 8.5233, + "step": 587200 + }, + { + "epoch": 6.26, + "learning_rate": 9.333041029009747e-05, + "loss": 8.5134, + "step": 587300 + }, + { + "epoch": 6.26, + "learning_rate": 9.330258535331509e-05, + "loss": 8.5779, + "step": 587400 + }, + { + "epoch": 6.26, + "learning_rate": 9.327476093739137e-05, + "loss": 8.5187, + "step": 587500 + }, + { + "epoch": 6.26, + "learning_rate": 9.324693704449017e-05, + "loss": 8.5117, + "step": 587600 + }, + { + "epoch": 6.26, + "learning_rate": 9.321911367677538e-05, + "loss": 8.5275, + "step": 587700 + }, + { + "epoch": 6.26, + "learning_rate": 9.319129083641082e-05, + "loss": 8.587, + "step": 587800 + }, + { + "epoch": 6.26, + "learning_rate": 9.316346852556024e-05, + "loss": 8.4891, + "step": 587900 + }, + { + "epoch": 6.26, + "learning_rate": 9.313564674638749e-05, + "loss": 8.5564, + "step": 588000 + }, + { + "epoch": 6.26, + "learning_rate": 9.310782550105617e-05, + "loss": 8.4626, + "step": 588100 + }, + { + "epoch": 6.27, + "learning_rate": 9.308000479172998e-05, + "loss": 8.5061, + "step": 588200 + }, + { + "epoch": 6.27, + "learning_rate": 9.305218462057252e-05, + "loss": 8.4171, + "step": 588300 + }, + { + "epoch": 6.27, + "learning_rate": 9.30243649897474e-05, + "loss": 8.4827, + "step": 588400 + }, + { + "epoch": 6.27, + "learning_rate": 9.299654590141813e-05, + "loss": 8.4977, + "step": 588500 + }, + { + "epoch": 6.27, + "learning_rate": 9.296872735774822e-05, + "loss": 8.592, + "step": 588600 + }, + { + "epoch": 6.27, + "learning_rate": 9.294090936090109e-05, + "loss": 8.4477, + "step": 588700 + }, + { + "epoch": 6.27, + "learning_rate": 9.291309191304019e-05, + "loss": 8.5058, + "step": 588800 + }, + { + "epoch": 6.27, + "learning_rate": 9.288527501632885e-05, + "loss": 8.4763, + "step": 588900 + }, + { + "epoch": 6.27, + "learning_rate": 9.285745867293042e-05, + "loss": 8.4776, + "step": 589000 + }, + { + "epoch": 6.27, + "learning_rate": 9.282964288500816e-05, + "loss": 8.5006, + "step": 589100 + }, + { + "epoch": 6.28, + "learning_rate": 9.280182765472532e-05, + "loss": 8.5273, + "step": 589200 + }, + { + "epoch": 6.28, + "learning_rate": 9.277401298424508e-05, + "loss": 8.5115, + "step": 589300 + }, + { + "epoch": 6.28, + "learning_rate": 9.274619887573062e-05, + "loss": 8.4804, + "step": 589400 + }, + { + "epoch": 6.28, + "learning_rate": 9.2718385331345e-05, + "loss": 8.525, + "step": 589500 + }, + { + "epoch": 6.28, + "learning_rate": 9.269057235325132e-05, + "loss": 8.4642, + "step": 589600 + }, + { + "epoch": 6.28, + "learning_rate": 9.266275994361257e-05, + "loss": 8.463, + "step": 589700 + }, + { + "epoch": 6.28, + "learning_rate": 9.263494810459176e-05, + "loss": 8.4527, + "step": 589800 + }, + { + "epoch": 6.28, + "learning_rate": 9.26071368383518e-05, + "loss": 8.4791, + "step": 589900 + }, + { + "epoch": 6.28, + "learning_rate": 9.257932614705554e-05, + "loss": 8.5031, + "step": 590000 + }, + { + "epoch": 6.29, + "learning_rate": 9.25515160328659e-05, + "loss": 8.4308, + "step": 590100 + }, + { + "epoch": 6.29, + "learning_rate": 9.252370649794559e-05, + "loss": 8.494, + "step": 590200 + }, + { + "epoch": 6.29, + "learning_rate": 9.249589754445742e-05, + "loss": 8.55, + "step": 590300 + }, + { + "epoch": 6.29, + "learning_rate": 9.246808917456406e-05, + "loss": 8.5189, + "step": 590400 + }, + { + "epoch": 6.29, + "learning_rate": 9.244028139042821e-05, + "loss": 8.4823, + "step": 590500 + }, + { + "epoch": 6.29, + "learning_rate": 9.241247419421244e-05, + "loss": 8.5623, + "step": 590600 + }, + { + "epoch": 6.29, + "learning_rate": 9.238466758807936e-05, + "loss": 8.5652, + "step": 590700 + }, + { + "epoch": 6.29, + "learning_rate": 9.235686157419143e-05, + "loss": 8.411, + "step": 590800 + }, + { + "epoch": 6.29, + "learning_rate": 9.23290561547112e-05, + "loss": 8.3888, + "step": 590900 + }, + { + "epoch": 6.29, + "learning_rate": 9.230125133180106e-05, + "loss": 8.4751, + "step": 591000 + }, + { + "epoch": 6.3, + "learning_rate": 9.22734471076234e-05, + "loss": 8.5057, + "step": 591100 + }, + { + "epoch": 6.3, + "learning_rate": 9.224564348434054e-05, + "loss": 8.5299, + "step": 591200 + }, + { + "epoch": 6.3, + "learning_rate": 9.221784046411481e-05, + "loss": 8.5865, + "step": 591300 + }, + { + "epoch": 6.3, + "learning_rate": 9.219003804910843e-05, + "loss": 8.5438, + "step": 591400 + }, + { + "epoch": 6.3, + "learning_rate": 9.21622362414836e-05, + "loss": 8.4583, + "step": 591500 + }, + { + "epoch": 6.3, + "learning_rate": 9.213443504340246e-05, + "loss": 8.5479, + "step": 591600 + }, + { + "epoch": 6.3, + "learning_rate": 9.210663445702715e-05, + "loss": 8.4803, + "step": 591700 + }, + { + "epoch": 6.3, + "learning_rate": 9.207883448451965e-05, + "loss": 8.4087, + "step": 591800 + }, + { + "epoch": 6.3, + "learning_rate": 9.205103512804208e-05, + "loss": 8.385, + "step": 591900 + }, + { + "epoch": 6.31, + "learning_rate": 9.202323638975625e-05, + "loss": 8.4623, + "step": 592000 + }, + { + "epoch": 6.31, + "learning_rate": 9.199543827182418e-05, + "loss": 8.5054, + "step": 592100 + }, + { + "epoch": 6.31, + "learning_rate": 9.196764077640772e-05, + "loss": 8.5444, + "step": 592200 + }, + { + "epoch": 6.31, + "learning_rate": 9.193984390566866e-05, + "loss": 8.4663, + "step": 592300 + }, + { + "epoch": 6.31, + "learning_rate": 9.191204766176877e-05, + "loss": 8.5157, + "step": 592400 + }, + { + "epoch": 6.31, + "learning_rate": 9.188425204686976e-05, + "loss": 8.5157, + "step": 592500 + }, + { + "epoch": 6.31, + "learning_rate": 9.185645706313335e-05, + "loss": 8.4607, + "step": 592600 + }, + { + "epoch": 6.31, + "learning_rate": 9.182866271272107e-05, + "loss": 8.4383, + "step": 592700 + }, + { + "epoch": 6.31, + "learning_rate": 9.18008689977946e-05, + "loss": 8.4539, + "step": 592800 + }, + { + "epoch": 6.32, + "learning_rate": 9.177307592051533e-05, + "loss": 8.5019, + "step": 592900 + }, + { + "epoch": 6.32, + "learning_rate": 9.174528348304486e-05, + "loss": 8.4796, + "step": 593000 + }, + { + "epoch": 6.32, + "learning_rate": 9.171749168754448e-05, + "loss": 8.4893, + "step": 593100 + }, + { + "epoch": 6.32, + "learning_rate": 9.16897005361757e-05, + "loss": 8.4631, + "step": 593200 + }, + { + "epoch": 6.32, + "learning_rate": 9.166191003109969e-05, + "loss": 8.5257, + "step": 593300 + }, + { + "epoch": 6.32, + "learning_rate": 9.163412017447787e-05, + "loss": 8.4958, + "step": 593400 + }, + { + "epoch": 6.32, + "learning_rate": 9.160633096847133e-05, + "loss": 8.4886, + "step": 593500 + }, + { + "epoch": 6.32, + "learning_rate": 9.157854241524137e-05, + "loss": 8.4396, + "step": 593600 + }, + { + "epoch": 6.32, + "learning_rate": 9.155075451694896e-05, + "loss": 8.5074, + "step": 593700 + }, + { + "epoch": 6.32, + "learning_rate": 9.152296727575532e-05, + "loss": 8.4766, + "step": 593800 + }, + { + "epoch": 6.33, + "learning_rate": 9.149518069382135e-05, + "loss": 8.4426, + "step": 593900 + }, + { + "epoch": 6.33, + "learning_rate": 9.146739477330808e-05, + "loss": 8.4712, + "step": 594000 + }, + { + "epoch": 6.33, + "learning_rate": 9.143960951637638e-05, + "loss": 8.4949, + "step": 594100 + }, + { + "epoch": 6.33, + "learning_rate": 9.14118249251871e-05, + "loss": 8.5177, + "step": 594200 + }, + { + "epoch": 6.33, + "learning_rate": 9.138404100190117e-05, + "loss": 8.4468, + "step": 594300 + }, + { + "epoch": 6.33, + "learning_rate": 9.135625774867918e-05, + "loss": 8.4355, + "step": 594400 + }, + { + "epoch": 6.33, + "learning_rate": 9.132847516768199e-05, + "loss": 8.481, + "step": 594500 + }, + { + "epoch": 6.33, + "learning_rate": 9.130069326107013e-05, + "loss": 8.521, + "step": 594600 + }, + { + "epoch": 6.33, + "learning_rate": 9.127291203100431e-05, + "loss": 8.479, + "step": 594700 + }, + { + "epoch": 6.34, + "learning_rate": 9.124513147964496e-05, + "loss": 8.5538, + "step": 594800 + }, + { + "epoch": 6.34, + "learning_rate": 9.121735160915271e-05, + "loss": 8.4245, + "step": 594900 + }, + { + "epoch": 6.34, + "learning_rate": 9.11895724216879e-05, + "loss": 8.4593, + "step": 595000 + }, + { + "epoch": 6.34, + "learning_rate": 9.116179391941097e-05, + "loss": 8.4567, + "step": 595100 + }, + { + "epoch": 6.34, + "learning_rate": 9.113401610448221e-05, + "loss": 8.5594, + "step": 595200 + }, + { + "epoch": 6.34, + "learning_rate": 9.110623897906197e-05, + "loss": 8.4774, + "step": 595300 + }, + { + "epoch": 6.34, + "learning_rate": 9.107846254531041e-05, + "loss": 8.4008, + "step": 595400 + }, + { + "epoch": 6.34, + "learning_rate": 9.105068680538778e-05, + "loss": 8.5162, + "step": 595500 + }, + { + "epoch": 6.34, + "learning_rate": 9.102291176145413e-05, + "loss": 8.5286, + "step": 595600 + }, + { + "epoch": 6.34, + "learning_rate": 9.09951374156696e-05, + "loss": 8.4857, + "step": 595700 + }, + { + "epoch": 6.35, + "learning_rate": 9.096736377019413e-05, + "loss": 8.493, + "step": 595800 + }, + { + "epoch": 6.35, + "learning_rate": 9.093959082718773e-05, + "loss": 8.5009, + "step": 595900 + }, + { + "epoch": 6.35, + "learning_rate": 9.091181858881029e-05, + "loss": 8.5041, + "step": 596000 + }, + { + "epoch": 6.35, + "learning_rate": 9.088404705722165e-05, + "loss": 8.5605, + "step": 596100 + }, + { + "epoch": 6.35, + "learning_rate": 9.085627623458162e-05, + "loss": 8.5319, + "step": 596200 + }, + { + "epoch": 6.35, + "learning_rate": 9.082850612304992e-05, + "loss": 8.4709, + "step": 596300 + }, + { + "epoch": 6.35, + "learning_rate": 9.080073672478628e-05, + "loss": 8.541, + "step": 596400 + }, + { + "epoch": 6.35, + "learning_rate": 9.077296804195026e-05, + "loss": 8.4581, + "step": 596500 + }, + { + "epoch": 6.35, + "learning_rate": 9.07452000767015e-05, + "loss": 8.4717, + "step": 596600 + }, + { + "epoch": 6.36, + "learning_rate": 9.071743283119948e-05, + "loss": 8.5598, + "step": 596700 + }, + { + "epoch": 6.36, + "learning_rate": 9.068966630760367e-05, + "loss": 8.5446, + "step": 596800 + }, + { + "epoch": 6.36, + "learning_rate": 9.066190050807347e-05, + "loss": 8.5375, + "step": 596900 + }, + { + "epoch": 6.36, + "learning_rate": 9.063413543476826e-05, + "loss": 8.3983, + "step": 597000 + }, + { + "epoch": 6.36, + "learning_rate": 9.060637108984727e-05, + "loss": 8.5254, + "step": 597100 + }, + { + "epoch": 6.36, + "learning_rate": 9.057860747546979e-05, + "loss": 8.393, + "step": 597200 + }, + { + "epoch": 6.36, + "learning_rate": 9.055084459379497e-05, + "loss": 8.4959, + "step": 597300 + }, + { + "epoch": 6.36, + "learning_rate": 9.052308244698198e-05, + "loss": 8.4871, + "step": 597400 + }, + { + "epoch": 6.36, + "learning_rate": 9.04953210371898e-05, + "loss": 8.4644, + "step": 597500 + }, + { + "epoch": 6.37, + "learning_rate": 9.046756036657753e-05, + "loss": 8.482, + "step": 597600 + }, + { + "epoch": 6.37, + "learning_rate": 9.043980043730405e-05, + "loss": 8.4908, + "step": 597700 + }, + { + "epoch": 6.37, + "learning_rate": 9.04120412515283e-05, + "loss": 8.5525, + "step": 597800 + }, + { + "epoch": 6.37, + "learning_rate": 9.038428281140906e-05, + "loss": 8.4884, + "step": 597900 + }, + { + "epoch": 6.37, + "learning_rate": 9.035652511910518e-05, + "loss": 8.5096, + "step": 598000 + }, + { + "epoch": 6.37, + "learning_rate": 9.032876817677533e-05, + "loss": 8.5152, + "step": 598100 + }, + { + "epoch": 6.37, + "learning_rate": 9.030101198657818e-05, + "loss": 8.4829, + "step": 598200 + }, + { + "epoch": 6.37, + "learning_rate": 9.027325655067232e-05, + "loss": 8.5015, + "step": 598300 + }, + { + "epoch": 6.37, + "learning_rate": 9.02455018712163e-05, + "loss": 8.3957, + "step": 598400 + }, + { + "epoch": 6.37, + "learning_rate": 9.021774795036863e-05, + "loss": 8.4514, + "step": 598500 + }, + { + "epoch": 6.38, + "learning_rate": 9.018999479028769e-05, + "loss": 8.5293, + "step": 598600 + }, + { + "epoch": 6.38, + "learning_rate": 9.01622423931319e-05, + "loss": 8.5739, + "step": 598700 + }, + { + "epoch": 6.38, + "learning_rate": 9.013449076105952e-05, + "loss": 8.4474, + "step": 598800 + }, + { + "epoch": 6.38, + "learning_rate": 9.010673989622883e-05, + "loss": 8.4286, + "step": 598900 + }, + { + "epoch": 6.38, + "learning_rate": 9.007898980079799e-05, + "loss": 8.5704, + "step": 599000 + }, + { + "epoch": 6.38, + "learning_rate": 9.005124047692515e-05, + "loss": 8.5052, + "step": 599100 + }, + { + "epoch": 6.38, + "learning_rate": 9.002349192676835e-05, + "loss": 8.5451, + "step": 599200 + }, + { + "epoch": 6.38, + "learning_rate": 8.999574415248564e-05, + "loss": 8.5001, + "step": 599300 + }, + { + "epoch": 6.38, + "learning_rate": 8.996799715623492e-05, + "loss": 8.5023, + "step": 599400 + }, + { + "epoch": 6.39, + "learning_rate": 8.994025094017413e-05, + "loss": 8.4727, + "step": 599500 + }, + { + "epoch": 6.39, + "learning_rate": 8.991250550646103e-05, + "loss": 8.527, + "step": 599600 + }, + { + "epoch": 6.39, + "learning_rate": 8.988476085725347e-05, + "loss": 8.4048, + "step": 599700 + }, + { + "epoch": 6.39, + "learning_rate": 8.985701699470904e-05, + "loss": 8.4551, + "step": 599800 + }, + { + "epoch": 6.39, + "learning_rate": 8.982927392098551e-05, + "loss": 8.5317, + "step": 599900 + }, + { + "epoch": 6.39, + "learning_rate": 8.980153163824034e-05, + "loss": 8.4736, + "step": 600000 + }, + { + "epoch": 6.39, + "learning_rate": 8.977379014863117e-05, + "loss": 8.5021, + "step": 600100 + }, + { + "epoch": 6.39, + "learning_rate": 8.974604945431535e-05, + "loss": 8.5673, + "step": 600200 + }, + { + "epoch": 6.39, + "learning_rate": 8.971830955745038e-05, + "loss": 8.5111, + "step": 600300 + }, + { + "epoch": 6.4, + "learning_rate": 8.969057046019346e-05, + "loss": 8.4874, + "step": 600400 + }, + { + "epoch": 6.4, + "learning_rate": 8.9662832164702e-05, + "loss": 8.4769, + "step": 600500 + }, + { + "epoch": 6.4, + "learning_rate": 8.963509467313315e-05, + "loss": 8.4928, + "step": 600600 + }, + { + "epoch": 6.4, + "learning_rate": 8.960735798764404e-05, + "loss": 8.5121, + "step": 600700 + }, + { + "epoch": 6.4, + "learning_rate": 8.957962211039182e-05, + "loss": 8.5364, + "step": 600800 + }, + { + "epoch": 6.4, + "learning_rate": 8.955188704353342e-05, + "loss": 8.5081, + "step": 600900 + }, + { + "epoch": 6.4, + "learning_rate": 8.952415278922588e-05, + "loss": 8.5347, + "step": 601000 + }, + { + "epoch": 6.4, + "learning_rate": 8.949641934962601e-05, + "loss": 8.5402, + "step": 601100 + }, + { + "epoch": 6.4, + "learning_rate": 8.946868672689077e-05, + "loss": 8.3945, + "step": 601200 + }, + { + "epoch": 6.4, + "learning_rate": 8.944095492317678e-05, + "loss": 8.4797, + "step": 601300 + }, + { + "epoch": 6.41, + "learning_rate": 8.941322394064089e-05, + "loss": 8.4341, + "step": 601400 + }, + { + "epoch": 6.41, + "learning_rate": 8.93854937814396e-05, + "loss": 8.4458, + "step": 601500 + }, + { + "epoch": 6.41, + "learning_rate": 8.935776444772962e-05, + "loss": 8.4205, + "step": 601600 + }, + { + "epoch": 6.41, + "learning_rate": 8.933003594166734e-05, + "loss": 8.5232, + "step": 601700 + }, + { + "epoch": 6.41, + "learning_rate": 8.930230826540933e-05, + "loss": 8.5018, + "step": 601800 + }, + { + "epoch": 6.41, + "learning_rate": 8.927458142111188e-05, + "loss": 8.5517, + "step": 601900 + }, + { + "epoch": 6.41, + "learning_rate": 8.924685541093136e-05, + "loss": 8.4831, + "step": 602000 + }, + { + "epoch": 6.41, + "learning_rate": 8.921913023702398e-05, + "loss": 8.4226, + "step": 602100 + }, + { + "epoch": 6.41, + "learning_rate": 8.919140590154598e-05, + "loss": 8.4946, + "step": 602200 + }, + { + "epoch": 6.42, + "learning_rate": 8.916368240665346e-05, + "loss": 8.5129, + "step": 602300 + }, + { + "epoch": 6.42, + "learning_rate": 8.913595975450248e-05, + "loss": 8.5052, + "step": 602400 + }, + { + "epoch": 6.42, + "learning_rate": 8.910823794724902e-05, + "loss": 8.4697, + "step": 602500 + }, + { + "epoch": 6.42, + "learning_rate": 8.908051698704902e-05, + "loss": 8.4709, + "step": 602600 + }, + { + "epoch": 6.42, + "learning_rate": 8.905279687605839e-05, + "loss": 8.4262, + "step": 602700 + }, + { + "epoch": 6.42, + "learning_rate": 8.902507761643281e-05, + "loss": 8.5225, + "step": 602800 + }, + { + "epoch": 6.42, + "learning_rate": 8.899735921032815e-05, + "loss": 8.4592, + "step": 602900 + }, + { + "epoch": 6.42, + "learning_rate": 8.896964165989996e-05, + "loss": 8.4676, + "step": 603000 + }, + { + "epoch": 6.42, + "learning_rate": 8.894192496730391e-05, + "loss": 8.4809, + "step": 603100 + }, + { + "epoch": 6.42, + "learning_rate": 8.891420913469547e-05, + "loss": 8.4457, + "step": 603200 + }, + { + "epoch": 6.43, + "learning_rate": 8.888649416423015e-05, + "loss": 8.5488, + "step": 603300 + }, + { + "epoch": 6.43, + "learning_rate": 8.885878005806333e-05, + "loss": 8.4359, + "step": 603400 + }, + { + "epoch": 6.43, + "learning_rate": 8.883106681835035e-05, + "loss": 8.4578, + "step": 603500 + }, + { + "epoch": 6.43, + "learning_rate": 8.880335444724644e-05, + "loss": 8.4521, + "step": 603600 + }, + { + "epoch": 6.43, + "learning_rate": 8.877564294690682e-05, + "loss": 8.4372, + "step": 603700 + }, + { + "epoch": 6.43, + "learning_rate": 8.874793231948661e-05, + "loss": 8.4569, + "step": 603800 + }, + { + "epoch": 6.43, + "learning_rate": 8.872022256714088e-05, + "loss": 8.4679, + "step": 603900 + }, + { + "epoch": 6.43, + "learning_rate": 8.869251369202458e-05, + "loss": 8.4635, + "step": 604000 + }, + { + "epoch": 6.43, + "learning_rate": 8.866480569629269e-05, + "loss": 8.441, + "step": 604100 + }, + { + "epoch": 6.44, + "learning_rate": 8.863709858210001e-05, + "loss": 8.5506, + "step": 604200 + }, + { + "epoch": 6.44, + "learning_rate": 8.860939235160137e-05, + "loss": 8.4627, + "step": 604300 + }, + { + "epoch": 6.44, + "learning_rate": 8.858168700695145e-05, + "loss": 8.4235, + "step": 604400 + }, + { + "epoch": 6.44, + "learning_rate": 8.855398255030494e-05, + "loss": 8.4598, + "step": 604500 + }, + { + "epoch": 6.44, + "learning_rate": 8.852627898381635e-05, + "loss": 8.4902, + "step": 604600 + }, + { + "epoch": 6.44, + "learning_rate": 8.849857630964026e-05, + "loss": 8.5509, + "step": 604700 + }, + { + "epoch": 6.44, + "learning_rate": 8.847087452993108e-05, + "loss": 8.4147, + "step": 604800 + }, + { + "epoch": 6.44, + "learning_rate": 8.844317364684317e-05, + "loss": 8.4672, + "step": 604900 + }, + { + "epoch": 6.44, + "learning_rate": 8.841547366253087e-05, + "loss": 8.5015, + "step": 605000 + }, + { + "epoch": 6.45, + "learning_rate": 8.838777457914836e-05, + "loss": 8.4049, + "step": 605100 + }, + { + "epoch": 6.45, + "learning_rate": 8.836007639884983e-05, + "loss": 8.5022, + "step": 605200 + }, + { + "epoch": 6.45, + "learning_rate": 8.833237912378935e-05, + "loss": 8.4342, + "step": 605300 + }, + { + "epoch": 6.45, + "learning_rate": 8.830468275612097e-05, + "loss": 8.3797, + "step": 605400 + }, + { + "epoch": 6.45, + "learning_rate": 8.827698729799861e-05, + "loss": 8.455, + "step": 605500 + }, + { + "epoch": 6.45, + "learning_rate": 8.824929275157618e-05, + "loss": 8.4319, + "step": 605600 + }, + { + "epoch": 6.45, + "learning_rate": 8.822159911900745e-05, + "loss": 8.4766, + "step": 605700 + }, + { + "epoch": 6.45, + "learning_rate": 8.81939064024462e-05, + "loss": 8.5233, + "step": 605800 + }, + { + "epoch": 6.45, + "learning_rate": 8.816621460404604e-05, + "loss": 8.514, + "step": 605900 + }, + { + "epoch": 6.45, + "learning_rate": 8.813852372596063e-05, + "loss": 8.4407, + "step": 606000 + }, + { + "epoch": 6.46, + "learning_rate": 8.811083377034342e-05, + "loss": 8.4569, + "step": 606100 + }, + { + "epoch": 6.46, + "learning_rate": 8.808314473934794e-05, + "loss": 8.5868, + "step": 606200 + }, + { + "epoch": 6.46, + "learning_rate": 8.805545663512749e-05, + "loss": 8.4722, + "step": 606300 + }, + { + "epoch": 6.46, + "learning_rate": 8.802776945983544e-05, + "loss": 8.4384, + "step": 606400 + }, + { + "epoch": 6.46, + "learning_rate": 8.800008321562498e-05, + "loss": 8.4563, + "step": 606500 + }, + { + "epoch": 6.46, + "learning_rate": 8.797239790464932e-05, + "loss": 8.4923, + "step": 606600 + }, + { + "epoch": 6.46, + "learning_rate": 8.794471352906149e-05, + "loss": 8.4877, + "step": 606700 + }, + { + "epoch": 6.46, + "learning_rate": 8.791703009101458e-05, + "loss": 8.4785, + "step": 606800 + }, + { + "epoch": 6.46, + "learning_rate": 8.788934759266143e-05, + "loss": 8.4449, + "step": 606900 + }, + { + "epoch": 6.47, + "learning_rate": 8.786166603615497e-05, + "loss": 8.4579, + "step": 607000 + }, + { + "epoch": 6.47, + "learning_rate": 8.783398542364805e-05, + "loss": 8.5354, + "step": 607100 + }, + { + "epoch": 6.47, + "learning_rate": 8.780630575729331e-05, + "loss": 8.4588, + "step": 607200 + }, + { + "epoch": 6.47, + "learning_rate": 8.777862703924346e-05, + "loss": 8.4408, + "step": 607300 + }, + { + "epoch": 6.47, + "learning_rate": 8.775094927165102e-05, + "loss": 8.4294, + "step": 607400 + }, + { + "epoch": 6.47, + "learning_rate": 8.772327245666855e-05, + "loss": 8.4562, + "step": 607500 + }, + { + "epoch": 6.47, + "learning_rate": 8.769559659644844e-05, + "loss": 8.5075, + "step": 607600 + }, + { + "epoch": 6.47, + "learning_rate": 8.76679216931431e-05, + "loss": 8.4764, + "step": 607700 + }, + { + "epoch": 6.47, + "learning_rate": 8.764024774890468e-05, + "loss": 8.5527, + "step": 607800 + }, + { + "epoch": 6.47, + "learning_rate": 8.761257476588556e-05, + "loss": 8.4272, + "step": 607900 + }, + { + "epoch": 6.48, + "learning_rate": 8.75849027462377e-05, + "loss": 8.4251, + "step": 608000 + }, + { + "epoch": 6.48, + "learning_rate": 8.755723169211332e-05, + "loss": 8.445, + "step": 608100 + }, + { + "epoch": 6.48, + "learning_rate": 8.752956160566424e-05, + "loss": 8.455, + "step": 608200 + }, + { + "epoch": 6.48, + "learning_rate": 8.750189248904252e-05, + "loss": 8.4529, + "step": 608300 + }, + { + "epoch": 6.48, + "learning_rate": 8.747422434439985e-05, + "loss": 8.4387, + "step": 608400 + }, + { + "epoch": 6.48, + "learning_rate": 8.744655717388813e-05, + "loss": 8.4046, + "step": 608500 + }, + { + "epoch": 6.48, + "learning_rate": 8.741889097965886e-05, + "loss": 8.5012, + "step": 608600 + }, + { + "epoch": 6.48, + "learning_rate": 8.739122576386383e-05, + "loss": 8.4325, + "step": 608700 + }, + { + "epoch": 6.48, + "learning_rate": 8.736356152865442e-05, + "loss": 8.4523, + "step": 608800 + }, + { + "epoch": 6.49, + "learning_rate": 8.733589827618217e-05, + "loss": 8.482, + "step": 608900 + }, + { + "epoch": 6.49, + "learning_rate": 8.73082360085984e-05, + "loss": 8.4641, + "step": 609000 + }, + { + "epoch": 6.49, + "learning_rate": 8.72805747280544e-05, + "loss": 8.4152, + "step": 609100 + }, + { + "epoch": 6.49, + "learning_rate": 8.725291443670149e-05, + "loss": 8.5291, + "step": 609200 + }, + { + "epoch": 6.49, + "learning_rate": 8.722525513669068e-05, + "loss": 8.4376, + "step": 609300 + }, + { + "epoch": 6.49, + "learning_rate": 8.719759683017317e-05, + "loss": 8.4499, + "step": 609400 + }, + { + "epoch": 6.49, + "learning_rate": 8.716993951929982e-05, + "loss": 8.5077, + "step": 609500 + }, + { + "epoch": 6.49, + "learning_rate": 8.714228320622166e-05, + "loss": 8.5348, + "step": 609600 + }, + { + "epoch": 6.49, + "learning_rate": 8.711462789308939e-05, + "loss": 8.5457, + "step": 609700 + }, + { + "epoch": 6.5, + "learning_rate": 8.708697358205392e-05, + "loss": 8.4914, + "step": 609800 + }, + { + "epoch": 6.5, + "learning_rate": 8.70593202752658e-05, + "loss": 8.5252, + "step": 609900 + }, + { + "epoch": 6.5, + "learning_rate": 8.703166797487572e-05, + "loss": 8.4427, + "step": 610000 + }, + { + "epoch": 6.5, + "learning_rate": 8.700401668303413e-05, + "loss": 8.4444, + "step": 610100 + }, + { + "epoch": 6.5, + "learning_rate": 8.697636640189152e-05, + "loss": 8.5292, + "step": 610200 + }, + { + "epoch": 6.5, + "learning_rate": 8.694871713359824e-05, + "loss": 8.417, + "step": 610300 + }, + { + "epoch": 6.5, + "learning_rate": 8.692106888030461e-05, + "loss": 8.4894, + "step": 610400 + }, + { + "epoch": 6.5, + "learning_rate": 8.689342164416077e-05, + "loss": 8.4911, + "step": 610500 + }, + { + "epoch": 6.5, + "learning_rate": 8.68657754273169e-05, + "loss": 8.5352, + "step": 610600 + }, + { + "epoch": 6.5, + "learning_rate": 8.683813023192302e-05, + "loss": 8.5764, + "step": 610700 + }, + { + "epoch": 6.51, + "learning_rate": 8.681048606012913e-05, + "loss": 8.4115, + "step": 610800 + }, + { + "epoch": 6.51, + "learning_rate": 8.67828429140851e-05, + "loss": 8.5351, + "step": 610900 + }, + { + "epoch": 6.51, + "learning_rate": 8.675520079594077e-05, + "loss": 8.4095, + "step": 611000 + }, + { + "epoch": 6.51, + "learning_rate": 8.672755970784581e-05, + "loss": 8.4392, + "step": 611100 + }, + { + "epoch": 6.51, + "learning_rate": 8.669991965194991e-05, + "loss": 8.4315, + "step": 611200 + }, + { + "epoch": 6.51, + "learning_rate": 8.667228063040265e-05, + "loss": 8.4811, + "step": 611300 + }, + { + "epoch": 6.51, + "learning_rate": 8.664464264535348e-05, + "loss": 8.4135, + "step": 611400 + }, + { + "epoch": 6.51, + "learning_rate": 8.661700569895186e-05, + "loss": 8.4997, + "step": 611500 + }, + { + "epoch": 6.51, + "learning_rate": 8.658936979334709e-05, + "loss": 8.4874, + "step": 611600 + }, + { + "epoch": 6.52, + "learning_rate": 8.656173493068842e-05, + "loss": 8.4587, + "step": 611700 + }, + { + "epoch": 6.52, + "learning_rate": 8.6534101113125e-05, + "loss": 8.5112, + "step": 611800 + }, + { + "epoch": 6.52, + "learning_rate": 8.650646834280596e-05, + "loss": 8.477, + "step": 611900 + }, + { + "epoch": 6.52, + "learning_rate": 8.647883662188025e-05, + "loss": 8.4766, + "step": 612000 + }, + { + "epoch": 6.52, + "learning_rate": 8.645120595249683e-05, + "loss": 8.4333, + "step": 612100 + }, + { + "epoch": 6.52, + "learning_rate": 8.64235763368045e-05, + "loss": 8.4972, + "step": 612200 + }, + { + "epoch": 6.52, + "learning_rate": 8.639594777695208e-05, + "loss": 8.4964, + "step": 612300 + }, + { + "epoch": 6.52, + "learning_rate": 8.636832027508819e-05, + "loss": 8.4573, + "step": 612400 + }, + { + "epoch": 6.52, + "learning_rate": 8.634069383336146e-05, + "loss": 8.4059, + "step": 612500 + }, + { + "epoch": 6.52, + "learning_rate": 8.631306845392036e-05, + "loss": 8.4105, + "step": 612600 + }, + { + "epoch": 6.53, + "learning_rate": 8.628544413891336e-05, + "loss": 8.4437, + "step": 612700 + }, + { + "epoch": 6.53, + "learning_rate": 8.625782089048877e-05, + "loss": 8.4878, + "step": 612800 + }, + { + "epoch": 6.53, + "learning_rate": 8.62301987107949e-05, + "loss": 8.4748, + "step": 612900 + }, + { + "epoch": 6.53, + "learning_rate": 8.620257760197988e-05, + "loss": 8.4617, + "step": 613000 + }, + { + "epoch": 6.53, + "learning_rate": 8.617495756619184e-05, + "loss": 8.4617, + "step": 613100 + }, + { + "epoch": 6.53, + "learning_rate": 8.614733860557877e-05, + "loss": 8.441, + "step": 613200 + }, + { + "epoch": 6.53, + "learning_rate": 8.61197207222886e-05, + "loss": 8.557, + "step": 613300 + }, + { + "epoch": 6.53, + "learning_rate": 8.609210391846922e-05, + "loss": 8.4028, + "step": 613400 + }, + { + "epoch": 6.53, + "learning_rate": 8.606448819626834e-05, + "loss": 8.4185, + "step": 613500 + }, + { + "epoch": 6.54, + "learning_rate": 8.603687355783367e-05, + "loss": 8.54, + "step": 613600 + }, + { + "epoch": 6.54, + "learning_rate": 8.600926000531278e-05, + "loss": 8.573, + "step": 613700 + }, + { + "epoch": 6.54, + "learning_rate": 8.598164754085319e-05, + "loss": 8.4132, + "step": 613800 + }, + { + "epoch": 6.54, + "learning_rate": 8.595403616660232e-05, + "loss": 8.5025, + "step": 613900 + }, + { + "epoch": 6.54, + "learning_rate": 8.592642588470752e-05, + "loss": 8.5343, + "step": 614000 + }, + { + "epoch": 6.54, + "learning_rate": 8.589881669731601e-05, + "loss": 8.4242, + "step": 614100 + }, + { + "epoch": 6.54, + "learning_rate": 8.587120860657501e-05, + "loss": 8.5087, + "step": 614200 + }, + { + "epoch": 6.54, + "learning_rate": 8.584360161463156e-05, + "loss": 8.4296, + "step": 614300 + }, + { + "epoch": 6.54, + "learning_rate": 8.58159957236327e-05, + "loss": 8.4786, + "step": 614400 + }, + { + "epoch": 6.55, + "learning_rate": 8.57883909357253e-05, + "loss": 8.4425, + "step": 614500 + }, + { + "epoch": 6.55, + "learning_rate": 8.576078725305626e-05, + "loss": 8.3764, + "step": 614600 + }, + { + "epoch": 6.55, + "learning_rate": 8.573318467777221e-05, + "loss": 8.4289, + "step": 614700 + }, + { + "epoch": 6.55, + "learning_rate": 8.570558321201992e-05, + "loss": 8.4595, + "step": 614800 + }, + { + "epoch": 6.55, + "learning_rate": 8.567798285794583e-05, + "loss": 8.5034, + "step": 614900 + }, + { + "epoch": 6.55, + "learning_rate": 8.565038361769656e-05, + "loss": 8.4582, + "step": 615000 + }, + { + "epoch": 6.55, + "learning_rate": 8.562278549341838e-05, + "loss": 8.5144, + "step": 615100 + }, + { + "epoch": 6.55, + "learning_rate": 8.559518848725774e-05, + "loss": 8.4676, + "step": 615200 + }, + { + "epoch": 6.55, + "learning_rate": 8.556759260136071e-05, + "loss": 8.4804, + "step": 615300 + }, + { + "epoch": 6.55, + "learning_rate": 8.553999783787352e-05, + "loss": 8.4803, + "step": 615400 + }, + { + "epoch": 6.56, + "learning_rate": 8.551240419894221e-05, + "loss": 8.4445, + "step": 615500 + }, + { + "epoch": 6.56, + "learning_rate": 8.548481168671271e-05, + "loss": 8.412, + "step": 615600 + }, + { + "epoch": 6.56, + "learning_rate": 8.545722030333095e-05, + "loss": 8.4858, + "step": 615700 + }, + { + "epoch": 6.56, + "learning_rate": 8.542963005094261e-05, + "loss": 8.4975, + "step": 615800 + }, + { + "epoch": 6.56, + "learning_rate": 8.54020409316935e-05, + "loss": 8.5591, + "step": 615900 + }, + { + "epoch": 6.56, + "learning_rate": 8.537445294772913e-05, + "loss": 8.4913, + "step": 616000 + }, + { + "epoch": 6.56, + "learning_rate": 8.534686610119514e-05, + "loss": 8.4821, + "step": 616100 + }, + { + "epoch": 6.56, + "learning_rate": 8.531928039423681e-05, + "loss": 8.4492, + "step": 616200 + }, + { + "epoch": 6.56, + "learning_rate": 8.529169582899962e-05, + "loss": 8.4497, + "step": 616300 + }, + { + "epoch": 6.57, + "learning_rate": 8.526411240762873e-05, + "loss": 8.3505, + "step": 616400 + }, + { + "epoch": 6.57, + "learning_rate": 8.52365301322694e-05, + "loss": 8.5451, + "step": 616500 + }, + { + "epoch": 6.57, + "learning_rate": 8.520894900506658e-05, + "loss": 8.4391, + "step": 616600 + }, + { + "epoch": 6.57, + "learning_rate": 8.518136902816539e-05, + "loss": 8.4354, + "step": 616700 + }, + { + "epoch": 6.57, + "learning_rate": 8.515379020371063e-05, + "loss": 8.4718, + "step": 616800 + }, + { + "epoch": 6.57, + "learning_rate": 8.512621253384715e-05, + "loss": 8.4772, + "step": 616900 + }, + { + "epoch": 6.57, + "learning_rate": 8.509863602071966e-05, + "loss": 8.405, + "step": 617000 + }, + { + "epoch": 6.57, + "learning_rate": 8.50710606664728e-05, + "loss": 8.4752, + "step": 617100 + }, + { + "epoch": 6.57, + "learning_rate": 8.504348647325108e-05, + "loss": 8.4132, + "step": 617200 + }, + { + "epoch": 6.58, + "learning_rate": 8.501591344319898e-05, + "loss": 8.4913, + "step": 617300 + }, + { + "epoch": 6.58, + "learning_rate": 8.498834157846082e-05, + "loss": 8.421, + "step": 617400 + }, + { + "epoch": 6.58, + "learning_rate": 8.496077088118087e-05, + "loss": 8.5544, + "step": 617500 + }, + { + "epoch": 6.58, + "learning_rate": 8.49332013535034e-05, + "loss": 8.4075, + "step": 617600 + }, + { + "epoch": 6.58, + "learning_rate": 8.490563299757237e-05, + "loss": 8.4677, + "step": 617700 + }, + { + "epoch": 6.58, + "learning_rate": 8.487806581553185e-05, + "loss": 8.5172, + "step": 617800 + }, + { + "epoch": 6.58, + "learning_rate": 8.485049980952569e-05, + "loss": 8.466, + "step": 617900 + }, + { + "epoch": 6.58, + "learning_rate": 8.482293498169776e-05, + "loss": 8.4853, + "step": 618000 + }, + { + "epoch": 6.58, + "learning_rate": 8.479537133419173e-05, + "loss": 8.5141, + "step": 618100 + }, + { + "epoch": 6.58, + "learning_rate": 8.476780886915125e-05, + "loss": 8.4989, + "step": 618200 + }, + { + "epoch": 6.59, + "learning_rate": 8.474024758871984e-05, + "loss": 8.5157, + "step": 618300 + }, + { + "epoch": 6.59, + "learning_rate": 8.471268749504097e-05, + "loss": 8.4083, + "step": 618400 + }, + { + "epoch": 6.59, + "learning_rate": 8.468512859025796e-05, + "loss": 8.4985, + "step": 618500 + }, + { + "epoch": 6.59, + "learning_rate": 8.46575708765141e-05, + "loss": 8.4756, + "step": 618600 + }, + { + "epoch": 6.59, + "learning_rate": 8.463001435595252e-05, + "loss": 8.4111, + "step": 618700 + }, + { + "epoch": 6.59, + "learning_rate": 8.460245903071635e-05, + "loss": 8.5353, + "step": 618800 + }, + { + "epoch": 6.59, + "learning_rate": 8.45749049029485e-05, + "loss": 8.4745, + "step": 618900 + }, + { + "epoch": 6.59, + "learning_rate": 8.454735197479194e-05, + "loss": 8.4531, + "step": 619000 + }, + { + "epoch": 6.59, + "learning_rate": 8.451980024838937e-05, + "loss": 8.4533, + "step": 619100 + }, + { + "epoch": 6.6, + "learning_rate": 8.449224972588356e-05, + "loss": 8.4664, + "step": 619200 + }, + { + "epoch": 6.6, + "learning_rate": 8.446470040941708e-05, + "loss": 8.3869, + "step": 619300 + }, + { + "epoch": 6.6, + "learning_rate": 8.443715230113247e-05, + "loss": 8.4353, + "step": 619400 + }, + { + "epoch": 6.6, + "learning_rate": 8.440960540317213e-05, + "loss": 8.5448, + "step": 619500 + }, + { + "epoch": 6.6, + "learning_rate": 8.438205971767839e-05, + "loss": 8.4406, + "step": 619600 + }, + { + "epoch": 6.6, + "learning_rate": 8.435451524679349e-05, + "loss": 8.4382, + "step": 619700 + }, + { + "epoch": 6.6, + "learning_rate": 8.432697199265954e-05, + "loss": 8.4327, + "step": 619800 + }, + { + "epoch": 6.6, + "learning_rate": 8.429942995741864e-05, + "loss": 8.4246, + "step": 619900 + }, + { + "epoch": 6.6, + "learning_rate": 8.427188914321266e-05, + "loss": 8.4569, + "step": 620000 + }, + { + "epoch": 6.6, + "learning_rate": 8.424434955218352e-05, + "loss": 8.4688, + "step": 620100 + }, + { + "epoch": 6.61, + "learning_rate": 8.421681118647293e-05, + "loss": 8.4731, + "step": 620200 + }, + { + "epoch": 6.61, + "learning_rate": 8.418927404822257e-05, + "loss": 8.4823, + "step": 620300 + }, + { + "epoch": 6.61, + "learning_rate": 8.4161738139574e-05, + "loss": 8.4706, + "step": 620400 + }, + { + "epoch": 6.61, + "learning_rate": 8.413420346266871e-05, + "loss": 8.5096, + "step": 620500 + }, + { + "epoch": 6.61, + "learning_rate": 8.410667001964804e-05, + "loss": 8.4983, + "step": 620600 + }, + { + "epoch": 6.61, + "learning_rate": 8.40791378126533e-05, + "loss": 8.4777, + "step": 620700 + }, + { + "epoch": 6.61, + "learning_rate": 8.405160684382566e-05, + "loss": 8.4796, + "step": 620800 + }, + { + "epoch": 6.61, + "learning_rate": 8.402407711530621e-05, + "loss": 8.4323, + "step": 620900 + }, + { + "epoch": 6.61, + "learning_rate": 8.399654862923593e-05, + "loss": 8.4416, + "step": 621000 + }, + { + "epoch": 6.62, + "learning_rate": 8.396902138775572e-05, + "loss": 8.4834, + "step": 621100 + }, + { + "epoch": 6.62, + "learning_rate": 8.394149539300638e-05, + "loss": 8.5055, + "step": 621200 + }, + { + "epoch": 6.62, + "learning_rate": 8.391397064712861e-05, + "loss": 8.4266, + "step": 621300 + }, + { + "epoch": 6.62, + "learning_rate": 8.388644715226299e-05, + "loss": 8.464, + "step": 621400 + }, + { + "epoch": 6.62, + "learning_rate": 8.38589249105501e-05, + "loss": 8.4616, + "step": 621500 + }, + { + "epoch": 6.62, + "learning_rate": 8.383140392413022e-05, + "loss": 8.4549, + "step": 621600 + }, + { + "epoch": 6.62, + "learning_rate": 8.380388419514375e-05, + "loss": 8.4599, + "step": 621700 + }, + { + "epoch": 6.62, + "learning_rate": 8.377636572573089e-05, + "loss": 8.4222, + "step": 621800 + }, + { + "epoch": 6.62, + "learning_rate": 8.374884851803174e-05, + "loss": 8.4082, + "step": 621900 + }, + { + "epoch": 6.63, + "learning_rate": 8.372133257418635e-05, + "loss": 8.5666, + "step": 622000 + }, + { + "epoch": 6.63, + "learning_rate": 8.369381789633458e-05, + "loss": 8.4847, + "step": 622100 + }, + { + "epoch": 6.63, + "learning_rate": 8.366630448661631e-05, + "loss": 8.4504, + "step": 622200 + }, + { + "epoch": 6.63, + "learning_rate": 8.36387923471712e-05, + "loss": 8.4612, + "step": 622300 + }, + { + "epoch": 6.63, + "learning_rate": 8.361128148013893e-05, + "loss": 8.3987, + "step": 622400 + }, + { + "epoch": 6.63, + "learning_rate": 8.358377188765898e-05, + "loss": 8.4151, + "step": 622500 + }, + { + "epoch": 6.63, + "learning_rate": 8.355626357187083e-05, + "loss": 8.5178, + "step": 622600 + }, + { + "epoch": 6.63, + "learning_rate": 8.35287565349137e-05, + "loss": 8.5098, + "step": 622700 + }, + { + "epoch": 6.63, + "learning_rate": 8.350125077892695e-05, + "loss": 8.4416, + "step": 622800 + }, + { + "epoch": 6.63, + "learning_rate": 8.347374630604954e-05, + "loss": 8.4934, + "step": 622900 + }, + { + "epoch": 6.64, + "learning_rate": 8.344624311842069e-05, + "loss": 8.4183, + "step": 623000 + }, + { + "epoch": 6.64, + "learning_rate": 8.341874121817915e-05, + "loss": 8.4993, + "step": 623100 + }, + { + "epoch": 6.64, + "learning_rate": 8.339124060746389e-05, + "loss": 8.3968, + "step": 623200 + }, + { + "epoch": 6.64, + "learning_rate": 8.33637412884135e-05, + "loss": 8.4606, + "step": 623300 + }, + { + "epoch": 6.64, + "learning_rate": 8.333624326316673e-05, + "loss": 8.4426, + "step": 623400 + }, + { + "epoch": 6.64, + "learning_rate": 8.3308746533862e-05, + "loss": 8.462, + "step": 623500 + }, + { + "epoch": 6.64, + "learning_rate": 8.328125110263783e-05, + "loss": 8.5074, + "step": 623600 + }, + { + "epoch": 6.64, + "learning_rate": 8.325375697163245e-05, + "loss": 8.4585, + "step": 623700 + }, + { + "epoch": 6.64, + "learning_rate": 8.322626414298412e-05, + "loss": 8.4674, + "step": 623800 + }, + { + "epoch": 6.65, + "learning_rate": 8.319877261883101e-05, + "loss": 8.4567, + "step": 623900 + }, + { + "epoch": 6.65, + "learning_rate": 8.317128240131103e-05, + "loss": 8.5406, + "step": 624000 + }, + { + "epoch": 6.65, + "learning_rate": 8.314379349256224e-05, + "loss": 8.4436, + "step": 624100 + }, + { + "epoch": 6.65, + "learning_rate": 8.311630589472231e-05, + "loss": 8.4287, + "step": 624200 + }, + { + "epoch": 6.65, + "learning_rate": 8.308881960992907e-05, + "loss": 8.4907, + "step": 624300 + }, + { + "epoch": 6.65, + "learning_rate": 8.306133464032001e-05, + "loss": 8.4493, + "step": 624400 + }, + { + "epoch": 6.65, + "learning_rate": 8.303385098803278e-05, + "loss": 8.4223, + "step": 624500 + }, + { + "epoch": 6.65, + "learning_rate": 8.300636865520468e-05, + "loss": 8.4943, + "step": 624600 + }, + { + "epoch": 6.65, + "learning_rate": 8.297888764397307e-05, + "loss": 8.5449, + "step": 624700 + }, + { + "epoch": 6.65, + "learning_rate": 8.29514079564751e-05, + "loss": 8.4251, + "step": 624800 + }, + { + "epoch": 6.66, + "learning_rate": 8.292392959484792e-05, + "loss": 8.3823, + "step": 624900 + }, + { + "epoch": 6.66, + "learning_rate": 8.289645256122848e-05, + "loss": 8.5218, + "step": 625000 + }, + { + "epoch": 6.66, + "learning_rate": 8.286897685775374e-05, + "loss": 8.496, + "step": 625100 + }, + { + "epoch": 6.66, + "learning_rate": 8.28415024865604e-05, + "loss": 8.4725, + "step": 625200 + }, + { + "epoch": 6.66, + "learning_rate": 8.281402944978522e-05, + "loss": 8.4976, + "step": 625300 + }, + { + "epoch": 6.66, + "learning_rate": 8.278655774956472e-05, + "loss": 8.5743, + "step": 625400 + }, + { + "epoch": 6.66, + "learning_rate": 8.275908738803545e-05, + "loss": 8.549, + "step": 625500 + }, + { + "epoch": 6.66, + "learning_rate": 8.273161836733369e-05, + "loss": 8.4689, + "step": 625600 + }, + { + "epoch": 6.66, + "learning_rate": 8.270415068959578e-05, + "loss": 8.4299, + "step": 625700 + }, + { + "epoch": 6.67, + "learning_rate": 8.267668435695784e-05, + "loss": 8.4724, + "step": 625800 + }, + { + "epoch": 6.67, + "learning_rate": 8.264921937155597e-05, + "loss": 8.4292, + "step": 625900 + }, + { + "epoch": 6.67, + "learning_rate": 8.262175573552611e-05, + "loss": 8.5077, + "step": 626000 + }, + { + "epoch": 6.67, + "learning_rate": 8.259429345100409e-05, + "loss": 8.5091, + "step": 626100 + }, + { + "epoch": 6.67, + "learning_rate": 8.25668325201257e-05, + "loss": 8.456, + "step": 626200 + }, + { + "epoch": 6.67, + "learning_rate": 8.253937294502652e-05, + "loss": 8.4404, + "step": 626300 + }, + { + "epoch": 6.67, + "learning_rate": 8.251191472784213e-05, + "loss": 8.4024, + "step": 626400 + }, + { + "epoch": 6.67, + "learning_rate": 8.248445787070793e-05, + "loss": 8.5325, + "step": 626500 + }, + { + "epoch": 6.67, + "learning_rate": 8.245700237575927e-05, + "loss": 8.4521, + "step": 626600 + }, + { + "epoch": 6.68, + "learning_rate": 8.242954824513133e-05, + "loss": 8.4644, + "step": 626700 + }, + { + "epoch": 6.68, + "learning_rate": 8.240209548095927e-05, + "loss": 8.4662, + "step": 626800 + }, + { + "epoch": 6.68, + "learning_rate": 8.237464408537804e-05, + "loss": 8.4711, + "step": 626900 + }, + { + "epoch": 6.68, + "learning_rate": 8.23471940605226e-05, + "loss": 8.3778, + "step": 627000 + }, + { + "epoch": 6.68, + "learning_rate": 8.231974540852768e-05, + "loss": 8.4625, + "step": 627100 + }, + { + "epoch": 6.68, + "learning_rate": 8.2292298131528e-05, + "loss": 8.5197, + "step": 627200 + }, + { + "epoch": 6.68, + "learning_rate": 8.226485223165811e-05, + "loss": 8.4708, + "step": 627300 + }, + { + "epoch": 6.68, + "learning_rate": 8.223740771105253e-05, + "loss": 8.3515, + "step": 627400 + }, + { + "epoch": 6.68, + "learning_rate": 8.220996457184556e-05, + "loss": 8.4111, + "step": 627500 + }, + { + "epoch": 6.68, + "learning_rate": 8.21825228161715e-05, + "loss": 8.3452, + "step": 627600 + }, + { + "epoch": 6.69, + "learning_rate": 8.215508244616448e-05, + "loss": 8.4776, + "step": 627700 + }, + { + "epoch": 6.69, + "learning_rate": 8.212764346395856e-05, + "loss": 8.3751, + "step": 627800 + }, + { + "epoch": 6.69, + "learning_rate": 8.210020587168763e-05, + "loss": 8.4714, + "step": 627900 + }, + { + "epoch": 6.69, + "learning_rate": 8.207276967148556e-05, + "loss": 8.4411, + "step": 628000 + }, + { + "epoch": 6.69, + "learning_rate": 8.204533486548605e-05, + "loss": 8.4251, + "step": 628100 + }, + { + "epoch": 6.69, + "learning_rate": 8.20179014558227e-05, + "loss": 8.3869, + "step": 628200 + }, + { + "epoch": 6.69, + "learning_rate": 8.199046944462904e-05, + "loss": 8.4163, + "step": 628300 + }, + { + "epoch": 6.69, + "learning_rate": 8.19630388340384e-05, + "loss": 8.4454, + "step": 628400 + }, + { + "epoch": 6.69, + "learning_rate": 8.193560962618414e-05, + "loss": 8.4119, + "step": 628500 + }, + { + "epoch": 6.7, + "learning_rate": 8.190818182319935e-05, + "loss": 8.4556, + "step": 628600 + }, + { + "epoch": 6.7, + "learning_rate": 8.188075542721715e-05, + "loss": 8.411, + "step": 628700 + }, + { + "epoch": 6.7, + "learning_rate": 8.185333044037046e-05, + "loss": 8.5141, + "step": 628800 + }, + { + "epoch": 6.7, + "learning_rate": 8.182590686479217e-05, + "loss": 8.4825, + "step": 628900 + }, + { + "epoch": 6.7, + "learning_rate": 8.179848470261498e-05, + "loss": 8.444, + "step": 629000 + }, + { + "epoch": 6.7, + "learning_rate": 8.177106395597151e-05, + "loss": 8.5022, + "step": 629100 + }, + { + "epoch": 6.7, + "learning_rate": 8.174364462699427e-05, + "loss": 8.471, + "step": 629200 + }, + { + "epoch": 6.7, + "learning_rate": 8.171622671781572e-05, + "loss": 8.4392, + "step": 629300 + }, + { + "epoch": 6.7, + "learning_rate": 8.168881023056806e-05, + "loss": 8.4344, + "step": 629400 + }, + { + "epoch": 6.71, + "learning_rate": 8.166139516738359e-05, + "loss": 8.5094, + "step": 629500 + }, + { + "epoch": 6.71, + "learning_rate": 8.163398153039423e-05, + "loss": 8.3946, + "step": 629600 + }, + { + "epoch": 6.71, + "learning_rate": 8.160656932173211e-05, + "loss": 8.4478, + "step": 629700 + }, + { + "epoch": 6.71, + "learning_rate": 8.157915854352892e-05, + "loss": 8.5021, + "step": 629800 + }, + { + "epoch": 6.71, + "learning_rate": 8.155174919791655e-05, + "loss": 8.4036, + "step": 629900 + }, + { + "epoch": 6.71, + "learning_rate": 8.15243412870265e-05, + "loss": 8.4358, + "step": 630000 + }, + { + "epoch": 6.71, + "learning_rate": 8.149693481299034e-05, + "loss": 8.497, + "step": 630100 + }, + { + "epoch": 6.71, + "learning_rate": 8.146952977793951e-05, + "loss": 8.3806, + "step": 630200 + }, + { + "epoch": 6.71, + "learning_rate": 8.144212618400526e-05, + "loss": 8.4468, + "step": 630300 + }, + { + "epoch": 6.71, + "learning_rate": 8.141472403331878e-05, + "loss": 8.3759, + "step": 630400 + }, + { + "epoch": 6.72, + "learning_rate": 8.138732332801112e-05, + "loss": 8.436, + "step": 630500 + }, + { + "epoch": 6.72, + "learning_rate": 8.13599240702133e-05, + "loss": 8.5548, + "step": 630600 + }, + { + "epoch": 6.72, + "learning_rate": 8.133252626205604e-05, + "loss": 8.4209, + "step": 630700 + }, + { + "epoch": 6.72, + "learning_rate": 8.130512990567021e-05, + "loss": 8.4383, + "step": 630800 + }, + { + "epoch": 6.72, + "learning_rate": 8.127773500318631e-05, + "loss": 8.4369, + "step": 630900 + }, + { + "epoch": 6.72, + "learning_rate": 8.125034155673497e-05, + "loss": 8.4232, + "step": 631000 + }, + { + "epoch": 6.72, + "learning_rate": 8.122294956844643e-05, + "loss": 8.4483, + "step": 631100 + }, + { + "epoch": 6.72, + "learning_rate": 8.119555904045111e-05, + "loss": 8.456, + "step": 631200 + }, + { + "epoch": 6.72, + "learning_rate": 8.116816997487904e-05, + "loss": 8.4901, + "step": 631300 + }, + { + "epoch": 6.73, + "learning_rate": 8.114078237386041e-05, + "loss": 8.4621, + "step": 631400 + }, + { + "epoch": 6.73, + "learning_rate": 8.111339623952506e-05, + "loss": 8.4301, + "step": 631500 + }, + { + "epoch": 6.73, + "learning_rate": 8.108601157400284e-05, + "loss": 8.4561, + "step": 631600 + }, + { + "epoch": 6.73, + "learning_rate": 8.105862837942346e-05, + "loss": 8.4833, + "step": 631700 + }, + { + "epoch": 6.73, + "learning_rate": 8.103124665791651e-05, + "loss": 8.4051, + "step": 631800 + }, + { + "epoch": 6.73, + "learning_rate": 8.100386641161147e-05, + "loss": 8.5105, + "step": 631900 + }, + { + "epoch": 6.73, + "learning_rate": 8.09764876426377e-05, + "loss": 8.4655, + "step": 632000 + }, + { + "epoch": 6.73, + "learning_rate": 8.094911035312445e-05, + "loss": 8.4267, + "step": 632100 + }, + { + "epoch": 6.73, + "learning_rate": 8.092173454520085e-05, + "loss": 8.4926, + "step": 632200 + }, + { + "epoch": 6.73, + "learning_rate": 8.089436022099598e-05, + "loss": 8.5183, + "step": 632300 + }, + { + "epoch": 6.74, + "learning_rate": 8.086698738263863e-05, + "loss": 8.4489, + "step": 632400 + }, + { + "epoch": 6.74, + "learning_rate": 8.083961603225772e-05, + "loss": 8.4383, + "step": 632500 + }, + { + "epoch": 6.74, + "learning_rate": 8.081224617198182e-05, + "loss": 8.4887, + "step": 632600 + }, + { + "epoch": 6.74, + "learning_rate": 8.078487780393953e-05, + "loss": 8.4886, + "step": 632700 + }, + { + "epoch": 6.74, + "learning_rate": 8.075751093025927e-05, + "loss": 8.3987, + "step": 632800 + }, + { + "epoch": 6.74, + "learning_rate": 8.07301455530694e-05, + "loss": 8.4399, + "step": 632900 + }, + { + "epoch": 6.74, + "learning_rate": 8.070278167449808e-05, + "loss": 8.5023, + "step": 633000 + }, + { + "epoch": 6.74, + "learning_rate": 8.067541929667345e-05, + "loss": 8.4388, + "step": 633100 + }, + { + "epoch": 6.74, + "learning_rate": 8.064805842172344e-05, + "loss": 8.4159, + "step": 633200 + }, + { + "epoch": 6.75, + "learning_rate": 8.062069905177595e-05, + "loss": 8.4026, + "step": 633300 + }, + { + "epoch": 6.75, + "learning_rate": 8.059334118895868e-05, + "loss": 8.4122, + "step": 633400 + }, + { + "epoch": 6.75, + "learning_rate": 8.05659848353993e-05, + "loss": 8.3817, + "step": 633500 + }, + { + "epoch": 6.75, + "learning_rate": 8.053862999322526e-05, + "loss": 8.3691, + "step": 633600 + }, + { + "epoch": 6.75, + "learning_rate": 8.051127666456402e-05, + "loss": 8.51, + "step": 633700 + }, + { + "epoch": 6.75, + "learning_rate": 8.048392485154278e-05, + "loss": 8.4287, + "step": 633800 + }, + { + "epoch": 6.75, + "learning_rate": 8.045657455628874e-05, + "loss": 8.4832, + "step": 633900 + }, + { + "epoch": 6.75, + "learning_rate": 8.042922578092892e-05, + "loss": 8.5682, + "step": 634000 + }, + { + "epoch": 6.75, + "learning_rate": 8.040187852759025e-05, + "loss": 8.4374, + "step": 634100 + }, + { + "epoch": 6.76, + "learning_rate": 8.037453279839949e-05, + "loss": 8.4466, + "step": 634200 + }, + { + "epoch": 6.76, + "learning_rate": 8.034718859548334e-05, + "loss": 8.3575, + "step": 634300 + }, + { + "epoch": 6.76, + "learning_rate": 8.031984592096839e-05, + "loss": 8.4577, + "step": 634400 + }, + { + "epoch": 6.76, + "learning_rate": 8.029250477698105e-05, + "loss": 8.4755, + "step": 634500 + }, + { + "epoch": 6.76, + "learning_rate": 8.026516516564766e-05, + "loss": 8.4906, + "step": 634600 + }, + { + "epoch": 6.76, + "learning_rate": 8.023782708909442e-05, + "loss": 8.5493, + "step": 634700 + }, + { + "epoch": 6.76, + "learning_rate": 8.021049054944741e-05, + "loss": 8.4979, + "step": 634800 + }, + { + "epoch": 6.76, + "learning_rate": 8.01831555488326e-05, + "loss": 8.3888, + "step": 634900 + }, + { + "epoch": 6.76, + "learning_rate": 8.015582208937584e-05, + "loss": 8.4178, + "step": 635000 + }, + { + "epoch": 6.76, + "learning_rate": 8.012849017320282e-05, + "loss": 8.4243, + "step": 635100 + }, + { + "epoch": 6.77, + "learning_rate": 8.010115980243922e-05, + "loss": 8.4666, + "step": 635200 + }, + { + "epoch": 6.77, + "learning_rate": 8.007383097921047e-05, + "loss": 8.497, + "step": 635300 + }, + { + "epoch": 6.77, + "learning_rate": 8.004650370564195e-05, + "loss": 8.4718, + "step": 635400 + }, + { + "epoch": 6.77, + "learning_rate": 8.001917798385889e-05, + "loss": 8.3723, + "step": 635500 + }, + { + "epoch": 6.77, + "learning_rate": 7.999185381598645e-05, + "loss": 8.4497, + "step": 635600 + }, + { + "epoch": 6.77, + "learning_rate": 7.996453120414958e-05, + "loss": 8.4452, + "step": 635700 + }, + { + "epoch": 6.77, + "learning_rate": 7.993721015047321e-05, + "loss": 8.4015, + "step": 635800 + }, + { + "epoch": 6.77, + "learning_rate": 7.990989065708206e-05, + "loss": 8.4066, + "step": 635900 + }, + { + "epoch": 6.77, + "learning_rate": 7.988257272610083e-05, + "loss": 8.506, + "step": 636000 + }, + { + "epoch": 6.78, + "learning_rate": 7.985525635965398e-05, + "loss": 8.4733, + "step": 636100 + }, + { + "epoch": 6.78, + "learning_rate": 7.982794155986594e-05, + "loss": 8.496, + "step": 636200 + }, + { + "epoch": 6.78, + "learning_rate": 7.980062832886096e-05, + "loss": 8.3597, + "step": 636300 + }, + { + "epoch": 6.78, + "learning_rate": 7.97733166687632e-05, + "loss": 8.4707, + "step": 636400 + }, + { + "epoch": 6.78, + "learning_rate": 7.974600658169671e-05, + "loss": 8.4579, + "step": 636500 + }, + { + "epoch": 6.78, + "learning_rate": 7.971869806978539e-05, + "loss": 8.4621, + "step": 636600 + }, + { + "epoch": 6.78, + "learning_rate": 7.969139113515303e-05, + "loss": 8.4977, + "step": 636700 + }, + { + "epoch": 6.78, + "learning_rate": 7.966408577992326e-05, + "loss": 8.4343, + "step": 636800 + }, + { + "epoch": 6.78, + "learning_rate": 7.963678200621966e-05, + "loss": 8.4563, + "step": 636900 + }, + { + "epoch": 6.78, + "learning_rate": 7.960947981616562e-05, + "loss": 8.4176, + "step": 637000 + }, + { + "epoch": 6.79, + "learning_rate": 7.958217921188447e-05, + "loss": 8.4006, + "step": 637100 + }, + { + "epoch": 6.79, + "learning_rate": 7.955488019549932e-05, + "loss": 8.4416, + "step": 637200 + }, + { + "epoch": 6.79, + "learning_rate": 7.952758276913332e-05, + "loss": 8.4316, + "step": 637300 + }, + { + "epoch": 6.79, + "learning_rate": 7.950028693490926e-05, + "loss": 8.4453, + "step": 637400 + }, + { + "epoch": 6.79, + "learning_rate": 7.947299269495006e-05, + "loss": 8.431, + "step": 637500 + }, + { + "epoch": 6.79, + "learning_rate": 7.944570005137829e-05, + "loss": 8.451, + "step": 637600 + }, + { + "epoch": 6.79, + "learning_rate": 7.94184090063166e-05, + "loss": 8.418, + "step": 637700 + }, + { + "epoch": 6.79, + "learning_rate": 7.939111956188731e-05, + "loss": 8.4546, + "step": 637800 + }, + { + "epoch": 6.79, + "learning_rate": 7.936383172021286e-05, + "loss": 8.383, + "step": 637900 + }, + { + "epoch": 6.8, + "learning_rate": 7.933654548341529e-05, + "loss": 8.4485, + "step": 638000 + }, + { + "epoch": 6.8, + "learning_rate": 7.930926085361678e-05, + "loss": 8.4, + "step": 638100 + }, + { + "epoch": 6.8, + "learning_rate": 7.928197783293913e-05, + "loss": 8.4857, + "step": 638200 + }, + { + "epoch": 6.8, + "learning_rate": 7.92546964235043e-05, + "loss": 8.4693, + "step": 638300 + }, + { + "epoch": 6.8, + "learning_rate": 7.92274166274338e-05, + "loss": 8.4969, + "step": 638400 + }, + { + "epoch": 6.8, + "learning_rate": 7.920013844684925e-05, + "loss": 8.4458, + "step": 638500 + }, + { + "epoch": 6.8, + "learning_rate": 7.917286188387215e-05, + "loss": 8.484, + "step": 638600 + }, + { + "epoch": 6.8, + "learning_rate": 7.914558694062367e-05, + "loss": 8.4335, + "step": 638700 + }, + { + "epoch": 6.8, + "learning_rate": 7.911831361922512e-05, + "loss": 8.5105, + "step": 638800 + }, + { + "epoch": 6.81, + "learning_rate": 7.909104192179742e-05, + "loss": 8.4906, + "step": 638900 + }, + { + "epoch": 6.81, + "learning_rate": 7.906377185046162e-05, + "loss": 8.4454, + "step": 639000 + }, + { + "epoch": 6.81, + "learning_rate": 7.903650340733838e-05, + "loss": 8.5688, + "step": 639100 + }, + { + "epoch": 6.81, + "learning_rate": 7.90092365945485e-05, + "loss": 8.4018, + "step": 639200 + }, + { + "epoch": 6.81, + "learning_rate": 7.898197141421242e-05, + "loss": 8.3979, + "step": 639300 + }, + { + "epoch": 6.81, + "learning_rate": 7.895470786845065e-05, + "loss": 8.4349, + "step": 639400 + }, + { + "epoch": 6.81, + "learning_rate": 7.892744595938338e-05, + "loss": 8.3784, + "step": 639500 + }, + { + "epoch": 6.81, + "learning_rate": 7.890018568913085e-05, + "loss": 8.5196, + "step": 639600 + }, + { + "epoch": 6.81, + "learning_rate": 7.887292705981304e-05, + "loss": 8.4485, + "step": 639700 + }, + { + "epoch": 6.81, + "learning_rate": 7.88456700735499e-05, + "loss": 8.4793, + "step": 639800 + }, + { + "epoch": 6.82, + "learning_rate": 7.881841473246117e-05, + "loss": 8.3695, + "step": 639900 + }, + { + "epoch": 6.82, + "learning_rate": 7.879116103866654e-05, + "loss": 8.4859, + "step": 640000 + }, + { + "epoch": 6.82, + "learning_rate": 7.87639089942855e-05, + "loss": 8.4572, + "step": 640100 + }, + { + "epoch": 6.82, + "learning_rate": 7.873665860143746e-05, + "loss": 8.4956, + "step": 640200 + }, + { + "epoch": 6.82, + "learning_rate": 7.870940986224166e-05, + "loss": 8.4324, + "step": 640300 + }, + { + "epoch": 6.82, + "learning_rate": 7.86821627788173e-05, + "loss": 8.334, + "step": 640400 + }, + { + "epoch": 6.82, + "learning_rate": 7.865491735328333e-05, + "loss": 8.4526, + "step": 640500 + }, + { + "epoch": 6.82, + "learning_rate": 7.862767358775863e-05, + "loss": 8.4964, + "step": 640600 + }, + { + "epoch": 6.82, + "learning_rate": 7.860043148436199e-05, + "loss": 8.4774, + "step": 640700 + }, + { + "epoch": 6.83, + "learning_rate": 7.8573191045212e-05, + "loss": 8.486, + "step": 640800 + }, + { + "epoch": 6.83, + "learning_rate": 7.854595227242716e-05, + "loss": 8.4165, + "step": 640900 + }, + { + "epoch": 6.83, + "learning_rate": 7.851871516812583e-05, + "loss": 8.442, + "step": 641000 + }, + { + "epoch": 6.83, + "learning_rate": 7.849147973442626e-05, + "loss": 8.5073, + "step": 641100 + }, + { + "epoch": 6.83, + "learning_rate": 7.846424597344653e-05, + "loss": 8.4901, + "step": 641200 + }, + { + "epoch": 6.83, + "learning_rate": 7.843701388730462e-05, + "loss": 8.4499, + "step": 641300 + }, + { + "epoch": 6.83, + "learning_rate": 7.840978347811838e-05, + "loss": 8.4855, + "step": 641400 + }, + { + "epoch": 6.83, + "learning_rate": 7.83825547480055e-05, + "loss": 8.4858, + "step": 641500 + }, + { + "epoch": 6.83, + "learning_rate": 7.835532769908359e-05, + "loss": 8.463, + "step": 641600 + }, + { + "epoch": 6.83, + "learning_rate": 7.832810233347008e-05, + "loss": 8.3982, + "step": 641700 + }, + { + "epoch": 6.84, + "learning_rate": 7.830087865328229e-05, + "loss": 8.4182, + "step": 641800 + }, + { + "epoch": 6.84, + "learning_rate": 7.827365666063742e-05, + "loss": 8.4641, + "step": 641900 + }, + { + "epoch": 6.84, + "learning_rate": 7.824643635765251e-05, + "loss": 8.4686, + "step": 642000 + }, + { + "epoch": 6.84, + "learning_rate": 7.821921774644452e-05, + "loss": 8.4227, + "step": 642100 + }, + { + "epoch": 6.84, + "learning_rate": 7.81920008291302e-05, + "loss": 8.4333, + "step": 642200 + }, + { + "epoch": 6.84, + "learning_rate": 7.816478560782625e-05, + "loss": 8.5416, + "step": 642300 + }, + { + "epoch": 6.84, + "learning_rate": 7.813757208464916e-05, + "loss": 8.4377, + "step": 642400 + }, + { + "epoch": 6.84, + "learning_rate": 7.811036026171538e-05, + "loss": 8.4491, + "step": 642500 + }, + { + "epoch": 6.84, + "learning_rate": 7.808315014114113e-05, + "loss": 8.4476, + "step": 642600 + }, + { + "epoch": 6.85, + "learning_rate": 7.805594172504256e-05, + "loss": 8.4524, + "step": 642700 + }, + { + "epoch": 6.85, + "learning_rate": 7.802873501553569e-05, + "loss": 8.3829, + "step": 642800 + }, + { + "epoch": 6.85, + "learning_rate": 7.800153001473635e-05, + "loss": 8.4801, + "step": 642900 + }, + { + "epoch": 6.85, + "learning_rate": 7.797432672476033e-05, + "loss": 8.5178, + "step": 643000 + }, + { + "epoch": 6.85, + "learning_rate": 7.794712514772318e-05, + "loss": 8.432, + "step": 643100 + }, + { + "epoch": 6.85, + "learning_rate": 7.791992528574041e-05, + "loss": 8.4472, + "step": 643200 + }, + { + "epoch": 6.85, + "learning_rate": 7.789272714092731e-05, + "loss": 8.4107, + "step": 643300 + }, + { + "epoch": 6.85, + "learning_rate": 7.786553071539913e-05, + "loss": 8.4364, + "step": 643400 + }, + { + "epoch": 6.85, + "learning_rate": 7.783833601127091e-05, + "loss": 8.4533, + "step": 643500 + }, + { + "epoch": 6.86, + "learning_rate": 7.78111430306576e-05, + "loss": 8.4434, + "step": 643600 + }, + { + "epoch": 6.86, + "learning_rate": 7.778395177567398e-05, + "loss": 8.4923, + "step": 643700 + }, + { + "epoch": 6.86, + "learning_rate": 7.775676224843476e-05, + "loss": 8.3885, + "step": 643800 + }, + { + "epoch": 6.86, + "learning_rate": 7.772957445105439e-05, + "loss": 8.318, + "step": 643900 + }, + { + "epoch": 6.86, + "learning_rate": 7.770238838564737e-05, + "loss": 8.4489, + "step": 644000 + }, + { + "epoch": 6.86, + "learning_rate": 7.767520405432788e-05, + "loss": 8.4177, + "step": 644100 + }, + { + "epoch": 6.86, + "learning_rate": 7.764802145921014e-05, + "loss": 8.477, + "step": 644200 + }, + { + "epoch": 6.86, + "learning_rate": 7.762084060240801e-05, + "loss": 8.454, + "step": 644300 + }, + { + "epoch": 6.86, + "learning_rate": 7.759366148603549e-05, + "loss": 8.4591, + "step": 644400 + }, + { + "epoch": 6.86, + "learning_rate": 7.756648411220617e-05, + "loss": 8.4864, + "step": 644500 + }, + { + "epoch": 6.87, + "learning_rate": 7.753930848303379e-05, + "loss": 8.4371, + "step": 644600 + }, + { + "epoch": 6.87, + "learning_rate": 7.751213460063162e-05, + "loss": 8.4202, + "step": 644700 + }, + { + "epoch": 6.87, + "learning_rate": 7.74849624671131e-05, + "loss": 8.3555, + "step": 644800 + }, + { + "epoch": 6.87, + "learning_rate": 7.745779208459141e-05, + "loss": 8.5002, + "step": 644900 + }, + { + "epoch": 6.87, + "learning_rate": 7.743062345517953e-05, + "loss": 8.4367, + "step": 645000 + }, + { + "epoch": 6.87, + "learning_rate": 7.740345658099044e-05, + "loss": 8.3939, + "step": 645100 + }, + { + "epoch": 6.87, + "learning_rate": 7.737629146413683e-05, + "loss": 8.5227, + "step": 645200 + }, + { + "epoch": 6.87, + "learning_rate": 7.734912810673142e-05, + "loss": 8.4316, + "step": 645300 + }, + { + "epoch": 6.87, + "learning_rate": 7.732196651088661e-05, + "loss": 8.4288, + "step": 645400 + }, + { + "epoch": 6.88, + "learning_rate": 7.729480667871488e-05, + "loss": 8.453, + "step": 645500 + }, + { + "epoch": 6.88, + "learning_rate": 7.726764861232832e-05, + "loss": 8.3989, + "step": 645600 + }, + { + "epoch": 6.88, + "learning_rate": 7.724049231383914e-05, + "loss": 8.4344, + "step": 645700 + }, + { + "epoch": 6.88, + "learning_rate": 7.721333778535915e-05, + "loss": 8.3313, + "step": 645800 + }, + { + "epoch": 6.88, + "learning_rate": 7.718618502900033e-05, + "loss": 8.4155, + "step": 645900 + }, + { + "epoch": 6.88, + "learning_rate": 7.715903404687418e-05, + "loss": 8.4904, + "step": 646000 + }, + { + "epoch": 6.88, + "learning_rate": 7.71318848410924e-05, + "loss": 8.4526, + "step": 646100 + }, + { + "epoch": 6.88, + "learning_rate": 7.710473741376621e-05, + "loss": 8.4893, + "step": 646200 + }, + { + "epoch": 6.88, + "learning_rate": 7.707759176700705e-05, + "loss": 8.4126, + "step": 646300 + }, + { + "epoch": 6.89, + "learning_rate": 7.70504479029259e-05, + "loss": 8.5039, + "step": 646400 + }, + { + "epoch": 6.89, + "learning_rate": 7.702330582363382e-05, + "loss": 8.4638, + "step": 646500 + }, + { + "epoch": 6.89, + "learning_rate": 7.69961655312416e-05, + "loss": 8.4604, + "step": 646600 + }, + { + "epoch": 6.89, + "learning_rate": 7.696902702785999e-05, + "loss": 8.3661, + "step": 646700 + }, + { + "epoch": 6.89, + "learning_rate": 7.69418903155995e-05, + "loss": 8.4466, + "step": 646800 + }, + { + "epoch": 6.89, + "learning_rate": 7.691475539657059e-05, + "loss": 8.4191, + "step": 646900 + }, + { + "epoch": 6.89, + "learning_rate": 7.68876222728836e-05, + "loss": 8.4195, + "step": 647000 + }, + { + "epoch": 6.89, + "learning_rate": 7.686049094664855e-05, + "loss": 8.424, + "step": 647100 + }, + { + "epoch": 6.89, + "learning_rate": 7.683336141997557e-05, + "loss": 8.5089, + "step": 647200 + }, + { + "epoch": 6.89, + "learning_rate": 7.680623369497442e-05, + "loss": 8.4636, + "step": 647300 + }, + { + "epoch": 6.9, + "learning_rate": 7.677910777375493e-05, + "loss": 8.4681, + "step": 647400 + }, + { + "epoch": 6.9, + "learning_rate": 7.67519836584266e-05, + "loss": 8.3789, + "step": 647500 + }, + { + "epoch": 6.9, + "learning_rate": 7.672486135109893e-05, + "loss": 8.3926, + "step": 647600 + }, + { + "epoch": 6.9, + "learning_rate": 7.669774085388117e-05, + "loss": 8.5104, + "step": 647700 + }, + { + "epoch": 6.9, + "learning_rate": 7.667062216888256e-05, + "loss": 8.4114, + "step": 647800 + }, + { + "epoch": 6.9, + "learning_rate": 7.664350529821204e-05, + "loss": 8.4454, + "step": 647900 + }, + { + "epoch": 6.9, + "learning_rate": 7.661639024397856e-05, + "loss": 8.4979, + "step": 648000 + }, + { + "epoch": 6.9, + "learning_rate": 7.658927700829081e-05, + "loss": 8.4473, + "step": 648100 + }, + { + "epoch": 6.9, + "learning_rate": 7.656216559325741e-05, + "loss": 8.4327, + "step": 648200 + }, + { + "epoch": 6.91, + "learning_rate": 7.653505600098683e-05, + "loss": 8.4482, + "step": 648300 + }, + { + "epoch": 6.91, + "learning_rate": 7.650794823358738e-05, + "loss": 8.4336, + "step": 648400 + }, + { + "epoch": 6.91, + "learning_rate": 7.64808422931672e-05, + "loss": 8.3623, + "step": 648500 + }, + { + "epoch": 6.91, + "learning_rate": 7.645373818183439e-05, + "loss": 8.4865, + "step": 648600 + }, + { + "epoch": 6.91, + "learning_rate": 7.642663590169678e-05, + "loss": 8.4881, + "step": 648700 + }, + { + "epoch": 6.91, + "learning_rate": 7.639953545486214e-05, + "loss": 8.4017, + "step": 648800 + }, + { + "epoch": 6.91, + "learning_rate": 7.637243684343807e-05, + "loss": 8.4086, + "step": 648900 + }, + { + "epoch": 6.91, + "learning_rate": 7.634534006953201e-05, + "loss": 8.4017, + "step": 649000 + }, + { + "epoch": 6.91, + "learning_rate": 7.631824513525133e-05, + "loss": 8.4433, + "step": 649100 + }, + { + "epoch": 6.91, + "learning_rate": 7.629115204270317e-05, + "loss": 8.4826, + "step": 649200 + }, + { + "epoch": 6.92, + "learning_rate": 7.626406079399457e-05, + "loss": 8.4147, + "step": 649300 + }, + { + "epoch": 6.92, + "learning_rate": 7.623697139123243e-05, + "loss": 8.3973, + "step": 649400 + }, + { + "epoch": 6.92, + "learning_rate": 7.620988383652347e-05, + "loss": 8.3809, + "step": 649500 + }, + { + "epoch": 6.92, + "learning_rate": 7.618279813197431e-05, + "loss": 8.4401, + "step": 649600 + }, + { + "epoch": 6.92, + "learning_rate": 7.615571427969143e-05, + "loss": 8.5153, + "step": 649700 + }, + { + "epoch": 6.92, + "learning_rate": 7.612863228178109e-05, + "loss": 8.4139, + "step": 649800 + }, + { + "epoch": 6.92, + "learning_rate": 7.61015521403495e-05, + "loss": 8.4454, + "step": 649900 + }, + { + "epoch": 6.92, + "learning_rate": 7.607447385750267e-05, + "loss": 8.4274, + "step": 650000 + }, + { + "epoch": 6.92, + "learning_rate": 7.604739743534649e-05, + "loss": 8.408, + "step": 650100 + }, + { + "epoch": 6.93, + "learning_rate": 7.602032287598668e-05, + "loss": 8.4514, + "step": 650200 + }, + { + "epoch": 6.93, + "learning_rate": 7.599325018152886e-05, + "loss": 8.5111, + "step": 650300 + }, + { + "epoch": 6.93, + "learning_rate": 7.596617935407845e-05, + "loss": 8.4094, + "step": 650400 + }, + { + "epoch": 6.93, + "learning_rate": 7.593911039574076e-05, + "loss": 8.4151, + "step": 650500 + }, + { + "epoch": 6.93, + "learning_rate": 7.591204330862095e-05, + "loss": 8.4363, + "step": 650600 + }, + { + "epoch": 6.93, + "learning_rate": 7.588497809482403e-05, + "loss": 8.4833, + "step": 650700 + }, + { + "epoch": 6.93, + "learning_rate": 7.585791475645484e-05, + "loss": 8.4222, + "step": 650800 + }, + { + "epoch": 6.93, + "learning_rate": 7.583085329561815e-05, + "loss": 8.4354, + "step": 650900 + }, + { + "epoch": 6.93, + "learning_rate": 7.580379371441847e-05, + "loss": 8.4346, + "step": 651000 + }, + { + "epoch": 6.94, + "learning_rate": 7.577673601496027e-05, + "loss": 8.4595, + "step": 651100 + }, + { + "epoch": 6.94, + "learning_rate": 7.574968019934782e-05, + "loss": 8.3455, + "step": 651200 + }, + { + "epoch": 6.94, + "learning_rate": 7.572262626968524e-05, + "loss": 8.486, + "step": 651300 + }, + { + "epoch": 6.94, + "learning_rate": 7.569557422807656e-05, + "loss": 8.364, + "step": 651400 + }, + { + "epoch": 6.94, + "learning_rate": 7.566852407662557e-05, + "loss": 8.409, + "step": 651500 + }, + { + "epoch": 6.94, + "learning_rate": 7.564147581743599e-05, + "loss": 8.3525, + "step": 651600 + }, + { + "epoch": 6.94, + "learning_rate": 7.561442945261133e-05, + "loss": 8.4226, + "step": 651700 + }, + { + "epoch": 6.94, + "learning_rate": 7.558738498425507e-05, + "loss": 8.3859, + "step": 651800 + }, + { + "epoch": 6.94, + "learning_rate": 7.556034241447036e-05, + "loss": 8.4856, + "step": 651900 + }, + { + "epoch": 6.94, + "learning_rate": 7.553330174536036e-05, + "loss": 8.3856, + "step": 652000 + }, + { + "epoch": 6.95, + "learning_rate": 7.550626297902802e-05, + "loss": 8.3653, + "step": 652100 + }, + { + "epoch": 6.95, + "learning_rate": 7.547922611757617e-05, + "loss": 8.4271, + "step": 652200 + }, + { + "epoch": 6.95, + "learning_rate": 7.54521911631074e-05, + "loss": 8.5169, + "step": 652300 + }, + { + "epoch": 6.95, + "learning_rate": 7.542515811772432e-05, + "loss": 8.438, + "step": 652400 + }, + { + "epoch": 6.95, + "learning_rate": 7.539812698352916e-05, + "loss": 8.3981, + "step": 652500 + }, + { + "epoch": 6.95, + "learning_rate": 7.537109776262428e-05, + "loss": 8.3645, + "step": 652600 + }, + { + "epoch": 6.95, + "learning_rate": 7.534407045711162e-05, + "loss": 8.3573, + "step": 652700 + }, + { + "epoch": 6.95, + "learning_rate": 7.531704506909323e-05, + "loss": 8.3851, + "step": 652800 + }, + { + "epoch": 6.95, + "learning_rate": 7.529002160067072e-05, + "loss": 8.3844, + "step": 652900 + }, + { + "epoch": 6.96, + "learning_rate": 7.526300005394586e-05, + "loss": 8.4799, + "step": 653000 + }, + { + "epoch": 6.96, + "learning_rate": 7.523598043101999e-05, + "loss": 8.4565, + "step": 653100 + }, + { + "epoch": 6.96, + "learning_rate": 7.52089627339945e-05, + "loss": 8.3766, + "step": 653200 + }, + { + "epoch": 6.96, + "learning_rate": 7.518194696497057e-05, + "loss": 8.4898, + "step": 653300 + }, + { + "epoch": 6.96, + "learning_rate": 7.515493312604915e-05, + "loss": 8.4602, + "step": 653400 + }, + { + "epoch": 6.96, + "learning_rate": 7.51279212193312e-05, + "loss": 8.3758, + "step": 653500 + }, + { + "epoch": 6.96, + "learning_rate": 7.510091124691734e-05, + "loss": 8.4518, + "step": 653600 + }, + { + "epoch": 6.96, + "learning_rate": 7.507390321090826e-05, + "loss": 8.3674, + "step": 653700 + }, + { + "epoch": 6.96, + "learning_rate": 7.504689711340425e-05, + "loss": 8.4023, + "step": 653800 + }, + { + "epoch": 6.96, + "learning_rate": 7.501989295650569e-05, + "loss": 8.4592, + "step": 653900 + }, + { + "epoch": 6.97, + "learning_rate": 7.499289074231257e-05, + "loss": 8.4332, + "step": 654000 + }, + { + "epoch": 6.97, + "learning_rate": 7.4965890472925e-05, + "loss": 8.4073, + "step": 654100 + }, + { + "epoch": 6.97, + "learning_rate": 7.493889215044265e-05, + "loss": 8.4053, + "step": 654200 + }, + { + "epoch": 6.97, + "learning_rate": 7.491189577696531e-05, + "loss": 8.4804, + "step": 654300 + }, + { + "epoch": 6.97, + "learning_rate": 7.48849013545924e-05, + "loss": 8.4067, + "step": 654400 + }, + { + "epoch": 6.97, + "learning_rate": 7.485790888542335e-05, + "loss": 8.3596, + "step": 654500 + }, + { + "epoch": 6.97, + "learning_rate": 7.483091837155728e-05, + "loss": 8.5037, + "step": 654600 + }, + { + "epoch": 6.97, + "learning_rate": 7.480392981509332e-05, + "loss": 8.3081, + "step": 654700 + }, + { + "epoch": 6.97, + "learning_rate": 7.477694321813034e-05, + "loss": 8.4807, + "step": 654800 + }, + { + "epoch": 6.98, + "learning_rate": 7.47499585827671e-05, + "loss": 8.4695, + "step": 654900 + }, + { + "epoch": 6.98, + "learning_rate": 7.47229759111022e-05, + "loss": 8.4184, + "step": 655000 + }, + { + "epoch": 6.98, + "learning_rate": 7.469599520523407e-05, + "loss": 8.3723, + "step": 655100 + }, + { + "epoch": 6.98, + "learning_rate": 7.466901646726101e-05, + "loss": 8.3264, + "step": 655200 + }, + { + "epoch": 6.98, + "learning_rate": 7.464203969928114e-05, + "loss": 8.3358, + "step": 655300 + }, + { + "epoch": 6.98, + "learning_rate": 7.461506490339255e-05, + "loss": 8.3855, + "step": 655400 + }, + { + "epoch": 6.98, + "learning_rate": 7.458809208169292e-05, + "loss": 8.3873, + "step": 655500 + }, + { + "epoch": 6.98, + "learning_rate": 7.456112123628003e-05, + "loss": 8.4141, + "step": 655600 + }, + { + "epoch": 6.98, + "learning_rate": 7.453415236925136e-05, + "loss": 8.3996, + "step": 655700 + }, + { + "epoch": 6.99, + "learning_rate": 7.45071854827043e-05, + "loss": 8.4816, + "step": 655800 + }, + { + "epoch": 6.99, + "learning_rate": 7.448022057873607e-05, + "loss": 8.4039, + "step": 655900 + }, + { + "epoch": 6.99, + "learning_rate": 7.445325765944373e-05, + "loss": 8.3814, + "step": 656000 + }, + { + "epoch": 6.99, + "learning_rate": 7.442629672692417e-05, + "loss": 8.4521, + "step": 656100 + }, + { + "epoch": 6.99, + "learning_rate": 7.439933778327419e-05, + "loss": 8.4248, + "step": 656200 + }, + { + "epoch": 6.99, + "learning_rate": 7.437238083059033e-05, + "loss": 8.4679, + "step": 656300 + }, + { + "epoch": 6.99, + "learning_rate": 7.434542587096909e-05, + "loss": 8.3724, + "step": 656400 + }, + { + "epoch": 6.99, + "learning_rate": 7.431847290650672e-05, + "loss": 8.4307, + "step": 656500 + }, + { + "epoch": 6.99, + "learning_rate": 7.42915219392994e-05, + "loss": 8.3559, + "step": 656600 + }, + { + "epoch": 6.99, + "learning_rate": 7.426457297144304e-05, + "loss": 8.4167, + "step": 656700 + }, + { + "epoch": 7.0, + "learning_rate": 7.423762600503355e-05, + "loss": 8.342, + "step": 656800 + }, + { + "epoch": 7.0, + "learning_rate": 7.42106810421665e-05, + "loss": 8.4148, + "step": 656900 + }, + { + "epoch": 7.0, + "learning_rate": 7.41837380849375e-05, + "loss": 8.4249, + "step": 657000 + }, + { + "epoch": 7.0, + "learning_rate": 7.415679713544184e-05, + "loss": 8.3992, + "step": 657100 + }, + { + "epoch": 7.0, + "learning_rate": 7.412985819577476e-05, + "loss": 8.4166, + "step": 657200 + }, + { + "epoch": 7.0, + "learning_rate": 7.410292126803125e-05, + "loss": 8.3132, + "step": 657300 + }, + { + "epoch": 7.0, + "learning_rate": 7.407598635430625e-05, + "loss": 8.4117, + "step": 657400 + }, + { + "epoch": 7.0, + "learning_rate": 7.404905345669451e-05, + "loss": 8.4263, + "step": 657500 + }, + { + "epoch": 7.0, + "learning_rate": 7.402212257729053e-05, + "loss": 8.4366, + "step": 657600 + }, + { + "epoch": 7.01, + "learning_rate": 7.399519371818879e-05, + "loss": 8.3817, + "step": 657700 + }, + { + "epoch": 7.01, + "learning_rate": 7.39682668814835e-05, + "loss": 8.4676, + "step": 657800 + }, + { + "epoch": 7.01, + "learning_rate": 7.394134206926883e-05, + "loss": 8.499, + "step": 657900 + }, + { + "epoch": 7.01, + "learning_rate": 7.391441928363865e-05, + "loss": 8.3965, + "step": 658000 + }, + { + "epoch": 7.01, + "learning_rate": 7.38874985266868e-05, + "loss": 8.4679, + "step": 658100 + }, + { + "epoch": 7.01, + "learning_rate": 7.38605798005069e-05, + "loss": 8.4265, + "step": 658200 + }, + { + "epoch": 7.01, + "learning_rate": 7.38336631071924e-05, + "loss": 8.4544, + "step": 658300 + }, + { + "epoch": 7.01, + "learning_rate": 7.380674844883663e-05, + "loss": 8.4146, + "step": 658400 + }, + { + "epoch": 7.01, + "learning_rate": 7.377983582753275e-05, + "loss": 8.4098, + "step": 658500 + }, + { + "epoch": 7.01, + "learning_rate": 7.375292524537376e-05, + "loss": 8.3984, + "step": 658600 + }, + { + "epoch": 7.02, + "learning_rate": 7.37260167044525e-05, + "loss": 8.2634, + "step": 658700 + }, + { + "epoch": 7.02, + "learning_rate": 7.36991102068616e-05, + "loss": 8.4326, + "step": 658800 + }, + { + "epoch": 7.02, + "learning_rate": 7.367220575469367e-05, + "loss": 8.5116, + "step": 658900 + }, + { + "epoch": 7.02, + "learning_rate": 7.364530335004099e-05, + "loss": 8.4527, + "step": 659000 + }, + { + "epoch": 7.02, + "learning_rate": 7.361840299499584e-05, + "loss": 8.4144, + "step": 659100 + }, + { + "epoch": 7.02, + "learning_rate": 7.359150469165016e-05, + "loss": 8.4051, + "step": 659200 + }, + { + "epoch": 7.02, + "learning_rate": 7.356460844209596e-05, + "loss": 8.3978, + "step": 659300 + }, + { + "epoch": 7.02, + "learning_rate": 7.353771424842483e-05, + "loss": 8.4083, + "step": 659400 + }, + { + "epoch": 7.02, + "learning_rate": 7.351082211272843e-05, + "loss": 8.4466, + "step": 659500 + }, + { + "epoch": 7.03, + "learning_rate": 7.348393203709816e-05, + "loss": 8.4651, + "step": 659600 + }, + { + "epoch": 7.03, + "learning_rate": 7.345704402362521e-05, + "loss": 8.42, + "step": 659700 + }, + { + "epoch": 7.03, + "learning_rate": 7.343015807440072e-05, + "loss": 8.3943, + "step": 659800 + }, + { + "epoch": 7.03, + "learning_rate": 7.340327419151556e-05, + "loss": 8.4234, + "step": 659900 + }, + { + "epoch": 7.03, + "learning_rate": 7.337639237706055e-05, + "loss": 8.3309, + "step": 660000 + }, + { + "epoch": 7.03, + "learning_rate": 7.334951263312623e-05, + "loss": 8.4198, + "step": 660100 + }, + { + "epoch": 7.03, + "learning_rate": 7.332263496180313e-05, + "loss": 8.4328, + "step": 660200 + }, + { + "epoch": 7.03, + "learning_rate": 7.329575936518141e-05, + "loss": 8.4438, + "step": 660300 + }, + { + "epoch": 7.03, + "learning_rate": 7.32688858453513e-05, + "loss": 8.4497, + "step": 660400 + }, + { + "epoch": 7.04, + "learning_rate": 7.324201440440265e-05, + "loss": 8.4215, + "step": 660500 + }, + { + "epoch": 7.04, + "learning_rate": 7.321514504442537e-05, + "loss": 8.4027, + "step": 660600 + }, + { + "epoch": 7.04, + "learning_rate": 7.318827776750896e-05, + "loss": 8.4236, + "step": 660700 + }, + { + "epoch": 7.04, + "learning_rate": 7.316141257574305e-05, + "loss": 8.4379, + "step": 660800 + }, + { + "epoch": 7.04, + "learning_rate": 7.313454947121678e-05, + "loss": 8.4178, + "step": 660900 + }, + { + "epoch": 7.04, + "learning_rate": 7.310768845601945e-05, + "loss": 8.4767, + "step": 661000 + }, + { + "epoch": 7.04, + "learning_rate": 7.308082953223991e-05, + "loss": 8.3884, + "step": 661100 + }, + { + "epoch": 7.04, + "learning_rate": 7.30539727019671e-05, + "loss": 8.4181, + "step": 661200 + }, + { + "epoch": 7.04, + "learning_rate": 7.302711796728959e-05, + "loss": 8.4029, + "step": 661300 + }, + { + "epoch": 7.04, + "learning_rate": 7.300026533029594e-05, + "loss": 8.3238, + "step": 661400 + }, + { + "epoch": 7.05, + "learning_rate": 7.297341479307441e-05, + "loss": 8.3654, + "step": 661500 + }, + { + "epoch": 7.05, + "learning_rate": 7.29465663577132e-05, + "loss": 8.383, + "step": 661600 + }, + { + "epoch": 7.05, + "learning_rate": 7.291972002630038e-05, + "loss": 8.4246, + "step": 661700 + }, + { + "epoch": 7.05, + "learning_rate": 7.289287580092368e-05, + "loss": 8.3381, + "step": 661800 + }, + { + "epoch": 7.05, + "learning_rate": 7.286603368367089e-05, + "loss": 8.4791, + "step": 661900 + }, + { + "epoch": 7.05, + "learning_rate": 7.283919367662942e-05, + "loss": 8.3769, + "step": 662000 + }, + { + "epoch": 7.05, + "learning_rate": 7.281235578188672e-05, + "loss": 8.4404, + "step": 662100 + }, + { + "epoch": 7.05, + "learning_rate": 7.278552000152987e-05, + "loss": 8.3524, + "step": 662200 + }, + { + "epoch": 7.05, + "learning_rate": 7.275868633764602e-05, + "loss": 8.4721, + "step": 662300 + }, + { + "epoch": 7.06, + "learning_rate": 7.273185479232189e-05, + "loss": 8.3665, + "step": 662400 + }, + { + "epoch": 7.06, + "learning_rate": 7.270502536764428e-05, + "loss": 8.3884, + "step": 662500 + }, + { + "epoch": 7.06, + "learning_rate": 7.267819806569965e-05, + "loss": 8.4531, + "step": 662600 + }, + { + "epoch": 7.06, + "learning_rate": 7.26513728885744e-05, + "loss": 8.4106, + "step": 662700 + }, + { + "epoch": 7.06, + "learning_rate": 7.262454983835468e-05, + "loss": 8.4048, + "step": 662800 + }, + { + "epoch": 7.06, + "learning_rate": 7.259772891712659e-05, + "loss": 8.4227, + "step": 662900 + }, + { + "epoch": 7.06, + "learning_rate": 7.257091012697593e-05, + "loss": 8.3954, + "step": 663000 + }, + { + "epoch": 7.06, + "learning_rate": 7.254409346998845e-05, + "loss": 8.4994, + "step": 663100 + }, + { + "epoch": 7.06, + "learning_rate": 7.251727894824966e-05, + "loss": 8.4122, + "step": 663200 + }, + { + "epoch": 7.07, + "learning_rate": 7.249046656384492e-05, + "loss": 8.4022, + "step": 663300 + }, + { + "epoch": 7.07, + "learning_rate": 7.246365631885945e-05, + "loss": 8.3832, + "step": 663400 + }, + { + "epoch": 7.07, + "learning_rate": 7.24368482153783e-05, + "loss": 8.4295, + "step": 663500 + }, + { + "epoch": 7.07, + "learning_rate": 7.24100422554863e-05, + "loss": 8.4797, + "step": 663600 + }, + { + "epoch": 7.07, + "learning_rate": 7.238323844126816e-05, + "loss": 8.4673, + "step": 663700 + }, + { + "epoch": 7.07, + "learning_rate": 7.235643677480843e-05, + "loss": 8.3788, + "step": 663800 + }, + { + "epoch": 7.07, + "learning_rate": 7.232963725819148e-05, + "loss": 8.4493, + "step": 663900 + }, + { + "epoch": 7.07, + "learning_rate": 7.230283989350151e-05, + "loss": 8.453, + "step": 664000 + }, + { + "epoch": 7.07, + "learning_rate": 7.227604468282254e-05, + "loss": 8.4197, + "step": 664100 + }, + { + "epoch": 7.07, + "learning_rate": 7.224925162823846e-05, + "loss": 8.4016, + "step": 664200 + }, + { + "epoch": 7.08, + "learning_rate": 7.222246073183294e-05, + "loss": 8.3592, + "step": 664300 + }, + { + "epoch": 7.08, + "learning_rate": 7.219567199568953e-05, + "loss": 8.4209, + "step": 664400 + }, + { + "epoch": 7.08, + "learning_rate": 7.216888542189159e-05, + "loss": 8.4078, + "step": 664500 + }, + { + "epoch": 7.08, + "learning_rate": 7.214210101252232e-05, + "loss": 8.3248, + "step": 664600 + }, + { + "epoch": 7.08, + "learning_rate": 7.211531876966471e-05, + "loss": 8.4783, + "step": 664700 + }, + { + "epoch": 7.08, + "learning_rate": 7.208853869540169e-05, + "loss": 8.4227, + "step": 664800 + }, + { + "epoch": 7.08, + "learning_rate": 7.206176079181586e-05, + "loss": 8.3899, + "step": 664900 + }, + { + "epoch": 7.08, + "learning_rate": 7.203498506098982e-05, + "loss": 8.3928, + "step": 665000 + }, + { + "epoch": 7.08, + "learning_rate": 7.200821150500587e-05, + "loss": 8.3166, + "step": 665100 + }, + { + "epoch": 7.09, + "learning_rate": 7.198144012594623e-05, + "loss": 8.4393, + "step": 665200 + }, + { + "epoch": 7.09, + "learning_rate": 7.195467092589286e-05, + "loss": 8.321, + "step": 665300 + }, + { + "epoch": 7.09, + "learning_rate": 7.192790390692766e-05, + "loss": 8.4184, + "step": 665400 + }, + { + "epoch": 7.09, + "learning_rate": 7.190113907113227e-05, + "loss": 8.4299, + "step": 665500 + }, + { + "epoch": 7.09, + "learning_rate": 7.18743764205882e-05, + "loss": 8.3911, + "step": 665600 + }, + { + "epoch": 7.09, + "learning_rate": 7.18476159573768e-05, + "loss": 8.3518, + "step": 665700 + }, + { + "epoch": 7.09, + "learning_rate": 7.18208576835792e-05, + "loss": 8.4316, + "step": 665800 + }, + { + "epoch": 7.09, + "learning_rate": 7.179410160127645e-05, + "loss": 8.4927, + "step": 665900 + }, + { + "epoch": 7.09, + "learning_rate": 7.176734771254931e-05, + "loss": 8.5519, + "step": 666000 + }, + { + "epoch": 7.09, + "learning_rate": 7.17405960194785e-05, + "loss": 8.4656, + "step": 666100 + }, + { + "epoch": 7.1, + "learning_rate": 7.171384652414441e-05, + "loss": 8.3754, + "step": 666200 + }, + { + "epoch": 7.1, + "learning_rate": 7.168709922862744e-05, + "loss": 8.4315, + "step": 666300 + }, + { + "epoch": 7.1, + "learning_rate": 7.166035413500768e-05, + "loss": 8.4057, + "step": 666400 + }, + { + "epoch": 7.1, + "learning_rate": 7.163361124536512e-05, + "loss": 8.4329, + "step": 666500 + }, + { + "epoch": 7.1, + "learning_rate": 7.160687056177954e-05, + "loss": 8.4347, + "step": 666600 + }, + { + "epoch": 7.1, + "learning_rate": 7.15801320863306e-05, + "loss": 8.3958, + "step": 666700 + }, + { + "epoch": 7.1, + "learning_rate": 7.155339582109771e-05, + "loss": 8.4996, + "step": 666800 + }, + { + "epoch": 7.1, + "learning_rate": 7.152666176816019e-05, + "loss": 8.4261, + "step": 666900 + }, + { + "epoch": 7.1, + "learning_rate": 7.149992992959711e-05, + "loss": 8.3349, + "step": 667000 + }, + { + "epoch": 7.11, + "learning_rate": 7.147320030748747e-05, + "loss": 8.4352, + "step": 667100 + }, + { + "epoch": 7.11, + "learning_rate": 7.144647290390994e-05, + "loss": 8.4143, + "step": 667200 + }, + { + "epoch": 7.11, + "learning_rate": 7.141974772094323e-05, + "loss": 8.3171, + "step": 667300 + }, + { + "epoch": 7.11, + "learning_rate": 7.139302476066563e-05, + "loss": 8.4312, + "step": 667400 + }, + { + "epoch": 7.11, + "learning_rate": 7.136630402515552e-05, + "loss": 8.4288, + "step": 667500 + }, + { + "epoch": 7.11, + "learning_rate": 7.133958551649086e-05, + "loss": 8.392, + "step": 667600 + }, + { + "epoch": 7.11, + "learning_rate": 7.131286923674966e-05, + "loss": 8.3968, + "step": 667700 + }, + { + "epoch": 7.11, + "learning_rate": 7.128615518800952e-05, + "loss": 8.4187, + "step": 667800 + }, + { + "epoch": 7.11, + "learning_rate": 7.125944337234809e-05, + "loss": 8.377, + "step": 667900 + }, + { + "epoch": 7.12, + "learning_rate": 7.123273379184274e-05, + "loss": 8.3482, + "step": 668000 + }, + { + "epoch": 7.12, + "learning_rate": 7.120602644857064e-05, + "loss": 8.3671, + "step": 668100 + }, + { + "epoch": 7.12, + "learning_rate": 7.11793213446089e-05, + "loss": 8.4338, + "step": 668200 + }, + { + "epoch": 7.12, + "learning_rate": 7.115261848203425e-05, + "loss": 8.3749, + "step": 668300 + }, + { + "epoch": 7.12, + "learning_rate": 7.11259178629235e-05, + "loss": 8.4192, + "step": 668400 + }, + { + "epoch": 7.12, + "learning_rate": 7.109921948935306e-05, + "loss": 8.3558, + "step": 668500 + }, + { + "epoch": 7.12, + "learning_rate": 7.107252336339938e-05, + "loss": 8.4305, + "step": 668600 + }, + { + "epoch": 7.12, + "learning_rate": 7.104582948713848e-05, + "loss": 8.3053, + "step": 668700 + }, + { + "epoch": 7.12, + "learning_rate": 7.101913786264649e-05, + "loss": 8.4625, + "step": 668800 + }, + { + "epoch": 7.12, + "learning_rate": 7.099244849199907e-05, + "loss": 8.3858, + "step": 668900 + }, + { + "epoch": 7.13, + "learning_rate": 7.096576137727202e-05, + "loss": 8.4807, + "step": 669000 + }, + { + "epoch": 7.13, + "learning_rate": 7.093907652054066e-05, + "loss": 8.3771, + "step": 669100 + }, + { + "epoch": 7.13, + "learning_rate": 7.091239392388033e-05, + "loss": 8.4409, + "step": 669200 + }, + { + "epoch": 7.13, + "learning_rate": 7.088571358936616e-05, + "loss": 8.385, + "step": 669300 + }, + { + "epoch": 7.13, + "learning_rate": 7.085903551907304e-05, + "loss": 8.4105, + "step": 669400 + }, + { + "epoch": 7.13, + "learning_rate": 7.083235971507574e-05, + "loss": 8.4479, + "step": 669500 + }, + { + "epoch": 7.13, + "learning_rate": 7.080568617944886e-05, + "loss": 8.3692, + "step": 669600 + }, + { + "epoch": 7.13, + "learning_rate": 7.077901491426677e-05, + "loss": 8.474, + "step": 669700 + }, + { + "epoch": 7.13, + "learning_rate": 7.075234592160373e-05, + "loss": 8.3984, + "step": 669800 + }, + { + "epoch": 7.14, + "learning_rate": 7.072567920353377e-05, + "loss": 8.4514, + "step": 669900 + }, + { + "epoch": 7.14, + "learning_rate": 7.069901476213079e-05, + "loss": 8.345, + "step": 670000 + }, + { + "epoch": 7.14, + "learning_rate": 7.067235259946844e-05, + "loss": 8.41, + "step": 670100 + }, + { + "epoch": 7.14, + "learning_rate": 7.064569271762026e-05, + "loss": 8.3939, + "step": 670200 + }, + { + "epoch": 7.14, + "learning_rate": 7.061903511865962e-05, + "loss": 8.4698, + "step": 670300 + }, + { + "epoch": 7.14, + "learning_rate": 7.059237980465965e-05, + "loss": 8.3954, + "step": 670400 + }, + { + "epoch": 7.14, + "learning_rate": 7.056572677769335e-05, + "loss": 8.3294, + "step": 670500 + }, + { + "epoch": 7.14, + "learning_rate": 7.053907603983352e-05, + "loss": 8.4247, + "step": 670600 + }, + { + "epoch": 7.14, + "learning_rate": 7.051242759315281e-05, + "loss": 8.3916, + "step": 670700 + }, + { + "epoch": 7.14, + "learning_rate": 7.048578143972364e-05, + "loss": 8.4098, + "step": 670800 + }, + { + "epoch": 7.15, + "learning_rate": 7.045913758161832e-05, + "loss": 8.4417, + "step": 670900 + }, + { + "epoch": 7.15, + "learning_rate": 7.043249602090892e-05, + "loss": 8.4027, + "step": 671000 + }, + { + "epoch": 7.15, + "learning_rate": 7.040585675966738e-05, + "loss": 8.4244, + "step": 671100 + }, + { + "epoch": 7.15, + "learning_rate": 7.03792197999654e-05, + "loss": 8.4289, + "step": 671200 + }, + { + "epoch": 7.15, + "learning_rate": 7.035258514387459e-05, + "loss": 8.4284, + "step": 671300 + }, + { + "epoch": 7.15, + "learning_rate": 7.032595279346628e-05, + "loss": 8.3293, + "step": 671400 + }, + { + "epoch": 7.15, + "learning_rate": 7.029932275081173e-05, + "loss": 8.3622, + "step": 671500 + }, + { + "epoch": 7.15, + "learning_rate": 7.027269501798188e-05, + "loss": 8.3377, + "step": 671600 + }, + { + "epoch": 7.15, + "learning_rate": 7.024606959704765e-05, + "loss": 8.4432, + "step": 671700 + }, + { + "epoch": 7.16, + "learning_rate": 7.021944649007964e-05, + "loss": 8.445, + "step": 671800 + }, + { + "epoch": 7.16, + "learning_rate": 7.019282569914837e-05, + "loss": 8.3882, + "step": 671900 + }, + { + "epoch": 7.16, + "learning_rate": 7.016620722632415e-05, + "loss": 8.3894, + "step": 672000 + }, + { + "epoch": 7.16, + "learning_rate": 7.013959107367706e-05, + "loss": 8.3952, + "step": 672100 + }, + { + "epoch": 7.16, + "learning_rate": 7.011297724327707e-05, + "loss": 8.4551, + "step": 672200 + }, + { + "epoch": 7.16, + "learning_rate": 7.008636573719393e-05, + "loss": 8.3521, + "step": 672300 + }, + { + "epoch": 7.16, + "learning_rate": 7.005975655749723e-05, + "loss": 8.3796, + "step": 672400 + }, + { + "epoch": 7.16, + "learning_rate": 7.003314970625637e-05, + "loss": 8.4552, + "step": 672500 + }, + { + "epoch": 7.16, + "learning_rate": 7.000654518554055e-05, + "loss": 8.3184, + "step": 672600 + }, + { + "epoch": 7.17, + "learning_rate": 6.997994299741881e-05, + "loss": 8.4079, + "step": 672700 + }, + { + "epoch": 7.17, + "learning_rate": 6.995334314396004e-05, + "loss": 8.4499, + "step": 672800 + }, + { + "epoch": 7.17, + "learning_rate": 6.992674562723284e-05, + "loss": 8.4047, + "step": 672900 + }, + { + "epoch": 7.17, + "learning_rate": 6.99001504493058e-05, + "loss": 8.4225, + "step": 673000 + }, + { + "epoch": 7.17, + "learning_rate": 6.987355761224715e-05, + "loss": 8.3548, + "step": 673100 + }, + { + "epoch": 7.17, + "learning_rate": 6.984696711812504e-05, + "loss": 8.4309, + "step": 673200 + }, + { + "epoch": 7.17, + "learning_rate": 6.982037896900741e-05, + "loss": 8.4109, + "step": 673300 + }, + { + "epoch": 7.17, + "learning_rate": 6.979379316696207e-05, + "loss": 8.4086, + "step": 673400 + }, + { + "epoch": 7.17, + "learning_rate": 6.976720971405652e-05, + "loss": 8.4169, + "step": 673500 + }, + { + "epoch": 7.17, + "learning_rate": 6.974062861235822e-05, + "loss": 8.4361, + "step": 673600 + }, + { + "epoch": 7.18, + "learning_rate": 6.971404986393436e-05, + "loss": 8.3859, + "step": 673700 + }, + { + "epoch": 7.18, + "learning_rate": 6.968747347085199e-05, + "loss": 8.414, + "step": 673800 + }, + { + "epoch": 7.18, + "learning_rate": 6.966089943517793e-05, + "loss": 8.4196, + "step": 673900 + }, + { + "epoch": 7.18, + "learning_rate": 6.96343277589789e-05, + "loss": 8.4411, + "step": 674000 + }, + { + "epoch": 7.18, + "learning_rate": 6.960775844432128e-05, + "loss": 8.4191, + "step": 674100 + }, + { + "epoch": 7.18, + "learning_rate": 6.958119149327151e-05, + "loss": 8.4434, + "step": 674200 + }, + { + "epoch": 7.18, + "learning_rate": 6.955462690789556e-05, + "loss": 8.368, + "step": 674300 + }, + { + "epoch": 7.18, + "learning_rate": 6.952806469025943e-05, + "loss": 8.3475, + "step": 674400 + }, + { + "epoch": 7.18, + "learning_rate": 6.95015048424289e-05, + "loss": 8.4697, + "step": 674500 + }, + { + "epoch": 7.19, + "learning_rate": 6.947494736646947e-05, + "loss": 8.4659, + "step": 674600 + }, + { + "epoch": 7.19, + "learning_rate": 6.944839226444657e-05, + "loss": 8.3778, + "step": 674700 + }, + { + "epoch": 7.19, + "learning_rate": 6.942183953842536e-05, + "loss": 8.3879, + "step": 674800 + }, + { + "epoch": 7.19, + "learning_rate": 6.939528919047084e-05, + "loss": 8.4407, + "step": 674900 + }, + { + "epoch": 7.19, + "learning_rate": 6.936874122264787e-05, + "loss": 8.4506, + "step": 675000 + }, + { + "epoch": 7.19, + "learning_rate": 6.934219563702108e-05, + "loss": 8.3146, + "step": 675100 + }, + { + "epoch": 7.19, + "learning_rate": 6.931565243565486e-05, + "loss": 8.3266, + "step": 675200 + }, + { + "epoch": 7.19, + "learning_rate": 6.928911162061358e-05, + "loss": 8.3778, + "step": 675300 + }, + { + "epoch": 7.19, + "learning_rate": 6.926257319396121e-05, + "loss": 8.3823, + "step": 675400 + }, + { + "epoch": 7.19, + "learning_rate": 6.923603715776177e-05, + "loss": 8.3998, + "step": 675500 + }, + { + "epoch": 7.2, + "learning_rate": 6.920950351407884e-05, + "loss": 8.4528, + "step": 675600 + }, + { + "epoch": 7.2, + "learning_rate": 6.918297226497608e-05, + "loss": 8.4392, + "step": 675700 + }, + { + "epoch": 7.2, + "learning_rate": 6.915644341251668e-05, + "loss": 8.4378, + "step": 675800 + }, + { + "epoch": 7.2, + "learning_rate": 6.912991695876395e-05, + "loss": 8.4485, + "step": 675900 + }, + { + "epoch": 7.2, + "learning_rate": 6.910339290578071e-05, + "loss": 8.3436, + "step": 676000 + }, + { + "epoch": 7.2, + "learning_rate": 6.907687125562984e-05, + "loss": 8.3932, + "step": 676100 + }, + { + "epoch": 7.2, + "learning_rate": 6.905035201037388e-05, + "loss": 8.3667, + "step": 676200 + }, + { + "epoch": 7.2, + "learning_rate": 6.902383517207526e-05, + "loss": 8.4343, + "step": 676300 + }, + { + "epoch": 7.2, + "learning_rate": 6.899732074279616e-05, + "loss": 8.3996, + "step": 676400 + }, + { + "epoch": 7.21, + "learning_rate": 6.897080872459863e-05, + "loss": 8.389, + "step": 676500 + }, + { + "epoch": 7.21, + "learning_rate": 6.894429911954456e-05, + "loss": 8.3711, + "step": 676600 + }, + { + "epoch": 7.21, + "learning_rate": 6.891779192969552e-05, + "loss": 8.3591, + "step": 676700 + }, + { + "epoch": 7.21, + "learning_rate": 6.889128715711307e-05, + "loss": 8.3039, + "step": 676800 + }, + { + "epoch": 7.21, + "learning_rate": 6.886478480385838e-05, + "loss": 8.3542, + "step": 676900 + }, + { + "epoch": 7.21, + "learning_rate": 6.883828487199266e-05, + "loss": 8.3904, + "step": 677000 + }, + { + "epoch": 7.21, + "learning_rate": 6.881178736357672e-05, + "loss": 8.4185, + "step": 677100 + }, + { + "epoch": 7.21, + "learning_rate": 6.878529228067132e-05, + "loss": 8.3673, + "step": 677200 + }, + { + "epoch": 7.21, + "learning_rate": 6.875879962533695e-05, + "loss": 8.4164, + "step": 677300 + }, + { + "epoch": 7.22, + "learning_rate": 6.8732309399634e-05, + "loss": 8.3623, + "step": 677400 + }, + { + "epoch": 7.22, + "learning_rate": 6.870582160562254e-05, + "loss": 8.3949, + "step": 677500 + }, + { + "epoch": 7.22, + "learning_rate": 6.867933624536262e-05, + "loss": 8.3292, + "step": 677600 + }, + { + "epoch": 7.22, + "learning_rate": 6.865285332091394e-05, + "loss": 8.4135, + "step": 677700 + }, + { + "epoch": 7.22, + "learning_rate": 6.862637283433611e-05, + "loss": 8.418, + "step": 677800 + }, + { + "epoch": 7.22, + "learning_rate": 6.85998947876885e-05, + "loss": 8.3836, + "step": 677900 + }, + { + "epoch": 7.22, + "learning_rate": 6.857341918303032e-05, + "loss": 8.4122, + "step": 678000 + }, + { + "epoch": 7.22, + "learning_rate": 6.85469460224206e-05, + "loss": 8.4, + "step": 678100 + }, + { + "epoch": 7.22, + "learning_rate": 6.852047530791815e-05, + "loss": 8.4092, + "step": 678200 + }, + { + "epoch": 7.22, + "learning_rate": 6.849400704158158e-05, + "loss": 8.299, + "step": 678300 + }, + { + "epoch": 7.23, + "learning_rate": 6.846754122546936e-05, + "loss": 8.2916, + "step": 678400 + }, + { + "epoch": 7.23, + "learning_rate": 6.84410778616397e-05, + "loss": 8.4047, + "step": 678500 + }, + { + "epoch": 7.23, + "learning_rate": 6.841461695215068e-05, + "loss": 8.3493, + "step": 678600 + }, + { + "epoch": 7.23, + "learning_rate": 6.83881584990602e-05, + "loss": 8.4568, + "step": 678700 + }, + { + "epoch": 7.23, + "learning_rate": 6.836170250442588e-05, + "loss": 8.3981, + "step": 678800 + }, + { + "epoch": 7.23, + "learning_rate": 6.833524897030526e-05, + "loss": 8.3934, + "step": 678900 + }, + { + "epoch": 7.23, + "learning_rate": 6.830879789875558e-05, + "loss": 8.4902, + "step": 679000 + }, + { + "epoch": 7.23, + "learning_rate": 6.828234929183399e-05, + "loss": 8.3852, + "step": 679100 + }, + { + "epoch": 7.23, + "learning_rate": 6.825590315159736e-05, + "loss": 8.4127, + "step": 679200 + }, + { + "epoch": 7.24, + "learning_rate": 6.822945948010246e-05, + "loss": 8.4126, + "step": 679300 + }, + { + "epoch": 7.24, + "learning_rate": 6.820301827940574e-05, + "loss": 8.4348, + "step": 679400 + }, + { + "epoch": 7.24, + "learning_rate": 6.817657955156362e-05, + "loss": 8.4592, + "step": 679500 + }, + { + "epoch": 7.24, + "learning_rate": 6.815014329863219e-05, + "loss": 8.4525, + "step": 679600 + }, + { + "epoch": 7.24, + "learning_rate": 6.812370952266741e-05, + "loss": 8.4007, + "step": 679700 + }, + { + "epoch": 7.24, + "learning_rate": 6.809727822572503e-05, + "loss": 8.4001, + "step": 679800 + }, + { + "epoch": 7.24, + "learning_rate": 6.807084940986066e-05, + "loss": 8.3711, + "step": 679900 + }, + { + "epoch": 7.24, + "learning_rate": 6.804442307712959e-05, + "loss": 8.4108, + "step": 680000 + }, + { + "epoch": 7.24, + "learning_rate": 6.801799922958709e-05, + "loss": 8.3912, + "step": 680100 + }, + { + "epoch": 7.25, + "learning_rate": 6.799157786928807e-05, + "loss": 8.404, + "step": 680200 + }, + { + "epoch": 7.25, + "learning_rate": 6.796515899828739e-05, + "loss": 8.4322, + "step": 680300 + }, + { + "epoch": 7.25, + "learning_rate": 6.793874261863957e-05, + "loss": 8.4065, + "step": 680400 + }, + { + "epoch": 7.25, + "learning_rate": 6.791232873239909e-05, + "loss": 8.4575, + "step": 680500 + }, + { + "epoch": 7.25, + "learning_rate": 6.78859173416201e-05, + "loss": 8.3892, + "step": 680600 + }, + { + "epoch": 7.25, + "learning_rate": 6.785950844835665e-05, + "loss": 8.4222, + "step": 680700 + }, + { + "epoch": 7.25, + "learning_rate": 6.783310205466257e-05, + "loss": 8.2779, + "step": 680800 + }, + { + "epoch": 7.25, + "learning_rate": 6.780669816259145e-05, + "loss": 8.4062, + "step": 680900 + }, + { + "epoch": 7.25, + "learning_rate": 6.778029677419678e-05, + "loss": 8.4105, + "step": 681000 + }, + { + "epoch": 7.25, + "learning_rate": 6.775389789153173e-05, + "loss": 8.3917, + "step": 681100 + }, + { + "epoch": 7.26, + "learning_rate": 6.772750151664941e-05, + "loss": 8.343, + "step": 681200 + }, + { + "epoch": 7.26, + "learning_rate": 6.770110765160262e-05, + "loss": 8.3328, + "step": 681300 + }, + { + "epoch": 7.26, + "learning_rate": 6.767471629844404e-05, + "loss": 8.414, + "step": 681400 + }, + { + "epoch": 7.26, + "learning_rate": 6.764832745922612e-05, + "loss": 8.3821, + "step": 681500 + }, + { + "epoch": 7.26, + "learning_rate": 6.762194113600113e-05, + "loss": 8.4977, + "step": 681600 + }, + { + "epoch": 7.26, + "learning_rate": 6.759555733082111e-05, + "loss": 8.4193, + "step": 681700 + }, + { + "epoch": 7.26, + "learning_rate": 6.756917604573798e-05, + "loss": 8.4161, + "step": 681800 + }, + { + "epoch": 7.26, + "learning_rate": 6.754279728280336e-05, + "loss": 8.3675, + "step": 681900 + }, + { + "epoch": 7.26, + "learning_rate": 6.751642104406879e-05, + "loss": 8.4394, + "step": 682000 + }, + { + "epoch": 7.27, + "learning_rate": 6.749004733158548e-05, + "loss": 8.3474, + "step": 682100 + }, + { + "epoch": 7.27, + "learning_rate": 6.74636761474046e-05, + "loss": 8.3748, + "step": 682200 + }, + { + "epoch": 7.27, + "learning_rate": 6.743730749357693e-05, + "loss": 8.4147, + "step": 682300 + }, + { + "epoch": 7.27, + "learning_rate": 6.741094137215329e-05, + "loss": 8.3644, + "step": 682400 + }, + { + "epoch": 7.27, + "learning_rate": 6.738457778518407e-05, + "loss": 8.4317, + "step": 682500 + }, + { + "epoch": 7.27, + "learning_rate": 6.735821673471966e-05, + "loss": 8.4248, + "step": 682600 + }, + { + "epoch": 7.27, + "learning_rate": 6.733185822281005e-05, + "loss": 8.4437, + "step": 682700 + }, + { + "epoch": 7.27, + "learning_rate": 6.730550225150523e-05, + "loss": 8.3652, + "step": 682800 + }, + { + "epoch": 7.27, + "learning_rate": 6.727914882285494e-05, + "loss": 8.3673, + "step": 682900 + }, + { + "epoch": 7.27, + "learning_rate": 6.725279793890855e-05, + "loss": 8.3395, + "step": 683000 + }, + { + "epoch": 7.28, + "learning_rate": 6.722644960171554e-05, + "loss": 8.3385, + "step": 683100 + }, + { + "epoch": 7.28, + "learning_rate": 6.720010381332486e-05, + "loss": 8.3045, + "step": 683200 + }, + { + "epoch": 7.28, + "learning_rate": 6.717376057578557e-05, + "loss": 8.3881, + "step": 683300 + }, + { + "epoch": 7.28, + "learning_rate": 6.714741989114625e-05, + "loss": 8.3757, + "step": 683400 + }, + { + "epoch": 7.28, + "learning_rate": 6.712108176145557e-05, + "loss": 8.3626, + "step": 683500 + }, + { + "epoch": 7.28, + "learning_rate": 6.709474618876168e-05, + "loss": 8.3617, + "step": 683600 + }, + { + "epoch": 7.28, + "learning_rate": 6.706841317511286e-05, + "loss": 8.2918, + "step": 683700 + }, + { + "epoch": 7.28, + "learning_rate": 6.70420827225569e-05, + "loss": 8.3848, + "step": 683800 + }, + { + "epoch": 7.28, + "learning_rate": 6.701575483314162e-05, + "loss": 8.362, + "step": 683900 + }, + { + "epoch": 7.29, + "learning_rate": 6.698942950891448e-05, + "loss": 8.381, + "step": 684000 + }, + { + "epoch": 7.29, + "learning_rate": 6.696310675192283e-05, + "loss": 8.3831, + "step": 684100 + }, + { + "epoch": 7.29, + "learning_rate": 6.693678656421378e-05, + "loss": 8.395, + "step": 684200 + }, + { + "epoch": 7.29, + "learning_rate": 6.691046894783428e-05, + "loss": 8.3717, + "step": 684300 + }, + { + "epoch": 7.29, + "learning_rate": 6.688415390483102e-05, + "loss": 8.3517, + "step": 684400 + }, + { + "epoch": 7.29, + "learning_rate": 6.685784143725055e-05, + "loss": 8.3842, + "step": 684500 + }, + { + "epoch": 7.29, + "learning_rate": 6.683153154713919e-05, + "loss": 8.3397, + "step": 684600 + }, + { + "epoch": 7.29, + "learning_rate": 6.680522423654305e-05, + "loss": 8.2979, + "step": 684700 + }, + { + "epoch": 7.29, + "learning_rate": 6.677891950750805e-05, + "loss": 8.3548, + "step": 684800 + }, + { + "epoch": 7.3, + "learning_rate": 6.675261736207989e-05, + "loss": 8.4069, + "step": 684900 + }, + { + "epoch": 7.3, + "learning_rate": 6.672631780230418e-05, + "loss": 8.3614, + "step": 685000 + }, + { + "epoch": 7.3, + "learning_rate": 6.670002083022614e-05, + "loss": 8.4326, + "step": 685100 + }, + { + "epoch": 7.3, + "learning_rate": 6.667372644789095e-05, + "loss": 8.3506, + "step": 685200 + }, + { + "epoch": 7.3, + "learning_rate": 6.664743465734348e-05, + "loss": 8.3209, + "step": 685300 + }, + { + "epoch": 7.3, + "learning_rate": 6.66211454606285e-05, + "loss": 8.3669, + "step": 685400 + }, + { + "epoch": 7.3, + "learning_rate": 6.659485885979048e-05, + "loss": 8.4289, + "step": 685500 + }, + { + "epoch": 7.3, + "learning_rate": 6.656857485687375e-05, + "loss": 8.323, + "step": 685600 + }, + { + "epoch": 7.3, + "learning_rate": 6.65422934539224e-05, + "loss": 8.476, + "step": 685700 + }, + { + "epoch": 7.3, + "learning_rate": 6.651601465298038e-05, + "loss": 8.3713, + "step": 685800 + }, + { + "epoch": 7.31, + "learning_rate": 6.648973845609133e-05, + "loss": 8.3977, + "step": 685900 + }, + { + "epoch": 7.31, + "learning_rate": 6.646346486529882e-05, + "loss": 8.3222, + "step": 686000 + }, + { + "epoch": 7.31, + "learning_rate": 6.64371938826461e-05, + "loss": 8.3526, + "step": 686100 + }, + { + "epoch": 7.31, + "learning_rate": 6.641092551017632e-05, + "loss": 8.4282, + "step": 686200 + }, + { + "epoch": 7.31, + "learning_rate": 6.63846597499323e-05, + "loss": 8.3357, + "step": 686300 + }, + { + "epoch": 7.31, + "learning_rate": 6.635839660395679e-05, + "loss": 8.3312, + "step": 686400 + }, + { + "epoch": 7.31, + "learning_rate": 6.633213607429226e-05, + "loss": 8.4293, + "step": 686500 + }, + { + "epoch": 7.31, + "learning_rate": 6.6305878162981e-05, + "loss": 8.3395, + "step": 686600 + }, + { + "epoch": 7.31, + "learning_rate": 6.627962287206506e-05, + "loss": 8.4239, + "step": 686700 + }, + { + "epoch": 7.32, + "learning_rate": 6.625337020358638e-05, + "loss": 8.4014, + "step": 686800 + }, + { + "epoch": 7.32, + "learning_rate": 6.622712015958655e-05, + "loss": 8.4099, + "step": 686900 + }, + { + "epoch": 7.32, + "learning_rate": 6.62008727421071e-05, + "loss": 8.4285, + "step": 687000 + }, + { + "epoch": 7.32, + "learning_rate": 6.617462795318929e-05, + "loss": 8.4067, + "step": 687100 + }, + { + "epoch": 7.32, + "learning_rate": 6.614838579487415e-05, + "loss": 8.4301, + "step": 687200 + }, + { + "epoch": 7.32, + "learning_rate": 6.612214626920256e-05, + "loss": 8.3723, + "step": 687300 + }, + { + "epoch": 7.32, + "learning_rate": 6.609590937821514e-05, + "loss": 8.3764, + "step": 687400 + }, + { + "epoch": 7.32, + "learning_rate": 6.60696751239524e-05, + "loss": 8.4094, + "step": 687500 + }, + { + "epoch": 7.32, + "learning_rate": 6.604344350845451e-05, + "loss": 8.3548, + "step": 687600 + }, + { + "epoch": 7.32, + "learning_rate": 6.601721453376155e-05, + "loss": 8.4091, + "step": 687700 + }, + { + "epoch": 7.33, + "learning_rate": 6.599098820191332e-05, + "loss": 8.4638, + "step": 687800 + }, + { + "epoch": 7.33, + "learning_rate": 6.596476451494946e-05, + "loss": 8.356, + "step": 687900 + }, + { + "epoch": 7.33, + "learning_rate": 6.59385434749094e-05, + "loss": 8.4048, + "step": 688000 + }, + { + "epoch": 7.33, + "learning_rate": 6.591232508383233e-05, + "loss": 8.3232, + "step": 688100 + }, + { + "epoch": 7.33, + "learning_rate": 6.588610934375727e-05, + "loss": 8.3646, + "step": 688200 + }, + { + "epoch": 7.33, + "learning_rate": 6.585989625672302e-05, + "loss": 8.4346, + "step": 688300 + }, + { + "epoch": 7.33, + "learning_rate": 6.583368582476817e-05, + "loss": 8.3842, + "step": 688400 + }, + { + "epoch": 7.33, + "learning_rate": 6.580747804993113e-05, + "loss": 8.4007, + "step": 688500 + }, + { + "epoch": 7.33, + "learning_rate": 6.578127293425003e-05, + "loss": 8.4069, + "step": 688600 + }, + { + "epoch": 7.34, + "learning_rate": 6.575507047976293e-05, + "loss": 8.365, + "step": 688700 + }, + { + "epoch": 7.34, + "learning_rate": 6.572887068850748e-05, + "loss": 8.4084, + "step": 688800 + }, + { + "epoch": 7.34, + "learning_rate": 6.570267356252135e-05, + "loss": 8.3941, + "step": 688900 + }, + { + "epoch": 7.34, + "learning_rate": 6.56764791038418e-05, + "loss": 8.4147, + "step": 689000 + }, + { + "epoch": 7.34, + "learning_rate": 6.565028731450605e-05, + "loss": 8.3519, + "step": 689100 + }, + { + "epoch": 7.34, + "learning_rate": 6.562409819655102e-05, + "loss": 8.3431, + "step": 689200 + }, + { + "epoch": 7.34, + "learning_rate": 6.559791175201344e-05, + "loss": 8.3293, + "step": 689300 + }, + { + "epoch": 7.34, + "learning_rate": 6.557172798292981e-05, + "loss": 8.4178, + "step": 689400 + }, + { + "epoch": 7.34, + "learning_rate": 6.554554689133645e-05, + "loss": 8.3044, + "step": 689500 + }, + { + "epoch": 7.35, + "learning_rate": 6.551936847926951e-05, + "loss": 8.3514, + "step": 689600 + }, + { + "epoch": 7.35, + "learning_rate": 6.549319274876482e-05, + "loss": 8.2949, + "step": 689700 + }, + { + "epoch": 7.35, + "learning_rate": 6.546701970185816e-05, + "loss": 8.3469, + "step": 689800 + }, + { + "epoch": 7.35, + "learning_rate": 6.544084934058488e-05, + "loss": 8.3981, + "step": 689900 + }, + { + "epoch": 7.35, + "learning_rate": 6.541468166698037e-05, + "loss": 8.3344, + "step": 690000 + }, + { + "epoch": 7.35, + "learning_rate": 6.538851668307961e-05, + "loss": 8.3181, + "step": 690100 + }, + { + "epoch": 7.35, + "learning_rate": 6.536235439091755e-05, + "loss": 8.3749, + "step": 690200 + }, + { + "epoch": 7.35, + "learning_rate": 6.53361947925287e-05, + "loss": 8.455, + "step": 690300 + }, + { + "epoch": 7.35, + "learning_rate": 6.531003788994765e-05, + "loss": 8.3975, + "step": 690400 + }, + { + "epoch": 7.35, + "learning_rate": 6.528388368520847e-05, + "loss": 8.3343, + "step": 690500 + }, + { + "epoch": 7.36, + "learning_rate": 6.525773218034533e-05, + "loss": 8.3945, + "step": 690600 + }, + { + "epoch": 7.36, + "learning_rate": 6.523158337739187e-05, + "loss": 8.3766, + "step": 690700 + }, + { + "epoch": 7.36, + "learning_rate": 6.520543727838184e-05, + "loss": 8.4301, + "step": 690800 + }, + { + "epoch": 7.36, + "learning_rate": 6.517929388534852e-05, + "loss": 8.4454, + "step": 690900 + }, + { + "epoch": 7.36, + "learning_rate": 6.515315320032514e-05, + "loss": 8.3622, + "step": 691000 + }, + { + "epoch": 7.36, + "learning_rate": 6.512701522534461e-05, + "loss": 8.3427, + "step": 691100 + }, + { + "epoch": 7.36, + "learning_rate": 6.510087996243972e-05, + "loss": 8.475, + "step": 691200 + }, + { + "epoch": 7.36, + "learning_rate": 6.507474741364306e-05, + "loss": 8.3798, + "step": 691300 + }, + { + "epoch": 7.36, + "learning_rate": 6.504861758098684e-05, + "loss": 8.3431, + "step": 691400 + }, + { + "epoch": 7.37, + "learning_rate": 6.502249046650331e-05, + "loss": 8.3309, + "step": 691500 + }, + { + "epoch": 7.37, + "learning_rate": 6.499636607222428e-05, + "loss": 8.3851, + "step": 691600 + }, + { + "epoch": 7.37, + "learning_rate": 6.497024440018153e-05, + "loss": 8.3792, + "step": 691700 + }, + { + "epoch": 7.37, + "learning_rate": 6.494412545240642e-05, + "loss": 8.3997, + "step": 691800 + }, + { + "epoch": 7.37, + "learning_rate": 6.491800923093038e-05, + "loss": 8.49, + "step": 691900 + }, + { + "epoch": 7.37, + "learning_rate": 6.489189573778439e-05, + "loss": 8.2792, + "step": 692000 + }, + { + "epoch": 7.37, + "learning_rate": 6.486578497499928e-05, + "loss": 8.3713, + "step": 692100 + }, + { + "epoch": 7.37, + "learning_rate": 6.48396769446057e-05, + "loss": 8.3985, + "step": 692200 + }, + { + "epoch": 7.37, + "learning_rate": 6.481357164863411e-05, + "loss": 8.3387, + "step": 692300 + }, + { + "epoch": 7.37, + "learning_rate": 6.478746908911467e-05, + "loss": 8.4102, + "step": 692400 + }, + { + "epoch": 7.38, + "learning_rate": 6.476136926807742e-05, + "loss": 8.4689, + "step": 692500 + }, + { + "epoch": 7.38, + "learning_rate": 6.473527218755212e-05, + "loss": 8.4803, + "step": 692600 + }, + { + "epoch": 7.38, + "learning_rate": 6.470917784956839e-05, + "loss": 8.3974, + "step": 692700 + }, + { + "epoch": 7.38, + "learning_rate": 6.468308625615549e-05, + "loss": 8.3528, + "step": 692800 + }, + { + "epoch": 7.38, + "learning_rate": 6.465699740934268e-05, + "loss": 8.4198, + "step": 692900 + }, + { + "epoch": 7.38, + "learning_rate": 6.463091131115882e-05, + "loss": 8.3716, + "step": 693000 + }, + { + "epoch": 7.38, + "learning_rate": 6.460482796363265e-05, + "loss": 8.3573, + "step": 693100 + }, + { + "epoch": 7.38, + "learning_rate": 6.457874736879268e-05, + "loss": 8.4145, + "step": 693200 + }, + { + "epoch": 7.38, + "learning_rate": 6.455266952866718e-05, + "loss": 8.3292, + "step": 693300 + }, + { + "epoch": 7.39, + "learning_rate": 6.452659444528425e-05, + "loss": 8.4543, + "step": 693400 + }, + { + "epoch": 7.39, + "learning_rate": 6.450052212067175e-05, + "loss": 8.4469, + "step": 693500 + }, + { + "epoch": 7.39, + "learning_rate": 6.447445255685732e-05, + "loss": 8.3601, + "step": 693600 + }, + { + "epoch": 7.39, + "learning_rate": 6.444838575586837e-05, + "loss": 8.4048, + "step": 693700 + }, + { + "epoch": 7.39, + "learning_rate": 6.44223217197322e-05, + "loss": 8.3649, + "step": 693800 + }, + { + "epoch": 7.39, + "learning_rate": 6.439626045047569e-05, + "loss": 8.3966, + "step": 693900 + }, + { + "epoch": 7.39, + "learning_rate": 6.437020195012573e-05, + "loss": 8.4548, + "step": 694000 + }, + { + "epoch": 7.39, + "learning_rate": 6.434414622070883e-05, + "loss": 8.4042, + "step": 694100 + }, + { + "epoch": 7.39, + "learning_rate": 6.431809326425137e-05, + "loss": 8.3085, + "step": 694200 + }, + { + "epoch": 7.4, + "learning_rate": 6.42920430827795e-05, + "loss": 8.2906, + "step": 694300 + }, + { + "epoch": 7.4, + "learning_rate": 6.426599567831915e-05, + "loss": 8.3368, + "step": 694400 + }, + { + "epoch": 7.4, + "learning_rate": 6.4239951052896e-05, + "loss": 8.3351, + "step": 694500 + }, + { + "epoch": 7.4, + "learning_rate": 6.421390920853557e-05, + "loss": 8.3878, + "step": 694600 + }, + { + "epoch": 7.4, + "learning_rate": 6.418787014726311e-05, + "loss": 8.406, + "step": 694700 + }, + { + "epoch": 7.4, + "learning_rate": 6.416183387110372e-05, + "loss": 8.3671, + "step": 694800 + }, + { + "epoch": 7.4, + "learning_rate": 6.413580038208221e-05, + "loss": 8.4278, + "step": 694900 + }, + { + "epoch": 7.4, + "learning_rate": 6.410976968222321e-05, + "loss": 8.443, + "step": 695000 + }, + { + "epoch": 7.4, + "learning_rate": 6.408374177355113e-05, + "loss": 8.3379, + "step": 695100 + }, + { + "epoch": 7.4, + "learning_rate": 6.40577166580902e-05, + "loss": 8.4052, + "step": 695200 + }, + { + "epoch": 7.41, + "learning_rate": 6.403169433786433e-05, + "loss": 8.434, + "step": 695300 + }, + { + "epoch": 7.41, + "learning_rate": 6.400567481489733e-05, + "loss": 8.4512, + "step": 695400 + }, + { + "epoch": 7.41, + "learning_rate": 6.397965809121274e-05, + "loss": 8.4079, + "step": 695500 + }, + { + "epoch": 7.41, + "learning_rate": 6.395364416883386e-05, + "loss": 8.3812, + "step": 695600 + }, + { + "epoch": 7.41, + "learning_rate": 6.392763304978382e-05, + "loss": 8.3314, + "step": 695700 + }, + { + "epoch": 7.41, + "learning_rate": 6.390162473608548e-05, + "loss": 8.4, + "step": 695800 + }, + { + "epoch": 7.41, + "learning_rate": 6.387561922976152e-05, + "loss": 8.3831, + "step": 695900 + }, + { + "epoch": 7.41, + "learning_rate": 6.384961653283439e-05, + "loss": 8.3772, + "step": 696000 + }, + { + "epoch": 7.41, + "learning_rate": 6.382361664732633e-05, + "loss": 8.343, + "step": 696100 + }, + { + "epoch": 7.42, + "learning_rate": 6.379761957525934e-05, + "loss": 8.3798, + "step": 696200 + }, + { + "epoch": 7.42, + "learning_rate": 6.377162531865526e-05, + "loss": 8.3461, + "step": 696300 + }, + { + "epoch": 7.42, + "learning_rate": 6.374563387953558e-05, + "loss": 8.3109, + "step": 696400 + }, + { + "epoch": 7.42, + "learning_rate": 6.371964525992175e-05, + "loss": 8.343, + "step": 696500 + }, + { + "epoch": 7.42, + "learning_rate": 6.369365946183483e-05, + "loss": 8.4254, + "step": 696600 + }, + { + "epoch": 7.42, + "learning_rate": 6.366767648729583e-05, + "loss": 8.3791, + "step": 696700 + }, + { + "epoch": 7.42, + "learning_rate": 6.364169633832531e-05, + "loss": 8.329, + "step": 696800 + }, + { + "epoch": 7.42, + "learning_rate": 6.361571901694389e-05, + "loss": 8.3778, + "step": 696900 + }, + { + "epoch": 7.42, + "learning_rate": 6.358974452517173e-05, + "loss": 8.4051, + "step": 697000 + }, + { + "epoch": 7.43, + "learning_rate": 6.356377286502897e-05, + "loss": 8.3469, + "step": 697100 + }, + { + "epoch": 7.43, + "learning_rate": 6.353780403853528e-05, + "loss": 8.3307, + "step": 697200 + }, + { + "epoch": 7.43, + "learning_rate": 6.351183804771043e-05, + "loss": 8.3602, + "step": 697300 + }, + { + "epoch": 7.43, + "learning_rate": 6.348587489457365e-05, + "loss": 8.4515, + "step": 697400 + }, + { + "epoch": 7.43, + "learning_rate": 6.345991458114418e-05, + "loss": 8.4082, + "step": 697500 + }, + { + "epoch": 7.43, + "learning_rate": 6.343395710944094e-05, + "loss": 8.4264, + "step": 697600 + }, + { + "epoch": 7.43, + "learning_rate": 6.340800248148263e-05, + "loss": 8.2652, + "step": 697700 + }, + { + "epoch": 7.43, + "learning_rate": 6.33820506992878e-05, + "loss": 8.3739, + "step": 697800 + }, + { + "epoch": 7.43, + "learning_rate": 6.335610176487462e-05, + "loss": 8.1965, + "step": 697900 + }, + { + "epoch": 7.43, + "learning_rate": 6.333015568026126e-05, + "loss": 8.4479, + "step": 698000 + }, + { + "epoch": 7.44, + "learning_rate": 6.330421244746542e-05, + "loss": 8.3573, + "step": 698100 + }, + { + "epoch": 7.44, + "learning_rate": 6.327827206850485e-05, + "loss": 8.2975, + "step": 698200 + }, + { + "epoch": 7.44, + "learning_rate": 6.32523345453968e-05, + "loss": 8.4279, + "step": 698300 + }, + { + "epoch": 7.44, + "learning_rate": 6.322639988015855e-05, + "loss": 8.4031, + "step": 698400 + }, + { + "epoch": 7.44, + "learning_rate": 6.320046807480693e-05, + "loss": 8.3741, + "step": 698500 + }, + { + "epoch": 7.44, + "learning_rate": 6.317453913135878e-05, + "loss": 8.3058, + "step": 698600 + }, + { + "epoch": 7.44, + "learning_rate": 6.314861305183049e-05, + "loss": 8.3926, + "step": 698700 + }, + { + "epoch": 7.44, + "learning_rate": 6.312268983823841e-05, + "loss": 8.4086, + "step": 698800 + }, + { + "epoch": 7.44, + "learning_rate": 6.309676949259855e-05, + "loss": 8.4075, + "step": 698900 + }, + { + "epoch": 7.45, + "learning_rate": 6.307085201692676e-05, + "loss": 8.4266, + "step": 699000 + }, + { + "epoch": 7.45, + "learning_rate": 6.304493741323859e-05, + "loss": 8.3433, + "step": 699100 + }, + { + "epoch": 7.45, + "learning_rate": 6.30190256835495e-05, + "loss": 8.4402, + "step": 699200 + }, + { + "epoch": 7.45, + "learning_rate": 6.299311682987459e-05, + "loss": 8.337, + "step": 699300 + }, + { + "epoch": 7.45, + "learning_rate": 6.296721085422883e-05, + "loss": 8.3568, + "step": 699400 + }, + { + "epoch": 7.45, + "learning_rate": 6.294130775862691e-05, + "loss": 8.3701, + "step": 699500 + }, + { + "epoch": 7.45, + "learning_rate": 6.29154075450833e-05, + "loss": 8.356, + "step": 699600 + }, + { + "epoch": 7.45, + "learning_rate": 6.288951021561234e-05, + "loss": 8.3686, + "step": 699700 + }, + { + "epoch": 7.45, + "learning_rate": 6.286361577222795e-05, + "loss": 8.3206, + "step": 699800 + }, + { + "epoch": 7.45, + "learning_rate": 6.283772421694407e-05, + "loss": 8.4585, + "step": 699900 + }, + { + "epoch": 7.46, + "learning_rate": 6.281183555177419e-05, + "loss": 8.3798, + "step": 700000 + }, + { + "epoch": 7.46, + "learning_rate": 6.278594977873173e-05, + "loss": 8.3546, + "step": 700100 + }, + { + "epoch": 7.46, + "learning_rate": 6.276006689982978e-05, + "loss": 8.3594, + "step": 700200 + }, + { + "epoch": 7.46, + "learning_rate": 6.27341869170813e-05, + "loss": 8.3165, + "step": 700300 + }, + { + "epoch": 7.46, + "learning_rate": 6.270830983249895e-05, + "loss": 8.4568, + "step": 700400 + }, + { + "epoch": 7.46, + "learning_rate": 6.268243564809523e-05, + "loss": 8.3271, + "step": 700500 + }, + { + "epoch": 7.46, + "learning_rate": 6.265656436588233e-05, + "loss": 8.3696, + "step": 700600 + }, + { + "epoch": 7.46, + "learning_rate": 6.26306959878723e-05, + "loss": 8.4234, + "step": 700700 + }, + { + "epoch": 7.46, + "learning_rate": 6.26048305160769e-05, + "loss": 8.3777, + "step": 700800 + }, + { + "epoch": 7.47, + "learning_rate": 6.257896795250773e-05, + "loss": 8.3607, + "step": 700900 + }, + { + "epoch": 7.47, + "learning_rate": 6.255310829917606e-05, + "loss": 8.3884, + "step": 701000 + }, + { + "epoch": 7.47, + "learning_rate": 6.252725155809306e-05, + "loss": 8.3947, + "step": 701100 + }, + { + "epoch": 7.47, + "learning_rate": 6.250139773126957e-05, + "loss": 8.388, + "step": 701200 + }, + { + "epoch": 7.47, + "learning_rate": 6.247554682071629e-05, + "loss": 8.4551, + "step": 701300 + }, + { + "epoch": 7.47, + "learning_rate": 6.244969882844358e-05, + "loss": 8.4046, + "step": 701400 + }, + { + "epoch": 7.47, + "learning_rate": 6.242385375646171e-05, + "loss": 8.3299, + "step": 701500 + }, + { + "epoch": 7.47, + "learning_rate": 6.239801160678061e-05, + "loss": 8.4234, + "step": 701600 + }, + { + "epoch": 7.47, + "learning_rate": 6.237217238141003e-05, + "loss": 8.3715, + "step": 701700 + }, + { + "epoch": 7.48, + "learning_rate": 6.234633608235951e-05, + "loss": 8.3698, + "step": 701800 + }, + { + "epoch": 7.48, + "learning_rate": 6.232050271163834e-05, + "loss": 8.394, + "step": 701900 + }, + { + "epoch": 7.48, + "learning_rate": 6.229467227125557e-05, + "loss": 8.3659, + "step": 702000 + }, + { + "epoch": 7.48, + "learning_rate": 6.226884476322004e-05, + "loss": 8.4133, + "step": 702100 + }, + { + "epoch": 7.48, + "learning_rate": 6.224302018954035e-05, + "loss": 8.4579, + "step": 702200 + }, + { + "epoch": 7.48, + "learning_rate": 6.22171985522249e-05, + "loss": 8.3884, + "step": 702300 + }, + { + "epoch": 7.48, + "learning_rate": 6.219137985328185e-05, + "loss": 8.3895, + "step": 702400 + }, + { + "epoch": 7.48, + "learning_rate": 6.216556409471908e-05, + "loss": 8.4267, + "step": 702500 + }, + { + "epoch": 7.48, + "learning_rate": 6.213975127854432e-05, + "loss": 8.371, + "step": 702600 + }, + { + "epoch": 7.48, + "learning_rate": 6.211394140676501e-05, + "loss": 8.4044, + "step": 702700 + }, + { + "epoch": 7.49, + "learning_rate": 6.208813448138842e-05, + "loss": 8.3526, + "step": 702800 + }, + { + "epoch": 7.49, + "learning_rate": 6.20623305044215e-05, + "loss": 8.3443, + "step": 702900 + }, + { + "epoch": 7.49, + "learning_rate": 6.20365294778711e-05, + "loss": 8.2818, + "step": 703000 + }, + { + "epoch": 7.49, + "learning_rate": 6.201073140374372e-05, + "loss": 8.3238, + "step": 703100 + }, + { + "epoch": 7.49, + "learning_rate": 6.198493628404569e-05, + "loss": 8.3477, + "step": 703200 + }, + { + "epoch": 7.49, + "learning_rate": 6.195914412078309e-05, + "loss": 8.3832, + "step": 703300 + }, + { + "epoch": 7.49, + "learning_rate": 6.193335491596182e-05, + "loss": 8.3557, + "step": 703400 + }, + { + "epoch": 7.49, + "learning_rate": 6.190756867158746e-05, + "loss": 8.4248, + "step": 703500 + }, + { + "epoch": 7.49, + "learning_rate": 6.188178538966547e-05, + "loss": 8.3508, + "step": 703600 + }, + { + "epoch": 7.5, + "learning_rate": 6.18560050722009e-05, + "loss": 8.3652, + "step": 703700 + }, + { + "epoch": 7.5, + "learning_rate": 6.18302277211988e-05, + "loss": 8.4308, + "step": 703800 + }, + { + "epoch": 7.5, + "learning_rate": 6.180445333866386e-05, + "loss": 8.3375, + "step": 703900 + }, + { + "epoch": 7.5, + "learning_rate": 6.177868192660052e-05, + "loss": 8.3959, + "step": 704000 + }, + { + "epoch": 7.5, + "learning_rate": 6.175291348701306e-05, + "loss": 8.3694, + "step": 704100 + }, + { + "epoch": 7.5, + "learning_rate": 6.172714802190544e-05, + "loss": 8.4087, + "step": 704200 + }, + { + "epoch": 7.5, + "learning_rate": 6.170138553328153e-05, + "loss": 8.3739, + "step": 704300 + }, + { + "epoch": 7.5, + "learning_rate": 6.16756260231448e-05, + "loss": 8.3591, + "step": 704400 + }, + { + "epoch": 7.5, + "learning_rate": 6.164986949349862e-05, + "loss": 8.365, + "step": 704500 + }, + { + "epoch": 7.5, + "learning_rate": 6.162411594634605e-05, + "loss": 8.4001, + "step": 704600 + }, + { + "epoch": 7.51, + "learning_rate": 6.159836538369e-05, + "loss": 8.356, + "step": 704700 + }, + { + "epoch": 7.51, + "learning_rate": 6.157261780753298e-05, + "loss": 8.4026, + "step": 704800 + }, + { + "epoch": 7.51, + "learning_rate": 6.15468732198775e-05, + "loss": 8.431, + "step": 704900 + }, + { + "epoch": 7.51, + "learning_rate": 6.152113162272564e-05, + "loss": 8.3104, + "step": 705000 + }, + { + "epoch": 7.51, + "learning_rate": 6.14953930180794e-05, + "loss": 8.3707, + "step": 705100 + }, + { + "epoch": 7.51, + "learning_rate": 6.146965740794039e-05, + "loss": 8.3475, + "step": 705200 + }, + { + "epoch": 7.51, + "learning_rate": 6.144392479431016e-05, + "loss": 8.3527, + "step": 705300 + }, + { + "epoch": 7.51, + "learning_rate": 6.141819517918981e-05, + "loss": 8.401, + "step": 705400 + }, + { + "epoch": 7.51, + "learning_rate": 6.139246856458051e-05, + "loss": 8.3437, + "step": 705500 + }, + { + "epoch": 7.52, + "learning_rate": 6.136674495248285e-05, + "loss": 8.4015, + "step": 705600 + }, + { + "epoch": 7.52, + "learning_rate": 6.13410243448975e-05, + "loss": 8.3129, + "step": 705700 + }, + { + "epoch": 7.52, + "learning_rate": 6.131530674382464e-05, + "loss": 8.3775, + "step": 705800 + }, + { + "epoch": 7.52, + "learning_rate": 6.128959215126437e-05, + "loss": 8.3875, + "step": 705900 + }, + { + "epoch": 7.52, + "learning_rate": 6.126388056921658e-05, + "loss": 8.337, + "step": 706000 + }, + { + "epoch": 7.52, + "learning_rate": 6.123817199968076e-05, + "loss": 8.3207, + "step": 706100 + }, + { + "epoch": 7.52, + "learning_rate": 6.121246644465637e-05, + "loss": 8.3494, + "step": 706200 + }, + { + "epoch": 7.52, + "learning_rate": 6.118676390614242e-05, + "loss": 8.3382, + "step": 706300 + }, + { + "epoch": 7.52, + "learning_rate": 6.116106438613791e-05, + "loss": 8.4126, + "step": 706400 + }, + { + "epoch": 7.53, + "learning_rate": 6.11353678866414e-05, + "loss": 8.288, + "step": 706500 + }, + { + "epoch": 7.53, + "learning_rate": 6.11096744096514e-05, + "loss": 8.3923, + "step": 706600 + }, + { + "epoch": 7.53, + "learning_rate": 6.1083983957166e-05, + "loss": 8.4539, + "step": 706700 + }, + { + "epoch": 7.53, + "learning_rate": 6.105829653118326e-05, + "loss": 8.3902, + "step": 706800 + }, + { + "epoch": 7.53, + "learning_rate": 6.1032612133700785e-05, + "loss": 8.3675, + "step": 706900 + }, + { + "epoch": 7.53, + "learning_rate": 6.1006930766716107e-05, + "loss": 8.3995, + "step": 707000 + }, + { + "epoch": 7.53, + "learning_rate": 6.098125243222645e-05, + "loss": 8.3993, + "step": 707100 + }, + { + "epoch": 7.53, + "learning_rate": 6.0955577132228857e-05, + "loss": 8.4059, + "step": 707200 + }, + { + "epoch": 7.53, + "learning_rate": 6.092990486872006e-05, + "loss": 8.3124, + "step": 707300 + }, + { + "epoch": 7.53, + "learning_rate": 6.0904235643696604e-05, + "loss": 8.3944, + "step": 707400 + }, + { + "epoch": 7.54, + "learning_rate": 6.08785694591548e-05, + "loss": 8.2808, + "step": 707500 + }, + { + "epoch": 7.54, + "learning_rate": 6.08529063170907e-05, + "loss": 8.395, + "step": 707600 + }, + { + "epoch": 7.54, + "learning_rate": 6.082724621950011e-05, + "loss": 8.3648, + "step": 707700 + }, + { + "epoch": 7.54, + "learning_rate": 6.080158916837866e-05, + "loss": 8.3482, + "step": 707800 + }, + { + "epoch": 7.54, + "learning_rate": 6.0775935165721655e-05, + "loss": 8.3837, + "step": 707900 + }, + { + "epoch": 7.54, + "learning_rate": 6.0750284213524236e-05, + "loss": 8.395, + "step": 708000 + }, + { + "epoch": 7.54, + "learning_rate": 6.072463631378129e-05, + "loss": 8.3753, + "step": 708100 + }, + { + "epoch": 7.54, + "learning_rate": 6.069899146848743e-05, + "loss": 8.2949, + "step": 708200 + }, + { + "epoch": 7.54, + "learning_rate": 6.067334967963711e-05, + "loss": 8.3929, + "step": 708300 + }, + { + "epoch": 7.55, + "learning_rate": 6.064771094922441e-05, + "loss": 8.3877, + "step": 708400 + }, + { + "epoch": 7.55, + "learning_rate": 6.062207527924335e-05, + "loss": 8.3885, + "step": 708500 + }, + { + "epoch": 7.55, + "learning_rate": 6.0596442671687535e-05, + "loss": 8.4084, + "step": 708600 + }, + { + "epoch": 7.55, + "learning_rate": 6.0570813128550486e-05, + "loss": 8.3421, + "step": 708700 + }, + { + "epoch": 7.55, + "learning_rate": 6.054518665182535e-05, + "loss": 8.3627, + "step": 708800 + }, + { + "epoch": 7.55, + "learning_rate": 6.051956324350516e-05, + "loss": 8.3666, + "step": 708900 + }, + { + "epoch": 7.55, + "learning_rate": 6.0493942905582604e-05, + "loss": 8.4017, + "step": 709000 + }, + { + "epoch": 7.55, + "learning_rate": 6.046832564005021e-05, + "loss": 8.3574, + "step": 709100 + }, + { + "epoch": 7.55, + "learning_rate": 6.0442711448900215e-05, + "loss": 8.3928, + "step": 709200 + }, + { + "epoch": 7.55, + "learning_rate": 6.041710033412466e-05, + "loss": 8.3376, + "step": 709300 + }, + { + "epoch": 7.56, + "learning_rate": 6.03914922977153e-05, + "loss": 8.3717, + "step": 709400 + }, + { + "epoch": 7.56, + "learning_rate": 6.03658873416637e-05, + "loss": 8.2952, + "step": 709500 + }, + { + "epoch": 7.56, + "learning_rate": 6.034028546796112e-05, + "loss": 8.3449, + "step": 709600 + }, + { + "epoch": 7.56, + "learning_rate": 6.031468667859867e-05, + "loss": 8.4509, + "step": 709700 + }, + { + "epoch": 7.56, + "learning_rate": 6.028909097556712e-05, + "loss": 8.3832, + "step": 709800 + }, + { + "epoch": 7.56, + "learning_rate": 6.026349836085711e-05, + "loss": 8.3987, + "step": 709900 + }, + { + "epoch": 7.56, + "learning_rate": 6.023790883645892e-05, + "loss": 8.3917, + "step": 710000 + }, + { + "epoch": 7.56, + "learning_rate": 6.021232240436267e-05, + "loss": 8.2916, + "step": 710100 + }, + { + "epoch": 7.56, + "learning_rate": 6.0186739066558254e-05, + "loss": 8.3651, + "step": 710200 + }, + { + "epoch": 7.57, + "learning_rate": 6.0161158825035245e-05, + "loss": 8.3883, + "step": 710300 + }, + { + "epoch": 7.57, + "learning_rate": 6.013558168178306e-05, + "loss": 8.4676, + "step": 710400 + }, + { + "epoch": 7.57, + "learning_rate": 6.011000763879079e-05, + "loss": 8.4253, + "step": 710500 + }, + { + "epoch": 7.57, + "learning_rate": 6.0084436698047376e-05, + "loss": 8.3855, + "step": 710600 + }, + { + "epoch": 7.57, + "learning_rate": 6.005886886154144e-05, + "loss": 8.3879, + "step": 710700 + }, + { + "epoch": 7.57, + "learning_rate": 6.0033304131261414e-05, + "loss": 8.451, + "step": 710800 + }, + { + "epoch": 7.57, + "learning_rate": 6.0007742509195455e-05, + "loss": 8.3101, + "step": 710900 + }, + { + "epoch": 7.57, + "learning_rate": 5.998218399733152e-05, + "loss": 8.3072, + "step": 711000 + }, + { + "epoch": 7.57, + "learning_rate": 5.995662859765725e-05, + "loss": 8.3147, + "step": 711100 + }, + { + "epoch": 7.58, + "learning_rate": 5.993107631216015e-05, + "loss": 8.3199, + "step": 711200 + }, + { + "epoch": 7.58, + "learning_rate": 5.990552714282738e-05, + "loss": 8.3577, + "step": 711300 + }, + { + "epoch": 7.58, + "learning_rate": 5.987998109164592e-05, + "loss": 8.2816, + "step": 711400 + }, + { + "epoch": 7.58, + "learning_rate": 5.985443816060248e-05, + "loss": 8.3361, + "step": 711500 + }, + { + "epoch": 7.58, + "learning_rate": 5.9828898351683574e-05, + "loss": 8.4159, + "step": 711600 + }, + { + "epoch": 7.58, + "learning_rate": 5.9803361666875345e-05, + "loss": 8.3643, + "step": 711700 + }, + { + "epoch": 7.58, + "learning_rate": 5.9777828108163904e-05, + "loss": 8.3748, + "step": 711800 + }, + { + "epoch": 7.58, + "learning_rate": 5.9752297677534864e-05, + "loss": 8.375, + "step": 711900 + }, + { + "epoch": 7.58, + "learning_rate": 5.972677037697387e-05, + "loss": 8.3002, + "step": 712000 + }, + { + "epoch": 7.58, + "learning_rate": 5.9701246208466046e-05, + "loss": 8.3549, + "step": 712100 + }, + { + "epoch": 7.59, + "learning_rate": 5.967572517399651e-05, + "loss": 8.368, + "step": 712200 + }, + { + "epoch": 7.59, + "learning_rate": 5.9650207275550015e-05, + "loss": 8.3377, + "step": 712300 + }, + { + "epoch": 7.59, + "learning_rate": 5.962469251511105e-05, + "loss": 8.3943, + "step": 712400 + }, + { + "epoch": 7.59, + "learning_rate": 5.9599180894663963e-05, + "loss": 8.3911, + "step": 712500 + }, + { + "epoch": 7.59, + "learning_rate": 5.957367241619274e-05, + "loss": 8.3961, + "step": 712600 + }, + { + "epoch": 7.59, + "learning_rate": 5.954816708168123e-05, + "loss": 8.3858, + "step": 712700 + }, + { + "epoch": 7.59, + "learning_rate": 5.952266489311289e-05, + "loss": 8.3804, + "step": 712800 + }, + { + "epoch": 7.59, + "learning_rate": 5.949716585247116e-05, + "loss": 8.4243, + "step": 712900 + }, + { + "epoch": 7.59, + "learning_rate": 5.9471669961738986e-05, + "loss": 8.3548, + "step": 713000 + }, + { + "epoch": 7.6, + "learning_rate": 5.944617722289928e-05, + "loss": 8.3458, + "step": 713100 + }, + { + "epoch": 7.6, + "learning_rate": 5.942068763793451e-05, + "loss": 8.3397, + "step": 713200 + }, + { + "epoch": 7.6, + "learning_rate": 5.9395201208827136e-05, + "loss": 8.378, + "step": 713300 + }, + { + "epoch": 7.6, + "learning_rate": 5.93697179375591e-05, + "loss": 8.393, + "step": 713400 + }, + { + "epoch": 7.6, + "learning_rate": 5.934423782611238e-05, + "loss": 8.3441, + "step": 713500 + }, + { + "epoch": 7.6, + "learning_rate": 5.9318760876468415e-05, + "loss": 8.3185, + "step": 713600 + }, + { + "epoch": 7.6, + "learning_rate": 5.929328709060871e-05, + "loss": 8.3648, + "step": 713700 + }, + { + "epoch": 7.6, + "learning_rate": 5.9267816470514224e-05, + "loss": 8.3673, + "step": 713800 + }, + { + "epoch": 7.6, + "learning_rate": 5.924234901816589e-05, + "loss": 8.3825, + "step": 713900 + }, + { + "epoch": 7.61, + "learning_rate": 5.921688473554428e-05, + "loss": 8.3945, + "step": 714000 + }, + { + "epoch": 7.61, + "learning_rate": 5.919142362462978e-05, + "loss": 8.3782, + "step": 714100 + }, + { + "epoch": 7.61, + "learning_rate": 5.9165965687402466e-05, + "loss": 8.422, + "step": 714200 + }, + { + "epoch": 7.61, + "learning_rate": 5.914051092584223e-05, + "loss": 8.4246, + "step": 714300 + }, + { + "epoch": 7.61, + "learning_rate": 5.9115059341928736e-05, + "loss": 8.4503, + "step": 714400 + }, + { + "epoch": 7.61, + "learning_rate": 5.9089610937641246e-05, + "loss": 8.3699, + "step": 714500 + }, + { + "epoch": 7.61, + "learning_rate": 5.906416571495902e-05, + "loss": 8.3288, + "step": 714600 + }, + { + "epoch": 7.61, + "learning_rate": 5.903872367586082e-05, + "loss": 8.4104, + "step": 714700 + }, + { + "epoch": 7.61, + "learning_rate": 5.901328482232533e-05, + "loss": 8.4135, + "step": 714800 + }, + { + "epoch": 7.61, + "learning_rate": 5.898784915633092e-05, + "loss": 8.3802, + "step": 714900 + }, + { + "epoch": 7.62, + "learning_rate": 5.896241667985575e-05, + "loss": 8.3511, + "step": 715000 + }, + { + "epoch": 7.62, + "learning_rate": 5.893698739487765e-05, + "loss": 8.3575, + "step": 715100 + }, + { + "epoch": 7.62, + "learning_rate": 5.891156130337432e-05, + "loss": 8.4325, + "step": 715200 + }, + { + "epoch": 7.62, + "learning_rate": 5.88861384073231e-05, + "loss": 8.4283, + "step": 715300 + }, + { + "epoch": 7.62, + "learning_rate": 5.886071870870117e-05, + "loss": 8.3824, + "step": 715400 + }, + { + "epoch": 7.62, + "learning_rate": 5.883530220948539e-05, + "loss": 8.4848, + "step": 715500 + }, + { + "epoch": 7.62, + "learning_rate": 5.880988891165244e-05, + "loss": 8.3851, + "step": 715600 + }, + { + "epoch": 7.62, + "learning_rate": 5.878447881717867e-05, + "loss": 8.3708, + "step": 715700 + }, + { + "epoch": 7.62, + "learning_rate": 5.875907192804026e-05, + "loss": 8.3731, + "step": 715800 + }, + { + "epoch": 7.63, + "learning_rate": 5.873366824621309e-05, + "loss": 8.3176, + "step": 715900 + }, + { + "epoch": 7.63, + "learning_rate": 5.8708267773672845e-05, + "loss": 8.3763, + "step": 716000 + }, + { + "epoch": 7.63, + "learning_rate": 5.868287051239485e-05, + "loss": 8.3724, + "step": 716100 + }, + { + "epoch": 7.63, + "learning_rate": 5.8657476464354324e-05, + "loss": 8.488, + "step": 716200 + }, + { + "epoch": 7.63, + "learning_rate": 5.863208563152611e-05, + "loss": 8.3724, + "step": 716300 + }, + { + "epoch": 7.63, + "learning_rate": 5.860669801588487e-05, + "loss": 8.3363, + "step": 716400 + }, + { + "epoch": 7.63, + "learning_rate": 5.858131361940506e-05, + "loss": 8.2958, + "step": 716500 + }, + { + "epoch": 7.63, + "learning_rate": 5.8555932444060736e-05, + "loss": 8.361, + "step": 716600 + }, + { + "epoch": 7.63, + "learning_rate": 5.853055449182585e-05, + "loss": 8.3581, + "step": 716700 + }, + { + "epoch": 7.63, + "learning_rate": 5.8505179764674036e-05, + "loss": 8.3927, + "step": 716800 + }, + { + "epoch": 7.64, + "learning_rate": 5.8479808264578694e-05, + "loss": 8.3565, + "step": 716900 + }, + { + "epoch": 7.64, + "learning_rate": 5.8454439993512944e-05, + "loss": 8.3485, + "step": 717000 + }, + { + "epoch": 7.64, + "learning_rate": 5.842907495344972e-05, + "loss": 8.3027, + "step": 717100 + }, + { + "epoch": 7.64, + "learning_rate": 5.840371314636163e-05, + "loss": 8.358, + "step": 717200 + }, + { + "epoch": 7.64, + "learning_rate": 5.837835457422108e-05, + "loss": 8.3922, + "step": 717300 + }, + { + "epoch": 7.64, + "learning_rate": 5.835299923900017e-05, + "loss": 8.3852, + "step": 717400 + }, + { + "epoch": 7.64, + "learning_rate": 5.832764714267092e-05, + "loss": 8.3905, + "step": 717500 + }, + { + "epoch": 7.64, + "learning_rate": 5.830229828720476e-05, + "loss": 8.3523, + "step": 717600 + }, + { + "epoch": 7.64, + "learning_rate": 5.8276952674573224e-05, + "loss": 8.4068, + "step": 717700 + }, + { + "epoch": 7.65, + "learning_rate": 5.825161030674741e-05, + "loss": 8.4162, + "step": 717800 + }, + { + "epoch": 7.65, + "learning_rate": 5.822627118569818e-05, + "loss": 8.37, + "step": 717900 + }, + { + "epoch": 7.65, + "learning_rate": 5.820093531339612e-05, + "loss": 8.3486, + "step": 718000 + }, + { + "epoch": 7.65, + "learning_rate": 5.817560269181169e-05, + "loss": 8.3687, + "step": 718100 + }, + { + "epoch": 7.65, + "learning_rate": 5.815027332291497e-05, + "loss": 8.366, + "step": 718200 + }, + { + "epoch": 7.65, + "learning_rate": 5.812494720867583e-05, + "loss": 8.3037, + "step": 718300 + }, + { + "epoch": 7.65, + "learning_rate": 5.8099624351063844e-05, + "loss": 8.4019, + "step": 718400 + }, + { + "epoch": 7.65, + "learning_rate": 5.8074304752048405e-05, + "loss": 8.42, + "step": 718500 + }, + { + "epoch": 7.65, + "learning_rate": 5.8048988413598714e-05, + "loss": 8.288, + "step": 718600 + }, + { + "epoch": 7.66, + "learning_rate": 5.802367533768345e-05, + "loss": 8.3351, + "step": 718700 + }, + { + "epoch": 7.66, + "learning_rate": 5.7998365526271336e-05, + "loss": 8.3642, + "step": 718800 + }, + { + "epoch": 7.66, + "learning_rate": 5.79730589813307e-05, + "loss": 8.2937, + "step": 718900 + }, + { + "epoch": 7.66, + "learning_rate": 5.794775570482961e-05, + "loss": 8.3303, + "step": 719000 + }, + { + "epoch": 7.66, + "learning_rate": 5.7922455698735866e-05, + "loss": 8.4061, + "step": 719100 + }, + { + "epoch": 7.66, + "learning_rate": 5.789715896501715e-05, + "loss": 8.3692, + "step": 719200 + }, + { + "epoch": 7.66, + "learning_rate": 5.7871865505640744e-05, + "loss": 8.364, + "step": 719300 + }, + { + "epoch": 7.66, + "learning_rate": 5.7846575322573715e-05, + "loss": 8.413, + "step": 719400 + }, + { + "epoch": 7.66, + "learning_rate": 5.7821288417782835e-05, + "loss": 8.3851, + "step": 719500 + }, + { + "epoch": 7.66, + "learning_rate": 5.779600479323477e-05, + "loss": 8.4147, + "step": 719600 + }, + { + "epoch": 7.67, + "learning_rate": 5.7770724450895795e-05, + "loss": 8.3537, + "step": 719700 + }, + { + "epoch": 7.67, + "learning_rate": 5.7745447392731935e-05, + "loss": 8.3501, + "step": 719800 + }, + { + "epoch": 7.67, + "learning_rate": 5.7720173620708986e-05, + "loss": 8.2817, + "step": 719900 + }, + { + "epoch": 7.67, + "learning_rate": 5.769490313679253e-05, + "loss": 8.2931, + "step": 720000 + }, + { + "epoch": 7.67, + "learning_rate": 5.766963594294785e-05, + "loss": 8.3301, + "step": 720100 + }, + { + "epoch": 7.67, + "learning_rate": 5.764437204113996e-05, + "loss": 8.3896, + "step": 720200 + }, + { + "epoch": 7.67, + "learning_rate": 5.761911143333361e-05, + "loss": 8.3352, + "step": 720300 + }, + { + "epoch": 7.67, + "learning_rate": 5.759385412149338e-05, + "loss": 8.4096, + "step": 720400 + }, + { + "epoch": 7.67, + "learning_rate": 5.7568600107583514e-05, + "loss": 8.3606, + "step": 720500 + }, + { + "epoch": 7.68, + "learning_rate": 5.7543349393567966e-05, + "loss": 8.3369, + "step": 720600 + }, + { + "epoch": 7.68, + "learning_rate": 5.7518101981410574e-05, + "loss": 8.3896, + "step": 720700 + }, + { + "epoch": 7.68, + "learning_rate": 5.749285787307479e-05, + "loss": 8.2792, + "step": 720800 + }, + { + "epoch": 7.68, + "learning_rate": 5.746761707052386e-05, + "loss": 8.3725, + "step": 720900 + }, + { + "epoch": 7.68, + "learning_rate": 5.7442379575720705e-05, + "loss": 8.41, + "step": 721000 + }, + { + "epoch": 7.68, + "learning_rate": 5.7417145390628135e-05, + "loss": 8.3605, + "step": 721100 + }, + { + "epoch": 7.68, + "learning_rate": 5.739191451720858e-05, + "loss": 8.2901, + "step": 721200 + }, + { + "epoch": 7.68, + "learning_rate": 5.736668695742424e-05, + "loss": 8.2998, + "step": 721300 + }, + { + "epoch": 7.68, + "learning_rate": 5.7341462713237035e-05, + "loss": 8.3488, + "step": 721400 + }, + { + "epoch": 7.68, + "learning_rate": 5.731624178660874e-05, + "loss": 8.3372, + "step": 721500 + }, + { + "epoch": 7.69, + "learning_rate": 5.729102417950073e-05, + "loss": 8.3199, + "step": 721600 + }, + { + "epoch": 7.69, + "learning_rate": 5.726580989387419e-05, + "loss": 8.4434, + "step": 721700 + }, + { + "epoch": 7.69, + "learning_rate": 5.7240598931690004e-05, + "loss": 8.407, + "step": 721800 + }, + { + "epoch": 7.69, + "learning_rate": 5.72153912949089e-05, + "loss": 8.3874, + "step": 721900 + }, + { + "epoch": 7.69, + "learning_rate": 5.719018698549126e-05, + "loss": 8.3833, + "step": 722000 + }, + { + "epoch": 7.69, + "learning_rate": 5.716498600539719e-05, + "loss": 8.3116, + "step": 722100 + }, + { + "epoch": 7.69, + "learning_rate": 5.713978835658658e-05, + "loss": 8.3228, + "step": 722200 + }, + { + "epoch": 7.69, + "learning_rate": 5.711459404101909e-05, + "loss": 8.3187, + "step": 722300 + }, + { + "epoch": 7.69, + "learning_rate": 5.708940306065407e-05, + "loss": 8.4053, + "step": 722400 + }, + { + "epoch": 7.7, + "learning_rate": 5.7064215417450596e-05, + "loss": 8.3917, + "step": 722500 + }, + { + "epoch": 7.7, + "learning_rate": 5.703903111336751e-05, + "loss": 8.3913, + "step": 722600 + }, + { + "epoch": 7.7, + "learning_rate": 5.701385015036347e-05, + "loss": 8.3642, + "step": 722700 + }, + { + "epoch": 7.7, + "learning_rate": 5.6988672530396746e-05, + "loss": 8.3697, + "step": 722800 + }, + { + "epoch": 7.7, + "learning_rate": 5.696349825542537e-05, + "loss": 8.3358, + "step": 722900 + }, + { + "epoch": 7.7, + "learning_rate": 5.693832732740724e-05, + "loss": 8.4764, + "step": 723000 + }, + { + "epoch": 7.7, + "learning_rate": 5.691315974829985e-05, + "loss": 8.4496, + "step": 723100 + }, + { + "epoch": 7.7, + "learning_rate": 5.688799552006048e-05, + "loss": 8.3288, + "step": 723200 + }, + { + "epoch": 7.7, + "learning_rate": 5.686283464464612e-05, + "loss": 8.3156, + "step": 723300 + }, + { + "epoch": 7.71, + "learning_rate": 5.683767712401362e-05, + "loss": 8.386, + "step": 723400 + }, + { + "epoch": 7.71, + "learning_rate": 5.6812522960119454e-05, + "loss": 8.4206, + "step": 723500 + }, + { + "epoch": 7.71, + "learning_rate": 5.678737215491984e-05, + "loss": 8.4106, + "step": 723600 + }, + { + "epoch": 7.71, + "learning_rate": 5.6762224710370716e-05, + "loss": 8.3356, + "step": 723700 + }, + { + "epoch": 7.71, + "learning_rate": 5.6737080628427955e-05, + "loss": 8.3853, + "step": 723800 + }, + { + "epoch": 7.71, + "learning_rate": 5.671193991104682e-05, + "loss": 8.3739, + "step": 723900 + }, + { + "epoch": 7.71, + "learning_rate": 5.668680256018264e-05, + "loss": 8.3456, + "step": 724000 + }, + { + "epoch": 7.71, + "learning_rate": 5.666166857779027e-05, + "loss": 8.4299, + "step": 724100 + }, + { + "epoch": 7.71, + "learning_rate": 5.66365379658245e-05, + "loss": 8.377, + "step": 724200 + }, + { + "epoch": 7.71, + "learning_rate": 5.661141072623959e-05, + "loss": 8.2994, + "step": 724300 + }, + { + "epoch": 7.72, + "learning_rate": 5.658628686098979e-05, + "loss": 8.3236, + "step": 724400 + }, + { + "epoch": 7.72, + "learning_rate": 5.656116637202896e-05, + "loss": 8.4093, + "step": 724500 + }, + { + "epoch": 7.72, + "learning_rate": 5.653604926131073e-05, + "loss": 8.4002, + "step": 724600 + }, + { + "epoch": 7.72, + "learning_rate": 5.651093553078839e-05, + "loss": 8.419, + "step": 724700 + }, + { + "epoch": 7.72, + "learning_rate": 5.648582518241511e-05, + "loss": 8.3322, + "step": 724800 + }, + { + "epoch": 7.72, + "learning_rate": 5.646071821814378e-05, + "loss": 8.3462, + "step": 724900 + }, + { + "epoch": 7.72, + "learning_rate": 5.6435614639926836e-05, + "loss": 8.3644, + "step": 725000 + }, + { + "epoch": 7.72, + "learning_rate": 5.641051444971669e-05, + "loss": 8.2363, + "step": 725100 + }, + { + "epoch": 7.72, + "learning_rate": 5.63854176494653e-05, + "loss": 8.3588, + "step": 725200 + }, + { + "epoch": 7.73, + "learning_rate": 5.6360324241124587e-05, + "loss": 8.3077, + "step": 725300 + }, + { + "epoch": 7.73, + "learning_rate": 5.6335234226645886e-05, + "loss": 8.3645, + "step": 725400 + }, + { + "epoch": 7.73, + "learning_rate": 5.631014760798059e-05, + "loss": 8.2876, + "step": 725500 + }, + { + "epoch": 7.73, + "learning_rate": 5.628506438707963e-05, + "loss": 8.3797, + "step": 725600 + }, + { + "epoch": 7.73, + "learning_rate": 5.625998456589374e-05, + "loss": 8.4437, + "step": 725700 + }, + { + "epoch": 7.73, + "learning_rate": 5.623490814637333e-05, + "loss": 8.4041, + "step": 725800 + }, + { + "epoch": 7.73, + "learning_rate": 5.6209835130468716e-05, + "loss": 8.2973, + "step": 725900 + }, + { + "epoch": 7.73, + "learning_rate": 5.6184765520129725e-05, + "loss": 8.273, + "step": 726000 + }, + { + "epoch": 7.73, + "learning_rate": 5.615969931730607e-05, + "loss": 8.3747, + "step": 726100 + }, + { + "epoch": 7.73, + "learning_rate": 5.6134636523947105e-05, + "loss": 8.3796, + "step": 726200 + }, + { + "epoch": 7.74, + "learning_rate": 5.6109577142002024e-05, + "loss": 8.3921, + "step": 726300 + }, + { + "epoch": 7.74, + "learning_rate": 5.608452117341969e-05, + "loss": 8.3093, + "step": 726400 + }, + { + "epoch": 7.74, + "learning_rate": 5.605946862014868e-05, + "loss": 8.353, + "step": 726500 + }, + { + "epoch": 7.74, + "learning_rate": 5.60344194841373e-05, + "loss": 8.3488, + "step": 726600 + }, + { + "epoch": 7.74, + "learning_rate": 5.6009373767333726e-05, + "loss": 8.3717, + "step": 726700 + }, + { + "epoch": 7.74, + "learning_rate": 5.598433147168569e-05, + "loss": 8.3548, + "step": 726800 + }, + { + "epoch": 7.74, + "learning_rate": 5.595929259914072e-05, + "loss": 8.3745, + "step": 726900 + }, + { + "epoch": 7.74, + "learning_rate": 5.593425715164617e-05, + "loss": 8.3144, + "step": 727000 + }, + { + "epoch": 7.74, + "learning_rate": 5.5909225131149e-05, + "loss": 8.3581, + "step": 727100 + }, + { + "epoch": 7.75, + "learning_rate": 5.5884196539595954e-05, + "loss": 8.3875, + "step": 727200 + }, + { + "epoch": 7.75, + "learning_rate": 5.585917137893349e-05, + "loss": 8.2806, + "step": 727300 + }, + { + "epoch": 7.75, + "learning_rate": 5.583414965110786e-05, + "loss": 8.3967, + "step": 727400 + }, + { + "epoch": 7.75, + "learning_rate": 5.580913135806498e-05, + "loss": 8.3711, + "step": 727500 + }, + { + "epoch": 7.75, + "learning_rate": 5.578411650175055e-05, + "loss": 8.3485, + "step": 727600 + }, + { + "epoch": 7.75, + "learning_rate": 5.57591050841099e-05, + "loss": 8.3715, + "step": 727700 + }, + { + "epoch": 7.75, + "learning_rate": 5.573409710708828e-05, + "loss": 8.3115, + "step": 727800 + }, + { + "epoch": 7.75, + "learning_rate": 5.570909257263051e-05, + "loss": 8.2485, + "step": 727900 + }, + { + "epoch": 7.75, + "learning_rate": 5.568409148268119e-05, + "loss": 8.3785, + "step": 728000 + }, + { + "epoch": 7.76, + "learning_rate": 5.5659093839184616e-05, + "loss": 8.3286, + "step": 728100 + }, + { + "epoch": 7.76, + "learning_rate": 5.563409964408496e-05, + "loss": 8.3223, + "step": 728200 + }, + { + "epoch": 7.76, + "learning_rate": 5.5609108899325954e-05, + "loss": 8.4215, + "step": 728300 + }, + { + "epoch": 7.76, + "learning_rate": 5.558412160685116e-05, + "loss": 8.402, + "step": 728400 + }, + { + "epoch": 7.76, + "learning_rate": 5.5559137768603784e-05, + "loss": 8.4067, + "step": 728500 + }, + { + "epoch": 7.76, + "learning_rate": 5.553415738652691e-05, + "loss": 8.3587, + "step": 728600 + }, + { + "epoch": 7.76, + "learning_rate": 5.5509180462563214e-05, + "loss": 8.3321, + "step": 728700 + }, + { + "epoch": 7.76, + "learning_rate": 5.5484206998655155e-05, + "loss": 8.4041, + "step": 728800 + }, + { + "epoch": 7.76, + "learning_rate": 5.545923699674489e-05, + "loss": 8.3426, + "step": 728900 + }, + { + "epoch": 7.76, + "learning_rate": 5.543427045877443e-05, + "loss": 8.3671, + "step": 729000 + }, + { + "epoch": 7.77, + "learning_rate": 5.540930738668535e-05, + "loss": 8.3484, + "step": 729100 + }, + { + "epoch": 7.77, + "learning_rate": 5.5384347782419024e-05, + "loss": 8.3863, + "step": 729200 + }, + { + "epoch": 7.77, + "learning_rate": 5.535939164791662e-05, + "loss": 8.3311, + "step": 729300 + }, + { + "epoch": 7.77, + "learning_rate": 5.533443898511896e-05, + "loss": 8.2451, + "step": 729400 + }, + { + "epoch": 7.77, + "learning_rate": 5.53094897959666e-05, + "loss": 8.3946, + "step": 729500 + }, + { + "epoch": 7.77, + "learning_rate": 5.52845440823998e-05, + "loss": 8.4296, + "step": 729600 + }, + { + "epoch": 7.77, + "learning_rate": 5.52596018463587e-05, + "loss": 8.3986, + "step": 729700 + }, + { + "epoch": 7.77, + "learning_rate": 5.523466308978298e-05, + "loss": 8.3443, + "step": 729800 + }, + { + "epoch": 7.77, + "learning_rate": 5.5209727814612146e-05, + "loss": 8.3398, + "step": 729900 + }, + { + "epoch": 7.78, + "learning_rate": 5.518479602278538e-05, + "loss": 8.3684, + "step": 730000 + }, + { + "epoch": 7.78, + "learning_rate": 5.5159867716241705e-05, + "loss": 8.3626, + "step": 730100 + }, + { + "epoch": 7.78, + "learning_rate": 5.513494289691976e-05, + "loss": 8.3072, + "step": 730200 + }, + { + "epoch": 7.78, + "learning_rate": 5.5110021566757954e-05, + "loss": 8.3541, + "step": 730300 + }, + { + "epoch": 7.78, + "learning_rate": 5.5085103727694374e-05, + "loss": 8.376, + "step": 730400 + }, + { + "epoch": 7.78, + "learning_rate": 5.506018938166702e-05, + "loss": 8.3639, + "step": 730500 + }, + { + "epoch": 7.78, + "learning_rate": 5.50352785306133e-05, + "loss": 8.2801, + "step": 730600 + }, + { + "epoch": 7.78, + "learning_rate": 5.501037117647067e-05, + "loss": 8.3628, + "step": 730700 + }, + { + "epoch": 7.78, + "learning_rate": 5.498546732117609e-05, + "loss": 8.3362, + "step": 730800 + }, + { + "epoch": 7.79, + "learning_rate": 5.496056696666645e-05, + "loss": 8.4597, + "step": 730900 + }, + { + "epoch": 7.79, + "learning_rate": 5.493567011487811e-05, + "loss": 8.2731, + "step": 731000 + }, + { + "epoch": 7.79, + "learning_rate": 5.4910776767747374e-05, + "loss": 8.3232, + "step": 731100 + }, + { + "epoch": 7.79, + "learning_rate": 5.488588692721024e-05, + "loss": 8.3797, + "step": 731200 + }, + { + "epoch": 7.79, + "learning_rate": 5.4861000595202336e-05, + "loss": 8.3605, + "step": 731300 + }, + { + "epoch": 7.79, + "learning_rate": 5.483611777365909e-05, + "loss": 8.3617, + "step": 731400 + }, + { + "epoch": 7.79, + "learning_rate": 5.481123846451561e-05, + "loss": 8.3087, + "step": 731500 + }, + { + "epoch": 7.79, + "learning_rate": 5.478636266970687e-05, + "loss": 8.3573, + "step": 731600 + }, + { + "epoch": 7.79, + "learning_rate": 5.4761490391167316e-05, + "loss": 8.3535, + "step": 731700 + }, + { + "epoch": 7.79, + "learning_rate": 5.473662163083136e-05, + "loss": 8.2978, + "step": 731800 + }, + { + "epoch": 7.8, + "learning_rate": 5.471175639063301e-05, + "loss": 8.4017, + "step": 731900 + }, + { + "epoch": 7.8, + "learning_rate": 5.4686894672506115e-05, + "loss": 8.421, + "step": 732000 + }, + { + "epoch": 7.8, + "learning_rate": 5.466203647838404e-05, + "loss": 8.4284, + "step": 732100 + }, + { + "epoch": 7.8, + "learning_rate": 5.463718181020012e-05, + "loss": 8.3703, + "step": 732200 + }, + { + "epoch": 7.8, + "learning_rate": 5.4612330669887236e-05, + "loss": 8.2435, + "step": 732300 + }, + { + "epoch": 7.8, + "learning_rate": 5.458748305937817e-05, + "loss": 8.3929, + "step": 732400 + }, + { + "epoch": 7.8, + "learning_rate": 5.456263898060516e-05, + "loss": 8.3322, + "step": 732500 + }, + { + "epoch": 7.8, + "learning_rate": 5.4537798435500465e-05, + "loss": 8.3519, + "step": 732600 + }, + { + "epoch": 7.8, + "learning_rate": 5.451296142599591e-05, + "loss": 8.3901, + "step": 732700 + }, + { + "epoch": 7.81, + "learning_rate": 5.448812795402304e-05, + "loss": 8.3314, + "step": 732800 + }, + { + "epoch": 7.81, + "learning_rate": 5.446329802151313e-05, + "loss": 8.4278, + "step": 732900 + }, + { + "epoch": 7.81, + "learning_rate": 5.443847163039729e-05, + "loss": 8.3407, + "step": 733000 + }, + { + "epoch": 7.81, + "learning_rate": 5.441364878260623e-05, + "loss": 8.3765, + "step": 733100 + }, + { + "epoch": 7.81, + "learning_rate": 5.438882948007043e-05, + "loss": 8.2956, + "step": 733200 + }, + { + "epoch": 7.81, + "learning_rate": 5.436401372472003e-05, + "loss": 8.4061, + "step": 733300 + }, + { + "epoch": 7.81, + "learning_rate": 5.4339201518485014e-05, + "loss": 8.3766, + "step": 733400 + }, + { + "epoch": 7.81, + "learning_rate": 5.4314392863295114e-05, + "loss": 8.3216, + "step": 733500 + }, + { + "epoch": 7.81, + "learning_rate": 5.428958776107952e-05, + "loss": 8.3167, + "step": 733600 + }, + { + "epoch": 7.81, + "learning_rate": 5.426478621376746e-05, + "loss": 8.4571, + "step": 733700 + }, + { + "epoch": 7.82, + "learning_rate": 5.423998822328772e-05, + "loss": 8.3464, + "step": 733800 + }, + { + "epoch": 7.82, + "learning_rate": 5.421519379156883e-05, + "loss": 8.3608, + "step": 733900 + }, + { + "epoch": 7.82, + "learning_rate": 5.419040292053901e-05, + "loss": 8.3295, + "step": 734000 + }, + { + "epoch": 7.82, + "learning_rate": 5.416561561212634e-05, + "loss": 8.2898, + "step": 734100 + }, + { + "epoch": 7.82, + "learning_rate": 5.41408318682585e-05, + "loss": 8.4489, + "step": 734200 + }, + { + "epoch": 7.82, + "learning_rate": 5.411605169086291e-05, + "loss": 8.3942, + "step": 734300 + }, + { + "epoch": 7.82, + "learning_rate": 5.409127508186669e-05, + "loss": 8.3679, + "step": 734400 + }, + { + "epoch": 7.82, + "learning_rate": 5.406650204319681e-05, + "loss": 8.3438, + "step": 734500 + }, + { + "epoch": 7.82, + "learning_rate": 5.40417325767798e-05, + "loss": 8.2851, + "step": 734600 + }, + { + "epoch": 7.83, + "learning_rate": 5.401696668454201e-05, + "loss": 8.3556, + "step": 734700 + }, + { + "epoch": 7.83, + "learning_rate": 5.399220436840946e-05, + "loss": 8.3342, + "step": 734800 + }, + { + "epoch": 7.83, + "learning_rate": 5.396744563030798e-05, + "loss": 8.3476, + "step": 734900 + }, + { + "epoch": 7.83, + "learning_rate": 5.3942690472163006e-05, + "loss": 8.3216, + "step": 735000 + }, + { + "epoch": 7.83, + "learning_rate": 5.391793889589977e-05, + "loss": 8.2899, + "step": 735100 + }, + { + "epoch": 7.83, + "learning_rate": 5.389319090344316e-05, + "loss": 8.3015, + "step": 735200 + }, + { + "epoch": 7.83, + "learning_rate": 5.3868446496717894e-05, + "loss": 8.3258, + "step": 735300 + }, + { + "epoch": 7.83, + "learning_rate": 5.384370567764833e-05, + "loss": 8.3911, + "step": 735400 + }, + { + "epoch": 7.83, + "learning_rate": 5.381896844815851e-05, + "loss": 8.4047, + "step": 735500 + }, + { + "epoch": 7.84, + "learning_rate": 5.379423481017234e-05, + "loss": 8.3406, + "step": 735600 + }, + { + "epoch": 7.84, + "learning_rate": 5.376950476561331e-05, + "loss": 8.4018, + "step": 735700 + }, + { + "epoch": 7.84, + "learning_rate": 5.3744778316404686e-05, + "loss": 8.3684, + "step": 735800 + }, + { + "epoch": 7.84, + "learning_rate": 5.3720055464469396e-05, + "loss": 8.3585, + "step": 735900 + }, + { + "epoch": 7.84, + "learning_rate": 5.369533621173023e-05, + "loss": 8.3905, + "step": 736000 + }, + { + "epoch": 7.84, + "learning_rate": 5.3670620560109565e-05, + "loss": 8.3058, + "step": 736100 + }, + { + "epoch": 7.84, + "learning_rate": 5.364590851152953e-05, + "loss": 8.2602, + "step": 736200 + }, + { + "epoch": 7.84, + "learning_rate": 5.362120006791194e-05, + "loss": 8.2821, + "step": 736300 + }, + { + "epoch": 7.84, + "learning_rate": 5.359649523117848e-05, + "loss": 8.3103, + "step": 736400 + }, + { + "epoch": 7.84, + "learning_rate": 5.357179400325039e-05, + "loss": 8.3487, + "step": 736500 + }, + { + "epoch": 7.85, + "learning_rate": 5.3547096386048666e-05, + "loss": 8.3075, + "step": 736600 + }, + { + "epoch": 7.85, + "learning_rate": 5.3522402381494044e-05, + "loss": 8.3105, + "step": 736700 + }, + { + "epoch": 7.85, + "learning_rate": 5.349771199150704e-05, + "loss": 8.3273, + "step": 736800 + }, + { + "epoch": 7.85, + "learning_rate": 5.347302521800779e-05, + "loss": 8.3251, + "step": 736900 + }, + { + "epoch": 7.85, + "learning_rate": 5.344834206291618e-05, + "loss": 8.3082, + "step": 737000 + }, + { + "epoch": 7.85, + "learning_rate": 5.3423662528151786e-05, + "loss": 8.3335, + "step": 737100 + }, + { + "epoch": 7.85, + "learning_rate": 5.339898661563407e-05, + "loss": 8.2746, + "step": 737200 + }, + { + "epoch": 7.85, + "learning_rate": 5.33743143272819e-05, + "loss": 8.3869, + "step": 737300 + }, + { + "epoch": 7.85, + "learning_rate": 5.334964566501418e-05, + "loss": 8.3026, + "step": 737400 + }, + { + "epoch": 7.86, + "learning_rate": 5.332498063074931e-05, + "loss": 8.4006, + "step": 737500 + }, + { + "epoch": 7.86, + "learning_rate": 5.330031922640556e-05, + "loss": 8.3184, + "step": 737600 + }, + { + "epoch": 7.86, + "learning_rate": 5.327566145390083e-05, + "loss": 8.3046, + "step": 737700 + }, + { + "epoch": 7.86, + "learning_rate": 5.325100731515269e-05, + "loss": 8.3819, + "step": 737800 + }, + { + "epoch": 7.86, + "learning_rate": 5.3226356812078603e-05, + "loss": 8.3347, + "step": 737900 + }, + { + "epoch": 7.86, + "learning_rate": 5.320170994659559e-05, + "loss": 8.326, + "step": 738000 + }, + { + "epoch": 7.86, + "learning_rate": 5.317706672062044e-05, + "loss": 8.3343, + "step": 738100 + }, + { + "epoch": 7.86, + "learning_rate": 5.315242713606962e-05, + "loss": 8.3227, + "step": 738200 + }, + { + "epoch": 7.86, + "learning_rate": 5.312779119485947e-05, + "loss": 8.3178, + "step": 738300 + }, + { + "epoch": 7.86, + "learning_rate": 5.310315889890577e-05, + "loss": 8.24, + "step": 738400 + }, + { + "epoch": 7.87, + "learning_rate": 5.30785302501243e-05, + "loss": 8.3214, + "step": 738500 + }, + { + "epoch": 7.87, + "learning_rate": 5.305390525043035e-05, + "loss": 8.3436, + "step": 738600 + }, + { + "epoch": 7.87, + "learning_rate": 5.302928390173914e-05, + "loss": 8.4162, + "step": 738700 + }, + { + "epoch": 7.87, + "learning_rate": 5.3004666205965295e-05, + "loss": 8.3537, + "step": 738800 + }, + { + "epoch": 7.87, + "learning_rate": 5.298005216502345e-05, + "loss": 8.3048, + "step": 738900 + }, + { + "epoch": 7.87, + "learning_rate": 5.29554417808278e-05, + "loss": 8.3456, + "step": 739000 + }, + { + "epoch": 7.87, + "learning_rate": 5.293083505529238e-05, + "loss": 8.4182, + "step": 739100 + }, + { + "epoch": 7.87, + "learning_rate": 5.290623199033071e-05, + "loss": 8.2906, + "step": 739200 + }, + { + "epoch": 7.87, + "learning_rate": 5.288163258785631e-05, + "loss": 8.3389, + "step": 739300 + }, + { + "epoch": 7.88, + "learning_rate": 5.285703684978222e-05, + "loss": 8.3183, + "step": 739400 + }, + { + "epoch": 7.88, + "learning_rate": 5.283244477802125e-05, + "loss": 8.4448, + "step": 739500 + }, + { + "epoch": 7.88, + "learning_rate": 5.280785637448589e-05, + "loss": 8.3707, + "step": 739600 + }, + { + "epoch": 7.88, + "learning_rate": 5.278327164108843e-05, + "loss": 8.3978, + "step": 739700 + }, + { + "epoch": 7.88, + "learning_rate": 5.27586905797409e-05, + "loss": 8.3151, + "step": 739800 + }, + { + "epoch": 7.88, + "learning_rate": 5.2734113192354815e-05, + "loss": 8.3911, + "step": 739900 + }, + { + "epoch": 7.88, + "learning_rate": 5.2709539480841686e-05, + "loss": 8.3217, + "step": 740000 + }, + { + "epoch": 7.88, + "learning_rate": 5.268496944711252e-05, + "loss": 8.2986, + "step": 740100 + }, + { + "epoch": 7.88, + "learning_rate": 5.266040309307827e-05, + "loss": 8.3226, + "step": 740200 + }, + { + "epoch": 7.89, + "learning_rate": 5.2635840420649285e-05, + "loss": 8.3882, + "step": 740300 + }, + { + "epoch": 7.89, + "learning_rate": 5.261128143173595e-05, + "loss": 8.3611, + "step": 740400 + }, + { + "epoch": 7.89, + "learning_rate": 5.258672612824814e-05, + "loss": 8.4675, + "step": 740500 + }, + { + "epoch": 7.89, + "learning_rate": 5.2562174512095566e-05, + "loss": 8.394, + "step": 740600 + }, + { + "epoch": 7.89, + "learning_rate": 5.253762658518755e-05, + "loss": 8.321, + "step": 740700 + }, + { + "epoch": 7.89, + "learning_rate": 5.2513082349433266e-05, + "loss": 8.3121, + "step": 740800 + }, + { + "epoch": 7.89, + "learning_rate": 5.248854180674148e-05, + "loss": 8.3656, + "step": 740900 + }, + { + "epoch": 7.89, + "learning_rate": 5.246400495902072e-05, + "loss": 8.3095, + "step": 741000 + }, + { + "epoch": 7.89, + "learning_rate": 5.2439471808179186e-05, + "loss": 8.3561, + "step": 741100 + }, + { + "epoch": 7.89, + "learning_rate": 5.241494235612487e-05, + "loss": 8.3467, + "step": 741200 + }, + { + "epoch": 7.9, + "learning_rate": 5.2390416604765434e-05, + "loss": 8.3455, + "step": 741300 + }, + { + "epoch": 7.9, + "learning_rate": 5.236589455600822e-05, + "loss": 8.3257, + "step": 741400 + }, + { + "epoch": 7.9, + "learning_rate": 5.23413762117603e-05, + "loss": 8.3845, + "step": 741500 + }, + { + "epoch": 7.9, + "learning_rate": 5.23168615739285e-05, + "loss": 8.3865, + "step": 741600 + }, + { + "epoch": 7.9, + "learning_rate": 5.2292350644419316e-05, + "loss": 8.3213, + "step": 741700 + }, + { + "epoch": 7.9, + "learning_rate": 5.226784342513893e-05, + "loss": 8.2629, + "step": 741800 + }, + { + "epoch": 7.9, + "learning_rate": 5.224333991799335e-05, + "loss": 8.3535, + "step": 741900 + }, + { + "epoch": 7.9, + "learning_rate": 5.2218840124888156e-05, + "loss": 8.4178, + "step": 742000 + }, + { + "epoch": 7.9, + "learning_rate": 5.219434404772872e-05, + "loss": 8.367, + "step": 742100 + }, + { + "epoch": 7.91, + "learning_rate": 5.2169851688420054e-05, + "loss": 8.3527, + "step": 742200 + }, + { + "epoch": 7.91, + "learning_rate": 5.2145363048867014e-05, + "loss": 8.3335, + "step": 742300 + }, + { + "epoch": 7.91, + "learning_rate": 5.212087813097404e-05, + "loss": 8.2853, + "step": 742400 + }, + { + "epoch": 7.91, + "learning_rate": 5.209639693664534e-05, + "loss": 8.3729, + "step": 742500 + }, + { + "epoch": 7.91, + "learning_rate": 5.207191946778477e-05, + "loss": 8.3216, + "step": 742600 + }, + { + "epoch": 7.91, + "learning_rate": 5.2047445726296026e-05, + "loss": 8.3693, + "step": 742700 + }, + { + "epoch": 7.91, + "learning_rate": 5.202297571408239e-05, + "loss": 8.357, + "step": 742800 + }, + { + "epoch": 7.91, + "learning_rate": 5.1998509433046894e-05, + "loss": 8.3639, + "step": 742900 + }, + { + "epoch": 7.91, + "learning_rate": 5.1974046885092265e-05, + "loss": 8.2746, + "step": 743000 + }, + { + "epoch": 7.91, + "learning_rate": 5.1949588072121e-05, + "loss": 8.3458, + "step": 743100 + }, + { + "epoch": 7.92, + "learning_rate": 5.1925132996035275e-05, + "loss": 8.3322, + "step": 743200 + }, + { + "epoch": 7.92, + "learning_rate": 5.190068165873692e-05, + "loss": 8.2632, + "step": 743300 + }, + { + "epoch": 7.92, + "learning_rate": 5.187623406212749e-05, + "loss": 8.3729, + "step": 743400 + }, + { + "epoch": 7.92, + "learning_rate": 5.1851790208108355e-05, + "loss": 8.3, + "step": 743500 + }, + { + "epoch": 7.92, + "learning_rate": 5.182735009858049e-05, + "loss": 8.3062, + "step": 743600 + }, + { + "epoch": 7.92, + "learning_rate": 5.180291373544459e-05, + "loss": 8.3556, + "step": 743700 + }, + { + "epoch": 7.92, + "learning_rate": 5.177848112060104e-05, + "loss": 8.3595, + "step": 743800 + }, + { + "epoch": 7.92, + "learning_rate": 5.175405225595006e-05, + "loss": 8.3275, + "step": 743900 + }, + { + "epoch": 7.92, + "learning_rate": 5.172962714339142e-05, + "loss": 8.339, + "step": 744000 + }, + { + "epoch": 7.93, + "learning_rate": 5.1705205784824626e-05, + "loss": 8.4662, + "step": 744100 + }, + { + "epoch": 7.93, + "learning_rate": 5.168078818214903e-05, + "loss": 8.2512, + "step": 744200 + }, + { + "epoch": 7.93, + "learning_rate": 5.165637433726355e-05, + "loss": 8.3429, + "step": 744300 + }, + { + "epoch": 7.93, + "learning_rate": 5.1631964252066825e-05, + "loss": 8.3694, + "step": 744400 + }, + { + "epoch": 7.93, + "learning_rate": 5.1607557928457214e-05, + "loss": 8.295, + "step": 744500 + }, + { + "epoch": 7.93, + "learning_rate": 5.158315536833288e-05, + "loss": 8.3342, + "step": 744600 + }, + { + "epoch": 7.93, + "learning_rate": 5.155875657359156e-05, + "loss": 8.299, + "step": 744700 + }, + { + "epoch": 7.93, + "learning_rate": 5.153436154613076e-05, + "loss": 8.2935, + "step": 744800 + }, + { + "epoch": 7.93, + "learning_rate": 5.150997028784763e-05, + "loss": 8.3183, + "step": 744900 + }, + { + "epoch": 7.94, + "learning_rate": 5.148558280063919e-05, + "loss": 8.3778, + "step": 745000 + }, + { + "epoch": 7.94, + "learning_rate": 5.146119908640199e-05, + "loss": 8.3795, + "step": 745100 + }, + { + "epoch": 7.94, + "learning_rate": 5.143681914703236e-05, + "loss": 8.3828, + "step": 745200 + }, + { + "epoch": 7.94, + "learning_rate": 5.141244298442629e-05, + "loss": 8.2263, + "step": 745300 + }, + { + "epoch": 7.94, + "learning_rate": 5.1388070600479656e-05, + "loss": 8.3062, + "step": 745400 + }, + { + "epoch": 7.94, + "learning_rate": 5.136370199708771e-05, + "loss": 8.4526, + "step": 745500 + }, + { + "epoch": 7.94, + "learning_rate": 5.133933717614574e-05, + "loss": 8.3578, + "step": 745600 + }, + { + "epoch": 7.94, + "learning_rate": 5.131497613954852e-05, + "loss": 8.2948, + "step": 745700 + }, + { + "epoch": 7.94, + "learning_rate": 5.1290618889190715e-05, + "loss": 8.317, + "step": 745800 + }, + { + "epoch": 7.94, + "learning_rate": 5.1266265426966444e-05, + "loss": 8.3716, + "step": 745900 + }, + { + "epoch": 7.95, + "learning_rate": 5.124191575476978e-05, + "loss": 8.3362, + "step": 746000 + }, + { + "epoch": 7.95, + "learning_rate": 5.121756987449439e-05, + "loss": 8.3913, + "step": 746100 + }, + { + "epoch": 7.95, + "learning_rate": 5.119322778803365e-05, + "loss": 8.288, + "step": 746200 + }, + { + "epoch": 7.95, + "learning_rate": 5.116888949728065e-05, + "loss": 8.4173, + "step": 746300 + }, + { + "epoch": 7.95, + "learning_rate": 5.1144555004128115e-05, + "loss": 8.3214, + "step": 746400 + }, + { + "epoch": 7.95, + "learning_rate": 5.1120224310468676e-05, + "loss": 8.2794, + "step": 746500 + }, + { + "epoch": 7.95, + "learning_rate": 5.109589741819438e-05, + "loss": 8.3127, + "step": 746600 + }, + { + "epoch": 7.95, + "learning_rate": 5.1071574329197244e-05, + "loss": 8.3025, + "step": 746700 + }, + { + "epoch": 7.95, + "learning_rate": 5.1047255045368804e-05, + "loss": 8.2908, + "step": 746800 + }, + { + "epoch": 7.96, + "learning_rate": 5.102293956860047e-05, + "loss": 8.3649, + "step": 746900 + }, + { + "epoch": 7.96, + "learning_rate": 5.0998627900783134e-05, + "loss": 8.3099, + "step": 747000 + }, + { + "epoch": 7.96, + "learning_rate": 5.097432004380761e-05, + "loss": 8.3068, + "step": 747100 + }, + { + "epoch": 7.96, + "learning_rate": 5.095001599956426e-05, + "loss": 8.3238, + "step": 747200 + }, + { + "epoch": 7.96, + "learning_rate": 5.0925715769943314e-05, + "loss": 8.3223, + "step": 747300 + }, + { + "epoch": 7.96, + "learning_rate": 5.090141935683447e-05, + "loss": 8.3123, + "step": 747400 + }, + { + "epoch": 7.96, + "learning_rate": 5.087712676212736e-05, + "loss": 8.2286, + "step": 747500 + }, + { + "epoch": 7.96, + "learning_rate": 5.085283798771119e-05, + "loss": 8.423, + "step": 747600 + }, + { + "epoch": 7.96, + "learning_rate": 5.082855303547489e-05, + "loss": 8.276, + "step": 747700 + }, + { + "epoch": 7.97, + "learning_rate": 5.080427190730708e-05, + "loss": 8.4193, + "step": 747800 + }, + { + "epoch": 7.97, + "learning_rate": 5.077999460509617e-05, + "loss": 8.3018, + "step": 747900 + }, + { + "epoch": 7.97, + "learning_rate": 5.0755721130730174e-05, + "loss": 8.2723, + "step": 748000 + }, + { + "epoch": 7.97, + "learning_rate": 5.073145148609681e-05, + "loss": 8.3896, + "step": 748100 + }, + { + "epoch": 7.97, + "learning_rate": 5.070718567308361e-05, + "loss": 8.3279, + "step": 748200 + }, + { + "epoch": 7.97, + "learning_rate": 5.068292369357763e-05, + "loss": 8.2793, + "step": 748300 + }, + { + "epoch": 7.97, + "learning_rate": 5.0658665549465876e-05, + "loss": 8.2694, + "step": 748400 + }, + { + "epoch": 7.97, + "learning_rate": 5.063441124263473e-05, + "loss": 8.3056, + "step": 748500 + }, + { + "epoch": 7.97, + "learning_rate": 5.061016077497056e-05, + "loss": 8.3418, + "step": 748600 + }, + { + "epoch": 7.97, + "learning_rate": 5.0585914148359316e-05, + "loss": 8.3446, + "step": 748700 + }, + { + "epoch": 7.98, + "learning_rate": 5.0561671364686635e-05, + "loss": 8.3417, + "step": 748800 + }, + { + "epoch": 7.98, + "learning_rate": 5.053743242583786e-05, + "loss": 8.2812, + "step": 748900 + }, + { + "epoch": 7.98, + "learning_rate": 5.0513197333698124e-05, + "loss": 8.3631, + "step": 749000 + }, + { + "epoch": 7.98, + "learning_rate": 5.048896609015215e-05, + "loss": 8.3354, + "step": 749100 + }, + { + "epoch": 7.98, + "learning_rate": 5.0464738697084414e-05, + "loss": 8.2998, + "step": 749200 + }, + { + "epoch": 7.98, + "learning_rate": 5.044051515637903e-05, + "loss": 8.3098, + "step": 749300 + }, + { + "epoch": 7.98, + "learning_rate": 5.041629546991996e-05, + "loss": 8.2806, + "step": 749400 + }, + { + "epoch": 7.98, + "learning_rate": 5.039207963959073e-05, + "loss": 8.3202, + "step": 749500 + }, + { + "epoch": 7.98, + "learning_rate": 5.036786766727459e-05, + "loss": 8.3953, + "step": 749600 + }, + { + "epoch": 7.99, + "learning_rate": 5.0343659554854494e-05, + "loss": 8.3111, + "step": 749700 + }, + { + "epoch": 7.99, + "learning_rate": 5.031945530421317e-05, + "loss": 8.3062, + "step": 749800 + }, + { + "epoch": 7.99, + "learning_rate": 5.029525491723295e-05, + "loss": 8.2426, + "step": 749900 + }, + { + "epoch": 7.99, + "learning_rate": 5.027105839579591e-05, + "loss": 8.3471, + "step": 750000 + }, + { + "epoch": 7.99, + "learning_rate": 5.024686574178376e-05, + "loss": 8.2639, + "step": 750100 + }, + { + "epoch": 7.99, + "learning_rate": 5.022267695707805e-05, + "loss": 8.3483, + "step": 750200 + }, + { + "epoch": 7.99, + "learning_rate": 5.019849204355991e-05, + "loss": 8.3196, + "step": 750300 + }, + { + "epoch": 7.99, + "learning_rate": 5.0174311003110164e-05, + "loss": 8.2872, + "step": 750400 + }, + { + "epoch": 7.99, + "learning_rate": 5.0150133837609445e-05, + "loss": 8.3562, + "step": 750500 + }, + { + "epoch": 7.99, + "learning_rate": 5.012596054893799e-05, + "loss": 8.3942, + "step": 750600 + }, + { + "epoch": 8.0, + "learning_rate": 5.010179113897574e-05, + "loss": 8.3766, + "step": 750700 + }, + { + "epoch": 8.0, + "learning_rate": 5.007762560960232e-05, + "loss": 8.3378, + "step": 750800 + }, + { + "epoch": 8.0, + "learning_rate": 5.005346396269717e-05, + "loss": 8.2988, + "step": 750900 + }, + { + "epoch": 8.0, + "learning_rate": 5.002930620013928e-05, + "loss": 8.3779, + "step": 751000 + }, + { + "epoch": 8.0, + "learning_rate": 5.000515232380745e-05, + "loss": 8.4353, + "step": 751100 + }, + { + "epoch": 8.0, + "learning_rate": 4.998100233558004e-05, + "loss": 8.32, + "step": 751200 + }, + { + "epoch": 8.0, + "learning_rate": 4.9956856237335294e-05, + "loss": 8.3417, + "step": 751300 + }, + { + "epoch": 8.0, + "learning_rate": 4.993271403095103e-05, + "loss": 8.3173, + "step": 751400 + }, + { + "epoch": 8.0, + "learning_rate": 4.9908575718304787e-05, + "loss": 8.34, + "step": 751500 + }, + { + "epoch": 8.01, + "learning_rate": 4.988444130127374e-05, + "loss": 8.2829, + "step": 751600 + }, + { + "epoch": 8.01, + "learning_rate": 4.986031078173493e-05, + "loss": 8.3183, + "step": 751700 + }, + { + "epoch": 8.01, + "learning_rate": 4.983618416156495e-05, + "loss": 8.2792, + "step": 751800 + }, + { + "epoch": 8.01, + "learning_rate": 4.981206144264011e-05, + "loss": 8.2598, + "step": 751900 + }, + { + "epoch": 8.01, + "learning_rate": 4.978794262683641e-05, + "loss": 8.3461, + "step": 752000 + }, + { + "epoch": 8.01, + "learning_rate": 4.97638277160297e-05, + "loss": 8.304, + "step": 752100 + }, + { + "epoch": 8.01, + "learning_rate": 4.9739716712095216e-05, + "loss": 8.3119, + "step": 752200 + }, + { + "epoch": 8.01, + "learning_rate": 4.971560961690818e-05, + "loss": 8.3062, + "step": 752300 + }, + { + "epoch": 8.01, + "learning_rate": 4.9691506432343424e-05, + "loss": 8.3447, + "step": 752400 + }, + { + "epoch": 8.02, + "learning_rate": 4.966740716027542e-05, + "loss": 8.2941, + "step": 752500 + }, + { + "epoch": 8.02, + "learning_rate": 4.964331180257837e-05, + "loss": 8.3118, + "step": 752600 + }, + { + "epoch": 8.02, + "learning_rate": 4.961922036112615e-05, + "loss": 8.3121, + "step": 752700 + }, + { + "epoch": 8.02, + "learning_rate": 4.959513283779241e-05, + "loss": 8.3576, + "step": 752800 + }, + { + "epoch": 8.02, + "learning_rate": 4.95710492344504e-05, + "loss": 8.4074, + "step": 752900 + }, + { + "epoch": 8.02, + "learning_rate": 4.9546969552973134e-05, + "loss": 8.348, + "step": 753000 + }, + { + "epoch": 8.02, + "learning_rate": 4.952289379523322e-05, + "loss": 8.2549, + "step": 753100 + }, + { + "epoch": 8.02, + "learning_rate": 4.949882196310315e-05, + "loss": 8.3092, + "step": 753200 + }, + { + "epoch": 8.02, + "learning_rate": 4.9474754058454865e-05, + "loss": 8.3529, + "step": 753300 + }, + { + "epoch": 8.02, + "learning_rate": 4.9450690083160214e-05, + "loss": 8.262, + "step": 753400 + }, + { + "epoch": 8.03, + "learning_rate": 4.942663003909059e-05, + "loss": 8.3085, + "step": 753500 + }, + { + "epoch": 8.03, + "learning_rate": 4.940257392811727e-05, + "loss": 8.2889, + "step": 753600 + }, + { + "epoch": 8.03, + "learning_rate": 4.937852175211093e-05, + "loss": 8.3307, + "step": 753700 + }, + { + "epoch": 8.03, + "learning_rate": 4.935447351294224e-05, + "loss": 8.3139, + "step": 753800 + }, + { + "epoch": 8.03, + "learning_rate": 4.933042921248134e-05, + "loss": 8.2347, + "step": 753900 + }, + { + "epoch": 8.03, + "learning_rate": 4.930638885259829e-05, + "loss": 8.3042, + "step": 754000 + }, + { + "epoch": 8.03, + "learning_rate": 4.928235243516255e-05, + "loss": 8.3034, + "step": 754100 + }, + { + "epoch": 8.03, + "learning_rate": 4.9258319962043556e-05, + "loss": 8.3104, + "step": 754200 + }, + { + "epoch": 8.03, + "learning_rate": 4.923429143511027e-05, + "loss": 8.3593, + "step": 754300 + }, + { + "epoch": 8.04, + "learning_rate": 4.921026685623136e-05, + "loss": 8.304, + "step": 754400 + }, + { + "epoch": 8.04, + "learning_rate": 4.9186246227275277e-05, + "loss": 8.3203, + "step": 754500 + }, + { + "epoch": 8.04, + "learning_rate": 4.9162229550110075e-05, + "loss": 8.3016, + "step": 754600 + }, + { + "epoch": 8.04, + "learning_rate": 4.91382168266036e-05, + "loss": 8.2619, + "step": 754700 + }, + { + "epoch": 8.04, + "learning_rate": 4.9114208058623205e-05, + "loss": 8.4049, + "step": 754800 + }, + { + "epoch": 8.04, + "learning_rate": 4.909020324803615e-05, + "loss": 8.3501, + "step": 754900 + }, + { + "epoch": 8.04, + "learning_rate": 4.9066202396709216e-05, + "loss": 8.2575, + "step": 755000 + }, + { + "epoch": 8.04, + "learning_rate": 4.904220550650908e-05, + "loss": 8.3625, + "step": 755100 + }, + { + "epoch": 8.04, + "learning_rate": 4.9018212579301815e-05, + "loss": 8.3245, + "step": 755200 + }, + { + "epoch": 8.04, + "learning_rate": 4.8994223616953485e-05, + "loss": 8.2683, + "step": 755300 + }, + { + "epoch": 8.05, + "learning_rate": 4.897023862132965e-05, + "loss": 8.315, + "step": 755400 + }, + { + "epoch": 8.05, + "learning_rate": 4.8946257594295655e-05, + "loss": 8.3576, + "step": 755500 + }, + { + "epoch": 8.05, + "learning_rate": 4.892228053771646e-05, + "loss": 8.2853, + "step": 755600 + }, + { + "epoch": 8.05, + "learning_rate": 4.8898307453456816e-05, + "loss": 8.2814, + "step": 755700 + }, + { + "epoch": 8.05, + "learning_rate": 4.887433834338111e-05, + "loss": 8.35, + "step": 755800 + }, + { + "epoch": 8.05, + "learning_rate": 4.88503732093534e-05, + "loss": 8.3237, + "step": 755900 + }, + { + "epoch": 8.05, + "learning_rate": 4.882641205323741e-05, + "loss": 8.2915, + "step": 756000 + }, + { + "epoch": 8.05, + "learning_rate": 4.8802454876896706e-05, + "loss": 8.3302, + "step": 756100 + }, + { + "epoch": 8.05, + "learning_rate": 4.8778501682194376e-05, + "loss": 8.3304, + "step": 756200 + }, + { + "epoch": 8.06, + "learning_rate": 4.8754552470993285e-05, + "loss": 8.3192, + "step": 756300 + }, + { + "epoch": 8.06, + "learning_rate": 4.8730607245155916e-05, + "loss": 8.3388, + "step": 756400 + }, + { + "epoch": 8.06, + "learning_rate": 4.8706666006544555e-05, + "loss": 8.3474, + "step": 756500 + }, + { + "epoch": 8.06, + "learning_rate": 4.86827287570211e-05, + "loss": 8.2596, + "step": 756600 + }, + { + "epoch": 8.06, + "learning_rate": 4.8658795498447104e-05, + "loss": 8.2713, + "step": 756700 + }, + { + "epoch": 8.06, + "learning_rate": 4.863486623268394e-05, + "loss": 8.2894, + "step": 756800 + }, + { + "epoch": 8.06, + "learning_rate": 4.861094096159254e-05, + "loss": 8.2709, + "step": 756900 + }, + { + "epoch": 8.06, + "learning_rate": 4.8587019687033586e-05, + "loss": 8.3101, + "step": 757000 + }, + { + "epoch": 8.06, + "learning_rate": 4.85631024108674e-05, + "loss": 8.3098, + "step": 757100 + }, + { + "epoch": 8.07, + "learning_rate": 4.85391891349541e-05, + "loss": 8.2811, + "step": 757200 + }, + { + "epoch": 8.07, + "learning_rate": 4.851527986115338e-05, + "loss": 8.3197, + "step": 757300 + }, + { + "epoch": 8.07, + "learning_rate": 4.849137459132469e-05, + "loss": 8.2662, + "step": 757400 + }, + { + "epoch": 8.07, + "learning_rate": 4.8467473327327096e-05, + "loss": 8.3738, + "step": 757500 + }, + { + "epoch": 8.07, + "learning_rate": 4.844357607101947e-05, + "loss": 8.2913, + "step": 757600 + }, + { + "epoch": 8.07, + "learning_rate": 4.841968282426026e-05, + "loss": 8.253, + "step": 757700 + }, + { + "epoch": 8.07, + "learning_rate": 4.839579358890767e-05, + "loss": 8.4133, + "step": 757800 + }, + { + "epoch": 8.07, + "learning_rate": 4.8371908366819516e-05, + "loss": 8.2905, + "step": 757900 + }, + { + "epoch": 8.07, + "learning_rate": 4.8348027159853426e-05, + "loss": 8.3313, + "step": 758000 + }, + { + "epoch": 8.07, + "learning_rate": 4.832414996986662e-05, + "loss": 8.257, + "step": 758100 + }, + { + "epoch": 8.08, + "learning_rate": 4.8300276798716016e-05, + "loss": 8.2825, + "step": 758200 + }, + { + "epoch": 8.08, + "learning_rate": 4.8276407648258204e-05, + "loss": 8.2725, + "step": 758300 + }, + { + "epoch": 8.08, + "learning_rate": 4.8252542520349566e-05, + "loss": 8.3773, + "step": 758400 + }, + { + "epoch": 8.08, + "learning_rate": 4.8228681416846044e-05, + "loss": 8.41, + "step": 758500 + }, + { + "epoch": 8.08, + "learning_rate": 4.820482433960329e-05, + "loss": 8.3796, + "step": 758600 + }, + { + "epoch": 8.08, + "learning_rate": 4.818097129047676e-05, + "loss": 8.3855, + "step": 758700 + }, + { + "epoch": 8.08, + "learning_rate": 4.815712227132145e-05, + "loss": 8.3408, + "step": 758800 + }, + { + "epoch": 8.08, + "learning_rate": 4.8133277283992115e-05, + "loss": 8.404, + "step": 758900 + }, + { + "epoch": 8.08, + "learning_rate": 4.810943633034313e-05, + "loss": 8.2789, + "step": 759000 + }, + { + "epoch": 8.09, + "learning_rate": 4.80855994122287e-05, + "loss": 8.3205, + "step": 759100 + }, + { + "epoch": 8.09, + "learning_rate": 4.806176653150257e-05, + "loss": 8.3379, + "step": 759200 + }, + { + "epoch": 8.09, + "learning_rate": 4.8037937690018244e-05, + "loss": 8.3224, + "step": 759300 + }, + { + "epoch": 8.09, + "learning_rate": 4.801411288962885e-05, + "loss": 8.2463, + "step": 759400 + }, + { + "epoch": 8.09, + "learning_rate": 4.799029213218731e-05, + "loss": 8.3215, + "step": 759500 + }, + { + "epoch": 8.09, + "learning_rate": 4.796647541954613e-05, + "loss": 8.3583, + "step": 759600 + }, + { + "epoch": 8.09, + "learning_rate": 4.7942662753557556e-05, + "loss": 8.3012, + "step": 759700 + }, + { + "epoch": 8.09, + "learning_rate": 4.7918854136073445e-05, + "loss": 8.2844, + "step": 759800 + }, + { + "epoch": 8.09, + "learning_rate": 4.789504956894547e-05, + "loss": 8.3222, + "step": 759900 + }, + { + "epoch": 8.1, + "learning_rate": 4.7871249054024904e-05, + "loss": 8.2941, + "step": 760000 + }, + { + "epoch": 8.1, + "learning_rate": 4.784745259316268e-05, + "loss": 8.3561, + "step": 760100 + }, + { + "epoch": 8.1, + "learning_rate": 4.782366018820944e-05, + "loss": 8.2528, + "step": 760200 + }, + { + "epoch": 8.1, + "learning_rate": 4.779987184101562e-05, + "loss": 8.3142, + "step": 760300 + }, + { + "epoch": 8.1, + "learning_rate": 4.77760875534311e-05, + "loss": 8.2523, + "step": 760400 + }, + { + "epoch": 8.1, + "learning_rate": 4.77523073273057e-05, + "loss": 8.3081, + "step": 760500 + }, + { + "epoch": 8.1, + "learning_rate": 4.772853116448872e-05, + "loss": 8.294, + "step": 760600 + }, + { + "epoch": 8.1, + "learning_rate": 4.770475906682933e-05, + "loss": 8.2772, + "step": 760700 + }, + { + "epoch": 8.1, + "learning_rate": 4.768099103617624e-05, + "loss": 8.3889, + "step": 760800 + }, + { + "epoch": 8.1, + "learning_rate": 4.765722707437785e-05, + "loss": 8.2871, + "step": 760900 + }, + { + "epoch": 8.11, + "learning_rate": 4.763346718328238e-05, + "loss": 8.2907, + "step": 761000 + }, + { + "epoch": 8.11, + "learning_rate": 4.760971136473759e-05, + "loss": 8.3036, + "step": 761100 + }, + { + "epoch": 8.11, + "learning_rate": 4.7585959620590956e-05, + "loss": 8.2764, + "step": 761200 + }, + { + "epoch": 8.11, + "learning_rate": 4.756221195268965e-05, + "loss": 8.3969, + "step": 761300 + }, + { + "epoch": 8.11, + "learning_rate": 4.7538468362880615e-05, + "loss": 8.4331, + "step": 761400 + }, + { + "epoch": 8.11, + "learning_rate": 4.7514728853010246e-05, + "loss": 8.2956, + "step": 761500 + }, + { + "epoch": 8.11, + "learning_rate": 4.7490993424924904e-05, + "loss": 8.2677, + "step": 761600 + }, + { + "epoch": 8.11, + "learning_rate": 4.746726208047039e-05, + "loss": 8.2695, + "step": 761700 + }, + { + "epoch": 8.11, + "learning_rate": 4.744353482149242e-05, + "loss": 8.4119, + "step": 761800 + }, + { + "epoch": 8.12, + "learning_rate": 4.741981164983611e-05, + "loss": 8.2505, + "step": 761900 + }, + { + "epoch": 8.12, + "learning_rate": 4.739609256734654e-05, + "loss": 8.2743, + "step": 762000 + }, + { + "epoch": 8.12, + "learning_rate": 4.737237757586829e-05, + "loss": 8.3063, + "step": 762100 + }, + { + "epoch": 8.12, + "learning_rate": 4.734866667724569e-05, + "loss": 8.283, + "step": 762200 + }, + { + "epoch": 8.12, + "learning_rate": 4.7324959873322695e-05, + "loss": 8.3095, + "step": 762300 + }, + { + "epoch": 8.12, + "learning_rate": 4.730125716594306e-05, + "loss": 8.264, + "step": 762400 + }, + { + "epoch": 8.12, + "learning_rate": 4.727755855695011e-05, + "loss": 8.3222, + "step": 762500 + }, + { + "epoch": 8.12, + "learning_rate": 4.725386404818689e-05, + "loss": 8.3306, + "step": 762600 + }, + { + "epoch": 8.12, + "learning_rate": 4.7230173641496086e-05, + "loss": 8.2638, + "step": 762700 + }, + { + "epoch": 8.12, + "learning_rate": 4.720648733872015e-05, + "loss": 8.3266, + "step": 762800 + }, + { + "epoch": 8.13, + "learning_rate": 4.718280514170124e-05, + "loss": 8.3002, + "step": 762900 + }, + { + "epoch": 8.13, + "learning_rate": 4.715912705228095e-05, + "loss": 8.3038, + "step": 763000 + }, + { + "epoch": 8.13, + "learning_rate": 4.713545307230087e-05, + "loss": 8.303, + "step": 763100 + }, + { + "epoch": 8.13, + "learning_rate": 4.711178320360208e-05, + "loss": 8.275, + "step": 763200 + }, + { + "epoch": 8.13, + "learning_rate": 4.708811744802537e-05, + "loss": 8.2203, + "step": 763300 + }, + { + "epoch": 8.13, + "learning_rate": 4.7064455807411226e-05, + "loss": 8.3568, + "step": 763400 + }, + { + "epoch": 8.13, + "learning_rate": 4.7040798283599865e-05, + "loss": 8.3515, + "step": 763500 + }, + { + "epoch": 8.13, + "learning_rate": 4.7017144878431105e-05, + "loss": 8.3423, + "step": 763600 + }, + { + "epoch": 8.13, + "learning_rate": 4.699349559374447e-05, + "loss": 8.3598, + "step": 763700 + }, + { + "epoch": 8.14, + "learning_rate": 4.696985043137914e-05, + "loss": 8.3191, + "step": 763800 + }, + { + "epoch": 8.14, + "learning_rate": 4.6946209393174065e-05, + "loss": 8.3133, + "step": 763900 + }, + { + "epoch": 8.14, + "learning_rate": 4.6922572480967785e-05, + "loss": 8.3811, + "step": 764000 + }, + { + "epoch": 8.14, + "learning_rate": 4.689893969659853e-05, + "loss": 8.4151, + "step": 764100 + }, + { + "epoch": 8.14, + "learning_rate": 4.68753110419042e-05, + "loss": 8.3313, + "step": 764200 + }, + { + "epoch": 8.14, + "learning_rate": 4.685168651872247e-05, + "loss": 8.2794, + "step": 764300 + }, + { + "epoch": 8.14, + "learning_rate": 4.682806612889059e-05, + "loss": 8.3164, + "step": 764400 + }, + { + "epoch": 8.14, + "learning_rate": 4.68044498742455e-05, + "loss": 8.3319, + "step": 764500 + }, + { + "epoch": 8.14, + "learning_rate": 4.678083775662382e-05, + "loss": 8.3406, + "step": 764600 + }, + { + "epoch": 8.15, + "learning_rate": 4.675722977786193e-05, + "loss": 8.3444, + "step": 764700 + }, + { + "epoch": 8.15, + "learning_rate": 4.67336259397958e-05, + "loss": 8.3395, + "step": 764800 + }, + { + "epoch": 8.15, + "learning_rate": 4.671002624426106e-05, + "loss": 8.3433, + "step": 764900 + }, + { + "epoch": 8.15, + "learning_rate": 4.668643069309313e-05, + "loss": 8.2873, + "step": 765000 + }, + { + "epoch": 8.15, + "learning_rate": 4.6662839288127e-05, + "loss": 8.2733, + "step": 765100 + }, + { + "epoch": 8.15, + "learning_rate": 4.663925203119739e-05, + "loss": 8.3673, + "step": 765200 + }, + { + "epoch": 8.15, + "learning_rate": 4.6615668924138635e-05, + "loss": 8.2492, + "step": 765300 + }, + { + "epoch": 8.15, + "learning_rate": 4.659208996878487e-05, + "loss": 8.3209, + "step": 765400 + }, + { + "epoch": 8.15, + "learning_rate": 4.656851516696979e-05, + "loss": 8.3032, + "step": 765500 + }, + { + "epoch": 8.15, + "learning_rate": 4.6544944520526834e-05, + "loss": 8.2799, + "step": 765600 + }, + { + "epoch": 8.16, + "learning_rate": 4.652137803128903e-05, + "loss": 8.2492, + "step": 765700 + }, + { + "epoch": 8.16, + "learning_rate": 4.649781570108924e-05, + "loss": 8.3176, + "step": 765800 + }, + { + "epoch": 8.16, + "learning_rate": 4.6474257531759856e-05, + "loss": 8.2642, + "step": 765900 + }, + { + "epoch": 8.16, + "learning_rate": 4.645070352513301e-05, + "loss": 8.312, + "step": 766000 + }, + { + "epoch": 8.16, + "learning_rate": 4.6427153683040457e-05, + "loss": 8.2727, + "step": 766100 + }, + { + "epoch": 8.16, + "learning_rate": 4.640360800731375e-05, + "loss": 8.2571, + "step": 766200 + }, + { + "epoch": 8.16, + "learning_rate": 4.638006649978398e-05, + "loss": 8.2905, + "step": 766300 + }, + { + "epoch": 8.16, + "learning_rate": 4.6356529162282e-05, + "loss": 8.2944, + "step": 766400 + }, + { + "epoch": 8.16, + "learning_rate": 4.6332995996638276e-05, + "loss": 8.3656, + "step": 766500 + }, + { + "epoch": 8.17, + "learning_rate": 4.630946700468304e-05, + "loss": 8.2606, + "step": 766600 + }, + { + "epoch": 8.17, + "learning_rate": 4.628594218824611e-05, + "loss": 8.3642, + "step": 766700 + }, + { + "epoch": 8.17, + "learning_rate": 4.626242154915702e-05, + "loss": 8.3176, + "step": 766800 + }, + { + "epoch": 8.17, + "learning_rate": 4.623890508924493e-05, + "loss": 8.2843, + "step": 766900 + }, + { + "epoch": 8.17, + "learning_rate": 4.621539281033881e-05, + "loss": 8.3291, + "step": 767000 + }, + { + "epoch": 8.17, + "learning_rate": 4.619188471426714e-05, + "loss": 8.3288, + "step": 767100 + }, + { + "epoch": 8.17, + "learning_rate": 4.616838080285815e-05, + "loss": 8.3612, + "step": 767200 + }, + { + "epoch": 8.17, + "learning_rate": 4.614488107793977e-05, + "loss": 8.326, + "step": 767300 + }, + { + "epoch": 8.17, + "learning_rate": 4.612138554133959e-05, + "loss": 8.2712, + "step": 767400 + }, + { + "epoch": 8.17, + "learning_rate": 4.6097894194884813e-05, + "loss": 8.3116, + "step": 767500 + }, + { + "epoch": 8.18, + "learning_rate": 4.607440704040237e-05, + "loss": 8.355, + "step": 767600 + }, + { + "epoch": 8.18, + "learning_rate": 4.605092407971891e-05, + "loss": 8.291, + "step": 767700 + }, + { + "epoch": 8.18, + "learning_rate": 4.602744531466065e-05, + "loss": 8.3399, + "step": 767800 + }, + { + "epoch": 8.18, + "learning_rate": 4.600397074705357e-05, + "loss": 8.3232, + "step": 767900 + }, + { + "epoch": 8.18, + "learning_rate": 4.5980500378723235e-05, + "loss": 8.3808, + "step": 768000 + }, + { + "epoch": 8.18, + "learning_rate": 4.595703421149507e-05, + "loss": 8.2938, + "step": 768100 + }, + { + "epoch": 8.18, + "learning_rate": 4.5933572247193854e-05, + "loss": 8.3684, + "step": 768200 + }, + { + "epoch": 8.18, + "learning_rate": 4.591011448764436e-05, + "loss": 8.3281, + "step": 768300 + }, + { + "epoch": 8.18, + "learning_rate": 4.5886660934670845e-05, + "loss": 8.3619, + "step": 768400 + }, + { + "epoch": 8.19, + "learning_rate": 4.586321159009736e-05, + "loss": 8.2872, + "step": 768500 + }, + { + "epoch": 8.19, + "learning_rate": 4.583976645574747e-05, + "loss": 8.3783, + "step": 768600 + }, + { + "epoch": 8.19, + "learning_rate": 4.581632553344457e-05, + "loss": 8.3623, + "step": 768700 + }, + { + "epoch": 8.19, + "learning_rate": 4.57928888250116e-05, + "loss": 8.3158, + "step": 768800 + }, + { + "epoch": 8.19, + "learning_rate": 4.5769456332271377e-05, + "loss": 8.4373, + "step": 768900 + }, + { + "epoch": 8.19, + "learning_rate": 4.574602805704605e-05, + "loss": 8.4186, + "step": 769000 + }, + { + "epoch": 8.19, + "learning_rate": 4.572260400115775e-05, + "loss": 8.322, + "step": 769100 + }, + { + "epoch": 8.19, + "learning_rate": 4.569918416642823e-05, + "loss": 8.3189, + "step": 769200 + }, + { + "epoch": 8.19, + "learning_rate": 4.567576855467871e-05, + "loss": 8.3423, + "step": 769300 + }, + { + "epoch": 8.2, + "learning_rate": 4.565235716773033e-05, + "loss": 8.306, + "step": 769400 + }, + { + "epoch": 8.2, + "learning_rate": 4.562895000740373e-05, + "loss": 8.318, + "step": 769500 + }, + { + "epoch": 8.2, + "learning_rate": 4.560554707551938e-05, + "loss": 8.2685, + "step": 769600 + }, + { + "epoch": 8.2, + "learning_rate": 4.558214837389719e-05, + "loss": 8.3846, + "step": 769700 + }, + { + "epoch": 8.2, + "learning_rate": 4.5558753904357e-05, + "loss": 8.2887, + "step": 769800 + }, + { + "epoch": 8.2, + "learning_rate": 4.553536366871812e-05, + "loss": 8.3132, + "step": 769900 + }, + { + "epoch": 8.2, + "learning_rate": 4.551197766879972e-05, + "loss": 8.2872, + "step": 770000 + }, + { + "epoch": 8.2, + "learning_rate": 4.548859590642038e-05, + "loss": 8.427, + "step": 770100 + }, + { + "epoch": 8.2, + "learning_rate": 4.5465218383398624e-05, + "loss": 8.2987, + "step": 770200 + }, + { + "epoch": 8.2, + "learning_rate": 4.544184510155247e-05, + "loss": 8.2758, + "step": 770300 + }, + { + "epoch": 8.21, + "learning_rate": 4.5418476062699676e-05, + "loss": 8.3352, + "step": 770400 + }, + { + "epoch": 8.21, + "learning_rate": 4.539511126865762e-05, + "loss": 8.2598, + "step": 770500 + }, + { + "epoch": 8.21, + "learning_rate": 4.537175072124345e-05, + "loss": 8.3071, + "step": 770600 + }, + { + "epoch": 8.21, + "learning_rate": 4.5348394422273874e-05, + "loss": 8.3077, + "step": 770700 + }, + { + "epoch": 8.21, + "learning_rate": 4.532504237356533e-05, + "loss": 8.2968, + "step": 770800 + }, + { + "epoch": 8.21, + "learning_rate": 4.530169457693386e-05, + "loss": 8.3358, + "step": 770900 + }, + { + "epoch": 8.21, + "learning_rate": 4.527835103419531e-05, + "loss": 8.2635, + "step": 771000 + }, + { + "epoch": 8.21, + "learning_rate": 4.525501174716507e-05, + "loss": 8.2893, + "step": 771100 + }, + { + "epoch": 8.21, + "learning_rate": 4.523167671765819e-05, + "loss": 8.386, + "step": 771200 + }, + { + "epoch": 8.22, + "learning_rate": 4.520834594748952e-05, + "loss": 8.2862, + "step": 771300 + }, + { + "epoch": 8.22, + "learning_rate": 4.518501943847345e-05, + "loss": 8.3303, + "step": 771400 + }, + { + "epoch": 8.22, + "learning_rate": 4.516169719242411e-05, + "loss": 8.3268, + "step": 771500 + }, + { + "epoch": 8.22, + "learning_rate": 4.5138379211155214e-05, + "loss": 8.3935, + "step": 771600 + }, + { + "epoch": 8.22, + "learning_rate": 4.511506549648028e-05, + "loss": 8.2634, + "step": 771700 + }, + { + "epoch": 8.22, + "learning_rate": 4.5091756050212385e-05, + "loss": 8.3845, + "step": 771800 + }, + { + "epoch": 8.22, + "learning_rate": 4.5068450874164314e-05, + "loss": 8.4281, + "step": 771900 + }, + { + "epoch": 8.22, + "learning_rate": 4.504514997014847e-05, + "loss": 8.3118, + "step": 772000 + }, + { + "epoch": 8.22, + "learning_rate": 4.502185333997704e-05, + "loss": 8.3056, + "step": 772100 + }, + { + "epoch": 8.22, + "learning_rate": 4.499856098546176e-05, + "loss": 8.2506, + "step": 772200 + }, + { + "epoch": 8.23, + "learning_rate": 4.497527290841409e-05, + "loss": 8.2776, + "step": 772300 + }, + { + "epoch": 8.23, + "learning_rate": 4.495198911064511e-05, + "loss": 8.2813, + "step": 772400 + }, + { + "epoch": 8.23, + "learning_rate": 4.492870959396568e-05, + "loss": 8.3068, + "step": 772500 + }, + { + "epoch": 8.23, + "learning_rate": 4.49054343601862e-05, + "loss": 8.2958, + "step": 772600 + }, + { + "epoch": 8.23, + "learning_rate": 4.4882163411116786e-05, + "loss": 8.3338, + "step": 772700 + }, + { + "epoch": 8.23, + "learning_rate": 4.4858896748567204e-05, + "loss": 8.3271, + "step": 772800 + }, + { + "epoch": 8.23, + "learning_rate": 4.483563437434696e-05, + "loss": 8.3649, + "step": 772900 + }, + { + "epoch": 8.23, + "learning_rate": 4.4812376290265146e-05, + "loss": 8.3616, + "step": 773000 + }, + { + "epoch": 8.23, + "learning_rate": 4.478912249813055e-05, + "loss": 8.3114, + "step": 773100 + }, + { + "epoch": 8.24, + "learning_rate": 4.4765872999751566e-05, + "loss": 8.3054, + "step": 773200 + }, + { + "epoch": 8.24, + "learning_rate": 4.474262779693641e-05, + "loss": 8.2718, + "step": 773300 + }, + { + "epoch": 8.24, + "learning_rate": 4.47193868914928e-05, + "loss": 8.3286, + "step": 773400 + }, + { + "epoch": 8.24, + "learning_rate": 4.469615028522816e-05, + "loss": 8.37, + "step": 773500 + }, + { + "epoch": 8.24, + "learning_rate": 4.4672917979949666e-05, + "loss": 8.3298, + "step": 773600 + }, + { + "epoch": 8.24, + "learning_rate": 4.4649689977464085e-05, + "loss": 8.2755, + "step": 773700 + }, + { + "epoch": 8.24, + "learning_rate": 4.462646627957784e-05, + "loss": 8.3544, + "step": 773800 + }, + { + "epoch": 8.24, + "learning_rate": 4.460324688809701e-05, + "loss": 8.327, + "step": 773900 + }, + { + "epoch": 8.24, + "learning_rate": 4.458003180482744e-05, + "loss": 8.2807, + "step": 774000 + }, + { + "epoch": 8.25, + "learning_rate": 4.455682103157453e-05, + "loss": 8.2035, + "step": 774100 + }, + { + "epoch": 8.25, + "learning_rate": 4.45336145701434e-05, + "loss": 8.3293, + "step": 774200 + }, + { + "epoch": 8.25, + "learning_rate": 4.4510412422338765e-05, + "loss": 8.3373, + "step": 774300 + }, + { + "epoch": 8.25, + "learning_rate": 4.4487214589965144e-05, + "loss": 8.264, + "step": 774400 + }, + { + "epoch": 8.25, + "learning_rate": 4.446402107482659e-05, + "loss": 8.3063, + "step": 774500 + }, + { + "epoch": 8.25, + "learning_rate": 4.444083187872686e-05, + "loss": 8.2955, + "step": 774600 + }, + { + "epoch": 8.25, + "learning_rate": 4.4417647003469354e-05, + "loss": 8.3857, + "step": 774700 + }, + { + "epoch": 8.25, + "learning_rate": 4.4394466450857244e-05, + "loss": 8.3483, + "step": 774800 + }, + { + "epoch": 8.25, + "learning_rate": 4.437129022269322e-05, + "loss": 8.3765, + "step": 774900 + }, + { + "epoch": 8.25, + "learning_rate": 4.434811832077972e-05, + "loss": 8.3546, + "step": 775000 + }, + { + "epoch": 8.26, + "learning_rate": 4.432495074691878e-05, + "loss": 8.2744, + "step": 775100 + }, + { + "epoch": 8.26, + "learning_rate": 4.4301787502912275e-05, + "loss": 8.2888, + "step": 775200 + }, + { + "epoch": 8.26, + "learning_rate": 4.427862859056143e-05, + "loss": 8.3535, + "step": 775300 + }, + { + "epoch": 8.26, + "learning_rate": 4.425547401166742e-05, + "loss": 8.2852, + "step": 775400 + }, + { + "epoch": 8.26, + "learning_rate": 4.4232323768031e-05, + "loss": 8.3156, + "step": 775500 + }, + { + "epoch": 8.26, + "learning_rate": 4.420917786145253e-05, + "loss": 8.3119, + "step": 775600 + }, + { + "epoch": 8.26, + "learning_rate": 4.418603629373209e-05, + "loss": 8.3629, + "step": 775700 + }, + { + "epoch": 8.26, + "learning_rate": 4.416289906666934e-05, + "loss": 8.3371, + "step": 775800 + }, + { + "epoch": 8.26, + "learning_rate": 4.413976618206379e-05, + "loss": 8.3301, + "step": 775900 + }, + { + "epoch": 8.27, + "learning_rate": 4.4116637641714334e-05, + "loss": 8.3373, + "step": 776000 + }, + { + "epoch": 8.27, + "learning_rate": 4.409351344741977e-05, + "loss": 8.3693, + "step": 776100 + }, + { + "epoch": 8.27, + "learning_rate": 4.4070393600978446e-05, + "loss": 8.3392, + "step": 776200 + }, + { + "epoch": 8.27, + "learning_rate": 4.404727810418846e-05, + "loss": 8.3038, + "step": 776300 + }, + { + "epoch": 8.27, + "learning_rate": 4.402416695884739e-05, + "loss": 8.2995, + "step": 776400 + }, + { + "epoch": 8.27, + "learning_rate": 4.400106016675267e-05, + "loss": 8.3308, + "step": 776500 + }, + { + "epoch": 8.27, + "learning_rate": 4.397795772970128e-05, + "loss": 8.3025, + "step": 776600 + }, + { + "epoch": 8.27, + "learning_rate": 4.395485964948998e-05, + "loss": 8.4344, + "step": 776700 + }, + { + "epoch": 8.27, + "learning_rate": 4.393176592791498e-05, + "loss": 8.2222, + "step": 776800 + }, + { + "epoch": 8.28, + "learning_rate": 4.390867656677238e-05, + "loss": 8.2826, + "step": 776900 + }, + { + "epoch": 8.28, + "learning_rate": 4.388559156785782e-05, + "loss": 8.332, + "step": 777000 + }, + { + "epoch": 8.28, + "learning_rate": 4.3862510932966604e-05, + "loss": 8.313, + "step": 777100 + }, + { + "epoch": 8.28, + "learning_rate": 4.3839434663893696e-05, + "loss": 8.3412, + "step": 777200 + }, + { + "epoch": 8.28, + "learning_rate": 4.38163627624338e-05, + "loss": 8.2653, + "step": 777300 + }, + { + "epoch": 8.28, + "learning_rate": 4.379329523038119e-05, + "loss": 8.2581, + "step": 777400 + }, + { + "epoch": 8.28, + "learning_rate": 4.37702320695298e-05, + "loss": 8.3178, + "step": 777500 + }, + { + "epoch": 8.28, + "learning_rate": 4.374717328167331e-05, + "loss": 8.3804, + "step": 777600 + }, + { + "epoch": 8.28, + "learning_rate": 4.372411886860496e-05, + "loss": 8.3296, + "step": 777700 + }, + { + "epoch": 8.28, + "learning_rate": 4.3701068832117774e-05, + "loss": 8.2697, + "step": 777800 + }, + { + "epoch": 8.29, + "learning_rate": 4.3678023174004224e-05, + "loss": 8.3217, + "step": 777900 + }, + { + "epoch": 8.29, + "learning_rate": 4.3654981896056676e-05, + "loss": 8.3001, + "step": 778000 + }, + { + "epoch": 8.29, + "learning_rate": 4.363194500006702e-05, + "loss": 8.3018, + "step": 778100 + }, + { + "epoch": 8.29, + "learning_rate": 4.3608912487826835e-05, + "loss": 8.2355, + "step": 778200 + }, + { + "epoch": 8.29, + "learning_rate": 4.3585884361127335e-05, + "loss": 8.2306, + "step": 778300 + }, + { + "epoch": 8.29, + "learning_rate": 4.356286062175948e-05, + "loss": 8.2769, + "step": 778400 + }, + { + "epoch": 8.29, + "learning_rate": 4.35398412715138e-05, + "loss": 8.2931, + "step": 778500 + }, + { + "epoch": 8.29, + "learning_rate": 4.351682631218051e-05, + "loss": 8.2752, + "step": 778600 + }, + { + "epoch": 8.29, + "learning_rate": 4.349381574554946e-05, + "loss": 8.2269, + "step": 778700 + }, + { + "epoch": 8.3, + "learning_rate": 4.3470809573410245e-05, + "loss": 8.3145, + "step": 778800 + }, + { + "epoch": 8.3, + "learning_rate": 4.344780779755201e-05, + "loss": 8.2831, + "step": 778900 + }, + { + "epoch": 8.3, + "learning_rate": 4.342481041976363e-05, + "loss": 8.3831, + "step": 779000 + }, + { + "epoch": 8.3, + "learning_rate": 4.3401817441833556e-05, + "loss": 8.2982, + "step": 779100 + }, + { + "epoch": 8.3, + "learning_rate": 4.337882886555004e-05, + "loss": 8.2953, + "step": 779200 + }, + { + "epoch": 8.3, + "learning_rate": 4.3355844692700864e-05, + "loss": 8.2719, + "step": 779300 + }, + { + "epoch": 8.3, + "learning_rate": 4.3332864925073516e-05, + "loss": 8.2937, + "step": 779400 + }, + { + "epoch": 8.3, + "learning_rate": 4.3309889564455085e-05, + "loss": 8.3075, + "step": 779500 + }, + { + "epoch": 8.3, + "learning_rate": 4.328691861263245e-05, + "loss": 8.2254, + "step": 779600 + }, + { + "epoch": 8.3, + "learning_rate": 4.3263952071392036e-05, + "loss": 8.329, + "step": 779700 + }, + { + "epoch": 8.31, + "learning_rate": 4.3240989942519905e-05, + "loss": 8.2473, + "step": 779800 + }, + { + "epoch": 8.31, + "learning_rate": 4.3218032227801894e-05, + "loss": 8.3352, + "step": 779900 + }, + { + "epoch": 8.31, + "learning_rate": 4.31950789290234e-05, + "loss": 8.3366, + "step": 780000 + }, + { + "epoch": 8.31, + "learning_rate": 4.3172130047969504e-05, + "loss": 8.3426, + "step": 780100 + }, + { + "epoch": 8.31, + "learning_rate": 4.3149185586424886e-05, + "loss": 8.3264, + "step": 780200 + }, + { + "epoch": 8.31, + "learning_rate": 4.312624554617405e-05, + "loss": 8.3535, + "step": 780300 + }, + { + "epoch": 8.31, + "learning_rate": 4.3103309929000965e-05, + "loss": 8.3785, + "step": 780400 + }, + { + "epoch": 8.31, + "learning_rate": 4.308037873668938e-05, + "loss": 8.2685, + "step": 780500 + }, + { + "epoch": 8.31, + "learning_rate": 4.305745197102258e-05, + "loss": 8.3165, + "step": 780600 + }, + { + "epoch": 8.32, + "learning_rate": 4.3034529633783684e-05, + "loss": 8.2574, + "step": 780700 + }, + { + "epoch": 8.32, + "learning_rate": 4.301161172675531e-05, + "loss": 8.2277, + "step": 780800 + }, + { + "epoch": 8.32, + "learning_rate": 4.298869825171979e-05, + "loss": 8.3556, + "step": 780900 + }, + { + "epoch": 8.32, + "learning_rate": 4.2965789210459076e-05, + "loss": 8.338, + "step": 781000 + }, + { + "epoch": 8.32, + "learning_rate": 4.294288460475489e-05, + "loss": 8.2988, + "step": 781100 + }, + { + "epoch": 8.32, + "learning_rate": 4.291998443638846e-05, + "loss": 8.2629, + "step": 781200 + }, + { + "epoch": 8.32, + "learning_rate": 4.2897088707140753e-05, + "loss": 8.2395, + "step": 781300 + }, + { + "epoch": 8.32, + "learning_rate": 4.2874197418792336e-05, + "loss": 8.3138, + "step": 781400 + }, + { + "epoch": 8.32, + "learning_rate": 4.285131057312354e-05, + "loss": 8.3277, + "step": 781500 + }, + { + "epoch": 8.33, + "learning_rate": 4.282842817191423e-05, + "loss": 8.4252, + "step": 781600 + }, + { + "epoch": 8.33, + "learning_rate": 4.280555021694395e-05, + "loss": 8.3596, + "step": 781700 + }, + { + "epoch": 8.33, + "learning_rate": 4.278267670999199e-05, + "loss": 8.2834, + "step": 781800 + }, + { + "epoch": 8.33, + "learning_rate": 4.275980765283718e-05, + "loss": 8.2597, + "step": 781900 + }, + { + "epoch": 8.33, + "learning_rate": 4.273694304725806e-05, + "loss": 8.2692, + "step": 782000 + }, + { + "epoch": 8.33, + "learning_rate": 4.271408289503277e-05, + "loss": 8.4273, + "step": 782100 + }, + { + "epoch": 8.33, + "learning_rate": 4.269122719793922e-05, + "loss": 8.2744, + "step": 782200 + }, + { + "epoch": 8.33, + "learning_rate": 4.266837595775487e-05, + "loss": 8.2671, + "step": 782300 + }, + { + "epoch": 8.33, + "learning_rate": 4.264552917625686e-05, + "loss": 8.3111, + "step": 782400 + }, + { + "epoch": 8.33, + "learning_rate": 4.2622686855221936e-05, + "loss": 8.359, + "step": 782500 + }, + { + "epoch": 8.34, + "learning_rate": 4.259984899642665e-05, + "loss": 8.2323, + "step": 782600 + }, + { + "epoch": 8.34, + "learning_rate": 4.257701560164704e-05, + "loss": 8.2789, + "step": 782700 + }, + { + "epoch": 8.34, + "learning_rate": 4.255418667265887e-05, + "loss": 8.2474, + "step": 782800 + }, + { + "epoch": 8.34, + "learning_rate": 4.253136221123752e-05, + "loss": 8.3012, + "step": 782900 + }, + { + "epoch": 8.34, + "learning_rate": 4.250854221915816e-05, + "loss": 8.3184, + "step": 783000 + }, + { + "epoch": 8.34, + "learning_rate": 4.2485726698195337e-05, + "loss": 8.2577, + "step": 783100 + }, + { + "epoch": 8.34, + "learning_rate": 4.246291565012355e-05, + "loss": 8.2711, + "step": 783200 + }, + { + "epoch": 8.34, + "learning_rate": 4.244010907671673e-05, + "loss": 8.3059, + "step": 783300 + }, + { + "epoch": 8.34, + "learning_rate": 4.241730697974866e-05, + "loss": 8.2966, + "step": 783400 + }, + { + "epoch": 8.35, + "learning_rate": 4.239450936099252e-05, + "loss": 8.2735, + "step": 783500 + }, + { + "epoch": 8.35, + "learning_rate": 4.237171622222138e-05, + "loss": 8.319, + "step": 783600 + }, + { + "epoch": 8.35, + "learning_rate": 4.234892756520781e-05, + "loss": 8.3417, + "step": 783700 + }, + { + "epoch": 8.35, + "learning_rate": 4.232614339172414e-05, + "loss": 8.3186, + "step": 783800 + }, + { + "epoch": 8.35, + "learning_rate": 4.230336370354228e-05, + "loss": 8.1956, + "step": 783900 + }, + { + "epoch": 8.35, + "learning_rate": 4.228058850243375e-05, + "loss": 8.2791, + "step": 784000 + }, + { + "epoch": 8.35, + "learning_rate": 4.2257817790169905e-05, + "loss": 8.3498, + "step": 784100 + }, + { + "epoch": 8.35, + "learning_rate": 4.223505156852148e-05, + "loss": 8.2216, + "step": 784200 + }, + { + "epoch": 8.35, + "learning_rate": 4.221228983925911e-05, + "loss": 8.279, + "step": 784300 + }, + { + "epoch": 8.35, + "learning_rate": 4.2189532604152905e-05, + "loss": 8.3247, + "step": 784400 + }, + { + "epoch": 8.36, + "learning_rate": 4.2166779864972814e-05, + "loss": 8.3042, + "step": 784500 + }, + { + "epoch": 8.36, + "learning_rate": 4.2144031623488165e-05, + "loss": 8.3009, + "step": 784600 + }, + { + "epoch": 8.36, + "learning_rate": 4.2121287881468195e-05, + "loss": 8.3036, + "step": 784700 + }, + { + "epoch": 8.36, + "learning_rate": 4.2098548640681635e-05, + "loss": 8.3119, + "step": 784800 + }, + { + "epoch": 8.36, + "learning_rate": 4.207581390289701e-05, + "loss": 8.3274, + "step": 784900 + }, + { + "epoch": 8.36, + "learning_rate": 4.205308366988227e-05, + "loss": 8.3617, + "step": 785000 + }, + { + "epoch": 8.36, + "learning_rate": 4.203035794340524e-05, + "loss": 8.4156, + "step": 785100 + }, + { + "epoch": 8.36, + "learning_rate": 4.200763672523328e-05, + "loss": 8.3865, + "step": 785200 + }, + { + "epoch": 8.36, + "learning_rate": 4.198492001713341e-05, + "loss": 8.3487, + "step": 785300 + }, + { + "epoch": 8.37, + "learning_rate": 4.196220782087229e-05, + "loss": 8.253, + "step": 785400 + }, + { + "epoch": 8.37, + "learning_rate": 4.19395001382163e-05, + "loss": 8.2736, + "step": 785500 + }, + { + "epoch": 8.37, + "learning_rate": 4.191679697093142e-05, + "loss": 8.3211, + "step": 785600 + }, + { + "epoch": 8.37, + "learning_rate": 4.189409832078324e-05, + "loss": 8.2602, + "step": 785700 + }, + { + "epoch": 8.37, + "learning_rate": 4.1871404189537e-05, + "loss": 8.3708, + "step": 785800 + }, + { + "epoch": 8.37, + "learning_rate": 4.1848714578957695e-05, + "loss": 8.3161, + "step": 785900 + }, + { + "epoch": 8.37, + "learning_rate": 4.1826029490809936e-05, + "loss": 8.3086, + "step": 786000 + }, + { + "epoch": 8.37, + "learning_rate": 4.1803348926857825e-05, + "loss": 8.3323, + "step": 786100 + }, + { + "epoch": 8.37, + "learning_rate": 4.1780672888865324e-05, + "loss": 8.3014, + "step": 786200 + }, + { + "epoch": 8.38, + "learning_rate": 4.175800137859592e-05, + "loss": 8.3449, + "step": 786300 + }, + { + "epoch": 8.38, + "learning_rate": 4.1735334397812784e-05, + "loss": 8.3362, + "step": 786400 + }, + { + "epoch": 8.38, + "learning_rate": 4.171267194827869e-05, + "loss": 8.3231, + "step": 786500 + }, + { + "epoch": 8.38, + "learning_rate": 4.1690014031756174e-05, + "loss": 8.3099, + "step": 786600 + }, + { + "epoch": 8.38, + "learning_rate": 4.1667360650007294e-05, + "loss": 8.3335, + "step": 786700 + }, + { + "epoch": 8.38, + "learning_rate": 4.164471180479382e-05, + "loss": 8.2966, + "step": 786800 + }, + { + "epoch": 8.38, + "learning_rate": 4.1622067497877115e-05, + "loss": 8.2942, + "step": 786900 + }, + { + "epoch": 8.38, + "learning_rate": 4.15994277310183e-05, + "loss": 8.2999, + "step": 787000 + }, + { + "epoch": 8.38, + "learning_rate": 4.157679250597805e-05, + "loss": 8.248, + "step": 787100 + }, + { + "epoch": 8.38, + "learning_rate": 4.155416182451668e-05, + "loss": 8.3629, + "step": 787200 + }, + { + "epoch": 8.39, + "learning_rate": 4.153153568839416e-05, + "loss": 8.3831, + "step": 787300 + }, + { + "epoch": 8.39, + "learning_rate": 4.150891409937021e-05, + "loss": 8.2885, + "step": 787400 + }, + { + "epoch": 8.39, + "learning_rate": 4.148629705920406e-05, + "loss": 8.3487, + "step": 787500 + }, + { + "epoch": 8.39, + "learning_rate": 4.146368456965465e-05, + "loss": 8.2833, + "step": 787600 + }, + { + "epoch": 8.39, + "learning_rate": 4.144107663248051e-05, + "loss": 8.3613, + "step": 787700 + }, + { + "epoch": 8.39, + "learning_rate": 4.141847324943995e-05, + "loss": 8.4463, + "step": 787800 + }, + { + "epoch": 8.39, + "learning_rate": 4.139587442229078e-05, + "loss": 8.2915, + "step": 787900 + }, + { + "epoch": 8.39, + "learning_rate": 4.13732801527905e-05, + "loss": 8.2751, + "step": 788000 + }, + { + "epoch": 8.39, + "learning_rate": 4.135069044269632e-05, + "loss": 8.2335, + "step": 788100 + }, + { + "epoch": 8.4, + "learning_rate": 4.132810529376502e-05, + "loss": 8.3586, + "step": 788200 + }, + { + "epoch": 8.4, + "learning_rate": 4.130552470775304e-05, + "loss": 8.1934, + "step": 788300 + }, + { + "epoch": 8.4, + "learning_rate": 4.128294868641645e-05, + "loss": 8.3135, + "step": 788400 + }, + { + "epoch": 8.4, + "learning_rate": 4.126037723151105e-05, + "loss": 8.4192, + "step": 788500 + }, + { + "epoch": 8.4, + "learning_rate": 4.12378103447922e-05, + "loss": 8.3084, + "step": 788600 + }, + { + "epoch": 8.4, + "learning_rate": 4.121524802801493e-05, + "loss": 8.3702, + "step": 788700 + }, + { + "epoch": 8.4, + "learning_rate": 4.119269028293387e-05, + "loss": 8.3464, + "step": 788800 + }, + { + "epoch": 8.4, + "learning_rate": 4.117013711130341e-05, + "loss": 8.2839, + "step": 788900 + }, + { + "epoch": 8.4, + "learning_rate": 4.114758851487747e-05, + "loss": 8.2836, + "step": 789000 + }, + { + "epoch": 8.4, + "learning_rate": 4.112504449540967e-05, + "loss": 8.2972, + "step": 789100 + }, + { + "epoch": 8.41, + "learning_rate": 4.110250505465323e-05, + "loss": 8.2543, + "step": 789200 + }, + { + "epoch": 8.41, + "learning_rate": 4.107997019436109e-05, + "loss": 8.3263, + "step": 789300 + }, + { + "epoch": 8.41, + "learning_rate": 4.1057439916285776e-05, + "loss": 8.3222, + "step": 789400 + }, + { + "epoch": 8.41, + "learning_rate": 4.103491422217947e-05, + "loss": 8.3038, + "step": 789500 + }, + { + "epoch": 8.41, + "learning_rate": 4.1012393113793945e-05, + "loss": 8.3316, + "step": 789600 + }, + { + "epoch": 8.41, + "learning_rate": 4.0989876592880786e-05, + "loss": 8.37, + "step": 789700 + }, + { + "epoch": 8.41, + "learning_rate": 4.096736466119096e-05, + "loss": 8.2947, + "step": 789800 + }, + { + "epoch": 8.41, + "learning_rate": 4.0944857320475347e-05, + "loss": 8.2987, + "step": 789900 + }, + { + "epoch": 8.41, + "learning_rate": 4.092235457248425e-05, + "loss": 8.2223, + "step": 790000 + }, + { + "epoch": 8.42, + "learning_rate": 4.089985641896779e-05, + "loss": 8.3387, + "step": 790100 + }, + { + "epoch": 8.42, + "learning_rate": 4.087736286167562e-05, + "loss": 8.3621, + "step": 790200 + }, + { + "epoch": 8.42, + "learning_rate": 4.085487390235701e-05, + "loss": 8.2713, + "step": 790300 + }, + { + "epoch": 8.42, + "learning_rate": 4.083238954276104e-05, + "loss": 8.3528, + "step": 790400 + }, + { + "epoch": 8.42, + "learning_rate": 4.080990978463626e-05, + "loss": 8.216, + "step": 790500 + }, + { + "epoch": 8.42, + "learning_rate": 4.07874346297309e-05, + "loss": 8.2632, + "step": 790600 + }, + { + "epoch": 8.42, + "learning_rate": 4.0764964079792864e-05, + "loss": 8.2451, + "step": 790700 + }, + { + "epoch": 8.42, + "learning_rate": 4.074249813656977e-05, + "loss": 8.3232, + "step": 790800 + }, + { + "epoch": 8.42, + "learning_rate": 4.072003680180867e-05, + "loss": 8.3083, + "step": 790900 + }, + { + "epoch": 8.43, + "learning_rate": 4.069758007725647e-05, + "loss": 8.2514, + "step": 791000 + }, + { + "epoch": 8.43, + "learning_rate": 4.067512796465958e-05, + "loss": 8.2489, + "step": 791100 + }, + { + "epoch": 8.43, + "learning_rate": 4.0652680465764203e-05, + "loss": 8.2838, + "step": 791200 + }, + { + "epoch": 8.43, + "learning_rate": 4.063023758231593e-05, + "loss": 8.2941, + "step": 791300 + }, + { + "epoch": 8.43, + "learning_rate": 4.0607799316060266e-05, + "loss": 8.312, + "step": 791400 + }, + { + "epoch": 8.43, + "learning_rate": 4.058536566874217e-05, + "loss": 8.3408, + "step": 791500 + }, + { + "epoch": 8.43, + "learning_rate": 4.0562936642106407e-05, + "loss": 8.2864, + "step": 791600 + }, + { + "epoch": 8.43, + "learning_rate": 4.054051223789714e-05, + "loss": 8.3458, + "step": 791700 + }, + { + "epoch": 8.43, + "learning_rate": 4.051809245785843e-05, + "loss": 8.32, + "step": 791800 + }, + { + "epoch": 8.43, + "learning_rate": 4.049567730373383e-05, + "loss": 8.2297, + "step": 791900 + }, + { + "epoch": 8.44, + "learning_rate": 4.047326677726656e-05, + "loss": 8.3209, + "step": 792000 + }, + { + "epoch": 8.44, + "learning_rate": 4.045086088019947e-05, + "loss": 8.3612, + "step": 792100 + }, + { + "epoch": 8.44, + "learning_rate": 4.042845961427509e-05, + "loss": 8.3397, + "step": 792200 + }, + { + "epoch": 8.44, + "learning_rate": 4.040606298123565e-05, + "loss": 8.3397, + "step": 792300 + }, + { + "epoch": 8.44, + "learning_rate": 4.0383670982822794e-05, + "loss": 8.375, + "step": 792400 + }, + { + "epoch": 8.44, + "learning_rate": 4.036128362077804e-05, + "loss": 8.2028, + "step": 792500 + }, + { + "epoch": 8.44, + "learning_rate": 4.0338900896842416e-05, + "loss": 8.25, + "step": 792600 + }, + { + "epoch": 8.44, + "learning_rate": 4.031652281275671e-05, + "loss": 8.3443, + "step": 792700 + }, + { + "epoch": 8.44, + "learning_rate": 4.0294149370261125e-05, + "loss": 8.2786, + "step": 792800 + }, + { + "epoch": 8.45, + "learning_rate": 4.027178057109577e-05, + "loss": 8.2803, + "step": 792900 + }, + { + "epoch": 8.45, + "learning_rate": 4.024941641700023e-05, + "loss": 8.3241, + "step": 793000 + }, + { + "epoch": 8.45, + "learning_rate": 4.022705690971376e-05, + "loss": 8.3891, + "step": 793100 + }, + { + "epoch": 8.45, + "learning_rate": 4.020470205097522e-05, + "loss": 8.2609, + "step": 793200 + }, + { + "epoch": 8.45, + "learning_rate": 4.0182351842523225e-05, + "loss": 8.2256, + "step": 793300 + }, + { + "epoch": 8.45, + "learning_rate": 4.0160006286095933e-05, + "loss": 8.2771, + "step": 793400 + }, + { + "epoch": 8.45, + "learning_rate": 4.013766538343113e-05, + "loss": 8.2709, + "step": 793500 + }, + { + "epoch": 8.45, + "learning_rate": 4.011532913626627e-05, + "loss": 8.209, + "step": 793600 + }, + { + "epoch": 8.45, + "learning_rate": 4.0092997546338496e-05, + "loss": 8.2883, + "step": 793700 + }, + { + "epoch": 8.46, + "learning_rate": 4.007067061538451e-05, + "loss": 8.2181, + "step": 793800 + }, + { + "epoch": 8.46, + "learning_rate": 4.004834834514066e-05, + "loss": 8.2832, + "step": 793900 + }, + { + "epoch": 8.46, + "learning_rate": 4.002603073734295e-05, + "loss": 8.2318, + "step": 794000 + }, + { + "epoch": 8.46, + "learning_rate": 4.000371779372706e-05, + "loss": 8.2758, + "step": 794100 + }, + { + "epoch": 8.46, + "learning_rate": 3.9981409516028254e-05, + "loss": 8.2603, + "step": 794200 + }, + { + "epoch": 8.46, + "learning_rate": 3.995910590598141e-05, + "loss": 8.3017, + "step": 794300 + }, + { + "epoch": 8.46, + "learning_rate": 3.993680696532115e-05, + "loss": 8.2583, + "step": 794400 + }, + { + "epoch": 8.46, + "learning_rate": 3.991451269578163e-05, + "loss": 8.3134, + "step": 794500 + }, + { + "epoch": 8.46, + "learning_rate": 3.9892223099096695e-05, + "loss": 8.2884, + "step": 794600 + }, + { + "epoch": 8.46, + "learning_rate": 3.9869938176999745e-05, + "loss": 8.2592, + "step": 794700 + }, + { + "epoch": 8.47, + "learning_rate": 3.984765793122396e-05, + "loss": 8.2973, + "step": 794800 + }, + { + "epoch": 8.47, + "learning_rate": 3.982538236350205e-05, + "loss": 8.3323, + "step": 794900 + }, + { + "epoch": 8.47, + "learning_rate": 3.980311147556638e-05, + "loss": 8.2526, + "step": 795000 + }, + { + "epoch": 8.47, + "learning_rate": 3.9780845269148934e-05, + "loss": 8.3387, + "step": 795100 + }, + { + "epoch": 8.47, + "learning_rate": 3.9758583745981425e-05, + "loss": 8.2604, + "step": 795200 + }, + { + "epoch": 8.47, + "learning_rate": 3.9736326907795085e-05, + "loss": 8.241, + "step": 795300 + }, + { + "epoch": 8.47, + "learning_rate": 3.971407475632085e-05, + "loss": 8.3617, + "step": 795400 + }, + { + "epoch": 8.47, + "learning_rate": 3.969182729328923e-05, + "loss": 8.2692, + "step": 795500 + }, + { + "epoch": 8.47, + "learning_rate": 3.966958452043048e-05, + "loss": 8.3084, + "step": 795600 + }, + { + "epoch": 8.48, + "learning_rate": 3.9647346439474386e-05, + "loss": 8.2853, + "step": 795700 + }, + { + "epoch": 8.48, + "learning_rate": 3.9625113052150406e-05, + "loss": 8.2709, + "step": 795800 + }, + { + "epoch": 8.48, + "learning_rate": 3.96028843601876e-05, + "loss": 8.3188, + "step": 795900 + }, + { + "epoch": 8.48, + "learning_rate": 3.958066036531477e-05, + "loss": 8.2569, + "step": 796000 + }, + { + "epoch": 8.48, + "learning_rate": 3.9558441069260233e-05, + "loss": 8.3528, + "step": 796100 + }, + { + "epoch": 8.48, + "learning_rate": 3.9536226473752006e-05, + "loss": 8.2581, + "step": 796200 + }, + { + "epoch": 8.48, + "learning_rate": 3.9514016580517654e-05, + "loss": 8.341, + "step": 796300 + }, + { + "epoch": 8.48, + "learning_rate": 3.9491811391284536e-05, + "loss": 8.2337, + "step": 796400 + }, + { + "epoch": 8.48, + "learning_rate": 3.946961090777952e-05, + "loss": 8.3404, + "step": 796500 + }, + { + "epoch": 8.48, + "learning_rate": 3.944741513172909e-05, + "loss": 8.4074, + "step": 796600 + }, + { + "epoch": 8.49, + "learning_rate": 3.942522406485949e-05, + "loss": 8.2285, + "step": 796700 + }, + { + "epoch": 8.49, + "learning_rate": 3.9403037708896486e-05, + "loss": 8.2763, + "step": 796800 + }, + { + "epoch": 8.49, + "learning_rate": 3.938085606556552e-05, + "loss": 8.2908, + "step": 796900 + }, + { + "epoch": 8.49, + "learning_rate": 3.9358679136591614e-05, + "loss": 8.3164, + "step": 797000 + }, + { + "epoch": 8.49, + "learning_rate": 3.933650692369955e-05, + "loss": 8.2799, + "step": 797100 + }, + { + "epoch": 8.49, + "learning_rate": 3.931433942861361e-05, + "loss": 8.2296, + "step": 797200 + }, + { + "epoch": 8.49, + "learning_rate": 3.929217665305779e-05, + "loss": 8.2812, + "step": 797300 + }, + { + "epoch": 8.49, + "learning_rate": 3.927001859875564e-05, + "loss": 8.2376, + "step": 797400 + }, + { + "epoch": 8.49, + "learning_rate": 3.924786526743046e-05, + "loss": 8.2757, + "step": 797500 + }, + { + "epoch": 8.5, + "learning_rate": 3.92257166608051e-05, + "loss": 8.3533, + "step": 797600 + }, + { + "epoch": 8.5, + "learning_rate": 3.920357278060204e-05, + "loss": 8.3092, + "step": 797700 + }, + { + "epoch": 8.5, + "learning_rate": 3.918143362854339e-05, + "loss": 8.3247, + "step": 797800 + }, + { + "epoch": 8.5, + "learning_rate": 3.915929920635101e-05, + "loss": 8.36, + "step": 797900 + }, + { + "epoch": 8.5, + "learning_rate": 3.9137169515746155e-05, + "loss": 8.3208, + "step": 798000 + }, + { + "epoch": 8.5, + "learning_rate": 3.911504455844996e-05, + "loss": 8.2905, + "step": 798100 + }, + { + "epoch": 8.5, + "learning_rate": 3.909292433618303e-05, + "loss": 8.2868, + "step": 798200 + }, + { + "epoch": 8.5, + "learning_rate": 3.9070808850665764e-05, + "loss": 8.3005, + "step": 798300 + }, + { + "epoch": 8.5, + "learning_rate": 3.904869810361791e-05, + "loss": 8.2685, + "step": 798400 + }, + { + "epoch": 8.51, + "learning_rate": 3.9026592096759164e-05, + "loss": 8.3454, + "step": 798500 + }, + { + "epoch": 8.51, + "learning_rate": 3.9004490831808616e-05, + "loss": 8.26, + "step": 798600 + }, + { + "epoch": 8.51, + "learning_rate": 3.8982394310485183e-05, + "loss": 8.3354, + "step": 798700 + }, + { + "epoch": 8.51, + "learning_rate": 3.8960302534507245e-05, + "loss": 8.3578, + "step": 798800 + }, + { + "epoch": 8.51, + "learning_rate": 3.8938215505592876e-05, + "loss": 8.3063, + "step": 798900 + }, + { + "epoch": 8.51, + "learning_rate": 3.891613322545987e-05, + "loss": 8.2529, + "step": 799000 + }, + { + "epoch": 8.51, + "learning_rate": 3.889405569582545e-05, + "loss": 8.2343, + "step": 799100 + }, + { + "epoch": 8.51, + "learning_rate": 3.887198291840667e-05, + "loss": 8.3032, + "step": 799200 + }, + { + "epoch": 8.51, + "learning_rate": 3.8849914894920084e-05, + "loss": 8.2452, + "step": 799300 + }, + { + "epoch": 8.51, + "learning_rate": 3.8827851627082e-05, + "loss": 8.2556, + "step": 799400 + }, + { + "epoch": 8.52, + "learning_rate": 3.880579311660818e-05, + "loss": 8.2568, + "step": 799500 + }, + { + "epoch": 8.52, + "learning_rate": 3.8783739365214186e-05, + "loss": 8.2077, + "step": 799600 + }, + { + "epoch": 8.52, + "learning_rate": 3.876169037461509e-05, + "loss": 8.222, + "step": 799700 + }, + { + "epoch": 8.52, + "learning_rate": 3.8739646146525746e-05, + "loss": 8.2997, + "step": 799800 + }, + { + "epoch": 8.52, + "learning_rate": 3.871760668266039e-05, + "loss": 8.2958, + "step": 799900 + }, + { + "epoch": 8.52, + "learning_rate": 3.8695571984733145e-05, + "loss": 8.2999, + "step": 800000 + }, + { + "epoch": 8.52, + "learning_rate": 3.8673542054457615e-05, + "loss": 8.2333, + "step": 800100 + }, + { + "epoch": 8.52, + "learning_rate": 3.865151689354707e-05, + "loss": 8.304, + "step": 800200 + }, + { + "epoch": 8.52, + "learning_rate": 3.862949650371437e-05, + "loss": 8.4194, + "step": 800300 + }, + { + "epoch": 8.53, + "learning_rate": 3.860748088667212e-05, + "loss": 8.3681, + "step": 800400 + }, + { + "epoch": 8.53, + "learning_rate": 3.8585470044132446e-05, + "loss": 8.2798, + "step": 800500 + }, + { + "epoch": 8.53, + "learning_rate": 3.856346397780711e-05, + "loss": 8.3072, + "step": 800600 + }, + { + "epoch": 8.53, + "learning_rate": 3.8541462689407515e-05, + "loss": 8.3033, + "step": 800700 + }, + { + "epoch": 8.53, + "learning_rate": 3.851946618064476e-05, + "loss": 8.2229, + "step": 800800 + }, + { + "epoch": 8.53, + "learning_rate": 3.8497474453229486e-05, + "loss": 8.243, + "step": 800900 + }, + { + "epoch": 8.53, + "learning_rate": 3.847548750887197e-05, + "loss": 8.2809, + "step": 801000 + }, + { + "epoch": 8.53, + "learning_rate": 3.845350534928218e-05, + "loss": 8.2301, + "step": 801100 + }, + { + "epoch": 8.53, + "learning_rate": 3.843152797616966e-05, + "loss": 8.3078, + "step": 801200 + }, + { + "epoch": 8.53, + "learning_rate": 3.840955539124358e-05, + "loss": 8.2779, + "step": 801300 + }, + { + "epoch": 8.54, + "learning_rate": 3.838758759621272e-05, + "loss": 8.3415, + "step": 801400 + }, + { + "epoch": 8.54, + "learning_rate": 3.836562459278559e-05, + "loss": 8.2259, + "step": 801500 + }, + { + "epoch": 8.54, + "learning_rate": 3.834366638267022e-05, + "loss": 8.2701, + "step": 801600 + }, + { + "epoch": 8.54, + "learning_rate": 3.832171296757428e-05, + "loss": 8.2339, + "step": 801700 + }, + { + "epoch": 8.54, + "learning_rate": 3.8299764349205105e-05, + "loss": 8.3599, + "step": 801800 + }, + { + "epoch": 8.54, + "learning_rate": 3.827782052926966e-05, + "loss": 8.2747, + "step": 801900 + }, + { + "epoch": 8.54, + "learning_rate": 3.825588150947451e-05, + "loss": 8.2029, + "step": 802000 + }, + { + "epoch": 8.54, + "learning_rate": 3.823394729152584e-05, + "loss": 8.294, + "step": 802100 + }, + { + "epoch": 8.54, + "learning_rate": 3.821201787712946e-05, + "loss": 8.3604, + "step": 802200 + }, + { + "epoch": 8.55, + "learning_rate": 3.819009326799088e-05, + "loss": 8.2172, + "step": 802300 + }, + { + "epoch": 8.55, + "learning_rate": 3.8168173465815135e-05, + "loss": 8.2765, + "step": 802400 + }, + { + "epoch": 8.55, + "learning_rate": 3.814625847230695e-05, + "loss": 8.2849, + "step": 802500 + }, + { + "epoch": 8.55, + "learning_rate": 3.812434828917062e-05, + "loss": 8.3043, + "step": 802600 + }, + { + "epoch": 8.55, + "learning_rate": 3.8102442918110156e-05, + "loss": 8.2376, + "step": 802700 + }, + { + "epoch": 8.55, + "learning_rate": 3.808054236082912e-05, + "loss": 8.2684, + "step": 802800 + }, + { + "epoch": 8.55, + "learning_rate": 3.805864661903068e-05, + "loss": 8.3014, + "step": 802900 + }, + { + "epoch": 8.55, + "learning_rate": 3.8036755694417734e-05, + "loss": 8.2721, + "step": 803000 + }, + { + "epoch": 8.55, + "learning_rate": 3.8014869588692726e-05, + "loss": 8.3493, + "step": 803100 + }, + { + "epoch": 8.56, + "learning_rate": 3.799298830355772e-05, + "loss": 8.2113, + "step": 803200 + }, + { + "epoch": 8.56, + "learning_rate": 3.79711118407144e-05, + "loss": 8.2686, + "step": 803300 + }, + { + "epoch": 8.56, + "learning_rate": 3.7949240201864176e-05, + "loss": 8.2458, + "step": 803400 + }, + { + "epoch": 8.56, + "learning_rate": 3.7927373388707965e-05, + "loss": 8.2613, + "step": 803500 + }, + { + "epoch": 8.56, + "learning_rate": 3.790551140294635e-05, + "loss": 8.2935, + "step": 803600 + }, + { + "epoch": 8.56, + "learning_rate": 3.78836542462795e-05, + "loss": 8.2685, + "step": 803700 + }, + { + "epoch": 8.56, + "learning_rate": 3.786180192040734e-05, + "loss": 8.2617, + "step": 803800 + }, + { + "epoch": 8.56, + "learning_rate": 3.7839954427029266e-05, + "loss": 8.2589, + "step": 803900 + }, + { + "epoch": 8.56, + "learning_rate": 3.781811176784438e-05, + "loss": 8.2643, + "step": 804000 + }, + { + "epoch": 8.56, + "learning_rate": 3.7796273944551344e-05, + "loss": 8.3234, + "step": 804100 + }, + { + "epoch": 8.57, + "learning_rate": 3.777444095884856e-05, + "loss": 8.316, + "step": 804200 + }, + { + "epoch": 8.57, + "learning_rate": 3.775261281243394e-05, + "loss": 8.359, + "step": 804300 + }, + { + "epoch": 8.57, + "learning_rate": 3.7730789507005074e-05, + "loss": 8.2816, + "step": 804400 + }, + { + "epoch": 8.57, + "learning_rate": 3.770897104425911e-05, + "loss": 8.259, + "step": 804500 + }, + { + "epoch": 8.57, + "learning_rate": 3.7687157425892994e-05, + "loss": 8.2947, + "step": 804600 + }, + { + "epoch": 8.57, + "learning_rate": 3.766534865360303e-05, + "loss": 8.3543, + "step": 804700 + }, + { + "epoch": 8.57, + "learning_rate": 3.764354472908539e-05, + "loss": 8.317, + "step": 804800 + }, + { + "epoch": 8.57, + "learning_rate": 3.7621745654035686e-05, + "loss": 8.3163, + "step": 804900 + }, + { + "epoch": 8.57, + "learning_rate": 3.7599951430149336e-05, + "loss": 8.2631, + "step": 805000 + }, + { + "epoch": 8.58, + "learning_rate": 3.7578162059121214e-05, + "loss": 8.3151, + "step": 805100 + }, + { + "epoch": 8.58, + "learning_rate": 3.755637754264586e-05, + "loss": 8.239, + "step": 805200 + }, + { + "epoch": 8.58, + "learning_rate": 3.753459788241752e-05, + "loss": 8.3436, + "step": 805300 + }, + { + "epoch": 8.58, + "learning_rate": 3.7512823080129975e-05, + "loss": 8.2517, + "step": 805400 + }, + { + "epoch": 8.58, + "learning_rate": 3.749105313747665e-05, + "loss": 8.2861, + "step": 805500 + }, + { + "epoch": 8.58, + "learning_rate": 3.746928805615055e-05, + "loss": 8.254, + "step": 805600 + }, + { + "epoch": 8.58, + "learning_rate": 3.7447527837844473e-05, + "loss": 8.3372, + "step": 805700 + }, + { + "epoch": 8.58, + "learning_rate": 3.7425772484250566e-05, + "loss": 8.3226, + "step": 805800 + }, + { + "epoch": 8.58, + "learning_rate": 3.740402199706083e-05, + "loss": 8.3044, + "step": 805900 + }, + { + "epoch": 8.58, + "learning_rate": 3.7382276377966766e-05, + "loss": 8.2392, + "step": 806000 + }, + { + "epoch": 8.59, + "learning_rate": 3.736053562865961e-05, + "loss": 8.3126, + "step": 806100 + }, + { + "epoch": 8.59, + "learning_rate": 3.733879975083002e-05, + "loss": 8.256, + "step": 806200 + }, + { + "epoch": 8.59, + "learning_rate": 3.73170687461685e-05, + "loss": 8.3251, + "step": 806300 + }, + { + "epoch": 8.59, + "learning_rate": 3.7295342616364995e-05, + "loss": 8.2986, + "step": 806400 + }, + { + "epoch": 8.59, + "learning_rate": 3.7273621363109254e-05, + "loss": 8.392, + "step": 806500 + }, + { + "epoch": 8.59, + "learning_rate": 3.7251904988090414e-05, + "loss": 8.2787, + "step": 806600 + }, + { + "epoch": 8.59, + "learning_rate": 3.7230193492997443e-05, + "loss": 8.3206, + "step": 806700 + }, + { + "epoch": 8.59, + "learning_rate": 3.720848687951883e-05, + "loss": 8.2201, + "step": 806800 + }, + { + "epoch": 8.59, + "learning_rate": 3.7186785149342696e-05, + "loss": 8.2849, + "step": 806900 + }, + { + "epoch": 8.6, + "learning_rate": 3.7165088304156746e-05, + "loss": 8.2903, + "step": 807000 + }, + { + "epoch": 8.6, + "learning_rate": 3.714339634564839e-05, + "loss": 8.3181, + "step": 807100 + }, + { + "epoch": 8.6, + "learning_rate": 3.712170927550468e-05, + "loss": 8.3031, + "step": 807200 + }, + { + "epoch": 8.6, + "learning_rate": 3.710002709541207e-05, + "loss": 8.2829, + "step": 807300 + }, + { + "epoch": 8.6, + "learning_rate": 3.707834980705691e-05, + "loss": 8.1964, + "step": 807400 + }, + { + "epoch": 8.6, + "learning_rate": 3.705667741212496e-05, + "loss": 8.3335, + "step": 807500 + }, + { + "epoch": 8.6, + "learning_rate": 3.70350099123018e-05, + "loss": 8.3063, + "step": 807600 + }, + { + "epoch": 8.6, + "learning_rate": 3.7013347309272365e-05, + "loss": 8.29, + "step": 807700 + }, + { + "epoch": 8.6, + "learning_rate": 3.6991689604721456e-05, + "loss": 8.301, + "step": 807800 + }, + { + "epoch": 8.61, + "learning_rate": 3.6970036800333383e-05, + "loss": 8.2335, + "step": 807900 + }, + { + "epoch": 8.61, + "learning_rate": 3.6948388897792075e-05, + "loss": 8.3206, + "step": 808000 + }, + { + "epoch": 8.61, + "learning_rate": 3.692674589878106e-05, + "loss": 8.3473, + "step": 808100 + }, + { + "epoch": 8.61, + "learning_rate": 3.6905107804983575e-05, + "loss": 8.2919, + "step": 808200 + }, + { + "epoch": 8.61, + "learning_rate": 3.6883474618082383e-05, + "loss": 8.1876, + "step": 808300 + }, + { + "epoch": 8.61, + "learning_rate": 3.686184633975991e-05, + "loss": 8.2412, + "step": 808400 + }, + { + "epoch": 8.61, + "learning_rate": 3.684022297169816e-05, + "loss": 8.2825, + "step": 808500 + }, + { + "epoch": 8.61, + "learning_rate": 3.681860451557882e-05, + "loss": 8.3022, + "step": 808600 + }, + { + "epoch": 8.61, + "learning_rate": 3.6796990973083165e-05, + "loss": 8.3142, + "step": 808700 + }, + { + "epoch": 8.61, + "learning_rate": 3.6775382345892075e-05, + "loss": 8.2742, + "step": 808800 + }, + { + "epoch": 8.62, + "learning_rate": 3.6753778635685996e-05, + "loss": 8.2703, + "step": 808900 + }, + { + "epoch": 8.62, + "learning_rate": 3.6732179844145144e-05, + "loss": 8.362, + "step": 809000 + }, + { + "epoch": 8.62, + "learning_rate": 3.671058597294922e-05, + "loss": 8.2175, + "step": 809100 + }, + { + "epoch": 8.62, + "learning_rate": 3.6688997023777535e-05, + "loss": 8.2989, + "step": 809200 + }, + { + "epoch": 8.62, + "learning_rate": 3.666741299830915e-05, + "loss": 8.1982, + "step": 809300 + }, + { + "epoch": 8.62, + "learning_rate": 3.66458338982226e-05, + "loss": 8.3312, + "step": 809400 + }, + { + "epoch": 8.62, + "learning_rate": 3.6624259725196117e-05, + "loss": 8.3423, + "step": 809500 + }, + { + "epoch": 8.62, + "learning_rate": 3.660269048090749e-05, + "loss": 8.3211, + "step": 809600 + }, + { + "epoch": 8.62, + "learning_rate": 3.6581126167034205e-05, + "loss": 8.315, + "step": 809700 + }, + { + "epoch": 8.63, + "learning_rate": 3.655956678525332e-05, + "loss": 8.2973, + "step": 809800 + }, + { + "epoch": 8.63, + "learning_rate": 3.653801233724148e-05, + "loss": 8.3135, + "step": 809900 + }, + { + "epoch": 8.63, + "learning_rate": 3.651646282467497e-05, + "loss": 8.3081, + "step": 810000 + }, + { + "epoch": 8.63, + "learning_rate": 3.6494918249229746e-05, + "loss": 8.2106, + "step": 810100 + }, + { + "epoch": 8.63, + "learning_rate": 3.647337861258131e-05, + "loss": 8.3093, + "step": 810200 + }, + { + "epoch": 8.63, + "learning_rate": 3.6451843916404796e-05, + "loss": 8.3136, + "step": 810300 + }, + { + "epoch": 8.63, + "learning_rate": 3.643031416237492e-05, + "loss": 8.2702, + "step": 810400 + }, + { + "epoch": 8.63, + "learning_rate": 3.640878935216614e-05, + "loss": 8.1984, + "step": 810500 + }, + { + "epoch": 8.63, + "learning_rate": 3.638726948745239e-05, + "loss": 8.3087, + "step": 810600 + }, + { + "epoch": 8.64, + "learning_rate": 3.6365754569907285e-05, + "loss": 8.2052, + "step": 810700 + }, + { + "epoch": 8.64, + "learning_rate": 3.634424460120399e-05, + "loss": 8.3392, + "step": 810800 + }, + { + "epoch": 8.64, + "learning_rate": 3.632273958301543e-05, + "loss": 8.257, + "step": 810900 + }, + { + "epoch": 8.64, + "learning_rate": 3.6301239517013994e-05, + "loss": 8.4002, + "step": 811000 + }, + { + "epoch": 8.64, + "learning_rate": 3.6279744404871765e-05, + "loss": 8.281, + "step": 811100 + }, + { + "epoch": 8.64, + "learning_rate": 3.625825424826037e-05, + "loss": 8.2724, + "step": 811200 + }, + { + "epoch": 8.64, + "learning_rate": 3.623676904885118e-05, + "loss": 8.2479, + "step": 811300 + }, + { + "epoch": 8.64, + "learning_rate": 3.621528880831507e-05, + "loss": 8.3022, + "step": 811400 + }, + { + "epoch": 8.64, + "learning_rate": 3.6193813528322504e-05, + "loss": 8.3102, + "step": 811500 + }, + { + "epoch": 8.64, + "learning_rate": 3.617234321054371e-05, + "loss": 8.3625, + "step": 811600 + }, + { + "epoch": 8.65, + "learning_rate": 3.61508778566484e-05, + "loss": 8.2938, + "step": 811700 + }, + { + "epoch": 8.65, + "learning_rate": 3.612941746830593e-05, + "loss": 8.2189, + "step": 811800 + }, + { + "epoch": 8.65, + "learning_rate": 3.610796204718523e-05, + "loss": 8.4142, + "step": 811900 + }, + { + "epoch": 8.65, + "learning_rate": 3.608651159495498e-05, + "loss": 8.2854, + "step": 812000 + }, + { + "epoch": 8.65, + "learning_rate": 3.606506611328334e-05, + "loss": 8.2474, + "step": 812100 + }, + { + "epoch": 8.65, + "learning_rate": 3.604362560383813e-05, + "loss": 8.3144, + "step": 812200 + }, + { + "epoch": 8.65, + "learning_rate": 3.6022190068286734e-05, + "loss": 8.2663, + "step": 812300 + }, + { + "epoch": 8.65, + "learning_rate": 3.600075950829629e-05, + "loss": 8.3137, + "step": 812400 + }, + { + "epoch": 8.65, + "learning_rate": 3.59793339255334e-05, + "loss": 8.281, + "step": 812500 + }, + { + "epoch": 8.66, + "learning_rate": 3.5957913321664325e-05, + "loss": 8.2719, + "step": 812600 + }, + { + "epoch": 8.66, + "learning_rate": 3.5936497698354944e-05, + "loss": 8.3441, + "step": 812700 + }, + { + "epoch": 8.66, + "learning_rate": 3.5915087057270834e-05, + "loss": 8.2784, + "step": 812800 + }, + { + "epoch": 8.66, + "learning_rate": 3.589368140007697e-05, + "loss": 8.2941, + "step": 812900 + }, + { + "epoch": 8.66, + "learning_rate": 3.587228072843818e-05, + "loss": 8.2227, + "step": 813000 + }, + { + "epoch": 8.66, + "learning_rate": 3.585088504401872e-05, + "loss": 8.3104, + "step": 813100 + }, + { + "epoch": 8.66, + "learning_rate": 3.582949434848265e-05, + "loss": 8.2008, + "step": 813200 + }, + { + "epoch": 8.66, + "learning_rate": 3.5808108643493365e-05, + "loss": 8.3331, + "step": 813300 + }, + { + "epoch": 8.66, + "learning_rate": 3.5786727930714125e-05, + "loss": 8.3722, + "step": 813400 + }, + { + "epoch": 8.66, + "learning_rate": 3.576535221180778e-05, + "loss": 8.2366, + "step": 813500 + }, + { + "epoch": 8.67, + "learning_rate": 3.5743981488436565e-05, + "loss": 8.3828, + "step": 813600 + }, + { + "epoch": 8.67, + "learning_rate": 3.57226157622626e-05, + "loss": 8.305, + "step": 813700 + }, + { + "epoch": 8.67, + "learning_rate": 3.570125503494742e-05, + "loss": 8.3035, + "step": 813800 + }, + { + "epoch": 8.67, + "learning_rate": 3.5679899308152376e-05, + "loss": 8.2589, + "step": 813900 + }, + { + "epoch": 8.67, + "learning_rate": 3.5658548583538144e-05, + "loss": 8.2822, + "step": 814000 + }, + { + "epoch": 8.67, + "learning_rate": 3.563720286276529e-05, + "loss": 8.1939, + "step": 814100 + }, + { + "epoch": 8.67, + "learning_rate": 3.5615862147493795e-05, + "loss": 8.229, + "step": 814200 + }, + { + "epoch": 8.67, + "learning_rate": 3.559452643938344e-05, + "loss": 8.2686, + "step": 814300 + }, + { + "epoch": 8.67, + "learning_rate": 3.557319574009337e-05, + "loss": 8.2659, + "step": 814400 + }, + { + "epoch": 8.68, + "learning_rate": 3.555187005128255e-05, + "loss": 8.262, + "step": 814500 + }, + { + "epoch": 8.68, + "learning_rate": 3.553054937460949e-05, + "loss": 8.337, + "step": 814600 + }, + { + "epoch": 8.68, + "learning_rate": 3.550923371173228e-05, + "loss": 8.2981, + "step": 814700 + }, + { + "epoch": 8.68, + "learning_rate": 3.5487923064308604e-05, + "loss": 8.3238, + "step": 814800 + }, + { + "epoch": 8.68, + "learning_rate": 3.546661743399586e-05, + "loss": 8.2616, + "step": 814900 + }, + { + "epoch": 8.68, + "learning_rate": 3.544531682245097e-05, + "loss": 8.2402, + "step": 815000 + }, + { + "epoch": 8.68, + "learning_rate": 3.542402123133046e-05, + "loss": 8.3316, + "step": 815100 + }, + { + "epoch": 8.68, + "learning_rate": 3.540273066229048e-05, + "loss": 8.2887, + "step": 815200 + }, + { + "epoch": 8.68, + "learning_rate": 3.538144511698687e-05, + "loss": 8.3173, + "step": 815300 + }, + { + "epoch": 8.69, + "learning_rate": 3.536016459707494e-05, + "loss": 8.238, + "step": 815400 + }, + { + "epoch": 8.69, + "learning_rate": 3.533888910420968e-05, + "loss": 8.2537, + "step": 815500 + }, + { + "epoch": 8.69, + "learning_rate": 3.531761864004575e-05, + "loss": 8.2806, + "step": 815600 + }, + { + "epoch": 8.69, + "learning_rate": 3.529635320623731e-05, + "loss": 8.2207, + "step": 815700 + }, + { + "epoch": 8.69, + "learning_rate": 3.527509280443818e-05, + "loss": 8.2757, + "step": 815800 + }, + { + "epoch": 8.69, + "learning_rate": 3.525383743630175e-05, + "loss": 8.2911, + "step": 815900 + }, + { + "epoch": 8.69, + "learning_rate": 3.523258710348113e-05, + "loss": 8.1748, + "step": 816000 + }, + { + "epoch": 8.69, + "learning_rate": 3.5211341807628905e-05, + "loss": 8.332, + "step": 816100 + }, + { + "epoch": 8.69, + "learning_rate": 3.519010155039734e-05, + "loss": 8.2527, + "step": 816200 + }, + { + "epoch": 8.69, + "learning_rate": 3.5168866333438255e-05, + "loss": 8.2238, + "step": 816300 + }, + { + "epoch": 8.7, + "learning_rate": 3.514763615840317e-05, + "loss": 8.3267, + "step": 816400 + }, + { + "epoch": 8.7, + "learning_rate": 3.512641102694316e-05, + "loss": 8.3493, + "step": 816500 + }, + { + "epoch": 8.7, + "learning_rate": 3.5105190940708865e-05, + "loss": 8.2693, + "step": 816600 + }, + { + "epoch": 8.7, + "learning_rate": 3.5083975901350564e-05, + "loss": 8.2937, + "step": 816700 + }, + { + "epoch": 8.7, + "learning_rate": 3.5062765910518206e-05, + "loss": 8.2988, + "step": 816800 + }, + { + "epoch": 8.7, + "learning_rate": 3.504156096986127e-05, + "loss": 8.2891, + "step": 816900 + }, + { + "epoch": 8.7, + "learning_rate": 3.502036108102886e-05, + "loss": 8.2845, + "step": 817000 + }, + { + "epoch": 8.7, + "learning_rate": 3.499916624566967e-05, + "loss": 8.2329, + "step": 817100 + }, + { + "epoch": 8.7, + "learning_rate": 3.4977976465432086e-05, + "loss": 8.3152, + "step": 817200 + }, + { + "epoch": 8.71, + "learning_rate": 3.495679174196399e-05, + "loss": 8.2288, + "step": 817300 + }, + { + "epoch": 8.71, + "learning_rate": 3.493561207691295e-05, + "loss": 8.2431, + "step": 817400 + }, + { + "epoch": 8.71, + "learning_rate": 3.4914437471926053e-05, + "loss": 8.296, + "step": 817500 + }, + { + "epoch": 8.71, + "learning_rate": 3.489326792865013e-05, + "loss": 8.2868, + "step": 817600 + }, + { + "epoch": 8.71, + "learning_rate": 3.4872103448731495e-05, + "loss": 8.2078, + "step": 817700 + }, + { + "epoch": 8.71, + "learning_rate": 3.48509440338161e-05, + "loss": 8.324, + "step": 817800 + }, + { + "epoch": 8.71, + "learning_rate": 3.482978968554955e-05, + "loss": 8.2558, + "step": 817900 + }, + { + "epoch": 8.71, + "learning_rate": 3.480864040557702e-05, + "loss": 8.256, + "step": 818000 + }, + { + "epoch": 8.71, + "learning_rate": 3.478749619554327e-05, + "loss": 8.2743, + "step": 818100 + }, + { + "epoch": 8.71, + "learning_rate": 3.476635705709266e-05, + "loss": 8.2657, + "step": 818200 + }, + { + "epoch": 8.72, + "learning_rate": 3.474522299186924e-05, + "loss": 8.3199, + "step": 818300 + }, + { + "epoch": 8.72, + "learning_rate": 3.4724094001516594e-05, + "loss": 8.3015, + "step": 818400 + }, + { + "epoch": 8.72, + "learning_rate": 3.470297008767791e-05, + "loss": 8.2461, + "step": 818500 + }, + { + "epoch": 8.72, + "learning_rate": 3.468185125199598e-05, + "loss": 8.2387, + "step": 818600 + }, + { + "epoch": 8.72, + "learning_rate": 3.466073749611327e-05, + "loss": 8.2825, + "step": 818700 + }, + { + "epoch": 8.72, + "learning_rate": 3.463962882167177e-05, + "loss": 8.1763, + "step": 818800 + }, + { + "epoch": 8.72, + "learning_rate": 3.4618525230313106e-05, + "loss": 8.2759, + "step": 818900 + }, + { + "epoch": 8.72, + "learning_rate": 3.459742672367846e-05, + "loss": 8.1941, + "step": 819000 + }, + { + "epoch": 8.72, + "learning_rate": 3.457633330340876e-05, + "loss": 8.2764, + "step": 819100 + }, + { + "epoch": 8.73, + "learning_rate": 3.455524497114437e-05, + "loss": 8.2635, + "step": 819200 + }, + { + "epoch": 8.73, + "learning_rate": 3.453416172852538e-05, + "loss": 8.1773, + "step": 819300 + }, + { + "epoch": 8.73, + "learning_rate": 3.4513083577191364e-05, + "loss": 8.2157, + "step": 819400 + }, + { + "epoch": 8.73, + "learning_rate": 3.4492010518781695e-05, + "loss": 8.2502, + "step": 819500 + }, + { + "epoch": 8.73, + "learning_rate": 3.447094255493507e-05, + "loss": 8.2317, + "step": 819600 + }, + { + "epoch": 8.73, + "learning_rate": 3.444987968729004e-05, + "loss": 8.3289, + "step": 819700 + }, + { + "epoch": 8.73, + "learning_rate": 3.442882191748468e-05, + "loss": 8.2501, + "step": 819800 + }, + { + "epoch": 8.73, + "learning_rate": 3.4407769247156626e-05, + "loss": 8.2403, + "step": 819900 + }, + { + "epoch": 8.73, + "learning_rate": 3.438672167794316e-05, + "loss": 8.3078, + "step": 820000 + }, + { + "epoch": 8.74, + "learning_rate": 3.4365679211481106e-05, + "loss": 8.2615, + "step": 820100 + }, + { + "epoch": 8.74, + "learning_rate": 3.434464184940701e-05, + "loss": 8.2465, + "step": 820200 + }, + { + "epoch": 8.74, + "learning_rate": 3.432360959335691e-05, + "loss": 8.2827, + "step": 820300 + }, + { + "epoch": 8.74, + "learning_rate": 3.430258244496649e-05, + "loss": 8.2704, + "step": 820400 + }, + { + "epoch": 8.74, + "learning_rate": 3.428156040587101e-05, + "loss": 8.2523, + "step": 820500 + }, + { + "epoch": 8.74, + "learning_rate": 3.426054347770545e-05, + "loss": 8.1777, + "step": 820600 + }, + { + "epoch": 8.74, + "learning_rate": 3.423953166210416e-05, + "loss": 8.2534, + "step": 820700 + }, + { + "epoch": 8.74, + "learning_rate": 3.421852496070134e-05, + "loss": 8.2912, + "step": 820800 + }, + { + "epoch": 8.74, + "learning_rate": 3.41975233751306e-05, + "loss": 8.279, + "step": 820900 + }, + { + "epoch": 8.74, + "learning_rate": 3.4176526907025355e-05, + "loss": 8.3365, + "step": 821000 + }, + { + "epoch": 8.75, + "learning_rate": 3.415553555801836e-05, + "loss": 8.3212, + "step": 821100 + }, + { + "epoch": 8.75, + "learning_rate": 3.413454932974222e-05, + "loss": 8.2295, + "step": 821200 + }, + { + "epoch": 8.75, + "learning_rate": 3.411356822382895e-05, + "loss": 8.327, + "step": 821300 + }, + { + "epoch": 8.75, + "learning_rate": 3.4092592241910385e-05, + "loss": 8.3531, + "step": 821400 + }, + { + "epoch": 8.75, + "learning_rate": 3.407162138561767e-05, + "loss": 8.2526, + "step": 821500 + }, + { + "epoch": 8.75, + "learning_rate": 3.4050655656581824e-05, + "loss": 8.2377, + "step": 821600 + }, + { + "epoch": 8.75, + "learning_rate": 3.4029695056433316e-05, + "loss": 8.2871, + "step": 821700 + }, + { + "epoch": 8.75, + "learning_rate": 3.400873958680222e-05, + "loss": 8.3123, + "step": 821800 + }, + { + "epoch": 8.75, + "learning_rate": 3.398778924931831e-05, + "loss": 8.1796, + "step": 821900 + }, + { + "epoch": 8.76, + "learning_rate": 3.3966844045610835e-05, + "loss": 8.2755, + "step": 822000 + }, + { + "epoch": 8.76, + "learning_rate": 3.39459039773088e-05, + "loss": 8.2498, + "step": 822100 + }, + { + "epoch": 8.76, + "learning_rate": 3.392496904604059e-05, + "loss": 8.268, + "step": 822200 + }, + { + "epoch": 8.76, + "learning_rate": 3.3904039253434405e-05, + "loss": 8.2772, + "step": 822300 + }, + { + "epoch": 8.76, + "learning_rate": 3.38831146011179e-05, + "loss": 8.2608, + "step": 822400 + }, + { + "epoch": 8.76, + "learning_rate": 3.3862195090718484e-05, + "loss": 8.2434, + "step": 822500 + }, + { + "epoch": 8.76, + "learning_rate": 3.384128072386294e-05, + "loss": 8.2031, + "step": 822600 + }, + { + "epoch": 8.76, + "learning_rate": 3.382037150217788e-05, + "loss": 8.2445, + "step": 822700 + }, + { + "epoch": 8.76, + "learning_rate": 3.379946742728938e-05, + "loss": 8.2542, + "step": 822800 + }, + { + "epoch": 8.76, + "learning_rate": 3.3778568500823146e-05, + "loss": 8.2665, + "step": 822900 + }, + { + "epoch": 8.77, + "learning_rate": 3.3757674724404477e-05, + "loss": 8.2411, + "step": 823000 + }, + { + "epoch": 8.77, + "learning_rate": 3.3736786099658326e-05, + "loss": 8.2724, + "step": 823100 + }, + { + "epoch": 8.77, + "learning_rate": 3.371590262820918e-05, + "loss": 8.2368, + "step": 823200 + }, + { + "epoch": 8.77, + "learning_rate": 3.3695024311681156e-05, + "loss": 8.2943, + "step": 823300 + }, + { + "epoch": 8.77, + "learning_rate": 3.367415115169792e-05, + "loss": 8.3478, + "step": 823400 + }, + { + "epoch": 8.77, + "learning_rate": 3.3653283149882845e-05, + "loss": 8.3187, + "step": 823500 + }, + { + "epoch": 8.77, + "learning_rate": 3.3632420307858815e-05, + "loss": 8.3542, + "step": 823600 + }, + { + "epoch": 8.77, + "learning_rate": 3.3611562627248326e-05, + "loss": 8.3667, + "step": 823700 + }, + { + "epoch": 8.77, + "learning_rate": 3.3590710109673464e-05, + "loss": 8.2599, + "step": 823800 + }, + { + "epoch": 8.78, + "learning_rate": 3.356986275675597e-05, + "loss": 8.2797, + "step": 823900 + }, + { + "epoch": 8.78, + "learning_rate": 3.354902057011714e-05, + "loss": 8.2854, + "step": 824000 + }, + { + "epoch": 8.78, + "learning_rate": 3.352818355137782e-05, + "loss": 8.392, + "step": 824100 + }, + { + "epoch": 8.78, + "learning_rate": 3.350735170215858e-05, + "loss": 8.2298, + "step": 824200 + }, + { + "epoch": 8.78, + "learning_rate": 3.348652502407949e-05, + "loss": 8.2483, + "step": 824300 + }, + { + "epoch": 8.78, + "learning_rate": 3.346570351876023e-05, + "loss": 8.2705, + "step": 824400 + }, + { + "epoch": 8.78, + "learning_rate": 3.344488718782006e-05, + "loss": 8.2792, + "step": 824500 + }, + { + "epoch": 8.78, + "learning_rate": 3.342407603287795e-05, + "loss": 8.2192, + "step": 824600 + }, + { + "epoch": 8.78, + "learning_rate": 3.340327005555233e-05, + "loss": 8.2517, + "step": 824700 + }, + { + "epoch": 8.79, + "learning_rate": 3.3382469257461305e-05, + "loss": 8.2783, + "step": 824800 + }, + { + "epoch": 8.79, + "learning_rate": 3.33616736402225e-05, + "loss": 8.2771, + "step": 824900 + }, + { + "epoch": 8.79, + "learning_rate": 3.334088320545327e-05, + "loss": 8.1771, + "step": 825000 + }, + { + "epoch": 8.79, + "learning_rate": 3.3320097954770454e-05, + "loss": 8.2297, + "step": 825100 + }, + { + "epoch": 8.79, + "learning_rate": 3.329931788979053e-05, + "loss": 8.2355, + "step": 825200 + }, + { + "epoch": 8.79, + "learning_rate": 3.3278543012129526e-05, + "loss": 8.2828, + "step": 825300 + }, + { + "epoch": 8.79, + "learning_rate": 3.325777332340316e-05, + "loss": 8.23, + "step": 825400 + }, + { + "epoch": 8.79, + "learning_rate": 3.323700882522669e-05, + "loss": 8.2299, + "step": 825500 + }, + { + "epoch": 8.79, + "learning_rate": 3.321624951921496e-05, + "loss": 8.2808, + "step": 825600 + }, + { + "epoch": 8.79, + "learning_rate": 3.319549540698237e-05, + "loss": 8.2985, + "step": 825700 + }, + { + "epoch": 8.8, + "learning_rate": 3.317474649014306e-05, + "loss": 8.2882, + "step": 825800 + }, + { + "epoch": 8.8, + "learning_rate": 3.3154002770310634e-05, + "loss": 8.2207, + "step": 825900 + }, + { + "epoch": 8.8, + "learning_rate": 3.313326424909831e-05, + "loss": 8.2881, + "step": 826000 + }, + { + "epoch": 8.8, + "learning_rate": 3.3112530928118965e-05, + "loss": 8.2874, + "step": 826100 + }, + { + "epoch": 8.8, + "learning_rate": 3.3091802808985026e-05, + "loss": 8.2627, + "step": 826200 + }, + { + "epoch": 8.8, + "learning_rate": 3.307107989330851e-05, + "loss": 8.3082, + "step": 826300 + }, + { + "epoch": 8.8, + "learning_rate": 3.305036218270101e-05, + "loss": 8.2829, + "step": 826400 + }, + { + "epoch": 8.8, + "learning_rate": 3.3029649678773796e-05, + "loss": 8.2537, + "step": 826500 + }, + { + "epoch": 8.8, + "learning_rate": 3.300894238313768e-05, + "loss": 8.2545, + "step": 826600 + }, + { + "epoch": 8.81, + "learning_rate": 3.298824029740304e-05, + "loss": 8.3198, + "step": 826700 + }, + { + "epoch": 8.81, + "learning_rate": 3.296754342317986e-05, + "loss": 8.2059, + "step": 826800 + }, + { + "epoch": 8.81, + "learning_rate": 3.294685176207779e-05, + "loss": 8.2796, + "step": 826900 + }, + { + "epoch": 8.81, + "learning_rate": 3.292616531570601e-05, + "loss": 8.2203, + "step": 827000 + }, + { + "epoch": 8.81, + "learning_rate": 3.290548408567331e-05, + "loss": 8.249, + "step": 827100 + }, + { + "epoch": 8.81, + "learning_rate": 3.2884808073588016e-05, + "loss": 8.2314, + "step": 827200 + }, + { + "epoch": 8.81, + "learning_rate": 3.2864137281058214e-05, + "loss": 8.3003, + "step": 827300 + }, + { + "epoch": 8.81, + "learning_rate": 3.2843471709691345e-05, + "loss": 8.2876, + "step": 827400 + }, + { + "epoch": 8.81, + "learning_rate": 3.282281136109466e-05, + "loss": 8.264, + "step": 827500 + }, + { + "epoch": 8.82, + "learning_rate": 3.280215623687486e-05, + "loss": 8.4004, + "step": 827600 + }, + { + "epoch": 8.82, + "learning_rate": 3.278150633863839e-05, + "loss": 8.1729, + "step": 827700 + }, + { + "epoch": 8.82, + "learning_rate": 3.2760861667991073e-05, + "loss": 8.2517, + "step": 827800 + }, + { + "epoch": 8.82, + "learning_rate": 3.2740222226538544e-05, + "loss": 8.2852, + "step": 827900 + }, + { + "epoch": 8.82, + "learning_rate": 3.271958801588585e-05, + "loss": 8.2706, + "step": 828000 + }, + { + "epoch": 8.82, + "learning_rate": 3.269895903763781e-05, + "loss": 8.2699, + "step": 828100 + }, + { + "epoch": 8.82, + "learning_rate": 3.2678335293398685e-05, + "loss": 8.3367, + "step": 828200 + }, + { + "epoch": 8.82, + "learning_rate": 3.265771678477236e-05, + "loss": 8.3366, + "step": 828300 + }, + { + "epoch": 8.82, + "learning_rate": 3.2637103513362445e-05, + "loss": 8.3118, + "step": 828400 + }, + { + "epoch": 8.82, + "learning_rate": 3.261649548077189e-05, + "loss": 8.3681, + "step": 828500 + }, + { + "epoch": 8.83, + "learning_rate": 3.25958926886035e-05, + "loss": 8.2023, + "step": 828600 + }, + { + "epoch": 8.83, + "learning_rate": 3.257529513845948e-05, + "loss": 8.3011, + "step": 828700 + }, + { + "epoch": 8.83, + "learning_rate": 3.255470283194181e-05, + "loss": 8.2313, + "step": 828800 + }, + { + "epoch": 8.83, + "learning_rate": 3.253411577065181e-05, + "loss": 8.2333, + "step": 828900 + }, + { + "epoch": 8.83, + "learning_rate": 3.251353395619064e-05, + "loss": 8.2084, + "step": 829000 + }, + { + "epoch": 8.83, + "learning_rate": 3.2492957390158904e-05, + "loss": 8.2673, + "step": 829100 + }, + { + "epoch": 8.83, + "learning_rate": 3.247238607415693e-05, + "loss": 8.2626, + "step": 829200 + }, + { + "epoch": 8.83, + "learning_rate": 3.24518200097844e-05, + "loss": 8.2767, + "step": 829300 + }, + { + "epoch": 8.83, + "learning_rate": 3.2431259198640865e-05, + "loss": 8.2966, + "step": 829400 + }, + { + "epoch": 8.84, + "learning_rate": 3.241070364232529e-05, + "loss": 8.1983, + "step": 829500 + }, + { + "epoch": 8.84, + "learning_rate": 3.23901533424363e-05, + "loss": 8.2309, + "step": 829600 + }, + { + "epoch": 8.84, + "learning_rate": 3.236960830057205e-05, + "loss": 8.2652, + "step": 829700 + }, + { + "epoch": 8.84, + "learning_rate": 3.234906851833039e-05, + "loss": 8.2435, + "step": 829800 + }, + { + "epoch": 8.84, + "learning_rate": 3.2328533997308685e-05, + "loss": 8.3293, + "step": 829900 + }, + { + "epoch": 8.84, + "learning_rate": 3.23080047391039e-05, + "loss": 8.3412, + "step": 830000 + }, + { + "epoch": 8.84, + "learning_rate": 3.228748074531256e-05, + "loss": 8.2195, + "step": 830100 + }, + { + "epoch": 8.84, + "learning_rate": 3.226696201753084e-05, + "loss": 8.216, + "step": 830200 + }, + { + "epoch": 8.84, + "learning_rate": 3.224644855735458e-05, + "loss": 8.2079, + "step": 830300 + }, + { + "epoch": 8.84, + "learning_rate": 3.222594036637896e-05, + "loss": 8.3219, + "step": 830400 + }, + { + "epoch": 8.85, + "learning_rate": 3.2205437446199016e-05, + "loss": 8.2278, + "step": 830500 + }, + { + "epoch": 8.85, + "learning_rate": 3.218493979840921e-05, + "loss": 8.2094, + "step": 830600 + }, + { + "epoch": 8.85, + "learning_rate": 3.216444742460366e-05, + "loss": 8.2229, + "step": 830700 + }, + { + "epoch": 8.85, + "learning_rate": 3.2143960326376025e-05, + "loss": 8.1938, + "step": 830800 + }, + { + "epoch": 8.85, + "learning_rate": 3.212347850531966e-05, + "loss": 8.209, + "step": 830900 + }, + { + "epoch": 8.85, + "learning_rate": 3.210300196302739e-05, + "loss": 8.362, + "step": 831000 + }, + { + "epoch": 8.85, + "learning_rate": 3.2082530701091697e-05, + "loss": 8.2696, + "step": 831100 + }, + { + "epoch": 8.85, + "learning_rate": 3.206206472110459e-05, + "loss": 8.2712, + "step": 831200 + }, + { + "epoch": 8.85, + "learning_rate": 3.2041604024657765e-05, + "loss": 8.3241, + "step": 831300 + }, + { + "epoch": 8.86, + "learning_rate": 3.202114861334244e-05, + "loss": 8.2776, + "step": 831400 + }, + { + "epoch": 8.86, + "learning_rate": 3.200069848874942e-05, + "loss": 8.239, + "step": 831500 + }, + { + "epoch": 8.86, + "learning_rate": 3.198025365246911e-05, + "loss": 8.3158, + "step": 831600 + }, + { + "epoch": 8.86, + "learning_rate": 3.1959814106091524e-05, + "loss": 8.2291, + "step": 831700 + }, + { + "epoch": 8.86, + "learning_rate": 3.1939379851206255e-05, + "loss": 8.184, + "step": 831800 + }, + { + "epoch": 8.86, + "learning_rate": 3.1918950889402444e-05, + "loss": 8.2202, + "step": 831900 + }, + { + "epoch": 8.86, + "learning_rate": 3.1898527222268845e-05, + "loss": 8.2499, + "step": 832000 + }, + { + "epoch": 8.86, + "learning_rate": 3.1878108851393884e-05, + "loss": 8.2533, + "step": 832100 + }, + { + "epoch": 8.86, + "learning_rate": 3.1857695778365434e-05, + "loss": 8.2805, + "step": 832200 + }, + { + "epoch": 8.87, + "learning_rate": 3.183728800477101e-05, + "loss": 8.2508, + "step": 832300 + }, + { + "epoch": 8.87, + "learning_rate": 3.1816885532197796e-05, + "loss": 8.246, + "step": 832400 + }, + { + "epoch": 8.87, + "learning_rate": 3.179648836223245e-05, + "loss": 8.2905, + "step": 832500 + }, + { + "epoch": 8.87, + "learning_rate": 3.177609649646126e-05, + "loss": 8.2816, + "step": 832600 + }, + { + "epoch": 8.87, + "learning_rate": 3.175570993647009e-05, + "loss": 8.3255, + "step": 832700 + }, + { + "epoch": 8.87, + "learning_rate": 3.1735328683844444e-05, + "loss": 8.2574, + "step": 832800 + }, + { + "epoch": 8.87, + "learning_rate": 3.171495274016936e-05, + "loss": 8.318, + "step": 832900 + }, + { + "epoch": 8.87, + "learning_rate": 3.169458210702947e-05, + "loss": 8.2815, + "step": 833000 + }, + { + "epoch": 8.87, + "learning_rate": 3.167421678600897e-05, + "loss": 8.2358, + "step": 833100 + }, + { + "epoch": 8.87, + "learning_rate": 3.165385677869174e-05, + "loss": 8.1911, + "step": 833200 + }, + { + "epoch": 8.88, + "learning_rate": 3.163350208666114e-05, + "loss": 8.2559, + "step": 833300 + }, + { + "epoch": 8.88, + "learning_rate": 3.1613152711500174e-05, + "loss": 8.2351, + "step": 833400 + }, + { + "epoch": 8.88, + "learning_rate": 3.159280865479135e-05, + "loss": 8.4092, + "step": 833500 + }, + { + "epoch": 8.88, + "learning_rate": 3.157246991811693e-05, + "loss": 8.332, + "step": 833600 + }, + { + "epoch": 8.88, + "learning_rate": 3.1552136503058604e-05, + "loss": 8.2057, + "step": 833700 + }, + { + "epoch": 8.88, + "learning_rate": 3.1531808411197705e-05, + "loss": 8.2141, + "step": 833800 + }, + { + "epoch": 8.88, + "learning_rate": 3.151148564411512e-05, + "loss": 8.226, + "step": 833900 + }, + { + "epoch": 8.88, + "learning_rate": 3.149116820339143e-05, + "loss": 8.2508, + "step": 834000 + }, + { + "epoch": 8.88, + "learning_rate": 3.1470856090606684e-05, + "loss": 8.2343, + "step": 834100 + }, + { + "epoch": 8.89, + "learning_rate": 3.145054930734055e-05, + "loss": 8.1982, + "step": 834200 + }, + { + "epoch": 8.89, + "learning_rate": 3.1430247855172254e-05, + "loss": 8.2984, + "step": 834300 + }, + { + "epoch": 8.89, + "learning_rate": 3.1409951735680734e-05, + "loss": 8.2347, + "step": 834400 + }, + { + "epoch": 8.89, + "learning_rate": 3.138966095044435e-05, + "loss": 8.2685, + "step": 834500 + }, + { + "epoch": 8.89, + "learning_rate": 3.136937550104112e-05, + "loss": 8.2427, + "step": 834600 + }, + { + "epoch": 8.89, + "learning_rate": 3.134909538904869e-05, + "loss": 8.2916, + "step": 834700 + }, + { + "epoch": 8.89, + "learning_rate": 3.132882061604422e-05, + "loss": 8.2093, + "step": 834800 + }, + { + "epoch": 8.89, + "learning_rate": 3.1308551183604474e-05, + "loss": 8.2482, + "step": 834900 + }, + { + "epoch": 8.89, + "learning_rate": 3.128828709330579e-05, + "loss": 8.2997, + "step": 835000 + }, + { + "epoch": 8.89, + "learning_rate": 3.1268028346724164e-05, + "loss": 8.2768, + "step": 835100 + }, + { + "epoch": 8.9, + "learning_rate": 3.1247774945435094e-05, + "loss": 8.2211, + "step": 835200 + }, + { + "epoch": 8.9, + "learning_rate": 3.122752689101368e-05, + "loss": 8.2943, + "step": 835300 + }, + { + "epoch": 8.9, + "learning_rate": 3.120728418503459e-05, + "loss": 8.1916, + "step": 835400 + }, + { + "epoch": 8.9, + "learning_rate": 3.11870468290722e-05, + "loss": 8.2423, + "step": 835500 + }, + { + "epoch": 8.9, + "learning_rate": 3.116681482470024e-05, + "loss": 8.3102, + "step": 835600 + }, + { + "epoch": 8.9, + "learning_rate": 3.114658817349224e-05, + "loss": 8.2352, + "step": 835700 + }, + { + "epoch": 8.9, + "learning_rate": 3.112636687702119e-05, + "loss": 8.272, + "step": 835800 + }, + { + "epoch": 8.9, + "learning_rate": 3.110615093685977e-05, + "loss": 8.2605, + "step": 835900 + }, + { + "epoch": 8.9, + "learning_rate": 3.108594035458007e-05, + "loss": 8.2864, + "step": 836000 + }, + { + "epoch": 8.91, + "learning_rate": 3.106573513175395e-05, + "loss": 8.2906, + "step": 836100 + }, + { + "epoch": 8.91, + "learning_rate": 3.104553526995271e-05, + "loss": 8.2791, + "step": 836200 + }, + { + "epoch": 8.91, + "learning_rate": 3.1025340770747404e-05, + "loss": 8.2411, + "step": 836300 + }, + { + "epoch": 8.91, + "learning_rate": 3.100515163570842e-05, + "loss": 8.3463, + "step": 836400 + }, + { + "epoch": 8.91, + "learning_rate": 3.0984967866405945e-05, + "loss": 8.3127, + "step": 836500 + }, + { + "epoch": 8.91, + "learning_rate": 3.0964789464409715e-05, + "loss": 8.2494, + "step": 836600 + }, + { + "epoch": 8.91, + "learning_rate": 3.094461643128891e-05, + "loss": 8.338, + "step": 836700 + }, + { + "epoch": 8.91, + "learning_rate": 3.092444876861246e-05, + "loss": 8.2186, + "step": 836800 + }, + { + "epoch": 8.91, + "learning_rate": 3.090428647794874e-05, + "loss": 8.3029, + "step": 836900 + }, + { + "epoch": 8.92, + "learning_rate": 3.088412956086589e-05, + "loss": 8.2043, + "step": 837000 + }, + { + "epoch": 8.92, + "learning_rate": 3.086397801893137e-05, + "loss": 8.263, + "step": 837100 + }, + { + "epoch": 8.92, + "learning_rate": 3.084383185371248e-05, + "loss": 8.2758, + "step": 837200 + }, + { + "epoch": 8.92, + "learning_rate": 3.0823691066775906e-05, + "loss": 8.2949, + "step": 837300 + }, + { + "epoch": 8.92, + "learning_rate": 3.08035556596881e-05, + "loss": 8.2873, + "step": 837400 + }, + { + "epoch": 8.92, + "learning_rate": 3.0783425634014884e-05, + "loss": 8.2554, + "step": 837500 + }, + { + "epoch": 8.92, + "learning_rate": 3.076330099132184e-05, + "loss": 8.3131, + "step": 837600 + }, + { + "epoch": 8.92, + "learning_rate": 3.074318173317406e-05, + "loss": 8.2175, + "step": 837700 + }, + { + "epoch": 8.92, + "learning_rate": 3.072306786113621e-05, + "loss": 8.2547, + "step": 837800 + }, + { + "epoch": 8.92, + "learning_rate": 3.07029593767725e-05, + "loss": 8.2897, + "step": 837900 + }, + { + "epoch": 8.93, + "learning_rate": 3.0682856281646854e-05, + "loss": 8.2476, + "step": 838000 + }, + { + "epoch": 8.93, + "learning_rate": 3.066275857732266e-05, + "loss": 8.2742, + "step": 838100 + }, + { + "epoch": 8.93, + "learning_rate": 3.06426662653629e-05, + "loss": 8.3255, + "step": 838200 + }, + { + "epoch": 8.93, + "learning_rate": 3.062257934733014e-05, + "loss": 8.3028, + "step": 838300 + }, + { + "epoch": 8.93, + "learning_rate": 3.060249782478659e-05, + "loss": 8.2572, + "step": 838400 + }, + { + "epoch": 8.93, + "learning_rate": 3.0582421699293973e-05, + "loss": 8.3093, + "step": 838500 + }, + { + "epoch": 8.93, + "learning_rate": 3.056235097241359e-05, + "loss": 8.277, + "step": 838600 + }, + { + "epoch": 8.93, + "learning_rate": 3.054228564570638e-05, + "loss": 8.2134, + "step": 838700 + }, + { + "epoch": 8.93, + "learning_rate": 3.052222572073281e-05, + "loss": 8.2125, + "step": 838800 + }, + { + "epoch": 8.94, + "learning_rate": 3.0502171199052944e-05, + "loss": 8.2933, + "step": 838900 + }, + { + "epoch": 8.94, + "learning_rate": 3.048212208222637e-05, + "loss": 8.2871, + "step": 839000 + }, + { + "epoch": 8.94, + "learning_rate": 3.0462078371812408e-05, + "loss": 8.3159, + "step": 839100 + }, + { + "epoch": 8.94, + "learning_rate": 3.0442040069369813e-05, + "loss": 8.268, + "step": 839200 + }, + { + "epoch": 8.94, + "learning_rate": 3.0422007176456957e-05, + "loss": 8.1977, + "step": 839300 + }, + { + "epoch": 8.94, + "learning_rate": 3.0401979694631778e-05, + "loss": 8.2553, + "step": 839400 + }, + { + "epoch": 8.94, + "learning_rate": 3.0381957625451873e-05, + "loss": 8.2642, + "step": 839500 + }, + { + "epoch": 8.94, + "learning_rate": 3.0361940970474332e-05, + "loss": 8.2399, + "step": 839600 + }, + { + "epoch": 8.94, + "learning_rate": 3.0341929731255857e-05, + "loss": 8.2947, + "step": 839700 + }, + { + "epoch": 8.94, + "learning_rate": 3.032192390935269e-05, + "loss": 8.2838, + "step": 839800 + }, + { + "epoch": 8.95, + "learning_rate": 3.030192350632074e-05, + "loss": 8.2372, + "step": 839900 + }, + { + "epoch": 8.95, + "learning_rate": 3.028192852371542e-05, + "loss": 8.2975, + "step": 840000 + }, + { + "epoch": 8.95, + "learning_rate": 3.026193896309174e-05, + "loss": 8.2776, + "step": 840100 + }, + { + "epoch": 8.95, + "learning_rate": 3.024195482600425e-05, + "loss": 8.3623, + "step": 840200 + }, + { + "epoch": 8.95, + "learning_rate": 3.022197611400719e-05, + "loss": 8.2556, + "step": 840300 + }, + { + "epoch": 8.95, + "learning_rate": 3.0202002828654276e-05, + "loss": 8.3252, + "step": 840400 + }, + { + "epoch": 8.95, + "learning_rate": 3.0182034971498818e-05, + "loss": 8.1914, + "step": 840500 + }, + { + "epoch": 8.95, + "learning_rate": 3.016207254409371e-05, + "loss": 8.2477, + "step": 840600 + }, + { + "epoch": 8.95, + "learning_rate": 3.0142115547991477e-05, + "loss": 8.2958, + "step": 840700 + }, + { + "epoch": 8.96, + "learning_rate": 3.0122163984744157e-05, + "loss": 8.1611, + "step": 840800 + }, + { + "epoch": 8.96, + "learning_rate": 3.0102217855903335e-05, + "loss": 8.2341, + "step": 840900 + }, + { + "epoch": 8.96, + "learning_rate": 3.0082277163020312e-05, + "loss": 8.3176, + "step": 841000 + }, + { + "epoch": 8.96, + "learning_rate": 3.006234190764583e-05, + "loss": 8.1658, + "step": 841100 + }, + { + "epoch": 8.96, + "learning_rate": 3.0042412091330252e-05, + "loss": 8.2394, + "step": 841200 + }, + { + "epoch": 8.96, + "learning_rate": 3.0022487715623504e-05, + "loss": 8.2207, + "step": 841300 + }, + { + "epoch": 8.96, + "learning_rate": 3.000256878207517e-05, + "loss": 8.2641, + "step": 841400 + }, + { + "epoch": 8.96, + "learning_rate": 2.9982655292234295e-05, + "loss": 8.3131, + "step": 841500 + }, + { + "epoch": 8.96, + "learning_rate": 2.996274724764958e-05, + "loss": 8.2654, + "step": 841600 + }, + { + "epoch": 8.97, + "learning_rate": 2.9942844649869228e-05, + "loss": 8.2102, + "step": 841700 + }, + { + "epoch": 8.97, + "learning_rate": 2.9922947500441136e-05, + "loss": 8.304, + "step": 841800 + }, + { + "epoch": 8.97, + "learning_rate": 2.990305580091266e-05, + "loss": 8.2089, + "step": 841900 + }, + { + "epoch": 8.97, + "learning_rate": 2.9883169552830802e-05, + "loss": 8.2184, + "step": 842000 + }, + { + "epoch": 8.97, + "learning_rate": 2.986328875774208e-05, + "loss": 8.2306, + "step": 842100 + }, + { + "epoch": 8.97, + "learning_rate": 2.984341341719271e-05, + "loss": 8.2026, + "step": 842200 + }, + { + "epoch": 8.97, + "learning_rate": 2.982354353272828e-05, + "loss": 8.2298, + "step": 842300 + }, + { + "epoch": 8.97, + "learning_rate": 2.980367910589418e-05, + "loss": 8.2182, + "step": 842400 + }, + { + "epoch": 8.97, + "learning_rate": 2.9783820138235173e-05, + "loss": 8.242, + "step": 842500 + }, + { + "epoch": 8.97, + "learning_rate": 2.9763966631295826e-05, + "loss": 8.1968, + "step": 842600 + }, + { + "epoch": 8.98, + "learning_rate": 2.9744118586619984e-05, + "loss": 8.2604, + "step": 842700 + }, + { + "epoch": 8.98, + "learning_rate": 2.9724276005751327e-05, + "loss": 8.2438, + "step": 842800 + }, + { + "epoch": 8.98, + "learning_rate": 2.9704438890233034e-05, + "loss": 8.2956, + "step": 842900 + }, + { + "epoch": 8.98, + "learning_rate": 2.96846072416078e-05, + "loss": 8.2327, + "step": 843000 + }, + { + "epoch": 8.98, + "learning_rate": 2.9664781061417945e-05, + "loss": 8.2698, + "step": 843100 + }, + { + "epoch": 8.98, + "learning_rate": 2.964496035120532e-05, + "loss": 8.2301, + "step": 843200 + }, + { + "epoch": 8.98, + "learning_rate": 2.9625145112511476e-05, + "loss": 8.2665, + "step": 843300 + }, + { + "epoch": 8.98, + "learning_rate": 2.9605335346877317e-05, + "loss": 8.3423, + "step": 843400 + }, + { + "epoch": 8.98, + "learning_rate": 2.9585531055843552e-05, + "loss": 8.2932, + "step": 843500 + }, + { + "epoch": 8.99, + "learning_rate": 2.9565732240950283e-05, + "loss": 8.3009, + "step": 843600 + }, + { + "epoch": 8.99, + "learning_rate": 2.954593890373739e-05, + "loss": 8.2639, + "step": 843700 + }, + { + "epoch": 8.99, + "learning_rate": 2.9526151045744056e-05, + "loss": 8.2271, + "step": 843800 + }, + { + "epoch": 8.99, + "learning_rate": 2.9506368668509276e-05, + "loss": 8.2762, + "step": 843900 + }, + { + "epoch": 8.99, + "learning_rate": 2.9486591773571483e-05, + "loss": 8.2065, + "step": 844000 + }, + { + "epoch": 8.99, + "learning_rate": 2.9466820362468805e-05, + "loss": 8.229, + "step": 844100 + }, + { + "epoch": 8.99, + "learning_rate": 2.944705443673875e-05, + "loss": 8.2392, + "step": 844200 + }, + { + "epoch": 8.99, + "learning_rate": 2.9427293997918615e-05, + "loss": 8.2306, + "step": 844300 + }, + { + "epoch": 8.99, + "learning_rate": 2.940753904754513e-05, + "loss": 8.2786, + "step": 844400 + }, + { + "epoch": 9.0, + "learning_rate": 2.9387789587154637e-05, + "loss": 8.2406, + "step": 844500 + }, + { + "epoch": 9.0, + "learning_rate": 2.9368045618283023e-05, + "loss": 8.2627, + "step": 844600 + }, + { + "epoch": 9.0, + "learning_rate": 2.9348307142465857e-05, + "loss": 8.2086, + "step": 844700 + }, + { + "epoch": 9.0, + "learning_rate": 2.9328574161238154e-05, + "loss": 8.3026, + "step": 844800 + }, + { + "epoch": 9.0, + "learning_rate": 2.9308846676134528e-05, + "loss": 8.2644, + "step": 844900 + }, + { + "epoch": 9.0, + "learning_rate": 2.928912468868924e-05, + "loss": 8.2803, + "step": 845000 + }, + { + "epoch": 9.0, + "learning_rate": 2.9269408200436022e-05, + "loss": 8.2429, + "step": 845100 + }, + { + "epoch": 9.0, + "learning_rate": 2.9249697212908322e-05, + "loss": 8.1767, + "step": 845200 + }, + { + "epoch": 9.0, + "learning_rate": 2.922999172763892e-05, + "loss": 8.2993, + "step": 845300 + }, + { + "epoch": 9.0, + "learning_rate": 2.9210291746160423e-05, + "loss": 8.2549, + "step": 845400 + }, + { + "epoch": 9.01, + "learning_rate": 2.919059727000487e-05, + "loss": 8.2595, + "step": 845500 + }, + { + "epoch": 9.01, + "learning_rate": 2.9170908300703893e-05, + "loss": 8.261, + "step": 845600 + }, + { + "epoch": 9.01, + "learning_rate": 2.9151224839788683e-05, + "loss": 8.2455, + "step": 845700 + }, + { + "epoch": 9.01, + "learning_rate": 2.9131546888790086e-05, + "loss": 8.2865, + "step": 845800 + }, + { + "epoch": 9.01, + "learning_rate": 2.911187444923843e-05, + "loss": 8.2717, + "step": 845900 + }, + { + "epoch": 9.01, + "learning_rate": 2.9092207522663626e-05, + "loss": 8.2054, + "step": 846000 + }, + { + "epoch": 9.01, + "learning_rate": 2.9072546110595167e-05, + "loss": 8.2345, + "step": 846100 + }, + { + "epoch": 9.01, + "learning_rate": 2.9052890214562167e-05, + "loss": 8.2846, + "step": 846200 + }, + { + "epoch": 9.01, + "learning_rate": 2.903323983609324e-05, + "loss": 8.3051, + "step": 846300 + }, + { + "epoch": 9.02, + "learning_rate": 2.9013594976716595e-05, + "loss": 8.2461, + "step": 846400 + }, + { + "epoch": 9.02, + "learning_rate": 2.8993955637959992e-05, + "loss": 8.2728, + "step": 846500 + }, + { + "epoch": 9.02, + "learning_rate": 2.8974321821350846e-05, + "loss": 8.1553, + "step": 846600 + }, + { + "epoch": 9.02, + "learning_rate": 2.8954693528416032e-05, + "loss": 8.2257, + "step": 846700 + }, + { + "epoch": 9.02, + "learning_rate": 2.893507076068207e-05, + "loss": 8.2464, + "step": 846800 + }, + { + "epoch": 9.02, + "learning_rate": 2.8915453519674984e-05, + "loss": 8.2094, + "step": 846900 + }, + { + "epoch": 9.02, + "learning_rate": 2.8895841806920466e-05, + "loss": 8.2746, + "step": 847000 + }, + { + "epoch": 9.02, + "learning_rate": 2.8876235623943692e-05, + "loss": 8.1501, + "step": 847100 + }, + { + "epoch": 9.02, + "learning_rate": 2.88566349722694e-05, + "loss": 8.2666, + "step": 847200 + }, + { + "epoch": 9.02, + "learning_rate": 2.883703985342202e-05, + "loss": 8.2965, + "step": 847300 + }, + { + "epoch": 9.03, + "learning_rate": 2.8817450268925405e-05, + "loss": 8.2031, + "step": 847400 + }, + { + "epoch": 9.03, + "learning_rate": 2.879786622030306e-05, + "loss": 8.157, + "step": 847500 + }, + { + "epoch": 9.03, + "learning_rate": 2.8778287709078e-05, + "loss": 8.2265, + "step": 847600 + }, + { + "epoch": 9.03, + "learning_rate": 2.8758714736772908e-05, + "loss": 8.2286, + "step": 847700 + }, + { + "epoch": 9.03, + "learning_rate": 2.8739147304909953e-05, + "loss": 8.3275, + "step": 847800 + }, + { + "epoch": 9.03, + "learning_rate": 2.8719585415010885e-05, + "loss": 8.1727, + "step": 847900 + }, + { + "epoch": 9.03, + "learning_rate": 2.8700029068597013e-05, + "loss": 8.2448, + "step": 848000 + }, + { + "epoch": 9.03, + "learning_rate": 2.868047826718928e-05, + "loss": 8.1642, + "step": 848100 + }, + { + "epoch": 9.03, + "learning_rate": 2.8660933012308157e-05, + "loss": 8.1949, + "step": 848200 + }, + { + "epoch": 9.04, + "learning_rate": 2.864139330547365e-05, + "loss": 8.2454, + "step": 848300 + }, + { + "epoch": 9.04, + "learning_rate": 2.8621859148205344e-05, + "loss": 8.2757, + "step": 848400 + }, + { + "epoch": 9.04, + "learning_rate": 2.8602330542022472e-05, + "loss": 8.2725, + "step": 848500 + }, + { + "epoch": 9.04, + "learning_rate": 2.858280748844374e-05, + "loss": 8.2192, + "step": 848600 + }, + { + "epoch": 9.04, + "learning_rate": 2.8563289988987475e-05, + "loss": 8.2925, + "step": 848700 + }, + { + "epoch": 9.04, + "learning_rate": 2.854377804517151e-05, + "loss": 8.192, + "step": 848800 + }, + { + "epoch": 9.04, + "learning_rate": 2.852427165851336e-05, + "loss": 8.2833, + "step": 848900 + }, + { + "epoch": 9.04, + "learning_rate": 2.8504770830530004e-05, + "loss": 8.2881, + "step": 849000 + }, + { + "epoch": 9.04, + "learning_rate": 2.848527556273799e-05, + "loss": 8.2624, + "step": 849100 + }, + { + "epoch": 9.05, + "learning_rate": 2.8465785856653527e-05, + "loss": 8.2188, + "step": 849200 + }, + { + "epoch": 9.05, + "learning_rate": 2.8446301713792312e-05, + "loss": 8.208, + "step": 849300 + }, + { + "epoch": 9.05, + "learning_rate": 2.8426823135669613e-05, + "loss": 8.1673, + "step": 849400 + }, + { + "epoch": 9.05, + "learning_rate": 2.840735012380027e-05, + "loss": 8.1996, + "step": 849500 + }, + { + "epoch": 9.05, + "learning_rate": 2.838788267969874e-05, + "loss": 8.2834, + "step": 849600 + }, + { + "epoch": 9.05, + "learning_rate": 2.8368420804878993e-05, + "loss": 8.2624, + "step": 849700 + }, + { + "epoch": 9.05, + "learning_rate": 2.834896450085458e-05, + "loss": 8.2728, + "step": 849800 + }, + { + "epoch": 9.05, + "learning_rate": 2.8329513769138584e-05, + "loss": 8.1989, + "step": 849900 + }, + { + "epoch": 9.05, + "learning_rate": 2.8310068611243757e-05, + "loss": 8.2072, + "step": 850000 + }, + { + "epoch": 9.05, + "learning_rate": 2.8290629028682313e-05, + "loss": 8.2574, + "step": 850100 + }, + { + "epoch": 9.06, + "learning_rate": 2.827119502296607e-05, + "loss": 8.2408, + "step": 850200 + }, + { + "epoch": 9.06, + "learning_rate": 2.825176659560639e-05, + "loss": 8.2824, + "step": 850300 + }, + { + "epoch": 9.06, + "learning_rate": 2.823234374811432e-05, + "loss": 8.231, + "step": 850400 + }, + { + "epoch": 9.06, + "learning_rate": 2.8212926482000236e-05, + "loss": 8.2776, + "step": 850500 + }, + { + "epoch": 9.06, + "learning_rate": 2.8193514798774335e-05, + "loss": 8.2864, + "step": 850600 + }, + { + "epoch": 9.06, + "learning_rate": 2.8174108699946178e-05, + "loss": 8.2784, + "step": 850700 + }, + { + "epoch": 9.06, + "learning_rate": 2.815470818702509e-05, + "loss": 8.2215, + "step": 850800 + }, + { + "epoch": 9.06, + "learning_rate": 2.8135313261519725e-05, + "loss": 8.2985, + "step": 850900 + }, + { + "epoch": 9.06, + "learning_rate": 2.8115923924938524e-05, + "loss": 8.2389, + "step": 851000 + }, + { + "epoch": 9.07, + "learning_rate": 2.8096540178789333e-05, + "loss": 8.1879, + "step": 851100 + }, + { + "epoch": 9.07, + "learning_rate": 2.8077162024579685e-05, + "loss": 8.2685, + "step": 851200 + }, + { + "epoch": 9.07, + "learning_rate": 2.8057789463816588e-05, + "loss": 8.2088, + "step": 851300 + }, + { + "epoch": 9.07, + "learning_rate": 2.8038422498006624e-05, + "loss": 8.2687, + "step": 851400 + }, + { + "epoch": 9.07, + "learning_rate": 2.8019061128656066e-05, + "loss": 8.2597, + "step": 851500 + }, + { + "epoch": 9.07, + "learning_rate": 2.79997053572705e-05, + "loss": 8.2575, + "step": 851600 + }, + { + "epoch": 9.07, + "learning_rate": 2.7980355185355343e-05, + "loss": 8.1963, + "step": 851700 + }, + { + "epoch": 9.07, + "learning_rate": 2.7961010614415395e-05, + "loss": 8.2434, + "step": 851800 + }, + { + "epoch": 9.07, + "learning_rate": 2.794167164595516e-05, + "loss": 8.1514, + "step": 851900 + }, + { + "epoch": 9.07, + "learning_rate": 2.792233828147852e-05, + "loss": 8.2702, + "step": 852000 + }, + { + "epoch": 9.08, + "learning_rate": 2.7903010522489137e-05, + "loss": 8.2681, + "step": 852100 + }, + { + "epoch": 9.08, + "learning_rate": 2.788368837049009e-05, + "loss": 8.2304, + "step": 852200 + }, + { + "epoch": 9.08, + "learning_rate": 2.7864371826984058e-05, + "loss": 8.2437, + "step": 852300 + }, + { + "epoch": 9.08, + "learning_rate": 2.7845060893473273e-05, + "loss": 8.2877, + "step": 852400 + }, + { + "epoch": 9.08, + "learning_rate": 2.78257555714596e-05, + "loss": 8.1984, + "step": 852500 + }, + { + "epoch": 9.08, + "learning_rate": 2.7806455862444392e-05, + "loss": 8.2653, + "step": 852600 + }, + { + "epoch": 9.08, + "learning_rate": 2.778716176792858e-05, + "loss": 8.1856, + "step": 852700 + }, + { + "epoch": 9.08, + "learning_rate": 2.776787328941265e-05, + "loss": 8.239, + "step": 852800 + }, + { + "epoch": 9.08, + "learning_rate": 2.7748590428396725e-05, + "loss": 8.2087, + "step": 852900 + }, + { + "epoch": 9.09, + "learning_rate": 2.7729313186380402e-05, + "loss": 8.2769, + "step": 853000 + }, + { + "epoch": 9.09, + "learning_rate": 2.7710041564862877e-05, + "loss": 8.2345, + "step": 853100 + }, + { + "epoch": 9.09, + "learning_rate": 2.7690775565342875e-05, + "loss": 8.2624, + "step": 853200 + }, + { + "epoch": 9.09, + "learning_rate": 2.7671515189318764e-05, + "loss": 8.2159, + "step": 853300 + }, + { + "epoch": 9.09, + "learning_rate": 2.7652260438288423e-05, + "loss": 8.1478, + "step": 853400 + }, + { + "epoch": 9.09, + "learning_rate": 2.7633011313749235e-05, + "loss": 8.259, + "step": 853500 + }, + { + "epoch": 9.09, + "learning_rate": 2.7613767817198288e-05, + "loss": 8.2641, + "step": 853600 + }, + { + "epoch": 9.09, + "learning_rate": 2.7594529950132108e-05, + "loss": 8.2215, + "step": 853700 + }, + { + "epoch": 9.09, + "learning_rate": 2.757529771404683e-05, + "loss": 8.2947, + "step": 853800 + }, + { + "epoch": 9.1, + "learning_rate": 2.755607111043812e-05, + "loss": 8.2407, + "step": 853900 + }, + { + "epoch": 9.1, + "learning_rate": 2.7536850140801295e-05, + "loss": 8.2118, + "step": 854000 + }, + { + "epoch": 9.1, + "learning_rate": 2.751763480663113e-05, + "loss": 8.2793, + "step": 854100 + }, + { + "epoch": 9.1, + "learning_rate": 2.7498425109422023e-05, + "loss": 8.285, + "step": 854200 + }, + { + "epoch": 9.1, + "learning_rate": 2.7479221050667858e-05, + "loss": 8.2006, + "step": 854300 + }, + { + "epoch": 9.1, + "learning_rate": 2.7460022631862216e-05, + "loss": 8.2084, + "step": 854400 + }, + { + "epoch": 9.1, + "learning_rate": 2.744082985449813e-05, + "loss": 8.1859, + "step": 854500 + }, + { + "epoch": 9.1, + "learning_rate": 2.7421642720068207e-05, + "loss": 8.2969, + "step": 854600 + }, + { + "epoch": 9.1, + "learning_rate": 2.7402461230064625e-05, + "loss": 8.1653, + "step": 854700 + }, + { + "epoch": 9.1, + "learning_rate": 2.7383285385979184e-05, + "loss": 8.2762, + "step": 854800 + }, + { + "epoch": 9.11, + "learning_rate": 2.736411518930315e-05, + "loss": 8.2477, + "step": 854900 + }, + { + "epoch": 9.11, + "learning_rate": 2.73449506415274e-05, + "loss": 8.3123, + "step": 855000 + }, + { + "epoch": 9.11, + "learning_rate": 2.7325791744142328e-05, + "loss": 8.2553, + "step": 855100 + }, + { + "epoch": 9.11, + "learning_rate": 2.7306638498637994e-05, + "loss": 8.2666, + "step": 855200 + }, + { + "epoch": 9.11, + "learning_rate": 2.7287490906503908e-05, + "loss": 8.3143, + "step": 855300 + }, + { + "epoch": 9.11, + "learning_rate": 2.726834896922915e-05, + "loss": 8.1824, + "step": 855400 + }, + { + "epoch": 9.11, + "learning_rate": 2.7249212688302462e-05, + "loss": 8.1957, + "step": 855500 + }, + { + "epoch": 9.11, + "learning_rate": 2.7230082065212026e-05, + "loss": 8.2408, + "step": 855600 + }, + { + "epoch": 9.11, + "learning_rate": 2.7210957101445643e-05, + "loss": 8.3205, + "step": 855700 + }, + { + "epoch": 9.12, + "learning_rate": 2.7191837798490638e-05, + "loss": 8.1899, + "step": 855800 + }, + { + "epoch": 9.12, + "learning_rate": 2.7172724157833972e-05, + "loss": 8.2868, + "step": 855900 + }, + { + "epoch": 9.12, + "learning_rate": 2.7153616180962095e-05, + "loss": 8.2414, + "step": 856000 + }, + { + "epoch": 9.12, + "learning_rate": 2.7134513869361022e-05, + "loss": 8.2119, + "step": 856100 + }, + { + "epoch": 9.12, + "learning_rate": 2.7115417224516326e-05, + "loss": 8.2633, + "step": 856200 + }, + { + "epoch": 9.12, + "learning_rate": 2.7096326247913206e-05, + "loss": 8.3004, + "step": 856300 + }, + { + "epoch": 9.12, + "learning_rate": 2.7077240941036354e-05, + "loss": 8.2911, + "step": 856400 + }, + { + "epoch": 9.12, + "learning_rate": 2.7058161305370012e-05, + "loss": 8.29, + "step": 856500 + }, + { + "epoch": 9.12, + "learning_rate": 2.7039087342397983e-05, + "loss": 8.2163, + "step": 856600 + }, + { + "epoch": 9.12, + "learning_rate": 2.702001905360373e-05, + "loss": 8.264, + "step": 856700 + }, + { + "epoch": 9.13, + "learning_rate": 2.7000956440470147e-05, + "loss": 8.2532, + "step": 856800 + }, + { + "epoch": 9.13, + "learning_rate": 2.6981899504479736e-05, + "loss": 8.2629, + "step": 856900 + }, + { + "epoch": 9.13, + "learning_rate": 2.6962848247114537e-05, + "loss": 8.202, + "step": 857000 + }, + { + "epoch": 9.13, + "learning_rate": 2.6943802669856243e-05, + "loss": 8.2038, + "step": 857100 + }, + { + "epoch": 9.13, + "learning_rate": 2.692476277418593e-05, + "loss": 8.2742, + "step": 857200 + }, + { + "epoch": 9.13, + "learning_rate": 2.6905728561584408e-05, + "loss": 8.2522, + "step": 857300 + }, + { + "epoch": 9.13, + "learning_rate": 2.6886700033531908e-05, + "loss": 8.3436, + "step": 857400 + }, + { + "epoch": 9.13, + "learning_rate": 2.6867677191508356e-05, + "loss": 8.2472, + "step": 857500 + }, + { + "epoch": 9.13, + "learning_rate": 2.6848660036993124e-05, + "loss": 8.2295, + "step": 857600 + }, + { + "epoch": 9.14, + "learning_rate": 2.682964857146514e-05, + "loss": 8.2956, + "step": 857700 + }, + { + "epoch": 9.14, + "learning_rate": 2.6810642796402985e-05, + "loss": 8.2858, + "step": 857800 + }, + { + "epoch": 9.14, + "learning_rate": 2.6791642713284725e-05, + "loss": 8.2228, + "step": 857900 + }, + { + "epoch": 9.14, + "learning_rate": 2.677264832358799e-05, + "loss": 8.295, + "step": 858000 + }, + { + "epoch": 9.14, + "learning_rate": 2.675365962878995e-05, + "loss": 8.1675, + "step": 858100 + }, + { + "epoch": 9.14, + "learning_rate": 2.673467663036744e-05, + "loss": 8.2041, + "step": 858200 + }, + { + "epoch": 9.14, + "learning_rate": 2.6715699329796662e-05, + "loss": 8.2009, + "step": 858300 + }, + { + "epoch": 9.14, + "learning_rate": 2.6696727728553562e-05, + "loss": 8.2253, + "step": 858400 + }, + { + "epoch": 9.14, + "learning_rate": 2.66777618281135e-05, + "loss": 8.2651, + "step": 858500 + }, + { + "epoch": 9.15, + "learning_rate": 2.6658801629951558e-05, + "loss": 8.2451, + "step": 858600 + }, + { + "epoch": 9.15, + "learning_rate": 2.6639847135542152e-05, + "loss": 8.148, + "step": 858700 + }, + { + "epoch": 9.15, + "learning_rate": 2.662089834635946e-05, + "loss": 8.2035, + "step": 858800 + }, + { + "epoch": 9.15, + "learning_rate": 2.6601955263877066e-05, + "loss": 8.2968, + "step": 858900 + }, + { + "epoch": 9.15, + "learning_rate": 2.658301788956826e-05, + "loss": 8.2758, + "step": 859000 + }, + { + "epoch": 9.15, + "learning_rate": 2.6564086224905717e-05, + "loss": 8.2063, + "step": 859100 + }, + { + "epoch": 9.15, + "learning_rate": 2.6545160271361813e-05, + "loss": 8.2764, + "step": 859200 + }, + { + "epoch": 9.15, + "learning_rate": 2.6526240030408388e-05, + "loss": 8.2444, + "step": 859300 + }, + { + "epoch": 9.15, + "learning_rate": 2.65073255035169e-05, + "loss": 8.1554, + "step": 859400 + }, + { + "epoch": 9.15, + "learning_rate": 2.648841669215828e-05, + "loss": 8.3465, + "step": 859500 + }, + { + "epoch": 9.16, + "learning_rate": 2.6469513597803108e-05, + "loss": 8.313, + "step": 859600 + }, + { + "epoch": 9.16, + "learning_rate": 2.6450616221921532e-05, + "loss": 8.2134, + "step": 859700 + }, + { + "epoch": 9.16, + "learning_rate": 2.6431724565983098e-05, + "loss": 8.2438, + "step": 859800 + }, + { + "epoch": 9.16, + "learning_rate": 2.641283863145708e-05, + "loss": 8.2313, + "step": 859900 + }, + { + "epoch": 9.16, + "learning_rate": 2.6393958419812202e-05, + "loss": 8.2205, + "step": 860000 + }, + { + "epoch": 9.16, + "learning_rate": 2.6375083932516852e-05, + "loss": 8.2323, + "step": 860100 + }, + { + "epoch": 9.16, + "learning_rate": 2.635621517103879e-05, + "loss": 8.2395, + "step": 860200 + }, + { + "epoch": 9.16, + "learning_rate": 2.6337352136845527e-05, + "loss": 8.1833, + "step": 860300 + }, + { + "epoch": 9.16, + "learning_rate": 2.6318494831404016e-05, + "loss": 8.2299, + "step": 860400 + }, + { + "epoch": 9.17, + "learning_rate": 2.6299643256180794e-05, + "loss": 8.2382, + "step": 860500 + }, + { + "epoch": 9.17, + "learning_rate": 2.6280797412641922e-05, + "loss": 8.2801, + "step": 860600 + }, + { + "epoch": 9.17, + "learning_rate": 2.626195730225309e-05, + "loss": 8.1206, + "step": 860700 + }, + { + "epoch": 9.17, + "learning_rate": 2.624312292647948e-05, + "loss": 8.2359, + "step": 860800 + }, + { + "epoch": 9.17, + "learning_rate": 2.6224294286785833e-05, + "loss": 8.2054, + "step": 860900 + }, + { + "epoch": 9.17, + "learning_rate": 2.6205471384636427e-05, + "loss": 8.2147, + "step": 861000 + }, + { + "epoch": 9.17, + "learning_rate": 2.6186654221495177e-05, + "loss": 8.1995, + "step": 861100 + }, + { + "epoch": 9.17, + "learning_rate": 2.6167842798825482e-05, + "loss": 8.2621, + "step": 861200 + }, + { + "epoch": 9.17, + "learning_rate": 2.6149037118090292e-05, + "loss": 8.2659, + "step": 861300 + }, + { + "epoch": 9.18, + "learning_rate": 2.6130237180752105e-05, + "loss": 8.2146, + "step": 861400 + }, + { + "epoch": 9.18, + "learning_rate": 2.611144298827305e-05, + "loss": 8.3337, + "step": 861500 + }, + { + "epoch": 9.18, + "learning_rate": 2.6092654542114713e-05, + "loss": 8.2297, + "step": 861600 + }, + { + "epoch": 9.18, + "learning_rate": 2.6073871843738296e-05, + "loss": 8.2071, + "step": 861700 + }, + { + "epoch": 9.18, + "learning_rate": 2.6055094894604494e-05, + "loss": 8.3126, + "step": 861800 + }, + { + "epoch": 9.18, + "learning_rate": 2.6036323696173648e-05, + "loss": 8.3069, + "step": 861900 + }, + { + "epoch": 9.18, + "learning_rate": 2.601755824990556e-05, + "loss": 8.2293, + "step": 862000 + }, + { + "epoch": 9.18, + "learning_rate": 2.5998798557259595e-05, + "loss": 8.1861, + "step": 862100 + }, + { + "epoch": 9.18, + "learning_rate": 2.5980044619694765e-05, + "loss": 8.1671, + "step": 862200 + }, + { + "epoch": 9.18, + "learning_rate": 2.5961296438669524e-05, + "loss": 8.2552, + "step": 862300 + }, + { + "epoch": 9.19, + "learning_rate": 2.594255401564193e-05, + "loss": 8.2708, + "step": 862400 + }, + { + "epoch": 9.19, + "learning_rate": 2.5923817352069547e-05, + "loss": 8.3739, + "step": 862500 + }, + { + "epoch": 9.19, + "learning_rate": 2.590508644940959e-05, + "loss": 8.2646, + "step": 862600 + }, + { + "epoch": 9.19, + "learning_rate": 2.5886361309118735e-05, + "loss": 8.2662, + "step": 862700 + }, + { + "epoch": 9.19, + "learning_rate": 2.5867641932653218e-05, + "loss": 8.2397, + "step": 862800 + }, + { + "epoch": 9.19, + "learning_rate": 2.5848928321468857e-05, + "loss": 8.1381, + "step": 862900 + }, + { + "epoch": 9.19, + "learning_rate": 2.5830220477021028e-05, + "loss": 8.2682, + "step": 863000 + }, + { + "epoch": 9.19, + "learning_rate": 2.5811518400764645e-05, + "loss": 8.2159, + "step": 863100 + }, + { + "epoch": 9.19, + "learning_rate": 2.5792822094154158e-05, + "loss": 8.2312, + "step": 863200 + }, + { + "epoch": 9.2, + "learning_rate": 2.5774131558643544e-05, + "loss": 8.2579, + "step": 863300 + }, + { + "epoch": 9.2, + "learning_rate": 2.5755446795686433e-05, + "loss": 8.2518, + "step": 863400 + }, + { + "epoch": 9.2, + "learning_rate": 2.5736767806735918e-05, + "loss": 8.2897, + "step": 863500 + }, + { + "epoch": 9.2, + "learning_rate": 2.571809459324466e-05, + "loss": 8.2846, + "step": 863600 + }, + { + "epoch": 9.2, + "learning_rate": 2.5699427156664846e-05, + "loss": 8.2641, + "step": 863700 + }, + { + "epoch": 9.2, + "learning_rate": 2.56807654984483e-05, + "loss": 8.2616, + "step": 863800 + }, + { + "epoch": 9.2, + "learning_rate": 2.5662109620046316e-05, + "loss": 8.2524, + "step": 863900 + }, + { + "epoch": 9.2, + "learning_rate": 2.564345952290974e-05, + "loss": 8.226, + "step": 864000 + }, + { + "epoch": 9.2, + "learning_rate": 2.562481520848904e-05, + "loss": 8.2633, + "step": 864100 + }, + { + "epoch": 9.2, + "learning_rate": 2.560617667823417e-05, + "loss": 8.284, + "step": 864200 + }, + { + "epoch": 9.21, + "learning_rate": 2.558754393359464e-05, + "loss": 8.2933, + "step": 864300 + }, + { + "epoch": 9.21, + "learning_rate": 2.5568916976019496e-05, + "loss": 8.2512, + "step": 864400 + }, + { + "epoch": 9.21, + "learning_rate": 2.5550295806957404e-05, + "loss": 8.2185, + "step": 864500 + }, + { + "epoch": 9.21, + "learning_rate": 2.553168042785653e-05, + "loss": 8.2702, + "step": 864600 + }, + { + "epoch": 9.21, + "learning_rate": 2.5513070840164566e-05, + "loss": 8.161, + "step": 864700 + }, + { + "epoch": 9.21, + "learning_rate": 2.5494467045328773e-05, + "loss": 8.1895, + "step": 864800 + }, + { + "epoch": 9.21, + "learning_rate": 2.5475869044796052e-05, + "loss": 8.2898, + "step": 864900 + }, + { + "epoch": 9.21, + "learning_rate": 2.5457276840012656e-05, + "loss": 8.2649, + "step": 865000 + }, + { + "epoch": 9.21, + "learning_rate": 2.5438690432424584e-05, + "loss": 8.2263, + "step": 865100 + }, + { + "epoch": 9.22, + "learning_rate": 2.5420109823477244e-05, + "loss": 8.2613, + "step": 865200 + }, + { + "epoch": 9.22, + "learning_rate": 2.5401535014615753e-05, + "loss": 8.1612, + "step": 865300 + }, + { + "epoch": 9.22, + "learning_rate": 2.5382966007284535e-05, + "loss": 8.1773, + "step": 865400 + }, + { + "epoch": 9.22, + "learning_rate": 2.5364402802927823e-05, + "loss": 8.2719, + "step": 865500 + }, + { + "epoch": 9.22, + "learning_rate": 2.53458454029892e-05, + "loss": 8.2293, + "step": 865600 + }, + { + "epoch": 9.22, + "learning_rate": 2.5327293808911956e-05, + "loss": 8.2277, + "step": 865700 + }, + { + "epoch": 9.22, + "learning_rate": 2.5308748022138763e-05, + "loss": 8.1784, + "step": 865800 + }, + { + "epoch": 9.22, + "learning_rate": 2.5290208044111984e-05, + "loss": 8.2997, + "step": 865900 + }, + { + "epoch": 9.22, + "learning_rate": 2.5271673876273473e-05, + "loss": 8.2014, + "step": 866000 + }, + { + "epoch": 9.23, + "learning_rate": 2.525314552006458e-05, + "loss": 8.2686, + "step": 866100 + }, + { + "epoch": 9.23, + "learning_rate": 2.5234622976926335e-05, + "loss": 8.2701, + "step": 866200 + }, + { + "epoch": 9.23, + "learning_rate": 2.5216106248299178e-05, + "loss": 8.1782, + "step": 866300 + }, + { + "epoch": 9.23, + "learning_rate": 2.5197595335623236e-05, + "loss": 8.2287, + "step": 866400 + }, + { + "epoch": 9.23, + "learning_rate": 2.517909024033799e-05, + "loss": 8.249, + "step": 866500 + }, + { + "epoch": 9.23, + "learning_rate": 2.516059096388268e-05, + "loss": 8.2484, + "step": 866600 + }, + { + "epoch": 9.23, + "learning_rate": 2.5142097507695915e-05, + "loss": 8.2146, + "step": 866700 + }, + { + "epoch": 9.23, + "learning_rate": 2.5123609873216046e-05, + "loss": 8.2667, + "step": 866800 + }, + { + "epoch": 9.23, + "learning_rate": 2.5105128061880735e-05, + "loss": 8.2971, + "step": 866900 + }, + { + "epoch": 9.23, + "learning_rate": 2.508665207512739e-05, + "loss": 8.2207, + "step": 867000 + }, + { + "epoch": 9.24, + "learning_rate": 2.5068181914392886e-05, + "loss": 8.2056, + "step": 867100 + }, + { + "epoch": 9.24, + "learning_rate": 2.504971758111362e-05, + "loss": 8.2358, + "step": 867200 + }, + { + "epoch": 9.24, + "learning_rate": 2.5031259076725545e-05, + "loss": 8.2676, + "step": 867300 + }, + { + "epoch": 9.24, + "learning_rate": 2.5012806402664256e-05, + "loss": 8.1919, + "step": 867400 + }, + { + "epoch": 9.24, + "learning_rate": 2.4994359560364767e-05, + "loss": 8.2115, + "step": 867500 + }, + { + "epoch": 9.24, + "learning_rate": 2.4975918551261712e-05, + "loss": 8.2525, + "step": 867600 + }, + { + "epoch": 9.24, + "learning_rate": 2.4957483376789203e-05, + "loss": 8.1892, + "step": 867700 + }, + { + "epoch": 9.24, + "learning_rate": 2.4939054038381005e-05, + "loss": 8.2767, + "step": 867800 + }, + { + "epoch": 9.24, + "learning_rate": 2.492063053747036e-05, + "loss": 8.1937, + "step": 867900 + }, + { + "epoch": 9.25, + "learning_rate": 2.490221287549004e-05, + "loss": 8.2285, + "step": 868000 + }, + { + "epoch": 9.25, + "learning_rate": 2.488380105387238e-05, + "loss": 8.1904, + "step": 868100 + }, + { + "epoch": 9.25, + "learning_rate": 2.4865395074049315e-05, + "loss": 8.2666, + "step": 868200 + }, + { + "epoch": 9.25, + "learning_rate": 2.484699493745225e-05, + "loss": 8.1211, + "step": 868300 + }, + { + "epoch": 9.25, + "learning_rate": 2.4828600645512145e-05, + "loss": 8.2027, + "step": 868400 + }, + { + "epoch": 9.25, + "learning_rate": 2.481021219965959e-05, + "loss": 8.1842, + "step": 868500 + }, + { + "epoch": 9.25, + "learning_rate": 2.47918296013246e-05, + "loss": 8.2616, + "step": 868600 + }, + { + "epoch": 9.25, + "learning_rate": 2.477345285193682e-05, + "loss": 8.2594, + "step": 868700 + }, + { + "epoch": 9.25, + "learning_rate": 2.4755081952925375e-05, + "loss": 8.2027, + "step": 868800 + }, + { + "epoch": 9.25, + "learning_rate": 2.4736716905719015e-05, + "loss": 8.2356, + "step": 868900 + }, + { + "epoch": 9.26, + "learning_rate": 2.4718357711745986e-05, + "loss": 8.2691, + "step": 869000 + }, + { + "epoch": 9.26, + "learning_rate": 2.470000437243406e-05, + "loss": 8.2568, + "step": 869100 + }, + { + "epoch": 9.26, + "learning_rate": 2.468165688921057e-05, + "loss": 8.1783, + "step": 869200 + }, + { + "epoch": 9.26, + "learning_rate": 2.4663315263502452e-05, + "loss": 8.1743, + "step": 869300 + }, + { + "epoch": 9.26, + "learning_rate": 2.464497949673611e-05, + "loss": 8.2369, + "step": 869400 + }, + { + "epoch": 9.26, + "learning_rate": 2.46266495903375e-05, + "loss": 8.2656, + "step": 869500 + }, + { + "epoch": 9.26, + "learning_rate": 2.460832554573215e-05, + "loss": 8.2279, + "step": 869600 + }, + { + "epoch": 9.26, + "learning_rate": 2.4590007364345136e-05, + "loss": 8.2827, + "step": 869700 + }, + { + "epoch": 9.26, + "learning_rate": 2.4571695047601063e-05, + "loss": 8.3053, + "step": 869800 + }, + { + "epoch": 9.27, + "learning_rate": 2.4553388596924086e-05, + "loss": 8.2653, + "step": 869900 + }, + { + "epoch": 9.27, + "learning_rate": 2.453508801373785e-05, + "loss": 8.231, + "step": 870000 + }, + { + "epoch": 9.27, + "learning_rate": 2.451679329946567e-05, + "loss": 8.2723, + "step": 870100 + }, + { + "epoch": 9.27, + "learning_rate": 2.449850445553028e-05, + "loss": 8.2244, + "step": 870200 + }, + { + "epoch": 9.27, + "learning_rate": 2.4480221483353992e-05, + "loss": 8.2621, + "step": 870300 + }, + { + "epoch": 9.27, + "learning_rate": 2.4461944384358738e-05, + "loss": 8.2115, + "step": 870400 + }, + { + "epoch": 9.27, + "learning_rate": 2.4443673159965873e-05, + "loss": 8.2633, + "step": 870500 + }, + { + "epoch": 9.27, + "learning_rate": 2.442540781159638e-05, + "loss": 8.1256, + "step": 870600 + }, + { + "epoch": 9.27, + "learning_rate": 2.4407148340670715e-05, + "loss": 8.2616, + "step": 870700 + }, + { + "epoch": 9.28, + "learning_rate": 2.4388894748608982e-05, + "loss": 8.2542, + "step": 870800 + }, + { + "epoch": 9.28, + "learning_rate": 2.437064703683073e-05, + "loss": 8.2581, + "step": 870900 + }, + { + "epoch": 9.28, + "learning_rate": 2.4352405206755092e-05, + "loss": 8.1889, + "step": 871000 + }, + { + "epoch": 9.28, + "learning_rate": 2.433416925980071e-05, + "loss": 8.1918, + "step": 871100 + }, + { + "epoch": 9.28, + "learning_rate": 2.4315939197385838e-05, + "loss": 8.1559, + "step": 871200 + }, + { + "epoch": 9.28, + "learning_rate": 2.4297715020928213e-05, + "loss": 8.2307, + "step": 871300 + }, + { + "epoch": 9.28, + "learning_rate": 2.427949673184513e-05, + "loss": 8.2458, + "step": 871400 + }, + { + "epoch": 9.28, + "learning_rate": 2.4261284331553402e-05, + "loss": 8.3008, + "step": 871500 + }, + { + "epoch": 9.28, + "learning_rate": 2.424307782146946e-05, + "loss": 8.1956, + "step": 871600 + }, + { + "epoch": 9.28, + "learning_rate": 2.42248772030092e-05, + "loss": 8.3363, + "step": 871700 + }, + { + "epoch": 9.29, + "learning_rate": 2.4206682477588083e-05, + "loss": 8.2127, + "step": 871800 + }, + { + "epoch": 9.29, + "learning_rate": 2.418849364662109e-05, + "loss": 8.1865, + "step": 871900 + }, + { + "epoch": 9.29, + "learning_rate": 2.417031071152286e-05, + "loss": 8.2204, + "step": 872000 + }, + { + "epoch": 9.29, + "learning_rate": 2.4152133673707356e-05, + "loss": 8.287, + "step": 872100 + }, + { + "epoch": 9.29, + "learning_rate": 2.4133962534588296e-05, + "loss": 8.2732, + "step": 872200 + }, + { + "epoch": 9.29, + "learning_rate": 2.411579729557879e-05, + "loss": 8.2441, + "step": 872300 + }, + { + "epoch": 9.29, + "learning_rate": 2.4097637958091624e-05, + "loss": 8.2968, + "step": 872400 + }, + { + "epoch": 9.29, + "learning_rate": 2.4079484523539008e-05, + "loss": 8.2361, + "step": 872500 + }, + { + "epoch": 9.29, + "learning_rate": 2.4061336993332716e-05, + "loss": 8.248, + "step": 872600 + }, + { + "epoch": 9.3, + "learning_rate": 2.4043195368884143e-05, + "loss": 8.2101, + "step": 872700 + }, + { + "epoch": 9.3, + "learning_rate": 2.402505965160412e-05, + "loss": 8.2692, + "step": 872800 + }, + { + "epoch": 9.3, + "learning_rate": 2.400692984290308e-05, + "loss": 8.2535, + "step": 872900 + }, + { + "epoch": 9.3, + "learning_rate": 2.398880594419095e-05, + "loss": 8.2554, + "step": 873000 + }, + { + "epoch": 9.3, + "learning_rate": 2.3970687956877303e-05, + "loss": 8.2314, + "step": 873100 + }, + { + "epoch": 9.3, + "learning_rate": 2.3952575882371077e-05, + "loss": 8.2776, + "step": 873200 + }, + { + "epoch": 9.3, + "learning_rate": 2.393446972208091e-05, + "loss": 8.3094, + "step": 873300 + }, + { + "epoch": 9.3, + "learning_rate": 2.391636947741489e-05, + "loss": 8.2519, + "step": 873400 + }, + { + "epoch": 9.3, + "learning_rate": 2.3898275149780758e-05, + "loss": 8.3369, + "step": 873500 + }, + { + "epoch": 9.3, + "learning_rate": 2.3880186740585585e-05, + "loss": 8.2317, + "step": 873600 + }, + { + "epoch": 9.31, + "learning_rate": 2.3862104251236193e-05, + "loss": 8.2755, + "step": 873700 + }, + { + "epoch": 9.31, + "learning_rate": 2.3844027683138816e-05, + "loss": 8.2454, + "step": 873800 + }, + { + "epoch": 9.31, + "learning_rate": 2.3825957037699354e-05, + "loss": 8.1975, + "step": 873900 + }, + { + "epoch": 9.31, + "learning_rate": 2.3807892316323032e-05, + "loss": 8.2748, + "step": 874000 + }, + { + "epoch": 9.31, + "learning_rate": 2.3789833520414838e-05, + "loss": 8.2061, + "step": 874100 + }, + { + "epoch": 9.31, + "learning_rate": 2.3771780651379196e-05, + "loss": 8.3335, + "step": 874200 + }, + { + "epoch": 9.31, + "learning_rate": 2.3753733710620063e-05, + "loss": 8.212, + "step": 874300 + }, + { + "epoch": 9.31, + "learning_rate": 2.3735692699540925e-05, + "loss": 8.2242, + "step": 874400 + }, + { + "epoch": 9.31, + "learning_rate": 2.3717657619544852e-05, + "loss": 8.1893, + "step": 874500 + }, + { + "epoch": 9.32, + "learning_rate": 2.3699628472034506e-05, + "loss": 8.1715, + "step": 874600 + }, + { + "epoch": 9.32, + "learning_rate": 2.368160525841191e-05, + "loss": 8.2476, + "step": 874700 + }, + { + "epoch": 9.32, + "learning_rate": 2.3663587980078793e-05, + "loss": 8.2371, + "step": 874800 + }, + { + "epoch": 9.32, + "learning_rate": 2.3645576638436305e-05, + "loss": 8.2099, + "step": 874900 + }, + { + "epoch": 9.32, + "learning_rate": 2.3627571234885304e-05, + "loss": 8.259, + "step": 875000 + }, + { + "epoch": 9.32, + "learning_rate": 2.3609571770825924e-05, + "loss": 8.2941, + "step": 875100 + }, + { + "epoch": 9.32, + "learning_rate": 2.3591578247658098e-05, + "loss": 8.258, + "step": 875200 + }, + { + "epoch": 9.32, + "learning_rate": 2.357359066678113e-05, + "loss": 8.2324, + "step": 875300 + }, + { + "epoch": 9.32, + "learning_rate": 2.355560902959394e-05, + "loss": 8.1856, + "step": 875400 + }, + { + "epoch": 9.33, + "learning_rate": 2.3537633337494914e-05, + "loss": 8.2795, + "step": 875500 + }, + { + "epoch": 9.33, + "learning_rate": 2.35196635918821e-05, + "loss": 8.297, + "step": 875600 + }, + { + "epoch": 9.33, + "learning_rate": 2.3501699794152954e-05, + "loss": 8.2007, + "step": 875700 + }, + { + "epoch": 9.33, + "learning_rate": 2.348374194570454e-05, + "loss": 8.2313, + "step": 875800 + }, + { + "epoch": 9.33, + "learning_rate": 2.3465790047933423e-05, + "loss": 8.228, + "step": 875900 + }, + { + "epoch": 9.33, + "learning_rate": 2.3447844102235748e-05, + "loss": 8.2668, + "step": 876000 + }, + { + "epoch": 9.33, + "learning_rate": 2.3429904110007172e-05, + "loss": 8.2867, + "step": 876100 + }, + { + "epoch": 9.33, + "learning_rate": 2.3411970072642887e-05, + "loss": 8.2904, + "step": 876200 + }, + { + "epoch": 9.33, + "learning_rate": 2.3394041991537587e-05, + "loss": 8.2975, + "step": 876300 + }, + { + "epoch": 9.33, + "learning_rate": 2.337611986808559e-05, + "loss": 8.2023, + "step": 876400 + }, + { + "epoch": 9.34, + "learning_rate": 2.3358203703680692e-05, + "loss": 8.1915, + "step": 876500 + }, + { + "epoch": 9.34, + "learning_rate": 2.3340293499716203e-05, + "loss": 8.2578, + "step": 876600 + }, + { + "epoch": 9.34, + "learning_rate": 2.3322389257585052e-05, + "loss": 8.2721, + "step": 876700 + }, + { + "epoch": 9.34, + "learning_rate": 2.3304490978679638e-05, + "loss": 8.1855, + "step": 876800 + }, + { + "epoch": 9.34, + "learning_rate": 2.328659866439189e-05, + "loss": 8.1612, + "step": 876900 + }, + { + "epoch": 9.34, + "learning_rate": 2.3268712316113285e-05, + "loss": 8.2391, + "step": 877000 + }, + { + "epoch": 9.34, + "learning_rate": 2.3250831935234884e-05, + "loss": 8.2418, + "step": 877100 + }, + { + "epoch": 9.34, + "learning_rate": 2.323295752314725e-05, + "loss": 8.2239, + "step": 877200 + }, + { + "epoch": 9.34, + "learning_rate": 2.321508908124044e-05, + "loss": 8.2607, + "step": 877300 + }, + { + "epoch": 9.35, + "learning_rate": 2.3197226610904078e-05, + "loss": 8.2711, + "step": 877400 + }, + { + "epoch": 9.35, + "learning_rate": 2.3179370113527387e-05, + "loss": 8.2286, + "step": 877500 + }, + { + "epoch": 9.35, + "learning_rate": 2.3161519590499038e-05, + "loss": 8.2029, + "step": 877600 + }, + { + "epoch": 9.35, + "learning_rate": 2.3143675043207267e-05, + "loss": 8.2255, + "step": 877700 + }, + { + "epoch": 9.35, + "learning_rate": 2.312583647303982e-05, + "loss": 8.2033, + "step": 877800 + }, + { + "epoch": 9.35, + "learning_rate": 2.3108003881384044e-05, + "loss": 8.2274, + "step": 877900 + }, + { + "epoch": 9.35, + "learning_rate": 2.3090177269626777e-05, + "loss": 8.1593, + "step": 878000 + }, + { + "epoch": 9.35, + "learning_rate": 2.307235663915439e-05, + "loss": 8.266, + "step": 878100 + }, + { + "epoch": 9.35, + "learning_rate": 2.3054541991352762e-05, + "loss": 8.2387, + "step": 878200 + }, + { + "epoch": 9.36, + "learning_rate": 2.3036733327607397e-05, + "loss": 8.3142, + "step": 878300 + }, + { + "epoch": 9.36, + "learning_rate": 2.301893064930325e-05, + "loss": 8.3084, + "step": 878400 + }, + { + "epoch": 9.36, + "learning_rate": 2.300113395782484e-05, + "loss": 8.2179, + "step": 878500 + }, + { + "epoch": 9.36, + "learning_rate": 2.2983343254556187e-05, + "loss": 8.2239, + "step": 878600 + }, + { + "epoch": 9.36, + "learning_rate": 2.2965558540880926e-05, + "loss": 8.2508, + "step": 878700 + }, + { + "epoch": 9.36, + "learning_rate": 2.2947779818182158e-05, + "loss": 8.2053, + "step": 878800 + }, + { + "epoch": 9.36, + "learning_rate": 2.293000708784251e-05, + "loss": 8.2049, + "step": 878900 + }, + { + "epoch": 9.36, + "learning_rate": 2.2912240351244207e-05, + "loss": 8.274, + "step": 879000 + }, + { + "epoch": 9.36, + "learning_rate": 2.289447960976896e-05, + "loss": 8.2228, + "step": 879100 + }, + { + "epoch": 9.36, + "learning_rate": 2.2876724864798016e-05, + "loss": 8.2691, + "step": 879200 + }, + { + "epoch": 9.37, + "learning_rate": 2.285897611771214e-05, + "loss": 8.227, + "step": 879300 + }, + { + "epoch": 9.37, + "learning_rate": 2.28412333698917e-05, + "loss": 8.1806, + "step": 879400 + }, + { + "epoch": 9.37, + "learning_rate": 2.2823496622716534e-05, + "loss": 8.238, + "step": 879500 + }, + { + "epoch": 9.37, + "learning_rate": 2.280576587756603e-05, + "loss": 8.2513, + "step": 879600 + }, + { + "epoch": 9.37, + "learning_rate": 2.278804113581907e-05, + "loss": 8.2805, + "step": 879700 + }, + { + "epoch": 9.37, + "learning_rate": 2.2770322398854216e-05, + "loss": 8.2615, + "step": 879800 + }, + { + "epoch": 9.37, + "learning_rate": 2.275260966804932e-05, + "loss": 8.1825, + "step": 879900 + }, + { + "epoch": 9.37, + "learning_rate": 2.273490294478199e-05, + "loss": 8.3333, + "step": 880000 + }, + { + "epoch": 9.37, + "learning_rate": 2.2717202230429234e-05, + "loss": 8.3113, + "step": 880100 + }, + { + "epoch": 9.38, + "learning_rate": 2.2699507526367725e-05, + "loss": 8.323, + "step": 880200 + }, + { + "epoch": 9.38, + "learning_rate": 2.2681818833973467e-05, + "loss": 8.0964, + "step": 880300 + }, + { + "epoch": 9.38, + "learning_rate": 2.2664136154622186e-05, + "loss": 8.267, + "step": 880400 + }, + { + "epoch": 9.38, + "learning_rate": 2.264645948968901e-05, + "loss": 8.2327, + "step": 880500 + }, + { + "epoch": 9.38, + "learning_rate": 2.2628788840548754e-05, + "loss": 8.216, + "step": 880600 + }, + { + "epoch": 9.38, + "learning_rate": 2.261112420857554e-05, + "loss": 8.2846, + "step": 880700 + }, + { + "epoch": 9.38, + "learning_rate": 2.2593465595143205e-05, + "loss": 8.2743, + "step": 880800 + }, + { + "epoch": 9.38, + "learning_rate": 2.2575813001625135e-05, + "loss": 8.257, + "step": 880900 + }, + { + "epoch": 9.38, + "learning_rate": 2.2558166429394036e-05, + "loss": 8.177, + "step": 881000 + }, + { + "epoch": 9.38, + "learning_rate": 2.2540525879822383e-05, + "loss": 8.1856, + "step": 881100 + }, + { + "epoch": 9.39, + "learning_rate": 2.2522891354282026e-05, + "loss": 8.2371, + "step": 881200 + }, + { + "epoch": 9.39, + "learning_rate": 2.2505262854144492e-05, + "loss": 8.2081, + "step": 881300 + }, + { + "epoch": 9.39, + "learning_rate": 2.2487640380780626e-05, + "loss": 8.2591, + "step": 881400 + }, + { + "epoch": 9.39, + "learning_rate": 2.2470023935561026e-05, + "loss": 8.2954, + "step": 881500 + }, + { + "epoch": 9.39, + "learning_rate": 2.2452413519855664e-05, + "loss": 8.2346, + "step": 881600 + }, + { + "epoch": 9.39, + "learning_rate": 2.2434809135034184e-05, + "loss": 8.2265, + "step": 881700 + }, + { + "epoch": 9.39, + "learning_rate": 2.241721078246557e-05, + "loss": 8.2468, + "step": 881800 + }, + { + "epoch": 9.39, + "learning_rate": 2.2399618463518547e-05, + "loss": 8.2188, + "step": 881900 + }, + { + "epoch": 9.39, + "learning_rate": 2.2382032179561218e-05, + "loss": 8.1628, + "step": 882000 + }, + { + "epoch": 9.4, + "learning_rate": 2.2364451931961294e-05, + "loss": 8.238, + "step": 882100 + }, + { + "epoch": 9.4, + "learning_rate": 2.2346877722085946e-05, + "loss": 8.1785, + "step": 882200 + }, + { + "epoch": 9.4, + "learning_rate": 2.232930955130199e-05, + "loss": 8.2121, + "step": 882300 + }, + { + "epoch": 9.4, + "learning_rate": 2.231174742097567e-05, + "loss": 8.2904, + "step": 882400 + }, + { + "epoch": 9.4, + "learning_rate": 2.22941913324728e-05, + "loss": 8.2481, + "step": 882500 + }, + { + "epoch": 9.4, + "learning_rate": 2.2276641287158682e-05, + "loss": 8.2078, + "step": 882600 + }, + { + "epoch": 9.4, + "learning_rate": 2.2259097286398255e-05, + "loss": 8.2012, + "step": 882700 + }, + { + "epoch": 9.4, + "learning_rate": 2.2241559331555884e-05, + "loss": 8.2774, + "step": 882800 + }, + { + "epoch": 9.4, + "learning_rate": 2.2224027423995454e-05, + "loss": 8.1986, + "step": 882900 + }, + { + "epoch": 9.41, + "learning_rate": 2.2206501565080495e-05, + "loss": 8.2517, + "step": 883000 + }, + { + "epoch": 9.41, + "learning_rate": 2.2188981756173978e-05, + "loss": 8.2477, + "step": 883100 + }, + { + "epoch": 9.41, + "learning_rate": 2.2171467998638396e-05, + "loss": 8.1744, + "step": 883200 + }, + { + "epoch": 9.41, + "learning_rate": 2.2153960293835772e-05, + "loss": 8.2345, + "step": 883300 + }, + { + "epoch": 9.41, + "learning_rate": 2.2136458643127755e-05, + "loss": 8.2281, + "step": 883400 + }, + { + "epoch": 9.41, + "learning_rate": 2.211896304787541e-05, + "loss": 8.2821, + "step": 883500 + }, + { + "epoch": 9.41, + "learning_rate": 2.2101473509439375e-05, + "loss": 8.2239, + "step": 883600 + }, + { + "epoch": 9.41, + "learning_rate": 2.208399002917977e-05, + "loss": 8.2318, + "step": 883700 + }, + { + "epoch": 9.41, + "learning_rate": 2.206651260845637e-05, + "loss": 8.1909, + "step": 883800 + }, + { + "epoch": 9.41, + "learning_rate": 2.2049041248628345e-05, + "loss": 8.1797, + "step": 883900 + }, + { + "epoch": 9.42, + "learning_rate": 2.203157595105446e-05, + "loss": 8.2915, + "step": 884000 + }, + { + "epoch": 9.42, + "learning_rate": 2.2014116717092957e-05, + "loss": 8.2029, + "step": 884100 + }, + { + "epoch": 9.42, + "learning_rate": 2.1996663548101692e-05, + "loss": 8.2574, + "step": 884200 + }, + { + "epoch": 9.42, + "learning_rate": 2.197921644543799e-05, + "loss": 8.2722, + "step": 884300 + }, + { + "epoch": 9.42, + "learning_rate": 2.1961775410458697e-05, + "loss": 8.3045, + "step": 884400 + }, + { + "epoch": 9.42, + "learning_rate": 2.1944340444520182e-05, + "loss": 8.1754, + "step": 884500 + }, + { + "epoch": 9.42, + "learning_rate": 2.1926911548978423e-05, + "loss": 8.3009, + "step": 884600 + }, + { + "epoch": 9.42, + "learning_rate": 2.190948872518884e-05, + "loss": 8.2194, + "step": 884700 + }, + { + "epoch": 9.42, + "learning_rate": 2.1892071974506402e-05, + "loss": 8.2019, + "step": 884800 + }, + { + "epoch": 9.43, + "learning_rate": 2.1874661298285592e-05, + "loss": 8.1982, + "step": 884900 + }, + { + "epoch": 9.43, + "learning_rate": 2.185725669788048e-05, + "loss": 8.2136, + "step": 885000 + }, + { + "epoch": 9.43, + "learning_rate": 2.1839858174644613e-05, + "loss": 8.2783, + "step": 885100 + }, + { + "epoch": 9.43, + "learning_rate": 2.182246572993103e-05, + "loss": 8.2768, + "step": 885200 + }, + { + "epoch": 9.43, + "learning_rate": 2.180507936509242e-05, + "loss": 8.2058, + "step": 885300 + }, + { + "epoch": 9.43, + "learning_rate": 2.1787699081480882e-05, + "loss": 8.1557, + "step": 885400 + }, + { + "epoch": 9.43, + "learning_rate": 2.1770324880448088e-05, + "loss": 8.2866, + "step": 885500 + }, + { + "epoch": 9.43, + "learning_rate": 2.175295676334519e-05, + "loss": 8.2814, + "step": 885600 + }, + { + "epoch": 9.43, + "learning_rate": 2.173559473152298e-05, + "loss": 8.2171, + "step": 885700 + }, + { + "epoch": 9.43, + "learning_rate": 2.171823878633168e-05, + "loss": 8.1855, + "step": 885800 + }, + { + "epoch": 9.44, + "learning_rate": 2.1700888929121043e-05, + "loss": 8.3101, + "step": 885900 + }, + { + "epoch": 9.44, + "learning_rate": 2.1683545161240358e-05, + "loss": 8.227, + "step": 886000 + }, + { + "epoch": 9.44, + "learning_rate": 2.1666207484038504e-05, + "loss": 8.1593, + "step": 886100 + }, + { + "epoch": 9.44, + "learning_rate": 2.164887589886381e-05, + "loss": 8.2786, + "step": 886200 + }, + { + "epoch": 9.44, + "learning_rate": 2.1631550407064137e-05, + "loss": 8.1916, + "step": 886300 + }, + { + "epoch": 9.44, + "learning_rate": 2.1614231009986886e-05, + "loss": 8.1628, + "step": 886400 + }, + { + "epoch": 9.44, + "learning_rate": 2.1596917708979025e-05, + "loss": 8.221, + "step": 886500 + }, + { + "epoch": 9.44, + "learning_rate": 2.1579610505386992e-05, + "loss": 8.3515, + "step": 886600 + }, + { + "epoch": 9.44, + "learning_rate": 2.1562309400556778e-05, + "loss": 8.3325, + "step": 886700 + }, + { + "epoch": 9.45, + "learning_rate": 2.154501439583385e-05, + "loss": 8.2336, + "step": 886800 + }, + { + "epoch": 9.45, + "learning_rate": 2.1527725492563332e-05, + "loss": 8.1472, + "step": 886900 + }, + { + "epoch": 9.45, + "learning_rate": 2.151044269208967e-05, + "loss": 8.2629, + "step": 887000 + }, + { + "epoch": 9.45, + "learning_rate": 2.1493165995757015e-05, + "loss": 8.2356, + "step": 887100 + }, + { + "epoch": 9.45, + "learning_rate": 2.1475895404908998e-05, + "loss": 8.2612, + "step": 887200 + }, + { + "epoch": 9.45, + "learning_rate": 2.145863092088871e-05, + "loss": 8.2759, + "step": 887300 + }, + { + "epoch": 9.45, + "learning_rate": 2.144137254503884e-05, + "loss": 8.1738, + "step": 887400 + }, + { + "epoch": 9.45, + "learning_rate": 2.1424120278701533e-05, + "loss": 8.2508, + "step": 887500 + }, + { + "epoch": 9.45, + "learning_rate": 2.1406874123218556e-05, + "loss": 8.238, + "step": 887600 + }, + { + "epoch": 9.46, + "learning_rate": 2.138963407993111e-05, + "loss": 8.1243, + "step": 887700 + }, + { + "epoch": 9.46, + "learning_rate": 2.137240015017997e-05, + "loss": 8.24, + "step": 887800 + }, + { + "epoch": 9.46, + "learning_rate": 2.1355172335305373e-05, + "loss": 8.2385, + "step": 887900 + }, + { + "epoch": 9.46, + "learning_rate": 2.133795063664723e-05, + "loss": 8.2199, + "step": 888000 + }, + { + "epoch": 9.46, + "learning_rate": 2.132073505554476e-05, + "loss": 8.2326, + "step": 888100 + }, + { + "epoch": 9.46, + "learning_rate": 2.130352559333689e-05, + "loss": 8.2624, + "step": 888200 + }, + { + "epoch": 9.46, + "learning_rate": 2.1286322251361957e-05, + "loss": 8.237, + "step": 888300 + }, + { + "epoch": 9.46, + "learning_rate": 2.1269125030957947e-05, + "loss": 8.274, + "step": 888400 + }, + { + "epoch": 9.46, + "learning_rate": 2.1251933933462186e-05, + "loss": 8.1775, + "step": 888500 + }, + { + "epoch": 9.46, + "learning_rate": 2.1234748960211694e-05, + "loss": 8.2274, + "step": 888600 + }, + { + "epoch": 9.47, + "learning_rate": 2.1217570112542916e-05, + "loss": 8.2181, + "step": 888700 + }, + { + "epoch": 9.47, + "learning_rate": 2.1200397391791915e-05, + "loss": 8.1942, + "step": 888800 + }, + { + "epoch": 9.47, + "learning_rate": 2.118323079929412e-05, + "loss": 8.1854, + "step": 888900 + }, + { + "epoch": 9.47, + "learning_rate": 2.1166070336384646e-05, + "loss": 8.1413, + "step": 889000 + }, + { + "epoch": 9.47, + "learning_rate": 2.1148916004398046e-05, + "loss": 8.2076, + "step": 889100 + }, + { + "epoch": 9.47, + "learning_rate": 2.113176780466839e-05, + "loss": 8.1858, + "step": 889200 + }, + { + "epoch": 9.47, + "learning_rate": 2.1114625738529336e-05, + "loss": 8.2353, + "step": 889300 + }, + { + "epoch": 9.47, + "learning_rate": 2.1097489807313986e-05, + "loss": 8.2041, + "step": 889400 + }, + { + "epoch": 9.47, + "learning_rate": 2.1080360012355073e-05, + "loss": 8.1906, + "step": 889500 + }, + { + "epoch": 9.48, + "learning_rate": 2.1063236354984685e-05, + "loss": 8.2034, + "step": 889600 + }, + { + "epoch": 9.48, + "learning_rate": 2.10461188365346e-05, + "loss": 8.3256, + "step": 889700 + }, + { + "epoch": 9.48, + "learning_rate": 2.1029007458336037e-05, + "loss": 8.1666, + "step": 889800 + }, + { + "epoch": 9.48, + "learning_rate": 2.1011902221719737e-05, + "loss": 8.2296, + "step": 889900 + }, + { + "epoch": 9.48, + "learning_rate": 2.099480312801595e-05, + "loss": 8.2164, + "step": 890000 + }, + { + "epoch": 9.48, + "learning_rate": 2.0977710178554533e-05, + "loss": 8.267, + "step": 890100 + }, + { + "epoch": 9.48, + "learning_rate": 2.0960623374664777e-05, + "loss": 8.2921, + "step": 890200 + }, + { + "epoch": 9.48, + "learning_rate": 2.0943542717675524e-05, + "loss": 8.2351, + "step": 890300 + }, + { + "epoch": 9.48, + "learning_rate": 2.0926468208915116e-05, + "loss": 8.1954, + "step": 890400 + }, + { + "epoch": 9.49, + "learning_rate": 2.0909399849711486e-05, + "loss": 8.2777, + "step": 890500 + }, + { + "epoch": 9.49, + "learning_rate": 2.0892337641392024e-05, + "loss": 8.33, + "step": 890600 + }, + { + "epoch": 9.49, + "learning_rate": 2.0875281585283657e-05, + "loss": 8.1925, + "step": 890700 + }, + { + "epoch": 9.49, + "learning_rate": 2.0858231682712802e-05, + "loss": 8.1871, + "step": 890800 + }, + { + "epoch": 9.49, + "learning_rate": 2.0841187935005492e-05, + "loss": 8.2824, + "step": 890900 + }, + { + "epoch": 9.49, + "learning_rate": 2.082415034348718e-05, + "loss": 8.1729, + "step": 891000 + }, + { + "epoch": 9.49, + "learning_rate": 2.0807118909482913e-05, + "loss": 8.1944, + "step": 891100 + }, + { + "epoch": 9.49, + "learning_rate": 2.0790093634317164e-05, + "loss": 8.2859, + "step": 891200 + }, + { + "epoch": 9.49, + "learning_rate": 2.0773074519314072e-05, + "loss": 8.1738, + "step": 891300 + }, + { + "epoch": 9.49, + "learning_rate": 2.075606156579718e-05, + "loss": 8.284, + "step": 891400 + }, + { + "epoch": 9.5, + "learning_rate": 2.073905477508956e-05, + "loss": 8.3281, + "step": 891500 + }, + { + "epoch": 9.5, + "learning_rate": 2.0722054148513868e-05, + "loss": 8.1808, + "step": 891600 + }, + { + "epoch": 9.5, + "learning_rate": 2.070505968739225e-05, + "loss": 8.2823, + "step": 891700 + }, + { + "epoch": 9.5, + "learning_rate": 2.0688071393046347e-05, + "loss": 8.1573, + "step": 891800 + }, + { + "epoch": 9.5, + "learning_rate": 2.0671089266797315e-05, + "loss": 8.2132, + "step": 891900 + }, + { + "epoch": 9.5, + "learning_rate": 2.0654113309965905e-05, + "loss": 8.1399, + "step": 892000 + }, + { + "epoch": 9.5, + "learning_rate": 2.0637143523872328e-05, + "loss": 8.1468, + "step": 892100 + }, + { + "epoch": 9.5, + "learning_rate": 2.0620179909836312e-05, + "loss": 8.1916, + "step": 892200 + }, + { + "epoch": 9.5, + "learning_rate": 2.0603222469177096e-05, + "loss": 8.2058, + "step": 892300 + }, + { + "epoch": 9.51, + "learning_rate": 2.0586271203213525e-05, + "loss": 8.2834, + "step": 892400 + }, + { + "epoch": 9.51, + "learning_rate": 2.0569326113263854e-05, + "loss": 8.2046, + "step": 892500 + }, + { + "epoch": 9.51, + "learning_rate": 2.0552387200645928e-05, + "loss": 8.2391, + "step": 892600 + }, + { + "epoch": 9.51, + "learning_rate": 2.053545446667704e-05, + "loss": 8.2311, + "step": 892700 + }, + { + "epoch": 9.51, + "learning_rate": 2.0518527912674112e-05, + "loss": 8.2, + "step": 892800 + }, + { + "epoch": 9.51, + "learning_rate": 2.0501607539953505e-05, + "loss": 8.2399, + "step": 892900 + }, + { + "epoch": 9.51, + "learning_rate": 2.0484693349831097e-05, + "loss": 8.2317, + "step": 893000 + }, + { + "epoch": 9.51, + "learning_rate": 2.0467785343622303e-05, + "loss": 8.2552, + "step": 893100 + }, + { + "epoch": 9.51, + "learning_rate": 2.0450883522642105e-05, + "loss": 8.2695, + "step": 893200 + }, + { + "epoch": 9.51, + "learning_rate": 2.043398788820493e-05, + "loss": 8.2257, + "step": 893300 + }, + { + "epoch": 9.52, + "learning_rate": 2.041709844162474e-05, + "loss": 8.2158, + "step": 893400 + }, + { + "epoch": 9.52, + "learning_rate": 2.0400215184215056e-05, + "loss": 8.2591, + "step": 893500 + }, + { + "epoch": 9.52, + "learning_rate": 2.0383338117288885e-05, + "loss": 8.2416, + "step": 893600 + }, + { + "epoch": 9.52, + "learning_rate": 2.036646724215875e-05, + "loss": 8.2558, + "step": 893700 + }, + { + "epoch": 9.52, + "learning_rate": 2.0349602560136682e-05, + "loss": 8.2347, + "step": 893800 + }, + { + "epoch": 9.52, + "learning_rate": 2.0332744072534294e-05, + "loss": 8.2289, + "step": 893900 + }, + { + "epoch": 9.52, + "learning_rate": 2.0315891780662642e-05, + "loss": 8.2563, + "step": 894000 + }, + { + "epoch": 9.52, + "learning_rate": 2.029904568583234e-05, + "loss": 8.2468, + "step": 894100 + }, + { + "epoch": 9.52, + "learning_rate": 2.028220578935348e-05, + "loss": 8.1995, + "step": 894200 + }, + { + "epoch": 9.53, + "learning_rate": 2.0265372092535762e-05, + "loss": 8.2701, + "step": 894300 + }, + { + "epoch": 9.53, + "learning_rate": 2.0248544596688312e-05, + "loss": 8.1514, + "step": 894400 + }, + { + "epoch": 9.53, + "learning_rate": 2.0231723303119808e-05, + "loss": 8.2207, + "step": 894500 + }, + { + "epoch": 9.53, + "learning_rate": 2.0214908213138406e-05, + "loss": 8.194, + "step": 894600 + }, + { + "epoch": 9.53, + "learning_rate": 2.0198099328051913e-05, + "loss": 8.1554, + "step": 894700 + }, + { + "epoch": 9.53, + "learning_rate": 2.018129664916745e-05, + "loss": 8.193, + "step": 894800 + }, + { + "epoch": 9.53, + "learning_rate": 2.0164500177791833e-05, + "loss": 8.2307, + "step": 894900 + }, + { + "epoch": 9.53, + "learning_rate": 2.014770991523127e-05, + "loss": 8.1716, + "step": 895000 + }, + { + "epoch": 9.53, + "learning_rate": 2.013092586279164e-05, + "loss": 8.1494, + "step": 895100 + }, + { + "epoch": 9.54, + "learning_rate": 2.011414802177811e-05, + "loss": 8.2346, + "step": 895200 + }, + { + "epoch": 9.54, + "learning_rate": 2.0097376393495592e-05, + "loss": 8.2445, + "step": 895300 + }, + { + "epoch": 9.54, + "learning_rate": 2.008061097924836e-05, + "loss": 8.1421, + "step": 895400 + }, + { + "epoch": 9.54, + "learning_rate": 2.0063851780340316e-05, + "loss": 8.2218, + "step": 895500 + }, + { + "epoch": 9.54, + "learning_rate": 2.0047098798074793e-05, + "loss": 8.2666, + "step": 895600 + }, + { + "epoch": 9.54, + "learning_rate": 2.0030352033754652e-05, + "loss": 8.1717, + "step": 895700 + }, + { + "epoch": 9.54, + "learning_rate": 2.001361148868236e-05, + "loss": 8.1412, + "step": 895800 + }, + { + "epoch": 9.54, + "learning_rate": 1.9996877164159734e-05, + "loss": 8.2604, + "step": 895900 + }, + { + "epoch": 9.54, + "learning_rate": 1.9980149061488286e-05, + "loss": 8.2734, + "step": 896000 + }, + { + "epoch": 9.54, + "learning_rate": 1.9963427181968898e-05, + "loss": 8.2001, + "step": 896100 + }, + { + "epoch": 9.55, + "learning_rate": 1.9946711526902117e-05, + "loss": 8.2458, + "step": 896200 + }, + { + "epoch": 9.55, + "learning_rate": 1.993000209758783e-05, + "loss": 8.1851, + "step": 896300 + }, + { + "epoch": 9.55, + "learning_rate": 1.991329889532558e-05, + "loss": 8.2022, + "step": 896400 + }, + { + "epoch": 9.55, + "learning_rate": 1.9896601921414347e-05, + "loss": 8.1697, + "step": 896500 + }, + { + "epoch": 9.55, + "learning_rate": 1.9879911177152732e-05, + "loss": 8.178, + "step": 896600 + }, + { + "epoch": 9.55, + "learning_rate": 1.9863226663838675e-05, + "loss": 8.2239, + "step": 896700 + }, + { + "epoch": 9.55, + "learning_rate": 1.9846548382769793e-05, + "loss": 8.3306, + "step": 896800 + }, + { + "epoch": 9.55, + "learning_rate": 1.9829876335243147e-05, + "loss": 8.2699, + "step": 896900 + }, + { + "epoch": 9.55, + "learning_rate": 1.9813210522555316e-05, + "loss": 8.1993, + "step": 897000 + }, + { + "epoch": 9.56, + "learning_rate": 1.979655094600238e-05, + "loss": 8.2005, + "step": 897100 + }, + { + "epoch": 9.56, + "learning_rate": 1.9779897606880003e-05, + "loss": 8.1727, + "step": 897200 + }, + { + "epoch": 9.56, + "learning_rate": 1.97632505064833e-05, + "loss": 8.2695, + "step": 897300 + }, + { + "epoch": 9.56, + "learning_rate": 1.9746609646106916e-05, + "loss": 8.2713, + "step": 897400 + }, + { + "epoch": 9.56, + "learning_rate": 1.9729975027044977e-05, + "loss": 8.208, + "step": 897500 + }, + { + "epoch": 9.56, + "learning_rate": 1.9713346650591203e-05, + "loss": 8.1849, + "step": 897600 + }, + { + "epoch": 9.56, + "learning_rate": 1.969672451803881e-05, + "loss": 8.143, + "step": 897700 + }, + { + "epoch": 9.56, + "learning_rate": 1.9680108630680428e-05, + "loss": 8.2167, + "step": 897800 + }, + { + "epoch": 9.56, + "learning_rate": 1.9663498989808337e-05, + "loss": 8.1865, + "step": 897900 + }, + { + "epoch": 9.56, + "learning_rate": 1.9646895596714243e-05, + "loss": 8.2318, + "step": 898000 + }, + { + "epoch": 9.57, + "learning_rate": 1.9630298452689398e-05, + "loss": 8.19, + "step": 898100 + }, + { + "epoch": 9.57, + "learning_rate": 1.961370755902454e-05, + "loss": 8.2066, + "step": 898200 + }, + { + "epoch": 9.57, + "learning_rate": 1.959712291701e-05, + "loss": 8.1319, + "step": 898300 + }, + { + "epoch": 9.57, + "learning_rate": 1.9580544527935528e-05, + "loss": 8.2057, + "step": 898400 + }, + { + "epoch": 9.57, + "learning_rate": 1.9563972393090435e-05, + "loss": 8.208, + "step": 898500 + }, + { + "epoch": 9.57, + "learning_rate": 1.9547406513763523e-05, + "loss": 8.1365, + "step": 898600 + }, + { + "epoch": 9.57, + "learning_rate": 1.953084689124315e-05, + "loss": 8.2911, + "step": 898700 + }, + { + "epoch": 9.57, + "learning_rate": 1.951429352681715e-05, + "loss": 8.2186, + "step": 898800 + }, + { + "epoch": 9.57, + "learning_rate": 1.9497746421772877e-05, + "loss": 8.2363, + "step": 898900 + }, + { + "epoch": 9.58, + "learning_rate": 1.9481205577397176e-05, + "loss": 8.1965, + "step": 899000 + }, + { + "epoch": 9.58, + "learning_rate": 1.9464670994976475e-05, + "loss": 8.2189, + "step": 899100 + }, + { + "epoch": 9.58, + "learning_rate": 1.9448142675796654e-05, + "loss": 8.1908, + "step": 899200 + }, + { + "epoch": 9.58, + "learning_rate": 1.943162062114311e-05, + "loss": 8.228, + "step": 899300 + }, + { + "epoch": 9.58, + "learning_rate": 1.9415104832300758e-05, + "loss": 8.299, + "step": 899400 + }, + { + "epoch": 9.58, + "learning_rate": 1.939859531055407e-05, + "loss": 8.1898, + "step": 899500 + }, + { + "epoch": 9.58, + "learning_rate": 1.9382092057186962e-05, + "loss": 8.1697, + "step": 899600 + }, + { + "epoch": 9.58, + "learning_rate": 1.936559507348289e-05, + "loss": 8.2645, + "step": 899700 + }, + { + "epoch": 9.58, + "learning_rate": 1.9349104360724846e-05, + "loss": 8.1999, + "step": 899800 + }, + { + "epoch": 9.59, + "learning_rate": 1.9332619920195327e-05, + "loss": 8.2192, + "step": 899900 + }, + { + "epoch": 9.59, + "learning_rate": 1.9316141753176298e-05, + "loss": 8.1732, + "step": 900000 + }, + { + "epoch": 9.59, + "learning_rate": 1.9299669860949254e-05, + "loss": 8.1236, + "step": 900100 + }, + { + "epoch": 9.59, + "learning_rate": 1.9283204244795262e-05, + "loss": 8.1992, + "step": 900200 + }, + { + "epoch": 9.59, + "learning_rate": 1.926674490599484e-05, + "loss": 8.1954, + "step": 900300 + }, + { + "epoch": 9.59, + "learning_rate": 1.925029184582803e-05, + "loss": 8.2641, + "step": 900400 + }, + { + "epoch": 9.59, + "learning_rate": 1.9233845065574352e-05, + "loss": 8.2064, + "step": 900500 + }, + { + "epoch": 9.59, + "learning_rate": 1.9217404566512933e-05, + "loss": 8.1898, + "step": 900600 + }, + { + "epoch": 9.59, + "learning_rate": 1.920097034992232e-05, + "loss": 8.1512, + "step": 900700 + }, + { + "epoch": 9.59, + "learning_rate": 1.918454241708062e-05, + "loss": 8.1887, + "step": 900800 + }, + { + "epoch": 9.6, + "learning_rate": 1.9168120769265384e-05, + "loss": 8.2322, + "step": 900900 + }, + { + "epoch": 9.6, + "learning_rate": 1.9151705407753806e-05, + "loss": 8.2858, + "step": 901000 + }, + { + "epoch": 9.6, + "learning_rate": 1.9135296333822463e-05, + "loss": 8.1476, + "step": 901100 + }, + { + "epoch": 9.6, + "learning_rate": 1.9118893548747495e-05, + "loss": 8.2076, + "step": 901200 + }, + { + "epoch": 9.6, + "learning_rate": 1.9102497053804526e-05, + "loss": 8.3062, + "step": 901300 + }, + { + "epoch": 9.6, + "learning_rate": 1.908610685026877e-05, + "loss": 8.2571, + "step": 901400 + }, + { + "epoch": 9.6, + "learning_rate": 1.9069722939414858e-05, + "loss": 8.2163, + "step": 901500 + }, + { + "epoch": 9.6, + "learning_rate": 1.905334532251697e-05, + "loss": 8.1429, + "step": 901600 + }, + { + "epoch": 9.6, + "learning_rate": 1.9036974000848774e-05, + "loss": 8.2156, + "step": 901700 + }, + { + "epoch": 9.61, + "learning_rate": 1.9020608975683517e-05, + "loss": 8.2162, + "step": 901800 + }, + { + "epoch": 9.61, + "learning_rate": 1.9004250248293885e-05, + "loss": 8.2302, + "step": 901900 + }, + { + "epoch": 9.61, + "learning_rate": 1.898789781995207e-05, + "loss": 8.1999, + "step": 902000 + }, + { + "epoch": 9.61, + "learning_rate": 1.8971551691929857e-05, + "loss": 8.1943, + "step": 902100 + }, + { + "epoch": 9.61, + "learning_rate": 1.8955211865498456e-05, + "loss": 8.2459, + "step": 902200 + }, + { + "epoch": 9.61, + "learning_rate": 1.8938878341928633e-05, + "loss": 8.2435, + "step": 902300 + }, + { + "epoch": 9.61, + "learning_rate": 1.8922551122490593e-05, + "loss": 8.2626, + "step": 902400 + }, + { + "epoch": 9.61, + "learning_rate": 1.890623020845418e-05, + "loss": 8.2179, + "step": 902500 + }, + { + "epoch": 9.61, + "learning_rate": 1.8889915601088648e-05, + "loss": 8.1466, + "step": 902600 + }, + { + "epoch": 9.61, + "learning_rate": 1.8873607301662765e-05, + "loss": 8.2801, + "step": 902700 + }, + { + "epoch": 9.62, + "learning_rate": 1.8857305311444816e-05, + "loss": 8.2114, + "step": 902800 + }, + { + "epoch": 9.62, + "learning_rate": 1.88410096317027e-05, + "loss": 8.2324, + "step": 902900 + }, + { + "epoch": 9.62, + "learning_rate": 1.882472026370361e-05, + "loss": 8.264, + "step": 903000 + }, + { + "epoch": 9.62, + "learning_rate": 1.8808437208714447e-05, + "loss": 8.2374, + "step": 903100 + }, + { + "epoch": 9.62, + "learning_rate": 1.879216046800152e-05, + "loss": 8.2515, + "step": 903200 + }, + { + "epoch": 9.62, + "learning_rate": 1.8775890042830712e-05, + "loss": 8.2859, + "step": 903300 + }, + { + "epoch": 9.62, + "learning_rate": 1.8759625934467318e-05, + "loss": 8.1147, + "step": 903400 + }, + { + "epoch": 9.62, + "learning_rate": 1.8743368144176232e-05, + "loss": 8.2145, + "step": 903500 + }, + { + "epoch": 9.62, + "learning_rate": 1.8727116673221833e-05, + "loss": 8.2768, + "step": 903600 + }, + { + "epoch": 9.63, + "learning_rate": 1.8710871522867978e-05, + "loss": 8.1947, + "step": 903700 + }, + { + "epoch": 9.63, + "learning_rate": 1.8694632694378033e-05, + "loss": 8.155, + "step": 903800 + }, + { + "epoch": 9.63, + "learning_rate": 1.8678400189014932e-05, + "loss": 8.184, + "step": 903900 + }, + { + "epoch": 9.63, + "learning_rate": 1.8662174008041112e-05, + "loss": 8.2752, + "step": 904000 + }, + { + "epoch": 9.63, + "learning_rate": 1.8645954152718392e-05, + "loss": 8.1948, + "step": 904100 + }, + { + "epoch": 9.63, + "learning_rate": 1.8629740624308268e-05, + "loss": 8.1757, + "step": 904200 + }, + { + "epoch": 9.63, + "learning_rate": 1.8613533424071617e-05, + "loss": 8.2601, + "step": 904300 + }, + { + "epoch": 9.63, + "learning_rate": 1.8597332553268955e-05, + "loss": 8.2425, + "step": 904400 + }, + { + "epoch": 9.63, + "learning_rate": 1.8581138013160105e-05, + "loss": 8.1645, + "step": 904500 + }, + { + "epoch": 9.64, + "learning_rate": 1.8564949805004616e-05, + "loss": 8.2621, + "step": 904600 + }, + { + "epoch": 9.64, + "learning_rate": 1.8548767930061404e-05, + "loss": 8.1795, + "step": 904700 + }, + { + "epoch": 9.64, + "learning_rate": 1.8532592389588953e-05, + "loss": 8.2084, + "step": 904800 + }, + { + "epoch": 9.64, + "learning_rate": 1.85164231848452e-05, + "loss": 8.257, + "step": 904900 + }, + { + "epoch": 9.64, + "learning_rate": 1.850026031708767e-05, + "loss": 8.1723, + "step": 905000 + }, + { + "epoch": 9.64, + "learning_rate": 1.8484103787573348e-05, + "loss": 8.1361, + "step": 905100 + }, + { + "epoch": 9.64, + "learning_rate": 1.8467953597558706e-05, + "loss": 8.1606, + "step": 905200 + }, + { + "epoch": 9.64, + "learning_rate": 1.845180974829973e-05, + "loss": 8.1013, + "step": 905300 + }, + { + "epoch": 9.64, + "learning_rate": 1.843567224105198e-05, + "loss": 8.1172, + "step": 905400 + }, + { + "epoch": 9.64, + "learning_rate": 1.8419541077070445e-05, + "loss": 8.2407, + "step": 905500 + }, + { + "epoch": 9.65, + "learning_rate": 1.8403416257609653e-05, + "loss": 8.1947, + "step": 905600 + }, + { + "epoch": 9.65, + "learning_rate": 1.8387297783923586e-05, + "loss": 8.2107, + "step": 905700 + }, + { + "epoch": 9.65, + "learning_rate": 1.8371185657265854e-05, + "loss": 8.2077, + "step": 905800 + }, + { + "epoch": 9.65, + "learning_rate": 1.835507987888947e-05, + "loss": 8.2069, + "step": 905900 + }, + { + "epoch": 9.65, + "learning_rate": 1.833898045004695e-05, + "loss": 8.195, + "step": 906000 + }, + { + "epoch": 9.65, + "learning_rate": 1.8322887371990404e-05, + "loss": 8.1954, + "step": 906100 + }, + { + "epoch": 9.65, + "learning_rate": 1.8306800645971367e-05, + "loss": 8.2626, + "step": 906200 + }, + { + "epoch": 9.65, + "learning_rate": 1.829072027324089e-05, + "loss": 8.207, + "step": 906300 + }, + { + "epoch": 9.65, + "learning_rate": 1.8274646255049544e-05, + "loss": 8.2174, + "step": 906400 + }, + { + "epoch": 9.66, + "learning_rate": 1.8258578592647437e-05, + "loss": 8.1568, + "step": 906500 + }, + { + "epoch": 9.66, + "learning_rate": 1.8242517287284143e-05, + "loss": 8.242, + "step": 906600 + }, + { + "epoch": 9.66, + "learning_rate": 1.822646234020874e-05, + "loss": 8.2561, + "step": 906700 + }, + { + "epoch": 9.66, + "learning_rate": 1.8210413752669807e-05, + "loss": 8.2552, + "step": 906800 + }, + { + "epoch": 9.66, + "learning_rate": 1.8194371525915487e-05, + "loss": 8.2567, + "step": 906900 + }, + { + "epoch": 9.66, + "learning_rate": 1.8178335661193367e-05, + "loss": 8.2357, + "step": 907000 + }, + { + "epoch": 9.66, + "learning_rate": 1.8162306159750553e-05, + "loss": 8.2385, + "step": 907100 + }, + { + "epoch": 9.66, + "learning_rate": 1.8146283022833643e-05, + "loss": 8.2081, + "step": 907200 + }, + { + "epoch": 9.66, + "learning_rate": 1.8130266251688798e-05, + "loss": 8.2085, + "step": 907300 + }, + { + "epoch": 9.67, + "learning_rate": 1.8114255847561622e-05, + "loss": 8.173, + "step": 907400 + }, + { + "epoch": 9.67, + "learning_rate": 1.8098251811697242e-05, + "loss": 8.1932, + "step": 907500 + }, + { + "epoch": 9.67, + "learning_rate": 1.808225414534028e-05, + "loss": 8.2433, + "step": 907600 + }, + { + "epoch": 9.67, + "learning_rate": 1.8066262849734928e-05, + "loss": 8.1833, + "step": 907700 + }, + { + "epoch": 9.67, + "learning_rate": 1.8050277926124794e-05, + "loss": 8.1327, + "step": 907800 + }, + { + "epoch": 9.67, + "learning_rate": 1.8034299375753016e-05, + "loss": 8.2967, + "step": 907900 + }, + { + "epoch": 9.67, + "learning_rate": 1.801832719986225e-05, + "loss": 8.2453, + "step": 908000 + }, + { + "epoch": 9.67, + "learning_rate": 1.800236139969469e-05, + "loss": 8.101, + "step": 908100 + }, + { + "epoch": 9.67, + "learning_rate": 1.7986401976491983e-05, + "loss": 8.2534, + "step": 908200 + }, + { + "epoch": 9.67, + "learning_rate": 1.7970448931495242e-05, + "loss": 8.2623, + "step": 908300 + }, + { + "epoch": 9.68, + "learning_rate": 1.795450226594523e-05, + "loss": 8.2303, + "step": 908400 + }, + { + "epoch": 9.68, + "learning_rate": 1.793856198108205e-05, + "loss": 8.2673, + "step": 908500 + }, + { + "epoch": 9.68, + "learning_rate": 1.792262807814541e-05, + "loss": 8.107, + "step": 908600 + }, + { + "epoch": 9.68, + "learning_rate": 1.7906700558374455e-05, + "loss": 8.1864, + "step": 908700 + }, + { + "epoch": 9.68, + "learning_rate": 1.789077942300793e-05, + "loss": 8.1562, + "step": 908800 + }, + { + "epoch": 9.68, + "learning_rate": 1.7874864673283988e-05, + "loss": 8.1811, + "step": 908900 + }, + { + "epoch": 9.68, + "learning_rate": 1.7858956310440322e-05, + "loss": 8.212, + "step": 909000 + }, + { + "epoch": 9.68, + "learning_rate": 1.7843054335714105e-05, + "loss": 8.2119, + "step": 909100 + }, + { + "epoch": 9.68, + "learning_rate": 1.782715875034209e-05, + "loss": 8.2009, + "step": 909200 + }, + { + "epoch": 9.69, + "learning_rate": 1.781126955556044e-05, + "loss": 8.2379, + "step": 909300 + }, + { + "epoch": 9.69, + "learning_rate": 1.7795386752604858e-05, + "loss": 8.2744, + "step": 909400 + }, + { + "epoch": 9.69, + "learning_rate": 1.777951034271055e-05, + "loss": 8.2564, + "step": 909500 + }, + { + "epoch": 9.69, + "learning_rate": 1.7763640327112276e-05, + "loss": 8.2002, + "step": 909600 + }, + { + "epoch": 9.69, + "learning_rate": 1.7747776707044162e-05, + "loss": 8.1579, + "step": 909700 + }, + { + "epoch": 9.69, + "learning_rate": 1.7731919483740002e-05, + "loss": 8.2751, + "step": 909800 + }, + { + "epoch": 9.69, + "learning_rate": 1.771606865843295e-05, + "loss": 8.174, + "step": 909900 + }, + { + "epoch": 9.69, + "learning_rate": 1.770022423235581e-05, + "loss": 8.2009, + "step": 910000 + }, + { + "epoch": 9.69, + "learning_rate": 1.76843862067407e-05, + "loss": 8.1933, + "step": 910100 + }, + { + "epoch": 9.69, + "learning_rate": 1.7668554582819406e-05, + "loss": 8.2304, + "step": 910200 + }, + { + "epoch": 9.7, + "learning_rate": 1.765272936182316e-05, + "loss": 8.2068, + "step": 910300 + }, + { + "epoch": 9.7, + "learning_rate": 1.7636910544982687e-05, + "loss": 8.2885, + "step": 910400 + }, + { + "epoch": 9.7, + "learning_rate": 1.7621098133528203e-05, + "loss": 8.2387, + "step": 910500 + }, + { + "epoch": 9.7, + "learning_rate": 1.760529212868943e-05, + "loss": 8.2252, + "step": 910600 + }, + { + "epoch": 9.7, + "learning_rate": 1.7589492531695662e-05, + "loss": 8.2297, + "step": 910700 + }, + { + "epoch": 9.7, + "learning_rate": 1.757369934377555e-05, + "loss": 8.2343, + "step": 910800 + }, + { + "epoch": 9.7, + "learning_rate": 1.7557912566157397e-05, + "loss": 8.2493, + "step": 910900 + }, + { + "epoch": 9.7, + "learning_rate": 1.7542132200068884e-05, + "loss": 8.2188, + "step": 911000 + }, + { + "epoch": 9.7, + "learning_rate": 1.752635824673735e-05, + "loss": 8.1917, + "step": 911100 + }, + { + "epoch": 9.71, + "learning_rate": 1.751059070738942e-05, + "loss": 8.1418, + "step": 911200 + }, + { + "epoch": 9.71, + "learning_rate": 1.7494829583251427e-05, + "loss": 8.1332, + "step": 911300 + }, + { + "epoch": 9.71, + "learning_rate": 1.747907487554904e-05, + "loss": 8.1917, + "step": 911400 + }, + { + "epoch": 9.71, + "learning_rate": 1.74633265855076e-05, + "loss": 8.2215, + "step": 911500 + }, + { + "epoch": 9.71, + "learning_rate": 1.744758471435175e-05, + "loss": 8.2721, + "step": 911600 + }, + { + "epoch": 9.71, + "learning_rate": 1.743184926330581e-05, + "loss": 8.147, + "step": 911700 + }, + { + "epoch": 9.71, + "learning_rate": 1.74161202335935e-05, + "loss": 8.151, + "step": 911800 + }, + { + "epoch": 9.71, + "learning_rate": 1.740039762643807e-05, + "loss": 8.2429, + "step": 911900 + }, + { + "epoch": 9.71, + "learning_rate": 1.7384681443062245e-05, + "loss": 8.1692, + "step": 912000 + }, + { + "epoch": 9.72, + "learning_rate": 1.736897168468832e-05, + "loss": 8.2607, + "step": 912100 + }, + { + "epoch": 9.72, + "learning_rate": 1.735326835253803e-05, + "loss": 8.2796, + "step": 912200 + }, + { + "epoch": 9.72, + "learning_rate": 1.733757144783259e-05, + "loss": 8.2453, + "step": 912300 + }, + { + "epoch": 9.72, + "learning_rate": 1.7321880971792792e-05, + "loss": 8.177, + "step": 912400 + }, + { + "epoch": 9.72, + "learning_rate": 1.730619692563886e-05, + "loss": 8.2215, + "step": 912500 + }, + { + "epoch": 9.72, + "learning_rate": 1.7290519310590602e-05, + "loss": 8.1956, + "step": 912600 + }, + { + "epoch": 9.72, + "learning_rate": 1.7274848127867173e-05, + "loss": 8.1587, + "step": 912700 + }, + { + "epoch": 9.72, + "learning_rate": 1.7259183378687393e-05, + "loss": 8.1817, + "step": 912800 + }, + { + "epoch": 9.72, + "learning_rate": 1.7243525064269482e-05, + "loss": 8.176, + "step": 912900 + }, + { + "epoch": 9.72, + "learning_rate": 1.722787318583121e-05, + "loss": 8.2843, + "step": 913000 + }, + { + "epoch": 9.73, + "learning_rate": 1.7212227744589772e-05, + "loss": 8.2209, + "step": 913100 + }, + { + "epoch": 9.73, + "learning_rate": 1.7196588741761986e-05, + "loss": 8.1909, + "step": 913200 + }, + { + "epoch": 9.73, + "learning_rate": 1.7180956178564066e-05, + "loss": 8.2432, + "step": 913300 + }, + { + "epoch": 9.73, + "learning_rate": 1.7165330056211758e-05, + "loss": 8.1501, + "step": 913400 + }, + { + "epoch": 9.73, + "learning_rate": 1.7149710375920293e-05, + "loss": 8.1711, + "step": 913500 + }, + { + "epoch": 9.73, + "learning_rate": 1.713409713890445e-05, + "loss": 8.2742, + "step": 913600 + }, + { + "epoch": 9.73, + "learning_rate": 1.7118490346378445e-05, + "loss": 8.3274, + "step": 913700 + }, + { + "epoch": 9.73, + "learning_rate": 1.7102889999556037e-05, + "loss": 8.288, + "step": 913800 + }, + { + "epoch": 9.73, + "learning_rate": 1.708729609965043e-05, + "loss": 8.1946, + "step": 913900 + }, + { + "epoch": 9.74, + "learning_rate": 1.7071708647874407e-05, + "loss": 8.2752, + "step": 914000 + }, + { + "epoch": 9.74, + "learning_rate": 1.70561276454402e-05, + "loss": 8.1998, + "step": 914100 + }, + { + "epoch": 9.74, + "learning_rate": 1.704055309355953e-05, + "loss": 8.1905, + "step": 914200 + }, + { + "epoch": 9.74, + "learning_rate": 1.7024984993443605e-05, + "loss": 8.1813, + "step": 914300 + }, + { + "epoch": 9.74, + "learning_rate": 1.700942334630322e-05, + "loss": 8.2174, + "step": 914400 + }, + { + "epoch": 9.74, + "learning_rate": 1.699386815334858e-05, + "loss": 8.2474, + "step": 914500 + }, + { + "epoch": 9.74, + "learning_rate": 1.6978319415789366e-05, + "loss": 8.2889, + "step": 914600 + }, + { + "epoch": 9.74, + "learning_rate": 1.6962777134834886e-05, + "loss": 8.2675, + "step": 914700 + }, + { + "epoch": 9.74, + "learning_rate": 1.6947241311693828e-05, + "loss": 8.2081, + "step": 914800 + }, + { + "epoch": 9.74, + "learning_rate": 1.6931711947574402e-05, + "loss": 8.1887, + "step": 914900 + }, + { + "epoch": 9.75, + "learning_rate": 1.6916189043684327e-05, + "loss": 8.2694, + "step": 915000 + }, + { + "epoch": 9.75, + "learning_rate": 1.690067260123085e-05, + "loss": 8.1912, + "step": 915100 + }, + { + "epoch": 9.75, + "learning_rate": 1.6885162621420668e-05, + "loss": 8.2293, + "step": 915200 + }, + { + "epoch": 9.75, + "learning_rate": 1.686965910546e-05, + "loss": 8.1758, + "step": 915300 + }, + { + "epoch": 9.75, + "learning_rate": 1.6854162054554524e-05, + "loss": 8.1834, + "step": 915400 + }, + { + "epoch": 9.75, + "learning_rate": 1.6838671469909494e-05, + "loss": 8.2195, + "step": 915500 + }, + { + "epoch": 9.75, + "learning_rate": 1.6823187352729598e-05, + "loss": 8.1423, + "step": 915600 + }, + { + "epoch": 9.75, + "learning_rate": 1.680770970421903e-05, + "loss": 8.1322, + "step": 915700 + }, + { + "epoch": 9.75, + "learning_rate": 1.6792238525581473e-05, + "loss": 8.2782, + "step": 915800 + }, + { + "epoch": 9.76, + "learning_rate": 1.6776773818020162e-05, + "loss": 8.2554, + "step": 915900 + }, + { + "epoch": 9.76, + "learning_rate": 1.676131558273778e-05, + "loss": 8.2853, + "step": 916000 + }, + { + "epoch": 9.76, + "learning_rate": 1.6745863820936493e-05, + "loss": 8.2247, + "step": 916100 + }, + { + "epoch": 9.76, + "learning_rate": 1.6730418533817972e-05, + "loss": 8.1561, + "step": 916200 + }, + { + "epoch": 9.76, + "learning_rate": 1.671497972258348e-05, + "loss": 8.2204, + "step": 916300 + }, + { + "epoch": 9.76, + "learning_rate": 1.669954738843358e-05, + "loss": 8.173, + "step": 916400 + }, + { + "epoch": 9.76, + "learning_rate": 1.668412153256851e-05, + "loss": 8.1929, + "step": 916500 + }, + { + "epoch": 9.76, + "learning_rate": 1.6668702156187953e-05, + "loss": 8.2816, + "step": 916600 + }, + { + "epoch": 9.76, + "learning_rate": 1.6653289260491066e-05, + "loss": 8.2473, + "step": 916700 + }, + { + "epoch": 9.77, + "learning_rate": 1.6637882846676502e-05, + "loss": 8.1691, + "step": 916800 + }, + { + "epoch": 9.77, + "learning_rate": 1.6622482915942394e-05, + "loss": 8.3205, + "step": 916900 + }, + { + "epoch": 9.77, + "learning_rate": 1.6607089469486448e-05, + "loss": 8.2823, + "step": 917000 + }, + { + "epoch": 9.77, + "learning_rate": 1.6591702508505792e-05, + "loss": 8.198, + "step": 917100 + }, + { + "epoch": 9.77, + "learning_rate": 1.6576322034197067e-05, + "loss": 8.1553, + "step": 917200 + }, + { + "epoch": 9.77, + "learning_rate": 1.6560948047756386e-05, + "loss": 8.2283, + "step": 917300 + }, + { + "epoch": 9.77, + "learning_rate": 1.654558055037947e-05, + "loss": 8.2424, + "step": 917400 + }, + { + "epoch": 9.77, + "learning_rate": 1.6530219543261337e-05, + "loss": 8.2806, + "step": 917500 + }, + { + "epoch": 9.77, + "learning_rate": 1.6514865027596704e-05, + "loss": 8.2216, + "step": 917600 + }, + { + "epoch": 9.77, + "learning_rate": 1.6499517004579633e-05, + "loss": 8.2329, + "step": 917700 + }, + { + "epoch": 9.78, + "learning_rate": 1.648417547540383e-05, + "loss": 8.1244, + "step": 917800 + }, + { + "epoch": 9.78, + "learning_rate": 1.6468840441262288e-05, + "loss": 8.2313, + "step": 917900 + }, + { + "epoch": 9.78, + "learning_rate": 1.6453511903347707e-05, + "loss": 8.2035, + "step": 918000 + }, + { + "epoch": 9.78, + "learning_rate": 1.6438189862852126e-05, + "loss": 8.2038, + "step": 918100 + }, + { + "epoch": 9.78, + "learning_rate": 1.642287432096722e-05, + "loss": 8.1872, + "step": 918200 + }, + { + "epoch": 9.78, + "learning_rate": 1.6407565278883985e-05, + "loss": 8.2105, + "step": 918300 + }, + { + "epoch": 9.78, + "learning_rate": 1.6392262737793075e-05, + "loss": 8.2001, + "step": 918400 + }, + { + "epoch": 9.78, + "learning_rate": 1.6376966698884553e-05, + "loss": 8.2344, + "step": 918500 + }, + { + "epoch": 9.78, + "learning_rate": 1.6361677163347966e-05, + "loss": 8.1778, + "step": 918600 + }, + { + "epoch": 9.79, + "learning_rate": 1.6346394132372422e-05, + "loss": 8.1855, + "step": 918700 + }, + { + "epoch": 9.79, + "learning_rate": 1.633111760714645e-05, + "loss": 8.2586, + "step": 918800 + }, + { + "epoch": 9.79, + "learning_rate": 1.6315847588858178e-05, + "loss": 8.2614, + "step": 918900 + }, + { + "epoch": 9.79, + "learning_rate": 1.6300584078695047e-05, + "loss": 8.1358, + "step": 919000 + }, + { + "epoch": 9.79, + "learning_rate": 1.6285327077844193e-05, + "loss": 8.2024, + "step": 919100 + }, + { + "epoch": 9.79, + "learning_rate": 1.627007658749209e-05, + "loss": 8.2309, + "step": 919200 + }, + { + "epoch": 9.79, + "learning_rate": 1.6254832608824843e-05, + "loss": 8.2673, + "step": 919300 + }, + { + "epoch": 9.79, + "learning_rate": 1.6239595143027896e-05, + "loss": 8.1543, + "step": 919400 + }, + { + "epoch": 9.79, + "learning_rate": 1.6224364191286323e-05, + "loss": 8.2493, + "step": 919500 + }, + { + "epoch": 9.79, + "learning_rate": 1.620913975478463e-05, + "loss": 8.2909, + "step": 919600 + }, + { + "epoch": 9.8, + "learning_rate": 1.6193921834706804e-05, + "loss": 8.2806, + "step": 919700 + }, + { + "epoch": 9.8, + "learning_rate": 1.6178710432236345e-05, + "loss": 8.3039, + "step": 919800 + }, + { + "epoch": 9.8, + "learning_rate": 1.616350554855627e-05, + "loss": 8.2255, + "step": 919900 + }, + { + "epoch": 9.8, + "learning_rate": 1.6148307184849042e-05, + "loss": 8.2589, + "step": 920000 + }, + { + "epoch": 9.8, + "learning_rate": 1.6133115342296657e-05, + "loss": 8.2488, + "step": 920100 + }, + { + "epoch": 9.8, + "learning_rate": 1.6117930022080552e-05, + "loss": 8.1674, + "step": 920200 + }, + { + "epoch": 9.8, + "learning_rate": 1.610275122538173e-05, + "loss": 8.1656, + "step": 920300 + }, + { + "epoch": 9.8, + "learning_rate": 1.6087578953380632e-05, + "loss": 8.2775, + "step": 920400 + }, + { + "epoch": 9.8, + "learning_rate": 1.607241320725722e-05, + "loss": 8.1897, + "step": 920500 + }, + { + "epoch": 9.81, + "learning_rate": 1.605725398819089e-05, + "loss": 8.2417, + "step": 920600 + }, + { + "epoch": 9.81, + "learning_rate": 1.6042101297360633e-05, + "loss": 8.1454, + "step": 920700 + }, + { + "epoch": 9.81, + "learning_rate": 1.6026955135944856e-05, + "loss": 8.1855, + "step": 920800 + }, + { + "epoch": 9.81, + "learning_rate": 1.6011815505121443e-05, + "loss": 8.2421, + "step": 920900 + }, + { + "epoch": 9.81, + "learning_rate": 1.5996682406067866e-05, + "loss": 8.1293, + "step": 921000 + }, + { + "epoch": 9.81, + "learning_rate": 1.598155583996098e-05, + "loss": 8.2115, + "step": 921100 + }, + { + "epoch": 9.81, + "learning_rate": 1.596643580797721e-05, + "loss": 8.2437, + "step": 921200 + }, + { + "epoch": 9.81, + "learning_rate": 1.5951322311292405e-05, + "loss": 8.2513, + "step": 921300 + }, + { + "epoch": 9.81, + "learning_rate": 1.5936215351081983e-05, + "loss": 8.2527, + "step": 921400 + }, + { + "epoch": 9.82, + "learning_rate": 1.5921114928520807e-05, + "loss": 8.1911, + "step": 921500 + }, + { + "epoch": 9.82, + "learning_rate": 1.5906021044783215e-05, + "loss": 8.2197, + "step": 921600 + }, + { + "epoch": 9.82, + "learning_rate": 1.5890933701043066e-05, + "loss": 8.1845, + "step": 921700 + }, + { + "epoch": 9.82, + "learning_rate": 1.587585289847372e-05, + "loss": 8.1656, + "step": 921800 + }, + { + "epoch": 9.82, + "learning_rate": 1.586077863824802e-05, + "loss": 8.1538, + "step": 921900 + }, + { + "epoch": 9.82, + "learning_rate": 1.584571092153826e-05, + "loss": 8.2206, + "step": 922000 + }, + { + "epoch": 9.82, + "learning_rate": 1.5830649749516268e-05, + "loss": 8.2678, + "step": 922100 + }, + { + "epoch": 9.82, + "learning_rate": 1.581559512335338e-05, + "loss": 8.272, + "step": 922200 + }, + { + "epoch": 9.82, + "learning_rate": 1.5800547044220372e-05, + "loss": 8.2743, + "step": 922300 + }, + { + "epoch": 9.82, + "learning_rate": 1.5785505513287536e-05, + "loss": 8.2106, + "step": 922400 + }, + { + "epoch": 9.83, + "learning_rate": 1.5770470531724633e-05, + "loss": 8.2178, + "step": 922500 + }, + { + "epoch": 9.83, + "learning_rate": 1.575544210070098e-05, + "loss": 8.2006, + "step": 922600 + }, + { + "epoch": 9.83, + "learning_rate": 1.5740420221385323e-05, + "loss": 8.2406, + "step": 922700 + }, + { + "epoch": 9.83, + "learning_rate": 1.5725404894945882e-05, + "loss": 8.1758, + "step": 922800 + }, + { + "epoch": 9.83, + "learning_rate": 1.5710396122550465e-05, + "loss": 8.2265, + "step": 922900 + }, + { + "epoch": 9.83, + "learning_rate": 1.5695393905366253e-05, + "loss": 8.1988, + "step": 923000 + }, + { + "epoch": 9.83, + "learning_rate": 1.5680398244559992e-05, + "loss": 8.2159, + "step": 923100 + }, + { + "epoch": 9.83, + "learning_rate": 1.566540914129787e-05, + "loss": 8.2532, + "step": 923200 + }, + { + "epoch": 9.83, + "learning_rate": 1.5650426596745627e-05, + "loss": 8.266, + "step": 923300 + }, + { + "epoch": 9.84, + "learning_rate": 1.5635450612068447e-05, + "loss": 8.2017, + "step": 923400 + }, + { + "epoch": 9.84, + "learning_rate": 1.5620481188431012e-05, + "loss": 8.2048, + "step": 923500 + }, + { + "epoch": 9.84, + "learning_rate": 1.560551832699746e-05, + "loss": 8.2294, + "step": 923600 + }, + { + "epoch": 9.84, + "learning_rate": 1.559056202893151e-05, + "loss": 8.1723, + "step": 923700 + }, + { + "epoch": 9.84, + "learning_rate": 1.5575612295396293e-05, + "loss": 8.162, + "step": 923800 + }, + { + "epoch": 9.84, + "learning_rate": 1.556066912755445e-05, + "loss": 8.1356, + "step": 923900 + }, + { + "epoch": 9.84, + "learning_rate": 1.554573252656809e-05, + "loss": 8.2392, + "step": 924000 + }, + { + "epoch": 9.84, + "learning_rate": 1.5530802493598885e-05, + "loss": 8.2332, + "step": 924100 + }, + { + "epoch": 9.84, + "learning_rate": 1.55158790298079e-05, + "loss": 8.2333, + "step": 924200 + }, + { + "epoch": 9.85, + "learning_rate": 1.550096213635577e-05, + "loss": 8.2283, + "step": 924300 + }, + { + "epoch": 9.85, + "learning_rate": 1.5486051814402524e-05, + "loss": 8.2727, + "step": 924400 + }, + { + "epoch": 9.85, + "learning_rate": 1.547114806510783e-05, + "loss": 8.2097, + "step": 924500 + }, + { + "epoch": 9.85, + "learning_rate": 1.5456250889630664e-05, + "loss": 8.1754, + "step": 924600 + }, + { + "epoch": 9.85, + "learning_rate": 1.5441360289129636e-05, + "loss": 8.2452, + "step": 924700 + }, + { + "epoch": 9.85, + "learning_rate": 1.5426476264762756e-05, + "loss": 8.2906, + "step": 924800 + }, + { + "epoch": 9.85, + "learning_rate": 1.5411598817687624e-05, + "loss": 8.1918, + "step": 924900 + }, + { + "epoch": 9.85, + "learning_rate": 1.539672794906115e-05, + "loss": 8.1543, + "step": 925000 + }, + { + "epoch": 9.85, + "learning_rate": 1.538186366003991e-05, + "loss": 8.1833, + "step": 925100 + }, + { + "epoch": 9.85, + "learning_rate": 1.5367005951779922e-05, + "loss": 8.1841, + "step": 925200 + }, + { + "epoch": 9.86, + "learning_rate": 1.535215482543664e-05, + "loss": 8.2049, + "step": 925300 + }, + { + "epoch": 9.86, + "learning_rate": 1.5337310282165042e-05, + "loss": 8.2154, + "step": 925400 + }, + { + "epoch": 9.86, + "learning_rate": 1.5322472323119564e-05, + "loss": 8.2083, + "step": 925500 + }, + { + "epoch": 9.86, + "learning_rate": 1.530764094945424e-05, + "loss": 8.2403, + "step": 925600 + }, + { + "epoch": 9.86, + "learning_rate": 1.5292816162322386e-05, + "loss": 8.2318, + "step": 925700 + }, + { + "epoch": 9.86, + "learning_rate": 1.5277997962877022e-05, + "loss": 8.1655, + "step": 925800 + }, + { + "epoch": 9.86, + "learning_rate": 1.5263186352270508e-05, + "loss": 8.1824, + "step": 925900 + }, + { + "epoch": 9.86, + "learning_rate": 1.5248381331654804e-05, + "loss": 8.1733, + "step": 926000 + }, + { + "epoch": 9.86, + "learning_rate": 1.5233582902181221e-05, + "loss": 8.2157, + "step": 926100 + }, + { + "epoch": 9.87, + "learning_rate": 1.5218791065000681e-05, + "loss": 8.1852, + "step": 926200 + }, + { + "epoch": 9.87, + "learning_rate": 1.5204005821263534e-05, + "loss": 8.1997, + "step": 926300 + }, + { + "epoch": 9.87, + "learning_rate": 1.5189227172119669e-05, + "loss": 8.1834, + "step": 926400 + }, + { + "epoch": 9.87, + "learning_rate": 1.5174455118718344e-05, + "loss": 8.2621, + "step": 926500 + }, + { + "epoch": 9.87, + "learning_rate": 1.5159689662208454e-05, + "loss": 8.2094, + "step": 926600 + }, + { + "epoch": 9.87, + "learning_rate": 1.5144930803738278e-05, + "loss": 8.277, + "step": 926700 + }, + { + "epoch": 9.87, + "learning_rate": 1.513017854445562e-05, + "loss": 8.1849, + "step": 926800 + }, + { + "epoch": 9.87, + "learning_rate": 1.5115432885507752e-05, + "loss": 8.1744, + "step": 926900 + }, + { + "epoch": 9.87, + "learning_rate": 1.5100693828041468e-05, + "loss": 8.1936, + "step": 927000 + }, + { + "epoch": 9.87, + "learning_rate": 1.5085961373203017e-05, + "loss": 8.2293, + "step": 927100 + }, + { + "epoch": 9.88, + "learning_rate": 1.5071235522138127e-05, + "loss": 8.2384, + "step": 927200 + }, + { + "epoch": 9.88, + "learning_rate": 1.5056516275992062e-05, + "loss": 8.2779, + "step": 927300 + }, + { + "epoch": 9.88, + "learning_rate": 1.5041803635909501e-05, + "loss": 8.2816, + "step": 927400 + }, + { + "epoch": 9.88, + "learning_rate": 1.5027097603034713e-05, + "loss": 8.2382, + "step": 927500 + }, + { + "epoch": 9.88, + "learning_rate": 1.5012398178511289e-05, + "loss": 8.184, + "step": 927600 + }, + { + "epoch": 9.88, + "learning_rate": 1.4997705363482494e-05, + "loss": 8.1782, + "step": 927700 + }, + { + "epoch": 9.88, + "learning_rate": 1.498301915909095e-05, + "loss": 8.2781, + "step": 927800 + }, + { + "epoch": 9.88, + "learning_rate": 1.4968339566478807e-05, + "loss": 8.2471, + "step": 927900 + }, + { + "epoch": 9.88, + "learning_rate": 1.4953666586787674e-05, + "loss": 8.2386, + "step": 928000 + }, + { + "epoch": 9.89, + "learning_rate": 1.4939000221158728e-05, + "loss": 8.1327, + "step": 928100 + }, + { + "epoch": 9.89, + "learning_rate": 1.4924340470732534e-05, + "loss": 8.2308, + "step": 928200 + }, + { + "epoch": 9.89, + "learning_rate": 1.4909687336649191e-05, + "loss": 8.2392, + "step": 928300 + }, + { + "epoch": 9.89, + "learning_rate": 1.4895040820048256e-05, + "loss": 8.2398, + "step": 928400 + }, + { + "epoch": 9.89, + "learning_rate": 1.4880400922068827e-05, + "loss": 8.2348, + "step": 928500 + }, + { + "epoch": 9.89, + "learning_rate": 1.4865767643849437e-05, + "loss": 8.0108, + "step": 928600 + }, + { + "epoch": 9.89, + "learning_rate": 1.4851140986528112e-05, + "loss": 8.1826, + "step": 928700 + }, + { + "epoch": 9.89, + "learning_rate": 1.4836520951242339e-05, + "loss": 8.2516, + "step": 928800 + }, + { + "epoch": 9.89, + "learning_rate": 1.4821907539129177e-05, + "loss": 8.2293, + "step": 928900 + }, + { + "epoch": 9.9, + "learning_rate": 1.4807300751325093e-05, + "loss": 8.1959, + "step": 929000 + }, + { + "epoch": 9.9, + "learning_rate": 1.4792700588966035e-05, + "loss": 8.2345, + "step": 929100 + }, + { + "epoch": 9.9, + "learning_rate": 1.4778107053187463e-05, + "loss": 8.2554, + "step": 929200 + }, + { + "epoch": 9.9, + "learning_rate": 1.4763520145124354e-05, + "loss": 8.1823, + "step": 929300 + }, + { + "epoch": 9.9, + "learning_rate": 1.4748939865911105e-05, + "loss": 8.1741, + "step": 929400 + }, + { + "epoch": 9.9, + "learning_rate": 1.4734366216681605e-05, + "loss": 8.1407, + "step": 929500 + }, + { + "epoch": 9.9, + "learning_rate": 1.4719799198569295e-05, + "loss": 8.2358, + "step": 929600 + }, + { + "epoch": 9.9, + "learning_rate": 1.4705238812707034e-05, + "loss": 8.2507, + "step": 929700 + }, + { + "epoch": 9.9, + "learning_rate": 1.4690685060227182e-05, + "loss": 8.1508, + "step": 929800 + }, + { + "epoch": 9.9, + "learning_rate": 1.467613794226156e-05, + "loss": 8.158, + "step": 929900 + }, + { + "epoch": 9.91, + "learning_rate": 1.4661597459941556e-05, + "loss": 8.2826, + "step": 930000 + }, + { + "epoch": 9.91, + "learning_rate": 1.4647063614397948e-05, + "loss": 8.1969, + "step": 930100 + }, + { + "epoch": 9.91, + "learning_rate": 1.4632536406761033e-05, + "loss": 8.1997, + "step": 930200 + }, + { + "epoch": 9.91, + "learning_rate": 1.4618015838160593e-05, + "loss": 8.2648, + "step": 930300 + }, + { + "epoch": 9.91, + "learning_rate": 1.4603501909725914e-05, + "loss": 8.1415, + "step": 930400 + }, + { + "epoch": 9.91, + "learning_rate": 1.4588994622585739e-05, + "loss": 8.2337, + "step": 930500 + }, + { + "epoch": 9.91, + "learning_rate": 1.4574493977868297e-05, + "loss": 8.1813, + "step": 930600 + }, + { + "epoch": 9.91, + "learning_rate": 1.4559999976701277e-05, + "loss": 8.261, + "step": 930700 + }, + { + "epoch": 9.91, + "learning_rate": 1.4545512620211921e-05, + "loss": 8.1307, + "step": 930800 + }, + { + "epoch": 9.92, + "learning_rate": 1.4531031909526904e-05, + "loss": 8.2087, + "step": 930900 + }, + { + "epoch": 9.92, + "learning_rate": 1.4516557845772394e-05, + "loss": 8.2242, + "step": 931000 + }, + { + "epoch": 9.92, + "learning_rate": 1.4502090430074e-05, + "loss": 8.2146, + "step": 931100 + }, + { + "epoch": 9.92, + "learning_rate": 1.4487629663556935e-05, + "loss": 8.2077, + "step": 931200 + }, + { + "epoch": 9.92, + "learning_rate": 1.4473175547345708e-05, + "loss": 8.2201, + "step": 931300 + }, + { + "epoch": 9.92, + "learning_rate": 1.4458728082564488e-05, + "loss": 8.1975, + "step": 931400 + }, + { + "epoch": 9.92, + "learning_rate": 1.4444287270336855e-05, + "loss": 8.2025, + "step": 931500 + }, + { + "epoch": 9.92, + "learning_rate": 1.4429853111785874e-05, + "loss": 8.2913, + "step": 931600 + }, + { + "epoch": 9.92, + "learning_rate": 1.441542560803406e-05, + "loss": 8.2157, + "step": 931700 + }, + { + "epoch": 9.92, + "learning_rate": 1.440100476020344e-05, + "loss": 8.1492, + "step": 931800 + }, + { + "epoch": 9.93, + "learning_rate": 1.438659056941557e-05, + "loss": 8.266, + "step": 931900 + }, + { + "epoch": 9.93, + "learning_rate": 1.437218303679142e-05, + "loss": 8.2316, + "step": 932000 + }, + { + "epoch": 9.93, + "learning_rate": 1.435778216345145e-05, + "loss": 8.1457, + "step": 932100 + }, + { + "epoch": 9.93, + "learning_rate": 1.4343387950515618e-05, + "loss": 8.1829, + "step": 932200 + }, + { + "epoch": 9.93, + "learning_rate": 1.432900039910341e-05, + "loss": 8.18, + "step": 932300 + }, + { + "epoch": 9.93, + "learning_rate": 1.4314619510333672e-05, + "loss": 8.2009, + "step": 932400 + }, + { + "epoch": 9.93, + "learning_rate": 1.4300245285324865e-05, + "loss": 8.24, + "step": 932500 + }, + { + "epoch": 9.93, + "learning_rate": 1.4285877725194829e-05, + "loss": 8.1896, + "step": 932600 + }, + { + "epoch": 9.93, + "learning_rate": 1.4271516831061015e-05, + "loss": 8.1414, + "step": 932700 + }, + { + "epoch": 9.94, + "learning_rate": 1.4257162604040165e-05, + "loss": 8.2251, + "step": 932800 + }, + { + "epoch": 9.94, + "learning_rate": 1.4242815045248669e-05, + "loss": 8.2307, + "step": 932900 + }, + { + "epoch": 9.94, + "learning_rate": 1.4228474155802319e-05, + "loss": 8.2417, + "step": 933000 + }, + { + "epoch": 9.94, + "learning_rate": 1.421413993681645e-05, + "loss": 8.1696, + "step": 933100 + }, + { + "epoch": 9.94, + "learning_rate": 1.4199812389405754e-05, + "loss": 8.2037, + "step": 933200 + }, + { + "epoch": 9.94, + "learning_rate": 1.4185491514684568e-05, + "loss": 8.2168, + "step": 933300 + }, + { + "epoch": 9.94, + "learning_rate": 1.4171177313766582e-05, + "loss": 8.1543, + "step": 933400 + }, + { + "epoch": 9.94, + "learning_rate": 1.4156869787765003e-05, + "loss": 8.1938, + "step": 933500 + }, + { + "epoch": 9.94, + "learning_rate": 1.4142568937792566e-05, + "loss": 8.2643, + "step": 933600 + }, + { + "epoch": 9.95, + "learning_rate": 1.4128274764961413e-05, + "loss": 8.1718, + "step": 933700 + }, + { + "epoch": 9.95, + "learning_rate": 1.4113987270383278e-05, + "loss": 8.1896, + "step": 933800 + }, + { + "epoch": 9.95, + "learning_rate": 1.4099706455169193e-05, + "loss": 8.1783, + "step": 933900 + }, + { + "epoch": 9.95, + "learning_rate": 1.4085432320429848e-05, + "loss": 8.1849, + "step": 934000 + }, + { + "epoch": 9.95, + "learning_rate": 1.407116486727531e-05, + "loss": 8.1287, + "step": 934100 + }, + { + "epoch": 9.95, + "learning_rate": 1.4056904096815227e-05, + "loss": 8.1294, + "step": 934200 + }, + { + "epoch": 9.95, + "learning_rate": 1.4042650010158565e-05, + "loss": 8.3226, + "step": 934300 + }, + { + "epoch": 9.95, + "learning_rate": 1.4028402608413927e-05, + "loss": 8.0806, + "step": 934400 + }, + { + "epoch": 9.95, + "learning_rate": 1.4014161892689326e-05, + "loss": 8.1878, + "step": 934500 + }, + { + "epoch": 9.95, + "learning_rate": 1.3999927864092255e-05, + "loss": 8.2623, + "step": 934600 + }, + { + "epoch": 9.96, + "learning_rate": 1.3985700523729672e-05, + "loss": 8.1877, + "step": 934700 + }, + { + "epoch": 9.96, + "learning_rate": 1.3971479872708104e-05, + "loss": 8.2034, + "step": 934800 + }, + { + "epoch": 9.96, + "learning_rate": 1.3957265912133443e-05, + "loss": 8.2227, + "step": 934900 + }, + { + "epoch": 9.96, + "learning_rate": 1.3943058643111118e-05, + "loss": 8.2085, + "step": 935000 + }, + { + "epoch": 9.96, + "learning_rate": 1.392885806674601e-05, + "loss": 8.1953, + "step": 935100 + }, + { + "epoch": 9.96, + "learning_rate": 1.3914664184142556e-05, + "loss": 8.1729, + "step": 935200 + }, + { + "epoch": 9.96, + "learning_rate": 1.3900476996404566e-05, + "loss": 8.2245, + "step": 935300 + }, + { + "epoch": 9.96, + "learning_rate": 1.388629650463541e-05, + "loss": 8.2421, + "step": 935400 + }, + { + "epoch": 9.96, + "learning_rate": 1.3872122709937862e-05, + "loss": 8.1882, + "step": 935500 + }, + { + "epoch": 9.97, + "learning_rate": 1.3857955613414264e-05, + "loss": 8.2073, + "step": 935600 + }, + { + "epoch": 9.97, + "learning_rate": 1.3843795216166389e-05, + "loss": 8.1763, + "step": 935700 + }, + { + "epoch": 9.97, + "learning_rate": 1.3829641519295455e-05, + "loss": 8.2193, + "step": 935800 + }, + { + "epoch": 9.97, + "learning_rate": 1.381549452390224e-05, + "loss": 8.192, + "step": 935900 + }, + { + "epoch": 9.97, + "learning_rate": 1.380135423108695e-05, + "loss": 8.1682, + "step": 936000 + }, + { + "epoch": 9.97, + "learning_rate": 1.3787220641949261e-05, + "loss": 8.2345, + "step": 936100 + }, + { + "epoch": 9.97, + "learning_rate": 1.3773093757588317e-05, + "loss": 8.3114, + "step": 936200 + }, + { + "epoch": 9.97, + "learning_rate": 1.3758973579102829e-05, + "loss": 8.2308, + "step": 936300 + }, + { + "epoch": 9.97, + "learning_rate": 1.3744860107590906e-05, + "loss": 8.1369, + "step": 936400 + }, + { + "epoch": 9.97, + "learning_rate": 1.3730753344150137e-05, + "loss": 8.1693, + "step": 936500 + }, + { + "epoch": 9.98, + "learning_rate": 1.3716653289877578e-05, + "loss": 8.2178, + "step": 936600 + }, + { + "epoch": 9.98, + "learning_rate": 1.3702559945869864e-05, + "loss": 8.1446, + "step": 936700 + }, + { + "epoch": 9.98, + "learning_rate": 1.3688473313222993e-05, + "loss": 8.1908, + "step": 936800 + }, + { + "epoch": 9.98, + "learning_rate": 1.3674393393032491e-05, + "loss": 8.1727, + "step": 936900 + }, + { + "epoch": 9.98, + "learning_rate": 1.3660320186393327e-05, + "loss": 8.1344, + "step": 937000 + }, + { + "epoch": 9.98, + "learning_rate": 1.3646253694400036e-05, + "loss": 8.1895, + "step": 937100 + }, + { + "epoch": 9.98, + "learning_rate": 1.3632193918146529e-05, + "loss": 8.2353, + "step": 937200 + }, + { + "epoch": 9.98, + "learning_rate": 1.3618140858726236e-05, + "loss": 8.2415, + "step": 937300 + }, + { + "epoch": 9.98, + "learning_rate": 1.3604094517232058e-05, + "loss": 8.1797, + "step": 937400 + }, + { + "epoch": 9.99, + "learning_rate": 1.3590054894756421e-05, + "loss": 8.2045, + "step": 937500 + }, + { + "epoch": 9.99, + "learning_rate": 1.3576021992391152e-05, + "loss": 8.1964, + "step": 937600 + }, + { + "epoch": 9.99, + "learning_rate": 1.356199581122759e-05, + "loss": 8.184, + "step": 937700 + }, + { + "epoch": 9.99, + "learning_rate": 1.3547976352356572e-05, + "loss": 8.2722, + "step": 937800 + }, + { + "epoch": 9.99, + "learning_rate": 1.3533963616868395e-05, + "loss": 8.202, + "step": 937900 + }, + { + "epoch": 9.99, + "learning_rate": 1.3519957605852818e-05, + "loss": 8.2332, + "step": 938000 + }, + { + "epoch": 9.99, + "learning_rate": 1.350595832039907e-05, + "loss": 8.2188, + "step": 938100 + }, + { + "epoch": 9.99, + "learning_rate": 1.3491965761595925e-05, + "loss": 8.166, + "step": 938200 + }, + { + "epoch": 9.99, + "learning_rate": 1.3477979930531559e-05, + "loss": 8.2373, + "step": 938300 + }, + { + "epoch": 10.0, + "learning_rate": 1.3464000828293643e-05, + "loss": 8.1679, + "step": 938400 + }, + { + "epoch": 10.0, + "learning_rate": 1.3450028455969333e-05, + "loss": 8.2053, + "step": 938500 + }, + { + "epoch": 10.0, + "learning_rate": 1.343606281464529e-05, + "loss": 8.1995, + "step": 938600 + }, + { + "epoch": 10.0, + "learning_rate": 1.3422103905407601e-05, + "loss": 8.2281, + "step": 938700 + }, + { + "epoch": 10.0, + "learning_rate": 1.3408151729341855e-05, + "loss": 8.1634, + "step": 938800 + }, + { + "epoch": 10.0, + "learning_rate": 1.3394206287533106e-05, + "loss": 8.2291, + "step": 938900 + }, + { + "epoch": 10.0, + "learning_rate": 1.3380267581065909e-05, + "loss": 8.1954, + "step": 939000 + }, + { + "epoch": 10.0, + "learning_rate": 1.3366335611024273e-05, + "loss": 8.1618, + "step": 939100 + }, + { + "epoch": 10.0, + "learning_rate": 1.3352410378491697e-05, + "loss": 8.2057, + "step": 939200 + }, + { + "epoch": 10.0, + "learning_rate": 1.3338491884551108e-05, + "loss": 8.256, + "step": 939300 + }, + { + "epoch": 10.01, + "learning_rate": 1.3324580130285014e-05, + "loss": 8.1333, + "step": 939400 + }, + { + "epoch": 10.01, + "learning_rate": 1.3310675116775262e-05, + "loss": 8.2268, + "step": 939500 + }, + { + "epoch": 10.01, + "learning_rate": 1.3296776845103308e-05, + "loss": 8.1492, + "step": 939600 + }, + { + "epoch": 10.01, + "learning_rate": 1.3282885316349969e-05, + "loss": 8.2131, + "step": 939700 + }, + { + "epoch": 10.01, + "learning_rate": 1.3269000531595643e-05, + "loss": 8.2392, + "step": 939800 + }, + { + "epoch": 10.01, + "learning_rate": 1.3255122491920125e-05, + "loss": 8.1958, + "step": 939900 + }, + { + "epoch": 10.01, + "learning_rate": 1.3241251198402693e-05, + "loss": 8.1085, + "step": 940000 + }, + { + "epoch": 10.01, + "learning_rate": 1.3227386652122153e-05, + "loss": 8.2271, + "step": 940100 + }, + { + "epoch": 10.01, + "learning_rate": 1.3213528854156743e-05, + "loss": 8.1938, + "step": 940200 + }, + { + "epoch": 10.02, + "learning_rate": 1.3199677805584187e-05, + "loss": 8.1345, + "step": 940300 + }, + { + "epoch": 10.02, + "learning_rate": 1.3185833507481637e-05, + "loss": 8.2376, + "step": 940400 + }, + { + "epoch": 10.02, + "learning_rate": 1.3171995960925865e-05, + "loss": 8.1593, + "step": 940500 + }, + { + "epoch": 10.02, + "learning_rate": 1.3158165166992898e-05, + "loss": 8.154, + "step": 940600 + }, + { + "epoch": 10.02, + "learning_rate": 1.3144341126758453e-05, + "loss": 8.1716, + "step": 940700 + }, + { + "epoch": 10.02, + "learning_rate": 1.3130523841297559e-05, + "loss": 8.2019, + "step": 940800 + }, + { + "epoch": 10.02, + "learning_rate": 1.311671331168487e-05, + "loss": 8.173, + "step": 940900 + }, + { + "epoch": 10.02, + "learning_rate": 1.3102909538994346e-05, + "loss": 8.1874, + "step": 941000 + }, + { + "epoch": 10.02, + "learning_rate": 1.3089112524299551e-05, + "loss": 8.2124, + "step": 941100 + }, + { + "epoch": 10.03, + "learning_rate": 1.3075322268673484e-05, + "loss": 8.1933, + "step": 941200 + }, + { + "epoch": 10.03, + "learning_rate": 1.3061538773188609e-05, + "loss": 8.1657, + "step": 941300 + }, + { + "epoch": 10.03, + "learning_rate": 1.3047762038916834e-05, + "loss": 8.2096, + "step": 941400 + }, + { + "epoch": 10.03, + "learning_rate": 1.3033992066929634e-05, + "loss": 8.2388, + "step": 941500 + }, + { + "epoch": 10.03, + "learning_rate": 1.3020228858297878e-05, + "loss": 8.1894, + "step": 941600 + }, + { + "epoch": 10.03, + "learning_rate": 1.3006472414091931e-05, + "loss": 8.1433, + "step": 941700 + }, + { + "epoch": 10.03, + "learning_rate": 1.2992722735381601e-05, + "loss": 8.2373, + "step": 941800 + }, + { + "epoch": 10.03, + "learning_rate": 1.2978979823236237e-05, + "loss": 8.2347, + "step": 941900 + }, + { + "epoch": 10.03, + "learning_rate": 1.296524367872467e-05, + "loss": 8.1138, + "step": 942000 + }, + { + "epoch": 10.03, + "learning_rate": 1.2951514302915068e-05, + "loss": 8.145, + "step": 942100 + }, + { + "epoch": 10.04, + "learning_rate": 1.2937791696875234e-05, + "loss": 8.2819, + "step": 942200 + }, + { + "epoch": 10.04, + "learning_rate": 1.2924075861672347e-05, + "loss": 8.1437, + "step": 942300 + }, + { + "epoch": 10.04, + "learning_rate": 1.2910366798373108e-05, + "loss": 8.2025, + "step": 942400 + }, + { + "epoch": 10.04, + "learning_rate": 1.2896664508043632e-05, + "loss": 8.196, + "step": 942500 + }, + { + "epoch": 10.04, + "learning_rate": 1.28829689917496e-05, + "loss": 8.1845, + "step": 942600 + }, + { + "epoch": 10.04, + "learning_rate": 1.2869280250556092e-05, + "loss": 8.1342, + "step": 942700 + }, + { + "epoch": 10.04, + "learning_rate": 1.2855598285527682e-05, + "loss": 8.2433, + "step": 942800 + }, + { + "epoch": 10.04, + "learning_rate": 1.284192309772838e-05, + "loss": 8.1988, + "step": 942900 + }, + { + "epoch": 10.04, + "learning_rate": 1.2828254688221786e-05, + "loss": 8.1478, + "step": 943000 + }, + { + "epoch": 10.05, + "learning_rate": 1.2814593058070834e-05, + "loss": 8.1854, + "step": 943100 + }, + { + "epoch": 10.05, + "learning_rate": 1.280093820833802e-05, + "loss": 8.1558, + "step": 943200 + }, + { + "epoch": 10.05, + "learning_rate": 1.278729014008524e-05, + "loss": 8.1016, + "step": 943300 + }, + { + "epoch": 10.05, + "learning_rate": 1.2773648854373954e-05, + "loss": 8.1282, + "step": 943400 + }, + { + "epoch": 10.05, + "learning_rate": 1.2760014352265038e-05, + "loss": 8.2123, + "step": 943500 + }, + { + "epoch": 10.05, + "learning_rate": 1.2746386634818841e-05, + "loss": 8.2481, + "step": 943600 + }, + { + "epoch": 10.05, + "learning_rate": 1.2732765703095162e-05, + "loss": 8.2323, + "step": 943700 + }, + { + "epoch": 10.05, + "learning_rate": 1.2719151558153353e-05, + "loss": 8.1789, + "step": 943800 + }, + { + "epoch": 10.05, + "learning_rate": 1.2705544201052167e-05, + "loss": 8.2337, + "step": 943900 + }, + { + "epoch": 10.05, + "learning_rate": 1.2691943632849811e-05, + "loss": 8.1671, + "step": 944000 + }, + { + "epoch": 10.06, + "learning_rate": 1.2678349854604078e-05, + "loss": 8.1763, + "step": 944100 + }, + { + "epoch": 10.06, + "learning_rate": 1.2664762867372115e-05, + "loss": 8.1523, + "step": 944200 + }, + { + "epoch": 10.06, + "learning_rate": 1.2651182672210582e-05, + "loss": 8.1774, + "step": 944300 + }, + { + "epoch": 10.06, + "learning_rate": 1.2637609270175599e-05, + "loss": 8.1874, + "step": 944400 + }, + { + "epoch": 10.06, + "learning_rate": 1.262404266232281e-05, + "loss": 8.2509, + "step": 944500 + }, + { + "epoch": 10.06, + "learning_rate": 1.2610482849707262e-05, + "loss": 8.2029, + "step": 944600 + }, + { + "epoch": 10.06, + "learning_rate": 1.259692983338352e-05, + "loss": 8.2327, + "step": 944700 + }, + { + "epoch": 10.06, + "learning_rate": 1.2583383614405574e-05, + "loss": 8.2123, + "step": 944800 + }, + { + "epoch": 10.06, + "learning_rate": 1.2569844193826952e-05, + "loss": 8.1815, + "step": 944900 + }, + { + "epoch": 10.07, + "learning_rate": 1.2556311572700608e-05, + "loss": 8.2849, + "step": 945000 + }, + { + "epoch": 10.07, + "learning_rate": 1.2542785752078957e-05, + "loss": 8.1665, + "step": 945100 + }, + { + "epoch": 10.07, + "learning_rate": 1.25292667330139e-05, + "loss": 8.1716, + "step": 945200 + }, + { + "epoch": 10.07, + "learning_rate": 1.2515754516556843e-05, + "loss": 8.1442, + "step": 945300 + }, + { + "epoch": 10.07, + "learning_rate": 1.2502249103758611e-05, + "loss": 8.1671, + "step": 945400 + }, + { + "epoch": 10.07, + "learning_rate": 1.2488750495669543e-05, + "loss": 8.181, + "step": 945500 + }, + { + "epoch": 10.07, + "learning_rate": 1.2475258693339375e-05, + "loss": 8.2515, + "step": 945600 + }, + { + "epoch": 10.07, + "learning_rate": 1.2461773697817425e-05, + "loss": 8.192, + "step": 945700 + }, + { + "epoch": 10.07, + "learning_rate": 1.2448295510152396e-05, + "loss": 8.1792, + "step": 945800 + }, + { + "epoch": 10.08, + "learning_rate": 1.2434824131392497e-05, + "loss": 8.2554, + "step": 945900 + }, + { + "epoch": 10.08, + "learning_rate": 1.2421359562585366e-05, + "loss": 8.2414, + "step": 946000 + }, + { + "epoch": 10.08, + "learning_rate": 1.2407901804778188e-05, + "loss": 8.2443, + "step": 946100 + }, + { + "epoch": 10.08, + "learning_rate": 1.2394450859017558e-05, + "loss": 8.1837, + "step": 946200 + }, + { + "epoch": 10.08, + "learning_rate": 1.2381006726349541e-05, + "loss": 8.2239, + "step": 946300 + }, + { + "epoch": 10.08, + "learning_rate": 1.2367569407819723e-05, + "loss": 8.2157, + "step": 946400 + }, + { + "epoch": 10.08, + "learning_rate": 1.23541389044731e-05, + "loss": 8.2171, + "step": 946500 + }, + { + "epoch": 10.08, + "learning_rate": 1.2340715217354171e-05, + "loss": 8.155, + "step": 946600 + }, + { + "epoch": 10.08, + "learning_rate": 1.2327298347506877e-05, + "loss": 8.2333, + "step": 946700 + }, + { + "epoch": 10.08, + "learning_rate": 1.2313888295974685e-05, + "loss": 8.1729, + "step": 946800 + }, + { + "epoch": 10.09, + "learning_rate": 1.2300485063800482e-05, + "loss": 8.2816, + "step": 946900 + }, + { + "epoch": 10.09, + "learning_rate": 1.2287088652026635e-05, + "loss": 8.1668, + "step": 947000 + }, + { + "epoch": 10.09, + "learning_rate": 1.2273699061694976e-05, + "loss": 8.1717, + "step": 947100 + }, + { + "epoch": 10.09, + "learning_rate": 1.2260316293846852e-05, + "loss": 8.2181, + "step": 947200 + }, + { + "epoch": 10.09, + "learning_rate": 1.2246940349522973e-05, + "loss": 8.2233, + "step": 947300 + }, + { + "epoch": 10.09, + "learning_rate": 1.2233571229763664e-05, + "loss": 8.122, + "step": 947400 + }, + { + "epoch": 10.09, + "learning_rate": 1.2220208935608579e-05, + "loss": 8.1689, + "step": 947500 + }, + { + "epoch": 10.09, + "learning_rate": 1.2206853468096968e-05, + "loss": 8.182, + "step": 947600 + }, + { + "epoch": 10.09, + "learning_rate": 1.219350482826742e-05, + "loss": 8.1588, + "step": 947700 + }, + { + "epoch": 10.1, + "learning_rate": 1.2180163017158109e-05, + "loss": 8.1783, + "step": 947800 + }, + { + "epoch": 10.1, + "learning_rate": 1.2166828035806598e-05, + "loss": 8.129, + "step": 947900 + }, + { + "epoch": 10.1, + "learning_rate": 1.2153499885250009e-05, + "loss": 8.231, + "step": 948000 + }, + { + "epoch": 10.1, + "learning_rate": 1.2140178566524784e-05, + "loss": 8.2083, + "step": 948100 + }, + { + "epoch": 10.1, + "learning_rate": 1.2126864080666966e-05, + "loss": 8.1452, + "step": 948200 + }, + { + "epoch": 10.1, + "learning_rate": 1.2113556428712069e-05, + "loss": 8.1884, + "step": 948300 + }, + { + "epoch": 10.1, + "learning_rate": 1.2100255611694955e-05, + "loss": 8.1916, + "step": 948400 + }, + { + "epoch": 10.1, + "learning_rate": 1.2086961630650073e-05, + "loss": 8.1832, + "step": 948500 + }, + { + "epoch": 10.1, + "learning_rate": 1.2073674486611275e-05, + "loss": 8.1302, + "step": 948600 + }, + { + "epoch": 10.1, + "learning_rate": 1.2060394180611955e-05, + "loss": 8.2187, + "step": 948700 + }, + { + "epoch": 10.11, + "learning_rate": 1.2047120713684845e-05, + "loss": 8.277, + "step": 948800 + }, + { + "epoch": 10.11, + "learning_rate": 1.2033854086862284e-05, + "loss": 8.2358, + "step": 948900 + }, + { + "epoch": 10.11, + "learning_rate": 1.202059430117597e-05, + "loss": 8.2254, + "step": 949000 + }, + { + "epoch": 10.11, + "learning_rate": 1.2007341357657199e-05, + "loss": 8.1365, + "step": 949100 + }, + { + "epoch": 10.11, + "learning_rate": 1.1994095257336557e-05, + "loss": 8.2179, + "step": 949200 + }, + { + "epoch": 10.11, + "learning_rate": 1.1980856001244266e-05, + "loss": 8.1517, + "step": 949300 + }, + { + "epoch": 10.11, + "learning_rate": 1.1967623590409904e-05, + "loss": 8.1289, + "step": 949400 + }, + { + "epoch": 10.11, + "learning_rate": 1.1954398025862579e-05, + "loss": 8.2721, + "step": 949500 + }, + { + "epoch": 10.11, + "learning_rate": 1.1941179308630813e-05, + "loss": 8.1436, + "step": 949600 + }, + { + "epoch": 10.12, + "learning_rate": 1.1927967439742672e-05, + "loss": 8.2139, + "step": 949700 + }, + { + "epoch": 10.12, + "learning_rate": 1.1914762420225623e-05, + "loss": 8.1256, + "step": 949800 + }, + { + "epoch": 10.12, + "learning_rate": 1.1901564251106623e-05, + "loss": 8.2116, + "step": 949900 + }, + { + "epoch": 10.12, + "learning_rate": 1.1888372933412073e-05, + "loss": 8.2489, + "step": 950000 + }, + { + "epoch": 10.12, + "learning_rate": 1.1875188468167897e-05, + "loss": 8.2283, + "step": 950100 + }, + { + "epoch": 10.12, + "learning_rate": 1.1862010856399441e-05, + "loss": 8.2198, + "step": 950200 + }, + { + "epoch": 10.12, + "learning_rate": 1.1848840099131508e-05, + "loss": 8.164, + "step": 950300 + }, + { + "epoch": 10.12, + "learning_rate": 1.1835676197388435e-05, + "loss": 8.1825, + "step": 950400 + }, + { + "epoch": 10.12, + "learning_rate": 1.1822519152193944e-05, + "loss": 8.2286, + "step": 950500 + }, + { + "epoch": 10.13, + "learning_rate": 1.1809368964571266e-05, + "loss": 8.2241, + "step": 950600 + }, + { + "epoch": 10.13, + "learning_rate": 1.1796225635543078e-05, + "loss": 8.2221, + "step": 950700 + }, + { + "epoch": 10.13, + "learning_rate": 1.1783089166131578e-05, + "loss": 8.15, + "step": 950800 + }, + { + "epoch": 10.13, + "learning_rate": 1.1769959557358367e-05, + "loss": 8.2036, + "step": 950900 + }, + { + "epoch": 10.13, + "learning_rate": 1.1756836810244542e-05, + "loss": 8.2288, + "step": 951000 + }, + { + "epoch": 10.13, + "learning_rate": 1.174372092581063e-05, + "loss": 8.2081, + "step": 951100 + }, + { + "epoch": 10.13, + "learning_rate": 1.1730611905076694e-05, + "loss": 8.2371, + "step": 951200 + }, + { + "epoch": 10.13, + "learning_rate": 1.1717509749062217e-05, + "loss": 8.1379, + "step": 951300 + }, + { + "epoch": 10.13, + "learning_rate": 1.170441445878615e-05, + "loss": 8.1558, + "step": 951400 + }, + { + "epoch": 10.13, + "learning_rate": 1.1691326035266881e-05, + "loss": 8.1261, + "step": 951500 + }, + { + "epoch": 10.14, + "learning_rate": 1.167824447952236e-05, + "loss": 8.2145, + "step": 951600 + }, + { + "epoch": 10.14, + "learning_rate": 1.1665169792569908e-05, + "loss": 8.2348, + "step": 951700 + }, + { + "epoch": 10.14, + "learning_rate": 1.1652101975426355e-05, + "loss": 8.2714, + "step": 951800 + }, + { + "epoch": 10.14, + "learning_rate": 1.1639041029107944e-05, + "loss": 8.2394, + "step": 951900 + }, + { + "epoch": 10.14, + "learning_rate": 1.1625986954630498e-05, + "loss": 8.241, + "step": 952000 + }, + { + "epoch": 10.14, + "learning_rate": 1.1612939753009189e-05, + "loss": 8.2724, + "step": 952100 + }, + { + "epoch": 10.14, + "learning_rate": 1.1599899425258698e-05, + "loss": 8.1438, + "step": 952200 + }, + { + "epoch": 10.14, + "learning_rate": 1.158686597239318e-05, + "loss": 8.1778, + "step": 952300 + }, + { + "epoch": 10.14, + "learning_rate": 1.1573839395426257e-05, + "loss": 8.1368, + "step": 952400 + }, + { + "epoch": 10.15, + "learning_rate": 1.1560819695371005e-05, + "loss": 8.1767, + "step": 952500 + }, + { + "epoch": 10.15, + "learning_rate": 1.1547806873239941e-05, + "loss": 8.1869, + "step": 952600 + }, + { + "epoch": 10.15, + "learning_rate": 1.1534800930045109e-05, + "loss": 8.2415, + "step": 952700 + }, + { + "epoch": 10.15, + "learning_rate": 1.1521801866797976e-05, + "loss": 8.1569, + "step": 952800 + }, + { + "epoch": 10.15, + "learning_rate": 1.150880968450947e-05, + "loss": 8.2481, + "step": 952900 + }, + { + "epoch": 10.15, + "learning_rate": 1.1495824384189979e-05, + "loss": 8.2147, + "step": 953000 + }, + { + "epoch": 10.15, + "learning_rate": 1.148284596684941e-05, + "loss": 8.0579, + "step": 953100 + }, + { + "epoch": 10.15, + "learning_rate": 1.1469874433497085e-05, + "loss": 8.1581, + "step": 953200 + }, + { + "epoch": 10.15, + "learning_rate": 1.1456909785141778e-05, + "loss": 8.2225, + "step": 953300 + }, + { + "epoch": 10.15, + "learning_rate": 1.1443952022791749e-05, + "loss": 8.2299, + "step": 953400 + }, + { + "epoch": 10.16, + "learning_rate": 1.1431001147454755e-05, + "loss": 8.1994, + "step": 953500 + }, + { + "epoch": 10.16, + "learning_rate": 1.1418057160137985e-05, + "loss": 8.2251, + "step": 953600 + }, + { + "epoch": 10.16, + "learning_rate": 1.1405120061848074e-05, + "loss": 8.1506, + "step": 953700 + }, + { + "epoch": 10.16, + "learning_rate": 1.139218985359113e-05, + "loss": 8.2464, + "step": 953800 + }, + { + "epoch": 10.16, + "learning_rate": 1.1379266536372779e-05, + "loss": 8.2261, + "step": 953900 + }, + { + "epoch": 10.16, + "learning_rate": 1.1366350111198043e-05, + "loss": 8.1921, + "step": 954000 + }, + { + "epoch": 10.16, + "learning_rate": 1.1353440579071428e-05, + "loss": 8.2493, + "step": 954100 + }, + { + "epoch": 10.16, + "learning_rate": 1.134053794099691e-05, + "loss": 8.1221, + "step": 954200 + }, + { + "epoch": 10.16, + "learning_rate": 1.1327642197977973e-05, + "loss": 8.178, + "step": 954300 + }, + { + "epoch": 10.17, + "learning_rate": 1.1314753351017438e-05, + "loss": 8.1999, + "step": 954400 + }, + { + "epoch": 10.17, + "learning_rate": 1.1301871401117726e-05, + "loss": 8.1542, + "step": 954500 + }, + { + "epoch": 10.17, + "learning_rate": 1.1288996349280679e-05, + "loss": 8.1818, + "step": 954600 + }, + { + "epoch": 10.17, + "learning_rate": 1.1276128196507563e-05, + "loss": 8.2005, + "step": 954700 + }, + { + "epoch": 10.17, + "learning_rate": 1.1263266943799145e-05, + "loss": 8.1507, + "step": 954800 + }, + { + "epoch": 10.17, + "learning_rate": 1.1250412592155635e-05, + "loss": 8.2713, + "step": 954900 + }, + { + "epoch": 10.17, + "learning_rate": 1.1237565142576756e-05, + "loss": 8.2523, + "step": 955000 + }, + { + "epoch": 10.17, + "learning_rate": 1.1224724596061586e-05, + "loss": 8.2338, + "step": 955100 + }, + { + "epoch": 10.17, + "learning_rate": 1.1211890953608806e-05, + "loss": 8.153, + "step": 955200 + }, + { + "epoch": 10.18, + "learning_rate": 1.1199064216216438e-05, + "loss": 8.195, + "step": 955300 + }, + { + "epoch": 10.18, + "learning_rate": 1.1186244384882071e-05, + "loss": 8.2022, + "step": 955400 + }, + { + "epoch": 10.18, + "learning_rate": 1.1173431460602646e-05, + "loss": 8.1665, + "step": 955500 + }, + { + "epoch": 10.18, + "learning_rate": 1.1160625444374661e-05, + "loss": 8.2578, + "step": 955600 + }, + { + "epoch": 10.18, + "learning_rate": 1.1147826337194023e-05, + "loss": 8.2052, + "step": 955700 + }, + { + "epoch": 10.18, + "learning_rate": 1.1135034140056167e-05, + "loss": 8.2371, + "step": 955800 + }, + { + "epoch": 10.18, + "learning_rate": 1.1122248853955874e-05, + "loss": 8.1687, + "step": 955900 + }, + { + "epoch": 10.18, + "learning_rate": 1.1109470479887508e-05, + "loss": 8.1997, + "step": 956000 + }, + { + "epoch": 10.18, + "learning_rate": 1.1096699018844825e-05, + "loss": 8.2682, + "step": 956100 + }, + { + "epoch": 10.18, + "learning_rate": 1.1083934471821055e-05, + "loss": 8.1696, + "step": 956200 + }, + { + "epoch": 10.19, + "learning_rate": 1.1071176839808906e-05, + "loss": 8.2294, + "step": 956300 + }, + { + "epoch": 10.19, + "learning_rate": 1.1058426123800547e-05, + "loss": 8.1466, + "step": 956400 + }, + { + "epoch": 10.19, + "learning_rate": 1.1045682324787598e-05, + "loss": 8.2195, + "step": 956500 + }, + { + "epoch": 10.19, + "learning_rate": 1.1032945443761134e-05, + "loss": 8.1293, + "step": 956600 + }, + { + "epoch": 10.19, + "learning_rate": 1.1020215481711738e-05, + "loss": 8.2236, + "step": 956700 + }, + { + "epoch": 10.19, + "learning_rate": 1.1007492439629364e-05, + "loss": 8.2022, + "step": 956800 + }, + { + "epoch": 10.19, + "learning_rate": 1.0994776318503563e-05, + "loss": 8.1695, + "step": 956900 + }, + { + "epoch": 10.19, + "learning_rate": 1.098206711932318e-05, + "loss": 8.1065, + "step": 957000 + }, + { + "epoch": 10.19, + "learning_rate": 1.0969364843076679e-05, + "loss": 8.1582, + "step": 957100 + }, + { + "epoch": 10.2, + "learning_rate": 1.0956669490751892e-05, + "loss": 8.1357, + "step": 957200 + }, + { + "epoch": 10.2, + "learning_rate": 1.094398106333614e-05, + "loss": 8.2216, + "step": 957300 + }, + { + "epoch": 10.2, + "learning_rate": 1.0931299561816189e-05, + "loss": 8.2217, + "step": 957400 + }, + { + "epoch": 10.2, + "learning_rate": 1.0918624987178294e-05, + "loss": 8.1743, + "step": 957500 + }, + { + "epoch": 10.2, + "learning_rate": 1.0905957340408168e-05, + "loss": 8.2316, + "step": 957600 + }, + { + "epoch": 10.2, + "learning_rate": 1.0893296622490968e-05, + "loss": 8.1573, + "step": 957700 + }, + { + "epoch": 10.2, + "learning_rate": 1.0880642834411293e-05, + "loss": 8.185, + "step": 957800 + }, + { + "epoch": 10.2, + "learning_rate": 1.0867995977153277e-05, + "loss": 8.1789, + "step": 957900 + }, + { + "epoch": 10.2, + "learning_rate": 1.0855356051700439e-05, + "loss": 8.1478, + "step": 958000 + }, + { + "epoch": 10.21, + "learning_rate": 1.0842723059035786e-05, + "loss": 8.1523, + "step": 958100 + }, + { + "epoch": 10.21, + "learning_rate": 1.0830097000141781e-05, + "loss": 8.1055, + "step": 958200 + }, + { + "epoch": 10.21, + "learning_rate": 1.0817477876000381e-05, + "loss": 8.1793, + "step": 958300 + }, + { + "epoch": 10.21, + "learning_rate": 1.0804865687592957e-05, + "loss": 8.1902, + "step": 958400 + }, + { + "epoch": 10.21, + "learning_rate": 1.079226043590037e-05, + "loss": 8.2027, + "step": 958500 + }, + { + "epoch": 10.21, + "learning_rate": 1.07796621219029e-05, + "loss": 8.2307, + "step": 958600 + }, + { + "epoch": 10.21, + "learning_rate": 1.0767070746580366e-05, + "loss": 8.2247, + "step": 958700 + }, + { + "epoch": 10.21, + "learning_rate": 1.0754486310911982e-05, + "loss": 8.1258, + "step": 958800 + }, + { + "epoch": 10.21, + "learning_rate": 1.0741908815876422e-05, + "loss": 8.1581, + "step": 958900 + }, + { + "epoch": 10.21, + "learning_rate": 1.0729338262451871e-05, + "loss": 8.143, + "step": 959000 + }, + { + "epoch": 10.22, + "learning_rate": 1.0716774651615923e-05, + "loss": 8.2298, + "step": 959100 + }, + { + "epoch": 10.22, + "learning_rate": 1.0704217984345655e-05, + "loss": 8.1854, + "step": 959200 + }, + { + "epoch": 10.22, + "learning_rate": 1.0691668261617581e-05, + "loss": 8.1567, + "step": 959300 + }, + { + "epoch": 10.22, + "learning_rate": 1.0679125484407749e-05, + "loss": 8.3245, + "step": 959400 + }, + { + "epoch": 10.22, + "learning_rate": 1.0666589653691562e-05, + "loss": 8.2569, + "step": 959500 + }, + { + "epoch": 10.22, + "learning_rate": 1.0654060770443941e-05, + "loss": 8.1453, + "step": 959600 + }, + { + "epoch": 10.22, + "learning_rate": 1.0641538835639253e-05, + "loss": 8.2035, + "step": 959700 + }, + { + "epoch": 10.22, + "learning_rate": 1.0629023850251363e-05, + "loss": 8.2112, + "step": 959800 + }, + { + "epoch": 10.22, + "learning_rate": 1.0616515815253547e-05, + "loss": 8.3068, + "step": 959900 + }, + { + "epoch": 10.23, + "learning_rate": 1.060401473161855e-05, + "loss": 8.1344, + "step": 960000 + }, + { + "epoch": 10.23, + "learning_rate": 1.059152060031855e-05, + "loss": 8.1425, + "step": 960100 + }, + { + "epoch": 10.23, + "learning_rate": 1.057903342232528e-05, + "loss": 8.1327, + "step": 960200 + }, + { + "epoch": 10.23, + "learning_rate": 1.056655319860984e-05, + "loss": 8.2386, + "step": 960300 + }, + { + "epoch": 10.23, + "learning_rate": 1.0554079930142824e-05, + "loss": 8.2096, + "step": 960400 + }, + { + "epoch": 10.23, + "learning_rate": 1.0541613617894241e-05, + "loss": 8.2026, + "step": 960500 + }, + { + "epoch": 10.23, + "learning_rate": 1.0529154262833663e-05, + "loss": 8.1331, + "step": 960600 + }, + { + "epoch": 10.23, + "learning_rate": 1.0516701865930012e-05, + "loss": 8.1253, + "step": 960700 + }, + { + "epoch": 10.23, + "learning_rate": 1.0504256428151704e-05, + "loss": 8.1857, + "step": 960800 + }, + { + "epoch": 10.23, + "learning_rate": 1.0491817950466654e-05, + "loss": 8.1189, + "step": 960900 + }, + { + "epoch": 10.24, + "learning_rate": 1.0479386433842198e-05, + "loss": 8.1366, + "step": 961000 + }, + { + "epoch": 10.24, + "learning_rate": 1.0466961879245129e-05, + "loss": 8.1669, + "step": 961100 + }, + { + "epoch": 10.24, + "learning_rate": 1.0454544287641687e-05, + "loss": 8.2389, + "step": 961200 + }, + { + "epoch": 10.24, + "learning_rate": 1.0442133659997622e-05, + "loss": 8.1727, + "step": 961300 + }, + { + "epoch": 10.24, + "learning_rate": 1.0429729997278093e-05, + "loss": 8.2072, + "step": 961400 + }, + { + "epoch": 10.24, + "learning_rate": 1.0417333300447741e-05, + "loss": 8.2054, + "step": 961500 + }, + { + "epoch": 10.24, + "learning_rate": 1.0404943570470637e-05, + "loss": 8.1784, + "step": 961600 + }, + { + "epoch": 10.24, + "learning_rate": 1.0392560808310358e-05, + "loss": 8.1249, + "step": 961700 + }, + { + "epoch": 10.24, + "learning_rate": 1.0380185014929911e-05, + "loss": 8.2095, + "step": 961800 + }, + { + "epoch": 10.25, + "learning_rate": 1.0367816191291757e-05, + "loss": 8.1973, + "step": 961900 + }, + { + "epoch": 10.25, + "learning_rate": 1.0355454338357796e-05, + "loss": 8.2608, + "step": 962000 + }, + { + "epoch": 10.25, + "learning_rate": 1.0343099457089466e-05, + "loss": 8.1872, + "step": 962100 + }, + { + "epoch": 10.25, + "learning_rate": 1.0330751548447549e-05, + "loss": 8.1961, + "step": 962200 + }, + { + "epoch": 10.25, + "learning_rate": 1.0318410613392372e-05, + "loss": 8.0839, + "step": 962300 + }, + { + "epoch": 10.25, + "learning_rate": 1.0306076652883679e-05, + "loss": 8.1627, + "step": 962400 + }, + { + "epoch": 10.25, + "learning_rate": 1.0293749667880725e-05, + "loss": 8.2079, + "step": 962500 + }, + { + "epoch": 10.25, + "learning_rate": 1.0281429659342112e-05, + "loss": 8.0855, + "step": 962600 + }, + { + "epoch": 10.25, + "learning_rate": 1.0269116628226028e-05, + "loss": 8.1397, + "step": 962700 + }, + { + "epoch": 10.26, + "learning_rate": 1.0256810575490006e-05, + "loss": 8.1884, + "step": 962800 + }, + { + "epoch": 10.26, + "learning_rate": 1.0244511502091148e-05, + "loss": 8.1596, + "step": 962900 + }, + { + "epoch": 10.26, + "learning_rate": 1.023221940898591e-05, + "loss": 8.1621, + "step": 963000 + }, + { + "epoch": 10.26, + "learning_rate": 1.0219934297130252e-05, + "loss": 8.1641, + "step": 963100 + }, + { + "epoch": 10.26, + "learning_rate": 1.020765616747964e-05, + "loss": 8.1819, + "step": 963200 + }, + { + "epoch": 10.26, + "learning_rate": 1.0195385020988868e-05, + "loss": 8.2062, + "step": 963300 + }, + { + "epoch": 10.26, + "learning_rate": 1.0183120858612317e-05, + "loss": 8.1963, + "step": 963400 + }, + { + "epoch": 10.26, + "learning_rate": 1.0170863681303733e-05, + "loss": 8.2101, + "step": 963500 + }, + { + "epoch": 10.26, + "learning_rate": 1.0158613490016432e-05, + "loss": 8.1169, + "step": 963600 + }, + { + "epoch": 10.26, + "learning_rate": 1.0146370285703012e-05, + "loss": 8.1575, + "step": 963700 + }, + { + "epoch": 10.27, + "learning_rate": 1.0134134069315704e-05, + "loss": 8.196, + "step": 963800 + }, + { + "epoch": 10.27, + "learning_rate": 1.0121904841806074e-05, + "loss": 8.1826, + "step": 963900 + }, + { + "epoch": 10.27, + "learning_rate": 1.0109682604125248e-05, + "loss": 8.1579, + "step": 964000 + }, + { + "epoch": 10.27, + "learning_rate": 1.0097467357223678e-05, + "loss": 8.2314, + "step": 964100 + }, + { + "epoch": 10.27, + "learning_rate": 1.0085259102051393e-05, + "loss": 8.2211, + "step": 964200 + }, + { + "epoch": 10.27, + "learning_rate": 1.0073057839557832e-05, + "loss": 8.1835, + "step": 964300 + }, + { + "epoch": 10.27, + "learning_rate": 1.006086357069186e-05, + "loss": 8.2325, + "step": 964400 + }, + { + "epoch": 10.27, + "learning_rate": 1.0048676296401826e-05, + "loss": 8.197, + "step": 964500 + }, + { + "epoch": 10.27, + "learning_rate": 1.0036496017635567e-05, + "loss": 8.1801, + "step": 964600 + }, + { + "epoch": 10.28, + "learning_rate": 1.002432273534033e-05, + "loss": 8.2066, + "step": 964700 + }, + { + "epoch": 10.28, + "learning_rate": 1.0012156450462828e-05, + "loss": 8.2172, + "step": 964800 + }, + { + "epoch": 10.28, + "learning_rate": 9.999997163949227e-06, + "loss": 8.1414, + "step": 964900 + }, + { + "epoch": 10.28, + "learning_rate": 9.987844876745155e-06, + "loss": 8.1349, + "step": 965000 + }, + { + "epoch": 10.28, + "learning_rate": 9.97569958979575e-06, + "loss": 8.2014, + "step": 965100 + }, + { + "epoch": 10.28, + "learning_rate": 9.963561304045465e-06, + "loss": 8.1431, + "step": 965200 + }, + { + "epoch": 10.28, + "learning_rate": 9.951430020438358e-06, + "loss": 8.1298, + "step": 965300 + }, + { + "epoch": 10.28, + "learning_rate": 9.93930573991787e-06, + "loss": 8.1672, + "step": 965400 + }, + { + "epoch": 10.28, + "learning_rate": 9.927188463426895e-06, + "loss": 8.1995, + "step": 965500 + }, + { + "epoch": 10.28, + "learning_rate": 9.91507819190779e-06, + "loss": 8.2457, + "step": 965600 + }, + { + "epoch": 10.29, + "learning_rate": 9.902974926302389e-06, + "loss": 8.1795, + "step": 965700 + }, + { + "epoch": 10.29, + "learning_rate": 9.890878667551962e-06, + "loss": 8.1748, + "step": 965800 + }, + { + "epoch": 10.29, + "learning_rate": 9.878789416597234e-06, + "loss": 8.1353, + "step": 965900 + }, + { + "epoch": 10.29, + "learning_rate": 9.866707174378353e-06, + "loss": 8.1316, + "step": 966000 + }, + { + "epoch": 10.29, + "learning_rate": 9.854631941835025e-06, + "loss": 8.1967, + "step": 966100 + }, + { + "epoch": 10.29, + "learning_rate": 9.842563719906284e-06, + "loss": 8.1211, + "step": 966200 + }, + { + "epoch": 10.29, + "learning_rate": 9.830502509530715e-06, + "loss": 8.2056, + "step": 966300 + }, + { + "epoch": 10.29, + "learning_rate": 9.818448311646278e-06, + "loss": 8.1773, + "step": 966400 + }, + { + "epoch": 10.29, + "learning_rate": 9.806401127190468e-06, + "loss": 8.2357, + "step": 966500 + }, + { + "epoch": 10.3, + "learning_rate": 9.79436095710019e-06, + "loss": 8.1156, + "step": 966600 + }, + { + "epoch": 10.3, + "learning_rate": 9.782327802311786e-06, + "loss": 8.2718, + "step": 966700 + }, + { + "epoch": 10.3, + "learning_rate": 9.770301663761084e-06, + "loss": 8.2753, + "step": 966800 + }, + { + "epoch": 10.3, + "learning_rate": 9.758282542383379e-06, + "loss": 8.201, + "step": 966900 + }, + { + "epoch": 10.3, + "learning_rate": 9.746270439113393e-06, + "loss": 8.1988, + "step": 967000 + }, + { + "epoch": 10.3, + "learning_rate": 9.734265354885263e-06, + "loss": 8.1639, + "step": 967100 + }, + { + "epoch": 10.3, + "learning_rate": 9.722267290632691e-06, + "loss": 8.2062, + "step": 967200 + }, + { + "epoch": 10.3, + "learning_rate": 9.710276247288742e-06, + "loss": 8.1986, + "step": 967300 + }, + { + "epoch": 10.3, + "learning_rate": 9.698292225785954e-06, + "loss": 8.1745, + "step": 967400 + }, + { + "epoch": 10.31, + "learning_rate": 9.68631522705632e-06, + "loss": 8.1713, + "step": 967500 + }, + { + "epoch": 10.31, + "learning_rate": 9.674345252031314e-06, + "loss": 8.1903, + "step": 967600 + }, + { + "epoch": 10.31, + "learning_rate": 9.662382301641826e-06, + "loss": 8.208, + "step": 967700 + }, + { + "epoch": 10.31, + "learning_rate": 9.650426376818222e-06, + "loss": 8.2211, + "step": 967800 + }, + { + "epoch": 10.31, + "learning_rate": 9.638477478490304e-06, + "loss": 8.0597, + "step": 967900 + }, + { + "epoch": 10.31, + "learning_rate": 9.62653560758736e-06, + "loss": 8.254, + "step": 968000 + }, + { + "epoch": 10.31, + "learning_rate": 9.614600765038096e-06, + "loss": 8.2203, + "step": 968100 + }, + { + "epoch": 10.31, + "learning_rate": 9.602672951770698e-06, + "loss": 8.2032, + "step": 968200 + }, + { + "epoch": 10.31, + "learning_rate": 9.590752168712758e-06, + "loss": 8.1988, + "step": 968300 + }, + { + "epoch": 10.31, + "learning_rate": 9.578838416791402e-06, + "loss": 8.1043, + "step": 968400 + }, + { + "epoch": 10.32, + "learning_rate": 9.566931696933157e-06, + "loss": 8.1953, + "step": 968500 + }, + { + "epoch": 10.32, + "learning_rate": 9.55503201006398e-06, + "loss": 8.2054, + "step": 968600 + }, + { + "epoch": 10.32, + "learning_rate": 9.543139357109321e-06, + "loss": 8.1744, + "step": 968700 + }, + { + "epoch": 10.32, + "learning_rate": 9.531253738994116e-06, + "loss": 8.2154, + "step": 968800 + }, + { + "epoch": 10.32, + "learning_rate": 9.51937515664264e-06, + "loss": 8.2571, + "step": 968900 + }, + { + "epoch": 10.32, + "learning_rate": 9.50750361097874e-06, + "loss": 8.1436, + "step": 969000 + }, + { + "epoch": 10.32, + "learning_rate": 9.495639102925636e-06, + "loss": 8.1516, + "step": 969100 + }, + { + "epoch": 10.32, + "learning_rate": 9.483781633406063e-06, + "loss": 8.1998, + "step": 969200 + }, + { + "epoch": 10.32, + "learning_rate": 9.471931203342176e-06, + "loss": 8.1484, + "step": 969300 + }, + { + "epoch": 10.33, + "learning_rate": 9.46008781365555e-06, + "loss": 8.1863, + "step": 969400 + }, + { + "epoch": 10.33, + "learning_rate": 9.448251465267288e-06, + "loss": 8.213, + "step": 969500 + }, + { + "epoch": 10.33, + "learning_rate": 9.43642215909789e-06, + "loss": 8.1411, + "step": 969600 + }, + { + "epoch": 10.33, + "learning_rate": 9.42459989606731e-06, + "loss": 8.226, + "step": 969700 + }, + { + "epoch": 10.33, + "learning_rate": 9.412784677094955e-06, + "loss": 8.1991, + "step": 969800 + }, + { + "epoch": 10.33, + "learning_rate": 9.400976503099757e-06, + "loss": 8.1311, + "step": 969900 + }, + { + "epoch": 10.33, + "learning_rate": 9.38917537499996e-06, + "loss": 8.2088, + "step": 970000 + }, + { + "epoch": 10.33, + "learning_rate": 9.377381293713395e-06, + "loss": 8.1943, + "step": 970100 + }, + { + "epoch": 10.33, + "learning_rate": 9.365594260157251e-06, + "loss": 8.2018, + "step": 970200 + }, + { + "epoch": 10.33, + "learning_rate": 9.353814275248262e-06, + "loss": 8.1374, + "step": 970300 + }, + { + "epoch": 10.34, + "learning_rate": 9.342041339902485e-06, + "loss": 8.1945, + "step": 970400 + }, + { + "epoch": 10.34, + "learning_rate": 9.330275455035563e-06, + "loss": 8.3151, + "step": 970500 + }, + { + "epoch": 10.34, + "learning_rate": 9.31851662156249e-06, + "loss": 8.25, + "step": 970600 + }, + { + "epoch": 10.34, + "learning_rate": 9.306764840397796e-06, + "loss": 8.205, + "step": 970700 + }, + { + "epoch": 10.34, + "learning_rate": 9.295020112455355e-06, + "loss": 8.273, + "step": 970800 + }, + { + "epoch": 10.34, + "learning_rate": 9.283282438648621e-06, + "loss": 8.1946, + "step": 970900 + }, + { + "epoch": 10.34, + "learning_rate": 9.2715518198904e-06, + "loss": 8.1551, + "step": 971000 + }, + { + "epoch": 10.34, + "learning_rate": 9.259828257092984e-06, + "loss": 8.2244, + "step": 971100 + }, + { + "epoch": 10.34, + "learning_rate": 9.248111751168098e-06, + "loss": 8.2521, + "step": 971200 + }, + { + "epoch": 10.35, + "learning_rate": 9.23640230302697e-06, + "loss": 8.1367, + "step": 971300 + }, + { + "epoch": 10.35, + "learning_rate": 9.224699913580259e-06, + "loss": 8.1766, + "step": 971400 + }, + { + "epoch": 10.35, + "learning_rate": 9.213004583738004e-06, + "loss": 8.1563, + "step": 971500 + }, + { + "epoch": 10.35, + "learning_rate": 9.201316314409802e-06, + "loss": 8.1209, + "step": 971600 + }, + { + "epoch": 10.35, + "learning_rate": 9.189635106504613e-06, + "loss": 8.1086, + "step": 971700 + }, + { + "epoch": 10.35, + "learning_rate": 9.177960960930931e-06, + "loss": 8.1274, + "step": 971800 + }, + { + "epoch": 10.35, + "learning_rate": 9.1662938785966e-06, + "loss": 8.1674, + "step": 971900 + }, + { + "epoch": 10.35, + "learning_rate": 9.154633860409023e-06, + "loss": 8.254, + "step": 972000 + }, + { + "epoch": 10.35, + "learning_rate": 9.142980907274968e-06, + "loss": 8.2121, + "step": 972100 + }, + { + "epoch": 10.36, + "learning_rate": 9.131335020100706e-06, + "loss": 8.1986, + "step": 972200 + }, + { + "epoch": 10.36, + "learning_rate": 9.119696199791893e-06, + "loss": 8.1979, + "step": 972300 + }, + { + "epoch": 10.36, + "learning_rate": 9.10806444725375e-06, + "loss": 8.1555, + "step": 972400 + }, + { + "epoch": 10.36, + "learning_rate": 9.09643976339084e-06, + "loss": 8.1974, + "step": 972500 + }, + { + "epoch": 10.36, + "learning_rate": 9.08482214910722e-06, + "loss": 8.1635, + "step": 972600 + }, + { + "epoch": 10.36, + "learning_rate": 9.073211605306387e-06, + "loss": 8.2975, + "step": 972700 + }, + { + "epoch": 10.36, + "learning_rate": 9.061608132891309e-06, + "loss": 8.2128, + "step": 972800 + }, + { + "epoch": 10.36, + "learning_rate": 9.050011732764386e-06, + "loss": 8.1807, + "step": 972900 + }, + { + "epoch": 10.36, + "learning_rate": 9.038422405827473e-06, + "loss": 8.1913, + "step": 973000 + }, + { + "epoch": 10.36, + "learning_rate": 9.026840152981842e-06, + "loss": 8.2096, + "step": 973100 + }, + { + "epoch": 10.37, + "learning_rate": 9.01526497512829e-06, + "loss": 8.1398, + "step": 973200 + }, + { + "epoch": 10.37, + "learning_rate": 9.003696873167011e-06, + "loss": 8.2382, + "step": 973300 + }, + { + "epoch": 10.37, + "learning_rate": 8.992135847997629e-06, + "loss": 8.1362, + "step": 973400 + }, + { + "epoch": 10.37, + "learning_rate": 8.98058190051927e-06, + "loss": 8.1822, + "step": 973500 + }, + { + "epoch": 10.37, + "learning_rate": 8.969035031630491e-06, + "loss": 8.1122, + "step": 973600 + }, + { + "epoch": 10.37, + "learning_rate": 8.957495242229286e-06, + "loss": 8.1355, + "step": 973700 + }, + { + "epoch": 10.37, + "learning_rate": 8.945962533213081e-06, + "loss": 8.1419, + "step": 973800 + }, + { + "epoch": 10.37, + "learning_rate": 8.934436905478816e-06, + "loss": 8.184, + "step": 973900 + }, + { + "epoch": 10.37, + "learning_rate": 8.922918359922817e-06, + "loss": 8.2654, + "step": 974000 + }, + { + "epoch": 10.38, + "learning_rate": 8.911406897440888e-06, + "loss": 8.1689, + "step": 974100 + }, + { + "epoch": 10.38, + "learning_rate": 8.899902518928272e-06, + "loss": 8.1686, + "step": 974200 + }, + { + "epoch": 10.38, + "learning_rate": 8.888405225279673e-06, + "loss": 8.1911, + "step": 974300 + }, + { + "epoch": 10.38, + "learning_rate": 8.876915017389242e-06, + "loss": 8.2052, + "step": 974400 + }, + { + "epoch": 10.38, + "learning_rate": 8.865431896150556e-06, + "loss": 8.0625, + "step": 974500 + }, + { + "epoch": 10.38, + "learning_rate": 8.853955862456654e-06, + "loss": 8.2341, + "step": 974600 + }, + { + "epoch": 10.38, + "learning_rate": 8.842486917200066e-06, + "loss": 8.2207, + "step": 974700 + }, + { + "epoch": 10.38, + "learning_rate": 8.831025061272702e-06, + "loss": 8.2197, + "step": 974800 + }, + { + "epoch": 10.38, + "learning_rate": 8.81957029556596e-06, + "loss": 8.2313, + "step": 974900 + }, + { + "epoch": 10.39, + "learning_rate": 8.80812262097066e-06, + "loss": 8.1796, + "step": 975000 + }, + { + "epoch": 10.39, + "learning_rate": 8.796682038377124e-06, + "loss": 8.1428, + "step": 975100 + }, + { + "epoch": 10.39, + "learning_rate": 8.785248548675073e-06, + "loss": 8.1847, + "step": 975200 + }, + { + "epoch": 10.39, + "learning_rate": 8.773822152753685e-06, + "loss": 8.1091, + "step": 975300 + }, + { + "epoch": 10.39, + "learning_rate": 8.762402851501573e-06, + "loss": 8.196, + "step": 975400 + }, + { + "epoch": 10.39, + "learning_rate": 8.750990645806856e-06, + "loss": 8.2252, + "step": 975500 + }, + { + "epoch": 10.39, + "learning_rate": 8.73958553655705e-06, + "loss": 8.1947, + "step": 975600 + }, + { + "epoch": 10.39, + "learning_rate": 8.728187524639097e-06, + "loss": 8.1203, + "step": 975700 + }, + { + "epoch": 10.39, + "learning_rate": 8.716796610939481e-06, + "loss": 8.2262, + "step": 975800 + }, + { + "epoch": 10.39, + "learning_rate": 8.705412796344037e-06, + "loss": 8.1911, + "step": 975900 + }, + { + "epoch": 10.4, + "learning_rate": 8.6940360817381e-06, + "loss": 8.1895, + "step": 976000 + }, + { + "epoch": 10.4, + "learning_rate": 8.682666468006406e-06, + "loss": 8.2139, + "step": 976100 + }, + { + "epoch": 10.4, + "learning_rate": 8.671303956033217e-06, + "loss": 8.1646, + "step": 976200 + }, + { + "epoch": 10.4, + "learning_rate": 8.659948546702179e-06, + "loss": 8.1328, + "step": 976300 + }, + { + "epoch": 10.4, + "learning_rate": 8.648600240896398e-06, + "loss": 8.2411, + "step": 976400 + }, + { + "epoch": 10.4, + "learning_rate": 8.637259039498412e-06, + "loss": 8.1504, + "step": 976500 + }, + { + "epoch": 10.4, + "learning_rate": 8.625924943390274e-06, + "loss": 8.1449, + "step": 976600 + }, + { + "epoch": 10.4, + "learning_rate": 8.614597953453407e-06, + "loss": 8.1315, + "step": 976700 + }, + { + "epoch": 10.4, + "learning_rate": 8.603278070568721e-06, + "loss": 8.0773, + "step": 976800 + }, + { + "epoch": 10.41, + "learning_rate": 8.591965295616555e-06, + "loss": 8.2161, + "step": 976900 + }, + { + "epoch": 10.41, + "learning_rate": 8.58065962947674e-06, + "loss": 8.1917, + "step": 977000 + }, + { + "epoch": 10.41, + "learning_rate": 8.56936107302846e-06, + "loss": 8.2761, + "step": 977100 + }, + { + "epoch": 10.41, + "learning_rate": 8.558069627150445e-06, + "loss": 8.243, + "step": 977200 + }, + { + "epoch": 10.41, + "learning_rate": 8.546785292720815e-06, + "loss": 8.2393, + "step": 977300 + }, + { + "epoch": 10.41, + "learning_rate": 8.535508070617193e-06, + "loss": 8.2348, + "step": 977400 + }, + { + "epoch": 10.41, + "learning_rate": 8.524237961716553e-06, + "loss": 8.1656, + "step": 977500 + }, + { + "epoch": 10.41, + "learning_rate": 8.512974966895382e-06, + "loss": 8.1674, + "step": 977600 + }, + { + "epoch": 10.41, + "learning_rate": 8.501719087029648e-06, + "loss": 8.258, + "step": 977700 + }, + { + "epoch": 10.41, + "learning_rate": 8.490470322994692e-06, + "loss": 8.2192, + "step": 977800 + }, + { + "epoch": 10.42, + "learning_rate": 8.479228675665341e-06, + "loss": 8.2736, + "step": 977900 + }, + { + "epoch": 10.42, + "learning_rate": 8.467994145915826e-06, + "loss": 8.2337, + "step": 978000 + }, + { + "epoch": 10.42, + "learning_rate": 8.456766734619926e-06, + "loss": 8.2366, + "step": 978100 + }, + { + "epoch": 10.42, + "learning_rate": 8.445546442650709e-06, + "loss": 8.1961, + "step": 978200 + }, + { + "epoch": 10.42, + "learning_rate": 8.434333270880857e-06, + "loss": 8.1535, + "step": 978300 + }, + { + "epoch": 10.42, + "learning_rate": 8.423127220182359e-06, + "loss": 8.2453, + "step": 978400 + }, + { + "epoch": 10.42, + "learning_rate": 8.411928291426774e-06, + "loss": 8.1436, + "step": 978500 + }, + { + "epoch": 10.42, + "learning_rate": 8.400736485484983e-06, + "loss": 8.1565, + "step": 978600 + }, + { + "epoch": 10.42, + "learning_rate": 8.389551803227413e-06, + "loss": 8.1239, + "step": 978700 + }, + { + "epoch": 10.43, + "learning_rate": 8.378374245523868e-06, + "loss": 8.1607, + "step": 978800 + }, + { + "epoch": 10.43, + "learning_rate": 8.367203813243673e-06, + "loss": 8.1766, + "step": 978900 + }, + { + "epoch": 10.43, + "learning_rate": 8.356040507255503e-06, + "loss": 8.1454, + "step": 979000 + }, + { + "epoch": 10.43, + "learning_rate": 8.34488432842756e-06, + "loss": 8.1759, + "step": 979100 + }, + { + "epoch": 10.43, + "learning_rate": 8.333735277627453e-06, + "loss": 8.1683, + "step": 979200 + }, + { + "epoch": 10.43, + "learning_rate": 8.322593355722252e-06, + "loss": 8.2464, + "step": 979300 + }, + { + "epoch": 10.43, + "learning_rate": 8.311458563578433e-06, + "loss": 8.1144, + "step": 979400 + }, + { + "epoch": 10.43, + "learning_rate": 8.300330902061992e-06, + "loss": 8.1214, + "step": 979500 + }, + { + "epoch": 10.43, + "learning_rate": 8.289210372038315e-06, + "loss": 8.203, + "step": 979600 + }, + { + "epoch": 10.44, + "learning_rate": 8.278096974372228e-06, + "loss": 8.1371, + "step": 979700 + }, + { + "epoch": 10.44, + "learning_rate": 8.266990709928046e-06, + "loss": 8.1702, + "step": 979800 + }, + { + "epoch": 10.44, + "learning_rate": 8.255891579569508e-06, + "loss": 8.1932, + "step": 979900 + }, + { + "epoch": 10.44, + "learning_rate": 8.244799584159769e-06, + "loss": 8.1853, + "step": 980000 + }, + { + "epoch": 10.44, + "learning_rate": 8.233714724561458e-06, + "loss": 8.1908, + "step": 980100 + }, + { + "epoch": 10.44, + "learning_rate": 8.222637001636669e-06, + "loss": 8.1292, + "step": 980200 + }, + { + "epoch": 10.44, + "learning_rate": 8.211566416246896e-06, + "loss": 8.2315, + "step": 980300 + }, + { + "epoch": 10.44, + "learning_rate": 8.200502969253121e-06, + "loss": 8.264, + "step": 980400 + }, + { + "epoch": 10.44, + "learning_rate": 8.189446661515709e-06, + "loss": 8.1069, + "step": 980500 + }, + { + "epoch": 10.44, + "learning_rate": 8.178397493894552e-06, + "loss": 8.1844, + "step": 980600 + }, + { + "epoch": 10.45, + "learning_rate": 8.167355467248928e-06, + "loss": 8.1416, + "step": 980700 + }, + { + "epoch": 10.45, + "learning_rate": 8.156320582437583e-06, + "loss": 8.1995, + "step": 980800 + }, + { + "epoch": 10.45, + "learning_rate": 8.145292840318686e-06, + "loss": 8.1934, + "step": 980900 + }, + { + "epoch": 10.45, + "learning_rate": 8.134272241749874e-06, + "loss": 8.2568, + "step": 981000 + }, + { + "epoch": 10.45, + "learning_rate": 8.12325878758824e-06, + "loss": 8.2814, + "step": 981100 + }, + { + "epoch": 10.45, + "learning_rate": 8.112252478690274e-06, + "loss": 8.1455, + "step": 981200 + }, + { + "epoch": 10.45, + "learning_rate": 8.101253315911927e-06, + "loss": 8.1714, + "step": 981300 + }, + { + "epoch": 10.45, + "learning_rate": 8.090261300108648e-06, + "loss": 8.2721, + "step": 981400 + }, + { + "epoch": 10.45, + "learning_rate": 8.079276432135263e-06, + "loss": 8.151, + "step": 981500 + }, + { + "epoch": 10.46, + "learning_rate": 8.068298712846067e-06, + "loss": 8.1786, + "step": 981600 + }, + { + "epoch": 10.46, + "learning_rate": 8.057328143094788e-06, + "loss": 8.1547, + "step": 981700 + }, + { + "epoch": 10.46, + "learning_rate": 8.046364723734635e-06, + "loss": 8.158, + "step": 981800 + }, + { + "epoch": 10.46, + "learning_rate": 8.03540845561821e-06, + "loss": 8.1278, + "step": 981900 + }, + { + "epoch": 10.46, + "learning_rate": 8.02445933959758e-06, + "loss": 8.2069, + "step": 982000 + }, + { + "epoch": 10.46, + "learning_rate": 8.013517376524282e-06, + "loss": 8.0895, + "step": 982100 + }, + { + "epoch": 10.46, + "learning_rate": 8.002582567249272e-06, + "loss": 8.175, + "step": 982200 + }, + { + "epoch": 10.46, + "learning_rate": 7.991654912622936e-06, + "loss": 8.1796, + "step": 982300 + }, + { + "epoch": 10.46, + "learning_rate": 7.980734413495106e-06, + "loss": 8.2414, + "step": 982400 + }, + { + "epoch": 10.46, + "learning_rate": 7.969821070715112e-06, + "loss": 8.2193, + "step": 982500 + }, + { + "epoch": 10.47, + "learning_rate": 7.958914885131652e-06, + "loss": 8.1653, + "step": 982600 + }, + { + "epoch": 10.47, + "learning_rate": 7.948015857592916e-06, + "loss": 8.193, + "step": 982700 + }, + { + "epoch": 10.47, + "learning_rate": 7.937123988946504e-06, + "loss": 8.1101, + "step": 982800 + }, + { + "epoch": 10.47, + "learning_rate": 7.926239280039505e-06, + "loss": 8.1481, + "step": 982900 + }, + { + "epoch": 10.47, + "learning_rate": 7.915361731718418e-06, + "loss": 8.1546, + "step": 983000 + }, + { + "epoch": 10.47, + "learning_rate": 7.904491344829168e-06, + "loss": 8.2561, + "step": 983100 + }, + { + "epoch": 10.47, + "learning_rate": 7.893628120217155e-06, + "loss": 8.1481, + "step": 983200 + }, + { + "epoch": 10.47, + "learning_rate": 7.882772058727239e-06, + "loss": 8.2405, + "step": 983300 + }, + { + "epoch": 10.47, + "learning_rate": 7.871923161203676e-06, + "loss": 8.1573, + "step": 983400 + }, + { + "epoch": 10.48, + "learning_rate": 7.861081428490191e-06, + "loss": 8.1651, + "step": 983500 + }, + { + "epoch": 10.48, + "learning_rate": 7.850246861429921e-06, + "loss": 8.0615, + "step": 983600 + }, + { + "epoch": 10.48, + "learning_rate": 7.839419460865538e-06, + "loss": 8.188, + "step": 983700 + }, + { + "epoch": 10.48, + "learning_rate": 7.828599227638999e-06, + "loss": 8.2238, + "step": 983800 + }, + { + "epoch": 10.48, + "learning_rate": 7.817786162591845e-06, + "loss": 8.1192, + "step": 983900 + }, + { + "epoch": 10.48, + "learning_rate": 7.806980266565033e-06, + "loss": 8.161, + "step": 984000 + }, + { + "epoch": 10.48, + "learning_rate": 7.796181540398905e-06, + "loss": 8.2267, + "step": 984100 + }, + { + "epoch": 10.48, + "learning_rate": 7.785389984933277e-06, + "loss": 8.1941, + "step": 984200 + }, + { + "epoch": 10.48, + "learning_rate": 7.774605601007412e-06, + "loss": 8.1865, + "step": 984300 + }, + { + "epoch": 10.49, + "learning_rate": 7.763828389460037e-06, + "loss": 8.1376, + "step": 984400 + }, + { + "epoch": 10.49, + "learning_rate": 7.753058351129272e-06, + "loss": 8.1998, + "step": 984500 + }, + { + "epoch": 10.49, + "learning_rate": 7.742295486852713e-06, + "loss": 8.2253, + "step": 984600 + }, + { + "epoch": 10.49, + "learning_rate": 7.731539797467368e-06, + "loss": 8.2476, + "step": 984700 + }, + { + "epoch": 10.49, + "learning_rate": 7.720791283809759e-06, + "loss": 8.1059, + "step": 984800 + }, + { + "epoch": 10.49, + "learning_rate": 7.710049946715736e-06, + "loss": 8.2044, + "step": 984900 + }, + { + "epoch": 10.49, + "learning_rate": 7.6993157870207e-06, + "loss": 8.1728, + "step": 985000 + }, + { + "epoch": 10.49, + "learning_rate": 7.688588805559416e-06, + "loss": 8.2114, + "step": 985100 + }, + { + "epoch": 10.49, + "learning_rate": 7.677869003166172e-06, + "loss": 8.1439, + "step": 985200 + }, + { + "epoch": 10.49, + "learning_rate": 7.667156380674589e-06, + "loss": 8.1623, + "step": 985300 + }, + { + "epoch": 10.5, + "learning_rate": 7.656450938917825e-06, + "loss": 8.1738, + "step": 985400 + }, + { + "epoch": 10.5, + "learning_rate": 7.645752678728413e-06, + "loss": 8.2509, + "step": 985500 + }, + { + "epoch": 10.5, + "learning_rate": 7.63506160093841e-06, + "loss": 8.1571, + "step": 985600 + }, + { + "epoch": 10.5, + "learning_rate": 7.624377706379204e-06, + "loss": 8.1493, + "step": 985700 + }, + { + "epoch": 10.5, + "learning_rate": 7.613700995881723e-06, + "loss": 8.0728, + "step": 985800 + }, + { + "epoch": 10.5, + "learning_rate": 7.603031470276278e-06, + "loss": 8.1224, + "step": 985900 + }, + { + "epoch": 10.5, + "learning_rate": 7.592369130392629e-06, + "loss": 8.1622, + "step": 986000 + }, + { + "epoch": 10.5, + "learning_rate": 7.581713977060012e-06, + "loss": 8.1777, + "step": 986100 + }, + { + "epoch": 10.5, + "learning_rate": 7.571066011107064e-06, + "loss": 8.1499, + "step": 986200 + }, + { + "epoch": 10.51, + "learning_rate": 7.560425233361901e-06, + "loss": 8.2363, + "step": 986300 + }, + { + "epoch": 10.51, + "learning_rate": 7.549791644652015e-06, + "loss": 8.1276, + "step": 986400 + }, + { + "epoch": 10.51, + "learning_rate": 7.539165245804425e-06, + "loss": 8.1959, + "step": 986500 + }, + { + "epoch": 10.51, + "learning_rate": 7.528546037645501e-06, + "loss": 8.1701, + "step": 986600 + }, + { + "epoch": 10.51, + "learning_rate": 7.5179340210011605e-06, + "loss": 8.205, + "step": 986700 + }, + { + "epoch": 10.51, + "learning_rate": 7.5073291966966445e-06, + "loss": 8.1626, + "step": 986800 + }, + { + "epoch": 10.51, + "learning_rate": 7.496731565556725e-06, + "loss": 8.1411, + "step": 986900 + }, + { + "epoch": 10.51, + "learning_rate": 7.486141128405566e-06, + "loss": 8.1605, + "step": 987000 + }, + { + "epoch": 10.51, + "learning_rate": 7.475557886066798e-06, + "loss": 8.1773, + "step": 987100 + }, + { + "epoch": 10.51, + "learning_rate": 7.4649818393634495e-06, + "loss": 8.1049, + "step": 987200 + }, + { + "epoch": 10.52, + "learning_rate": 7.454412989118065e-06, + "loss": 8.1155, + "step": 987300 + }, + { + "epoch": 10.52, + "learning_rate": 7.443851336152563e-06, + "loss": 8.1419, + "step": 987400 + }, + { + "epoch": 10.52, + "learning_rate": 7.433296881288321e-06, + "loss": 8.2226, + "step": 987500 + }, + { + "epoch": 10.52, + "learning_rate": 7.4227496253461485e-06, + "loss": 8.0729, + "step": 987600 + }, + { + "epoch": 10.52, + "learning_rate": 7.412209569146356e-06, + "loss": 8.1816, + "step": 987700 + }, + { + "epoch": 10.52, + "learning_rate": 7.401676713508599e-06, + "loss": 8.2581, + "step": 987800 + }, + { + "epoch": 10.52, + "learning_rate": 7.391151059252033e-06, + "loss": 8.1832, + "step": 987900 + }, + { + "epoch": 10.52, + "learning_rate": 7.380632607195215e-06, + "loss": 8.1843, + "step": 988000 + }, + { + "epoch": 10.52, + "learning_rate": 7.370121358156223e-06, + "loss": 8.1702, + "step": 988100 + }, + { + "epoch": 10.53, + "learning_rate": 7.3596173129524715e-06, + "loss": 8.1787, + "step": 988200 + }, + { + "epoch": 10.53, + "learning_rate": 7.349120472400861e-06, + "loss": 8.1571, + "step": 988300 + }, + { + "epoch": 10.53, + "learning_rate": 7.338630837317761e-06, + "loss": 8.2037, + "step": 988400 + }, + { + "epoch": 10.53, + "learning_rate": 7.328148408518931e-06, + "loss": 8.1461, + "step": 988500 + }, + { + "epoch": 10.53, + "learning_rate": 7.317673186819596e-06, + "loss": 8.1384, + "step": 988600 + }, + { + "epoch": 10.53, + "learning_rate": 7.307205173034404e-06, + "loss": 8.1483, + "step": 988700 + }, + { + "epoch": 10.53, + "learning_rate": 7.296744367977471e-06, + "loss": 8.2342, + "step": 988800 + }, + { + "epoch": 10.53, + "learning_rate": 7.286290772462334e-06, + "loss": 8.1906, + "step": 988900 + }, + { + "epoch": 10.53, + "learning_rate": 7.2758443873019664e-06, + "loss": 8.2528, + "step": 989000 + }, + { + "epoch": 10.54, + "learning_rate": 7.265405213308751e-06, + "loss": 8.1642, + "step": 989100 + }, + { + "epoch": 10.54, + "learning_rate": 7.254973251294605e-06, + "loss": 8.2045, + "step": 989200 + }, + { + "epoch": 10.54, + "learning_rate": 7.2445485020707914e-06, + "loss": 8.1347, + "step": 989300 + }, + { + "epoch": 10.54, + "learning_rate": 7.234130966448039e-06, + "loss": 8.1563, + "step": 989400 + }, + { + "epoch": 10.54, + "learning_rate": 7.223720645236509e-06, + "loss": 8.2222, + "step": 989500 + }, + { + "epoch": 10.54, + "learning_rate": 7.213317539245845e-06, + "loss": 8.2843, + "step": 989600 + }, + { + "epoch": 10.54, + "learning_rate": 7.202921649285088e-06, + "loss": 8.1773, + "step": 989700 + }, + { + "epoch": 10.54, + "learning_rate": 7.192532976162725e-06, + "loss": 8.1523, + "step": 989800 + }, + { + "epoch": 10.54, + "learning_rate": 7.182151520686664e-06, + "loss": 8.185, + "step": 989900 + }, + { + "epoch": 10.54, + "learning_rate": 7.171777283664305e-06, + "loss": 8.2306, + "step": 990000 + }, + { + "epoch": 10.55, + "learning_rate": 7.161410265902446e-06, + "loss": 8.1886, + "step": 990100 + }, + { + "epoch": 10.55, + "learning_rate": 7.15105046820731e-06, + "loss": 8.2891, + "step": 990200 + }, + { + "epoch": 10.55, + "learning_rate": 7.1406978913845845e-06, + "loss": 8.1996, + "step": 990300 + }, + { + "epoch": 10.55, + "learning_rate": 7.130352536239415e-06, + "loss": 8.1677, + "step": 990400 + }, + { + "epoch": 10.55, + "learning_rate": 7.120014403576336e-06, + "loss": 8.2142, + "step": 990500 + }, + { + "epoch": 10.55, + "learning_rate": 7.109683494199349e-06, + "loss": 8.2323, + "step": 990600 + }, + { + "epoch": 10.55, + "learning_rate": 7.09935980891191e-06, + "loss": 8.1326, + "step": 990700 + }, + { + "epoch": 10.55, + "learning_rate": 7.089043348516878e-06, + "loss": 8.2135, + "step": 990800 + }, + { + "epoch": 10.55, + "learning_rate": 7.078734113816554e-06, + "loss": 8.1861, + "step": 990900 + }, + { + "epoch": 10.56, + "learning_rate": 7.068432105612699e-06, + "loss": 8.1464, + "step": 991000 + }, + { + "epoch": 10.56, + "learning_rate": 7.058137324706504e-06, + "loss": 8.1893, + "step": 991100 + }, + { + "epoch": 10.56, + "learning_rate": 7.047849771898596e-06, + "loss": 8.1553, + "step": 991200 + }, + { + "epoch": 10.56, + "learning_rate": 7.037569447989046e-06, + "loss": 8.2561, + "step": 991300 + }, + { + "epoch": 10.56, + "learning_rate": 7.027296353777313e-06, + "loss": 8.2351, + "step": 991400 + }, + { + "epoch": 10.56, + "learning_rate": 7.017030490062393e-06, + "loss": 8.1826, + "step": 991500 + }, + { + "epoch": 10.56, + "learning_rate": 7.006771857642647e-06, + "loss": 8.1163, + "step": 991600 + }, + { + "epoch": 10.56, + "learning_rate": 6.9965204573158694e-06, + "loss": 8.104, + "step": 991700 + }, + { + "epoch": 10.56, + "learning_rate": 6.986276289879323e-06, + "loss": 8.1844, + "step": 991800 + }, + { + "epoch": 10.57, + "learning_rate": 6.976039356129726e-06, + "loss": 8.1814, + "step": 991900 + }, + { + "epoch": 10.57, + "learning_rate": 6.965809656863142e-06, + "loss": 8.2433, + "step": 992000 + }, + { + "epoch": 10.57, + "learning_rate": 6.955587192875201e-06, + "loss": 8.1743, + "step": 992100 + }, + { + "epoch": 10.57, + "learning_rate": 6.945371964960856e-06, + "loss": 8.1187, + "step": 992200 + }, + { + "epoch": 10.57, + "learning_rate": 6.935163973914594e-06, + "loss": 8.2322, + "step": 992300 + }, + { + "epoch": 10.57, + "learning_rate": 6.924963220530245e-06, + "loss": 8.2564, + "step": 992400 + }, + { + "epoch": 10.57, + "learning_rate": 6.914769705601143e-06, + "loss": 8.2342, + "step": 992500 + }, + { + "epoch": 10.57, + "learning_rate": 6.904583429920064e-06, + "loss": 8.1962, + "step": 992600 + }, + { + "epoch": 10.57, + "learning_rate": 6.8944043942791415e-06, + "loss": 8.2183, + "step": 992700 + }, + { + "epoch": 10.57, + "learning_rate": 6.884232599470042e-06, + "loss": 8.2112, + "step": 992800 + }, + { + "epoch": 10.58, + "learning_rate": 6.8740680462838106e-06, + "loss": 8.1259, + "step": 992900 + }, + { + "epoch": 10.58, + "learning_rate": 6.86391073551097e-06, + "loss": 8.0988, + "step": 993000 + }, + { + "epoch": 10.58, + "learning_rate": 6.853760667941411e-06, + "loss": 8.1226, + "step": 993100 + }, + { + "epoch": 10.58, + "learning_rate": 6.843617844364536e-06, + "loss": 8.2225, + "step": 993200 + }, + { + "epoch": 10.58, + "learning_rate": 6.833482265569146e-06, + "loss": 8.149, + "step": 993300 + }, + { + "epoch": 10.58, + "learning_rate": 6.8233539323435014e-06, + "loss": 8.1562, + "step": 993400 + }, + { + "epoch": 10.58, + "learning_rate": 6.813232845475248e-06, + "loss": 8.2497, + "step": 993500 + }, + { + "epoch": 10.58, + "learning_rate": 6.803119005751546e-06, + "loss": 8.1208, + "step": 993600 + }, + { + "epoch": 10.58, + "learning_rate": 6.793012413958921e-06, + "loss": 8.138, + "step": 993700 + }, + { + "epoch": 10.59, + "learning_rate": 6.782913070883367e-06, + "loss": 8.2159, + "step": 993800 + }, + { + "epoch": 10.59, + "learning_rate": 6.772820977310301e-06, + "loss": 8.1727, + "step": 993900 + }, + { + "epoch": 10.59, + "learning_rate": 6.762736134024628e-06, + "loss": 8.1468, + "step": 994000 + }, + { + "epoch": 10.59, + "learning_rate": 6.752658541810608e-06, + "loss": 8.1279, + "step": 994100 + }, + { + "epoch": 10.59, + "learning_rate": 6.742588201451994e-06, + "loss": 8.1307, + "step": 994200 + }, + { + "epoch": 10.59, + "learning_rate": 6.732525113731936e-06, + "loss": 8.233, + "step": 994300 + }, + { + "epoch": 10.59, + "learning_rate": 6.722469279433064e-06, + "loss": 8.1956, + "step": 994400 + }, + { + "epoch": 10.59, + "learning_rate": 6.712420699337429e-06, + "loss": 8.1337, + "step": 994500 + }, + { + "epoch": 10.59, + "learning_rate": 6.702379374226464e-06, + "loss": 8.1675, + "step": 994600 + }, + { + "epoch": 10.59, + "learning_rate": 6.692345304881142e-06, + "loss": 8.1593, + "step": 994700 + }, + { + "epoch": 10.6, + "learning_rate": 6.6823184920817845e-06, + "loss": 8.2248, + "step": 994800 + }, + { + "epoch": 10.6, + "learning_rate": 6.672298936608179e-06, + "loss": 8.1801, + "step": 994900 + }, + { + "epoch": 10.6, + "learning_rate": 6.662286639239534e-06, + "loss": 8.1971, + "step": 995000 + }, + { + "epoch": 10.6, + "learning_rate": 6.6522816007545395e-06, + "loss": 8.2011, + "step": 995100 + }, + { + "epoch": 10.6, + "learning_rate": 6.6422838219312615e-06, + "loss": 8.1864, + "step": 995200 + }, + { + "epoch": 10.6, + "learning_rate": 6.632293303547243e-06, + "loss": 8.1833, + "step": 995300 + }, + { + "epoch": 10.6, + "learning_rate": 6.622310046379432e-06, + "loss": 8.2345, + "step": 995400 + }, + { + "epoch": 10.6, + "learning_rate": 6.6123340512042495e-06, + "loss": 8.0825, + "step": 995500 + }, + { + "epoch": 10.6, + "learning_rate": 6.60236531879751e-06, + "loss": 8.1841, + "step": 995600 + }, + { + "epoch": 10.61, + "learning_rate": 6.592403849934492e-06, + "loss": 8.2391, + "step": 995700 + }, + { + "epoch": 10.61, + "learning_rate": 6.582449645389888e-06, + "loss": 8.1776, + "step": 995800 + }, + { + "epoch": 10.61, + "learning_rate": 6.572502705937866e-06, + "loss": 8.1727, + "step": 995900 + }, + { + "epoch": 10.61, + "learning_rate": 6.562563032351976e-06, + "loss": 8.2501, + "step": 996000 + }, + { + "epoch": 10.61, + "learning_rate": 6.552630625405232e-06, + "loss": 8.1655, + "step": 996100 + }, + { + "epoch": 10.61, + "learning_rate": 6.542705485870071e-06, + "loss": 8.1797, + "step": 996200 + }, + { + "epoch": 10.61, + "learning_rate": 6.532787614518398e-06, + "loss": 8.1994, + "step": 996300 + }, + { + "epoch": 10.61, + "learning_rate": 6.5228770121214974e-06, + "loss": 8.2018, + "step": 996400 + }, + { + "epoch": 10.61, + "learning_rate": 6.5129736794501405e-06, + "loss": 8.1125, + "step": 996500 + }, + { + "epoch": 10.62, + "learning_rate": 6.503077617274489e-06, + "loss": 8.2502, + "step": 996600 + }, + { + "epoch": 10.62, + "learning_rate": 6.4931888263641825e-06, + "loss": 8.1298, + "step": 996700 + }, + { + "epoch": 10.62, + "learning_rate": 6.483307307488263e-06, + "loss": 8.1538, + "step": 996800 + }, + { + "epoch": 10.62, + "learning_rate": 6.47343306141519e-06, + "loss": 8.1114, + "step": 996900 + }, + { + "epoch": 10.62, + "learning_rate": 6.463566088912942e-06, + "loss": 8.1586, + "step": 997000 + }, + { + "epoch": 10.62, + "learning_rate": 6.4537063907488375e-06, + "loss": 8.0664, + "step": 997100 + }, + { + "epoch": 10.62, + "learning_rate": 6.443853967689684e-06, + "loss": 8.2085, + "step": 997200 + }, + { + "epoch": 10.62, + "learning_rate": 6.434008820501669e-06, + "loss": 8.1391, + "step": 997300 + }, + { + "epoch": 10.62, + "learning_rate": 6.424170949950492e-06, + "loss": 8.1587, + "step": 997400 + }, + { + "epoch": 10.62, + "learning_rate": 6.41434035680123e-06, + "loss": 8.2401, + "step": 997500 + }, + { + "epoch": 10.63, + "learning_rate": 6.404517041818414e-06, + "loss": 8.1239, + "step": 997600 + }, + { + "epoch": 10.63, + "learning_rate": 6.39470100576598e-06, + "loss": 8.1388, + "step": 997700 + }, + { + "epoch": 10.63, + "learning_rate": 6.384892249407348e-06, + "loss": 8.1879, + "step": 997800 + }, + { + "epoch": 10.63, + "learning_rate": 6.375090773505344e-06, + "loss": 8.2126, + "step": 997900 + }, + { + "epoch": 10.63, + "learning_rate": 6.365296578822233e-06, + "loss": 8.2283, + "step": 998000 + }, + { + "epoch": 10.63, + "learning_rate": 6.355509666119674e-06, + "loss": 8.1873, + "step": 998100 + }, + { + "epoch": 10.63, + "learning_rate": 6.345730036158848e-06, + "loss": 8.1647, + "step": 998200 + }, + { + "epoch": 10.63, + "learning_rate": 6.335957689700301e-06, + "loss": 8.2219, + "step": 998300 + }, + { + "epoch": 10.63, + "learning_rate": 6.326192627504013e-06, + "loss": 8.1469, + "step": 998400 + }, + { + "epoch": 10.64, + "learning_rate": 6.316434850329411e-06, + "loss": 8.2167, + "step": 998500 + }, + { + "epoch": 10.64, + "learning_rate": 6.3066843589354e-06, + "loss": 8.216, + "step": 998600 + }, + { + "epoch": 10.64, + "learning_rate": 6.2969411540802155e-06, + "loss": 8.2484, + "step": 998700 + }, + { + "epoch": 10.64, + "learning_rate": 6.28720523652162e-06, + "loss": 8.1137, + "step": 998800 + }, + { + "epoch": 10.64, + "learning_rate": 6.277476607016797e-06, + "loss": 8.1296, + "step": 998900 + }, + { + "epoch": 10.64, + "learning_rate": 6.267755266322317e-06, + "loss": 8.2136, + "step": 999000 + }, + { + "epoch": 10.64, + "learning_rate": 6.25804121519421e-06, + "loss": 8.1724, + "step": 999100 + }, + { + "epoch": 10.64, + "learning_rate": 6.248334454387938e-06, + "loss": 8.2218, + "step": 999200 + }, + { + "epoch": 10.64, + "learning_rate": 6.2386349846584204e-06, + "loss": 8.2234, + "step": 999300 + }, + { + "epoch": 10.64, + "learning_rate": 6.228942806759963e-06, + "loss": 8.1555, + "step": 999400 + }, + { + "epoch": 10.65, + "learning_rate": 6.2192579214463305e-06, + "loss": 8.149, + "step": 999500 + }, + { + "epoch": 10.65, + "learning_rate": 6.20958032947071e-06, + "loss": 8.1893, + "step": 999600 + }, + { + "epoch": 10.65, + "learning_rate": 6.199910031585765e-06, + "loss": 8.1403, + "step": 999700 + }, + { + "epoch": 10.65, + "learning_rate": 6.190247028543494e-06, + "loss": 8.1505, + "step": 999800 + }, + { + "epoch": 10.65, + "learning_rate": 6.180591321095452e-06, + "loss": 8.1888, + "step": 999900 + }, + { + "epoch": 10.65, + "learning_rate": 6.170942909992516e-06, + "loss": 8.1937, + "step": 1000000 + }, + { + "epoch": 10.65, + "learning_rate": 6.161301795985097e-06, + "loss": 8.1204, + "step": 1000100 + }, + { + "epoch": 10.65, + "learning_rate": 6.151667979822917e-06, + "loss": 8.173, + "step": 1000200 + }, + { + "epoch": 10.65, + "learning_rate": 6.142041462255255e-06, + "loss": 8.1696, + "step": 1000300 + }, + { + "epoch": 10.66, + "learning_rate": 6.1324222440307335e-06, + "loss": 8.1814, + "step": 1000400 + }, + { + "epoch": 10.66, + "learning_rate": 6.1228103258974875e-06, + "loss": 8.1712, + "step": 1000500 + }, + { + "epoch": 10.66, + "learning_rate": 6.113205708602965e-06, + "loss": 8.1239, + "step": 1000600 + }, + { + "epoch": 10.66, + "learning_rate": 6.103608392894178e-06, + "loss": 8.1842, + "step": 1000700 + }, + { + "epoch": 10.66, + "learning_rate": 6.094018379517486e-06, + "loss": 8.2326, + "step": 1000800 + }, + { + "epoch": 10.66, + "learning_rate": 6.084435669218691e-06, + "loss": 8.1775, + "step": 1000900 + }, + { + "epoch": 10.66, + "learning_rate": 6.074860262743065e-06, + "loss": 8.1747, + "step": 1001000 + }, + { + "epoch": 10.66, + "learning_rate": 6.065292160835278e-06, + "loss": 8.2615, + "step": 1001100 + }, + { + "epoch": 10.66, + "learning_rate": 6.05573136423947e-06, + "loss": 8.182, + "step": 1001200 + }, + { + "epoch": 10.67, + "learning_rate": 6.046177873699133e-06, + "loss": 8.2159, + "step": 1001300 + }, + { + "epoch": 10.67, + "learning_rate": 6.036631689957295e-06, + "loss": 8.2017, + "step": 1001400 + }, + { + "epoch": 10.67, + "learning_rate": 6.027092813756308e-06, + "loss": 8.2077, + "step": 1001500 + }, + { + "epoch": 10.67, + "learning_rate": 6.017561245838088e-06, + "loss": 8.1975, + "step": 1001600 + }, + { + "epoch": 10.67, + "learning_rate": 6.008036986943822e-06, + "loss": 8.214, + "step": 1001700 + }, + { + "epoch": 10.67, + "learning_rate": 5.9985200378142705e-06, + "loss": 8.1293, + "step": 1001800 + }, + { + "epoch": 10.67, + "learning_rate": 5.989010399189543e-06, + "loss": 8.1631, + "step": 1001900 + }, + { + "epoch": 10.67, + "learning_rate": 5.979508071809215e-06, + "loss": 8.1278, + "step": 1002000 + }, + { + "epoch": 10.67, + "learning_rate": 5.970013056412272e-06, + "loss": 8.1404, + "step": 1002100 + }, + { + "epoch": 10.67, + "learning_rate": 5.960525353737157e-06, + "loss": 8.1902, + "step": 1002200 + }, + { + "epoch": 10.68, + "learning_rate": 5.9510449645217235e-06, + "loss": 8.238, + "step": 1002300 + }, + { + "epoch": 10.68, + "learning_rate": 5.941571889503261e-06, + "loss": 8.0982, + "step": 1002400 + }, + { + "epoch": 10.68, + "learning_rate": 5.932106129418469e-06, + "loss": 8.0899, + "step": 1002500 + }, + { + "epoch": 10.68, + "learning_rate": 5.9226476850035464e-06, + "loss": 8.3309, + "step": 1002600 + }, + { + "epoch": 10.68, + "learning_rate": 5.91319655699406e-06, + "loss": 8.1959, + "step": 1002700 + }, + { + "epoch": 10.68, + "learning_rate": 5.903752746125013e-06, + "loss": 8.1827, + "step": 1002800 + }, + { + "epoch": 10.68, + "learning_rate": 5.894316253130838e-06, + "loss": 8.1156, + "step": 1002900 + }, + { + "epoch": 10.68, + "learning_rate": 5.884887078745449e-06, + "loss": 8.2594, + "step": 1003000 + }, + { + "epoch": 10.68, + "learning_rate": 5.875465223702148e-06, + "loss": 8.1668, + "step": 1003100 + }, + { + "epoch": 10.69, + "learning_rate": 5.8660506887336376e-06, + "loss": 8.1153, + "step": 1003200 + }, + { + "epoch": 10.69, + "learning_rate": 5.856643474572121e-06, + "loss": 8.1732, + "step": 1003300 + }, + { + "epoch": 10.69, + "learning_rate": 5.847243581949202e-06, + "loss": 8.1132, + "step": 1003400 + }, + { + "epoch": 10.69, + "learning_rate": 5.837851011595896e-06, + "loss": 8.1814, + "step": 1003500 + }, + { + "epoch": 10.69, + "learning_rate": 5.828465764242641e-06, + "loss": 8.1736, + "step": 1003600 + }, + { + "epoch": 10.69, + "learning_rate": 5.819087840619386e-06, + "loss": 8.2393, + "step": 1003700 + }, + { + "epoch": 10.69, + "learning_rate": 5.809717241455404e-06, + "loss": 8.1922, + "step": 1003800 + }, + { + "epoch": 10.69, + "learning_rate": 5.800353967479477e-06, + "loss": 8.1677, + "step": 1003900 + }, + { + "epoch": 10.69, + "learning_rate": 5.790998019419758e-06, + "loss": 8.1307, + "step": 1004000 + }, + { + "epoch": 10.69, + "learning_rate": 5.781649398003886e-06, + "loss": 8.1697, + "step": 1004100 + }, + { + "epoch": 10.7, + "learning_rate": 5.7723081039589015e-06, + "loss": 8.1494, + "step": 1004200 + }, + { + "epoch": 10.7, + "learning_rate": 5.762974138011279e-06, + "loss": 8.193, + "step": 1004300 + }, + { + "epoch": 10.7, + "learning_rate": 5.753647500886894e-06, + "loss": 8.1849, + "step": 1004400 + }, + { + "epoch": 10.7, + "learning_rate": 5.7443281933111104e-06, + "loss": 8.1683, + "step": 1004500 + }, + { + "epoch": 10.7, + "learning_rate": 5.735016216008693e-06, + "loss": 8.1303, + "step": 1004600 + }, + { + "epoch": 10.7, + "learning_rate": 5.7257115697038175e-06, + "loss": 8.1945, + "step": 1004700 + }, + { + "epoch": 10.7, + "learning_rate": 5.7164142551201065e-06, + "loss": 8.1612, + "step": 1004800 + }, + { + "epoch": 10.7, + "learning_rate": 5.707124272980624e-06, + "loss": 8.1534, + "step": 1004900 + }, + { + "epoch": 10.7, + "learning_rate": 5.697841624007861e-06, + "loss": 8.2369, + "step": 1005000 + }, + { + "epoch": 10.71, + "learning_rate": 5.688566308923704e-06, + "loss": 8.1136, + "step": 1005100 + }, + { + "epoch": 10.71, + "learning_rate": 5.679298328449523e-06, + "loss": 8.2083, + "step": 1005200 + }, + { + "epoch": 10.71, + "learning_rate": 5.670037683306073e-06, + "loss": 8.2021, + "step": 1005300 + }, + { + "epoch": 10.71, + "learning_rate": 5.660784374213568e-06, + "loss": 8.2274, + "step": 1005400 + }, + { + "epoch": 10.71, + "learning_rate": 5.65153840189161e-06, + "loss": 8.1788, + "step": 1005500 + }, + { + "epoch": 10.71, + "learning_rate": 5.64229976705929e-06, + "loss": 8.1239, + "step": 1005600 + }, + { + "epoch": 10.71, + "learning_rate": 5.6330684704350985e-06, + "loss": 8.2689, + "step": 1005700 + }, + { + "epoch": 10.71, + "learning_rate": 5.623844512736931e-06, + "loss": 8.1891, + "step": 1005800 + }, + { + "epoch": 10.71, + "learning_rate": 5.6146278946821455e-06, + "loss": 8.1209, + "step": 1005900 + }, + { + "epoch": 10.72, + "learning_rate": 5.605418616987523e-06, + "loss": 8.1103, + "step": 1006000 + }, + { + "epoch": 10.72, + "learning_rate": 5.596216680369282e-06, + "loss": 8.1559, + "step": 1006100 + }, + { + "epoch": 10.72, + "learning_rate": 5.587022085543037e-06, + "loss": 8.1982, + "step": 1006200 + }, + { + "epoch": 10.72, + "learning_rate": 5.5778348332238494e-06, + "loss": 8.1577, + "step": 1006300 + }, + { + "epoch": 10.72, + "learning_rate": 5.568654924126249e-06, + "loss": 8.2816, + "step": 1006400 + }, + { + "epoch": 10.72, + "learning_rate": 5.559482358964097e-06, + "loss": 8.166, + "step": 1006500 + }, + { + "epoch": 10.72, + "learning_rate": 5.5503171384508e-06, + "loss": 8.1885, + "step": 1006600 + }, + { + "epoch": 10.72, + "learning_rate": 5.541159263299112e-06, + "loss": 8.1694, + "step": 1006700 + }, + { + "epoch": 10.72, + "learning_rate": 5.532008734221272e-06, + "loss": 8.1852, + "step": 1006800 + }, + { + "epoch": 10.72, + "learning_rate": 5.522865551928868e-06, + "loss": 8.1424, + "step": 1006900 + }, + { + "epoch": 10.73, + "learning_rate": 5.513729717132998e-06, + "loss": 8.1639, + "step": 1007000 + }, + { + "epoch": 10.73, + "learning_rate": 5.504601230544149e-06, + "loss": 8.1277, + "step": 1007100 + }, + { + "epoch": 10.73, + "learning_rate": 5.495480092872252e-06, + "loss": 8.2171, + "step": 1007200 + }, + { + "epoch": 10.73, + "learning_rate": 5.486366304826662e-06, + "loss": 8.1712, + "step": 1007300 + }, + { + "epoch": 10.73, + "learning_rate": 5.477259867116124e-06, + "loss": 8.1622, + "step": 1007400 + }, + { + "epoch": 10.73, + "learning_rate": 5.4681607804489165e-06, + "loss": 8.1871, + "step": 1007500 + }, + { + "epoch": 10.73, + "learning_rate": 5.459069045532594e-06, + "loss": 8.1514, + "step": 1007600 + }, + { + "epoch": 10.73, + "learning_rate": 5.449984663074281e-06, + "loss": 8.2425, + "step": 1007700 + }, + { + "epoch": 10.73, + "learning_rate": 5.440907633780423e-06, + "loss": 8.1116, + "step": 1007800 + }, + { + "epoch": 10.74, + "learning_rate": 5.431837958357011e-06, + "loss": 8.1596, + "step": 1007900 + }, + { + "epoch": 10.74, + "learning_rate": 5.422775637509326e-06, + "loss": 8.1201, + "step": 1008000 + }, + { + "epoch": 10.74, + "learning_rate": 5.41372067194218e-06, + "loss": 8.1834, + "step": 1008100 + }, + { + "epoch": 10.74, + "learning_rate": 5.404673062359755e-06, + "loss": 8.2297, + "step": 1008200 + }, + { + "epoch": 10.74, + "learning_rate": 5.395632809465734e-06, + "loss": 8.202, + "step": 1008300 + }, + { + "epoch": 10.74, + "learning_rate": 5.38659991396312e-06, + "loss": 8.1687, + "step": 1008400 + }, + { + "epoch": 10.74, + "learning_rate": 5.377574376554439e-06, + "loss": 8.1697, + "step": 1008500 + }, + { + "epoch": 10.74, + "learning_rate": 5.368556197941588e-06, + "loss": 8.2532, + "step": 1008600 + }, + { + "epoch": 10.74, + "learning_rate": 5.359545378825925e-06, + "loss": 8.1907, + "step": 1008700 + }, + { + "epoch": 10.75, + "learning_rate": 5.350541919908203e-06, + "loss": 8.1563, + "step": 1008800 + }, + { + "epoch": 10.75, + "learning_rate": 5.34154582188866e-06, + "loss": 8.185, + "step": 1008900 + }, + { + "epoch": 10.75, + "learning_rate": 5.3325570854668825e-06, + "loss": 8.2071, + "step": 1009000 + }, + { + "epoch": 10.75, + "learning_rate": 5.323575711341955e-06, + "loss": 8.1723, + "step": 1009100 + }, + { + "epoch": 10.75, + "learning_rate": 5.31460170021233e-06, + "loss": 8.1479, + "step": 1009200 + }, + { + "epoch": 10.75, + "learning_rate": 5.3056350527759366e-06, + "loss": 8.1397, + "step": 1009300 + }, + { + "epoch": 10.75, + "learning_rate": 5.296675769730131e-06, + "loss": 8.1541, + "step": 1009400 + }, + { + "epoch": 10.75, + "learning_rate": 5.287723851771631e-06, + "loss": 8.1566, + "step": 1009500 + }, + { + "epoch": 10.75, + "learning_rate": 5.27877929959667e-06, + "loss": 8.1569, + "step": 1009600 + }, + { + "epoch": 10.75, + "learning_rate": 5.269842113900858e-06, + "loss": 8.1624, + "step": 1009700 + }, + { + "epoch": 10.76, + "learning_rate": 5.260912295379228e-06, + "loss": 8.1893, + "step": 1009800 + }, + { + "epoch": 10.76, + "learning_rate": 5.251989844726235e-06, + "loss": 8.2723, + "step": 1009900 + }, + { + "epoch": 10.76, + "learning_rate": 5.243074762635825e-06, + "loss": 8.2313, + "step": 1010000 + }, + { + "epoch": 10.76, + "learning_rate": 5.234167049801297e-06, + "loss": 8.1893, + "step": 1010100 + }, + { + "epoch": 10.76, + "learning_rate": 5.2252667069154214e-06, + "loss": 8.2309, + "step": 1010200 + }, + { + "epoch": 10.76, + "learning_rate": 5.216373734670343e-06, + "loss": 8.1648, + "step": 1010300 + }, + { + "epoch": 10.76, + "learning_rate": 5.20748813375771e-06, + "loss": 8.2875, + "step": 1010400 + }, + { + "epoch": 10.76, + "learning_rate": 5.198609904868534e-06, + "loss": 8.1302, + "step": 1010500 + }, + { + "epoch": 10.76, + "learning_rate": 5.189739048693287e-06, + "loss": 8.2035, + "step": 1010600 + }, + { + "epoch": 10.77, + "learning_rate": 5.180875565921839e-06, + "loss": 8.2064, + "step": 1010700 + }, + { + "epoch": 10.77, + "learning_rate": 5.172019457243527e-06, + "loss": 8.1789, + "step": 1010800 + }, + { + "epoch": 10.77, + "learning_rate": 5.1631707233470884e-06, + "loss": 8.1952, + "step": 1010900 + }, + { + "epoch": 10.77, + "learning_rate": 5.154329364920673e-06, + "loss": 8.2326, + "step": 1011000 + }, + { + "epoch": 10.77, + "learning_rate": 5.145495382651877e-06, + "loss": 8.1545, + "step": 1011100 + }, + { + "epoch": 10.77, + "learning_rate": 5.1366687772277375e-06, + "loss": 8.1495, + "step": 1011200 + }, + { + "epoch": 10.77, + "learning_rate": 5.127849549334684e-06, + "loss": 8.1603, + "step": 1011300 + }, + { + "epoch": 10.77, + "learning_rate": 5.11903769965858e-06, + "loss": 8.2036, + "step": 1011400 + }, + { + "epoch": 10.77, + "learning_rate": 5.1102332288847645e-06, + "loss": 8.2047, + "step": 1011500 + }, + { + "epoch": 10.77, + "learning_rate": 5.101436137697924e-06, + "loss": 8.1776, + "step": 1011600 + }, + { + "epoch": 10.78, + "learning_rate": 5.092646426782221e-06, + "loss": 8.0969, + "step": 1011700 + }, + { + "epoch": 10.78, + "learning_rate": 5.083864096821223e-06, + "loss": 8.2096, + "step": 1011800 + }, + { + "epoch": 10.78, + "learning_rate": 5.075089148497958e-06, + "loss": 8.1424, + "step": 1011900 + }, + { + "epoch": 10.78, + "learning_rate": 5.066321582494837e-06, + "loss": 8.1343, + "step": 1012000 + }, + { + "epoch": 10.78, + "learning_rate": 5.057561399493715e-06, + "loss": 8.163, + "step": 1012100 + }, + { + "epoch": 10.78, + "learning_rate": 5.048808600175858e-06, + "loss": 8.1852, + "step": 1012200 + }, + { + "epoch": 10.78, + "learning_rate": 5.040063185222011e-06, + "loss": 8.1951, + "step": 1012300 + }, + { + "epoch": 10.78, + "learning_rate": 5.031325155312272e-06, + "loss": 8.1348, + "step": 1012400 + }, + { + "epoch": 10.78, + "learning_rate": 5.0225945111262105e-06, + "loss": 8.2171, + "step": 1012500 + }, + { + "epoch": 10.79, + "learning_rate": 5.0138712533427944e-06, + "loss": 8.1607, + "step": 1012600 + }, + { + "epoch": 10.79, + "learning_rate": 5.005155382640459e-06, + "loss": 8.1498, + "step": 1012700 + }, + { + "epoch": 10.79, + "learning_rate": 4.996446899697027e-06, + "loss": 8.1882, + "step": 1012800 + }, + { + "epoch": 10.79, + "learning_rate": 4.987745805189759e-06, + "loss": 8.1983, + "step": 1012900 + }, + { + "epoch": 10.79, + "learning_rate": 4.979052099795323e-06, + "loss": 8.108, + "step": 1013000 + }, + { + "epoch": 10.79, + "learning_rate": 4.970365784189857e-06, + "loss": 8.204, + "step": 1013100 + }, + { + "epoch": 10.79, + "learning_rate": 4.961686859048887e-06, + "loss": 8.1317, + "step": 1013200 + }, + { + "epoch": 10.79, + "learning_rate": 4.953015325047361e-06, + "loss": 8.143, + "step": 1013300 + }, + { + "epoch": 10.79, + "learning_rate": 4.944351182859675e-06, + "loss": 8.177, + "step": 1013400 + }, + { + "epoch": 10.8, + "learning_rate": 4.935694433159643e-06, + "loss": 8.1415, + "step": 1013500 + }, + { + "epoch": 10.8, + "learning_rate": 4.927045076620496e-06, + "loss": 8.1497, + "step": 1013600 + }, + { + "epoch": 10.8, + "learning_rate": 4.918403113914894e-06, + "loss": 8.2061, + "step": 1013700 + }, + { + "epoch": 10.8, + "learning_rate": 4.909768545714932e-06, + "loss": 8.1886, + "step": 1013800 + }, + { + "epoch": 10.8, + "learning_rate": 4.901141372692119e-06, + "loss": 8.1835, + "step": 1013900 + }, + { + "epoch": 10.8, + "learning_rate": 4.892521595517385e-06, + "loss": 8.1673, + "step": 1014000 + }, + { + "epoch": 10.8, + "learning_rate": 4.883909214861082e-06, + "loss": 8.2566, + "step": 1014100 + }, + { + "epoch": 10.8, + "learning_rate": 4.8753042313930186e-06, + "loss": 8.1811, + "step": 1014200 + }, + { + "epoch": 10.8, + "learning_rate": 4.866706645782404e-06, + "loss": 8.1298, + "step": 1014300 + }, + { + "epoch": 10.8, + "learning_rate": 4.858116458697859e-06, + "loss": 8.1851, + "step": 1014400 + }, + { + "epoch": 10.81, + "learning_rate": 4.849533670807438e-06, + "loss": 8.1329, + "step": 1014500 + }, + { + "epoch": 10.81, + "learning_rate": 4.840958282778651e-06, + "loss": 8.1777, + "step": 1014600 + }, + { + "epoch": 10.81, + "learning_rate": 4.832390295278377e-06, + "loss": 8.0806, + "step": 1014700 + }, + { + "epoch": 10.81, + "learning_rate": 4.8238297089729716e-06, + "loss": 8.1709, + "step": 1014800 + }, + { + "epoch": 10.81, + "learning_rate": 4.815276524528179e-06, + "loss": 8.0904, + "step": 1014900 + }, + { + "epoch": 10.81, + "learning_rate": 4.8067307426092005e-06, + "loss": 8.1802, + "step": 1015000 + }, + { + "epoch": 10.81, + "learning_rate": 4.798192363880605e-06, + "loss": 8.0981, + "step": 1015100 + }, + { + "epoch": 10.81, + "learning_rate": 4.789661389006461e-06, + "loss": 8.2385, + "step": 1015200 + }, + { + "epoch": 10.81, + "learning_rate": 4.781137818650205e-06, + "loss": 8.1898, + "step": 1015300 + }, + { + "epoch": 10.82, + "learning_rate": 4.77262165347474e-06, + "loss": 8.182, + "step": 1015400 + }, + { + "epoch": 10.82, + "learning_rate": 4.7641128941423255e-06, + "loss": 8.1843, + "step": 1015500 + }, + { + "epoch": 10.82, + "learning_rate": 4.755611541314708e-06, + "loss": 8.1967, + "step": 1015600 + }, + { + "epoch": 10.82, + "learning_rate": 4.747117595653061e-06, + "loss": 8.171, + "step": 1015700 + }, + { + "epoch": 10.82, + "learning_rate": 4.7386310578179324e-06, + "loss": 8.211, + "step": 1015800 + }, + { + "epoch": 10.82, + "learning_rate": 4.730151928469328e-06, + "loss": 8.1579, + "step": 1015900 + }, + { + "epoch": 10.82, + "learning_rate": 4.721680208266666e-06, + "loss": 8.189, + "step": 1016000 + }, + { + "epoch": 10.82, + "learning_rate": 4.7132158978688275e-06, + "loss": 8.257, + "step": 1016100 + }, + { + "epoch": 10.82, + "learning_rate": 4.704758997934033e-06, + "loss": 8.1728, + "step": 1016200 + }, + { + "epoch": 10.82, + "learning_rate": 4.696309509120001e-06, + "loss": 8.1759, + "step": 1016300 + }, + { + "epoch": 10.83, + "learning_rate": 4.687867432083837e-06, + "loss": 8.1161, + "step": 1016400 + }, + { + "epoch": 10.83, + "learning_rate": 4.679432767482117e-06, + "loss": 8.2792, + "step": 1016500 + }, + { + "epoch": 10.83, + "learning_rate": 4.671005515970761e-06, + "loss": 8.1924, + "step": 1016600 + }, + { + "epoch": 10.83, + "learning_rate": 4.6625856782051895e-06, + "loss": 8.161, + "step": 1016700 + }, + { + "epoch": 10.83, + "learning_rate": 4.654173254840188e-06, + "loss": 8.1485, + "step": 1016800 + }, + { + "epoch": 10.83, + "learning_rate": 4.645768246530014e-06, + "loss": 8.1277, + "step": 1016900 + }, + { + "epoch": 10.83, + "learning_rate": 4.637370653928308e-06, + "loss": 8.2028, + "step": 1017000 + }, + { + "epoch": 10.83, + "learning_rate": 4.628980477688171e-06, + "loss": 8.2297, + "step": 1017100 + }, + { + "epoch": 10.83, + "learning_rate": 4.6205977184620944e-06, + "loss": 8.1868, + "step": 1017200 + }, + { + "epoch": 10.84, + "learning_rate": 4.612222376902009e-06, + "loss": 8.176, + "step": 1017300 + }, + { + "epoch": 10.84, + "learning_rate": 4.603854453659251e-06, + "loss": 8.1995, + "step": 1017400 + }, + { + "epoch": 10.84, + "learning_rate": 4.595493949384622e-06, + "loss": 8.2511, + "step": 1017500 + }, + { + "epoch": 10.84, + "learning_rate": 4.5871408647283125e-06, + "loss": 8.1638, + "step": 1017600 + }, + { + "epoch": 10.84, + "learning_rate": 4.578795200339925e-06, + "loss": 8.1356, + "step": 1017700 + }, + { + "epoch": 10.84, + "learning_rate": 4.570456956868519e-06, + "loss": 8.1545, + "step": 1017800 + }, + { + "epoch": 10.84, + "learning_rate": 4.562126134962563e-06, + "loss": 8.2112, + "step": 1017900 + }, + { + "epoch": 10.84, + "learning_rate": 4.553802735269941e-06, + "loss": 8.2263, + "step": 1018000 + }, + { + "epoch": 10.84, + "learning_rate": 4.545486758437944e-06, + "loss": 8.1047, + "step": 1018100 + }, + { + "epoch": 10.85, + "learning_rate": 4.537178205113346e-06, + "loss": 8.179, + "step": 1018200 + }, + { + "epoch": 10.85, + "learning_rate": 4.528877075942272e-06, + "loss": 8.1589, + "step": 1018300 + }, + { + "epoch": 10.85, + "learning_rate": 4.5205833715703305e-06, + "loss": 8.2569, + "step": 1018400 + }, + { + "epoch": 10.85, + "learning_rate": 4.512297092642481e-06, + "loss": 8.1874, + "step": 1018500 + }, + { + "epoch": 10.85, + "learning_rate": 4.504018239803187e-06, + "loss": 8.2364, + "step": 1018600 + }, + { + "epoch": 10.85, + "learning_rate": 4.4957468136962885e-06, + "loss": 8.1464, + "step": 1018700 + }, + { + "epoch": 10.85, + "learning_rate": 4.4874828149650385e-06, + "loss": 8.0435, + "step": 1018800 + }, + { + "epoch": 10.85, + "learning_rate": 4.479226244252133e-06, + "loss": 8.066, + "step": 1018900 + }, + { + "epoch": 10.85, + "learning_rate": 4.470977102199714e-06, + "loss": 8.2202, + "step": 1019000 + }, + { + "epoch": 10.85, + "learning_rate": 4.46273538944928e-06, + "loss": 8.2354, + "step": 1019100 + }, + { + "epoch": 10.86, + "learning_rate": 4.454501106641817e-06, + "loss": 8.1456, + "step": 1019200 + }, + { + "epoch": 10.86, + "learning_rate": 4.446274254417681e-06, + "loss": 8.1137, + "step": 1019300 + }, + { + "epoch": 10.86, + "learning_rate": 4.4380548334167045e-06, + "loss": 8.1255, + "step": 1019400 + }, + { + "epoch": 10.86, + "learning_rate": 4.429842844278098e-06, + "loss": 8.1706, + "step": 1019500 + }, + { + "epoch": 10.86, + "learning_rate": 4.421638287640517e-06, + "loss": 8.1942, + "step": 1019600 + }, + { + "epoch": 10.86, + "learning_rate": 4.4134411641419955e-06, + "loss": 8.1343, + "step": 1019700 + }, + { + "epoch": 10.86, + "learning_rate": 4.40525147442008e-06, + "loss": 8.1122, + "step": 1019800 + }, + { + "epoch": 10.86, + "learning_rate": 4.397069219111638e-06, + "loss": 8.2271, + "step": 1019900 + }, + { + "epoch": 10.86, + "learning_rate": 4.388894398853016e-06, + "loss": 8.1362, + "step": 1020000 + }, + { + "epoch": 10.87, + "learning_rate": 4.3807270142799946e-06, + "loss": 8.0984, + "step": 1020100 + }, + { + "epoch": 10.87, + "learning_rate": 4.372567066027733e-06, + "loss": 8.1858, + "step": 1020200 + }, + { + "epoch": 10.87, + "learning_rate": 4.364414554730834e-06, + "loss": 8.1965, + "step": 1020300 + }, + { + "epoch": 10.87, + "learning_rate": 4.35626948102329e-06, + "loss": 8.1554, + "step": 1020400 + }, + { + "epoch": 10.87, + "learning_rate": 4.3481318455385945e-06, + "loss": 8.1606, + "step": 1020500 + }, + { + "epoch": 10.87, + "learning_rate": 4.340001648909597e-06, + "loss": 8.1588, + "step": 1020600 + }, + { + "epoch": 10.87, + "learning_rate": 4.33187889176857e-06, + "loss": 8.2209, + "step": 1020700 + }, + { + "epoch": 10.87, + "learning_rate": 4.323763574747209e-06, + "loss": 8.1306, + "step": 1020800 + }, + { + "epoch": 10.87, + "learning_rate": 4.315655698476684e-06, + "loss": 8.2021, + "step": 1020900 + }, + { + "epoch": 10.87, + "learning_rate": 4.307555263587515e-06, + "loss": 8.2303, + "step": 1021000 + }, + { + "epoch": 10.88, + "learning_rate": 4.299462270709675e-06, + "loss": 8.2328, + "step": 1021100 + }, + { + "epoch": 10.88, + "learning_rate": 4.291376720472562e-06, + "loss": 8.129, + "step": 1021200 + }, + { + "epoch": 10.88, + "learning_rate": 4.2832986135050155e-06, + "loss": 8.1171, + "step": 1021300 + }, + { + "epoch": 10.88, + "learning_rate": 4.2752279504352234e-06, + "loss": 8.1512, + "step": 1021400 + }, + { + "epoch": 10.88, + "learning_rate": 4.267164731890872e-06, + "loss": 8.2078, + "step": 1021500 + }, + { + "epoch": 10.88, + "learning_rate": 4.259108958499014e-06, + "loss": 8.106, + "step": 1021600 + }, + { + "epoch": 10.88, + "learning_rate": 4.251060630886205e-06, + "loss": 8.1406, + "step": 1021700 + }, + { + "epoch": 10.88, + "learning_rate": 4.24301974967829e-06, + "loss": 8.1477, + "step": 1021800 + }, + { + "epoch": 10.88, + "learning_rate": 4.2349863155006445e-06, + "loss": 8.1484, + "step": 1021900 + }, + { + "epoch": 10.89, + "learning_rate": 4.2269603289780465e-06, + "loss": 8.2225, + "step": 1022000 + }, + { + "epoch": 10.89, + "learning_rate": 4.218941790734654e-06, + "loss": 8.1607, + "step": 1022100 + }, + { + "epoch": 10.89, + "learning_rate": 4.210930701394078e-06, + "loss": 8.1437, + "step": 1022200 + }, + { + "epoch": 10.89, + "learning_rate": 4.202927061579331e-06, + "loss": 8.1176, + "step": 1022300 + }, + { + "epoch": 10.89, + "learning_rate": 4.194930871912894e-06, + "loss": 8.1923, + "step": 1022400 + }, + { + "epoch": 10.89, + "learning_rate": 4.18694213301657e-06, + "loss": 8.186, + "step": 1022500 + }, + { + "epoch": 10.89, + "learning_rate": 4.178960845511692e-06, + "loss": 8.1791, + "step": 1022600 + }, + { + "epoch": 10.89, + "learning_rate": 4.170987010018934e-06, + "loss": 8.1922, + "step": 1022700 + }, + { + "epoch": 10.89, + "learning_rate": 4.163020627158465e-06, + "loss": 8.1816, + "step": 1022800 + }, + { + "epoch": 10.9, + "learning_rate": 4.155061697549789e-06, + "loss": 8.1617, + "step": 1022900 + }, + { + "epoch": 10.9, + "learning_rate": 4.14711022181189e-06, + "loss": 8.0877, + "step": 1023000 + }, + { + "epoch": 10.9, + "learning_rate": 4.139166200563149e-06, + "loss": 8.1473, + "step": 1023100 + }, + { + "epoch": 10.9, + "learning_rate": 4.131229634421396e-06, + "loss": 8.1667, + "step": 1023200 + }, + { + "epoch": 10.9, + "learning_rate": 4.123300524003826e-06, + "loss": 8.1357, + "step": 1023300 + }, + { + "epoch": 10.9, + "learning_rate": 4.1153788699271e-06, + "loss": 8.1509, + "step": 1023400 + }, + { + "epoch": 10.9, + "learning_rate": 4.107464672807293e-06, + "loss": 8.1959, + "step": 1023500 + }, + { + "epoch": 10.9, + "learning_rate": 4.09955793325989e-06, + "loss": 8.2907, + "step": 1023600 + }, + { + "epoch": 10.9, + "learning_rate": 4.091658651899788e-06, + "loss": 8.1283, + "step": 1023700 + }, + { + "epoch": 10.9, + "learning_rate": 4.083766829341329e-06, + "loss": 8.2119, + "step": 1023800 + }, + { + "epoch": 10.91, + "learning_rate": 4.075882466198256e-06, + "loss": 8.1347, + "step": 1023900 + }, + { + "epoch": 10.91, + "learning_rate": 4.068005563083732e-06, + "loss": 8.1926, + "step": 1024000 + }, + { + "epoch": 10.91, + "learning_rate": 4.0601361206103474e-06, + "loss": 8.1566, + "step": 1024100 + }, + { + "epoch": 10.91, + "learning_rate": 4.052274139390111e-06, + "loss": 8.1676, + "step": 1024200 + }, + { + "epoch": 10.91, + "learning_rate": 4.044419620034468e-06, + "loss": 8.1107, + "step": 1024300 + }, + { + "epoch": 10.91, + "learning_rate": 4.036572563154217e-06, + "loss": 8.1911, + "step": 1024400 + }, + { + "epoch": 10.91, + "learning_rate": 4.028732969359672e-06, + "loss": 8.1455, + "step": 1024500 + }, + { + "epoch": 10.91, + "learning_rate": 4.0209008392605e-06, + "loss": 8.205, + "step": 1024600 + }, + { + "epoch": 10.91, + "learning_rate": 4.013076173465802e-06, + "loss": 8.1595, + "step": 1024700 + }, + { + "epoch": 10.92, + "learning_rate": 4.005258972584103e-06, + "loss": 8.1888, + "step": 1024800 + }, + { + "epoch": 10.92, + "learning_rate": 3.997449237223372e-06, + "loss": 8.1964, + "step": 1024900 + }, + { + "epoch": 10.92, + "learning_rate": 3.989646967990945e-06, + "loss": 8.1616, + "step": 1025000 + }, + { + "epoch": 10.92, + "learning_rate": 3.981852165493616e-06, + "loss": 8.1802, + "step": 1025100 + }, + { + "epoch": 10.92, + "learning_rate": 3.974064830337576e-06, + "loss": 8.2811, + "step": 1025200 + }, + { + "epoch": 10.92, + "learning_rate": 3.966284963128464e-06, + "loss": 8.1282, + "step": 1025300 + }, + { + "epoch": 10.92, + "learning_rate": 3.958512564471318e-06, + "loss": 8.1211, + "step": 1025400 + }, + { + "epoch": 10.92, + "learning_rate": 3.950747634970597e-06, + "loss": 8.2075, + "step": 1025500 + }, + { + "epoch": 10.92, + "learning_rate": 3.942990175230155e-06, + "loss": 8.2101, + "step": 1025600 + }, + { + "epoch": 10.93, + "learning_rate": 3.9352401858533395e-06, + "loss": 8.1476, + "step": 1025700 + }, + { + "epoch": 10.93, + "learning_rate": 3.927497667442826e-06, + "loss": 8.0879, + "step": 1025800 + }, + { + "epoch": 10.93, + "learning_rate": 3.9197626206007756e-06, + "loss": 8.1009, + "step": 1025900 + }, + { + "epoch": 10.93, + "learning_rate": 3.912035045928719e-06, + "loss": 8.2665, + "step": 1026000 + }, + { + "epoch": 10.93, + "learning_rate": 3.904314944027665e-06, + "loss": 8.2303, + "step": 1026100 + }, + { + "epoch": 10.93, + "learning_rate": 3.896602315497988e-06, + "loss": 8.151, + "step": 1026200 + }, + { + "epoch": 10.93, + "learning_rate": 3.888897160939476e-06, + "loss": 8.1499, + "step": 1026300 + }, + { + "epoch": 10.93, + "learning_rate": 3.881199480951403e-06, + "loss": 8.1797, + "step": 1026400 + }, + { + "epoch": 10.93, + "learning_rate": 3.8735092761324036e-06, + "loss": 8.1987, + "step": 1026500 + }, + { + "epoch": 10.93, + "learning_rate": 3.865826547080531e-06, + "loss": 8.1953, + "step": 1026600 + }, + { + "epoch": 10.94, + "learning_rate": 3.858151294393275e-06, + "loss": 8.1431, + "step": 1026700 + }, + { + "epoch": 10.94, + "learning_rate": 3.850483518667558e-06, + "loss": 8.1245, + "step": 1026800 + }, + { + "epoch": 10.94, + "learning_rate": 3.842823220499692e-06, + "loss": 8.2034, + "step": 1026900 + }, + { + "epoch": 10.94, + "learning_rate": 3.835170400485421e-06, + "loss": 8.1498, + "step": 1027000 + }, + { + "epoch": 10.94, + "learning_rate": 3.8275250592199056e-06, + "loss": 8.118, + "step": 1027100 + }, + { + "epoch": 10.94, + "learning_rate": 3.8198871972977224e-06, + "loss": 8.1295, + "step": 1027200 + }, + { + "epoch": 10.94, + "learning_rate": 3.812256815312876e-06, + "loss": 8.2043, + "step": 1027300 + }, + { + "epoch": 10.94, + "learning_rate": 3.8046339138587682e-06, + "loss": 8.1329, + "step": 1027400 + }, + { + "epoch": 10.94, + "learning_rate": 3.797018493528237e-06, + "loss": 8.131, + "step": 1027500 + }, + { + "epoch": 10.95, + "learning_rate": 3.7894105549135526e-06, + "loss": 8.2063, + "step": 1027600 + }, + { + "epoch": 10.95, + "learning_rate": 3.781810098606364e-06, + "loss": 8.2135, + "step": 1027700 + }, + { + "epoch": 10.95, + "learning_rate": 3.7742171251977767e-06, + "loss": 8.1872, + "step": 1027800 + }, + { + "epoch": 10.95, + "learning_rate": 3.7666316352782615e-06, + "loss": 8.141, + "step": 1027900 + }, + { + "epoch": 10.95, + "learning_rate": 3.759053629437792e-06, + "loss": 8.1947, + "step": 1028000 + }, + { + "epoch": 10.95, + "learning_rate": 3.7514831082656856e-06, + "loss": 8.1625, + "step": 1028100 + }, + { + "epoch": 10.95, + "learning_rate": 3.7439200723506816e-06, + "loss": 8.1396, + "step": 1028200 + }, + { + "epoch": 10.95, + "learning_rate": 3.7363645222809996e-06, + "loss": 8.1444, + "step": 1028300 + }, + { + "epoch": 10.95, + "learning_rate": 3.728816458644213e-06, + "loss": 8.2012, + "step": 1028400 + }, + { + "epoch": 10.95, + "learning_rate": 3.721275882027342e-06, + "loss": 8.1598, + "step": 1028500 + }, + { + "epoch": 10.96, + "learning_rate": 3.713742793016806e-06, + "loss": 8.14, + "step": 1028600 + }, + { + "epoch": 10.96, + "learning_rate": 3.706217192198469e-06, + "loss": 8.1936, + "step": 1028700 + }, + { + "epoch": 10.96, + "learning_rate": 3.698699080157597e-06, + "loss": 8.1996, + "step": 1028800 + }, + { + "epoch": 10.96, + "learning_rate": 3.691188457478856e-06, + "loss": 8.1612, + "step": 1028900 + }, + { + "epoch": 10.96, + "learning_rate": 3.6836853247463666e-06, + "loss": 8.1568, + "step": 1029000 + }, + { + "epoch": 10.96, + "learning_rate": 3.6761896825436403e-06, + "loss": 8.1826, + "step": 1029100 + }, + { + "epoch": 10.96, + "learning_rate": 3.668701531453622e-06, + "loss": 8.2274, + "step": 1029200 + }, + { + "epoch": 10.96, + "learning_rate": 3.661220872058657e-06, + "loss": 8.1775, + "step": 1029300 + }, + { + "epoch": 10.96, + "learning_rate": 3.6537477049405133e-06, + "loss": 8.2205, + "step": 1029400 + }, + { + "epoch": 10.97, + "learning_rate": 3.6462820306804033e-06, + "loss": 8.1038, + "step": 1029500 + }, + { + "epoch": 10.97, + "learning_rate": 3.6388238498588967e-06, + "loss": 8.2139, + "step": 1029600 + }, + { + "epoch": 10.97, + "learning_rate": 3.631373163056051e-06, + "loss": 8.1179, + "step": 1029700 + }, + { + "epoch": 10.97, + "learning_rate": 3.6239299708512806e-06, + "loss": 8.1812, + "step": 1029800 + }, + { + "epoch": 10.97, + "learning_rate": 3.6164942738234675e-06, + "loss": 8.1502, + "step": 1029900 + }, + { + "epoch": 10.97, + "learning_rate": 3.6090660725508595e-06, + "loss": 8.1042, + "step": 1030000 + }, + { + "epoch": 10.97, + "learning_rate": 3.6016453676111837e-06, + "loss": 8.2273, + "step": 1030100 + }, + { + "epoch": 10.97, + "learning_rate": 3.594232159581512e-06, + "loss": 8.1705, + "step": 1030200 + }, + { + "epoch": 10.97, + "learning_rate": 3.586826449038394e-06, + "loss": 8.1795, + "step": 1030300 + }, + { + "epoch": 10.98, + "learning_rate": 3.579428236557769e-06, + "loss": 8.218, + "step": 1030400 + }, + { + "epoch": 10.98, + "learning_rate": 3.5720375227149773e-06, + "loss": 8.0814, + "step": 1030500 + }, + { + "epoch": 10.98, + "learning_rate": 3.564654308084836e-06, + "loss": 8.1121, + "step": 1030600 + }, + { + "epoch": 10.98, + "learning_rate": 3.5572785932414975e-06, + "loss": 8.1185, + "step": 1030700 + }, + { + "epoch": 10.98, + "learning_rate": 3.549910378758603e-06, + "loss": 8.1149, + "step": 1030800 + }, + { + "epoch": 10.98, + "learning_rate": 3.5425496652091384e-06, + "loss": 8.149, + "step": 1030900 + }, + { + "epoch": 10.98, + "learning_rate": 3.5351964531656125e-06, + "loss": 8.1604, + "step": 1031000 + }, + { + "epoch": 10.98, + "learning_rate": 3.527850743199812e-06, + "loss": 8.1321, + "step": 1031100 + }, + { + "epoch": 10.98, + "learning_rate": 3.5205125358830694e-06, + "loss": 8.169, + "step": 1031200 + }, + { + "epoch": 10.98, + "learning_rate": 3.51318183178605e-06, + "loss": 8.159, + "step": 1031300 + }, + { + "epoch": 10.99, + "learning_rate": 3.5058586314788753e-06, + "loss": 8.2282, + "step": 1031400 + }, + { + "epoch": 10.99, + "learning_rate": 3.498542935531046e-06, + "loss": 8.1479, + "step": 1031500 + }, + { + "epoch": 10.99, + "learning_rate": 3.49123474451154e-06, + "loss": 8.1351, + "step": 1031600 + }, + { + "epoch": 10.99, + "learning_rate": 3.4839340589887025e-06, + "loss": 8.1267, + "step": 1031700 + }, + { + "epoch": 10.99, + "learning_rate": 3.476640879530313e-06, + "loss": 8.235, + "step": 1031800 + }, + { + "epoch": 10.99, + "learning_rate": 3.4693552067035393e-06, + "loss": 8.1938, + "step": 1031900 + }, + { + "epoch": 10.99, + "learning_rate": 3.4620770410750182e-06, + "loss": 8.1665, + "step": 1032000 + }, + { + "epoch": 10.99, + "learning_rate": 3.4548063832107514e-06, + "loss": 8.1939, + "step": 1032100 + }, + { + "epoch": 10.99, + "learning_rate": 3.4475432336761983e-06, + "loss": 8.1368, + "step": 1032200 + }, + { + "epoch": 11.0, + "learning_rate": 3.4402875930361955e-06, + "loss": 8.1599, + "step": 1032300 + }, + { + "epoch": 11.0, + "learning_rate": 3.4330394618550364e-06, + "loss": 8.1248, + "step": 1032400 + }, + { + "epoch": 11.0, + "learning_rate": 3.4257988406963927e-06, + "loss": 8.1622, + "step": 1032500 + }, + { + "epoch": 11.0, + "learning_rate": 3.4185657301233574e-06, + "loss": 8.1641, + "step": 1032600 + }, + { + "epoch": 11.0, + "learning_rate": 3.4113401306984815e-06, + "loss": 8.2043, + "step": 1032700 + }, + { + "epoch": 11.0, + "learning_rate": 3.4041220429836927e-06, + "loss": 8.1148, + "step": 1032800 + }, + { + "epoch": 11.0, + "learning_rate": 3.39691146754032e-06, + "loss": 8.1443, + "step": 1032900 + }, + { + "epoch": 11.0, + "learning_rate": 3.3897084049291485e-06, + "loss": 8.1086, + "step": 1033000 + }, + { + "epoch": 11.0, + "learning_rate": 3.3825128557103624e-06, + "loss": 8.1941, + "step": 1033100 + }, + { + "epoch": 11.0, + "learning_rate": 3.3753248204435483e-06, + "loss": 8.2403, + "step": 1033200 + }, + { + "epoch": 11.01, + "learning_rate": 3.3681442996877364e-06, + "loss": 8.1904, + "step": 1033300 + }, + { + "epoch": 11.01, + "learning_rate": 3.360971294001336e-06, + "loss": 8.149, + "step": 1033400 + }, + { + "epoch": 11.01, + "learning_rate": 3.3538058039422115e-06, + "loss": 8.159, + "step": 1033500 + }, + { + "epoch": 11.01, + "learning_rate": 3.3466478300676173e-06, + "loss": 8.1642, + "step": 1033600 + }, + { + "epoch": 11.01, + "learning_rate": 3.3394973729342305e-06, + "loss": 8.1945, + "step": 1033700 + }, + { + "epoch": 11.01, + "learning_rate": 3.332354433098128e-06, + "loss": 8.0814, + "step": 1033800 + }, + { + "epoch": 11.01, + "learning_rate": 3.325219011114844e-06, + "loss": 8.1637, + "step": 1033900 + }, + { + "epoch": 11.01, + "learning_rate": 3.3180911075392894e-06, + "loss": 8.1349, + "step": 1034000 + }, + { + "epoch": 11.01, + "learning_rate": 3.310970722925799e-06, + "loss": 8.1414, + "step": 1034100 + }, + { + "epoch": 11.02, + "learning_rate": 3.303857857828119e-06, + "loss": 8.094, + "step": 1034200 + }, + { + "epoch": 11.02, + "learning_rate": 3.296752512799428e-06, + "loss": 8.1615, + "step": 1034300 + }, + { + "epoch": 11.02, + "learning_rate": 3.2896546883923075e-06, + "loss": 8.1732, + "step": 1034400 + }, + { + "epoch": 11.02, + "learning_rate": 3.2825643851587484e-06, + "loss": 8.0914, + "step": 1034500 + }, + { + "epoch": 11.02, + "learning_rate": 3.2754816036501877e-06, + "loss": 8.1377, + "step": 1034600 + }, + { + "epoch": 11.02, + "learning_rate": 3.2684063444174297e-06, + "loss": 8.1986, + "step": 1034700 + }, + { + "epoch": 11.02, + "learning_rate": 3.2613386080107335e-06, + "loss": 8.1858, + "step": 1034800 + }, + { + "epoch": 11.02, + "learning_rate": 3.2542783949797263e-06, + "loss": 8.1693, + "step": 1034900 + }, + { + "epoch": 11.02, + "learning_rate": 3.2472257058735357e-06, + "loss": 8.1417, + "step": 1035000 + }, + { + "epoch": 11.03, + "learning_rate": 3.2401805412406005e-06, + "loss": 8.1866, + "step": 1035100 + }, + { + "epoch": 11.03, + "learning_rate": 3.2331429016288607e-06, + "loss": 8.1385, + "step": 1035200 + }, + { + "epoch": 11.03, + "learning_rate": 3.226112787585589e-06, + "loss": 8.1478, + "step": 1035300 + }, + { + "epoch": 11.03, + "learning_rate": 3.2190901996575707e-06, + "loss": 8.1386, + "step": 1035400 + }, + { + "epoch": 11.03, + "learning_rate": 3.2120751383909243e-06, + "loss": 8.1697, + "step": 1035500 + }, + { + "epoch": 11.03, + "learning_rate": 3.2050676043312134e-06, + "loss": 8.0937, + "step": 1035600 + }, + { + "epoch": 11.03, + "learning_rate": 3.198067598023402e-06, + "loss": 8.1889, + "step": 1035700 + }, + { + "epoch": 11.03, + "learning_rate": 3.1910751200119213e-06, + "loss": 8.1709, + "step": 1035800 + }, + { + "epoch": 11.03, + "learning_rate": 3.184090170840537e-06, + "loss": 8.1549, + "step": 1035900 + }, + { + "epoch": 11.03, + "learning_rate": 3.1771127510525023e-06, + "loss": 8.1522, + "step": 1036000 + }, + { + "epoch": 11.04, + "learning_rate": 3.170142861190406e-06, + "loss": 8.1565, + "step": 1036100 + }, + { + "epoch": 11.04, + "learning_rate": 3.163180501796359e-06, + "loss": 8.0805, + "step": 1036200 + }, + { + "epoch": 11.04, + "learning_rate": 3.156225673411761e-06, + "loss": 8.1734, + "step": 1036300 + }, + { + "epoch": 11.04, + "learning_rate": 3.149278376577536e-06, + "loss": 8.1888, + "step": 1036400 + }, + { + "epoch": 11.04, + "learning_rate": 3.1423386118339505e-06, + "loss": 8.1214, + "step": 1036500 + }, + { + "epoch": 11.04, + "learning_rate": 3.135406379720729e-06, + "loss": 8.0976, + "step": 1036600 + }, + { + "epoch": 11.04, + "learning_rate": 3.1284816807769734e-06, + "loss": 8.2447, + "step": 1036700 + }, + { + "epoch": 11.04, + "learning_rate": 3.1215645155412308e-06, + "loss": 8.1847, + "step": 1036800 + }, + { + "epoch": 11.04, + "learning_rate": 3.1146548845514488e-06, + "loss": 8.1547, + "step": 1036900 + }, + { + "epoch": 11.05, + "learning_rate": 3.1077527883449973e-06, + "loss": 8.1656, + "step": 1037000 + }, + { + "epoch": 11.05, + "learning_rate": 3.100858227458636e-06, + "loss": 8.1057, + "step": 1037100 + }, + { + "epoch": 11.05, + "learning_rate": 3.0939712024285583e-06, + "loss": 8.1357, + "step": 1037200 + }, + { + "epoch": 11.05, + "learning_rate": 3.087091713790391e-06, + "loss": 8.1142, + "step": 1037300 + }, + { + "epoch": 11.05, + "learning_rate": 3.080219762079117e-06, + "loss": 8.1489, + "step": 1037400 + }, + { + "epoch": 11.05, + "learning_rate": 3.0733553478291987e-06, + "loss": 8.204, + "step": 1037500 + }, + { + "epoch": 11.05, + "learning_rate": 3.0664984715744525e-06, + "loss": 8.1687, + "step": 1037600 + }, + { + "epoch": 11.05, + "learning_rate": 3.059649133848186e-06, + "loss": 8.1313, + "step": 1037700 + }, + { + "epoch": 11.05, + "learning_rate": 3.0528073351830166e-06, + "loss": 8.1727, + "step": 1037800 + }, + { + "epoch": 11.06, + "learning_rate": 3.045973076111075e-06, + "loss": 8.1485, + "step": 1037900 + }, + { + "epoch": 11.06, + "learning_rate": 3.039146357163836e-06, + "loss": 8.1421, + "step": 1038000 + }, + { + "epoch": 11.06, + "learning_rate": 3.0323271788722405e-06, + "loss": 8.1853, + "step": 1038100 + }, + { + "epoch": 11.06, + "learning_rate": 3.025515541766588e-06, + "loss": 8.1543, + "step": 1038200 + }, + { + "epoch": 11.06, + "learning_rate": 3.018711446376632e-06, + "loss": 8.2307, + "step": 1038300 + }, + { + "epoch": 11.06, + "learning_rate": 3.0119148932315378e-06, + "loss": 8.1174, + "step": 1038400 + }, + { + "epoch": 11.06, + "learning_rate": 3.0051258828598603e-06, + "loss": 8.1043, + "step": 1038500 + }, + { + "epoch": 11.06, + "learning_rate": 2.998344415789578e-06, + "loss": 8.0793, + "step": 1038600 + }, + { + "epoch": 11.06, + "learning_rate": 2.9915704925480904e-06, + "loss": 8.148, + "step": 1038700 + }, + { + "epoch": 11.06, + "learning_rate": 2.9848041136622427e-06, + "loss": 8.111, + "step": 1038800 + }, + { + "epoch": 11.07, + "learning_rate": 2.9780452796581926e-06, + "loss": 8.2561, + "step": 1038900 + }, + { + "epoch": 11.07, + "learning_rate": 2.971293991061619e-06, + "loss": 8.1795, + "step": 1039000 + }, + { + "epoch": 11.07, + "learning_rate": 2.9645502483975463e-06, + "loss": 8.2037, + "step": 1039100 + }, + { + "epoch": 11.07, + "learning_rate": 2.9578140521904664e-06, + "loss": 8.1397, + "step": 1039200 + }, + { + "epoch": 11.07, + "learning_rate": 2.9510854029642265e-06, + "loss": 8.1766, + "step": 1039300 + }, + { + "epoch": 11.07, + "learning_rate": 2.9443643012421194e-06, + "loss": 8.1828, + "step": 1039400 + }, + { + "epoch": 11.07, + "learning_rate": 2.9376507475468606e-06, + "loss": 8.132, + "step": 1039500 + }, + { + "epoch": 11.07, + "learning_rate": 2.9309447424005433e-06, + "loss": 8.1732, + "step": 1039600 + }, + { + "epoch": 11.07, + "learning_rate": 2.9242462863247057e-06, + "loss": 8.2043, + "step": 1039700 + }, + { + "epoch": 11.08, + "learning_rate": 2.9175553798402866e-06, + "loss": 8.2922, + "step": 1039800 + }, + { + "epoch": 11.08, + "learning_rate": 2.910872023467648e-06, + "loss": 8.2064, + "step": 1039900 + }, + { + "epoch": 11.08, + "learning_rate": 2.90419621772654e-06, + "loss": 8.1839, + "step": 1040000 + }, + { + "epoch": 11.08, + "learning_rate": 2.8975279631361263e-06, + "loss": 8.1221, + "step": 1040100 + }, + { + "epoch": 11.08, + "learning_rate": 2.8908672602150354e-06, + "loss": 8.1507, + "step": 1040200 + }, + { + "epoch": 11.08, + "learning_rate": 2.8842141094812537e-06, + "loss": 8.12, + "step": 1040300 + }, + { + "epoch": 11.08, + "learning_rate": 2.8775685114522e-06, + "loss": 8.1756, + "step": 1040400 + }, + { + "epoch": 11.08, + "learning_rate": 2.8709304666446836e-06, + "loss": 8.2004, + "step": 1040500 + }, + { + "epoch": 11.08, + "learning_rate": 2.8642999755749688e-06, + "loss": 8.2025, + "step": 1040600 + }, + { + "epoch": 11.08, + "learning_rate": 2.8576770387587104e-06, + "loss": 8.1401, + "step": 1040700 + }, + { + "epoch": 11.09, + "learning_rate": 2.8510616567109515e-06, + "loss": 8.1751, + "step": 1040800 + }, + { + "epoch": 11.09, + "learning_rate": 2.8444538299462033e-06, + "loss": 8.1864, + "step": 1040900 + }, + { + "epoch": 11.09, + "learning_rate": 2.837853558978332e-06, + "loss": 8.2719, + "step": 1041000 + }, + { + "epoch": 11.09, + "learning_rate": 2.831260844320649e-06, + "loss": 8.1905, + "step": 1041100 + }, + { + "epoch": 11.09, + "learning_rate": 2.824675686485867e-06, + "loss": 8.1659, + "step": 1041200 + }, + { + "epoch": 11.09, + "learning_rate": 2.8180980859861317e-06, + "loss": 8.2104, + "step": 1041300 + }, + { + "epoch": 11.09, + "learning_rate": 2.8115280433329672e-06, + "loss": 8.2325, + "step": 1041400 + }, + { + "epoch": 11.09, + "learning_rate": 2.8049655590373312e-06, + "loss": 8.179, + "step": 1041500 + }, + { + "epoch": 11.09, + "learning_rate": 2.7984106336095716e-06, + "loss": 8.1528, + "step": 1041600 + }, + { + "epoch": 11.1, + "learning_rate": 2.7918632675594913e-06, + "loss": 8.1966, + "step": 1041700 + }, + { + "epoch": 11.1, + "learning_rate": 2.7853234613962718e-06, + "loss": 8.1883, + "step": 1041800 + }, + { + "epoch": 11.1, + "learning_rate": 2.778791215628518e-06, + "loss": 8.1366, + "step": 1041900 + }, + { + "epoch": 11.1, + "learning_rate": 2.7722665307642224e-06, + "loss": 8.2356, + "step": 1042000 + }, + { + "epoch": 11.1, + "learning_rate": 2.765749407310847e-06, + "loss": 8.1906, + "step": 1042100 + }, + { + "epoch": 11.1, + "learning_rate": 2.7592398457751967e-06, + "loss": 8.1217, + "step": 1042200 + }, + { + "epoch": 11.1, + "learning_rate": 2.752737846663533e-06, + "loss": 8.0966, + "step": 1042300 + }, + { + "epoch": 11.1, + "learning_rate": 2.7462434104815084e-06, + "loss": 8.248, + "step": 1042400 + }, + { + "epoch": 11.1, + "learning_rate": 2.739756537734206e-06, + "loss": 8.2023, + "step": 1042500 + }, + { + "epoch": 11.11, + "learning_rate": 2.733277228926123e-06, + "loss": 8.1293, + "step": 1042600 + }, + { + "epoch": 11.11, + "learning_rate": 2.7268054845611346e-06, + "loss": 8.2041, + "step": 1042700 + }, + { + "epoch": 11.11, + "learning_rate": 2.7203413051425377e-06, + "loss": 8.0825, + "step": 1042800 + }, + { + "epoch": 11.11, + "learning_rate": 2.7138846911730853e-06, + "loss": 8.2198, + "step": 1042900 + }, + { + "epoch": 11.11, + "learning_rate": 2.7074356431548985e-06, + "loss": 8.177, + "step": 1043000 + }, + { + "epoch": 11.11, + "learning_rate": 2.7009941615894985e-06, + "loss": 8.1339, + "step": 1043100 + }, + { + "epoch": 11.11, + "learning_rate": 2.694560246977873e-06, + "loss": 8.088, + "step": 1043200 + }, + { + "epoch": 11.11, + "learning_rate": 2.6881338998203776e-06, + "loss": 8.1801, + "step": 1043300 + }, + { + "epoch": 11.11, + "learning_rate": 2.6817151206167678e-06, + "loss": 8.1404, + "step": 1043400 + }, + { + "epoch": 11.11, + "learning_rate": 2.6753039098662556e-06, + "loss": 8.1099, + "step": 1043500 + }, + { + "epoch": 11.12, + "learning_rate": 2.6689002680674426e-06, + "loss": 8.1102, + "step": 1043600 + }, + { + "epoch": 11.12, + "learning_rate": 2.6625041957183293e-06, + "loss": 8.1479, + "step": 1043700 + }, + { + "epoch": 11.12, + "learning_rate": 2.6561156933163413e-06, + "loss": 8.1939, + "step": 1043800 + }, + { + "epoch": 11.12, + "learning_rate": 2.649734761358302e-06, + "loss": 8.1749, + "step": 1043900 + }, + { + "epoch": 11.12, + "learning_rate": 2.643361400340505e-06, + "loss": 8.178, + "step": 1044000 + }, + { + "epoch": 11.12, + "learning_rate": 2.6369956107585416e-06, + "loss": 8.158, + "step": 1044100 + }, + { + "epoch": 11.12, + "learning_rate": 2.630637393107527e-06, + "loss": 8.1713, + "step": 1044200 + }, + { + "epoch": 11.12, + "learning_rate": 2.624286747881899e-06, + "loss": 8.1735, + "step": 1044300 + }, + { + "epoch": 11.12, + "learning_rate": 2.617943675575607e-06, + "loss": 8.1668, + "step": 1044400 + }, + { + "epoch": 11.13, + "learning_rate": 2.611608176681901e-06, + "loss": 8.2041, + "step": 1044500 + }, + { + "epoch": 11.13, + "learning_rate": 2.6052802516935094e-06, + "loss": 8.1503, + "step": 1044600 + }, + { + "epoch": 11.13, + "learning_rate": 2.59895990110256e-06, + "loss": 8.1894, + "step": 1044700 + }, + { + "epoch": 11.13, + "learning_rate": 2.5926471254005933e-06, + "loss": 8.1826, + "step": 1044800 + }, + { + "epoch": 11.13, + "learning_rate": 2.586341925078528e-06, + "loss": 8.184, + "step": 1044900 + }, + { + "epoch": 11.13, + "learning_rate": 2.5800443006267384e-06, + "loss": 8.115, + "step": 1045000 + }, + { + "epoch": 11.13, + "learning_rate": 2.573754252535021e-06, + "loss": 8.2197, + "step": 1045100 + }, + { + "epoch": 11.13, + "learning_rate": 2.567471781292485e-06, + "loss": 8.2214, + "step": 1045200 + }, + { + "epoch": 11.13, + "learning_rate": 2.5611968873877844e-06, + "loss": 8.188, + "step": 1045300 + }, + { + "epoch": 11.13, + "learning_rate": 2.554929571308873e-06, + "loss": 8.15, + "step": 1045400 + }, + { + "epoch": 11.14, + "learning_rate": 2.5486698335431938e-06, + "loss": 8.2103, + "step": 1045500 + }, + { + "epoch": 11.14, + "learning_rate": 2.5424176745775354e-06, + "loss": 8.1899, + "step": 1045600 + }, + { + "epoch": 11.14, + "learning_rate": 2.536173094898153e-06, + "loss": 8.1963, + "step": 1045700 + }, + { + "epoch": 11.14, + "learning_rate": 2.5299360949906812e-06, + "loss": 8.1858, + "step": 1045800 + }, + { + "epoch": 11.14, + "learning_rate": 2.5237066753401763e-06, + "loss": 8.1635, + "step": 1045900 + }, + { + "epoch": 11.14, + "learning_rate": 2.517484836431083e-06, + "loss": 8.1573, + "step": 1046000 + }, + { + "epoch": 11.14, + "learning_rate": 2.5112705787473043e-06, + "loss": 8.0823, + "step": 1046100 + }, + { + "epoch": 11.14, + "learning_rate": 2.5050639027720977e-06, + "loss": 8.0982, + "step": 1046200 + }, + { + "epoch": 11.14, + "learning_rate": 2.498864808988177e-06, + "loss": 8.1889, + "step": 1046300 + }, + { + "epoch": 11.15, + "learning_rate": 2.4926732978776124e-06, + "loss": 8.1727, + "step": 1046400 + }, + { + "epoch": 11.15, + "learning_rate": 2.486489369921963e-06, + "loss": 8.1762, + "step": 1046500 + }, + { + "epoch": 11.15, + "learning_rate": 2.4803130256021213e-06, + "loss": 8.1427, + "step": 1046600 + }, + { + "epoch": 11.15, + "learning_rate": 2.4741442653984372e-06, + "loss": 8.2087, + "step": 1046700 + }, + { + "epoch": 11.15, + "learning_rate": 2.467983089790649e-06, + "loss": 8.1842, + "step": 1046800 + }, + { + "epoch": 11.15, + "learning_rate": 2.461829499257917e-06, + "loss": 8.1474, + "step": 1046900 + }, + { + "epoch": 11.15, + "learning_rate": 2.4556834942788044e-06, + "loss": 8.1561, + "step": 1047000 + }, + { + "epoch": 11.15, + "learning_rate": 2.449545075331283e-06, + "loss": 8.2226, + "step": 1047100 + }, + { + "epoch": 11.15, + "learning_rate": 2.443414242892739e-06, + "loss": 8.1554, + "step": 1047200 + }, + { + "epoch": 11.16, + "learning_rate": 2.4372909974399783e-06, + "loss": 8.0986, + "step": 1047300 + }, + { + "epoch": 11.16, + "learning_rate": 2.4311753394491986e-06, + "loss": 8.1729, + "step": 1047400 + }, + { + "epoch": 11.16, + "learning_rate": 2.4250672693960087e-06, + "loss": 8.1662, + "step": 1047500 + }, + { + "epoch": 11.16, + "learning_rate": 2.41896678775545e-06, + "loss": 8.2222, + "step": 1047600 + }, + { + "epoch": 11.16, + "learning_rate": 2.4128738950019436e-06, + "loss": 8.1599, + "step": 1047700 + }, + { + "epoch": 11.16, + "learning_rate": 2.4067885916093324e-06, + "loss": 8.1211, + "step": 1047800 + }, + { + "epoch": 11.16, + "learning_rate": 2.4007108780508826e-06, + "loss": 8.1367, + "step": 1047900 + }, + { + "epoch": 11.16, + "learning_rate": 2.394640754799249e-06, + "loss": 8.1539, + "step": 1048000 + }, + { + "epoch": 11.16, + "learning_rate": 2.388578222326521e-06, + "loss": 8.2016, + "step": 1048100 + }, + { + "epoch": 11.16, + "learning_rate": 2.3825232811041653e-06, + "loss": 8.1553, + "step": 1048200 + }, + { + "epoch": 11.17, + "learning_rate": 2.3764759316030727e-06, + "loss": 8.1142, + "step": 1048300 + }, + { + "epoch": 11.17, + "learning_rate": 2.3704361742935667e-06, + "loss": 8.1542, + "step": 1048400 + }, + { + "epoch": 11.17, + "learning_rate": 2.364404009645349e-06, + "loss": 8.1482, + "step": 1048500 + }, + { + "epoch": 11.17, + "learning_rate": 2.358379438127545e-06, + "loss": 8.134, + "step": 1048600 + }, + { + "epoch": 11.17, + "learning_rate": 2.352362460208668e-06, + "loss": 8.1582, + "step": 1048700 + }, + { + "epoch": 11.17, + "learning_rate": 2.3463530763566777e-06, + "loss": 8.1058, + "step": 1048800 + }, + { + "epoch": 11.17, + "learning_rate": 2.340351287038922e-06, + "loss": 8.1615, + "step": 1048900 + }, + { + "epoch": 11.17, + "learning_rate": 2.3343570927221615e-06, + "loss": 8.1482, + "step": 1049000 + }, + { + "epoch": 11.17, + "learning_rate": 2.3283704938725558e-06, + "loss": 8.1891, + "step": 1049100 + }, + { + "epoch": 11.18, + "learning_rate": 2.3223914909557e-06, + "loss": 8.2055, + "step": 1049200 + }, + { + "epoch": 11.18, + "learning_rate": 2.316420084436566e-06, + "loss": 8.1712, + "step": 1049300 + }, + { + "epoch": 11.18, + "learning_rate": 2.3104562747795487e-06, + "loss": 8.1346, + "step": 1049400 + }, + { + "epoch": 11.18, + "learning_rate": 2.304500062448467e-06, + "loss": 8.1681, + "step": 1049500 + }, + { + "epoch": 11.18, + "learning_rate": 2.2985514479065383e-06, + "loss": 8.1149, + "step": 1049600 + }, + { + "epoch": 11.18, + "learning_rate": 2.292610431616382e-06, + "loss": 8.1986, + "step": 1049700 + }, + { + "epoch": 11.18, + "learning_rate": 2.286677014040006e-06, + "loss": 8.13, + "step": 1049800 + }, + { + "epoch": 11.18, + "learning_rate": 2.280751195638897e-06, + "loss": 8.1313, + "step": 1049900 + }, + { + "epoch": 11.18, + "learning_rate": 2.2748329768738864e-06, + "loss": 8.1553, + "step": 1050000 + }, + { + "epoch": 11.18, + "learning_rate": 2.2689223582052276e-06, + "loss": 8.1662, + "step": 1050100 + }, + { + "epoch": 11.19, + "learning_rate": 2.263019340092587e-06, + "loss": 8.2291, + "step": 1050200 + }, + { + "epoch": 11.19, + "learning_rate": 2.2571239229950745e-06, + "loss": 8.1562, + "step": 1050300 + }, + { + "epoch": 11.19, + "learning_rate": 2.251236107371135e-06, + "loss": 8.1386, + "step": 1050400 + }, + { + "epoch": 11.19, + "learning_rate": 2.245355893678702e-06, + "loss": 8.1812, + "step": 1050500 + }, + { + "epoch": 11.19, + "learning_rate": 2.2394832823750434e-06, + "loss": 8.1169, + "step": 1050600 + }, + { + "epoch": 11.19, + "learning_rate": 2.233618273916904e-06, + "loss": 8.1434, + "step": 1050700 + }, + { + "epoch": 11.19, + "learning_rate": 2.227760868760398e-06, + "loss": 8.1505, + "step": 1050800 + }, + { + "epoch": 11.19, + "learning_rate": 2.221911067361049e-06, + "loss": 8.2015, + "step": 1050900 + }, + { + "epoch": 11.19, + "learning_rate": 2.216068870173793e-06, + "loss": 8.2392, + "step": 1051000 + }, + { + "epoch": 11.2, + "learning_rate": 2.2102342776530003e-06, + "loss": 8.1489, + "step": 1051100 + }, + { + "epoch": 11.2, + "learning_rate": 2.204407290252397e-06, + "loss": 8.1621, + "step": 1051200 + }, + { + "epoch": 11.2, + "learning_rate": 2.198587908425165e-06, + "loss": 8.1399, + "step": 1051300 + }, + { + "epoch": 11.2, + "learning_rate": 2.192776132623886e-06, + "loss": 8.1611, + "step": 1051400 + }, + { + "epoch": 11.2, + "learning_rate": 2.186971963300544e-06, + "loss": 8.2049, + "step": 1051500 + }, + { + "epoch": 11.2, + "learning_rate": 2.18117540090651e-06, + "loss": 8.1541, + "step": 1051600 + }, + { + "epoch": 11.2, + "learning_rate": 2.17538644589258e-06, + "loss": 8.2024, + "step": 1051700 + }, + { + "epoch": 11.2, + "learning_rate": 2.1696050987089933e-06, + "loss": 8.1375, + "step": 1051800 + }, + { + "epoch": 11.2, + "learning_rate": 2.163831359805346e-06, + "loss": 8.2063, + "step": 1051900 + }, + { + "epoch": 11.21, + "learning_rate": 2.1580652296306682e-06, + "loss": 8.1625, + "step": 1052000 + }, + { + "epoch": 11.21, + "learning_rate": 2.152306708633367e-06, + "loss": 8.1666, + "step": 1052100 + }, + { + "epoch": 11.21, + "learning_rate": 2.146555797261329e-06, + "loss": 8.0352, + "step": 1052200 + }, + { + "epoch": 11.21, + "learning_rate": 2.1408124959617636e-06, + "loss": 8.1968, + "step": 1052300 + }, + { + "epoch": 11.21, + "learning_rate": 2.1350768051813575e-06, + "loss": 8.1438, + "step": 1052400 + }, + { + "epoch": 11.21, + "learning_rate": 2.1293487253661424e-06, + "loss": 8.178, + "step": 1052500 + }, + { + "epoch": 11.21, + "learning_rate": 2.1236282569616296e-06, + "loss": 8.2433, + "step": 1052600 + }, + { + "epoch": 11.21, + "learning_rate": 2.1179154004126622e-06, + "loss": 8.1059, + "step": 1052700 + }, + { + "epoch": 11.21, + "learning_rate": 2.112210156163563e-06, + "loss": 8.1803, + "step": 1052800 + }, + { + "epoch": 11.21, + "learning_rate": 2.106512524658e-06, + "loss": 8.1522, + "step": 1052900 + }, + { + "epoch": 11.22, + "learning_rate": 2.1008225063391172e-06, + "loss": 8.166, + "step": 1053000 + }, + { + "epoch": 11.22, + "learning_rate": 2.0951401016493953e-06, + "loss": 8.161, + "step": 1053100 + }, + { + "epoch": 11.22, + "learning_rate": 2.0894653110307584e-06, + "loss": 8.1215, + "step": 1053200 + }, + { + "epoch": 11.22, + "learning_rate": 2.0837981349245526e-06, + "loss": 8.1473, + "step": 1053300 + }, + { + "epoch": 11.22, + "learning_rate": 2.078138573771493e-06, + "loss": 8.1434, + "step": 1053400 + }, + { + "epoch": 11.22, + "learning_rate": 2.0724866280117384e-06, + "loss": 8.1682, + "step": 1053500 + }, + { + "epoch": 11.22, + "learning_rate": 2.066842298084837e-06, + "loss": 8.1743, + "step": 1053600 + }, + { + "epoch": 11.22, + "learning_rate": 2.0612055844297597e-06, + "loss": 8.1516, + "step": 1053700 + }, + { + "epoch": 11.22, + "learning_rate": 2.055576487484856e-06, + "loss": 8.1454, + "step": 1053800 + }, + { + "epoch": 11.23, + "learning_rate": 2.0499550076879206e-06, + "loss": 8.1456, + "step": 1053900 + }, + { + "epoch": 11.23, + "learning_rate": 2.044341145476103e-06, + "loss": 8.1417, + "step": 1054000 + }, + { + "epoch": 11.23, + "learning_rate": 2.0387349012860544e-06, + "loss": 8.1626, + "step": 1054100 + }, + { + "epoch": 11.23, + "learning_rate": 2.0331362755537042e-06, + "loss": 8.1581, + "step": 1054200 + }, + { + "epoch": 11.23, + "learning_rate": 2.0275452687145036e-06, + "loss": 8.1429, + "step": 1054300 + }, + { + "epoch": 11.23, + "learning_rate": 2.0219618812032493e-06, + "loss": 8.1783, + "step": 1054400 + }, + { + "epoch": 11.23, + "learning_rate": 2.0163861134541606e-06, + "loss": 8.1207, + "step": 1054500 + }, + { + "epoch": 11.23, + "learning_rate": 2.0108179659008684e-06, + "loss": 8.1019, + "step": 1054600 + }, + { + "epoch": 11.23, + "learning_rate": 2.0052574389764156e-06, + "loss": 8.2469, + "step": 1054700 + }, + { + "epoch": 11.24, + "learning_rate": 1.9997045331132334e-06, + "loss": 8.1509, + "step": 1054800 + }, + { + "epoch": 11.24, + "learning_rate": 1.9941592487431883e-06, + "loss": 8.0951, + "step": 1054900 + }, + { + "epoch": 11.24, + "learning_rate": 1.9886215862975123e-06, + "loss": 8.1818, + "step": 1055000 + }, + { + "epoch": 11.24, + "learning_rate": 1.9830915462068833e-06, + "loss": 8.1687, + "step": 1055100 + }, + { + "epoch": 11.24, + "learning_rate": 1.9775691289013796e-06, + "loss": 8.0734, + "step": 1055200 + }, + { + "epoch": 11.24, + "learning_rate": 1.9720543348104804e-06, + "loss": 8.0789, + "step": 1055300 + }, + { + "epoch": 11.24, + "learning_rate": 1.966547164363053e-06, + "loss": 8.1646, + "step": 1055400 + }, + { + "epoch": 11.24, + "learning_rate": 1.9610476179874106e-06, + "loss": 8.1853, + "step": 1055500 + }, + { + "epoch": 11.24, + "learning_rate": 1.9555556961112444e-06, + "loss": 8.1188, + "step": 1055600 + }, + { + "epoch": 11.24, + "learning_rate": 1.950071399161657e-06, + "loss": 8.1796, + "step": 1055700 + }, + { + "epoch": 11.25, + "learning_rate": 1.9445947275651743e-06, + "loss": 8.1422, + "step": 1055800 + }, + { + "epoch": 11.25, + "learning_rate": 1.9391256817477222e-06, + "loss": 8.2041, + "step": 1055900 + }, + { + "epoch": 11.25, + "learning_rate": 1.9336642621346155e-06, + "loss": 8.2535, + "step": 1056000 + }, + { + "epoch": 11.25, + "learning_rate": 1.928210469150582e-06, + "loss": 8.1493, + "step": 1056100 + }, + { + "epoch": 11.25, + "learning_rate": 1.9227643032197816e-06, + "loss": 8.1564, + "step": 1056200 + }, + { + "epoch": 11.25, + "learning_rate": 1.917325764765754e-06, + "loss": 8.1962, + "step": 1056300 + }, + { + "epoch": 11.25, + "learning_rate": 1.911894854211471e-06, + "loss": 8.158, + "step": 1056400 + }, + { + "epoch": 11.25, + "learning_rate": 1.9064715719792626e-06, + "loss": 8.1112, + "step": 1056500 + }, + { + "epoch": 11.25, + "learning_rate": 1.9010559184909239e-06, + "loss": 8.1476, + "step": 1056600 + }, + { + "epoch": 11.26, + "learning_rate": 1.8956478941676293e-06, + "loss": 8.1776, + "step": 1056700 + }, + { + "epoch": 11.26, + "learning_rate": 1.8902474994299535e-06, + "loss": 8.1067, + "step": 1056800 + }, + { + "epoch": 11.26, + "learning_rate": 1.8848547346978718e-06, + "loss": 8.1555, + "step": 1056900 + }, + { + "epoch": 11.26, + "learning_rate": 1.8794696003908153e-06, + "loss": 8.1136, + "step": 1057000 + }, + { + "epoch": 11.26, + "learning_rate": 1.874092096927571e-06, + "loss": 8.197, + "step": 1057100 + }, + { + "epoch": 11.26, + "learning_rate": 1.868722224726338e-06, + "loss": 8.2468, + "step": 1057200 + }, + { + "epoch": 11.26, + "learning_rate": 1.8633599842047267e-06, + "loss": 8.1919, + "step": 1057300 + }, + { + "epoch": 11.26, + "learning_rate": 1.858005375779781e-06, + "loss": 8.1866, + "step": 1057400 + }, + { + "epoch": 11.26, + "learning_rate": 1.8526583998679236e-06, + "loss": 8.1114, + "step": 1057500 + }, + { + "epoch": 11.26, + "learning_rate": 1.8473190568849885e-06, + "loss": 8.2272, + "step": 1057600 + }, + { + "epoch": 11.27, + "learning_rate": 1.841987347246199e-06, + "loss": 8.1656, + "step": 1057700 + }, + { + "epoch": 11.27, + "learning_rate": 1.8366632713662236e-06, + "loss": 8.2473, + "step": 1057800 + }, + { + "epoch": 11.27, + "learning_rate": 1.8313468296591086e-06, + "loss": 8.1606, + "step": 1057900 + }, + { + "epoch": 11.27, + "learning_rate": 1.8260380225383123e-06, + "loss": 8.1368, + "step": 1058000 + }, + { + "epoch": 11.27, + "learning_rate": 1.8207368504167045e-06, + "loss": 8.1541, + "step": 1058100 + }, + { + "epoch": 11.27, + "learning_rate": 1.8154433137065662e-06, + "loss": 8.2416, + "step": 1058200 + }, + { + "epoch": 11.27, + "learning_rate": 1.8101574128195687e-06, + "loss": 8.158, + "step": 1058300 + }, + { + "epoch": 11.27, + "learning_rate": 1.8048791481667825e-06, + "loss": 8.1626, + "step": 1058400 + }, + { + "epoch": 11.27, + "learning_rate": 1.799608520158713e-06, + "loss": 8.168, + "step": 1058500 + }, + { + "epoch": 11.28, + "learning_rate": 1.7943455292052657e-06, + "loss": 8.1296, + "step": 1058600 + }, + { + "epoch": 11.28, + "learning_rate": 1.7890901757157352e-06, + "loss": 8.2212, + "step": 1058700 + }, + { + "epoch": 11.28, + "learning_rate": 1.783842460098828e-06, + "loss": 8.1492, + "step": 1058800 + }, + { + "epoch": 11.28, + "learning_rate": 1.778602382762673e-06, + "loss": 8.1905, + "step": 1058900 + }, + { + "epoch": 11.28, + "learning_rate": 1.7733699441147667e-06, + "loss": 8.1694, + "step": 1059000 + }, + { + "epoch": 11.28, + "learning_rate": 1.7681451445620501e-06, + "loss": 8.1409, + "step": 1059100 + }, + { + "epoch": 11.28, + "learning_rate": 1.7629279845108538e-06, + "loss": 8.1386, + "step": 1059200 + }, + { + "epoch": 11.28, + "learning_rate": 1.757718464366942e-06, + "loss": 8.137, + "step": 1059300 + }, + { + "epoch": 11.28, + "learning_rate": 1.752516584535413e-06, + "loss": 8.2442, + "step": 1059400 + }, + { + "epoch": 11.29, + "learning_rate": 1.7473223454208543e-06, + "loss": 8.1238, + "step": 1059500 + }, + { + "epoch": 11.29, + "learning_rate": 1.7421357474271983e-06, + "loss": 8.1577, + "step": 1059600 + }, + { + "epoch": 11.29, + "learning_rate": 1.7369567909578444e-06, + "loss": 8.1249, + "step": 1059700 + }, + { + "epoch": 11.29, + "learning_rate": 1.7317854764155151e-06, + "loss": 8.167, + "step": 1059800 + }, + { + "epoch": 11.29, + "learning_rate": 1.7266218042023996e-06, + "loss": 8.1734, + "step": 1059900 + }, + { + "epoch": 11.29, + "learning_rate": 1.721465774720099e-06, + "loss": 8.1362, + "step": 1060000 + }, + { + "epoch": 11.29, + "learning_rate": 1.7163173883695704e-06, + "loss": 8.1666, + "step": 1060100 + }, + { + "epoch": 11.29, + "learning_rate": 1.7111766455512157e-06, + "loss": 8.0816, + "step": 1060200 + }, + { + "epoch": 11.29, + "learning_rate": 1.706043546664826e-06, + "loss": 8.1929, + "step": 1060300 + }, + { + "epoch": 11.29, + "learning_rate": 1.7009180921096157e-06, + "loss": 8.2492, + "step": 1060400 + }, + { + "epoch": 11.3, + "learning_rate": 1.6958002822841767e-06, + "loss": 8.301, + "step": 1060500 + }, + { + "epoch": 11.3, + "learning_rate": 1.6906901175865354e-06, + "loss": 8.1643, + "step": 1060600 + }, + { + "epoch": 11.3, + "learning_rate": 1.6855875984140957e-06, + "loss": 8.2117, + "step": 1060700 + }, + { + "epoch": 11.3, + "learning_rate": 1.6804927251637071e-06, + "loss": 8.1936, + "step": 1060800 + }, + { + "epoch": 11.3, + "learning_rate": 1.6754054982315527e-06, + "loss": 8.1654, + "step": 1060900 + }, + { + "epoch": 11.3, + "learning_rate": 1.6703259180133047e-06, + "loss": 8.1652, + "step": 1061000 + }, + { + "epoch": 11.3, + "learning_rate": 1.6652539849040028e-06, + "loss": 8.2591, + "step": 1061100 + }, + { + "epoch": 11.3, + "learning_rate": 1.6601896992980648e-06, + "loss": 8.2157, + "step": 1061200 + }, + { + "epoch": 11.3, + "learning_rate": 1.6551330615893535e-06, + "loss": 8.1875, + "step": 1061300 + }, + { + "epoch": 11.31, + "learning_rate": 1.6500840721711431e-06, + "loss": 8.2543, + "step": 1061400 + }, + { + "epoch": 11.31, + "learning_rate": 1.645042731436075e-06, + "loss": 8.1483, + "step": 1061500 + }, + { + "epoch": 11.31, + "learning_rate": 1.6400090397762136e-06, + "loss": 8.1609, + "step": 1061600 + }, + { + "epoch": 11.31, + "learning_rate": 1.6349829975830232e-06, + "loss": 8.1051, + "step": 1061700 + }, + { + "epoch": 11.31, + "learning_rate": 1.6299646052474028e-06, + "loss": 8.2181, + "step": 1061800 + }, + { + "epoch": 11.31, + "learning_rate": 1.6249538631596174e-06, + "loss": 8.1844, + "step": 1061900 + }, + { + "epoch": 11.31, + "learning_rate": 1.6199507717093443e-06, + "loss": 8.2203, + "step": 1062000 + }, + { + "epoch": 11.31, + "learning_rate": 1.6149553312857058e-06, + "loss": 8.2574, + "step": 1062100 + }, + { + "epoch": 11.31, + "learning_rate": 1.6099675422771687e-06, + "loss": 8.2255, + "step": 1062200 + }, + { + "epoch": 11.31, + "learning_rate": 1.604987405071645e-06, + "loss": 8.1653, + "step": 1062300 + }, + { + "epoch": 11.32, + "learning_rate": 1.6000149200564364e-06, + "loss": 8.1435, + "step": 1062400 + }, + { + "epoch": 11.32, + "learning_rate": 1.595050087618255e-06, + "loss": 8.1225, + "step": 1062500 + }, + { + "epoch": 11.32, + "learning_rate": 1.5900929081432258e-06, + "loss": 8.2264, + "step": 1062600 + }, + { + "epoch": 11.32, + "learning_rate": 1.5851433820168516e-06, + "loss": 8.1308, + "step": 1062700 + }, + { + "epoch": 11.32, + "learning_rate": 1.5802015096240685e-06, + "loss": 8.0965, + "step": 1062800 + }, + { + "epoch": 11.32, + "learning_rate": 1.575267291349203e-06, + "loss": 8.1899, + "step": 1062900 + }, + { + "epoch": 11.32, + "learning_rate": 1.570340727576003e-06, + "loss": 8.1603, + "step": 1063000 + }, + { + "epoch": 11.32, + "learning_rate": 1.5654218186875847e-06, + "loss": 8.1446, + "step": 1063100 + }, + { + "epoch": 11.32, + "learning_rate": 1.5605105650665086e-06, + "loss": 8.093, + "step": 1063200 + }, + { + "epoch": 11.33, + "learning_rate": 1.5556069670947137e-06, + "loss": 8.2077, + "step": 1063300 + }, + { + "epoch": 11.33, + "learning_rate": 1.5507110251535617e-06, + "loss": 8.1619, + "step": 1063400 + }, + { + "epoch": 11.33, + "learning_rate": 1.5458227396238145e-06, + "loss": 8.2253, + "step": 1063500 + }, + { + "epoch": 11.33, + "learning_rate": 1.5409421108856126e-06, + "loss": 8.2202, + "step": 1063600 + }, + { + "epoch": 11.33, + "learning_rate": 1.536069139318541e-06, + "loss": 8.1154, + "step": 1063700 + }, + { + "epoch": 11.33, + "learning_rate": 1.5312038253015747e-06, + "loss": 8.1516, + "step": 1063800 + }, + { + "epoch": 11.33, + "learning_rate": 1.5263461692130777e-06, + "loss": 8.1447, + "step": 1063900 + }, + { + "epoch": 11.33, + "learning_rate": 1.5214961714308363e-06, + "loss": 8.1936, + "step": 1064000 + }, + { + "epoch": 11.33, + "learning_rate": 1.5166538323320378e-06, + "loss": 8.1272, + "step": 1064100 + }, + { + "epoch": 11.34, + "learning_rate": 1.511819152293259e-06, + "loss": 8.2055, + "step": 1064200 + }, + { + "epoch": 11.34, + "learning_rate": 1.50699213169051e-06, + "loss": 8.1765, + "step": 1064300 + }, + { + "epoch": 11.34, + "learning_rate": 1.5021727708991796e-06, + "loss": 8.1858, + "step": 1064400 + }, + { + "epoch": 11.34, + "learning_rate": 1.4973610702940677e-06, + "loss": 8.0991, + "step": 1064500 + }, + { + "epoch": 11.34, + "learning_rate": 1.4925570302493863e-06, + "loss": 8.124, + "step": 1064600 + }, + { + "epoch": 11.34, + "learning_rate": 1.4877606511387477e-06, + "loss": 8.2112, + "step": 1064700 + }, + { + "epoch": 11.34, + "learning_rate": 1.4829719333351643e-06, + "loss": 8.0736, + "step": 1064800 + }, + { + "epoch": 11.34, + "learning_rate": 1.4781908772110498e-06, + "loss": 8.1481, + "step": 1064900 + }, + { + "epoch": 11.34, + "learning_rate": 1.4734174831382397e-06, + "loss": 8.1342, + "step": 1065000 + }, + { + "epoch": 11.34, + "learning_rate": 1.4686517514879372e-06, + "loss": 8.2086, + "step": 1065100 + }, + { + "epoch": 11.35, + "learning_rate": 1.4638936826308013e-06, + "loss": 8.2455, + "step": 1065200 + }, + { + "epoch": 11.35, + "learning_rate": 1.4591432769368585e-06, + "loss": 8.161, + "step": 1065300 + }, + { + "epoch": 11.35, + "learning_rate": 1.4544005347755464e-06, + "loss": 8.1585, + "step": 1065400 + }, + { + "epoch": 11.35, + "learning_rate": 1.449665456515703e-06, + "loss": 8.1522, + "step": 1065500 + }, + { + "epoch": 11.35, + "learning_rate": 1.44493804252559e-06, + "loss": 8.1904, + "step": 1065600 + }, + { + "epoch": 11.35, + "learning_rate": 1.440218293172846e-06, + "loss": 8.1853, + "step": 1065700 + }, + { + "epoch": 11.35, + "learning_rate": 1.435506208824522e-06, + "loss": 8.1984, + "step": 1065800 + }, + { + "epoch": 11.35, + "learning_rate": 1.4308017898470916e-06, + "loss": 8.2041, + "step": 1065900 + }, + { + "epoch": 11.35, + "learning_rate": 1.4261050366064176e-06, + "loss": 8.1597, + "step": 1066000 + }, + { + "epoch": 11.36, + "learning_rate": 1.4214159494677526e-06, + "loss": 8.1551, + "step": 1066100 + }, + { + "epoch": 11.36, + "learning_rate": 1.4167345287957712e-06, + "loss": 8.2065, + "step": 1066200 + }, + { + "epoch": 11.36, + "learning_rate": 1.4120607749545712e-06, + "loss": 8.202, + "step": 1066300 + }, + { + "epoch": 11.36, + "learning_rate": 1.4073946883076062e-06, + "loss": 8.1913, + "step": 1066400 + }, + { + "epoch": 11.36, + "learning_rate": 1.4027362692177636e-06, + "loss": 8.1795, + "step": 1066500 + }, + { + "epoch": 11.36, + "learning_rate": 1.3980855180473208e-06, + "loss": 8.1259, + "step": 1066600 + }, + { + "epoch": 11.36, + "learning_rate": 1.3934424351579877e-06, + "loss": 8.1583, + "step": 1066700 + }, + { + "epoch": 11.36, + "learning_rate": 1.388807020910843e-06, + "loss": 8.1307, + "step": 1066800 + }, + { + "epoch": 11.36, + "learning_rate": 1.3841792756663864e-06, + "loss": 8.1522, + "step": 1066900 + }, + { + "epoch": 11.36, + "learning_rate": 1.3795591997845192e-06, + "loss": 8.1427, + "step": 1067000 + }, + { + "epoch": 11.37, + "learning_rate": 1.3749467936245542e-06, + "loss": 8.1994, + "step": 1067100 + }, + { + "epoch": 11.37, + "learning_rate": 1.370342057545171e-06, + "loss": 8.1095, + "step": 1067200 + }, + { + "epoch": 11.37, + "learning_rate": 1.3657449919045161e-06, + "loss": 8.1817, + "step": 1067300 + }, + { + "epoch": 11.37, + "learning_rate": 1.361155597060071e-06, + "loss": 8.2087, + "step": 1067400 + }, + { + "epoch": 11.37, + "learning_rate": 1.3565738733687827e-06, + "loss": 8.0824, + "step": 1067500 + }, + { + "epoch": 11.37, + "learning_rate": 1.3519998211869444e-06, + "loss": 8.1108, + "step": 1067600 + }, + { + "epoch": 11.37, + "learning_rate": 1.3474334408702937e-06, + "loss": 8.143, + "step": 1067700 + }, + { + "epoch": 11.37, + "learning_rate": 1.3428747327739576e-06, + "loss": 8.1224, + "step": 1067800 + }, + { + "epoch": 11.37, + "learning_rate": 1.338323697252486e-06, + "loss": 8.2507, + "step": 1067900 + }, + { + "epoch": 11.38, + "learning_rate": 1.3337803346597843e-06, + "loss": 8.1524, + "step": 1068000 + }, + { + "epoch": 11.38, + "learning_rate": 1.3292446453492035e-06, + "loss": 8.1302, + "step": 1068100 + }, + { + "epoch": 11.38, + "learning_rate": 1.3247166296734836e-06, + "loss": 8.1252, + "step": 1068200 + }, + { + "epoch": 11.38, + "learning_rate": 1.3201962879847652e-06, + "loss": 8.2079, + "step": 1068300 + }, + { + "epoch": 11.38, + "learning_rate": 1.3156836206346113e-06, + "loss": 8.1929, + "step": 1068400 + }, + { + "epoch": 11.38, + "learning_rate": 1.3111786279739413e-06, + "loss": 8.2476, + "step": 1068500 + }, + { + "epoch": 11.38, + "learning_rate": 1.3066813103531417e-06, + "loss": 8.19, + "step": 1068600 + }, + { + "epoch": 11.38, + "learning_rate": 1.3021916681219548e-06, + "loss": 8.1724, + "step": 1068700 + }, + { + "epoch": 11.38, + "learning_rate": 1.2977097016295348e-06, + "loss": 8.1806, + "step": 1068800 + }, + { + "epoch": 11.39, + "learning_rate": 1.2932354112244582e-06, + "loss": 8.1235, + "step": 1068900 + }, + { + "epoch": 11.39, + "learning_rate": 1.2887687972546802e-06, + "loss": 8.1977, + "step": 1069000 + }, + { + "epoch": 11.39, + "learning_rate": 1.2843098600675673e-06, + "loss": 8.2084, + "step": 1069100 + }, + { + "epoch": 11.39, + "learning_rate": 1.2798586000099088e-06, + "loss": 8.1891, + "step": 1069200 + }, + { + "epoch": 11.39, + "learning_rate": 1.2754150174278722e-06, + "loss": 8.1761, + "step": 1069300 + }, + { + "epoch": 11.39, + "learning_rate": 1.2709791126670256e-06, + "loss": 8.1074, + "step": 1069400 + }, + { + "epoch": 11.39, + "learning_rate": 1.2665508860723485e-06, + "loss": 8.1663, + "step": 1069500 + }, + { + "epoch": 11.39, + "learning_rate": 1.262130337988232e-06, + "loss": 8.139, + "step": 1069600 + }, + { + "epoch": 11.39, + "learning_rate": 1.2577174687584791e-06, + "loss": 8.2053, + "step": 1069700 + }, + { + "epoch": 11.39, + "learning_rate": 1.2533122787262486e-06, + "loss": 8.1908, + "step": 1069800 + }, + { + "epoch": 11.4, + "learning_rate": 1.248914768234144e-06, + "loss": 8.1839, + "step": 1069900 + }, + { + "epoch": 11.4, + "learning_rate": 1.2445249376241697e-06, + "loss": 8.1445, + "step": 1070000 + }, + { + "epoch": 11.4, + "learning_rate": 1.2401427872377193e-06, + "loss": 8.1156, + "step": 1070100 + }, + { + "epoch": 11.4, + "learning_rate": 1.2357683174155866e-06, + "loss": 8.2202, + "step": 1070200 + }, + { + "epoch": 11.4, + "learning_rate": 1.2314015284979775e-06, + "loss": 8.1586, + "step": 1070300 + }, + { + "epoch": 11.4, + "learning_rate": 1.2270424208244978e-06, + "loss": 8.1751, + "step": 1070400 + }, + { + "epoch": 11.4, + "learning_rate": 1.222690994734166e-06, + "loss": 8.1956, + "step": 1070500 + }, + { + "epoch": 11.4, + "learning_rate": 1.2183472505653658e-06, + "loss": 8.2278, + "step": 1070600 + }, + { + "epoch": 11.4, + "learning_rate": 1.214011188655939e-06, + "loss": 8.1484, + "step": 1070700 + }, + { + "epoch": 11.41, + "learning_rate": 1.2096828093430935e-06, + "loss": 8.1224, + "step": 1070800 + }, + { + "epoch": 11.41, + "learning_rate": 1.2053621129634374e-06, + "loss": 8.1556, + "step": 1070900 + }, + { + "epoch": 11.41, + "learning_rate": 1.2010490998530021e-06, + "loss": 8.1817, + "step": 1071000 + }, + { + "epoch": 11.41, + "learning_rate": 1.1967437703472084e-06, + "loss": 8.1791, + "step": 1071100 + }, + { + "epoch": 11.41, + "learning_rate": 1.1924461247808772e-06, + "loss": 8.1853, + "step": 1071200 + }, + { + "epoch": 11.41, + "learning_rate": 1.188156163488252e-06, + "loss": 8.2893, + "step": 1071300 + }, + { + "epoch": 11.41, + "learning_rate": 1.1838738868029442e-06, + "loss": 8.1345, + "step": 1071400 + }, + { + "epoch": 11.41, + "learning_rate": 1.1795992950579982e-06, + "loss": 8.129, + "step": 1071500 + }, + { + "epoch": 11.41, + "learning_rate": 1.175332388585848e-06, + "loss": 8.1275, + "step": 1071600 + }, + { + "epoch": 11.42, + "learning_rate": 1.1710731677183395e-06, + "loss": 8.1863, + "step": 1071700 + }, + { + "epoch": 11.42, + "learning_rate": 1.1668216327866853e-06, + "loss": 8.187, + "step": 1071800 + }, + { + "epoch": 11.42, + "learning_rate": 1.1625777841215545e-06, + "loss": 8.2085, + "step": 1071900 + }, + { + "epoch": 11.42, + "learning_rate": 1.1583416220529942e-06, + "loss": 8.1794, + "step": 1072000 + }, + { + "epoch": 11.42, + "learning_rate": 1.1541131469104295e-06, + "loss": 8.1535, + "step": 1072100 + }, + { + "epoch": 11.42, + "learning_rate": 1.1498923590227084e-06, + "loss": 8.1716, + "step": 1072200 + }, + { + "epoch": 11.42, + "learning_rate": 1.145679258718113e-06, + "loss": 8.1742, + "step": 1072300 + }, + { + "epoch": 11.42, + "learning_rate": 1.1414738463242703e-06, + "loss": 8.1483, + "step": 1072400 + }, + { + "epoch": 11.42, + "learning_rate": 1.1372761221682403e-06, + "loss": 8.1549, + "step": 1072500 + }, + { + "epoch": 11.42, + "learning_rate": 1.1330860865764848e-06, + "loss": 8.0866, + "step": 1072600 + }, + { + "epoch": 11.43, + "learning_rate": 1.1289037398748648e-06, + "loss": 8.1243, + "step": 1072700 + }, + { + "epoch": 11.43, + "learning_rate": 1.1247290823886425e-06, + "loss": 8.1645, + "step": 1072800 + }, + { + "epoch": 11.43, + "learning_rate": 1.1205621144424584e-06, + "loss": 8.1362, + "step": 1072900 + }, + { + "epoch": 11.43, + "learning_rate": 1.1164028363604084e-06, + "loss": 8.1903, + "step": 1073000 + }, + { + "epoch": 11.43, + "learning_rate": 1.1122512484659563e-06, + "loss": 8.1432, + "step": 1073100 + }, + { + "epoch": 11.43, + "learning_rate": 1.108107351081955e-06, + "loss": 8.1676, + "step": 1073200 + }, + { + "epoch": 11.43, + "learning_rate": 1.10397114453068e-06, + "loss": 8.13, + "step": 1073300 + }, + { + "epoch": 11.43, + "learning_rate": 1.0998426291338183e-06, + "loss": 8.1646, + "step": 1073400 + }, + { + "epoch": 11.43, + "learning_rate": 1.0957218052124352e-06, + "loss": 8.1248, + "step": 1073500 + }, + { + "epoch": 11.44, + "learning_rate": 1.091608673087008e-06, + "loss": 8.1911, + "step": 1073600 + }, + { + "epoch": 11.44, + "learning_rate": 1.0875032330774027e-06, + "loss": 8.1226, + "step": 1073700 + }, + { + "epoch": 11.44, + "learning_rate": 1.0834054855029306e-06, + "loss": 8.2033, + "step": 1073800 + }, + { + "epoch": 11.44, + "learning_rate": 1.0793154306822485e-06, + "loss": 8.0873, + "step": 1073900 + }, + { + "epoch": 11.44, + "learning_rate": 1.0752330689334455e-06, + "loss": 8.1694, + "step": 1074000 + }, + { + "epoch": 11.44, + "learning_rate": 1.0711584005740127e-06, + "loss": 8.133, + "step": 1074100 + }, + { + "epoch": 11.44, + "learning_rate": 1.067091425920841e-06, + "loss": 8.0792, + "step": 1074200 + }, + { + "epoch": 11.44, + "learning_rate": 1.0630321452901993e-06, + "loss": 8.0721, + "step": 1074300 + }, + { + "epoch": 11.44, + "learning_rate": 1.0589805589977908e-06, + "loss": 8.1542, + "step": 1074400 + }, + { + "epoch": 11.44, + "learning_rate": 1.054936667358719e-06, + "loss": 8.1796, + "step": 1074500 + }, + { + "epoch": 11.45, + "learning_rate": 1.0509004706874659e-06, + "loss": 8.0949, + "step": 1074600 + }, + { + "epoch": 11.45, + "learning_rate": 1.0468719692979245e-06, + "loss": 8.1191, + "step": 1074700 + }, + { + "epoch": 11.45, + "learning_rate": 1.0428511635034e-06, + "loss": 8.1159, + "step": 1074800 + }, + { + "epoch": 11.45, + "learning_rate": 1.0388380536165864e-06, + "loss": 8.2205, + "step": 1074900 + }, + { + "epoch": 11.45, + "learning_rate": 1.034832639949579e-06, + "loss": 8.1125, + "step": 1075000 + }, + { + "epoch": 11.45, + "learning_rate": 1.0308349228138947e-06, + "loss": 8.1451, + "step": 1075100 + }, + { + "epoch": 11.45, + "learning_rate": 1.0268449025204074e-06, + "loss": 8.1942, + "step": 1075200 + }, + { + "epoch": 11.45, + "learning_rate": 1.0228625793794577e-06, + "loss": 8.1737, + "step": 1075300 + }, + { + "epoch": 11.45, + "learning_rate": 1.0188879537007312e-06, + "loss": 8.1619, + "step": 1075400 + }, + { + "epoch": 11.46, + "learning_rate": 1.014921025793336e-06, + "loss": 8.1942, + "step": 1075500 + }, + { + "epoch": 11.46, + "learning_rate": 1.0109617959657702e-06, + "loss": 8.1392, + "step": 1075600 + }, + { + "epoch": 11.46, + "learning_rate": 1.007010264525976e-06, + "loss": 8.1577, + "step": 1075700 + }, + { + "epoch": 11.46, + "learning_rate": 1.00306643178123e-06, + "loss": 8.1942, + "step": 1075800 + }, + { + "epoch": 11.46, + "learning_rate": 9.991302980382534e-07, + "loss": 8.1802, + "step": 1075900 + }, + { + "epoch": 11.46, + "learning_rate": 9.95201863603179e-07, + "loss": 8.1124, + "step": 1076000 + }, + { + "epoch": 11.46, + "learning_rate": 9.91281128781496e-07, + "loss": 8.1176, + "step": 1076100 + }, + { + "epoch": 11.46, + "learning_rate": 9.873680938781272e-07, + "loss": 8.0936, + "step": 1076200 + }, + { + "epoch": 11.46, + "learning_rate": 9.834627591974066e-07, + "loss": 8.1145, + "step": 1076300 + }, + { + "epoch": 11.47, + "learning_rate": 9.79565125043036e-07, + "loss": 8.1733, + "step": 1076400 + }, + { + "epoch": 11.47, + "learning_rate": 9.756751917181395e-07, + "loss": 8.1742, + "step": 1076500 + }, + { + "epoch": 11.47, + "learning_rate": 9.717929595252307e-07, + "loss": 8.1465, + "step": 1076600 + }, + { + "epoch": 11.47, + "learning_rate": 9.679184287662347e-07, + "loss": 8.2496, + "step": 1076700 + }, + { + "epoch": 11.47, + "learning_rate": 9.640515997424882e-07, + "loss": 8.146, + "step": 1076800 + }, + { + "epoch": 11.47, + "learning_rate": 9.60192472754684e-07, + "loss": 8.1245, + "step": 1076900 + }, + { + "epoch": 11.47, + "learning_rate": 9.563410481029821e-07, + "loss": 8.1389, + "step": 1077000 + }, + { + "epoch": 11.47, + "learning_rate": 9.524973260868875e-07, + "loss": 8.1872, + "step": 1077100 + }, + { + "epoch": 11.47, + "learning_rate": 9.486613070053385e-07, + "loss": 8.158, + "step": 1077200 + }, + { + "epoch": 11.47, + "learning_rate": 9.448329911566411e-07, + "loss": 8.1536, + "step": 1077300 + }, + { + "epoch": 11.48, + "learning_rate": 9.410123788385572e-07, + "loss": 8.1393, + "step": 1077400 + }, + { + "epoch": 11.48, + "learning_rate": 9.371994703481823e-07, + "loss": 8.1471, + "step": 1077500 + }, + { + "epoch": 11.48, + "learning_rate": 9.333942659820683e-07, + "loss": 8.1548, + "step": 1077600 + }, + { + "epoch": 11.48, + "learning_rate": 9.295967660361338e-07, + "loss": 8.1824, + "step": 1077700 + }, + { + "epoch": 11.48, + "learning_rate": 9.258069708057205e-07, + "loss": 8.1506, + "step": 1077800 + }, + { + "epoch": 11.48, + "learning_rate": 9.220248805855702e-07, + "loss": 8.1782, + "step": 1077900 + }, + { + "epoch": 11.48, + "learning_rate": 9.182504956697924e-07, + "loss": 8.1528, + "step": 1078000 + }, + { + "epoch": 11.48, + "learning_rate": 9.144838163519187e-07, + "loss": 8.2237, + "step": 1078100 + }, + { + "epoch": 11.48, + "learning_rate": 9.107248429249149e-07, + "loss": 8.1218, + "step": 1078200 + }, + { + "epoch": 11.49, + "learning_rate": 9.069735756811026e-07, + "loss": 8.1497, + "step": 1078300 + }, + { + "epoch": 11.49, + "learning_rate": 9.032300149122042e-07, + "loss": 8.1278, + "step": 1078400 + }, + { + "epoch": 11.49, + "learning_rate": 8.994941609093754e-07, + "loss": 8.2214, + "step": 1078500 + }, + { + "epoch": 11.49, + "learning_rate": 8.957660139631507e-07, + "loss": 8.1476, + "step": 1078600 + }, + { + "epoch": 11.49, + "learning_rate": 8.920455743634537e-07, + "loss": 8.2113, + "step": 1078700 + }, + { + "epoch": 11.49, + "learning_rate": 8.883328423996417e-07, + "loss": 8.1459, + "step": 1078800 + }, + { + "epoch": 11.49, + "learning_rate": 8.846278183604395e-07, + "loss": 8.101, + "step": 1078900 + }, + { + "epoch": 11.49, + "learning_rate": 8.809305025340054e-07, + "loss": 8.1838, + "step": 1079000 + }, + { + "epoch": 11.49, + "learning_rate": 8.772408952078648e-07, + "loss": 8.2129, + "step": 1079100 + }, + { + "epoch": 11.49, + "learning_rate": 8.73558996668955e-07, + "loss": 8.1965, + "step": 1079200 + }, + { + "epoch": 11.5, + "learning_rate": 8.698848072036359e-07, + "loss": 8.0966, + "step": 1079300 + }, + { + "epoch": 11.5, + "learning_rate": 8.662183270976343e-07, + "loss": 8.1227, + "step": 1079400 + }, + { + "epoch": 11.5, + "learning_rate": 8.625595566361111e-07, + "loss": 8.1365, + "step": 1079500 + }, + { + "epoch": 11.5, + "learning_rate": 8.589084961035721e-07, + "loss": 8.0934, + "step": 1079600 + }, + { + "epoch": 11.5, + "learning_rate": 8.552651457840011e-07, + "loss": 8.0895, + "step": 1079700 + }, + { + "epoch": 11.5, + "learning_rate": 8.516295059607271e-07, + "loss": 8.1617, + "step": 1079800 + }, + { + "epoch": 11.5, + "learning_rate": 8.480015769164907e-07, + "loss": 8.1827, + "step": 1079900 + }, + { + "epoch": 11.5, + "learning_rate": 8.443813589334326e-07, + "loss": 8.0834, + "step": 1080000 + }, + { + "epoch": 11.5, + "learning_rate": 8.407688522931056e-07, + "loss": 8.1824, + "step": 1080100 + }, + { + "epoch": 11.51, + "learning_rate": 8.371640572764516e-07, + "loss": 8.1523, + "step": 1080200 + }, + { + "epoch": 11.51, + "learning_rate": 8.33566974163813e-07, + "loss": 8.1497, + "step": 1080300 + }, + { + "epoch": 11.51, + "learning_rate": 8.299776032349438e-07, + "loss": 8.1998, + "step": 1080400 + }, + { + "epoch": 11.51, + "learning_rate": 8.263959447689873e-07, + "loss": 8.1599, + "step": 1080500 + }, + { + "epoch": 11.51, + "learning_rate": 8.228219990444763e-07, + "loss": 8.1593, + "step": 1080600 + }, + { + "epoch": 11.51, + "learning_rate": 8.192557663393774e-07, + "loss": 8.1138, + "step": 1080700 + }, + { + "epoch": 11.51, + "learning_rate": 8.156972469310243e-07, + "loss": 8.1366, + "step": 1080800 + }, + { + "epoch": 11.51, + "learning_rate": 8.121464410961621e-07, + "loss": 8.1161, + "step": 1080900 + }, + { + "epoch": 11.51, + "learning_rate": 8.086033491109479e-07, + "loss": 8.1603, + "step": 1081000 + }, + { + "epoch": 11.52, + "learning_rate": 8.050679712509168e-07, + "loss": 8.1611, + "step": 1081100 + }, + { + "epoch": 11.52, + "learning_rate": 8.015403077910267e-07, + "loss": 8.0688, + "step": 1081200 + }, + { + "epoch": 11.52, + "learning_rate": 7.980203590056135e-07, + "loss": 8.2031, + "step": 1081300 + }, + { + "epoch": 11.52, + "learning_rate": 7.94508125168425e-07, + "loss": 8.1288, + "step": 1081400 + }, + { + "epoch": 11.52, + "learning_rate": 7.910036065526095e-07, + "loss": 8.1578, + "step": 1081500 + }, + { + "epoch": 11.52, + "learning_rate": 7.875068034307265e-07, + "loss": 8.2093, + "step": 1081600 + }, + { + "epoch": 11.52, + "learning_rate": 7.840177160747142e-07, + "loss": 8.1224, + "step": 1081700 + }, + { + "epoch": 11.52, + "learning_rate": 7.80536344755911e-07, + "loss": 8.1864, + "step": 1081800 + }, + { + "epoch": 11.52, + "learning_rate": 7.770626897450561e-07, + "loss": 8.17, + "step": 1081900 + }, + { + "epoch": 11.52, + "learning_rate": 7.735967513123332e-07, + "loss": 8.1322, + "step": 1082000 + }, + { + "epoch": 11.53, + "learning_rate": 7.70138529727249e-07, + "loss": 8.149, + "step": 1082100 + }, + { + "epoch": 11.53, + "learning_rate": 7.666880252587771e-07, + "loss": 8.2008, + "step": 1082200 + }, + { + "epoch": 11.53, + "learning_rate": 7.632452381752364e-07, + "loss": 8.1206, + "step": 1082300 + }, + { + "epoch": 11.53, + "learning_rate": 7.598101687444126e-07, + "loss": 8.0475, + "step": 1082400 + }, + { + "epoch": 11.53, + "learning_rate": 7.563828172334031e-07, + "loss": 8.1768, + "step": 1082500 + }, + { + "epoch": 11.53, + "learning_rate": 7.529631839087948e-07, + "loss": 8.1996, + "step": 1082600 + }, + { + "epoch": 11.53, + "learning_rate": 7.495512690365081e-07, + "loss": 8.1784, + "step": 1082700 + }, + { + "epoch": 11.53, + "learning_rate": 7.461470728818976e-07, + "loss": 8.1954, + "step": 1082800 + }, + { + "epoch": 11.53, + "learning_rate": 7.427505957097069e-07, + "loss": 8.121, + "step": 1082900 + }, + { + "epoch": 11.54, + "learning_rate": 7.393618377840805e-07, + "loss": 8.1108, + "step": 1083000 + }, + { + "epoch": 11.54, + "learning_rate": 7.35980799368563e-07, + "loss": 8.1216, + "step": 1083100 + }, + { + "epoch": 11.54, + "learning_rate": 7.326074807260885e-07, + "loss": 8.1149, + "step": 1083200 + }, + { + "epoch": 11.54, + "learning_rate": 7.292418821190139e-07, + "loss": 8.2065, + "step": 1083300 + }, + { + "epoch": 11.54, + "learning_rate": 7.258840038090742e-07, + "loss": 8.1307, + "step": 1083400 + }, + { + "epoch": 11.54, + "learning_rate": 7.225338460574272e-07, + "loss": 8.1767, + "step": 1083500 + }, + { + "epoch": 11.54, + "learning_rate": 7.191914091245866e-07, + "loss": 8.2688, + "step": 1083600 + }, + { + "epoch": 11.54, + "learning_rate": 7.158566932705224e-07, + "loss": 8.1487, + "step": 1083700 + }, + { + "epoch": 11.54, + "learning_rate": 7.125296987545493e-07, + "loss": 8.1462, + "step": 1083800 + }, + { + "epoch": 11.54, + "learning_rate": 7.092104258354382e-07, + "loss": 8.1554, + "step": 1083900 + }, + { + "epoch": 11.55, + "learning_rate": 7.058988747712936e-07, + "loss": 8.2115, + "step": 1084000 + }, + { + "epoch": 11.55, + "learning_rate": 7.025950458196873e-07, + "loss": 8.1292, + "step": 1084100 + }, + { + "epoch": 11.55, + "learning_rate": 6.992989392375471e-07, + "loss": 8.1527, + "step": 1084200 + }, + { + "epoch": 11.55, + "learning_rate": 6.960105552812123e-07, + "loss": 8.1924, + "step": 1084300 + }, + { + "epoch": 11.55, + "learning_rate": 6.927298942064119e-07, + "loss": 8.1494, + "step": 1084400 + }, + { + "epoch": 11.55, + "learning_rate": 6.894569562682862e-07, + "loss": 8.1474, + "step": 1084500 + }, + { + "epoch": 11.55, + "learning_rate": 6.86191741721387e-07, + "loss": 8.1463, + "step": 1084600 + }, + { + "epoch": 11.55, + "learning_rate": 6.829342508196334e-07, + "loss": 8.1267, + "step": 1084700 + }, + { + "epoch": 11.55, + "learning_rate": 6.796844838163674e-07, + "loss": 8.1718, + "step": 1084800 + }, + { + "epoch": 11.56, + "learning_rate": 6.7644244096432e-07, + "loss": 8.1248, + "step": 1084900 + }, + { + "epoch": 11.56, + "learning_rate": 6.73208122515634e-07, + "loss": 8.2201, + "step": 1085000 + }, + { + "epoch": 11.56, + "learning_rate": 6.699815287218303e-07, + "loss": 8.1195, + "step": 1085100 + }, + { + "epoch": 11.56, + "learning_rate": 6.667626598338528e-07, + "loss": 8.2056, + "step": 1085200 + }, + { + "epoch": 11.56, + "learning_rate": 6.635515161020345e-07, + "loss": 8.1824, + "step": 1085300 + }, + { + "epoch": 11.56, + "learning_rate": 6.603480977760868e-07, + "loss": 8.0816, + "step": 1085400 + }, + { + "epoch": 11.56, + "learning_rate": 6.571524051051548e-07, + "loss": 8.1739, + "step": 1085500 + }, + { + "epoch": 11.56, + "learning_rate": 6.53964438337773e-07, + "loss": 8.2184, + "step": 1085600 + }, + { + "epoch": 11.56, + "learning_rate": 6.507841977218653e-07, + "loss": 8.139, + "step": 1085700 + }, + { + "epoch": 11.57, + "learning_rate": 6.476116835047563e-07, + "loss": 8.1227, + "step": 1085800 + }, + { + "epoch": 11.57, + "learning_rate": 6.444468959331707e-07, + "loss": 8.195, + "step": 1085900 + }, + { + "epoch": 11.57, + "learning_rate": 6.412898352532337e-07, + "loss": 8.1214, + "step": 1086000 + }, + { + "epoch": 11.57, + "learning_rate": 6.381405017104825e-07, + "loss": 8.1272, + "step": 1086100 + }, + { + "epoch": 11.57, + "learning_rate": 6.34998895549832e-07, + "loss": 8.0877, + "step": 1086200 + }, + { + "epoch": 11.57, + "learning_rate": 6.318650170155982e-07, + "loss": 8.1068, + "step": 1086300 + }, + { + "epoch": 11.57, + "learning_rate": 6.287388663515193e-07, + "loss": 8.1527, + "step": 1086400 + }, + { + "epoch": 11.57, + "learning_rate": 6.256204438007007e-07, + "loss": 8.2, + "step": 1086500 + }, + { + "epoch": 11.57, + "learning_rate": 6.22509749605682e-07, + "loss": 8.1705, + "step": 1086600 + }, + { + "epoch": 11.57, + "learning_rate": 6.194067840083473e-07, + "loss": 8.1965, + "step": 1086700 + }, + { + "epoch": 11.58, + "learning_rate": 6.163115472500592e-07, + "loss": 8.2673, + "step": 1086800 + }, + { + "epoch": 11.58, + "learning_rate": 6.132240395715027e-07, + "loss": 8.1897, + "step": 1086900 + }, + { + "epoch": 11.58, + "learning_rate": 6.101442612128083e-07, + "loss": 8.2516, + "step": 1087000 + }, + { + "epoch": 11.58, + "learning_rate": 6.070722124134731e-07, + "loss": 8.1603, + "step": 1087100 + }, + { + "epoch": 11.58, + "learning_rate": 6.040078934124394e-07, + "loss": 8.1643, + "step": 1087200 + }, + { + "epoch": 11.58, + "learning_rate": 6.009513044479941e-07, + "loss": 8.0849, + "step": 1087300 + }, + { + "epoch": 11.58, + "learning_rate": 5.979024457578586e-07, + "loss": 8.1248, + "step": 1087400 + }, + { + "epoch": 11.58, + "learning_rate": 5.948613175791429e-07, + "loss": 8.1074, + "step": 1087500 + }, + { + "epoch": 11.58, + "learning_rate": 5.918279201483579e-07, + "loss": 8.1389, + "step": 1087600 + }, + { + "epoch": 11.59, + "learning_rate": 5.888022537014037e-07, + "loss": 8.2031, + "step": 1087700 + }, + { + "epoch": 11.59, + "learning_rate": 5.857843184735923e-07, + "loss": 8.142, + "step": 1087800 + }, + { + "epoch": 11.59, + "learning_rate": 5.827741146996357e-07, + "loss": 8.1468, + "step": 1087900 + }, + { + "epoch": 11.59, + "learning_rate": 5.797716426136246e-07, + "loss": 8.1597, + "step": 1088000 + }, + { + "epoch": 11.59, + "learning_rate": 5.767769024490721e-07, + "loss": 8.2469, + "step": 1088100 + }, + { + "epoch": 11.59, + "learning_rate": 5.737898944388698e-07, + "loss": 8.1869, + "step": 1088200 + }, + { + "epoch": 11.59, + "learning_rate": 5.708106188153206e-07, + "loss": 8.2232, + "step": 1088300 + }, + { + "epoch": 11.59, + "learning_rate": 5.678390758101282e-07, + "loss": 8.1216, + "step": 1088400 + }, + { + "epoch": 11.59, + "learning_rate": 5.648752656543854e-07, + "loss": 8.2284, + "step": 1088500 + }, + { + "epoch": 11.6, + "learning_rate": 5.619191885785858e-07, + "loss": 8.1534, + "step": 1088600 + }, + { + "epoch": 11.6, + "learning_rate": 5.589708448126341e-07, + "loss": 8.1637, + "step": 1088700 + }, + { + "epoch": 11.6, + "learning_rate": 5.560302345858026e-07, + "loss": 8.1631, + "step": 1088800 + }, + { + "epoch": 11.6, + "learning_rate": 5.530973581268084e-07, + "loss": 8.1678, + "step": 1088900 + }, + { + "epoch": 11.6, + "learning_rate": 5.501722156637246e-07, + "loss": 8.1437, + "step": 1089000 + }, + { + "epoch": 11.6, + "learning_rate": 5.472548074240469e-07, + "loss": 8.1491, + "step": 1089100 + }, + { + "epoch": 11.6, + "learning_rate": 5.443451336346606e-07, + "loss": 8.2458, + "step": 1089200 + }, + { + "epoch": 11.6, + "learning_rate": 5.414431945218512e-07, + "loss": 8.1505, + "step": 1089300 + }, + { + "epoch": 11.6, + "learning_rate": 5.385489903112939e-07, + "loss": 8.0881, + "step": 1089400 + }, + { + "epoch": 11.6, + "learning_rate": 5.356625212280975e-07, + "loss": 8.1484, + "step": 1089500 + }, + { + "epoch": 11.61, + "learning_rate": 5.327837874967156e-07, + "loss": 8.1289, + "step": 1089600 + }, + { + "epoch": 11.61, + "learning_rate": 5.29912789341036e-07, + "loss": 8.2541, + "step": 1089700 + }, + { + "epoch": 11.61, + "learning_rate": 5.270495269843467e-07, + "loss": 8.2066, + "step": 1089800 + }, + { + "epoch": 11.61, + "learning_rate": 5.241940006493029e-07, + "loss": 8.2295, + "step": 1089900 + }, + { + "epoch": 11.61, + "learning_rate": 5.213462105580047e-07, + "loss": 8.1815, + "step": 1090000 + }, + { + "epoch": 11.61, + "learning_rate": 5.185061569318972e-07, + "loss": 8.1888, + "step": 1090100 + }, + { + "epoch": 11.61, + "learning_rate": 5.156738399918815e-07, + "loss": 8.2541, + "step": 1090200 + }, + { + "epoch": 11.61, + "learning_rate": 5.128492599582035e-07, + "loss": 8.2033, + "step": 1090300 + }, + { + "epoch": 11.61, + "learning_rate": 5.100324170505433e-07, + "loss": 8.0875, + "step": 1090400 + }, + { + "epoch": 11.62, + "learning_rate": 5.072233114879588e-07, + "loss": 8.1583, + "step": 1090500 + }, + { + "epoch": 11.62, + "learning_rate": 5.044219434889308e-07, + "loss": 8.1384, + "step": 1090600 + }, + { + "epoch": 11.62, + "learning_rate": 5.016283132712962e-07, + "loss": 8.3075, + "step": 1090700 + }, + { + "epoch": 11.62, + "learning_rate": 4.988424210523368e-07, + "loss": 8.1301, + "step": 1090800 + }, + { + "epoch": 11.62, + "learning_rate": 4.960642670487014e-07, + "loss": 8.0607, + "step": 1090900 + }, + { + "epoch": 11.62, + "learning_rate": 4.932938514764507e-07, + "loss": 8.2213, + "step": 1091000 + }, + { + "epoch": 11.62, + "learning_rate": 4.905311745510454e-07, + "loss": 8.1958, + "step": 1091100 + }, + { + "epoch": 11.62, + "learning_rate": 4.87776236487325e-07, + "loss": 8.1501, + "step": 1091200 + }, + { + "epoch": 11.62, + "learning_rate": 4.850290374995626e-07, + "loss": 8.2576, + "step": 1091300 + }, + { + "epoch": 11.62, + "learning_rate": 4.82289577801387e-07, + "loss": 8.1374, + "step": 1091400 + }, + { + "epoch": 11.63, + "learning_rate": 4.795578576058612e-07, + "loss": 8.1958, + "step": 1091500 + }, + { + "epoch": 11.63, + "learning_rate": 4.768338771254155e-07, + "loss": 8.0582, + "step": 1091600 + }, + { + "epoch": 11.63, + "learning_rate": 4.7411763657191354e-07, + "loss": 8.1807, + "step": 1091700 + }, + { + "epoch": 11.63, + "learning_rate": 4.7140913615658643e-07, + "loss": 8.2038, + "step": 1091800 + }, + { + "epoch": 11.63, + "learning_rate": 4.6870837609007677e-07, + "loss": 8.217, + "step": 1091900 + }, + { + "epoch": 11.63, + "learning_rate": 4.6601535658241655e-07, + "loss": 8.1147, + "step": 1092000 + }, + { + "epoch": 11.63, + "learning_rate": 4.633300778430605e-07, + "loss": 8.201, + "step": 1092100 + }, + { + "epoch": 11.63, + "learning_rate": 4.6065254008081927e-07, + "loss": 8.1042, + "step": 1092200 + }, + { + "epoch": 11.63, + "learning_rate": 4.5798274350393746e-07, + "loss": 8.1168, + "step": 1092300 + }, + { + "epoch": 11.64, + "learning_rate": 4.5532068832004895e-07, + "loss": 8.1431, + "step": 1092400 + }, + { + "epoch": 11.64, + "learning_rate": 4.52666374736177e-07, + "loss": 8.22, + "step": 1092500 + }, + { + "epoch": 11.64, + "learning_rate": 4.5001980295874545e-07, + "loss": 8.227, + "step": 1092600 + }, + { + "epoch": 11.64, + "learning_rate": 4.473809731935896e-07, + "loss": 8.1143, + "step": 1092700 + }, + { + "epoch": 11.64, + "learning_rate": 4.4474988564591203e-07, + "loss": 8.1941, + "step": 1092800 + }, + { + "epoch": 11.64, + "learning_rate": 4.4212654052034896e-07, + "loss": 8.2576, + "step": 1092900 + }, + { + "epoch": 11.64, + "learning_rate": 4.3951093802091505e-07, + "loss": 8.1845, + "step": 1093000 + }, + { + "epoch": 11.64, + "learning_rate": 4.3690307835102527e-07, + "loss": 8.2039, + "step": 1093100 + }, + { + "epoch": 11.64, + "learning_rate": 4.343029617134842e-07, + "loss": 8.148, + "step": 1093200 + }, + { + "epoch": 11.65, + "learning_rate": 4.317105883105188e-07, + "loss": 8.1326, + "step": 1093300 + }, + { + "epoch": 11.65, + "learning_rate": 4.2912595834372346e-07, + "loss": 8.136, + "step": 1093400 + }, + { + "epoch": 11.65, + "learning_rate": 4.265490720141152e-07, + "loss": 8.2029, + "step": 1093500 + }, + { + "epoch": 11.65, + "learning_rate": 4.2397992952210033e-07, + "loss": 8.0885, + "step": 1093600 + }, + { + "epoch": 11.65, + "learning_rate": 4.2141853106747453e-07, + "loss": 8.2282, + "step": 1093700 + }, + { + "epoch": 11.65, + "learning_rate": 4.188648768494341e-07, + "loss": 8.1249, + "step": 1093800 + }, + { + "epoch": 11.65, + "learning_rate": 4.163189670665979e-07, + "loss": 8.0815, + "step": 1093900 + }, + { + "epoch": 11.65, + "learning_rate": 4.137808019169298e-07, + "loss": 8.161, + "step": 1094000 + }, + { + "epoch": 11.65, + "learning_rate": 4.1125038159784966e-07, + "loss": 8.1372, + "step": 1094100 + }, + { + "epoch": 11.65, + "learning_rate": 4.0872770630613345e-07, + "loss": 8.149, + "step": 1094200 + }, + { + "epoch": 11.66, + "learning_rate": 4.062127762379797e-07, + "loss": 8.1507, + "step": 1094300 + }, + { + "epoch": 11.66, + "learning_rate": 4.0370559158897647e-07, + "loss": 8.2085, + "step": 1094400 + }, + { + "epoch": 11.66, + "learning_rate": 4.0120615255409e-07, + "loss": 8.1436, + "step": 1094500 + }, + { + "epoch": 11.66, + "learning_rate": 3.9871445932772036e-07, + "loss": 8.091, + "step": 1094600 + }, + { + "epoch": 11.66, + "learning_rate": 3.962305121036347e-07, + "loss": 8.1572, + "step": 1094700 + }, + { + "epoch": 11.66, + "learning_rate": 3.9375431107502304e-07, + "loss": 8.2106, + "step": 1094800 + }, + { + "epoch": 11.66, + "learning_rate": 3.9128585643444236e-07, + "loss": 8.2078, + "step": 1094900 + }, + { + "epoch": 11.66, + "learning_rate": 3.888251483738836e-07, + "loss": 8.1446, + "step": 1095000 + }, + { + "epoch": 11.66, + "learning_rate": 3.863721870847048e-07, + "loss": 8.1565, + "step": 1095100 + }, + { + "epoch": 11.67, + "learning_rate": 3.839269727576755e-07, + "loss": 8.2158, + "step": 1095200 + }, + { + "epoch": 11.67, + "learning_rate": 3.814895055829548e-07, + "loss": 8.0802, + "step": 1095300 + }, + { + "epoch": 11.67, + "learning_rate": 3.7905978575012435e-07, + "loss": 8.093, + "step": 1095400 + }, + { + "epoch": 11.67, + "learning_rate": 3.766378134481108e-07, + "loss": 8.1401, + "step": 1095500 + }, + { + "epoch": 11.67, + "learning_rate": 3.7422358886529674e-07, + "loss": 8.185, + "step": 1095600 + }, + { + "epoch": 11.67, + "learning_rate": 3.7181711218943205e-07, + "loss": 8.1222, + "step": 1095700 + }, + { + "epoch": 11.67, + "learning_rate": 3.69418383607667e-07, + "loss": 8.2365, + "step": 1095800 + }, + { + "epoch": 11.67, + "learning_rate": 3.6702740330655237e-07, + "loss": 8.2407, + "step": 1095900 + }, + { + "epoch": 11.67, + "learning_rate": 3.6464417147202833e-07, + "loss": 8.168, + "step": 1096000 + }, + { + "epoch": 11.67, + "learning_rate": 3.622686882894355e-07, + "loss": 8.1652, + "step": 1096100 + }, + { + "epoch": 11.68, + "learning_rate": 3.5990095394353716e-07, + "loss": 8.1734, + "step": 1096200 + }, + { + "epoch": 11.68, + "learning_rate": 3.5754096861845277e-07, + "loss": 8.1262, + "step": 1096300 + }, + { + "epoch": 11.68, + "learning_rate": 3.5518873249772434e-07, + "loss": 8.1646, + "step": 1096400 + }, + { + "epoch": 11.68, + "learning_rate": 3.528442457642944e-07, + "loss": 8.1401, + "step": 1096500 + }, + { + "epoch": 11.68, + "learning_rate": 3.5050750860047277e-07, + "loss": 8.1651, + "step": 1096600 + }, + { + "epoch": 11.68, + "learning_rate": 3.4817852118800285e-07, + "loss": 8.2513, + "step": 1096700 + }, + { + "epoch": 11.68, + "learning_rate": 3.458572837080065e-07, + "loss": 8.1854, + "step": 1096800 + }, + { + "epoch": 11.68, + "learning_rate": 3.435437963410171e-07, + "loss": 8.2056, + "step": 1096900 + }, + { + "epoch": 11.68, + "learning_rate": 3.412380592669351e-07, + "loss": 8.2094, + "step": 1097000 + }, + { + "epoch": 11.69, + "learning_rate": 3.3894007266509486e-07, + "loss": 8.1428, + "step": 1097100 + }, + { + "epoch": 11.69, + "learning_rate": 3.3664983671419793e-07, + "loss": 8.1291, + "step": 1097200 + }, + { + "epoch": 11.69, + "learning_rate": 3.3436735159236844e-07, + "loss": 8.181, + "step": 1097300 + }, + { + "epoch": 11.69, + "learning_rate": 3.3209261747710885e-07, + "loss": 8.2121, + "step": 1097400 + }, + { + "epoch": 11.69, + "learning_rate": 3.298256345453221e-07, + "loss": 8.1772, + "step": 1097500 + }, + { + "epoch": 11.69, + "learning_rate": 3.2756640297332274e-07, + "loss": 8.1734, + "step": 1097600 + }, + { + "epoch": 11.69, + "learning_rate": 3.2531492293679245e-07, + "loss": 8.2184, + "step": 1097700 + }, + { + "epoch": 11.69, + "learning_rate": 3.2307119461085775e-07, + "loss": 8.2188, + "step": 1097800 + }, + { + "epoch": 11.69, + "learning_rate": 3.208352181699792e-07, + "loss": 8.1941, + "step": 1097900 + }, + { + "epoch": 11.7, + "learning_rate": 3.1860699378808424e-07, + "loss": 8.1446, + "step": 1098000 + }, + { + "epoch": 11.7, + "learning_rate": 3.1638652163843427e-07, + "loss": 8.1277, + "step": 1098100 + }, + { + "epoch": 11.7, + "learning_rate": 3.1417380189372456e-07, + "loss": 8.087, + "step": 1098200 + }, + { + "epoch": 11.7, + "learning_rate": 3.1196883472603965e-07, + "loss": 8.239, + "step": 1098300 + }, + { + "epoch": 11.7, + "learning_rate": 3.097716203068757e-07, + "loss": 8.1697, + "step": 1098400 + }, + { + "epoch": 11.7, + "learning_rate": 3.0758215880707374e-07, + "loss": 8.0557, + "step": 1098500 + }, + { + "epoch": 11.7, + "learning_rate": 3.054004503969421e-07, + "loss": 8.1419, + "step": 1098600 + }, + { + "epoch": 11.7, + "learning_rate": 3.03226495246145e-07, + "loss": 8.1941, + "step": 1098700 + }, + { + "epoch": 11.7, + "learning_rate": 3.010602935237361e-07, + "loss": 8.0976, + "step": 1098800 + }, + { + "epoch": 11.7, + "learning_rate": 2.9890184539819176e-07, + "loss": 8.1591, + "step": 1098900 + }, + { + "epoch": 11.71, + "learning_rate": 2.967511510373777e-07, + "loss": 8.1712, + "step": 1099000 + }, + { + "epoch": 11.71, + "learning_rate": 2.9460821060854905e-07, + "loss": 8.1569, + "step": 1099100 + }, + { + "epoch": 11.71, + "learning_rate": 2.924730242783613e-07, + "loss": 8.1603, + "step": 1099200 + }, + { + "epoch": 11.71, + "learning_rate": 2.903455922128706e-07, + "loss": 8.1549, + "step": 1099300 + }, + { + "epoch": 11.71, + "learning_rate": 2.8822591457753346e-07, + "loss": 8.2493, + "step": 1099400 + }, + { + "epoch": 11.71, + "learning_rate": 2.861139915371847e-07, + "loss": 8.1034, + "step": 1099500 + }, + { + "epoch": 11.71, + "learning_rate": 2.8400982325608173e-07, + "loss": 8.1538, + "step": 1099600 + }, + { + "epoch": 11.71, + "learning_rate": 2.819134098978493e-07, + "loss": 8.2129, + "step": 1099700 + }, + { + "epoch": 11.71, + "learning_rate": 2.798247516255459e-07, + "loss": 8.1654, + "step": 1099800 + }, + { + "epoch": 11.72, + "learning_rate": 2.777438486015971e-07, + "loss": 8.2338, + "step": 1099900 + }, + { + "epoch": 11.72, + "learning_rate": 2.7567070098782897e-07, + "loss": 8.1557, + "step": 1100000 + }, + { + "epoch": 11.72, + "learning_rate": 2.736053089454793e-07, + "loss": 8.1395, + "step": 1100100 + }, + { + "epoch": 11.72, + "learning_rate": 2.71547672635164e-07, + "loss": 8.1467, + "step": 1100200 + }, + { + "epoch": 11.72, + "learning_rate": 2.694977922169217e-07, + "loss": 8.173, + "step": 1100300 + }, + { + "epoch": 11.72, + "learning_rate": 2.674556678501583e-07, + "loss": 8.1727, + "step": 1100400 + }, + { + "epoch": 11.72, + "learning_rate": 2.6542129969369114e-07, + "loss": 8.1659, + "step": 1100500 + }, + { + "epoch": 11.72, + "learning_rate": 2.633946879057381e-07, + "loss": 8.213, + "step": 1100600 + }, + { + "epoch": 11.72, + "learning_rate": 2.613758326439175e-07, + "loss": 8.1876, + "step": 1100700 + }, + { + "epoch": 11.72, + "learning_rate": 2.59364734065215e-07, + "loss": 8.196, + "step": 1100800 + }, + { + "epoch": 11.73, + "learning_rate": 2.5736139232603875e-07, + "loss": 8.1365, + "step": 1100900 + }, + { + "epoch": 11.73, + "learning_rate": 2.553658075822085e-07, + "loss": 8.1385, + "step": 1101000 + }, + { + "epoch": 11.73, + "learning_rate": 2.533779799889002e-07, + "loss": 8.2212, + "step": 1101100 + }, + { + "epoch": 11.73, + "learning_rate": 2.513979097007124e-07, + "loss": 8.0977, + "step": 1101200 + }, + { + "epoch": 11.73, + "learning_rate": 2.494255968716441e-07, + "loss": 8.144, + "step": 1101300 + }, + { + "epoch": 11.73, + "learning_rate": 2.4746104165507266e-07, + "loss": 8.2028, + "step": 1101400 + }, + { + "epoch": 11.73, + "learning_rate": 2.4550424420378693e-07, + "loss": 8.1409, + "step": 1101500 + }, + { + "epoch": 11.73, + "learning_rate": 2.4355520466995405e-07, + "loss": 8.2183, + "step": 1101600 + }, + { + "epoch": 11.73, + "learning_rate": 2.4161392320517504e-07, + "loss": 8.1481, + "step": 1101700 + }, + { + "epoch": 11.74, + "learning_rate": 2.3968039996040694e-07, + "loss": 8.1033, + "step": 1101800 + }, + { + "epoch": 11.74, + "learning_rate": 2.3775463508601826e-07, + "loss": 8.1604, + "step": 1101900 + }, + { + "epoch": 11.74, + "learning_rate": 2.3583662873177815e-07, + "loss": 8.1708, + "step": 1102000 + }, + { + "epoch": 11.74, + "learning_rate": 2.3392638104686726e-07, + "loss": 8.0966, + "step": 1102100 + }, + { + "epoch": 11.74, + "learning_rate": 2.3202389217982233e-07, + "loss": 8.1498, + "step": 1102200 + }, + { + "epoch": 11.74, + "learning_rate": 2.3012916227861393e-07, + "loss": 8.1406, + "step": 1102300 + }, + { + "epoch": 11.74, + "learning_rate": 2.282421914905797e-07, + "loss": 8.1891, + "step": 1102400 + }, + { + "epoch": 11.74, + "learning_rate": 2.263629799624911e-07, + "loss": 8.0988, + "step": 1102500 + }, + { + "epoch": 11.74, + "learning_rate": 2.244915278404869e-07, + "loss": 8.17, + "step": 1102600 + }, + { + "epoch": 11.75, + "learning_rate": 2.226278352701061e-07, + "loss": 8.1571, + "step": 1102700 + }, + { + "epoch": 11.75, + "learning_rate": 2.2077190239628842e-07, + "loss": 8.1291, + "step": 1102800 + }, + { + "epoch": 11.75, + "learning_rate": 2.1892372936336282e-07, + "loss": 8.2018, + "step": 1102900 + }, + { + "epoch": 11.75, + "learning_rate": 2.1708331631508095e-07, + "loss": 8.131, + "step": 1103000 + }, + { + "epoch": 11.75, + "learning_rate": 2.1525066339456168e-07, + "loss": 8.1478, + "step": 1103100 + }, + { + "epoch": 11.75, + "learning_rate": 2.1342577074432436e-07, + "loss": 8.1595, + "step": 1103200 + }, + { + "epoch": 11.75, + "learning_rate": 2.1160863850629987e-07, + "loss": 8.1998, + "step": 1103300 + }, + { + "epoch": 11.75, + "learning_rate": 2.0979926682180851e-07, + "loss": 8.2023, + "step": 1103400 + }, + { + "epoch": 11.75, + "learning_rate": 2.0799765583154884e-07, + "loss": 8.13, + "step": 1103500 + }, + { + "epoch": 11.75, + "learning_rate": 2.0620380567565324e-07, + "loss": 8.1385, + "step": 1103600 + }, + { + "epoch": 11.76, + "learning_rate": 2.0441771649362118e-07, + "loss": 8.1817, + "step": 1103700 + }, + { + "epoch": 11.76, + "learning_rate": 2.026393884243527e-07, + "loss": 8.1582, + "step": 1103800 + }, + { + "epoch": 11.76, + "learning_rate": 2.0086882160614827e-07, + "loss": 8.1533, + "step": 1103900 + }, + { + "epoch": 11.76, + "learning_rate": 1.9910601617671997e-07, + "loss": 8.1472, + "step": 1104000 + }, + { + "epoch": 11.76, + "learning_rate": 1.9735097227313592e-07, + "loss": 8.1644, + "step": 1104100 + }, + { + "epoch": 11.76, + "learning_rate": 1.9560369003190916e-07, + "loss": 8.277, + "step": 1104200 + }, + { + "epoch": 11.76, + "learning_rate": 1.9386416958891985e-07, + "loss": 8.191, + "step": 1104300 + }, + { + "epoch": 11.76, + "learning_rate": 1.9213241107944867e-07, + "loss": 8.1715, + "step": 1104400 + }, + { + "epoch": 11.76, + "learning_rate": 1.9040841463816573e-07, + "loss": 8.1382, + "step": 1104500 + }, + { + "epoch": 11.77, + "learning_rate": 1.886921803991637e-07, + "loss": 8.2904, + "step": 1104600 + }, + { + "epoch": 11.77, + "learning_rate": 1.869837084959025e-07, + "loss": 8.1115, + "step": 1104700 + }, + { + "epoch": 11.77, + "learning_rate": 1.852829990612537e-07, + "loss": 8.116, + "step": 1104800 + }, + { + "epoch": 11.77, + "learning_rate": 1.8359005222747805e-07, + "loss": 8.2132, + "step": 1104900 + }, + { + "epoch": 11.77, + "learning_rate": 1.8190486812623698e-07, + "loss": 8.1052, + "step": 1105000 + }, + { + "epoch": 11.77, + "learning_rate": 1.8022744688860339e-07, + "loss": 8.1227, + "step": 1105100 + }, + { + "epoch": 11.77, + "learning_rate": 1.7855778864500627e-07, + "loss": 8.1917, + "step": 1105200 + }, + { + "epoch": 11.77, + "learning_rate": 1.7689589352530845e-07, + "loss": 8.209, + "step": 1105300 + }, + { + "epoch": 11.77, + "learning_rate": 1.7524176165875095e-07, + "loss": 8.1786, + "step": 1105400 + }, + { + "epoch": 11.78, + "learning_rate": 1.7359539317397534e-07, + "loss": 8.1491, + "step": 1105500 + }, + { + "epoch": 11.78, + "learning_rate": 1.719567881990236e-07, + "loss": 8.1343, + "step": 1105600 + }, + { + "epoch": 11.78, + "learning_rate": 1.703259468613383e-07, + "loss": 8.1366, + "step": 1105700 + }, + { + "epoch": 11.78, + "learning_rate": 1.6870286928772905e-07, + "loss": 8.1161, + "step": 1105800 + }, + { + "epoch": 11.78, + "learning_rate": 1.6708755560443934e-07, + "loss": 8.1755, + "step": 1105900 + }, + { + "epoch": 11.78, + "learning_rate": 1.654800059370798e-07, + "loss": 8.1588, + "step": 1106000 + }, + { + "epoch": 11.78, + "learning_rate": 1.6388022041068373e-07, + "loss": 8.151, + "step": 1106100 + }, + { + "epoch": 11.78, + "learning_rate": 1.6228819914966277e-07, + "loss": 8.0846, + "step": 1106200 + }, + { + "epoch": 11.78, + "learning_rate": 1.6070394227782893e-07, + "loss": 8.1373, + "step": 1106300 + }, + { + "epoch": 11.78, + "learning_rate": 1.5912744991838367e-07, + "loss": 8.1801, + "step": 1106400 + }, + { + "epoch": 11.79, + "learning_rate": 1.5755872219392897e-07, + "loss": 8.2179, + "step": 1106500 + }, + { + "epoch": 11.79, + "learning_rate": 1.559977592264783e-07, + "loss": 8.1844, + "step": 1106600 + }, + { + "epoch": 11.79, + "learning_rate": 1.5444456113741236e-07, + "loss": 8.1854, + "step": 1106700 + }, + { + "epoch": 11.79, + "learning_rate": 1.5289912804753447e-07, + "loss": 8.1701, + "step": 1106800 + }, + { + "epoch": 11.79, + "learning_rate": 1.5136146007702635e-07, + "loss": 8.1793, + "step": 1106900 + }, + { + "epoch": 11.79, + "learning_rate": 1.4983155734548115e-07, + "loss": 8.1871, + "step": 1107000 + }, + { + "epoch": 11.79, + "learning_rate": 1.4830941997187043e-07, + "loss": 8.1871, + "step": 1107100 + }, + { + "epoch": 11.79, + "learning_rate": 1.4679504807457723e-07, + "loss": 8.1447, + "step": 1107200 + }, + { + "epoch": 11.79, + "learning_rate": 1.4528844177137401e-07, + "loss": 8.2075, + "step": 1107300 + }, + { + "epoch": 11.8, + "learning_rate": 1.4378960117943374e-07, + "loss": 8.1429, + "step": 1107400 + }, + { + "epoch": 11.8, + "learning_rate": 1.422985264153076e-07, + "loss": 8.1776, + "step": 1107500 + }, + { + "epoch": 11.8, + "learning_rate": 1.408152175949584e-07, + "loss": 8.2118, + "step": 1107600 + }, + { + "epoch": 11.8, + "learning_rate": 1.3933967483376053e-07, + "loss": 8.1573, + "step": 1107700 + }, + { + "epoch": 11.8, + "learning_rate": 1.3787189824645552e-07, + "loss": 8.0789, + "step": 1107800 + }, + { + "epoch": 11.8, + "learning_rate": 1.364118879471854e-07, + "loss": 8.0848, + "step": 1107900 + }, + { + "epoch": 11.8, + "learning_rate": 1.3495964404950377e-07, + "loss": 8.1682, + "step": 1108000 + }, + { + "epoch": 11.8, + "learning_rate": 1.3351516666635365e-07, + "loss": 8.1777, + "step": 1108100 + }, + { + "epoch": 11.8, + "learning_rate": 1.320784559100674e-07, + "loss": 8.0902, + "step": 1108200 + }, + { + "epoch": 11.8, + "learning_rate": 1.3064951189238894e-07, + "loss": 8.109, + "step": 1108300 + }, + { + "epoch": 11.81, + "learning_rate": 1.2922833472442942e-07, + "loss": 8.282, + "step": 1108400 + }, + { + "epoch": 11.81, + "learning_rate": 1.278149245167226e-07, + "loss": 8.1172, + "step": 1108500 + }, + { + "epoch": 11.81, + "learning_rate": 1.2640928137919173e-07, + "loss": 8.1406, + "step": 1108600 + }, + { + "epoch": 11.81, + "learning_rate": 1.2501140542116042e-07, + "loss": 8.1682, + "step": 1108700 + }, + { + "epoch": 11.81, + "learning_rate": 1.2362129675133062e-07, + "loss": 8.1809, + "step": 1108800 + }, + { + "epoch": 11.81, + "learning_rate": 1.2223895547780473e-07, + "loss": 8.0993, + "step": 1108900 + }, + { + "epoch": 11.81, + "learning_rate": 1.2086438170810787e-07, + "loss": 8.1116, + "step": 1109000 + }, + { + "epoch": 11.81, + "learning_rate": 1.194975755491212e-07, + "loss": 8.2153, + "step": 1109100 + }, + { + "epoch": 11.81, + "learning_rate": 1.1813853710714862e-07, + "loss": 8.2147, + "step": 1109200 + }, + { + "epoch": 11.82, + "learning_rate": 1.167872664878944e-07, + "loss": 8.1812, + "step": 1109300 + }, + { + "epoch": 11.82, + "learning_rate": 1.1544376379643007e-07, + "loss": 8.1795, + "step": 1109400 + }, + { + "epoch": 11.82, + "learning_rate": 1.1410802913724982e-07, + "loss": 8.1916, + "step": 1109500 + }, + { + "epoch": 11.82, + "learning_rate": 1.1278006261422613e-07, + "loss": 8.0762, + "step": 1109600 + }, + { + "epoch": 11.82, + "learning_rate": 1.1145986433064303e-07, + "loss": 8.1861, + "step": 1109700 + }, + { + "epoch": 11.82, + "learning_rate": 1.1014743438916287e-07, + "loss": 8.1907, + "step": 1109800 + }, + { + "epoch": 11.82, + "learning_rate": 1.0884277289185952e-07, + "loss": 8.0971, + "step": 1109900 + }, + { + "epoch": 11.82, + "learning_rate": 1.0754587994020738e-07, + "loss": 8.1078, + "step": 1110000 + }, + { + "epoch": 11.82, + "learning_rate": 1.06256755635048e-07, + "loss": 8.1139, + "step": 1110100 + }, + { + "epoch": 11.83, + "learning_rate": 1.0497540007664564e-07, + "loss": 8.1566, + "step": 1110200 + }, + { + "epoch": 11.83, + "learning_rate": 1.037018133646539e-07, + "loss": 8.0644, + "step": 1110300 + }, + { + "epoch": 11.83, + "learning_rate": 1.0243599559810468e-07, + "loss": 8.154, + "step": 1110400 + }, + { + "epoch": 11.83, + "learning_rate": 1.0117794687546367e-07, + "loss": 8.2412, + "step": 1110500 + }, + { + "epoch": 11.83, + "learning_rate": 9.992766729455261e-08, + "loss": 8.1001, + "step": 1110600 + }, + { + "epoch": 11.83, + "learning_rate": 9.868515695260483e-08, + "loss": 8.2027, + "step": 1110700 + }, + { + "epoch": 11.83, + "learning_rate": 9.745041594626525e-08, + "loss": 8.2096, + "step": 1110800 + }, + { + "epoch": 11.83, + "learning_rate": 9.622344437154596e-08, + "loss": 8.2597, + "step": 1110900 + }, + { + "epoch": 11.83, + "learning_rate": 9.500424232387062e-08, + "loss": 8.1218, + "step": 1111000 + }, + { + "epoch": 11.83, + "learning_rate": 9.379280989806338e-08, + "loss": 8.166, + "step": 1111100 + }, + { + "epoch": 11.84, + "learning_rate": 9.258914718833778e-08, + "loss": 8.1951, + "step": 1111200 + }, + { + "epoch": 11.84, + "learning_rate": 9.13932542882967e-08, + "loss": 8.2104, + "step": 1111300 + }, + { + "epoch": 11.84, + "learning_rate": 9.020513129094354e-08, + "loss": 8.1935, + "step": 1111400 + }, + { + "epoch": 11.84, + "learning_rate": 8.902477828869327e-08, + "loss": 8.1251, + "step": 1111500 + }, + { + "epoch": 11.84, + "learning_rate": 8.78521953733169e-08, + "loss": 8.1775, + "step": 1111600 + }, + { + "epoch": 11.84, + "learning_rate": 8.668738263603038e-08, + "loss": 8.1723, + "step": 1111700 + }, + { + "epoch": 11.84, + "learning_rate": 8.553034016740569e-08, + "loss": 8.1034, + "step": 1111800 + }, + { + "epoch": 11.84, + "learning_rate": 8.43810680574375e-08, + "loss": 8.2045, + "step": 1111900 + }, + { + "epoch": 11.84, + "learning_rate": 8.323956639548769e-08, + "loss": 8.1585, + "step": 1112000 + }, + { + "epoch": 11.85, + "learning_rate": 8.210583527035188e-08, + "loss": 8.2352, + "step": 1112100 + }, + { + "epoch": 11.85, + "learning_rate": 8.097987477019287e-08, + "loss": 8.1468, + "step": 1112200 + }, + { + "epoch": 11.85, + "learning_rate": 7.986168498256286e-08, + "loss": 8.122, + "step": 1112300 + }, + { + "epoch": 11.85, + "learning_rate": 7.875126599444782e-08, + "loss": 8.1542, + "step": 1112400 + }, + { + "epoch": 11.85, + "learning_rate": 7.76486178921898e-08, + "loss": 8.1814, + "step": 1112500 + }, + { + "epoch": 11.85, + "learning_rate": 7.655374076154242e-08, + "loss": 8.1136, + "step": 1112600 + }, + { + "epoch": 11.85, + "learning_rate": 7.54666346876598e-08, + "loss": 8.0928, + "step": 1112700 + }, + { + "epoch": 11.85, + "learning_rate": 7.43872997550854e-08, + "loss": 8.0997, + "step": 1112800 + }, + { + "epoch": 11.85, + "learning_rate": 7.33157360477521e-08, + "loss": 8.1448, + "step": 1112900 + }, + { + "epoch": 11.85, + "learning_rate": 7.225194364900434e-08, + "loss": 8.1728, + "step": 1113000 + }, + { + "epoch": 11.86, + "learning_rate": 7.119592264156482e-08, + "loss": 8.1797, + "step": 1113100 + }, + { + "epoch": 11.86, + "learning_rate": 7.014767310756787e-08, + "loss": 8.1429, + "step": 1113200 + }, + { + "epoch": 11.86, + "learning_rate": 6.910719512853713e-08, + "loss": 8.208, + "step": 1113300 + }, + { + "epoch": 11.86, + "learning_rate": 6.807448878538569e-08, + "loss": 8.0283, + "step": 1113400 + }, + { + "epoch": 11.86, + "learning_rate": 6.704955415842706e-08, + "loss": 8.3043, + "step": 1113500 + }, + { + "epoch": 11.86, + "learning_rate": 6.603239132736416e-08, + "loss": 8.2377, + "step": 1113600 + }, + { + "epoch": 11.86, + "learning_rate": 6.502300037132258e-08, + "loss": 8.2207, + "step": 1113700 + }, + { + "epoch": 11.86, + "learning_rate": 6.402138136878399e-08, + "loss": 8.1113, + "step": 1113800 + }, + { + "epoch": 11.86, + "learning_rate": 6.302753439765274e-08, + "loss": 8.1694, + "step": 1113900 + }, + { + "epoch": 11.87, + "learning_rate": 6.204145953521146e-08, + "loss": 8.2088, + "step": 1114000 + }, + { + "epoch": 11.87, + "learning_rate": 6.106315685815434e-08, + "loss": 8.1294, + "step": 1114100 + }, + { + "epoch": 11.87, + "learning_rate": 6.009262644256497e-08, + "loss": 8.1451, + "step": 1114200 + }, + { + "epoch": 11.87, + "learning_rate": 5.912986836392742e-08, + "loss": 8.1137, + "step": 1114300 + }, + { + "epoch": 11.87, + "learning_rate": 5.8174882697104025e-08, + "loss": 8.1286, + "step": 1114400 + }, + { + "epoch": 11.87, + "learning_rate": 5.722766951635761e-08, + "loss": 8.169, + "step": 1114500 + }, + { + "epoch": 11.87, + "learning_rate": 5.628822889537366e-08, + "loss": 8.1433, + "step": 1114600 + }, + { + "epoch": 11.87, + "learning_rate": 5.535656090720487e-08, + "loss": 8.2252, + "step": 1114700 + }, + { + "epoch": 11.87, + "learning_rate": 5.443266562430438e-08, + "loss": 8.2318, + "step": 1114800 + }, + { + "epoch": 11.88, + "learning_rate": 5.351654311851473e-08, + "loss": 8.1874, + "step": 1114900 + }, + { + "epoch": 11.88, + "learning_rate": 5.260819346110113e-08, + "loss": 8.1275, + "step": 1115000 + }, + { + "epoch": 11.88, + "learning_rate": 5.1707616722695974e-08, + "loss": 8.1395, + "step": 1115100 + }, + { + "epoch": 11.88, + "learning_rate": 5.081481297334323e-08, + "loss": 8.1456, + "step": 1115200 + }, + { + "epoch": 11.88, + "learning_rate": 4.992978228246514e-08, + "loss": 8.1664, + "step": 1115300 + }, + { + "epoch": 11.88, + "learning_rate": 4.905252471889554e-08, + "loss": 8.1115, + "step": 1115400 + }, + { + "epoch": 11.88, + "learning_rate": 4.818304035085763e-08, + "loss": 8.1694, + "step": 1115500 + }, + { + "epoch": 11.88, + "learning_rate": 4.73213292459862e-08, + "loss": 8.1421, + "step": 1115600 + }, + { + "epoch": 11.88, + "learning_rate": 4.64673914712721e-08, + "loss": 8.157, + "step": 1115700 + }, + { + "epoch": 11.88, + "learning_rate": 4.5621227093139986e-08, + "loss": 8.1785, + "step": 1115800 + }, + { + "epoch": 11.89, + "learning_rate": 4.478283617740386e-08, + "loss": 8.1565, + "step": 1115900 + }, + { + "epoch": 11.89, + "learning_rate": 4.395221878924494e-08, + "loss": 8.123, + "step": 1116000 + }, + { + "epoch": 11.89, + "learning_rate": 4.3129374993278184e-08, + "loss": 8.0936, + "step": 1116100 + }, + { + "epoch": 11.89, + "learning_rate": 4.231430485348575e-08, + "loss": 8.1443, + "step": 1116200 + }, + { + "epoch": 11.89, + "learning_rate": 4.150700843326139e-08, + "loss": 8.2025, + "step": 1116300 + }, + { + "epoch": 11.89, + "learning_rate": 4.070748579538819e-08, + "loss": 8.2084, + "step": 1116400 + }, + { + "epoch": 11.89, + "learning_rate": 3.991573700204976e-08, + "loss": 8.252, + "step": 1116500 + }, + { + "epoch": 11.89, + "learning_rate": 3.913176211480796e-08, + "loss": 8.2067, + "step": 1116600 + }, + { + "epoch": 11.89, + "learning_rate": 3.835556119463624e-08, + "loss": 8.1817, + "step": 1116700 + }, + { + "epoch": 11.9, + "learning_rate": 3.758713430190852e-08, + "loss": 8.244, + "step": 1116800 + }, + { + "epoch": 11.9, + "learning_rate": 3.6826481496377016e-08, + "loss": 8.1372, + "step": 1116900 + }, + { + "epoch": 11.9, + "learning_rate": 3.607360283719441e-08, + "loss": 8.0947, + "step": 1117000 + }, + { + "epoch": 11.9, + "learning_rate": 3.532849838292496e-08, + "loss": 8.1511, + "step": 1117100 + }, + { + "epoch": 11.9, + "learning_rate": 3.45911681915112e-08, + "loss": 8.2004, + "step": 1117200 + }, + { + "epoch": 11.9, + "learning_rate": 3.3861612320285066e-08, + "loss": 8.1201, + "step": 1117300 + }, + { + "epoch": 11.9, + "learning_rate": 3.313983082600114e-08, + "loss": 8.1598, + "step": 1117400 + }, + { + "epoch": 11.9, + "learning_rate": 3.2425823764781206e-08, + "loss": 8.2093, + "step": 1117500 + }, + { + "epoch": 11.9, + "learning_rate": 3.171959119214751e-08, + "loss": 8.1432, + "step": 1117600 + }, + { + "epoch": 11.9, + "learning_rate": 3.1021133163045004e-08, + "loss": 8.1334, + "step": 1117700 + }, + { + "epoch": 11.91, + "learning_rate": 3.033044973177468e-08, + "loss": 8.1673, + "step": 1117800 + }, + { + "epoch": 11.91, + "learning_rate": 2.9647540952049135e-08, + "loss": 8.0766, + "step": 1117900 + }, + { + "epoch": 11.91, + "learning_rate": 2.8972406876992542e-08, + "loss": 8.148, + "step": 1118000 + }, + { + "epoch": 11.91, + "learning_rate": 2.8305047559107343e-08, + "loss": 8.1418, + "step": 1118100 + }, + { + "epoch": 11.91, + "learning_rate": 2.764546305028537e-08, + "loss": 8.1619, + "step": 1118200 + }, + { + "epoch": 11.91, + "learning_rate": 2.699365340181892e-08, + "loss": 8.2239, + "step": 1118300 + }, + { + "epoch": 11.91, + "learning_rate": 2.6349618664422982e-08, + "loss": 8.2692, + "step": 1118400 + }, + { + "epoch": 11.91, + "learning_rate": 2.5713358888157514e-08, + "loss": 8.1113, + "step": 1118500 + }, + { + "epoch": 11.91, + "learning_rate": 2.5084874122516254e-08, + "loss": 8.2183, + "step": 1118600 + }, + { + "epoch": 11.92, + "learning_rate": 2.4464164416382328e-08, + "loss": 8.1431, + "step": 1118700 + }, + { + "epoch": 11.92, + "learning_rate": 2.385122981801713e-08, + "loss": 8.1665, + "step": 1118800 + }, + { + "epoch": 11.92, + "learning_rate": 2.3246070375093633e-08, + "loss": 8.1511, + "step": 1118900 + }, + { + "epoch": 11.92, + "learning_rate": 2.264868613467419e-08, + "loss": 8.1785, + "step": 1119000 + }, + { + "epoch": 11.92, + "learning_rate": 2.2059077143221642e-08, + "loss": 8.1569, + "step": 1119100 + }, + { + "epoch": 11.92, + "learning_rate": 2.147724344658819e-08, + "loss": 8.1315, + "step": 1119200 + }, + { + "epoch": 11.92, + "learning_rate": 2.090318509001543e-08, + "loss": 8.1879, + "step": 1119300 + }, + { + "epoch": 11.92, + "learning_rate": 2.0336902118167635e-08, + "loss": 8.1557, + "step": 1119400 + }, + { + "epoch": 11.92, + "learning_rate": 1.9778394575054036e-08, + "loss": 8.1056, + "step": 1119500 + }, + { + "epoch": 11.93, + "learning_rate": 1.9227662504139875e-08, + "loss": 8.1696, + "step": 1119600 + }, + { + "epoch": 11.93, + "learning_rate": 1.868470594823535e-08, + "loss": 8.1699, + "step": 1119700 + }, + { + "epoch": 11.93, + "learning_rate": 1.8149524949573337e-08, + "loss": 8.1838, + "step": 1119800 + }, + { + "epoch": 11.93, + "learning_rate": 1.7622119549776107e-08, + "loss": 8.1563, + "step": 1119900 + }, + { + "epoch": 11.93, + "learning_rate": 1.7102489789866393e-08, + "loss": 8.1979, + "step": 1120000 + }, + { + "epoch": 11.93, + "learning_rate": 1.6590635710234114e-08, + "loss": 8.1631, + "step": 1120100 + }, + { + "epoch": 11.93, + "learning_rate": 1.608655735071407e-08, + "loss": 8.2064, + "step": 1120200 + }, + { + "epoch": 11.93, + "learning_rate": 1.5590254750486032e-08, + "loss": 8.2135, + "step": 1120300 + }, + { + "epoch": 11.93, + "learning_rate": 1.5101727948163557e-08, + "loss": 8.1149, + "step": 1120400 + }, + { + "epoch": 11.93, + "learning_rate": 1.4620976981727375e-08, + "loss": 8.2165, + "step": 1120500 + }, + { + "epoch": 11.94, + "learning_rate": 1.4148001888569796e-08, + "loss": 8.1441, + "step": 1120600 + }, + { + "epoch": 11.94, + "learning_rate": 1.3682802705472509e-08, + "loss": 8.1564, + "step": 1120700 + }, + { + "epoch": 11.94, + "learning_rate": 1.3225379468628784e-08, + "loss": 8.1986, + "step": 1120800 + }, + { + "epoch": 11.94, + "learning_rate": 1.2775732213587966e-08, + "loss": 8.2174, + "step": 1120900 + }, + { + "epoch": 11.94, + "learning_rate": 1.2333860975333178e-08, + "loss": 8.2015, + "step": 1121000 + }, + { + "epoch": 11.94, + "learning_rate": 1.1899765788236928e-08, + "loss": 8.1138, + "step": 1121100 + }, + { + "epoch": 11.94, + "learning_rate": 1.1473446686038892e-08, + "loss": 8.2364, + "step": 1121200 + }, + { + "epoch": 11.94, + "learning_rate": 1.1054903701901431e-08, + "loss": 8.1393, + "step": 1121300 + }, + { + "epoch": 11.94, + "learning_rate": 1.0644136868387389e-08, + "loss": 8.2134, + "step": 1121400 + }, + { + "epoch": 11.95, + "learning_rate": 1.0241146217426777e-08, + "loss": 8.1373, + "step": 1121500 + }, + { + "epoch": 11.95, + "learning_rate": 9.845931780361194e-09, + "loss": 8.1998, + "step": 1121600 + }, + { + "epoch": 11.95, + "learning_rate": 9.458493587943817e-09, + "loss": 8.1314, + "step": 1121700 + }, + { + "epoch": 11.95, + "learning_rate": 9.078831670283895e-09, + "loss": 8.1046, + "step": 1121800 + }, + { + "epoch": 11.95, + "learning_rate": 8.706946056913357e-09, + "loss": 8.277, + "step": 1121900 + }, + { + "epoch": 11.95, + "learning_rate": 8.342836776764617e-09, + "loss": 8.0816, + "step": 1122000 + }, + { + "epoch": 11.95, + "learning_rate": 7.986503858137262e-09, + "loss": 8.1678, + "step": 1122100 + }, + { + "epoch": 11.95, + "learning_rate": 7.637947328764662e-09, + "loss": 8.168, + "step": 1122200 + }, + { + "epoch": 11.95, + "learning_rate": 7.297167215736256e-09, + "loss": 8.15, + "step": 1122300 + }, + { + "epoch": 11.96, + "learning_rate": 6.964163545553071e-09, + "loss": 8.0957, + "step": 1122400 + }, + { + "epoch": 11.96, + "learning_rate": 6.638936344127711e-09, + "loss": 8.1633, + "step": 1122500 + }, + { + "epoch": 11.96, + "learning_rate": 6.3214856367510565e-09, + "loss": 8.1131, + "step": 1122600 + }, + { + "epoch": 11.96, + "learning_rate": 6.0118114481033665e-09, + "loss": 8.1871, + "step": 1122700 + }, + { + "epoch": 11.96, + "learning_rate": 5.7099138022653765e-09, + "loss": 8.2024, + "step": 1122800 + }, + { + "epoch": 11.96, + "learning_rate": 5.415792722718305e-09, + "loss": 8.1664, + "step": 1122900 + }, + { + "epoch": 11.96, + "learning_rate": 5.129448232343848e-09, + "loss": 8.1284, + "step": 1123000 + }, + { + "epoch": 11.96, + "learning_rate": 4.8508803534130785e-09, + "loss": 8.1615, + "step": 1123100 + }, + { + "epoch": 11.96, + "learning_rate": 4.580089107575347e-09, + "loss": 8.1382, + "step": 1123200 + }, + { + "epoch": 11.96, + "learning_rate": 4.3170745158915835e-09, + "loss": 8.1529, + "step": 1123300 + }, + { + "epoch": 11.97, + "learning_rate": 4.061836598834301e-09, + "loss": 8.149, + "step": 1123400 + }, + { + "epoch": 11.97, + "learning_rate": 3.814375376243184e-09, + "loss": 8.1766, + "step": 1123500 + }, + { + "epoch": 11.97, + "learning_rate": 3.574690867347297e-09, + "loss": 8.055, + "step": 1123600 + }, + { + "epoch": 11.97, + "learning_rate": 3.3427830908205895e-09, + "loss": 8.1066, + "step": 1123700 + }, + { + "epoch": 11.97, + "learning_rate": 3.118652064659777e-09, + "loss": 8.0998, + "step": 1123800 + }, + { + "epoch": 11.97, + "learning_rate": 2.9022978063286686e-09, + "loss": 8.1878, + "step": 1123900 + }, + { + "epoch": 11.97, + "learning_rate": 2.6937203326360407e-09, + "loss": 8.1568, + "step": 1124000 + }, + { + "epoch": 11.97, + "learning_rate": 2.492919659813353e-09, + "loss": 8.2, + "step": 1124100 + }, + { + "epoch": 11.97, + "learning_rate": 2.2998958034592397e-09, + "loss": 8.1856, + "step": 1124200 + }, + { + "epoch": 11.98, + "learning_rate": 2.1146487786061207e-09, + "loss": 8.1942, + "step": 1124300 + }, + { + "epoch": 11.98, + "learning_rate": 1.9371785996424864e-09, + "loss": 8.1778, + "step": 1124400 + }, + { + "epoch": 11.98, + "learning_rate": 1.7674852803795106e-09, + "loss": 8.1571, + "step": 1124500 + }, + { + "epoch": 11.98, + "learning_rate": 1.6055688340177455e-09, + "loss": 8.1025, + "step": 1124600 + }, + { + "epoch": 11.98, + "learning_rate": 1.4514292731471202e-09, + "loss": 8.1687, + "step": 1124700 + }, + { + "epoch": 11.98, + "learning_rate": 1.305066609746941e-09, + "loss": 8.1588, + "step": 1124800 + }, + { + "epoch": 11.98, + "learning_rate": 1.1664808552080964e-09, + "loss": 8.185, + "step": 1124900 + }, + { + "epoch": 11.98, + "learning_rate": 1.0356720202997495e-09, + "loss": 8.1149, + "step": 1125000 + }, + { + "epoch": 11.98, + "learning_rate": 9.126401152137476e-10, + "loss": 8.1099, + "step": 1125100 + }, + { + "epoch": 11.98, + "learning_rate": 7.973851494980089e-10, + "loss": 8.1103, + "step": 1125200 + }, + { + "epoch": 11.99, + "learning_rate": 6.899071321231354e-10, + "loss": 8.1382, + "step": 1125300 + }, + { + "epoch": 11.99, + "learning_rate": 5.902060714491064e-10, + "loss": 8.1411, + "step": 1125400 + }, + { + "epoch": 11.99, + "learning_rate": 4.982819752252788e-10, + "loss": 8.1504, + "step": 1125500 + }, + { + "epoch": 11.99, + "learning_rate": 4.141348506125908e-10, + "loss": 8.1573, + "step": 1125600 + }, + { + "epoch": 11.99, + "learning_rate": 3.377647041502563e-10, + "loss": 8.16, + "step": 1125700 + }, + { + "epoch": 11.99, + "learning_rate": 2.691715417668661e-10, + "loss": 8.1804, + "step": 1125800 + }, + { + "epoch": 11.99, + "learning_rate": 2.0835536880259299e-10, + "loss": 8.1776, + "step": 1125900 + }, + { + "epoch": 11.99, + "learning_rate": 1.5531618998698705e-10, + "loss": 8.1192, + "step": 1126000 + }, + { + "epoch": 11.99, + "learning_rate": 1.1005400945007793e-10, + "loss": 8.1463, + "step": 1126100 + }, + { + "epoch": 12.0, + "learning_rate": 7.256883071127263e-11, + "loss": 8.1926, + "step": 1126200 + }, + { + "epoch": 12.0, + "learning_rate": 4.286065667935546e-11, + "loss": 8.1842, + "step": 1126300 + }, + { + "epoch": 12.0, + "learning_rate": 2.092948967469255e-11, + "loss": 8.2233, + "step": 1126400 + }, + { + "epoch": 12.0, + "learning_rate": 6.775331384822891e-12, + "loss": 8.0857, + "step": 1126500 + }, + { + "epoch": 12.0, + "learning_rate": 3.9818293107174445e-13, + "loss": 8.1872, + "step": 1126600 + }, + { + "epoch": 12.0, + "step": 1126620, + "total_flos": 0.0, + "train_loss": 8.691952179193992, + "train_runtime": 394221.0859, + "train_samples_per_second": 182.902, + "train_steps_per_second": 2.858 + } + ], + "logging_steps": 100, + "max_steps": 1126632, + "num_train_epochs": 12, + "save_steps": 500, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}