diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,70021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 11666, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.714285714285715e-08, + "loss": 0.5529, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.142857142857143e-07, + "loss": 0.5725, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.7142857142857146e-07, + "loss": 0.4912, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.285714285714286e-07, + "loss": 0.5575, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.8571428571428575e-07, + "loss": 0.5187, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 3.428571428571429e-07, + "loss": 0.6355, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 4.0000000000000003e-07, + "loss": 0.5984, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 4.571428571428572e-07, + "loss": 0.6348, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 5.142857142857143e-07, + "loss": 0.5369, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 5.714285714285715e-07, + "loss": 0.5762, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 6.285714285714287e-07, + "loss": 0.5363, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 6.857142857142858e-07, + "loss": 0.554, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 7.428571428571429e-07, + "loss": 0.6045, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-07, + "loss": 0.6339, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 8.571428571428572e-07, + "loss": 0.5054, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 9.142857142857144e-07, + "loss": 0.5938, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 9.714285714285715e-07, + "loss": 0.5323, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 1.0285714285714286e-06, + "loss": 0.5791, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 1.0857142857142858e-06, + "loss": 0.5463, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 1.142857142857143e-06, + "loss": 0.5763, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-06, + "loss": 0.5176, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 1.2571428571428573e-06, + "loss": 0.5574, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 1.3142857142857143e-06, + "loss": 0.5089, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 1.3714285714285717e-06, + "loss": 0.5031, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 1.4285714285714286e-06, + "loss": 0.5227, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 1.4857142857142858e-06, + "loss": 0.4615, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 1.542857142857143e-06, + "loss": 0.4647, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.4697, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 1.657142857142857e-06, + "loss": 0.4701, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 1.7142857142857145e-06, + "loss": 0.4543, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.7714285714285714e-06, + "loss": 0.4435, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 1.8285714285714288e-06, + "loss": 0.447, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 1.885714285714286e-06, + "loss": 0.4642, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 1.942857142857143e-06, + "loss": 0.4464, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.5004, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 2.0571428571428573e-06, + "loss": 0.442, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 2.1142857142857147e-06, + "loss": 0.4708, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 2.1714285714285716e-06, + "loss": 0.4746, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 2.228571428571429e-06, + "loss": 0.4735, + "step": 39 + }, + { + "epoch": 0.0, + "learning_rate": 2.285714285714286e-06, + "loss": 0.469, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 2.342857142857143e-06, + "loss": 0.6212, + "step": 41 + }, + { + "epoch": 0.0, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.4497, + "step": 42 + }, + { + "epoch": 0.0, + "learning_rate": 2.4571428571428573e-06, + "loss": 0.4344, + "step": 43 + }, + { + "epoch": 0.0, + "learning_rate": 2.5142857142857147e-06, + "loss": 0.3882, + "step": 44 + }, + { + "epoch": 0.0, + "learning_rate": 2.571428571428571e-06, + "loss": 0.4233, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 2.6285714285714286e-06, + "loss": 0.4608, + "step": 46 + }, + { + "epoch": 0.0, + "learning_rate": 2.685714285714286e-06, + "loss": 0.4189, + "step": 47 + }, + { + "epoch": 0.0, + "learning_rate": 2.7428571428571433e-06, + "loss": 0.4578, + "step": 48 + }, + { + "epoch": 0.0, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.3942, + "step": 49 + }, + { + "epoch": 0.0, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.3894, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 2.9142857142857146e-06, + "loss": 0.4807, + "step": 51 + }, + { + "epoch": 0.0, + "learning_rate": 2.9714285714285716e-06, + "loss": 0.5984, + "step": 52 + }, + { + "epoch": 0.0, + "learning_rate": 3.028571428571429e-06, + "loss": 0.4355, + "step": 53 + }, + { + "epoch": 0.0, + "learning_rate": 3.085714285714286e-06, + "loss": 0.4977, + "step": 54 + }, + { + "epoch": 0.0, + "learning_rate": 3.142857142857143e-06, + "loss": 0.4062, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.4528, + "step": 56 + }, + { + "epoch": 0.0, + "learning_rate": 3.2571428571428577e-06, + "loss": 0.4639, + "step": 57 + }, + { + "epoch": 0.0, + "learning_rate": 3.314285714285714e-06, + "loss": 0.3911, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 3.3714285714285716e-06, + "loss": 0.4202, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 3.428571428571429e-06, + "loss": 0.3906, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 3.4857142857142863e-06, + "loss": 0.4504, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 3.542857142857143e-06, + "loss": 0.4298, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.3845, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 3.6571428571428576e-06, + "loss": 0.418, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 3.7142857142857146e-06, + "loss": 0.4547, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 3.771428571428572e-06, + "loss": 0.4591, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 3.828571428571429e-06, + "loss": 0.3833, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 3.885714285714286e-06, + "loss": 0.421, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 3.942857142857143e-06, + "loss": 0.4047, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.448, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.057142857142858e-06, + "loss": 0.4734, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 4.114285714285715e-06, + "loss": 0.4569, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 4.1714285714285715e-06, + "loss": 0.4249, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 4.228571428571429e-06, + "loss": 0.3678, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 4.2857142857142855e-06, + "loss": 0.437, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 4.342857142857143e-06, + "loss": 0.3923, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 4.4e-06, + "loss": 0.4132, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 4.457142857142858e-06, + "loss": 0.4335, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 4.514285714285714e-06, + "loss": 0.4343, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 4.571428571428572e-06, + "loss": 0.423, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.628571428571429e-06, + "loss": 0.4707, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 4.685714285714286e-06, + "loss": 0.4405, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 4.742857142857144e-06, + "loss": 0.3959, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 4.800000000000001e-06, + "loss": 0.4052, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 4.857142857142858e-06, + "loss": 0.4122, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 4.9142857142857145e-06, + "loss": 0.4609, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 4.971428571428572e-06, + "loss": 0.3727, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 5.028571428571429e-06, + "loss": 0.366, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 5.085714285714286e-06, + "loss": 0.3898, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 5.142857142857142e-06, + "loss": 0.4215, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 5.2e-06, + "loss": 0.3989, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 5.257142857142857e-06, + "loss": 0.4144, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 5.314285714285715e-06, + "loss": 0.4241, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 5.371428571428572e-06, + "loss": 0.3887, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 5.428571428571429e-06, + "loss": 0.3746, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 5.485714285714287e-06, + "loss": 0.4066, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 5.542857142857143e-06, + "loss": 0.4607, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 5.600000000000001e-06, + "loss": 0.3784, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 5.6571428571428576e-06, + "loss": 0.4544, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.4081, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 5.771428571428572e-06, + "loss": 0.3824, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 5.828571428571429e-06, + "loss": 0.3564, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 5.885714285714285e-06, + "loss": 0.4146, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 5.942857142857143e-06, + "loss": 0.4246, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 0.3865, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 6.057142857142858e-06, + "loss": 0.4225, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 6.114285714285715e-06, + "loss": 0.3798, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 6.171428571428572e-06, + "loss": 0.4398, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 6.22857142857143e-06, + "loss": 0.4353, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 6.285714285714286e-06, + "loss": 0.4125, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 6.342857142857143e-06, + "loss": 0.3684, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.3872, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 6.4571428571428575e-06, + "loss": 0.4212, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 6.514285714285715e-06, + "loss": 0.3522, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 6.571428571428572e-06, + "loss": 0.351, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 6.628571428571428e-06, + "loss": 0.3942, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 6.685714285714286e-06, + "loss": 0.3751, + "step": 117 + }, + { + "epoch": 0.01, + "learning_rate": 6.742857142857143e-06, + "loss": 0.3584, + "step": 118 + }, + { + "epoch": 0.01, + "learning_rate": 6.800000000000001e-06, + "loss": 0.4096, + "step": 119 + }, + { + "epoch": 0.01, + "learning_rate": 6.857142857142858e-06, + "loss": 0.4347, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 6.914285714285715e-06, + "loss": 0.3983, + "step": 121 + }, + { + "epoch": 0.01, + "learning_rate": 6.971428571428573e-06, + "loss": 0.397, + "step": 122 + }, + { + "epoch": 0.01, + "learning_rate": 7.028571428571429e-06, + "loss": 0.376, + "step": 123 + }, + { + "epoch": 0.01, + "learning_rate": 7.085714285714286e-06, + "loss": 0.3793, + "step": 124 + }, + { + "epoch": 0.01, + "learning_rate": 7.1428571428571436e-06, + "loss": 0.3593, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.399, + "step": 126 + }, + { + "epoch": 0.01, + "learning_rate": 7.257142857142858e-06, + "loss": 0.4334, + "step": 127 + }, + { + "epoch": 0.01, + "learning_rate": 7.314285714285715e-06, + "loss": 0.3678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 7.371428571428571e-06, + "loss": 0.389, + "step": 129 + }, + { + "epoch": 0.01, + "learning_rate": 7.428571428571429e-06, + "loss": 0.4114, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 7.485714285714286e-06, + "loss": 0.3561, + "step": 131 + }, + { + "epoch": 0.01, + "learning_rate": 7.542857142857144e-06, + "loss": 0.379, + "step": 132 + }, + { + "epoch": 0.01, + "learning_rate": 7.600000000000001e-06, + "loss": 0.41, + "step": 133 + }, + { + "epoch": 0.01, + "learning_rate": 7.657142857142858e-06, + "loss": 0.3839, + "step": 134 + }, + { + "epoch": 0.01, + "learning_rate": 7.714285714285716e-06, + "loss": 0.4163, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 7.771428571428572e-06, + "loss": 0.3829, + "step": 136 + }, + { + "epoch": 0.01, + "learning_rate": 7.828571428571428e-06, + "loss": 0.3513, + "step": 137 + }, + { + "epoch": 0.01, + "learning_rate": 7.885714285714286e-06, + "loss": 0.4048, + "step": 138 + }, + { + "epoch": 0.01, + "learning_rate": 7.942857142857144e-06, + "loss": 0.3633, + "step": 139 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 0.5511, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 8.057142857142857e-06, + "loss": 0.4023, + "step": 141 + }, + { + "epoch": 0.01, + "learning_rate": 8.114285714285715e-06, + "loss": 0.4149, + "step": 142 + }, + { + "epoch": 0.01, + "learning_rate": 8.171428571428573e-06, + "loss": 0.4139, + "step": 143 + }, + { + "epoch": 0.01, + "learning_rate": 8.22857142857143e-06, + "loss": 0.3245, + "step": 144 + }, + { + "epoch": 0.01, + "learning_rate": 8.285714285714287e-06, + "loss": 0.3558, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 8.342857142857143e-06, + "loss": 0.384, + "step": 146 + }, + { + "epoch": 0.01, + "learning_rate": 8.400000000000001e-06, + "loss": 0.3575, + "step": 147 + }, + { + "epoch": 0.01, + "learning_rate": 8.457142857142859e-06, + "loss": 0.3829, + "step": 148 + }, + { + "epoch": 0.01, + "learning_rate": 8.514285714285715e-06, + "loss": 0.3997, + "step": 149 + }, + { + "epoch": 0.01, + "learning_rate": 8.571428571428571e-06, + "loss": 0.3687, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 8.628571428571429e-06, + "loss": 0.3605, + "step": 151 + }, + { + "epoch": 0.01, + "learning_rate": 8.685714285714287e-06, + "loss": 0.4476, + "step": 152 + }, + { + "epoch": 0.01, + "learning_rate": 8.742857142857144e-06, + "loss": 0.3475, + "step": 153 + }, + { + "epoch": 0.01, + "learning_rate": 8.8e-06, + "loss": 0.3788, + "step": 154 + }, + { + "epoch": 0.01, + "learning_rate": 8.857142857142858e-06, + "loss": 0.4537, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 8.914285714285716e-06, + "loss": 0.4049, + "step": 156 + }, + { + "epoch": 0.01, + "learning_rate": 8.971428571428572e-06, + "loss": 0.4028, + "step": 157 + }, + { + "epoch": 0.01, + "learning_rate": 9.028571428571428e-06, + "loss": 0.3348, + "step": 158 + }, + { + "epoch": 0.01, + "learning_rate": 9.085714285714286e-06, + "loss": 0.3737, + "step": 159 + }, + { + "epoch": 0.01, + "learning_rate": 9.142857142857144e-06, + "loss": 0.381, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 9.200000000000002e-06, + "loss": 0.411, + "step": 161 + }, + { + "epoch": 0.01, + "learning_rate": 9.257142857142858e-06, + "loss": 0.3879, + "step": 162 + }, + { + "epoch": 0.01, + "learning_rate": 9.314285714285714e-06, + "loss": 0.3915, + "step": 163 + }, + { + "epoch": 0.01, + "learning_rate": 9.371428571428572e-06, + "loss": 0.3442, + "step": 164 + }, + { + "epoch": 0.01, + "learning_rate": 9.42857142857143e-06, + "loss": 0.4283, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 9.485714285714287e-06, + "loss": 0.3656, + "step": 166 + }, + { + "epoch": 0.01, + "learning_rate": 9.542857142857143e-06, + "loss": 0.3682, + "step": 167 + }, + { + "epoch": 0.01, + "learning_rate": 9.600000000000001e-06, + "loss": 0.3803, + "step": 168 + }, + { + "epoch": 0.01, + "learning_rate": 9.657142857142859e-06, + "loss": 0.3868, + "step": 169 + }, + { + "epoch": 0.01, + "learning_rate": 9.714285714285715e-06, + "loss": 0.3719, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 9.771428571428571e-06, + "loss": 0.3582, + "step": 171 + }, + { + "epoch": 0.01, + "learning_rate": 9.828571428571429e-06, + "loss": 0.3861, + "step": 172 + }, + { + "epoch": 0.01, + "learning_rate": 9.885714285714287e-06, + "loss": 0.3943, + "step": 173 + }, + { + "epoch": 0.01, + "learning_rate": 9.942857142857145e-06, + "loss": 0.4069, + "step": 174 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.3989, + "step": 175 + }, + { + "epoch": 0.02, + "learning_rate": 1.0057142857142859e-05, + "loss": 0.3892, + "step": 176 + }, + { + "epoch": 0.02, + "learning_rate": 1.0114285714285715e-05, + "loss": 0.3906, + "step": 177 + }, + { + "epoch": 0.02, + "learning_rate": 1.0171428571428573e-05, + "loss": 0.3853, + "step": 178 + }, + { + "epoch": 0.02, + "learning_rate": 1.022857142857143e-05, + "loss": 0.4231, + "step": 179 + }, + { + "epoch": 0.02, + "learning_rate": 1.0285714285714285e-05, + "loss": 0.3798, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 1.0342857142857143e-05, + "loss": 0.3433, + "step": 181 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.3929, + "step": 182 + }, + { + "epoch": 0.02, + "learning_rate": 1.045714285714286e-05, + "loss": 0.3617, + "step": 183 + }, + { + "epoch": 0.02, + "learning_rate": 1.0514285714285714e-05, + "loss": 0.3925, + "step": 184 + }, + { + "epoch": 0.02, + "learning_rate": 1.0571428571428572e-05, + "loss": 0.3739, + "step": 185 + }, + { + "epoch": 0.02, + "learning_rate": 1.062857142857143e-05, + "loss": 0.3937, + "step": 186 + }, + { + "epoch": 0.02, + "learning_rate": 1.0685714285714286e-05, + "loss": 0.3399, + "step": 187 + }, + { + "epoch": 0.02, + "learning_rate": 1.0742857142857144e-05, + "loss": 0.3892, + "step": 188 + }, + { + "epoch": 0.02, + "learning_rate": 1.0800000000000002e-05, + "loss": 0.3416, + "step": 189 + }, + { + "epoch": 0.02, + "learning_rate": 1.0857142857142858e-05, + "loss": 0.3815, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 1.0914285714285716e-05, + "loss": 0.3743, + "step": 191 + }, + { + "epoch": 0.02, + "learning_rate": 1.0971428571428573e-05, + "loss": 0.3636, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 1.1028571428571428e-05, + "loss": 0.4591, + "step": 193 + }, + { + "epoch": 0.02, + "learning_rate": 1.1085714285714286e-05, + "loss": 0.3463, + "step": 194 + }, + { + "epoch": 0.02, + "learning_rate": 1.1142857142857143e-05, + "loss": 0.3828, + "step": 195 + }, + { + "epoch": 0.02, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.3506, + "step": 196 + }, + { + "epoch": 0.02, + "learning_rate": 1.1257142857142857e-05, + "loss": 0.4203, + "step": 197 + }, + { + "epoch": 0.02, + "learning_rate": 1.1314285714285715e-05, + "loss": 0.3699, + "step": 198 + }, + { + "epoch": 0.02, + "learning_rate": 1.1371428571428573e-05, + "loss": 0.618, + "step": 199 + }, + { + "epoch": 0.02, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.3959, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 1.1485714285714287e-05, + "loss": 0.3685, + "step": 201 + }, + { + "epoch": 0.02, + "learning_rate": 1.1542857142857145e-05, + "loss": 0.3714, + "step": 202 + }, + { + "epoch": 0.02, + "learning_rate": 1.16e-05, + "loss": 0.3918, + "step": 203 + }, + { + "epoch": 0.02, + "learning_rate": 1.1657142857142859e-05, + "loss": 0.3181, + "step": 204 + }, + { + "epoch": 0.02, + "learning_rate": 1.1714285714285716e-05, + "loss": 0.3566, + "step": 205 + }, + { + "epoch": 0.02, + "learning_rate": 1.177142857142857e-05, + "loss": 0.33, + "step": 206 + }, + { + "epoch": 0.02, + "learning_rate": 1.1828571428571429e-05, + "loss": 0.3928, + "step": 207 + }, + { + "epoch": 0.02, + "learning_rate": 1.1885714285714286e-05, + "loss": 0.3738, + "step": 208 + }, + { + "epoch": 0.02, + "learning_rate": 1.1942857142857144e-05, + "loss": 0.3224, + "step": 209 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 0.3654, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 1.2057142857142858e-05, + "loss": 0.3367, + "step": 211 + }, + { + "epoch": 0.02, + "learning_rate": 1.2114285714285716e-05, + "loss": 0.3665, + "step": 212 + }, + { + "epoch": 0.02, + "learning_rate": 1.2171428571428572e-05, + "loss": 0.3373, + "step": 213 + }, + { + "epoch": 0.02, + "learning_rate": 1.222857142857143e-05, + "loss": 0.3722, + "step": 214 + }, + { + "epoch": 0.02, + "learning_rate": 1.2285714285714288e-05, + "loss": 0.5845, + "step": 215 + }, + { + "epoch": 0.02, + "learning_rate": 1.2342857142857144e-05, + "loss": 0.3638, + "step": 216 + }, + { + "epoch": 0.02, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.3444, + "step": 217 + }, + { + "epoch": 0.02, + "learning_rate": 1.245714285714286e-05, + "loss": 0.4137, + "step": 218 + }, + { + "epoch": 0.02, + "learning_rate": 1.2514285714285714e-05, + "loss": 0.3753, + "step": 219 + }, + { + "epoch": 0.02, + "learning_rate": 1.2571428571428572e-05, + "loss": 0.3772, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 1.262857142857143e-05, + "loss": 0.4005, + "step": 221 + }, + { + "epoch": 0.02, + "learning_rate": 1.2685714285714286e-05, + "loss": 0.405, + "step": 222 + }, + { + "epoch": 0.02, + "learning_rate": 1.2742857142857143e-05, + "loss": 0.3678, + "step": 223 + }, + { + "epoch": 0.02, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.3552, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 1.2857142857142859e-05, + "loss": 0.3738, + "step": 225 + }, + { + "epoch": 0.02, + "learning_rate": 1.2914285714285715e-05, + "loss": 0.388, + "step": 226 + }, + { + "epoch": 0.02, + "learning_rate": 1.2971428571428573e-05, + "loss": 0.3687, + "step": 227 + }, + { + "epoch": 0.02, + "learning_rate": 1.302857142857143e-05, + "loss": 0.368, + "step": 228 + }, + { + "epoch": 0.02, + "learning_rate": 1.3085714285714287e-05, + "loss": 0.3291, + "step": 229 + }, + { + "epoch": 0.02, + "learning_rate": 1.3142857142857145e-05, + "loss": 0.4016, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.3512, + "step": 231 + }, + { + "epoch": 0.02, + "learning_rate": 1.3257142857142857e-05, + "loss": 0.3287, + "step": 232 + }, + { + "epoch": 0.02, + "learning_rate": 1.3314285714285715e-05, + "loss": 0.3435, + "step": 233 + }, + { + "epoch": 0.02, + "learning_rate": 1.3371428571428572e-05, + "loss": 0.3379, + "step": 234 + }, + { + "epoch": 0.02, + "learning_rate": 1.3428571428571429e-05, + "loss": 0.3394, + "step": 235 + }, + { + "epoch": 0.02, + "learning_rate": 1.3485714285714286e-05, + "loss": 0.3784, + "step": 236 + }, + { + "epoch": 0.02, + "learning_rate": 1.3542857142857144e-05, + "loss": 0.3846, + "step": 237 + }, + { + "epoch": 0.02, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.3494, + "step": 238 + }, + { + "epoch": 0.02, + "learning_rate": 1.3657142857142858e-05, + "loss": 0.3472, + "step": 239 + }, + { + "epoch": 0.02, + "learning_rate": 1.3714285714285716e-05, + "loss": 0.3765, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 1.3771428571428574e-05, + "loss": 0.431, + "step": 241 + }, + { + "epoch": 0.02, + "learning_rate": 1.382857142857143e-05, + "loss": 0.3636, + "step": 242 + }, + { + "epoch": 0.02, + "learning_rate": 1.3885714285714288e-05, + "loss": 0.3846, + "step": 243 + }, + { + "epoch": 0.02, + "learning_rate": 1.3942857142857145e-05, + "loss": 0.3311, + "step": 244 + }, + { + "epoch": 0.02, + "learning_rate": 1.4e-05, + "loss": 0.6819, + "step": 245 + }, + { + "epoch": 0.02, + "learning_rate": 1.4057142857142858e-05, + "loss": 0.3956, + "step": 246 + }, + { + "epoch": 0.02, + "learning_rate": 1.4114285714285715e-05, + "loss": 0.4042, + "step": 247 + }, + { + "epoch": 0.02, + "learning_rate": 1.4171428571428572e-05, + "loss": 0.5765, + "step": 248 + }, + { + "epoch": 0.02, + "learning_rate": 1.422857142857143e-05, + "loss": 0.3557, + "step": 249 + }, + { + "epoch": 0.02, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.3547, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 1.4342857142857145e-05, + "loss": 0.3671, + "step": 251 + }, + { + "epoch": 0.02, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.3599, + "step": 252 + }, + { + "epoch": 0.02, + "learning_rate": 1.4457142857142859e-05, + "loss": 0.3845, + "step": 253 + }, + { + "epoch": 0.02, + "learning_rate": 1.4514285714285717e-05, + "loss": 0.3499, + "step": 254 + }, + { + "epoch": 0.02, + "learning_rate": 1.4571428571428573e-05, + "loss": 0.3463, + "step": 255 + }, + { + "epoch": 0.02, + "learning_rate": 1.462857142857143e-05, + "loss": 0.4027, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 1.4685714285714288e-05, + "loss": 0.3233, + "step": 257 + }, + { + "epoch": 0.02, + "learning_rate": 1.4742857142857143e-05, + "loss": 0.3505, + "step": 258 + }, + { + "epoch": 0.02, + "learning_rate": 1.48e-05, + "loss": 0.3649, + "step": 259 + }, + { + "epoch": 0.02, + "learning_rate": 1.4857142857142858e-05, + "loss": 0.3671, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 1.4914285714285715e-05, + "loss": 0.3244, + "step": 261 + }, + { + "epoch": 0.02, + "learning_rate": 1.4971428571428572e-05, + "loss": 0.3764, + "step": 262 + }, + { + "epoch": 0.02, + "learning_rate": 1.502857142857143e-05, + "loss": 0.4233, + "step": 263 + }, + { + "epoch": 0.02, + "learning_rate": 1.5085714285714288e-05, + "loss": 0.3888, + "step": 264 + }, + { + "epoch": 0.02, + "learning_rate": 1.5142857142857144e-05, + "loss": 0.3811, + "step": 265 + }, + { + "epoch": 0.02, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.3345, + "step": 266 + }, + { + "epoch": 0.02, + "learning_rate": 1.525714285714286e-05, + "loss": 0.6316, + "step": 267 + }, + { + "epoch": 0.02, + "learning_rate": 1.5314285714285716e-05, + "loss": 0.3324, + "step": 268 + }, + { + "epoch": 0.02, + "learning_rate": 1.5371428571428572e-05, + "loss": 0.3956, + "step": 269 + }, + { + "epoch": 0.02, + "learning_rate": 1.542857142857143e-05, + "loss": 0.4175, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 1.5485714285714287e-05, + "loss": 0.3496, + "step": 271 + }, + { + "epoch": 0.02, + "learning_rate": 1.5542857142857144e-05, + "loss": 0.3684, + "step": 272 + }, + { + "epoch": 0.02, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.3895, + "step": 273 + }, + { + "epoch": 0.02, + "learning_rate": 1.5657142857142856e-05, + "loss": 0.3804, + "step": 274 + }, + { + "epoch": 0.02, + "learning_rate": 1.5714285714285715e-05, + "loss": 0.3207, + "step": 275 + }, + { + "epoch": 0.02, + "learning_rate": 1.577142857142857e-05, + "loss": 0.3481, + "step": 276 + }, + { + "epoch": 0.02, + "learning_rate": 1.582857142857143e-05, + "loss": 0.3585, + "step": 277 + }, + { + "epoch": 0.02, + "learning_rate": 1.5885714285714287e-05, + "loss": 0.382, + "step": 278 + }, + { + "epoch": 0.02, + "learning_rate": 1.5942857142857143e-05, + "loss": 0.3727, + "step": 279 + }, + { + "epoch": 0.02, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.4006, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 1.605714285714286e-05, + "loss": 0.3492, + "step": 281 + }, + { + "epoch": 0.02, + "learning_rate": 1.6114285714285715e-05, + "loss": 0.38, + "step": 282 + }, + { + "epoch": 0.02, + "learning_rate": 1.6171428571428574e-05, + "loss": 0.3618, + "step": 283 + }, + { + "epoch": 0.02, + "learning_rate": 1.622857142857143e-05, + "loss": 0.3693, + "step": 284 + }, + { + "epoch": 0.02, + "learning_rate": 1.6285714285714287e-05, + "loss": 0.6064, + "step": 285 + }, + { + "epoch": 0.02, + "learning_rate": 1.6342857142857146e-05, + "loss": 0.3769, + "step": 286 + }, + { + "epoch": 0.02, + "learning_rate": 1.64e-05, + "loss": 0.4042, + "step": 287 + }, + { + "epoch": 0.02, + "learning_rate": 1.645714285714286e-05, + "loss": 0.3466, + "step": 288 + }, + { + "epoch": 0.02, + "learning_rate": 1.6514285714285714e-05, + "loss": 0.3666, + "step": 289 + }, + { + "epoch": 0.02, + "learning_rate": 1.6571428571428574e-05, + "loss": 0.3208, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 1.662857142857143e-05, + "loss": 0.3625, + "step": 291 + }, + { + "epoch": 0.03, + "learning_rate": 1.6685714285714286e-05, + "loss": 0.3184, + "step": 292 + }, + { + "epoch": 0.03, + "learning_rate": 1.6742857142857146e-05, + "loss": 0.3839, + "step": 293 + }, + { + "epoch": 0.03, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.3312, + "step": 294 + }, + { + "epoch": 0.03, + "learning_rate": 1.6857142857142858e-05, + "loss": 0.3323, + "step": 295 + }, + { + "epoch": 0.03, + "learning_rate": 1.6914285714285717e-05, + "loss": 0.3436, + "step": 296 + }, + { + "epoch": 0.03, + "learning_rate": 1.6971428571428574e-05, + "loss": 0.345, + "step": 297 + }, + { + "epoch": 0.03, + "learning_rate": 1.702857142857143e-05, + "loss": 0.3371, + "step": 298 + }, + { + "epoch": 0.03, + "learning_rate": 1.708571428571429e-05, + "loss": 0.3867, + "step": 299 + }, + { + "epoch": 0.03, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.3937, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 1.72e-05, + "loss": 0.3862, + "step": 301 + }, + { + "epoch": 0.03, + "learning_rate": 1.7257142857142857e-05, + "loss": 0.366, + "step": 302 + }, + { + "epoch": 0.03, + "learning_rate": 1.7314285714285717e-05, + "loss": 0.397, + "step": 303 + }, + { + "epoch": 0.03, + "learning_rate": 1.7371428571428573e-05, + "loss": 0.3698, + "step": 304 + }, + { + "epoch": 0.03, + "learning_rate": 1.742857142857143e-05, + "loss": 0.3694, + "step": 305 + }, + { + "epoch": 0.03, + "learning_rate": 1.748571428571429e-05, + "loss": 0.5981, + "step": 306 + }, + { + "epoch": 0.03, + "learning_rate": 1.7542857142857145e-05, + "loss": 0.3619, + "step": 307 + }, + { + "epoch": 0.03, + "learning_rate": 1.76e-05, + "loss": 0.326, + "step": 308 + }, + { + "epoch": 0.03, + "learning_rate": 1.765714285714286e-05, + "loss": 0.3652, + "step": 309 + }, + { + "epoch": 0.03, + "learning_rate": 1.7714285714285717e-05, + "loss": 0.3784, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 1.7771428571428573e-05, + "loss": 0.3403, + "step": 311 + }, + { + "epoch": 0.03, + "learning_rate": 1.7828571428571432e-05, + "loss": 0.3816, + "step": 312 + }, + { + "epoch": 0.03, + "learning_rate": 1.7885714285714285e-05, + "loss": 0.3508, + "step": 313 + }, + { + "epoch": 0.03, + "learning_rate": 1.7942857142857144e-05, + "loss": 0.389, + "step": 314 + }, + { + "epoch": 0.03, + "learning_rate": 1.8e-05, + "loss": 0.3719, + "step": 315 + }, + { + "epoch": 0.03, + "learning_rate": 1.8057142857142857e-05, + "loss": 0.3411, + "step": 316 + }, + { + "epoch": 0.03, + "learning_rate": 1.8114285714285716e-05, + "loss": 0.3459, + "step": 317 + }, + { + "epoch": 0.03, + "learning_rate": 1.8171428571428572e-05, + "loss": 0.606, + "step": 318 + }, + { + "epoch": 0.03, + "learning_rate": 1.822857142857143e-05, + "loss": 0.3605, + "step": 319 + }, + { + "epoch": 0.03, + "learning_rate": 1.8285714285714288e-05, + "loss": 0.3584, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 1.8342857142857144e-05, + "loss": 0.3464, + "step": 321 + }, + { + "epoch": 0.03, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.3539, + "step": 322 + }, + { + "epoch": 0.03, + "learning_rate": 1.845714285714286e-05, + "loss": 0.3964, + "step": 323 + }, + { + "epoch": 0.03, + "learning_rate": 1.8514285714285716e-05, + "loss": 0.3508, + "step": 324 + }, + { + "epoch": 0.03, + "learning_rate": 1.8571428571428575e-05, + "loss": 0.3266, + "step": 325 + }, + { + "epoch": 0.03, + "learning_rate": 1.8628571428571428e-05, + "loss": 0.3758, + "step": 326 + }, + { + "epoch": 0.03, + "learning_rate": 1.8685714285714287e-05, + "loss": 0.3062, + "step": 327 + }, + { + "epoch": 0.03, + "learning_rate": 1.8742857142857143e-05, + "loss": 0.4012, + "step": 328 + }, + { + "epoch": 0.03, + "learning_rate": 1.88e-05, + "loss": 0.3679, + "step": 329 + }, + { + "epoch": 0.03, + "learning_rate": 1.885714285714286e-05, + "loss": 0.337, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 1.8914285714285715e-05, + "loss": 0.4494, + "step": 331 + }, + { + "epoch": 0.03, + "learning_rate": 1.8971428571428575e-05, + "loss": 0.3542, + "step": 332 + }, + { + "epoch": 0.03, + "learning_rate": 1.902857142857143e-05, + "loss": 0.3492, + "step": 333 + }, + { + "epoch": 0.03, + "learning_rate": 1.9085714285714287e-05, + "loss": 0.3257, + "step": 334 + }, + { + "epoch": 0.03, + "learning_rate": 1.9142857142857146e-05, + "loss": 0.3489, + "step": 335 + }, + { + "epoch": 0.03, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.317, + "step": 336 + }, + { + "epoch": 0.03, + "learning_rate": 1.925714285714286e-05, + "loss": 0.3724, + "step": 337 + }, + { + "epoch": 0.03, + "learning_rate": 1.9314285714285718e-05, + "loss": 0.3696, + "step": 338 + }, + { + "epoch": 0.03, + "learning_rate": 1.937142857142857e-05, + "loss": 0.3848, + "step": 339 + }, + { + "epoch": 0.03, + "learning_rate": 1.942857142857143e-05, + "loss": 0.3625, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 1.9485714285714286e-05, + "loss": 0.2758, + "step": 341 + }, + { + "epoch": 0.03, + "learning_rate": 1.9542857142857143e-05, + "loss": 0.4115, + "step": 342 + }, + { + "epoch": 0.03, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.3625, + "step": 343 + }, + { + "epoch": 0.03, + "learning_rate": 1.9657142857142858e-05, + "loss": 0.3627, + "step": 344 + }, + { + "epoch": 0.03, + "learning_rate": 1.9714285714285718e-05, + "loss": 0.3516, + "step": 345 + }, + { + "epoch": 0.03, + "learning_rate": 1.9771428571428574e-05, + "loss": 0.2944, + "step": 346 + }, + { + "epoch": 0.03, + "learning_rate": 1.982857142857143e-05, + "loss": 0.41, + "step": 347 + }, + { + "epoch": 0.03, + "learning_rate": 1.988571428571429e-05, + "loss": 0.376, + "step": 348 + }, + { + "epoch": 0.03, + "learning_rate": 1.9942857142857142e-05, + "loss": 0.3127, + "step": 349 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 0.3829, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999999614624707e-05, + "loss": 0.3218, + "step": 351 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999998458498852e-05, + "loss": 0.3351, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 1.999999653162253e-05, + "loss": 0.3874, + "step": 353 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999993833995886e-05, + "loss": 0.361, + "step": 354 + }, + { + "epoch": 0.03, + "learning_rate": 1.999999036561913e-05, + "loss": 0.3157, + "step": 355 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999986126492526e-05, + "loss": 0.3591, + "step": 356 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999981116616402e-05, + "loss": 0.3319, + "step": 357 + }, + { + "epoch": 0.03, + "learning_rate": 1.999997533599115e-05, + "loss": 0.3223, + "step": 358 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999968784617204e-05, + "loss": 0.323, + "step": 359 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999961462495078e-05, + "loss": 0.3703, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999953369625334e-05, + "loss": 0.3814, + "step": 361 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999944506008594e-05, + "loss": 0.3477, + "step": 362 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999934871645544e-05, + "loss": 0.3254, + "step": 363 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999924466536925e-05, + "loss": 0.4109, + "step": 364 + }, + { + "epoch": 0.03, + "learning_rate": 1.999991329068354e-05, + "loss": 0.3309, + "step": 365 + }, + { + "epoch": 0.03, + "learning_rate": 1.999990134408625e-05, + "loss": 0.3484, + "step": 366 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999888626745975e-05, + "loss": 0.3314, + "step": 367 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999875138663694e-05, + "loss": 0.3262, + "step": 368 + }, + { + "epoch": 0.03, + "learning_rate": 1.999986087984045e-05, + "loss": 0.3336, + "step": 369 + }, + { + "epoch": 0.03, + "learning_rate": 1.999984585027734e-05, + "loss": 0.3501, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999830049975523e-05, + "loss": 0.3049, + "step": 371 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999813478936213e-05, + "loss": 0.3162, + "step": 372 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999796137160693e-05, + "loss": 0.359, + "step": 373 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999778024650296e-05, + "loss": 0.3531, + "step": 374 + }, + { + "epoch": 0.03, + "learning_rate": 1.999975914140642e-05, + "loss": 0.6349, + "step": 375 + }, + { + "epoch": 0.03, + "learning_rate": 1.999973948743052e-05, + "loss": 0.3192, + "step": 376 + }, + { + "epoch": 0.03, + "learning_rate": 1.999971906272411e-05, + "loss": 0.3427, + "step": 377 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999697867288764e-05, + "loss": 0.3779, + "step": 378 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999675901126117e-05, + "loss": 0.326, + "step": 379 + }, + { + "epoch": 0.03, + "learning_rate": 1.999965316423786e-05, + "loss": 0.3265, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999629656625748e-05, + "loss": 0.3572, + "step": 381 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999605378291593e-05, + "loss": 0.4268, + "step": 382 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999580329237264e-05, + "loss": 0.3033, + "step": 383 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999554509464695e-05, + "loss": 0.3462, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 1.999952791897587e-05, + "loss": 0.3683, + "step": 385 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999500557772843e-05, + "loss": 0.3035, + "step": 386 + }, + { + "epoch": 0.03, + "learning_rate": 1.999947242585772e-05, + "loss": 0.3604, + "step": 387 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999443523232676e-05, + "loss": 0.3183, + "step": 388 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999413849899933e-05, + "loss": 0.3355, + "step": 389 + }, + { + "epoch": 0.03, + "learning_rate": 1.999938340586178e-05, + "loss": 0.3467, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999352191120556e-05, + "loss": 0.3902, + "step": 391 + }, + { + "epoch": 0.03, + "learning_rate": 1.999932020567868e-05, + "loss": 0.3678, + "step": 392 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999287449538608e-05, + "loss": 0.3354, + "step": 393 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999253922702868e-05, + "loss": 0.3428, + "step": 394 + }, + { + "epoch": 0.03, + "learning_rate": 1.999921962517404e-05, + "loss": 0.3271, + "step": 395 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999184556954777e-05, + "loss": 0.3287, + "step": 396 + }, + { + "epoch": 0.03, + "learning_rate": 1.999914871804777e-05, + "loss": 0.3804, + "step": 397 + }, + { + "epoch": 0.03, + "learning_rate": 1.999911210845579e-05, + "loss": 0.3475, + "step": 398 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999074728181657e-05, + "loss": 0.3671, + "step": 399 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999036577228245e-05, + "loss": 0.2861, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998997655598505e-05, + "loss": 0.3315, + "step": 401 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998957963295434e-05, + "loss": 0.3436, + "step": 402 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998917500322086e-05, + "loss": 0.4235, + "step": 403 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998876266681585e-05, + "loss": 0.3639, + "step": 404 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998834262377107e-05, + "loss": 0.3844, + "step": 405 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998791487411887e-05, + "loss": 0.3854, + "step": 406 + }, + { + "epoch": 0.03, + "learning_rate": 1.999874794178923e-05, + "loss": 0.3645, + "step": 407 + }, + { + "epoch": 0.03, + "learning_rate": 1.999870362551248e-05, + "loss": 0.3372, + "step": 408 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998658538585067e-05, + "loss": 0.3628, + "step": 409 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998612681010452e-05, + "loss": 0.3358, + "step": 410 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998566052792178e-05, + "loss": 0.38, + "step": 411 + }, + { + "epoch": 0.04, + "learning_rate": 1.999851865393384e-05, + "loss": 0.3282, + "step": 412 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998470484439084e-05, + "loss": 0.3574, + "step": 413 + }, + { + "epoch": 0.04, + "learning_rate": 1.999842154431163e-05, + "loss": 0.381, + "step": 414 + }, + { + "epoch": 0.04, + "learning_rate": 1.999837183355525e-05, + "loss": 0.3918, + "step": 415 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998321352173767e-05, + "loss": 0.343, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 1.999827010017108e-05, + "loss": 0.3431, + "step": 417 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998218077551135e-05, + "loss": 0.3518, + "step": 418 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998165284317944e-05, + "loss": 0.3424, + "step": 419 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998111720475574e-05, + "loss": 0.3839, + "step": 420 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998057386028157e-05, + "loss": 0.3334, + "step": 421 + }, + { + "epoch": 0.04, + "learning_rate": 1.999800228097988e-05, + "loss": 0.3163, + "step": 422 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997946405334986e-05, + "loss": 0.3484, + "step": 423 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997889759097785e-05, + "loss": 0.3439, + "step": 424 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997832342272642e-05, + "loss": 0.3445, + "step": 425 + }, + { + "epoch": 0.04, + "learning_rate": 1.999777415486398e-05, + "loss": 0.3461, + "step": 426 + }, + { + "epoch": 0.04, + "learning_rate": 1.999771519687629e-05, + "loss": 0.3525, + "step": 427 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997655468314115e-05, + "loss": 0.3342, + "step": 428 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997594969182054e-05, + "loss": 0.3387, + "step": 429 + }, + { + "epoch": 0.04, + "learning_rate": 1.999753369948477e-05, + "loss": 0.6073, + "step": 430 + }, + { + "epoch": 0.04, + "learning_rate": 1.999747165922699e-05, + "loss": 0.3946, + "step": 431 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997408848413494e-05, + "loss": 0.3114, + "step": 432 + }, + { + "epoch": 0.04, + "learning_rate": 1.999734526704912e-05, + "loss": 0.3873, + "step": 433 + }, + { + "epoch": 0.04, + "learning_rate": 1.999728091513877e-05, + "loss": 0.3724, + "step": 434 + }, + { + "epoch": 0.04, + "learning_rate": 1.999721579268741e-05, + "loss": 0.3638, + "step": 435 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997149899700056e-05, + "loss": 0.3469, + "step": 436 + }, + { + "epoch": 0.04, + "learning_rate": 1.999708323618178e-05, + "loss": 0.3553, + "step": 437 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997015802137727e-05, + "loss": 0.3351, + "step": 438 + }, + { + "epoch": 0.04, + "learning_rate": 1.999694759757309e-05, + "loss": 0.3518, + "step": 439 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996878622493134e-05, + "loss": 0.3533, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996808876903168e-05, + "loss": 0.3652, + "step": 441 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996738360808566e-05, + "loss": 0.3635, + "step": 442 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996667074214768e-05, + "loss": 0.309, + "step": 443 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996595017127268e-05, + "loss": 0.3436, + "step": 444 + }, + { + "epoch": 0.04, + "learning_rate": 1.999652218955162e-05, + "loss": 0.4006, + "step": 445 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996448591493433e-05, + "loss": 0.3715, + "step": 446 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996374222958383e-05, + "loss": 0.2994, + "step": 447 + }, + { + "epoch": 0.04, + "learning_rate": 1.99962990839522e-05, + "loss": 0.3869, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 1.999622317448068e-05, + "loss": 0.3598, + "step": 449 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996146494549672e-05, + "loss": 0.3042, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996069044165082e-05, + "loss": 0.3992, + "step": 451 + }, + { + "epoch": 0.04, + "learning_rate": 1.999599082333288e-05, + "loss": 0.3358, + "step": 452 + }, + { + "epoch": 0.04, + "learning_rate": 1.99959118320591e-05, + "loss": 0.3698, + "step": 453 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995832070349827e-05, + "loss": 0.3474, + "step": 454 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995751538211205e-05, + "loss": 0.3728, + "step": 455 + }, + { + "epoch": 0.04, + "learning_rate": 1.999567023564945e-05, + "loss": 0.3209, + "step": 456 + }, + { + "epoch": 0.04, + "learning_rate": 1.999558816267082e-05, + "loss": 0.6414, + "step": 457 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995505319281645e-05, + "loss": 0.3498, + "step": 458 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995421705488313e-05, + "loss": 0.3851, + "step": 459 + }, + { + "epoch": 0.04, + "learning_rate": 1.999533732129726e-05, + "loss": 0.3364, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995252166714993e-05, + "loss": 0.355, + "step": 461 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995166241748084e-05, + "loss": 0.3567, + "step": 462 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995079546403143e-05, + "loss": 0.3937, + "step": 463 + }, + { + "epoch": 0.04, + "learning_rate": 1.999499208068686e-05, + "loss": 0.3322, + "step": 464 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994903844605973e-05, + "loss": 0.3702, + "step": 465 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994814838167286e-05, + "loss": 0.3237, + "step": 466 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994725061377653e-05, + "loss": 0.3216, + "step": 467 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994634514244002e-05, + "loss": 0.3365, + "step": 468 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994543196773307e-05, + "loss": 0.3491, + "step": 469 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994451108972604e-05, + "loss": 0.3711, + "step": 470 + }, + { + "epoch": 0.04, + "learning_rate": 1.999435825084899e-05, + "loss": 0.3419, + "step": 471 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994264622409636e-05, + "loss": 0.3483, + "step": 472 + }, + { + "epoch": 0.04, + "learning_rate": 1.999417022366174e-05, + "loss": 0.609, + "step": 473 + }, + { + "epoch": 0.04, + "learning_rate": 1.999407505461259e-05, + "loss": 0.3381, + "step": 474 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993979115269517e-05, + "loss": 0.3595, + "step": 475 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993882405639914e-05, + "loss": 0.3449, + "step": 476 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993784925731234e-05, + "loss": 0.353, + "step": 477 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993686675550998e-05, + "loss": 0.3359, + "step": 478 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993587655106766e-05, + "loss": 0.2803, + "step": 479 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993487864406185e-05, + "loss": 0.3397, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993387303456938e-05, + "loss": 0.3652, + "step": 481 + }, + { + "epoch": 0.04, + "learning_rate": 1.999328597226677e-05, + "loss": 0.2666, + "step": 482 + }, + { + "epoch": 0.04, + "learning_rate": 1.99931838708435e-05, + "loss": 0.4054, + "step": 483 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993080999195e-05, + "loss": 0.3444, + "step": 484 + }, + { + "epoch": 0.04, + "learning_rate": 1.999297735732919e-05, + "loss": 0.3139, + "step": 485 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992872945254064e-05, + "loss": 0.3209, + "step": 486 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992767762977662e-05, + "loss": 0.3729, + "step": 487 + }, + { + "epoch": 0.04, + "learning_rate": 1.99926618105081e-05, + "loss": 0.3699, + "step": 488 + }, + { + "epoch": 0.04, + "learning_rate": 1.999255508785354e-05, + "loss": 0.3394, + "step": 489 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992447595022214e-05, + "loss": 0.3563, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992339332022396e-05, + "loss": 0.2916, + "step": 491 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992230298862436e-05, + "loss": 0.3495, + "step": 492 + }, + { + "epoch": 0.04, + "learning_rate": 1.999212049555074e-05, + "loss": 0.3378, + "step": 493 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992009922095766e-05, + "loss": 0.3357, + "step": 494 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991898578506043e-05, + "loss": 0.3236, + "step": 495 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991786464790145e-05, + "loss": 0.3782, + "step": 496 + }, + { + "epoch": 0.04, + "learning_rate": 1.999167358095672e-05, + "loss": 0.366, + "step": 497 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991559927014465e-05, + "loss": 0.3559, + "step": 498 + }, + { + "epoch": 0.04, + "learning_rate": 1.999144550297214e-05, + "loss": 0.3563, + "step": 499 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991330308838565e-05, + "loss": 0.5999, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991214344622616e-05, + "loss": 0.3724, + "step": 501 + }, + { + "epoch": 0.04, + "learning_rate": 1.999109761033324e-05, + "loss": 0.3094, + "step": 502 + }, + { + "epoch": 0.04, + "learning_rate": 1.999098010597942e-05, + "loss": 0.3472, + "step": 503 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990861831570224e-05, + "loss": 0.3942, + "step": 504 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990742787114765e-05, + "loss": 0.333, + "step": 505 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990622972622216e-05, + "loss": 0.332, + "step": 506 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990502388101813e-05, + "loss": 0.4093, + "step": 507 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990381033562853e-05, + "loss": 0.4125, + "step": 508 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990258909014684e-05, + "loss": 0.3181, + "step": 509 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990136014466722e-05, + "loss": 0.3375, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 1.999001234992844e-05, + "loss": 0.3879, + "step": 511 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989887915409368e-05, + "loss": 0.3282, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 1.99897627109191e-05, + "loss": 0.3341, + "step": 513 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989636736467278e-05, + "loss": 0.3152, + "step": 514 + }, + { + "epoch": 0.04, + "learning_rate": 1.998950999206362e-05, + "loss": 0.3618, + "step": 515 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989382477717888e-05, + "loss": 0.5778, + "step": 516 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989254193439915e-05, + "loss": 0.4129, + "step": 517 + }, + { + "epoch": 0.04, + "learning_rate": 1.998912513923959e-05, + "loss": 0.3804, + "step": 518 + }, + { + "epoch": 0.04, + "learning_rate": 1.9988995315126852e-05, + "loss": 0.3449, + "step": 519 + }, + { + "epoch": 0.04, + "learning_rate": 1.9988864721111714e-05, + "loss": 0.3252, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 1.998873335720424e-05, + "loss": 0.3777, + "step": 521 + }, + { + "epoch": 0.04, + "learning_rate": 1.9988601223414555e-05, + "loss": 0.3055, + "step": 522 + }, + { + "epoch": 0.04, + "learning_rate": 1.9988468319752846e-05, + "loss": 0.3441, + "step": 523 + }, + { + "epoch": 0.04, + "learning_rate": 1.998833464622935e-05, + "loss": 0.3198, + "step": 524 + }, + { + "epoch": 0.05, + "learning_rate": 1.998820020285437e-05, + "loss": 0.3326, + "step": 525 + }, + { + "epoch": 0.05, + "learning_rate": 1.998806498963828e-05, + "loss": 0.5486, + "step": 526 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987929006591487e-05, + "loss": 0.3384, + "step": 527 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987792253724477e-05, + "loss": 0.3862, + "step": 528 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987654731047793e-05, + "loss": 0.3315, + "step": 529 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987516438572035e-05, + "loss": 0.303, + "step": 530 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987377376307856e-05, + "loss": 0.363, + "step": 531 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987237544265982e-05, + "loss": 0.3525, + "step": 532 + }, + { + "epoch": 0.05, + "learning_rate": 1.998709694245718e-05, + "loss": 0.3682, + "step": 533 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986955570892302e-05, + "loss": 0.3137, + "step": 534 + }, + { + "epoch": 0.05, + "learning_rate": 1.998681342958223e-05, + "loss": 0.3818, + "step": 535 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986670518537928e-05, + "loss": 0.3285, + "step": 536 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986526837770405e-05, + "loss": 0.3517, + "step": 537 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986382387290738e-05, + "loss": 0.3886, + "step": 538 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986237167110066e-05, + "loss": 0.3514, + "step": 539 + }, + { + "epoch": 0.05, + "learning_rate": 1.998609117723957e-05, + "loss": 0.3494, + "step": 540 + }, + { + "epoch": 0.05, + "learning_rate": 1.998594441769051e-05, + "loss": 0.6116, + "step": 541 + }, + { + "epoch": 0.05, + "learning_rate": 1.99857968884742e-05, + "loss": 0.3469, + "step": 542 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985648589602005e-05, + "loss": 0.3199, + "step": 543 + }, + { + "epoch": 0.05, + "learning_rate": 1.998549952108536e-05, + "loss": 0.3323, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985349682935747e-05, + "loss": 0.3555, + "step": 545 + }, + { + "epoch": 0.05, + "learning_rate": 1.998519907516472e-05, + "loss": 0.38, + "step": 546 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985047697783886e-05, + "loss": 0.3052, + "step": 547 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984895550804918e-05, + "loss": 0.2993, + "step": 548 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984742634239535e-05, + "loss": 0.3571, + "step": 549 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984588948099528e-05, + "loss": 0.3547, + "step": 550 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984434492396736e-05, + "loss": 0.3859, + "step": 551 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984279267143072e-05, + "loss": 0.3196, + "step": 552 + }, + { + "epoch": 0.05, + "learning_rate": 1.998412327235049e-05, + "loss": 0.323, + "step": 553 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983966508031026e-05, + "loss": 0.3586, + "step": 554 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983808974196752e-05, + "loss": 0.3491, + "step": 555 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983650670859814e-05, + "loss": 0.3204, + "step": 556 + }, + { + "epoch": 0.05, + "learning_rate": 1.998349159803241e-05, + "loss": 0.3123, + "step": 557 + }, + { + "epoch": 0.05, + "learning_rate": 1.998333175572681e-05, + "loss": 0.3387, + "step": 558 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983171143955326e-05, + "loss": 0.3529, + "step": 559 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983009762730336e-05, + "loss": 0.3638, + "step": 560 + }, + { + "epoch": 0.05, + "learning_rate": 1.998284761206428e-05, + "loss": 0.3256, + "step": 561 + }, + { + "epoch": 0.05, + "learning_rate": 1.998268469196966e-05, + "loss": 0.3117, + "step": 562 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982521002459026e-05, + "loss": 0.3577, + "step": 563 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982356543545003e-05, + "loss": 0.3594, + "step": 564 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982191315240257e-05, + "loss": 0.3307, + "step": 565 + }, + { + "epoch": 0.05, + "learning_rate": 1.998202531755753e-05, + "loss": 0.3172, + "step": 566 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981858550509617e-05, + "loss": 0.3244, + "step": 567 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981691014109364e-05, + "loss": 0.355, + "step": 568 + }, + { + "epoch": 0.05, + "learning_rate": 1.998152270836969e-05, + "loss": 0.3032, + "step": 569 + }, + { + "epoch": 0.05, + "learning_rate": 1.998135363330357e-05, + "loss": 0.3289, + "step": 570 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981183788924025e-05, + "loss": 0.303, + "step": 571 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981013175244154e-05, + "loss": 0.3134, + "step": 572 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980841792277104e-05, + "loss": 0.3936, + "step": 573 + }, + { + "epoch": 0.05, + "learning_rate": 1.998066964003609e-05, + "loss": 0.4129, + "step": 574 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980496718534375e-05, + "loss": 0.3412, + "step": 575 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980323027785285e-05, + "loss": 0.3352, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 1.998014856780221e-05, + "loss": 0.3317, + "step": 577 + }, + { + "epoch": 0.05, + "learning_rate": 1.9979973338598603e-05, + "loss": 0.3583, + "step": 578 + }, + { + "epoch": 0.05, + "learning_rate": 1.9979797340187957e-05, + "loss": 0.3521, + "step": 579 + }, + { + "epoch": 0.05, + "learning_rate": 1.9979620572583846e-05, + "loss": 0.3589, + "step": 580 + }, + { + "epoch": 0.05, + "learning_rate": 1.9979443035799893e-05, + "loss": 0.3391, + "step": 581 + }, + { + "epoch": 0.05, + "learning_rate": 1.9979264729849776e-05, + "loss": 0.3552, + "step": 582 + }, + { + "epoch": 0.05, + "learning_rate": 1.9979085654747248e-05, + "loss": 0.3248, + "step": 583 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978905810506105e-05, + "loss": 0.3448, + "step": 584 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978725197140204e-05, + "loss": 0.3793, + "step": 585 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978543814663478e-05, + "loss": 0.2988, + "step": 586 + }, + { + "epoch": 0.05, + "learning_rate": 1.99783616630899e-05, + "loss": 0.3255, + "step": 587 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978178742433504e-05, + "loss": 0.3887, + "step": 588 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977995052708398e-05, + "loss": 0.3858, + "step": 589 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977810593928736e-05, + "loss": 0.326, + "step": 590 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977625366108733e-05, + "loss": 0.3425, + "step": 591 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977439369262668e-05, + "loss": 0.3419, + "step": 592 + }, + { + "epoch": 0.05, + "learning_rate": 1.997725260340488e-05, + "loss": 0.3846, + "step": 593 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977065068549756e-05, + "loss": 0.3597, + "step": 594 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976876764711756e-05, + "loss": 0.3577, + "step": 595 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976687691905394e-05, + "loss": 0.4155, + "step": 596 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976497850145237e-05, + "loss": 0.337, + "step": 597 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976307239445924e-05, + "loss": 0.3729, + "step": 598 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976115859822146e-05, + "loss": 0.3073, + "step": 599 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975923711288646e-05, + "loss": 0.3835, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975730793860242e-05, + "loss": 0.343, + "step": 601 + }, + { + "epoch": 0.05, + "learning_rate": 1.99755371075518e-05, + "loss": 0.3936, + "step": 602 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975342652378247e-05, + "loss": 0.3302, + "step": 603 + }, + { + "epoch": 0.05, + "learning_rate": 1.997514742835457e-05, + "loss": 0.3183, + "step": 604 + }, + { + "epoch": 0.05, + "learning_rate": 1.997495143549582e-05, + "loss": 0.3001, + "step": 605 + }, + { + "epoch": 0.05, + "learning_rate": 1.99747546738171e-05, + "loss": 0.3329, + "step": 606 + }, + { + "epoch": 0.05, + "learning_rate": 1.997455714333358e-05, + "loss": 0.6206, + "step": 607 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974358844060476e-05, + "loss": 0.3369, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 1.997415977601308e-05, + "loss": 0.3627, + "step": 609 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973959939206734e-05, + "loss": 0.276, + "step": 610 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973759333656835e-05, + "loss": 0.3221, + "step": 611 + }, + { + "epoch": 0.05, + "learning_rate": 1.997355795937885e-05, + "loss": 0.3703, + "step": 612 + }, + { + "epoch": 0.05, + "learning_rate": 1.99733558163883e-05, + "loss": 0.3236, + "step": 613 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973152904700762e-05, + "loss": 0.3277, + "step": 614 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972949224331876e-05, + "loss": 0.3821, + "step": 615 + }, + { + "epoch": 0.05, + "learning_rate": 1.997274477529734e-05, + "loss": 0.3165, + "step": 616 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972539557612918e-05, + "loss": 0.351, + "step": 617 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972333571294418e-05, + "loss": 0.333, + "step": 618 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972126816357723e-05, + "loss": 0.3143, + "step": 619 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971919292818768e-05, + "loss": 0.3568, + "step": 620 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971711000693544e-05, + "loss": 0.3401, + "step": 621 + }, + { + "epoch": 0.05, + "learning_rate": 1.997150193999811e-05, + "loss": 0.3361, + "step": 622 + }, + { + "epoch": 0.05, + "learning_rate": 1.997129211074858e-05, + "loss": 0.3162, + "step": 623 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971081512961117e-05, + "loss": 0.3279, + "step": 624 + }, + { + "epoch": 0.05, + "learning_rate": 1.9970870146651964e-05, + "loss": 0.3678, + "step": 625 + }, + { + "epoch": 0.05, + "learning_rate": 1.9970658011837404e-05, + "loss": 0.3188, + "step": 626 + }, + { + "epoch": 0.05, + "learning_rate": 1.9970445108533795e-05, + "loss": 0.3673, + "step": 627 + }, + { + "epoch": 0.05, + "learning_rate": 1.9970231436757542e-05, + "loss": 0.3157, + "step": 628 + }, + { + "epoch": 0.05, + "learning_rate": 1.9970016996525112e-05, + "loss": 0.3331, + "step": 629 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969801787853035e-05, + "loss": 0.3539, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969585810757902e-05, + "loss": 0.3256, + "step": 631 + }, + { + "epoch": 0.05, + "learning_rate": 1.996936906525635e-05, + "loss": 0.3576, + "step": 632 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969151551365097e-05, + "loss": 0.3077, + "step": 633 + }, + { + "epoch": 0.05, + "learning_rate": 1.99689332691009e-05, + "loss": 0.3336, + "step": 634 + }, + { + "epoch": 0.05, + "learning_rate": 1.996871421848058e-05, + "loss": 0.3175, + "step": 635 + }, + { + "epoch": 0.05, + "learning_rate": 1.996849439952103e-05, + "loss": 0.3411, + "step": 636 + }, + { + "epoch": 0.05, + "learning_rate": 1.9968273812239185e-05, + "loss": 0.3721, + "step": 637 + }, + { + "epoch": 0.05, + "learning_rate": 1.9968052456652048e-05, + "loss": 0.3252, + "step": 638 + }, + { + "epoch": 0.05, + "learning_rate": 1.9967830332776684e-05, + "loss": 0.2779, + "step": 639 + }, + { + "epoch": 0.05, + "learning_rate": 1.996760744063021e-05, + "loss": 0.2823, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 1.9967383780229805e-05, + "loss": 0.3191, + "step": 641 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967159351592706e-05, + "loss": 0.3174, + "step": 642 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966934154736216e-05, + "loss": 0.3321, + "step": 643 + }, + { + "epoch": 0.06, + "learning_rate": 1.996670818967769e-05, + "loss": 0.302, + "step": 644 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966481456434543e-05, + "loss": 0.5845, + "step": 645 + }, + { + "epoch": 0.06, + "learning_rate": 1.996625395502425e-05, + "loss": 0.3726, + "step": 646 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966025685464353e-05, + "loss": 0.3862, + "step": 647 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965796647772434e-05, + "loss": 0.3524, + "step": 648 + }, + { + "epoch": 0.06, + "learning_rate": 1.996556684196615e-05, + "loss": 0.3419, + "step": 649 + }, + { + "epoch": 0.06, + "learning_rate": 1.996533626806322e-05, + "loss": 0.3483, + "step": 650 + }, + { + "epoch": 0.06, + "learning_rate": 1.996510492608141e-05, + "loss": 0.3784, + "step": 651 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964872816038547e-05, + "loss": 0.3185, + "step": 652 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964639937952527e-05, + "loss": 0.3842, + "step": 653 + }, + { + "epoch": 0.06, + "learning_rate": 1.99644062918413e-05, + "loss": 0.3243, + "step": 654 + }, + { + "epoch": 0.06, + "learning_rate": 1.996417187772287e-05, + "loss": 0.3759, + "step": 655 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963936695615307e-05, + "loss": 0.3246, + "step": 656 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963700745536733e-05, + "loss": 0.3589, + "step": 657 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963464027505343e-05, + "loss": 0.3464, + "step": 658 + }, + { + "epoch": 0.06, + "learning_rate": 1.996322654153937e-05, + "loss": 0.3057, + "step": 659 + }, + { + "epoch": 0.06, + "learning_rate": 1.996298828765713e-05, + "loss": 0.3293, + "step": 660 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962749265876983e-05, + "loss": 0.3138, + "step": 661 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962509476217348e-05, + "loss": 0.3162, + "step": 662 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962268918696708e-05, + "loss": 0.3716, + "step": 663 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962027593333603e-05, + "loss": 0.363, + "step": 664 + }, + { + "epoch": 0.06, + "learning_rate": 1.9961785500146638e-05, + "loss": 0.3375, + "step": 665 + }, + { + "epoch": 0.06, + "learning_rate": 1.9961542639154467e-05, + "loss": 0.3518, + "step": 666 + }, + { + "epoch": 0.06, + "learning_rate": 1.9961299010375813e-05, + "loss": 0.3, + "step": 667 + }, + { + "epoch": 0.06, + "learning_rate": 1.996105461382945e-05, + "loss": 0.3693, + "step": 668 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960809449534214e-05, + "loss": 0.3063, + "step": 669 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960563517509008e-05, + "loss": 0.304, + "step": 670 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960316817772783e-05, + "loss": 0.3294, + "step": 671 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960069350344547e-05, + "loss": 0.3421, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 1.9959821115243385e-05, + "loss": 0.3419, + "step": 673 + }, + { + "epoch": 0.06, + "learning_rate": 1.9959572112488423e-05, + "loss": 0.3864, + "step": 674 + }, + { + "epoch": 0.06, + "learning_rate": 1.9959322342098854e-05, + "loss": 0.3907, + "step": 675 + }, + { + "epoch": 0.06, + "learning_rate": 1.995907180409393e-05, + "loss": 0.3643, + "step": 676 + }, + { + "epoch": 0.06, + "learning_rate": 1.9958820498492958e-05, + "loss": 0.3456, + "step": 677 + }, + { + "epoch": 0.06, + "learning_rate": 1.9958568425315316e-05, + "loss": 0.279, + "step": 678 + }, + { + "epoch": 0.06, + "learning_rate": 1.995831558458042e-05, + "loss": 0.3097, + "step": 679 + }, + { + "epoch": 0.06, + "learning_rate": 1.9958061976307767e-05, + "loss": 0.3608, + "step": 680 + }, + { + "epoch": 0.06, + "learning_rate": 1.99578076005169e-05, + "loss": 0.3563, + "step": 681 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957552457227428e-05, + "loss": 0.3356, + "step": 682 + }, + { + "epoch": 0.06, + "learning_rate": 1.995729654645901e-05, + "loss": 0.3179, + "step": 683 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957039868231382e-05, + "loss": 0.3613, + "step": 684 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956782422564313e-05, + "loss": 0.3726, + "step": 685 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956524209477658e-05, + "loss": 0.296, + "step": 686 + }, + { + "epoch": 0.06, + "learning_rate": 1.995626522899131e-05, + "loss": 0.3303, + "step": 687 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956005481125235e-05, + "loss": 0.3466, + "step": 688 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955744965899452e-05, + "loss": 0.3399, + "step": 689 + }, + { + "epoch": 0.06, + "learning_rate": 1.995548368333404e-05, + "loss": 0.3265, + "step": 690 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955221633449137e-05, + "loss": 0.361, + "step": 691 + }, + { + "epoch": 0.06, + "learning_rate": 1.995495881626494e-05, + "loss": 0.3459, + "step": 692 + }, + { + "epoch": 0.06, + "learning_rate": 1.9954695231801706e-05, + "loss": 0.3618, + "step": 693 + }, + { + "epoch": 0.06, + "learning_rate": 1.995443088007975e-05, + "loss": 0.3339, + "step": 694 + }, + { + "epoch": 0.06, + "learning_rate": 1.995416576111945e-05, + "loss": 0.3064, + "step": 695 + }, + { + "epoch": 0.06, + "learning_rate": 1.995389987494124e-05, + "loss": 0.3008, + "step": 696 + }, + { + "epoch": 0.06, + "learning_rate": 1.995363322156561e-05, + "loss": 0.3214, + "step": 697 + }, + { + "epoch": 0.06, + "learning_rate": 1.995336580101311e-05, + "loss": 0.2971, + "step": 698 + }, + { + "epoch": 0.06, + "learning_rate": 1.995309761330436e-05, + "loss": 0.6136, + "step": 699 + }, + { + "epoch": 0.06, + "learning_rate": 1.995282865846002e-05, + "loss": 0.3083, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 1.995255893650083e-05, + "loss": 0.3008, + "step": 701 + }, + { + "epoch": 0.06, + "learning_rate": 1.9952288447447573e-05, + "loss": 0.3506, + "step": 702 + }, + { + "epoch": 0.06, + "learning_rate": 1.9952017191321098e-05, + "loss": 0.3716, + "step": 703 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951745168142312e-05, + "loss": 0.357, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951472377932183e-05, + "loss": 0.3171, + "step": 705 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951198820711735e-05, + "loss": 0.356, + "step": 706 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950924496502048e-05, + "loss": 0.3558, + "step": 707 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950649405324275e-05, + "loss": 0.3903, + "step": 708 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950373547199612e-05, + "loss": 0.417, + "step": 709 + }, + { + "epoch": 0.06, + "learning_rate": 1.995009692214932e-05, + "loss": 0.314, + "step": 710 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949819530194722e-05, + "loss": 0.3521, + "step": 711 + }, + { + "epoch": 0.06, + "learning_rate": 1.99495413713572e-05, + "loss": 0.3274, + "step": 712 + }, + { + "epoch": 0.06, + "learning_rate": 1.994926244565819e-05, + "loss": 0.619, + "step": 713 + }, + { + "epoch": 0.06, + "learning_rate": 1.994898275311919e-05, + "loss": 0.3841, + "step": 714 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948702293761763e-05, + "loss": 0.3521, + "step": 715 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948421067607518e-05, + "loss": 0.3113, + "step": 716 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948139074678138e-05, + "loss": 0.3591, + "step": 717 + }, + { + "epoch": 0.06, + "learning_rate": 1.994785631499535e-05, + "loss": 0.3658, + "step": 718 + }, + { + "epoch": 0.06, + "learning_rate": 1.994757278858095e-05, + "loss": 0.4016, + "step": 719 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947288495456793e-05, + "loss": 0.3768, + "step": 720 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947003435644788e-05, + "loss": 0.4028, + "step": 721 + }, + { + "epoch": 0.06, + "learning_rate": 1.994671760916691e-05, + "loss": 0.3552, + "step": 722 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946431016045187e-05, + "loss": 0.348, + "step": 723 + }, + { + "epoch": 0.06, + "learning_rate": 1.994614365630171e-05, + "loss": 0.3878, + "step": 724 + }, + { + "epoch": 0.06, + "learning_rate": 1.994585552995862e-05, + "loss": 0.3807, + "step": 725 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945566637038133e-05, + "loss": 0.3594, + "step": 726 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945276977562515e-05, + "loss": 0.3416, + "step": 727 + }, + { + "epoch": 0.06, + "learning_rate": 1.994498655155408e-05, + "loss": 0.31, + "step": 728 + }, + { + "epoch": 0.06, + "learning_rate": 1.994469535903523e-05, + "loss": 0.3784, + "step": 729 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944403400028392e-05, + "loss": 0.3105, + "step": 730 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944110674556082e-05, + "loss": 0.5735, + "step": 731 + }, + { + "epoch": 0.06, + "learning_rate": 1.9943817182640856e-05, + "loss": 0.3445, + "step": 732 + }, + { + "epoch": 0.06, + "learning_rate": 1.9943522924305337e-05, + "loss": 0.347, + "step": 733 + }, + { + "epoch": 0.06, + "learning_rate": 1.9943227899572198e-05, + "loss": 0.2965, + "step": 734 + }, + { + "epoch": 0.06, + "learning_rate": 1.994293210846419e-05, + "loss": 0.3818, + "step": 735 + }, + { + "epoch": 0.06, + "learning_rate": 1.99426355510041e-05, + "loss": 0.3463, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 1.994233822721479e-05, + "loss": 0.3334, + "step": 737 + }, + { + "epoch": 0.06, + "learning_rate": 1.994204013711918e-05, + "loss": 0.3172, + "step": 738 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941741280740235e-05, + "loss": 0.3109, + "step": 739 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941441658101e-05, + "loss": 0.3264, + "step": 740 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941141269224564e-05, + "loss": 0.3964, + "step": 741 + }, + { + "epoch": 0.06, + "learning_rate": 1.9940840114134078e-05, + "loss": 0.348, + "step": 742 + }, + { + "epoch": 0.06, + "learning_rate": 1.9940538192852753e-05, + "loss": 0.3367, + "step": 743 + }, + { + "epoch": 0.06, + "learning_rate": 1.9940235505403867e-05, + "loss": 0.3307, + "step": 744 + }, + { + "epoch": 0.06, + "learning_rate": 1.993993205181074e-05, + "loss": 0.2921, + "step": 745 + }, + { + "epoch": 0.06, + "learning_rate": 1.993962783209677e-05, + "loss": 0.3784, + "step": 746 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939322846285397e-05, + "loss": 0.3198, + "step": 747 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939017094400128e-05, + "loss": 0.3177, + "step": 748 + }, + { + "epoch": 0.06, + "learning_rate": 1.9938710576464535e-05, + "loss": 0.6008, + "step": 749 + }, + { + "epoch": 0.06, + "learning_rate": 1.993840329250224e-05, + "loss": 0.3267, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 1.9938095242536925e-05, + "loss": 0.3933, + "step": 751 + }, + { + "epoch": 0.06, + "learning_rate": 1.993778642659233e-05, + "loss": 0.3732, + "step": 752 + }, + { + "epoch": 0.06, + "learning_rate": 1.9937476844692268e-05, + "loss": 0.306, + "step": 753 + }, + { + "epoch": 0.06, + "learning_rate": 1.993716649686059e-05, + "loss": 0.3562, + "step": 754 + }, + { + "epoch": 0.06, + "learning_rate": 1.9936855383121217e-05, + "loss": 0.2776, + "step": 755 + }, + { + "epoch": 0.06, + "learning_rate": 1.9936543503498135e-05, + "loss": 0.3516, + "step": 756 + }, + { + "epoch": 0.06, + "learning_rate": 1.9936230858015376e-05, + "loss": 0.3804, + "step": 757 + }, + { + "epoch": 0.06, + "learning_rate": 1.9935917446697038e-05, + "loss": 0.3049, + "step": 758 + }, + { + "epoch": 0.07, + "learning_rate": 1.993560326956728e-05, + "loss": 0.3068, + "step": 759 + }, + { + "epoch": 0.07, + "learning_rate": 1.9935288326650314e-05, + "loss": 0.3506, + "step": 760 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934972617970415e-05, + "loss": 0.3657, + "step": 761 + }, + { + "epoch": 0.07, + "learning_rate": 1.993465614355192e-05, + "loss": 0.311, + "step": 762 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934338903419213e-05, + "loss": 0.3981, + "step": 763 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934020897596752e-05, + "loss": 0.5864, + "step": 764 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933702126109048e-05, + "loss": 0.3585, + "step": 765 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933382588980665e-05, + "loss": 0.5985, + "step": 766 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933062286236235e-05, + "loss": 0.3079, + "step": 767 + }, + { + "epoch": 0.07, + "learning_rate": 1.9932741217900444e-05, + "loss": 0.3301, + "step": 768 + }, + { + "epoch": 0.07, + "learning_rate": 1.993241938399804e-05, + "loss": 0.372, + "step": 769 + }, + { + "epoch": 0.07, + "learning_rate": 1.993209678455383e-05, + "loss": 0.3864, + "step": 770 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931773419592675e-05, + "loss": 0.3069, + "step": 771 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931449289139495e-05, + "loss": 0.2963, + "step": 772 + }, + { + "epoch": 0.07, + "learning_rate": 1.993112439321928e-05, + "loss": 0.3364, + "step": 773 + }, + { + "epoch": 0.07, + "learning_rate": 1.993079873185707e-05, + "loss": 0.3281, + "step": 774 + }, + { + "epoch": 0.07, + "learning_rate": 1.993047230507796e-05, + "loss": 0.3059, + "step": 775 + }, + { + "epoch": 0.07, + "learning_rate": 1.993014511290711e-05, + "loss": 0.3986, + "step": 776 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929817155369746e-05, + "loss": 0.3258, + "step": 777 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929488432491137e-05, + "loss": 0.3033, + "step": 778 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929158944296627e-05, + "loss": 0.3777, + "step": 779 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928828690811603e-05, + "loss": 0.3599, + "step": 780 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928497672061523e-05, + "loss": 0.3278, + "step": 781 + }, + { + "epoch": 0.07, + "learning_rate": 1.99281658880719e-05, + "loss": 0.3645, + "step": 782 + }, + { + "epoch": 0.07, + "learning_rate": 1.992783333886831e-05, + "loss": 0.3481, + "step": 783 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927500024476378e-05, + "loss": 0.3496, + "step": 784 + }, + { + "epoch": 0.07, + "learning_rate": 1.9927165944921803e-05, + "loss": 0.2755, + "step": 785 + }, + { + "epoch": 0.07, + "learning_rate": 1.9926831100230322e-05, + "loss": 0.3297, + "step": 786 + }, + { + "epoch": 0.07, + "learning_rate": 1.9926495490427753e-05, + "loss": 0.3339, + "step": 787 + }, + { + "epoch": 0.07, + "learning_rate": 1.992615911553996e-05, + "loss": 0.2912, + "step": 788 + }, + { + "epoch": 0.07, + "learning_rate": 1.9925821975592866e-05, + "loss": 0.3555, + "step": 789 + }, + { + "epoch": 0.07, + "learning_rate": 1.9925484070612465e-05, + "loss": 0.3691, + "step": 790 + }, + { + "epoch": 0.07, + "learning_rate": 1.9925145400624788e-05, + "loss": 0.316, + "step": 791 + }, + { + "epoch": 0.07, + "learning_rate": 1.992480596565595e-05, + "loss": 0.3272, + "step": 792 + }, + { + "epoch": 0.07, + "learning_rate": 1.9924465765732106e-05, + "loss": 0.3127, + "step": 793 + }, + { + "epoch": 0.07, + "learning_rate": 1.992412480087948e-05, + "loss": 0.3422, + "step": 794 + }, + { + "epoch": 0.07, + "learning_rate": 1.992378307112435e-05, + "loss": 0.35, + "step": 795 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923440576493056e-05, + "loss": 0.3304, + "step": 796 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923097317011995e-05, + "loss": 0.3016, + "step": 797 + }, + { + "epoch": 0.07, + "learning_rate": 1.9922753292707627e-05, + "loss": 0.3372, + "step": 798 + }, + { + "epoch": 0.07, + "learning_rate": 1.992240850360646e-05, + "loss": 0.324, + "step": 799 + }, + { + "epoch": 0.07, + "learning_rate": 1.992206294973508e-05, + "loss": 0.3347, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 1.992171663112011e-05, + "loss": 0.3676, + "step": 801 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921369547788246e-05, + "loss": 0.261, + "step": 802 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921021699766243e-05, + "loss": 0.2889, + "step": 803 + }, + { + "epoch": 0.07, + "learning_rate": 1.9920673087080903e-05, + "loss": 0.3527, + "step": 804 + }, + { + "epoch": 0.07, + "learning_rate": 1.9920323709759108e-05, + "loss": 0.3105, + "step": 805 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919973567827776e-05, + "loss": 0.2941, + "step": 806 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919622661313897e-05, + "loss": 0.6091, + "step": 807 + }, + { + "epoch": 0.07, + "learning_rate": 1.991927099024452e-05, + "loss": 0.3502, + "step": 808 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918918554646745e-05, + "loss": 0.3297, + "step": 809 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918565354547738e-05, + "loss": 0.3663, + "step": 810 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918211389974726e-05, + "loss": 0.3846, + "step": 811 + }, + { + "epoch": 0.07, + "learning_rate": 1.9917856660954985e-05, + "loss": 0.3278, + "step": 812 + }, + { + "epoch": 0.07, + "learning_rate": 1.991750116751586e-05, + "loss": 0.2994, + "step": 813 + }, + { + "epoch": 0.07, + "learning_rate": 1.9917144909684745e-05, + "loss": 0.3167, + "step": 814 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916787887489108e-05, + "loss": 0.3237, + "step": 815 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916430100956458e-05, + "loss": 0.2934, + "step": 816 + }, + { + "epoch": 0.07, + "learning_rate": 1.991607155011437e-05, + "loss": 0.3155, + "step": 817 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915712234990486e-05, + "loss": 0.5964, + "step": 818 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915352155612503e-05, + "loss": 0.3163, + "step": 819 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914991312008164e-05, + "loss": 0.3191, + "step": 820 + }, + { + "epoch": 0.07, + "learning_rate": 1.991462970420529e-05, + "loss": 0.3099, + "step": 821 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914267332231746e-05, + "loss": 0.3243, + "step": 822 + }, + { + "epoch": 0.07, + "learning_rate": 1.991390419611546e-05, + "loss": 0.3366, + "step": 823 + }, + { + "epoch": 0.07, + "learning_rate": 1.991354029588443e-05, + "loss": 0.3292, + "step": 824 + }, + { + "epoch": 0.07, + "learning_rate": 1.9913175631566698e-05, + "loss": 0.3362, + "step": 825 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912810203190367e-05, + "loss": 0.3617, + "step": 826 + }, + { + "epoch": 0.07, + "learning_rate": 1.991244401078361e-05, + "loss": 0.3519, + "step": 827 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912077054374646e-05, + "loss": 0.3312, + "step": 828 + }, + { + "epoch": 0.07, + "learning_rate": 1.9911709333991758e-05, + "loss": 0.4035, + "step": 829 + }, + { + "epoch": 0.07, + "learning_rate": 1.9911340849663293e-05, + "loss": 0.3193, + "step": 830 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910971601417645e-05, + "loss": 0.3417, + "step": 831 + }, + { + "epoch": 0.07, + "learning_rate": 1.991060158928328e-05, + "loss": 0.3412, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910230813288713e-05, + "loss": 0.3275, + "step": 833 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909859273462525e-05, + "loss": 0.295, + "step": 834 + }, + { + "epoch": 0.07, + "learning_rate": 1.9909486969833346e-05, + "loss": 0.3278, + "step": 835 + }, + { + "epoch": 0.07, + "learning_rate": 1.990911390242988e-05, + "loss": 0.3846, + "step": 836 + }, + { + "epoch": 0.07, + "learning_rate": 1.9908740071280873e-05, + "loss": 0.3689, + "step": 837 + }, + { + "epoch": 0.07, + "learning_rate": 1.9908365476415146e-05, + "loss": 0.3255, + "step": 838 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907990117861564e-05, + "loss": 0.3314, + "step": 839 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907613995649063e-05, + "loss": 0.3344, + "step": 840 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907237109806627e-05, + "loss": 0.3314, + "step": 841 + }, + { + "epoch": 0.07, + "learning_rate": 1.9906859460363307e-05, + "loss": 0.3077, + "step": 842 + }, + { + "epoch": 0.07, + "learning_rate": 1.9906481047348215e-05, + "loss": 0.33, + "step": 843 + }, + { + "epoch": 0.07, + "learning_rate": 1.9906101870790512e-05, + "loss": 0.3832, + "step": 844 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905721930719425e-05, + "loss": 0.3558, + "step": 845 + }, + { + "epoch": 0.07, + "learning_rate": 1.990534122716423e-05, + "loss": 0.3477, + "step": 846 + }, + { + "epoch": 0.07, + "learning_rate": 1.9904959760154287e-05, + "loss": 0.335, + "step": 847 + }, + { + "epoch": 0.07, + "learning_rate": 1.9904577529718982e-05, + "loss": 0.3232, + "step": 848 + }, + { + "epoch": 0.07, + "learning_rate": 1.9904194535887783e-05, + "loss": 0.3464, + "step": 849 + }, + { + "epoch": 0.07, + "learning_rate": 1.9903810778690204e-05, + "loss": 0.3001, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 1.9903426258155833e-05, + "loss": 0.3398, + "step": 851 + }, + { + "epoch": 0.07, + "learning_rate": 1.9903040974314293e-05, + "loss": 0.3693, + "step": 852 + }, + { + "epoch": 0.07, + "learning_rate": 1.990265492719529e-05, + "loss": 0.3096, + "step": 853 + }, + { + "epoch": 0.07, + "learning_rate": 1.9902268116828578e-05, + "loss": 0.3546, + "step": 854 + }, + { + "epoch": 0.07, + "learning_rate": 1.9901880543243966e-05, + "loss": 0.3275, + "step": 855 + }, + { + "epoch": 0.07, + "learning_rate": 1.9901492206471325e-05, + "loss": 0.3226, + "step": 856 + }, + { + "epoch": 0.07, + "learning_rate": 1.9901103106540597e-05, + "loss": 0.2867, + "step": 857 + }, + { + "epoch": 0.07, + "learning_rate": 1.9900713243481758e-05, + "loss": 0.3155, + "step": 858 + }, + { + "epoch": 0.07, + "learning_rate": 1.9900322617324863e-05, + "loss": 0.3881, + "step": 859 + }, + { + "epoch": 0.07, + "learning_rate": 1.9899931228100024e-05, + "loss": 0.3416, + "step": 860 + }, + { + "epoch": 0.07, + "learning_rate": 1.98995390758374e-05, + "loss": 0.3362, + "step": 861 + }, + { + "epoch": 0.07, + "learning_rate": 1.989914616056722e-05, + "loss": 0.3507, + "step": 862 + }, + { + "epoch": 0.07, + "learning_rate": 1.9898752482319766e-05, + "loss": 0.3228, + "step": 863 + }, + { + "epoch": 0.07, + "learning_rate": 1.9898358041125382e-05, + "loss": 0.3431, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 1.989796283701447e-05, + "loss": 0.3011, + "step": 865 + }, + { + "epoch": 0.07, + "learning_rate": 1.989756687001749e-05, + "loss": 0.3501, + "step": 866 + }, + { + "epoch": 0.07, + "learning_rate": 1.989717014016496e-05, + "loss": 0.5685, + "step": 867 + }, + { + "epoch": 0.07, + "learning_rate": 1.989677264748746e-05, + "loss": 0.3452, + "step": 868 + }, + { + "epoch": 0.07, + "learning_rate": 1.9896374392015624e-05, + "loss": 0.3271, + "step": 869 + }, + { + "epoch": 0.07, + "learning_rate": 1.989597537378015e-05, + "loss": 0.3281, + "step": 870 + }, + { + "epoch": 0.07, + "learning_rate": 1.9895575592811795e-05, + "loss": 0.3235, + "step": 871 + }, + { + "epoch": 0.07, + "learning_rate": 1.9895175049141366e-05, + "loss": 0.5752, + "step": 872 + }, + { + "epoch": 0.07, + "learning_rate": 1.9894773742799737e-05, + "loss": 0.3206, + "step": 873 + }, + { + "epoch": 0.07, + "learning_rate": 1.989437167381784e-05, + "loss": 0.3124, + "step": 874 + }, + { + "epoch": 0.08, + "learning_rate": 1.9893968842226664e-05, + "loss": 0.3116, + "step": 875 + }, + { + "epoch": 0.08, + "learning_rate": 1.9893565248057257e-05, + "loss": 0.3371, + "step": 876 + }, + { + "epoch": 0.08, + "learning_rate": 1.9893160891340728e-05, + "loss": 0.2988, + "step": 877 + }, + { + "epoch": 0.08, + "learning_rate": 1.989275577210824e-05, + "loss": 0.3743, + "step": 878 + }, + { + "epoch": 0.08, + "learning_rate": 1.9892349890391015e-05, + "loss": 0.3319, + "step": 879 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891943246220344e-05, + "loss": 0.2971, + "step": 880 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891535839627565e-05, + "loss": 0.3267, + "step": 881 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891127670644076e-05, + "loss": 0.37, + "step": 882 + }, + { + "epoch": 0.08, + "learning_rate": 1.9890718739301346e-05, + "loss": 0.3203, + "step": 883 + }, + { + "epoch": 0.08, + "learning_rate": 1.989030904563088e-05, + "loss": 0.3338, + "step": 884 + }, + { + "epoch": 0.08, + "learning_rate": 1.988989858966427e-05, + "loss": 0.2997, + "step": 885 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889487371433134e-05, + "loss": 0.3385, + "step": 886 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889075390969182e-05, + "loss": 0.3393, + "step": 887 + }, + { + "epoch": 0.08, + "learning_rate": 1.9888662648304162e-05, + "loss": 0.3691, + "step": 888 + }, + { + "epoch": 0.08, + "learning_rate": 1.988824914346989e-05, + "loss": 0.2976, + "step": 889 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887834876498228e-05, + "loss": 0.3438, + "step": 890 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887419847421113e-05, + "loss": 0.3074, + "step": 891 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887004056270532e-05, + "loss": 0.3702, + "step": 892 + }, + { + "epoch": 0.08, + "learning_rate": 1.988658750307853e-05, + "loss": 0.3333, + "step": 893 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886170187877214e-05, + "loss": 0.3124, + "step": 894 + }, + { + "epoch": 0.08, + "learning_rate": 1.988575211069875e-05, + "loss": 0.3116, + "step": 895 + }, + { + "epoch": 0.08, + "learning_rate": 1.9885333271575362e-05, + "loss": 0.3234, + "step": 896 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884913670539327e-05, + "loss": 0.2993, + "step": 897 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884493307622993e-05, + "loss": 0.3008, + "step": 898 + }, + { + "epoch": 0.08, + "learning_rate": 1.988407218285875e-05, + "loss": 0.6298, + "step": 899 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883650296279068e-05, + "loss": 0.2564, + "step": 900 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883227647916454e-05, + "loss": 0.3343, + "step": 901 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882804237803487e-05, + "loss": 0.316, + "step": 902 + }, + { + "epoch": 0.08, + "learning_rate": 1.98823800659728e-05, + "loss": 0.3342, + "step": 903 + }, + { + "epoch": 0.08, + "learning_rate": 1.9881955132457095e-05, + "loss": 0.3773, + "step": 904 + }, + { + "epoch": 0.08, + "learning_rate": 1.988152943728911e-05, + "loss": 0.3944, + "step": 905 + }, + { + "epoch": 0.08, + "learning_rate": 1.9881102980501664e-05, + "loss": 0.3303, + "step": 906 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880675762127624e-05, + "loss": 0.3203, + "step": 907 + }, + { + "epoch": 0.08, + "learning_rate": 1.988024778219992e-05, + "loss": 0.3059, + "step": 908 + }, + { + "epoch": 0.08, + "learning_rate": 1.9879819040751532e-05, + "loss": 0.3581, + "step": 909 + }, + { + "epoch": 0.08, + "learning_rate": 1.9879389537815514e-05, + "loss": 0.3987, + "step": 910 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878959273424968e-05, + "loss": 0.3455, + "step": 911 + }, + { + "epoch": 0.08, + "learning_rate": 1.987852824761305e-05, + "loss": 0.3065, + "step": 912 + }, + { + "epoch": 0.08, + "learning_rate": 1.987809646041299e-05, + "loss": 0.2888, + "step": 913 + }, + { + "epoch": 0.08, + "learning_rate": 1.987766391185806e-05, + "loss": 0.3319, + "step": 914 + }, + { + "epoch": 0.08, + "learning_rate": 1.98772306019816e-05, + "loss": 0.3539, + "step": 915 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876796530817017e-05, + "loss": 0.3716, + "step": 916 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876361698397755e-05, + "loss": 0.3692, + "step": 917 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875926104757337e-05, + "loss": 0.3367, + "step": 918 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875489749929334e-05, + "loss": 0.5887, + "step": 919 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875052633947373e-05, + "loss": 0.3486, + "step": 920 + }, + { + "epoch": 0.08, + "learning_rate": 1.987461475684515e-05, + "loss": 0.3786, + "step": 921 + }, + { + "epoch": 0.08, + "learning_rate": 1.9874176118656415e-05, + "loss": 0.3452, + "step": 922 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873736719414977e-05, + "loss": 0.364, + "step": 923 + }, + { + "epoch": 0.08, + "learning_rate": 1.98732965591547e-05, + "loss": 0.308, + "step": 924 + }, + { + "epoch": 0.08, + "learning_rate": 1.9872855637909506e-05, + "loss": 0.3063, + "step": 925 + }, + { + "epoch": 0.08, + "learning_rate": 1.9872413955713382e-05, + "loss": 0.3671, + "step": 926 + }, + { + "epoch": 0.08, + "learning_rate": 1.9871971512600375e-05, + "loss": 0.3909, + "step": 927 + }, + { + "epoch": 0.08, + "learning_rate": 1.987152830860458e-05, + "loss": 0.36, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 1.987108434376016e-05, + "loss": 0.3239, + "step": 929 + }, + { + "epoch": 0.08, + "learning_rate": 1.9870639618101333e-05, + "loss": 0.3331, + "step": 930 + }, + { + "epoch": 0.08, + "learning_rate": 1.987019413166238e-05, + "loss": 0.35, + "step": 931 + }, + { + "epoch": 0.08, + "learning_rate": 1.986974788447763e-05, + "loss": 0.3496, + "step": 932 + }, + { + "epoch": 0.08, + "learning_rate": 1.986930087658148e-05, + "loss": 0.3754, + "step": 933 + }, + { + "epoch": 0.08, + "learning_rate": 1.9868853108008387e-05, + "loss": 0.3561, + "step": 934 + }, + { + "epoch": 0.08, + "learning_rate": 1.9868404578792858e-05, + "loss": 0.3477, + "step": 935 + }, + { + "epoch": 0.08, + "learning_rate": 1.9867955288969468e-05, + "loss": 0.3076, + "step": 936 + }, + { + "epoch": 0.08, + "learning_rate": 1.986750523857284e-05, + "loss": 0.3112, + "step": 937 + }, + { + "epoch": 0.08, + "learning_rate": 1.9867054427637667e-05, + "loss": 0.3149, + "step": 938 + }, + { + "epoch": 0.08, + "learning_rate": 1.986660285619869e-05, + "loss": 0.3094, + "step": 939 + }, + { + "epoch": 0.08, + "learning_rate": 1.986615052429072e-05, + "loss": 0.3588, + "step": 940 + }, + { + "epoch": 0.08, + "learning_rate": 1.986569743194862e-05, + "loss": 0.3519, + "step": 941 + }, + { + "epoch": 0.08, + "learning_rate": 1.9865243579207304e-05, + "loss": 0.3169, + "step": 942 + }, + { + "epoch": 0.08, + "learning_rate": 1.986478896610176e-05, + "loss": 0.3291, + "step": 943 + }, + { + "epoch": 0.08, + "learning_rate": 1.986433359266703e-05, + "loss": 0.3159, + "step": 944 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863877458938204e-05, + "loss": 0.3229, + "step": 945 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863420564950445e-05, + "loss": 0.3204, + "step": 946 + }, + { + "epoch": 0.08, + "learning_rate": 1.9862962910738965e-05, + "loss": 0.2886, + "step": 947 + }, + { + "epoch": 0.08, + "learning_rate": 1.9862504496339036e-05, + "loss": 0.3209, + "step": 948 + }, + { + "epoch": 0.08, + "learning_rate": 1.9862045321785994e-05, + "loss": 0.3069, + "step": 949 + }, + { + "epoch": 0.08, + "learning_rate": 1.9861585387115228e-05, + "loss": 0.3577, + "step": 950 + }, + { + "epoch": 0.08, + "learning_rate": 1.9861124692362188e-05, + "loss": 0.3427, + "step": 951 + }, + { + "epoch": 0.08, + "learning_rate": 1.986066323756238e-05, + "loss": 0.3471, + "step": 952 + }, + { + "epoch": 0.08, + "learning_rate": 1.9860201022751376e-05, + "loss": 0.3679, + "step": 953 + }, + { + "epoch": 0.08, + "learning_rate": 1.9859738047964795e-05, + "loss": 0.3129, + "step": 954 + }, + { + "epoch": 0.08, + "learning_rate": 1.9859274313238327e-05, + "loss": 0.338, + "step": 955 + }, + { + "epoch": 0.08, + "learning_rate": 1.985880981860771e-05, + "loss": 0.3145, + "step": 956 + }, + { + "epoch": 0.08, + "learning_rate": 1.9858344564108743e-05, + "loss": 0.3157, + "step": 957 + }, + { + "epoch": 0.08, + "learning_rate": 1.985787854977729e-05, + "loss": 0.3309, + "step": 958 + }, + { + "epoch": 0.08, + "learning_rate": 1.985741177564927e-05, + "loss": 0.3246, + "step": 959 + }, + { + "epoch": 0.08, + "learning_rate": 1.9856944241760655e-05, + "loss": 0.3215, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 1.985647594814748e-05, + "loss": 0.3479, + "step": 961 + }, + { + "epoch": 0.08, + "learning_rate": 1.9856006894845844e-05, + "loss": 0.3265, + "step": 962 + }, + { + "epoch": 0.08, + "learning_rate": 1.98555370818919e-05, + "loss": 0.3215, + "step": 963 + }, + { + "epoch": 0.08, + "learning_rate": 1.985506650932185e-05, + "loss": 0.3504, + "step": 964 + }, + { + "epoch": 0.08, + "learning_rate": 1.9854595177171968e-05, + "loss": 0.3011, + "step": 965 + }, + { + "epoch": 0.08, + "learning_rate": 1.9854123085478587e-05, + "loss": 0.3987, + "step": 966 + }, + { + "epoch": 0.08, + "learning_rate": 1.9853650234278088e-05, + "loss": 0.3414, + "step": 967 + }, + { + "epoch": 0.08, + "learning_rate": 1.9853176623606916e-05, + "loss": 0.2996, + "step": 968 + }, + { + "epoch": 0.08, + "learning_rate": 1.9852702253501578e-05, + "loss": 0.3721, + "step": 969 + }, + { + "epoch": 0.08, + "learning_rate": 1.985222712399863e-05, + "loss": 0.2773, + "step": 970 + }, + { + "epoch": 0.08, + "learning_rate": 1.98517512351347e-05, + "loss": 0.3486, + "step": 971 + }, + { + "epoch": 0.08, + "learning_rate": 1.9851274586946463e-05, + "loss": 0.3467, + "step": 972 + }, + { + "epoch": 0.08, + "learning_rate": 1.9850797179470657e-05, + "loss": 0.3059, + "step": 973 + }, + { + "epoch": 0.08, + "learning_rate": 1.985031901274408e-05, + "loss": 0.2716, + "step": 974 + }, + { + "epoch": 0.08, + "learning_rate": 1.9849840086803584e-05, + "loss": 0.3488, + "step": 975 + }, + { + "epoch": 0.08, + "learning_rate": 1.9849360401686084e-05, + "loss": 0.371, + "step": 976 + }, + { + "epoch": 0.08, + "learning_rate": 1.9848879957428552e-05, + "loss": 0.5894, + "step": 977 + }, + { + "epoch": 0.08, + "learning_rate": 1.9848398754068018e-05, + "loss": 0.6475, + "step": 978 + }, + { + "epoch": 0.08, + "learning_rate": 1.9847916791641567e-05, + "loss": 0.3305, + "step": 979 + }, + { + "epoch": 0.08, + "learning_rate": 1.9847434070186355e-05, + "loss": 0.2983, + "step": 980 + }, + { + "epoch": 0.08, + "learning_rate": 1.9846950589739576e-05, + "loss": 0.3073, + "step": 981 + }, + { + "epoch": 0.08, + "learning_rate": 1.9846466350338506e-05, + "loss": 0.2871, + "step": 982 + }, + { + "epoch": 0.08, + "learning_rate": 1.984598135202046e-05, + "loss": 0.347, + "step": 983 + }, + { + "epoch": 0.08, + "learning_rate": 1.9845495594822824e-05, + "loss": 0.3453, + "step": 984 + }, + { + "epoch": 0.08, + "learning_rate": 1.984500907878303e-05, + "loss": 0.3809, + "step": 985 + }, + { + "epoch": 0.08, + "learning_rate": 1.9844521803938588e-05, + "loss": 0.3064, + "step": 986 + }, + { + "epoch": 0.08, + "learning_rate": 1.9844033770327048e-05, + "loss": 0.3229, + "step": 987 + }, + { + "epoch": 0.08, + "learning_rate": 1.984354497798602e-05, + "loss": 0.3281, + "step": 988 + }, + { + "epoch": 0.08, + "learning_rate": 1.984305542695319e-05, + "loss": 0.3356, + "step": 989 + }, + { + "epoch": 0.08, + "learning_rate": 1.984256511726628e-05, + "loss": 0.3574, + "step": 990 + }, + { + "epoch": 0.08, + "learning_rate": 1.984207404896309e-05, + "loss": 0.3558, + "step": 991 + }, + { + "epoch": 0.09, + "learning_rate": 1.984158222208146e-05, + "loss": 0.3175, + "step": 992 + }, + { + "epoch": 0.09, + "learning_rate": 1.9841089636659296e-05, + "loss": 0.3337, + "step": 993 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840596292734573e-05, + "loss": 0.3245, + "step": 994 + }, + { + "epoch": 0.09, + "learning_rate": 1.984010219034531e-05, + "loss": 0.3109, + "step": 995 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839607329529594e-05, + "loss": 0.3693, + "step": 996 + }, + { + "epoch": 0.09, + "learning_rate": 1.983911171032556e-05, + "loss": 0.3055, + "step": 997 + }, + { + "epoch": 0.09, + "learning_rate": 1.983861533277142e-05, + "loss": 0.3206, + "step": 998 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838118196905417e-05, + "loss": 0.3652, + "step": 999 + }, + { + "epoch": 0.09, + "learning_rate": 1.983762030276588e-05, + "loss": 0.3318, + "step": 1000 + }, + { + "epoch": 0.09, + "learning_rate": 1.9837121650391173e-05, + "loss": 0.3504, + "step": 1001 + }, + { + "epoch": 0.09, + "learning_rate": 1.9836622239819743e-05, + "loss": 0.3115, + "step": 1002 + }, + { + "epoch": 0.09, + "learning_rate": 1.983612207109007e-05, + "loss": 0.36, + "step": 1003 + }, + { + "epoch": 0.09, + "learning_rate": 1.983562114424071e-05, + "loss": 0.332, + "step": 1004 + }, + { + "epoch": 0.09, + "learning_rate": 1.983511945931027e-05, + "loss": 0.3432, + "step": 1005 + }, + { + "epoch": 0.09, + "learning_rate": 1.9834617016337424e-05, + "loss": 0.36, + "step": 1006 + }, + { + "epoch": 0.09, + "learning_rate": 1.983411381536089e-05, + "loss": 0.3352, + "step": 1007 + }, + { + "epoch": 0.09, + "learning_rate": 1.9833609856419452e-05, + "loss": 0.3488, + "step": 1008 + }, + { + "epoch": 0.09, + "learning_rate": 1.983310513955196e-05, + "loss": 0.3171, + "step": 1009 + }, + { + "epoch": 0.09, + "learning_rate": 1.9832599664797306e-05, + "loss": 0.3448, + "step": 1010 + }, + { + "epoch": 0.09, + "learning_rate": 1.983209343219446e-05, + "loss": 0.3558, + "step": 1011 + }, + { + "epoch": 0.09, + "learning_rate": 1.9831586441782427e-05, + "loss": 0.3386, + "step": 1012 + }, + { + "epoch": 0.09, + "learning_rate": 1.9831078693600295e-05, + "loss": 0.3693, + "step": 1013 + }, + { + "epoch": 0.09, + "learning_rate": 1.983057018768719e-05, + "loss": 0.3177, + "step": 1014 + }, + { + "epoch": 0.09, + "learning_rate": 1.9830060924082316e-05, + "loss": 0.3682, + "step": 1015 + }, + { + "epoch": 0.09, + "learning_rate": 1.9829550902824914e-05, + "loss": 0.3482, + "step": 1016 + }, + { + "epoch": 0.09, + "learning_rate": 1.98290401239543e-05, + "loss": 0.3864, + "step": 1017 + }, + { + "epoch": 0.09, + "learning_rate": 1.9828528587509836e-05, + "loss": 0.299, + "step": 1018 + }, + { + "epoch": 0.09, + "learning_rate": 1.9828016293530954e-05, + "loss": 0.3115, + "step": 1019 + }, + { + "epoch": 0.09, + "learning_rate": 1.982750324205714e-05, + "loss": 0.329, + "step": 1020 + }, + { + "epoch": 0.09, + "learning_rate": 1.982698943312793e-05, + "loss": 0.3254, + "step": 1021 + }, + { + "epoch": 0.09, + "learning_rate": 1.9826474866782933e-05, + "loss": 0.3503, + "step": 1022 + }, + { + "epoch": 0.09, + "learning_rate": 1.9825959543061812e-05, + "loss": 0.3382, + "step": 1023 + }, + { + "epoch": 0.09, + "learning_rate": 1.9825443462004278e-05, + "loss": 0.3307, + "step": 1024 + }, + { + "epoch": 0.09, + "learning_rate": 1.982492662365011e-05, + "loss": 0.2749, + "step": 1025 + }, + { + "epoch": 0.09, + "learning_rate": 1.9824409028039143e-05, + "loss": 0.3265, + "step": 1026 + }, + { + "epoch": 0.09, + "learning_rate": 1.9823890675211275e-05, + "loss": 0.3478, + "step": 1027 + }, + { + "epoch": 0.09, + "learning_rate": 1.9823371565206452e-05, + "loss": 0.3239, + "step": 1028 + }, + { + "epoch": 0.09, + "learning_rate": 1.9822851698064692e-05, + "loss": 0.2896, + "step": 1029 + }, + { + "epoch": 0.09, + "learning_rate": 1.9822331073826056e-05, + "loss": 0.2916, + "step": 1030 + }, + { + "epoch": 0.09, + "learning_rate": 1.9821809692530673e-05, + "loss": 0.623, + "step": 1031 + }, + { + "epoch": 0.09, + "learning_rate": 1.9821287554218733e-05, + "loss": 0.3153, + "step": 1032 + }, + { + "epoch": 0.09, + "learning_rate": 1.9820764658930477e-05, + "loss": 0.3569, + "step": 1033 + }, + { + "epoch": 0.09, + "learning_rate": 1.9820241006706203e-05, + "loss": 0.3831, + "step": 1034 + }, + { + "epoch": 0.09, + "learning_rate": 1.9819716597586277e-05, + "loss": 0.6156, + "step": 1035 + }, + { + "epoch": 0.09, + "learning_rate": 1.981919143161112e-05, + "loss": 0.301, + "step": 1036 + }, + { + "epoch": 0.09, + "learning_rate": 1.98186655088212e-05, + "loss": 0.3411, + "step": 1037 + }, + { + "epoch": 0.09, + "learning_rate": 1.9818138829257063e-05, + "loss": 0.3174, + "step": 1038 + }, + { + "epoch": 0.09, + "learning_rate": 1.9817611392959294e-05, + "loss": 0.3257, + "step": 1039 + }, + { + "epoch": 0.09, + "learning_rate": 1.9817083199968552e-05, + "loss": 0.3298, + "step": 1040 + }, + { + "epoch": 0.09, + "learning_rate": 1.981655425032554e-05, + "loss": 0.3502, + "step": 1041 + }, + { + "epoch": 0.09, + "learning_rate": 1.9816024544071038e-05, + "loss": 0.3313, + "step": 1042 + }, + { + "epoch": 0.09, + "learning_rate": 1.981549408124586e-05, + "loss": 0.3438, + "step": 1043 + }, + { + "epoch": 0.09, + "learning_rate": 1.9814962861890903e-05, + "loss": 0.3137, + "step": 1044 + }, + { + "epoch": 0.09, + "learning_rate": 1.9814430886047105e-05, + "loss": 0.3065, + "step": 1045 + }, + { + "epoch": 0.09, + "learning_rate": 1.9813898153755465e-05, + "loss": 0.3173, + "step": 1046 + }, + { + "epoch": 0.09, + "learning_rate": 1.981336466505705e-05, + "loss": 0.2928, + "step": 1047 + }, + { + "epoch": 0.09, + "learning_rate": 1.9812830419992976e-05, + "loss": 0.5908, + "step": 1048 + }, + { + "epoch": 0.09, + "learning_rate": 1.981229541860442e-05, + "loss": 0.2985, + "step": 1049 + }, + { + "epoch": 0.09, + "learning_rate": 1.981175966093262e-05, + "loss": 0.3712, + "step": 1050 + }, + { + "epoch": 0.09, + "learning_rate": 1.9811223147018862e-05, + "loss": 0.3634, + "step": 1051 + }, + { + "epoch": 0.09, + "learning_rate": 1.98106858769045e-05, + "loss": 0.3686, + "step": 1052 + }, + { + "epoch": 0.09, + "learning_rate": 1.981014785063095e-05, + "loss": 0.3173, + "step": 1053 + }, + { + "epoch": 0.09, + "learning_rate": 1.980960906823968e-05, + "loss": 0.355, + "step": 1054 + }, + { + "epoch": 0.09, + "learning_rate": 1.9809069529772215e-05, + "loss": 0.3669, + "step": 1055 + }, + { + "epoch": 0.09, + "learning_rate": 1.9808529235270134e-05, + "loss": 0.2851, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 1.9807988184775085e-05, + "loss": 0.3068, + "step": 1057 + }, + { + "epoch": 0.09, + "learning_rate": 1.980744637832877e-05, + "loss": 0.3116, + "step": 1058 + }, + { + "epoch": 0.09, + "learning_rate": 1.980690381597295e-05, + "loss": 0.3416, + "step": 1059 + }, + { + "epoch": 0.09, + "learning_rate": 1.9806360497749436e-05, + "loss": 0.3684, + "step": 1060 + }, + { + "epoch": 0.09, + "learning_rate": 1.9805816423700114e-05, + "loss": 0.2925, + "step": 1061 + }, + { + "epoch": 0.09, + "learning_rate": 1.9805271593866914e-05, + "loss": 0.3238, + "step": 1062 + }, + { + "epoch": 0.09, + "learning_rate": 1.9804726008291827e-05, + "loss": 0.3391, + "step": 1063 + }, + { + "epoch": 0.09, + "learning_rate": 1.9804179667016906e-05, + "loss": 0.3081, + "step": 1064 + }, + { + "epoch": 0.09, + "learning_rate": 1.9803632570084265e-05, + "loss": 0.2841, + "step": 1065 + }, + { + "epoch": 0.09, + "learning_rate": 1.980308471753606e-05, + "loss": 0.3983, + "step": 1066 + }, + { + "epoch": 0.09, + "learning_rate": 1.9802536109414526e-05, + "loss": 0.2763, + "step": 1067 + }, + { + "epoch": 0.09, + "learning_rate": 1.980198674576194e-05, + "loss": 0.3582, + "step": 1068 + }, + { + "epoch": 0.09, + "learning_rate": 1.9801436626620658e-05, + "loss": 0.3123, + "step": 1069 + }, + { + "epoch": 0.09, + "learning_rate": 1.9800885752033067e-05, + "loss": 0.3397, + "step": 1070 + }, + { + "epoch": 0.09, + "learning_rate": 1.9800334122041626e-05, + "loss": 0.6584, + "step": 1071 + }, + { + "epoch": 0.09, + "learning_rate": 1.9799781736688862e-05, + "loss": 0.3849, + "step": 1072 + }, + { + "epoch": 0.09, + "learning_rate": 1.979922859601734e-05, + "loss": 0.2826, + "step": 1073 + }, + { + "epoch": 0.09, + "learning_rate": 1.9798674700069698e-05, + "loss": 0.3252, + "step": 1074 + }, + { + "epoch": 0.09, + "learning_rate": 1.9798120048888628e-05, + "loss": 0.3536, + "step": 1075 + }, + { + "epoch": 0.09, + "learning_rate": 1.9797564642516876e-05, + "loss": 0.2996, + "step": 1076 + }, + { + "epoch": 0.09, + "learning_rate": 1.9797008480997253e-05, + "loss": 0.3048, + "step": 1077 + }, + { + "epoch": 0.09, + "learning_rate": 1.9796451564372624e-05, + "loss": 0.3671, + "step": 1078 + }, + { + "epoch": 0.09, + "learning_rate": 1.9795893892685918e-05, + "loss": 0.3156, + "step": 1079 + }, + { + "epoch": 0.09, + "learning_rate": 1.979533546598011e-05, + "loss": 0.3223, + "step": 1080 + }, + { + "epoch": 0.09, + "learning_rate": 1.9794776284298247e-05, + "loss": 0.2913, + "step": 1081 + }, + { + "epoch": 0.09, + "learning_rate": 1.9794216347683425e-05, + "loss": 0.3216, + "step": 1082 + }, + { + "epoch": 0.09, + "learning_rate": 1.97936556561788e-05, + "loss": 0.3663, + "step": 1083 + }, + { + "epoch": 0.09, + "learning_rate": 1.979309420982759e-05, + "loss": 0.3406, + "step": 1084 + }, + { + "epoch": 0.09, + "learning_rate": 1.9792532008673067e-05, + "loss": 0.3624, + "step": 1085 + }, + { + "epoch": 0.09, + "learning_rate": 1.9791969052758563e-05, + "loss": 0.3109, + "step": 1086 + }, + { + "epoch": 0.09, + "learning_rate": 1.979140534212747e-05, + "loss": 0.3622, + "step": 1087 + }, + { + "epoch": 0.09, + "learning_rate": 1.979084087682323e-05, + "loss": 0.2924, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 1.9790275656889356e-05, + "loss": 0.3307, + "step": 1089 + }, + { + "epoch": 0.09, + "learning_rate": 1.978970968236941e-05, + "loss": 0.3334, + "step": 1090 + }, + { + "epoch": 0.09, + "learning_rate": 1.978914295330701e-05, + "loss": 0.312, + "step": 1091 + }, + { + "epoch": 0.09, + "learning_rate": 1.9788575469745845e-05, + "loss": 0.3537, + "step": 1092 + }, + { + "epoch": 0.09, + "learning_rate": 1.9788007231729647e-05, + "loss": 0.324, + "step": 1093 + }, + { + "epoch": 0.09, + "learning_rate": 1.9787438239302217e-05, + "loss": 0.3004, + "step": 1094 + }, + { + "epoch": 0.09, + "learning_rate": 1.978686849250741e-05, + "loss": 0.3788, + "step": 1095 + }, + { + "epoch": 0.09, + "learning_rate": 1.9786297991389136e-05, + "loss": 0.2927, + "step": 1096 + }, + { + "epoch": 0.09, + "learning_rate": 1.978572673599137e-05, + "loss": 0.3024, + "step": 1097 + }, + { + "epoch": 0.09, + "learning_rate": 1.9785154726358134e-05, + "loss": 0.363, + "step": 1098 + }, + { + "epoch": 0.09, + "learning_rate": 1.9784581962533528e-05, + "loss": 0.3209, + "step": 1099 + }, + { + "epoch": 0.09, + "learning_rate": 1.9784008444561692e-05, + "loss": 0.5852, + "step": 1100 + }, + { + "epoch": 0.09, + "learning_rate": 1.9783434172486825e-05, + "loss": 0.3198, + "step": 1101 + }, + { + "epoch": 0.09, + "learning_rate": 1.9782859146353196e-05, + "loss": 0.357, + "step": 1102 + }, + { + "epoch": 0.09, + "learning_rate": 1.9782283366205122e-05, + "loss": 0.3658, + "step": 1103 + }, + { + "epoch": 0.09, + "learning_rate": 1.9781706832086984e-05, + "loss": 0.3181, + "step": 1104 + }, + { + "epoch": 0.09, + "learning_rate": 1.978112954404321e-05, + "loss": 0.3444, + "step": 1105 + }, + { + "epoch": 0.09, + "learning_rate": 1.9780551502118306e-05, + "loss": 0.3747, + "step": 1106 + }, + { + "epoch": 0.09, + "learning_rate": 1.9779972706356818e-05, + "loss": 0.3378, + "step": 1107 + }, + { + "epoch": 0.09, + "learning_rate": 1.9779393156803356e-05, + "loss": 0.2866, + "step": 1108 + }, + { + "epoch": 0.1, + "learning_rate": 1.9778812853502592e-05, + "loss": 0.3322, + "step": 1109 + }, + { + "epoch": 0.1, + "learning_rate": 1.9778231796499254e-05, + "loss": 0.3243, + "step": 1110 + }, + { + "epoch": 0.1, + "learning_rate": 1.9777649985838123e-05, + "loss": 0.3409, + "step": 1111 + }, + { + "epoch": 0.1, + "learning_rate": 1.9777067421564046e-05, + "loss": 0.3838, + "step": 1112 + }, + { + "epoch": 0.1, + "learning_rate": 1.9776484103721918e-05, + "loss": 0.3428, + "step": 1113 + }, + { + "epoch": 0.1, + "learning_rate": 1.9775900032356704e-05, + "loss": 0.2975, + "step": 1114 + }, + { + "epoch": 0.1, + "learning_rate": 1.977531520751342e-05, + "loss": 0.332, + "step": 1115 + }, + { + "epoch": 0.1, + "learning_rate": 1.9774729629237143e-05, + "loss": 0.3278, + "step": 1116 + }, + { + "epoch": 0.1, + "learning_rate": 1.9774143297573003e-05, + "loss": 0.3423, + "step": 1117 + }, + { + "epoch": 0.1, + "learning_rate": 1.977355621256619e-05, + "loss": 0.3135, + "step": 1118 + }, + { + "epoch": 0.1, + "learning_rate": 1.977296837426196e-05, + "loss": 0.3113, + "step": 1119 + }, + { + "epoch": 0.1, + "learning_rate": 1.9772379782705616e-05, + "loss": 0.2944, + "step": 1120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9771790437942528e-05, + "loss": 0.3194, + "step": 1121 + }, + { + "epoch": 0.1, + "learning_rate": 1.9771200340018115e-05, + "loss": 0.3669, + "step": 1122 + }, + { + "epoch": 0.1, + "learning_rate": 1.977060948897786e-05, + "loss": 0.3427, + "step": 1123 + }, + { + "epoch": 0.1, + "learning_rate": 1.9770017884867306e-05, + "loss": 0.2729, + "step": 1124 + }, + { + "epoch": 0.1, + "learning_rate": 1.9769425527732046e-05, + "loss": 0.3396, + "step": 1125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9768832417617737e-05, + "loss": 0.3331, + "step": 1126 + }, + { + "epoch": 0.1, + "learning_rate": 1.9768238554570098e-05, + "loss": 0.2742, + "step": 1127 + }, + { + "epoch": 0.1, + "learning_rate": 1.9767643938634896e-05, + "loss": 0.3248, + "step": 1128 + }, + { + "epoch": 0.1, + "learning_rate": 1.9767048569857963e-05, + "loss": 0.2985, + "step": 1129 + }, + { + "epoch": 0.1, + "learning_rate": 1.9766452448285184e-05, + "loss": 0.3088, + "step": 1130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9765855573962512e-05, + "loss": 0.3185, + "step": 1131 + }, + { + "epoch": 0.1, + "learning_rate": 1.9765257946935944e-05, + "loss": 0.3276, + "step": 1132 + }, + { + "epoch": 0.1, + "learning_rate": 1.9764659567251546e-05, + "loss": 0.3474, + "step": 1133 + }, + { + "epoch": 0.1, + "learning_rate": 1.9764060434955437e-05, + "loss": 0.3538, + "step": 1134 + }, + { + "epoch": 0.1, + "learning_rate": 1.97634605500938e-05, + "loss": 0.301, + "step": 1135 + }, + { + "epoch": 0.1, + "learning_rate": 1.976285991271286e-05, + "loss": 0.3145, + "step": 1136 + }, + { + "epoch": 0.1, + "learning_rate": 1.9762258522858917e-05, + "loss": 0.3041, + "step": 1137 + }, + { + "epoch": 0.1, + "learning_rate": 1.9761656380578328e-05, + "loss": 0.3067, + "step": 1138 + }, + { + "epoch": 0.1, + "learning_rate": 1.9761053485917497e-05, + "loss": 0.5691, + "step": 1139 + }, + { + "epoch": 0.1, + "learning_rate": 1.9760449838922894e-05, + "loss": 0.3526, + "step": 1140 + }, + { + "epoch": 0.1, + "learning_rate": 1.9759845439641047e-05, + "loss": 0.3307, + "step": 1141 + }, + { + "epoch": 0.1, + "learning_rate": 1.9759240288118536e-05, + "loss": 0.3066, + "step": 1142 + }, + { + "epoch": 0.1, + "learning_rate": 1.9758634384402007e-05, + "loss": 0.3082, + "step": 1143 + }, + { + "epoch": 0.1, + "learning_rate": 1.9758027728538157e-05, + "loss": 0.3555, + "step": 1144 + }, + { + "epoch": 0.1, + "learning_rate": 1.9757420320573747e-05, + "loss": 0.3967, + "step": 1145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9756812160555586e-05, + "loss": 0.3606, + "step": 1146 + }, + { + "epoch": 0.1, + "learning_rate": 1.975620324853056e-05, + "loss": 0.269, + "step": 1147 + }, + { + "epoch": 0.1, + "learning_rate": 1.9755593584545594e-05, + "loss": 0.343, + "step": 1148 + }, + { + "epoch": 0.1, + "learning_rate": 1.975498316864768e-05, + "loss": 0.3651, + "step": 1149 + }, + { + "epoch": 0.1, + "learning_rate": 1.975437200088386e-05, + "loss": 0.3627, + "step": 1150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9753760081301242e-05, + "loss": 0.3104, + "step": 1151 + }, + { + "epoch": 0.1, + "learning_rate": 1.9753147409946997e-05, + "loss": 0.2998, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 1.9752533986868337e-05, + "loss": 0.3517, + "step": 1153 + }, + { + "epoch": 0.1, + "learning_rate": 1.975191981211255e-05, + "loss": 0.3054, + "step": 1154 + }, + { + "epoch": 0.1, + "learning_rate": 1.9751304885726967e-05, + "loss": 0.3893, + "step": 1155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9750689207758985e-05, + "loss": 0.3115, + "step": 1156 + }, + { + "epoch": 0.1, + "learning_rate": 1.975007277825606e-05, + "loss": 0.2866, + "step": 1157 + }, + { + "epoch": 0.1, + "learning_rate": 1.9749455597265704e-05, + "loss": 0.3831, + "step": 1158 + }, + { + "epoch": 0.1, + "learning_rate": 1.974883766483548e-05, + "loss": 0.3689, + "step": 1159 + }, + { + "epoch": 0.1, + "learning_rate": 1.974821898101302e-05, + "loss": 0.3565, + "step": 1160 + }, + { + "epoch": 0.1, + "learning_rate": 1.974759954584601e-05, + "loss": 0.3069, + "step": 1161 + }, + { + "epoch": 0.1, + "learning_rate": 1.9746979359382193e-05, + "loss": 0.3671, + "step": 1162 + }, + { + "epoch": 0.1, + "learning_rate": 1.9746358421669365e-05, + "loss": 0.3289, + "step": 1163 + }, + { + "epoch": 0.1, + "learning_rate": 1.974573673275539e-05, + "loss": 0.2828, + "step": 1164 + }, + { + "epoch": 0.1, + "learning_rate": 1.9745114292688183e-05, + "loss": 0.3294, + "step": 1165 + }, + { + "epoch": 0.1, + "learning_rate": 1.9744491101515715e-05, + "loss": 0.3215, + "step": 1166 + }, + { + "epoch": 0.1, + "learning_rate": 1.9743867159286022e-05, + "loss": 0.2885, + "step": 1167 + }, + { + "epoch": 0.1, + "learning_rate": 1.9743242466047196e-05, + "loss": 0.2812, + "step": 1168 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742617021847385e-05, + "loss": 0.3553, + "step": 1169 + }, + { + "epoch": 0.1, + "learning_rate": 1.9741990826734793e-05, + "loss": 0.3026, + "step": 1170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9741363880757682e-05, + "loss": 0.3358, + "step": 1171 + }, + { + "epoch": 0.1, + "learning_rate": 1.974073618396438e-05, + "loss": 0.2916, + "step": 1172 + }, + { + "epoch": 0.1, + "learning_rate": 1.974010773640326e-05, + "loss": 0.3527, + "step": 1173 + }, + { + "epoch": 0.1, + "learning_rate": 1.9739478538122765e-05, + "loss": 0.6641, + "step": 1174 + }, + { + "epoch": 0.1, + "learning_rate": 1.9738848589171388e-05, + "loss": 0.3412, + "step": 1175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9738217889597684e-05, + "loss": 0.3173, + "step": 1176 + }, + { + "epoch": 0.1, + "learning_rate": 1.9737586439450262e-05, + "loss": 0.319, + "step": 1177 + }, + { + "epoch": 0.1, + "learning_rate": 1.9736954238777793e-05, + "loss": 0.2921, + "step": 1178 + }, + { + "epoch": 0.1, + "learning_rate": 1.9736321287629e-05, + "loss": 0.3076, + "step": 1179 + }, + { + "epoch": 0.1, + "learning_rate": 1.9735687586052673e-05, + "loss": 0.3096, + "step": 1180 + }, + { + "epoch": 0.1, + "learning_rate": 1.9735053134097653e-05, + "loss": 0.356, + "step": 1181 + }, + { + "epoch": 0.1, + "learning_rate": 1.973441793181284e-05, + "loss": 0.2906, + "step": 1182 + }, + { + "epoch": 0.1, + "learning_rate": 1.973378197924719e-05, + "loss": 0.3333, + "step": 1183 + }, + { + "epoch": 0.1, + "learning_rate": 1.973314527644972e-05, + "loss": 0.3184, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 1.9732507823469507e-05, + "loss": 0.3072, + "step": 1185 + }, + { + "epoch": 0.1, + "learning_rate": 1.973186962035568e-05, + "loss": 0.316, + "step": 1186 + }, + { + "epoch": 0.1, + "learning_rate": 1.973123066715743e-05, + "loss": 0.2676, + "step": 1187 + }, + { + "epoch": 0.1, + "learning_rate": 1.9730590963924005e-05, + "loss": 0.3167, + "step": 1188 + }, + { + "epoch": 0.1, + "learning_rate": 1.9729950510704708e-05, + "loss": 0.3391, + "step": 1189 + }, + { + "epoch": 0.1, + "learning_rate": 1.97293093075489e-05, + "loss": 0.3276, + "step": 1190 + }, + { + "epoch": 0.1, + "learning_rate": 1.972866735450601e-05, + "loss": 0.326, + "step": 1191 + }, + { + "epoch": 0.1, + "learning_rate": 1.9728024651625506e-05, + "loss": 0.3426, + "step": 1192 + }, + { + "epoch": 0.1, + "learning_rate": 1.972738119895693e-05, + "loss": 0.2963, + "step": 1193 + }, + { + "epoch": 0.1, + "learning_rate": 1.972673699654988e-05, + "loss": 0.3212, + "step": 1194 + }, + { + "epoch": 0.1, + "learning_rate": 1.9726092044453996e-05, + "loss": 0.3074, + "step": 1195 + }, + { + "epoch": 0.1, + "learning_rate": 1.9725446342719e-05, + "loss": 0.3315, + "step": 1196 + }, + { + "epoch": 0.1, + "learning_rate": 1.9724799891394653e-05, + "loss": 0.3264, + "step": 1197 + }, + { + "epoch": 0.1, + "learning_rate": 1.9724152690530785e-05, + "loss": 0.3649, + "step": 1198 + }, + { + "epoch": 0.1, + "learning_rate": 1.9723504740177274e-05, + "loss": 0.3009, + "step": 1199 + }, + { + "epoch": 0.1, + "learning_rate": 1.972285604038406e-05, + "loss": 0.3538, + "step": 1200 + }, + { + "epoch": 0.1, + "learning_rate": 1.972220659120115e-05, + "loss": 0.306, + "step": 1201 + }, + { + "epoch": 0.1, + "learning_rate": 1.972155639267859e-05, + "loss": 0.2983, + "step": 1202 + }, + { + "epoch": 0.1, + "learning_rate": 1.97209054448665e-05, + "loss": 0.3271, + "step": 1203 + }, + { + "epoch": 0.1, + "learning_rate": 1.9720253747815055e-05, + "loss": 0.3235, + "step": 1204 + }, + { + "epoch": 0.1, + "learning_rate": 1.9719601301574476e-05, + "loss": 0.3336, + "step": 1205 + }, + { + "epoch": 0.1, + "learning_rate": 1.9718948106195055e-05, + "loss": 0.3091, + "step": 1206 + }, + { + "epoch": 0.1, + "learning_rate": 1.971829416172714e-05, + "loss": 0.3513, + "step": 1207 + }, + { + "epoch": 0.1, + "learning_rate": 1.9717639468221127e-05, + "loss": 0.3291, + "step": 1208 + }, + { + "epoch": 0.1, + "learning_rate": 1.9716984025727478e-05, + "loss": 0.3432, + "step": 1209 + }, + { + "epoch": 0.1, + "learning_rate": 1.971632783429672e-05, + "loss": 0.3279, + "step": 1210 + }, + { + "epoch": 0.1, + "learning_rate": 1.9715670893979416e-05, + "loss": 0.3201, + "step": 1211 + }, + { + "epoch": 0.1, + "learning_rate": 1.971501320482621e-05, + "loss": 0.3038, + "step": 1212 + }, + { + "epoch": 0.1, + "learning_rate": 1.9714354766887787e-05, + "loss": 0.3313, + "step": 1213 + }, + { + "epoch": 0.1, + "learning_rate": 1.97136955802149e-05, + "loss": 0.3126, + "step": 1214 + }, + { + "epoch": 0.1, + "learning_rate": 1.9713035644858354e-05, + "loss": 0.3312, + "step": 1215 + }, + { + "epoch": 0.1, + "learning_rate": 1.9712374960869015e-05, + "loss": 0.6537, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 1.9711713528297802e-05, + "loss": 0.2822, + "step": 1217 + }, + { + "epoch": 0.1, + "learning_rate": 1.97110513471957e-05, + "loss": 0.3393, + "step": 1218 + }, + { + "epoch": 0.1, + "learning_rate": 1.9710388417613742e-05, + "loss": 0.2998, + "step": 1219 + }, + { + "epoch": 0.1, + "learning_rate": 1.9709724739603024e-05, + "loss": 0.3191, + "step": 1220 + }, + { + "epoch": 0.1, + "learning_rate": 1.9709060313214703e-05, + "loss": 0.3303, + "step": 1221 + }, + { + "epoch": 0.1, + "learning_rate": 1.9708395138499986e-05, + "loss": 0.3647, + "step": 1222 + }, + { + "epoch": 0.1, + "learning_rate": 1.9707729215510143e-05, + "loss": 0.285, + "step": 1223 + }, + { + "epoch": 0.1, + "learning_rate": 1.9707062544296497e-05, + "loss": 0.3701, + "step": 1224 + }, + { + "epoch": 0.11, + "learning_rate": 1.970639512491044e-05, + "loss": 0.3284, + "step": 1225 + }, + { + "epoch": 0.11, + "learning_rate": 1.9705726957403398e-05, + "loss": 0.2914, + "step": 1226 + }, + { + "epoch": 0.11, + "learning_rate": 1.9705058041826887e-05, + "loss": 0.284, + "step": 1227 + }, + { + "epoch": 0.11, + "learning_rate": 1.9704388378232454e-05, + "loss": 0.3489, + "step": 1228 + }, + { + "epoch": 0.11, + "learning_rate": 1.9703717966671715e-05, + "loss": 0.2979, + "step": 1229 + }, + { + "epoch": 0.11, + "learning_rate": 1.970304680719634e-05, + "loss": 0.3108, + "step": 1230 + }, + { + "epoch": 0.11, + "learning_rate": 1.9702374899858067e-05, + "loss": 0.3232, + "step": 1231 + }, + { + "epoch": 0.11, + "learning_rate": 1.9701702244708673e-05, + "loss": 0.287, + "step": 1232 + }, + { + "epoch": 0.11, + "learning_rate": 1.970102884180001e-05, + "loss": 0.2937, + "step": 1233 + }, + { + "epoch": 0.11, + "learning_rate": 1.9700354691183977e-05, + "loss": 0.3445, + "step": 1234 + }, + { + "epoch": 0.11, + "learning_rate": 1.969967979291254e-05, + "loss": 0.3489, + "step": 1235 + }, + { + "epoch": 0.11, + "learning_rate": 1.9699004147037706e-05, + "loss": 0.3497, + "step": 1236 + }, + { + "epoch": 0.11, + "learning_rate": 1.9698327753611557e-05, + "loss": 0.3231, + "step": 1237 + }, + { + "epoch": 0.11, + "learning_rate": 1.9697650612686228e-05, + "loss": 0.3339, + "step": 1238 + }, + { + "epoch": 0.11, + "learning_rate": 1.9696972724313904e-05, + "loss": 0.3009, + "step": 1239 + }, + { + "epoch": 0.11, + "learning_rate": 1.9696294088546842e-05, + "loss": 0.3696, + "step": 1240 + }, + { + "epoch": 0.11, + "learning_rate": 1.969561470543734e-05, + "loss": 0.3711, + "step": 1241 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694934575037762e-05, + "loss": 0.3234, + "step": 1242 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694253697400532e-05, + "loss": 0.3335, + "step": 1243 + }, + { + "epoch": 0.11, + "learning_rate": 1.9693572072578127e-05, + "loss": 0.3385, + "step": 1244 + }, + { + "epoch": 0.11, + "learning_rate": 1.9692889700623084e-05, + "loss": 0.3211, + "step": 1245 + }, + { + "epoch": 0.11, + "learning_rate": 1.9692206581588e-05, + "loss": 0.3268, + "step": 1246 + }, + { + "epoch": 0.11, + "learning_rate": 1.969152271552552e-05, + "loss": 0.3088, + "step": 1247 + }, + { + "epoch": 0.11, + "learning_rate": 1.9690838102488356e-05, + "loss": 0.316, + "step": 1248 + }, + { + "epoch": 0.11, + "learning_rate": 1.9690152742529277e-05, + "loss": 0.3044, + "step": 1249 + }, + { + "epoch": 0.11, + "learning_rate": 1.9689466635701106e-05, + "loss": 0.2941, + "step": 1250 + }, + { + "epoch": 0.11, + "learning_rate": 1.968877978205672e-05, + "loss": 0.2966, + "step": 1251 + }, + { + "epoch": 0.11, + "learning_rate": 1.9688092181649065e-05, + "loss": 0.3041, + "step": 1252 + }, + { + "epoch": 0.11, + "learning_rate": 1.9687403834531133e-05, + "loss": 0.3092, + "step": 1253 + }, + { + "epoch": 0.11, + "learning_rate": 1.968671474075598e-05, + "loss": 0.3373, + "step": 1254 + }, + { + "epoch": 0.11, + "learning_rate": 1.968602490037672e-05, + "loss": 0.2792, + "step": 1255 + }, + { + "epoch": 0.11, + "learning_rate": 1.9685334313446523e-05, + "loss": 0.3065, + "step": 1256 + }, + { + "epoch": 0.11, + "learning_rate": 1.968464298001861e-05, + "loss": 0.3174, + "step": 1257 + }, + { + "epoch": 0.11, + "learning_rate": 1.968395090014627e-05, + "loss": 0.3482, + "step": 1258 + }, + { + "epoch": 0.11, + "learning_rate": 1.9683258073882845e-05, + "loss": 0.2797, + "step": 1259 + }, + { + "epoch": 0.11, + "learning_rate": 1.9682564501281733e-05, + "loss": 0.3176, + "step": 1260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9681870182396395e-05, + "loss": 0.632, + "step": 1261 + }, + { + "epoch": 0.11, + "learning_rate": 1.9681175117280343e-05, + "loss": 0.3442, + "step": 1262 + }, + { + "epoch": 0.11, + "learning_rate": 1.9680479305987147e-05, + "loss": 0.3046, + "step": 1263 + }, + { + "epoch": 0.11, + "learning_rate": 1.9679782748570434e-05, + "loss": 0.3264, + "step": 1264 + }, + { + "epoch": 0.11, + "learning_rate": 1.9679085445083903e-05, + "loss": 0.321, + "step": 1265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9678387395581292e-05, + "loss": 0.2701, + "step": 1266 + }, + { + "epoch": 0.11, + "learning_rate": 1.96776886001164e-05, + "loss": 0.2859, + "step": 1267 + }, + { + "epoch": 0.11, + "learning_rate": 1.9676989058743088e-05, + "loss": 0.3779, + "step": 1268 + }, + { + "epoch": 0.11, + "learning_rate": 1.9676288771515275e-05, + "loss": 0.335, + "step": 1269 + }, + { + "epoch": 0.11, + "learning_rate": 1.9675587738486935e-05, + "loss": 0.3172, + "step": 1270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9674885959712106e-05, + "loss": 0.3051, + "step": 1271 + }, + { + "epoch": 0.11, + "learning_rate": 1.9674183435244867e-05, + "loss": 0.3927, + "step": 1272 + }, + { + "epoch": 0.11, + "learning_rate": 1.9673480165139372e-05, + "loss": 0.3196, + "step": 1273 + }, + { + "epoch": 0.11, + "learning_rate": 1.9672776149449826e-05, + "loss": 0.3372, + "step": 1274 + }, + { + "epoch": 0.11, + "learning_rate": 1.9672071388230485e-05, + "loss": 0.3205, + "step": 1275 + }, + { + "epoch": 0.11, + "learning_rate": 1.967136588153568e-05, + "loss": 0.604, + "step": 1276 + }, + { + "epoch": 0.11, + "learning_rate": 1.9670659629419774e-05, + "loss": 0.316, + "step": 1277 + }, + { + "epoch": 0.11, + "learning_rate": 1.9669952631937206e-05, + "loss": 0.3635, + "step": 1278 + }, + { + "epoch": 0.11, + "learning_rate": 1.966924488914247e-05, + "loss": 0.3298, + "step": 1279 + }, + { + "epoch": 0.11, + "learning_rate": 1.9668536401090123e-05, + "loss": 0.3095, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 1.9667827167834756e-05, + "loss": 0.3401, + "step": 1281 + }, + { + "epoch": 0.11, + "learning_rate": 1.9667117189431045e-05, + "loss": 0.3059, + "step": 1282 + }, + { + "epoch": 0.11, + "learning_rate": 1.9666406465933703e-05, + "loss": 0.2832, + "step": 1283 + }, + { + "epoch": 0.11, + "learning_rate": 1.966569499739752e-05, + "loss": 0.3093, + "step": 1284 + }, + { + "epoch": 0.11, + "learning_rate": 1.9664982783877323e-05, + "loss": 0.3404, + "step": 1285 + }, + { + "epoch": 0.11, + "learning_rate": 1.966426982542801e-05, + "loss": 0.3428, + "step": 1286 + }, + { + "epoch": 0.11, + "learning_rate": 1.966355612210453e-05, + "loss": 0.3384, + "step": 1287 + }, + { + "epoch": 0.11, + "learning_rate": 1.9662841673961893e-05, + "loss": 0.2938, + "step": 1288 + }, + { + "epoch": 0.11, + "learning_rate": 1.9662126481055165e-05, + "loss": 0.3517, + "step": 1289 + }, + { + "epoch": 0.11, + "learning_rate": 1.966141054343947e-05, + "loss": 0.386, + "step": 1290 + }, + { + "epoch": 0.11, + "learning_rate": 1.9660693861169992e-05, + "loss": 0.3474, + "step": 1291 + }, + { + "epoch": 0.11, + "learning_rate": 1.9659976434301967e-05, + "loss": 0.2773, + "step": 1292 + }, + { + "epoch": 0.11, + "learning_rate": 1.9659258262890683e-05, + "loss": 0.3221, + "step": 1293 + }, + { + "epoch": 0.11, + "learning_rate": 1.9658539346991504e-05, + "loss": 0.3381, + "step": 1294 + }, + { + "epoch": 0.11, + "learning_rate": 1.9657819686659838e-05, + "loss": 0.3088, + "step": 1295 + }, + { + "epoch": 0.11, + "learning_rate": 1.9657099281951148e-05, + "loss": 0.2853, + "step": 1296 + }, + { + "epoch": 0.11, + "learning_rate": 1.9656378132920964e-05, + "loss": 0.3268, + "step": 1297 + }, + { + "epoch": 0.11, + "learning_rate": 1.9655656239624864e-05, + "loss": 0.3094, + "step": 1298 + }, + { + "epoch": 0.11, + "learning_rate": 1.9654933602118494e-05, + "loss": 0.3052, + "step": 1299 + }, + { + "epoch": 0.11, + "learning_rate": 1.965421022045755e-05, + "loss": 0.6509, + "step": 1300 + }, + { + "epoch": 0.11, + "learning_rate": 1.9653486094697785e-05, + "loss": 0.3095, + "step": 1301 + }, + { + "epoch": 0.11, + "learning_rate": 1.9652761224895006e-05, + "loss": 0.3526, + "step": 1302 + }, + { + "epoch": 0.11, + "learning_rate": 1.9652035611105093e-05, + "loss": 0.3631, + "step": 1303 + }, + { + "epoch": 0.11, + "learning_rate": 1.9651309253383964e-05, + "loss": 0.3007, + "step": 1304 + }, + { + "epoch": 0.11, + "learning_rate": 1.9650582151787608e-05, + "loss": 0.3162, + "step": 1305 + }, + { + "epoch": 0.11, + "learning_rate": 1.9649854306372065e-05, + "loss": 0.2925, + "step": 1306 + }, + { + "epoch": 0.11, + "learning_rate": 1.964912571719343e-05, + "loss": 0.3151, + "step": 1307 + }, + { + "epoch": 0.11, + "learning_rate": 1.9648396384307866e-05, + "loss": 0.3362, + "step": 1308 + }, + { + "epoch": 0.11, + "learning_rate": 1.964766630777158e-05, + "loss": 0.3009, + "step": 1309 + }, + { + "epoch": 0.11, + "learning_rate": 1.9646935487640848e-05, + "loss": 0.3253, + "step": 1310 + }, + { + "epoch": 0.11, + "learning_rate": 1.9646203923971992e-05, + "loss": 0.2739, + "step": 1311 + }, + { + "epoch": 0.11, + "learning_rate": 1.9645471616821404e-05, + "loss": 0.2913, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 1.9644738566245526e-05, + "loss": 0.3408, + "step": 1313 + }, + { + "epoch": 0.11, + "learning_rate": 1.964400477230085e-05, + "loss": 0.348, + "step": 1314 + }, + { + "epoch": 0.11, + "learning_rate": 1.9643270235043942e-05, + "loss": 0.3062, + "step": 1315 + }, + { + "epoch": 0.11, + "learning_rate": 1.964253495453141e-05, + "loss": 0.358, + "step": 1316 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641798930819932e-05, + "loss": 0.3432, + "step": 1317 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641062163966232e-05, + "loss": 0.3134, + "step": 1318 + }, + { + "epoch": 0.11, + "learning_rate": 1.96403246540271e-05, + "loss": 0.3087, + "step": 1319 + }, + { + "epoch": 0.11, + "learning_rate": 1.9639586401059376e-05, + "loss": 0.3083, + "step": 1320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9638847405119966e-05, + "loss": 0.3452, + "step": 1321 + }, + { + "epoch": 0.11, + "learning_rate": 1.963810766626582e-05, + "loss": 0.2977, + "step": 1322 + }, + { + "epoch": 0.11, + "learning_rate": 1.963736718455396e-05, + "loss": 0.3087, + "step": 1323 + }, + { + "epoch": 0.11, + "learning_rate": 1.963662596004146e-05, + "loss": 0.2944, + "step": 1324 + }, + { + "epoch": 0.11, + "learning_rate": 1.9635883992785443e-05, + "loss": 0.2821, + "step": 1325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9635141282843105e-05, + "loss": 0.3586, + "step": 1326 + }, + { + "epoch": 0.11, + "learning_rate": 1.9634397830271685e-05, + "loss": 0.3307, + "step": 1327 + }, + { + "epoch": 0.11, + "learning_rate": 1.963365363512848e-05, + "loss": 0.2733, + "step": 1328 + }, + { + "epoch": 0.11, + "learning_rate": 1.9632908697470857e-05, + "loss": 0.3167, + "step": 1329 + }, + { + "epoch": 0.11, + "learning_rate": 1.963216301735623e-05, + "loss": 0.3754, + "step": 1330 + }, + { + "epoch": 0.11, + "learning_rate": 1.963141659484207e-05, + "loss": 0.3229, + "step": 1331 + }, + { + "epoch": 0.11, + "learning_rate": 1.9630669429985908e-05, + "loss": 0.2964, + "step": 1332 + }, + { + "epoch": 0.11, + "learning_rate": 1.9629921522845338e-05, + "loss": 0.2958, + "step": 1333 + }, + { + "epoch": 0.11, + "learning_rate": 1.9629172873477995e-05, + "loss": 0.3058, + "step": 1334 + }, + { + "epoch": 0.11, + "learning_rate": 1.9628423481941587e-05, + "loss": 0.3494, + "step": 1335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9627673348293874e-05, + "loss": 0.3159, + "step": 1336 + }, + { + "epoch": 0.11, + "learning_rate": 1.962692247259267e-05, + "loss": 0.3448, + "step": 1337 + }, + { + "epoch": 0.11, + "learning_rate": 1.962617085489585e-05, + "loss": 0.3494, + "step": 1338 + }, + { + "epoch": 0.11, + "learning_rate": 1.9625418495261343e-05, + "loss": 0.3315, + "step": 1339 + }, + { + "epoch": 0.11, + "learning_rate": 1.962466539374714e-05, + "loss": 0.3564, + "step": 1340 + }, + { + "epoch": 0.11, + "learning_rate": 1.9623911550411286e-05, + "loss": 0.3583, + "step": 1341 + }, + { + "epoch": 0.12, + "learning_rate": 1.9623156965311884e-05, + "loss": 0.2705, + "step": 1342 + }, + { + "epoch": 0.12, + "learning_rate": 1.9622401638507093e-05, + "loss": 0.2719, + "step": 1343 + }, + { + "epoch": 0.12, + "learning_rate": 1.9621645570055127e-05, + "loss": 0.3119, + "step": 1344 + }, + { + "epoch": 0.12, + "learning_rate": 1.9620888760014262e-05, + "loss": 0.3333, + "step": 1345 + }, + { + "epoch": 0.12, + "learning_rate": 1.962013120844283e-05, + "loss": 0.3217, + "step": 1346 + }, + { + "epoch": 0.12, + "learning_rate": 1.961937291539922e-05, + "loss": 0.3124, + "step": 1347 + }, + { + "epoch": 0.12, + "learning_rate": 1.9618613880941876e-05, + "loss": 0.3288, + "step": 1348 + }, + { + "epoch": 0.12, + "learning_rate": 1.9617854105129303e-05, + "loss": 0.3229, + "step": 1349 + }, + { + "epoch": 0.12, + "learning_rate": 1.9617093588020057e-05, + "loss": 0.3679, + "step": 1350 + }, + { + "epoch": 0.12, + "learning_rate": 1.9616332329672756e-05, + "loss": 0.353, + "step": 1351 + }, + { + "epoch": 0.12, + "learning_rate": 1.9615570330146077e-05, + "loss": 0.3467, + "step": 1352 + }, + { + "epoch": 0.12, + "learning_rate": 1.9614807589498746e-05, + "loss": 0.3987, + "step": 1353 + }, + { + "epoch": 0.12, + "learning_rate": 1.9614044107789553e-05, + "loss": 0.3444, + "step": 1354 + }, + { + "epoch": 0.12, + "learning_rate": 1.9613279885077347e-05, + "loss": 0.2725, + "step": 1355 + }, + { + "epoch": 0.12, + "learning_rate": 1.9612514921421028e-05, + "loss": 0.3038, + "step": 1356 + }, + { + "epoch": 0.12, + "learning_rate": 1.9611749216879555e-05, + "loss": 0.3329, + "step": 1357 + }, + { + "epoch": 0.12, + "learning_rate": 1.9610982771511947e-05, + "loss": 0.2932, + "step": 1358 + }, + { + "epoch": 0.12, + "learning_rate": 1.9610215585377275e-05, + "loss": 0.3118, + "step": 1359 + }, + { + "epoch": 0.12, + "learning_rate": 1.9609447658534673e-05, + "loss": 0.3145, + "step": 1360 + }, + { + "epoch": 0.12, + "learning_rate": 1.9608678991043325e-05, + "loss": 0.3352, + "step": 1361 + }, + { + "epoch": 0.12, + "learning_rate": 1.9607909582962478e-05, + "loss": 0.2866, + "step": 1362 + }, + { + "epoch": 0.12, + "learning_rate": 1.9607139434351435e-05, + "loss": 0.2622, + "step": 1363 + }, + { + "epoch": 0.12, + "learning_rate": 1.9606368545269557e-05, + "loss": 0.3335, + "step": 1364 + }, + { + "epoch": 0.12, + "learning_rate": 1.9605596915776254e-05, + "loss": 0.2919, + "step": 1365 + }, + { + "epoch": 0.12, + "learning_rate": 1.9604824545931005e-05, + "loss": 0.3305, + "step": 1366 + }, + { + "epoch": 0.12, + "learning_rate": 1.9604051435793342e-05, + "loss": 0.3397, + "step": 1367 + }, + { + "epoch": 0.12, + "learning_rate": 1.9603277585422847e-05, + "loss": 0.3036, + "step": 1368 + }, + { + "epoch": 0.12, + "learning_rate": 1.9602502994879163e-05, + "loss": 0.3799, + "step": 1369 + }, + { + "epoch": 0.12, + "learning_rate": 1.9601727664222e-05, + "loss": 0.2985, + "step": 1370 + }, + { + "epoch": 0.12, + "learning_rate": 1.960095159351111e-05, + "loss": 0.3703, + "step": 1371 + }, + { + "epoch": 0.12, + "learning_rate": 1.9600174782806313e-05, + "loss": 0.3688, + "step": 1372 + }, + { + "epoch": 0.12, + "learning_rate": 1.9599397232167478e-05, + "loss": 0.2938, + "step": 1373 + }, + { + "epoch": 0.12, + "learning_rate": 1.9598618941654535e-05, + "loss": 0.3423, + "step": 1374 + }, + { + "epoch": 0.12, + "learning_rate": 1.9597839911327475e-05, + "loss": 0.3302, + "step": 1375 + }, + { + "epoch": 0.12, + "learning_rate": 1.959706014124634e-05, + "loss": 0.3286, + "step": 1376 + }, + { + "epoch": 0.12, + "learning_rate": 1.9596279631471228e-05, + "loss": 0.3239, + "step": 1377 + }, + { + "epoch": 0.12, + "learning_rate": 1.9595498382062295e-05, + "loss": 0.2922, + "step": 1378 + }, + { + "epoch": 0.12, + "learning_rate": 1.9594716393079765e-05, + "loss": 0.3518, + "step": 1379 + }, + { + "epoch": 0.12, + "learning_rate": 1.9593933664583903e-05, + "loss": 0.3268, + "step": 1380 + }, + { + "epoch": 0.12, + "learning_rate": 1.9593150196635037e-05, + "loss": 0.3065, + "step": 1381 + }, + { + "epoch": 0.12, + "learning_rate": 1.9592365989293557e-05, + "loss": 0.2964, + "step": 1382 + }, + { + "epoch": 0.12, + "learning_rate": 1.95915810426199e-05, + "loss": 0.3573, + "step": 1383 + }, + { + "epoch": 0.12, + "learning_rate": 1.9590795356674578e-05, + "loss": 0.34, + "step": 1384 + }, + { + "epoch": 0.12, + "learning_rate": 1.9590008931518133e-05, + "loss": 0.3181, + "step": 1385 + }, + { + "epoch": 0.12, + "learning_rate": 1.9589221767211188e-05, + "loss": 0.3452, + "step": 1386 + }, + { + "epoch": 0.12, + "learning_rate": 1.9588433863814405e-05, + "loss": 0.2997, + "step": 1387 + }, + { + "epoch": 0.12, + "learning_rate": 1.9587645221388522e-05, + "loss": 0.3014, + "step": 1388 + }, + { + "epoch": 0.12, + "learning_rate": 1.958685583999432e-05, + "loss": 0.3198, + "step": 1389 + }, + { + "epoch": 0.12, + "learning_rate": 1.9586065719692636e-05, + "loss": 0.3335, + "step": 1390 + }, + { + "epoch": 0.12, + "learning_rate": 1.958527486054438e-05, + "loss": 0.3108, + "step": 1391 + }, + { + "epoch": 0.12, + "learning_rate": 1.9584483262610492e-05, + "loss": 0.3513, + "step": 1392 + }, + { + "epoch": 0.12, + "learning_rate": 1.9583690925951996e-05, + "loss": 0.2822, + "step": 1393 + }, + { + "epoch": 0.12, + "learning_rate": 1.9582897850629958e-05, + "loss": 0.3318, + "step": 1394 + }, + { + "epoch": 0.12, + "learning_rate": 1.9582104036705506e-05, + "loss": 0.2783, + "step": 1395 + }, + { + "epoch": 0.12, + "learning_rate": 1.9581309484239818e-05, + "loss": 0.3094, + "step": 1396 + }, + { + "epoch": 0.12, + "learning_rate": 1.9580514193294137e-05, + "loss": 0.3391, + "step": 1397 + }, + { + "epoch": 0.12, + "learning_rate": 1.9579718163929767e-05, + "loss": 0.2934, + "step": 1398 + }, + { + "epoch": 0.12, + "learning_rate": 1.957892139620805e-05, + "loss": 0.3187, + "step": 1399 + }, + { + "epoch": 0.12, + "learning_rate": 1.9578123890190405e-05, + "loss": 0.3441, + "step": 1400 + }, + { + "epoch": 0.12, + "learning_rate": 1.9577325645938294e-05, + "loss": 0.3539, + "step": 1401 + }, + { + "epoch": 0.12, + "learning_rate": 1.957652666351325e-05, + "loss": 0.2948, + "step": 1402 + }, + { + "epoch": 0.12, + "learning_rate": 1.9575726942976844e-05, + "loss": 0.3293, + "step": 1403 + }, + { + "epoch": 0.12, + "learning_rate": 1.9574926484390725e-05, + "loss": 0.3058, + "step": 1404 + }, + { + "epoch": 0.12, + "learning_rate": 1.9574125287816582e-05, + "loss": 0.3119, + "step": 1405 + }, + { + "epoch": 0.12, + "learning_rate": 1.957332335331617e-05, + "loss": 0.3138, + "step": 1406 + }, + { + "epoch": 0.12, + "learning_rate": 1.95725206809513e-05, + "loss": 0.3298, + "step": 1407 + }, + { + "epoch": 0.12, + "learning_rate": 1.9571717270783827e-05, + "loss": 0.2903, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 1.9570913122875686e-05, + "loss": 0.3236, + "step": 1409 + }, + { + "epoch": 0.12, + "learning_rate": 1.9570108237288853e-05, + "loss": 0.316, + "step": 1410 + }, + { + "epoch": 0.12, + "learning_rate": 1.9569302614085366e-05, + "loss": 0.3088, + "step": 1411 + }, + { + "epoch": 0.12, + "learning_rate": 1.9568496253327317e-05, + "loss": 0.3454, + "step": 1412 + }, + { + "epoch": 0.12, + "learning_rate": 1.956768915507685e-05, + "loss": 0.3477, + "step": 1413 + }, + { + "epoch": 0.12, + "learning_rate": 1.9566881319396184e-05, + "loss": 0.3123, + "step": 1414 + }, + { + "epoch": 0.12, + "learning_rate": 1.9566072746347576e-05, + "loss": 0.3279, + "step": 1415 + }, + { + "epoch": 0.12, + "learning_rate": 1.956526343599335e-05, + "loss": 0.3585, + "step": 1416 + }, + { + "epoch": 0.12, + "learning_rate": 1.956445338839588e-05, + "loss": 0.316, + "step": 1417 + }, + { + "epoch": 0.12, + "learning_rate": 1.95636426036176e-05, + "loss": 0.2994, + "step": 1418 + }, + { + "epoch": 0.12, + "learning_rate": 1.9562831081721007e-05, + "loss": 0.318, + "step": 1419 + }, + { + "epoch": 0.12, + "learning_rate": 1.956201882276864e-05, + "loss": 0.2904, + "step": 1420 + }, + { + "epoch": 0.12, + "learning_rate": 1.9561205826823116e-05, + "loss": 0.3336, + "step": 1421 + }, + { + "epoch": 0.12, + "learning_rate": 1.956039209394709e-05, + "loss": 0.3124, + "step": 1422 + }, + { + "epoch": 0.12, + "learning_rate": 1.9559577624203277e-05, + "loss": 0.3179, + "step": 1423 + }, + { + "epoch": 0.12, + "learning_rate": 1.9558762417654456e-05, + "loss": 0.313, + "step": 1424 + }, + { + "epoch": 0.12, + "learning_rate": 1.9557946474363462e-05, + "loss": 0.3146, + "step": 1425 + }, + { + "epoch": 0.12, + "learning_rate": 1.955712979439318e-05, + "loss": 0.3507, + "step": 1426 + }, + { + "epoch": 0.12, + "learning_rate": 1.9556312377806555e-05, + "loss": 0.3499, + "step": 1427 + }, + { + "epoch": 0.12, + "learning_rate": 1.955549422466659e-05, + "loss": 0.3262, + "step": 1428 + }, + { + "epoch": 0.12, + "learning_rate": 1.955467533503635e-05, + "loss": 0.282, + "step": 1429 + }, + { + "epoch": 0.12, + "learning_rate": 1.9553855708978943e-05, + "loss": 0.3256, + "step": 1430 + }, + { + "epoch": 0.12, + "learning_rate": 1.9553035346557543e-05, + "loss": 0.3435, + "step": 1431 + }, + { + "epoch": 0.12, + "learning_rate": 1.9552214247835387e-05, + "loss": 0.3371, + "step": 1432 + }, + { + "epoch": 0.12, + "learning_rate": 1.955139241287575e-05, + "loss": 0.3386, + "step": 1433 + }, + { + "epoch": 0.12, + "learning_rate": 1.9550569841741984e-05, + "loss": 0.3165, + "step": 1434 + }, + { + "epoch": 0.12, + "learning_rate": 1.9549746534497484e-05, + "loss": 0.3429, + "step": 1435 + }, + { + "epoch": 0.12, + "learning_rate": 1.9548922491205708e-05, + "loss": 0.3297, + "step": 1436 + }, + { + "epoch": 0.12, + "learning_rate": 1.954809771193017e-05, + "loss": 0.3159, + "step": 1437 + }, + { + "epoch": 0.12, + "learning_rate": 1.9547272196734436e-05, + "loss": 0.2679, + "step": 1438 + }, + { + "epoch": 0.12, + "learning_rate": 1.954644594568214e-05, + "loss": 0.28, + "step": 1439 + }, + { + "epoch": 0.12, + "learning_rate": 1.9545618958836957e-05, + "loss": 0.2993, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 1.9544791236262634e-05, + "loss": 0.295, + "step": 1441 + }, + { + "epoch": 0.12, + "learning_rate": 1.954396277802296e-05, + "loss": 0.295, + "step": 1442 + }, + { + "epoch": 0.12, + "learning_rate": 1.9543133584181798e-05, + "loss": 0.345, + "step": 1443 + }, + { + "epoch": 0.12, + "learning_rate": 1.954230365480305e-05, + "loss": 0.3275, + "step": 1444 + }, + { + "epoch": 0.12, + "learning_rate": 1.954147298995069e-05, + "loss": 0.2953, + "step": 1445 + }, + { + "epoch": 0.12, + "learning_rate": 1.9540641589688735e-05, + "loss": 0.4037, + "step": 1446 + }, + { + "epoch": 0.12, + "learning_rate": 1.953980945408127e-05, + "loss": 0.3205, + "step": 1447 + }, + { + "epoch": 0.12, + "learning_rate": 1.9538976583192428e-05, + "loss": 0.3191, + "step": 1448 + }, + { + "epoch": 0.12, + "learning_rate": 1.9538142977086406e-05, + "loss": 0.3475, + "step": 1449 + }, + { + "epoch": 0.12, + "learning_rate": 1.953730863582745e-05, + "loss": 0.3641, + "step": 1450 + }, + { + "epoch": 0.12, + "learning_rate": 1.9536473559479873e-05, + "loss": 0.6105, + "step": 1451 + }, + { + "epoch": 0.12, + "learning_rate": 1.953563774810803e-05, + "loss": 0.2547, + "step": 1452 + }, + { + "epoch": 0.12, + "learning_rate": 1.953480120177635e-05, + "loss": 0.353, + "step": 1453 + }, + { + "epoch": 0.12, + "learning_rate": 1.9533963920549307e-05, + "loss": 0.6849, + "step": 1454 + }, + { + "epoch": 0.12, + "learning_rate": 1.9533125904491433e-05, + "loss": 0.384, + "step": 1455 + }, + { + "epoch": 0.12, + "learning_rate": 1.953228715366732e-05, + "loss": 0.3024, + "step": 1456 + }, + { + "epoch": 0.12, + "learning_rate": 1.953144766814161e-05, + "loss": 0.2779, + "step": 1457 + }, + { + "epoch": 0.12, + "learning_rate": 1.953060744797901e-05, + "loss": 0.2972, + "step": 1458 + }, + { + "epoch": 0.13, + "learning_rate": 1.9529766493244284e-05, + "loss": 0.3231, + "step": 1459 + }, + { + "epoch": 0.13, + "learning_rate": 1.952892480400224e-05, + "loss": 0.5813, + "step": 1460 + }, + { + "epoch": 0.13, + "learning_rate": 1.952808238031776e-05, + "loss": 0.3079, + "step": 1461 + }, + { + "epoch": 0.13, + "learning_rate": 1.952723922225577e-05, + "loss": 0.2817, + "step": 1462 + }, + { + "epoch": 0.13, + "learning_rate": 1.9526395329881253e-05, + "loss": 0.3154, + "step": 1463 + }, + { + "epoch": 0.13, + "learning_rate": 1.9525550703259257e-05, + "loss": 0.32, + "step": 1464 + }, + { + "epoch": 0.13, + "learning_rate": 1.952470534245488e-05, + "loss": 0.3211, + "step": 1465 + }, + { + "epoch": 0.13, + "learning_rate": 1.952385924753328e-05, + "loss": 0.2892, + "step": 1466 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523012418559663e-05, + "loss": 0.3455, + "step": 1467 + }, + { + "epoch": 0.13, + "learning_rate": 1.9522164855599306e-05, + "loss": 0.3442, + "step": 1468 + }, + { + "epoch": 0.13, + "learning_rate": 1.9521316558717534e-05, + "loss": 0.2601, + "step": 1469 + }, + { + "epoch": 0.13, + "learning_rate": 1.9520467527979726e-05, + "loss": 0.2995, + "step": 1470 + }, + { + "epoch": 0.13, + "learning_rate": 1.9519617763451322e-05, + "loss": 0.2793, + "step": 1471 + }, + { + "epoch": 0.13, + "learning_rate": 1.9518767265197823e-05, + "loss": 0.2986, + "step": 1472 + }, + { + "epoch": 0.13, + "learning_rate": 1.951791603328477e-05, + "loss": 0.3049, + "step": 1473 + }, + { + "epoch": 0.13, + "learning_rate": 1.9517064067777786e-05, + "loss": 0.28, + "step": 1474 + }, + { + "epoch": 0.13, + "learning_rate": 1.9516211368742524e-05, + "loss": 0.3165, + "step": 1475 + }, + { + "epoch": 0.13, + "learning_rate": 1.951535793624471e-05, + "loss": 0.3018, + "step": 1476 + }, + { + "epoch": 0.13, + "learning_rate": 1.9514503770350125e-05, + "loss": 0.3166, + "step": 1477 + }, + { + "epoch": 0.13, + "learning_rate": 1.9513648871124604e-05, + "loss": 0.3506, + "step": 1478 + }, + { + "epoch": 0.13, + "learning_rate": 1.9512793238634035e-05, + "loss": 0.309, + "step": 1479 + }, + { + "epoch": 0.13, + "learning_rate": 1.9511936872944367e-05, + "loss": 0.3057, + "step": 1480 + }, + { + "epoch": 0.13, + "learning_rate": 1.95110797741216e-05, + "loss": 0.3157, + "step": 1481 + }, + { + "epoch": 0.13, + "learning_rate": 1.9510221942231803e-05, + "loss": 0.2953, + "step": 1482 + }, + { + "epoch": 0.13, + "learning_rate": 1.950936337734109e-05, + "loss": 0.3173, + "step": 1483 + }, + { + "epoch": 0.13, + "learning_rate": 1.9508504079515637e-05, + "loss": 0.3226, + "step": 1484 + }, + { + "epoch": 0.13, + "learning_rate": 1.9507644048821674e-05, + "loss": 0.2922, + "step": 1485 + }, + { + "epoch": 0.13, + "learning_rate": 1.9506783285325482e-05, + "loss": 0.3123, + "step": 1486 + }, + { + "epoch": 0.13, + "learning_rate": 1.950592178909341e-05, + "loss": 0.3209, + "step": 1487 + }, + { + "epoch": 0.13, + "learning_rate": 1.950505956019186e-05, + "loss": 0.2998, + "step": 1488 + }, + { + "epoch": 0.13, + "learning_rate": 1.950419659868728e-05, + "loss": 0.3032, + "step": 1489 + }, + { + "epoch": 0.13, + "learning_rate": 1.9503332904646188e-05, + "loss": 0.2921, + "step": 1490 + }, + { + "epoch": 0.13, + "learning_rate": 1.9502468478135158e-05, + "loss": 0.3018, + "step": 1491 + }, + { + "epoch": 0.13, + "learning_rate": 1.9501603319220807e-05, + "loss": 0.3274, + "step": 1492 + }, + { + "epoch": 0.13, + "learning_rate": 1.950073742796982e-05, + "loss": 0.3657, + "step": 1493 + }, + { + "epoch": 0.13, + "learning_rate": 1.9499870804448936e-05, + "loss": 0.3357, + "step": 1494 + }, + { + "epoch": 0.13, + "learning_rate": 1.9499003448724956e-05, + "loss": 0.3713, + "step": 1495 + }, + { + "epoch": 0.13, + "learning_rate": 1.9498135360864724e-05, + "loss": 0.3123, + "step": 1496 + }, + { + "epoch": 0.13, + "learning_rate": 1.9497266540935146e-05, + "loss": 0.324, + "step": 1497 + }, + { + "epoch": 0.13, + "learning_rate": 1.9496396989003195e-05, + "loss": 0.2944, + "step": 1498 + }, + { + "epoch": 0.13, + "learning_rate": 1.9495526705135885e-05, + "loss": 0.3209, + "step": 1499 + }, + { + "epoch": 0.13, + "learning_rate": 1.9494655689400294e-05, + "loss": 0.6093, + "step": 1500 + }, + { + "epoch": 0.13, + "learning_rate": 1.949378394186356e-05, + "loss": 0.3105, + "step": 1501 + }, + { + "epoch": 0.13, + "learning_rate": 1.949291146259287e-05, + "loss": 0.2971, + "step": 1502 + }, + { + "epoch": 0.13, + "learning_rate": 1.949203825165547e-05, + "loss": 0.3077, + "step": 1503 + }, + { + "epoch": 0.13, + "learning_rate": 1.949116430911866e-05, + "loss": 0.3217, + "step": 1504 + }, + { + "epoch": 0.13, + "learning_rate": 1.9490289635049805e-05, + "loss": 0.3035, + "step": 1505 + }, + { + "epoch": 0.13, + "learning_rate": 1.9489414229516318e-05, + "loss": 0.3545, + "step": 1506 + }, + { + "epoch": 0.13, + "learning_rate": 1.948853809258567e-05, + "loss": 0.3203, + "step": 1507 + }, + { + "epoch": 0.13, + "learning_rate": 1.948766122432539e-05, + "loss": 0.3126, + "step": 1508 + }, + { + "epoch": 0.13, + "learning_rate": 1.948678362480306e-05, + "loss": 0.3262, + "step": 1509 + }, + { + "epoch": 0.13, + "learning_rate": 1.948590529408633e-05, + "loss": 0.3683, + "step": 1510 + }, + { + "epoch": 0.13, + "learning_rate": 1.9485026232242887e-05, + "loss": 0.2927, + "step": 1511 + }, + { + "epoch": 0.13, + "learning_rate": 1.948414643934049e-05, + "loss": 0.3082, + "step": 1512 + }, + { + "epoch": 0.13, + "learning_rate": 1.9483265915446946e-05, + "loss": 0.3093, + "step": 1513 + }, + { + "epoch": 0.13, + "learning_rate": 1.9482384660630125e-05, + "loss": 0.2876, + "step": 1514 + }, + { + "epoch": 0.13, + "learning_rate": 1.948150267495795e-05, + "loss": 0.3708, + "step": 1515 + }, + { + "epoch": 0.13, + "learning_rate": 1.9480619958498393e-05, + "loss": 0.3249, + "step": 1516 + }, + { + "epoch": 0.13, + "learning_rate": 1.94797365113195e-05, + "loss": 0.3624, + "step": 1517 + }, + { + "epoch": 0.13, + "learning_rate": 1.9478852333489356e-05, + "loss": 0.2885, + "step": 1518 + }, + { + "epoch": 0.13, + "learning_rate": 1.947796742507611e-05, + "loss": 0.2864, + "step": 1519 + }, + { + "epoch": 0.13, + "learning_rate": 1.947708178614797e-05, + "loss": 0.2759, + "step": 1520 + }, + { + "epoch": 0.13, + "learning_rate": 1.9476195416773188e-05, + "loss": 0.2808, + "step": 1521 + }, + { + "epoch": 0.13, + "learning_rate": 1.947530831702009e-05, + "loss": 0.3254, + "step": 1522 + }, + { + "epoch": 0.13, + "learning_rate": 1.9474420486957045e-05, + "loss": 0.3075, + "step": 1523 + }, + { + "epoch": 0.13, + "learning_rate": 1.9473531926652483e-05, + "loss": 0.2863, + "step": 1524 + }, + { + "epoch": 0.13, + "learning_rate": 1.947264263617489e-05, + "loss": 0.3137, + "step": 1525 + }, + { + "epoch": 0.13, + "learning_rate": 1.947175261559281e-05, + "loss": 0.3484, + "step": 1526 + }, + { + "epoch": 0.13, + "learning_rate": 1.9470861864974837e-05, + "loss": 0.3353, + "step": 1527 + }, + { + "epoch": 0.13, + "learning_rate": 1.946997038438963e-05, + "loss": 0.2685, + "step": 1528 + }, + { + "epoch": 0.13, + "learning_rate": 1.94690781739059e-05, + "loss": 0.2676, + "step": 1529 + }, + { + "epoch": 0.13, + "learning_rate": 1.946818523359241e-05, + "loss": 0.3207, + "step": 1530 + }, + { + "epoch": 0.13, + "learning_rate": 1.9467291563517987e-05, + "loss": 0.3172, + "step": 1531 + }, + { + "epoch": 0.13, + "learning_rate": 1.946639716375151e-05, + "loss": 0.3126, + "step": 1532 + }, + { + "epoch": 0.13, + "learning_rate": 1.9465502034361908e-05, + "loss": 0.3037, + "step": 1533 + }, + { + "epoch": 0.13, + "learning_rate": 1.946460617541819e-05, + "loss": 0.3279, + "step": 1534 + }, + { + "epoch": 0.13, + "learning_rate": 1.9463709586989387e-05, + "loss": 0.2889, + "step": 1535 + }, + { + "epoch": 0.13, + "learning_rate": 1.9462812269144613e-05, + "loss": 0.275, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 1.9461914221953026e-05, + "loss": 0.3031, + "step": 1537 + }, + { + "epoch": 0.13, + "learning_rate": 1.9461015445483843e-05, + "loss": 0.2833, + "step": 1538 + }, + { + "epoch": 0.13, + "learning_rate": 1.946011593980634e-05, + "loss": 0.3129, + "step": 1539 + }, + { + "epoch": 0.13, + "learning_rate": 1.945921570498984e-05, + "loss": 0.3472, + "step": 1540 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458314741103737e-05, + "loss": 0.3273, + "step": 1541 + }, + { + "epoch": 0.13, + "learning_rate": 1.9457413048217466e-05, + "loss": 0.32, + "step": 1542 + }, + { + "epoch": 0.13, + "learning_rate": 1.945651062640053e-05, + "loss": 0.3675, + "step": 1543 + }, + { + "epoch": 0.13, + "learning_rate": 1.9455607475722478e-05, + "loss": 0.3088, + "step": 1544 + }, + { + "epoch": 0.13, + "learning_rate": 1.9454703596252926e-05, + "loss": 0.3207, + "step": 1545 + }, + { + "epoch": 0.13, + "learning_rate": 1.9453798988061535e-05, + "loss": 0.3292, + "step": 1546 + }, + { + "epoch": 0.13, + "learning_rate": 1.945289365121803e-05, + "loss": 0.309, + "step": 1547 + }, + { + "epoch": 0.13, + "learning_rate": 1.9451987585792195e-05, + "loss": 0.2941, + "step": 1548 + }, + { + "epoch": 0.13, + "learning_rate": 1.945108079185386e-05, + "loss": 0.3597, + "step": 1549 + }, + { + "epoch": 0.13, + "learning_rate": 1.9450173269472915e-05, + "loss": 0.2799, + "step": 1550 + }, + { + "epoch": 0.13, + "learning_rate": 1.9449265018719307e-05, + "loss": 0.3589, + "step": 1551 + }, + { + "epoch": 0.13, + "learning_rate": 1.9448356039663044e-05, + "loss": 0.3272, + "step": 1552 + }, + { + "epoch": 0.13, + "learning_rate": 1.9447446332374182e-05, + "loss": 0.3453, + "step": 1553 + }, + { + "epoch": 0.13, + "learning_rate": 1.944653589692284e-05, + "loss": 0.3012, + "step": 1554 + }, + { + "epoch": 0.13, + "learning_rate": 1.9445624733379186e-05, + "loss": 0.3046, + "step": 1555 + }, + { + "epoch": 0.13, + "learning_rate": 1.944471284181345e-05, + "loss": 0.316, + "step": 1556 + }, + { + "epoch": 0.13, + "learning_rate": 1.9443800222295918e-05, + "loss": 0.291, + "step": 1557 + }, + { + "epoch": 0.13, + "learning_rate": 1.9442886874896924e-05, + "loss": 0.3208, + "step": 1558 + }, + { + "epoch": 0.13, + "learning_rate": 1.944197279968687e-05, + "loss": 0.3055, + "step": 1559 + }, + { + "epoch": 0.13, + "learning_rate": 1.9441057996736207e-05, + "loss": 0.3518, + "step": 1560 + }, + { + "epoch": 0.13, + "learning_rate": 1.9440142466115443e-05, + "loss": 0.2662, + "step": 1561 + }, + { + "epoch": 0.13, + "learning_rate": 1.9439226207895143e-05, + "loss": 0.2976, + "step": 1562 + }, + { + "epoch": 0.13, + "learning_rate": 1.943830922214593e-05, + "loss": 0.2922, + "step": 1563 + }, + { + "epoch": 0.13, + "learning_rate": 1.9437391508938476e-05, + "loss": 0.3578, + "step": 1564 + }, + { + "epoch": 0.13, + "learning_rate": 1.9436473068343516e-05, + "loss": 0.31, + "step": 1565 + }, + { + "epoch": 0.13, + "learning_rate": 1.9435553900431838e-05, + "loss": 0.3405, + "step": 1566 + }, + { + "epoch": 0.13, + "learning_rate": 1.943463400527429e-05, + "loss": 0.306, + "step": 1567 + }, + { + "epoch": 0.13, + "learning_rate": 1.9433713382941768e-05, + "loss": 0.3032, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 1.943279203350523e-05, + "loss": 0.288, + "step": 1569 + }, + { + "epoch": 0.13, + "learning_rate": 1.9431869957035698e-05, + "loss": 0.3475, + "step": 1570 + }, + { + "epoch": 0.13, + "learning_rate": 1.943094715360423e-05, + "loss": 0.3038, + "step": 1571 + }, + { + "epoch": 0.13, + "learning_rate": 1.9430023623281955e-05, + "loss": 0.3118, + "step": 1572 + }, + { + "epoch": 0.13, + "learning_rate": 1.9429099366140055e-05, + "loss": 0.3512, + "step": 1573 + }, + { + "epoch": 0.13, + "learning_rate": 1.9428174382249764e-05, + "loss": 0.3785, + "step": 1574 + }, + { + "epoch": 0.14, + "learning_rate": 1.942724867168238e-05, + "loss": 0.3773, + "step": 1575 + }, + { + "epoch": 0.14, + "learning_rate": 1.9426322234509248e-05, + "loss": 0.3151, + "step": 1576 + }, + { + "epoch": 0.14, + "learning_rate": 1.9425395070801775e-05, + "loss": 0.3328, + "step": 1577 + }, + { + "epoch": 0.14, + "learning_rate": 1.9424467180631422e-05, + "loss": 0.3016, + "step": 1578 + }, + { + "epoch": 0.14, + "learning_rate": 1.942353856406971e-05, + "loss": 0.3376, + "step": 1579 + }, + { + "epoch": 0.14, + "learning_rate": 1.9422609221188208e-05, + "loss": 0.2816, + "step": 1580 + }, + { + "epoch": 0.14, + "learning_rate": 1.9421679152058545e-05, + "loss": 0.3253, + "step": 1581 + }, + { + "epoch": 0.14, + "learning_rate": 1.9420748356752405e-05, + "loss": 0.3448, + "step": 1582 + }, + { + "epoch": 0.14, + "learning_rate": 1.9419816835341528e-05, + "loss": 0.2772, + "step": 1583 + }, + { + "epoch": 0.14, + "learning_rate": 1.941888458789772e-05, + "loss": 0.3293, + "step": 1584 + }, + { + "epoch": 0.14, + "learning_rate": 1.9417951614492824e-05, + "loss": 0.2995, + "step": 1585 + }, + { + "epoch": 0.14, + "learning_rate": 1.9417017915198758e-05, + "loss": 0.3094, + "step": 1586 + }, + { + "epoch": 0.14, + "learning_rate": 1.9416083490087475e-05, + "loss": 0.3469, + "step": 1587 + }, + { + "epoch": 0.14, + "learning_rate": 1.9415148339231008e-05, + "loss": 0.3573, + "step": 1588 + }, + { + "epoch": 0.14, + "learning_rate": 1.9414212462701428e-05, + "loss": 0.3596, + "step": 1589 + }, + { + "epoch": 0.14, + "learning_rate": 1.941327586057087e-05, + "loss": 0.3703, + "step": 1590 + }, + { + "epoch": 0.14, + "learning_rate": 1.9412338532911515e-05, + "loss": 0.3645, + "step": 1591 + }, + { + "epoch": 0.14, + "learning_rate": 1.9411400479795618e-05, + "loss": 0.2745, + "step": 1592 + }, + { + "epoch": 0.14, + "learning_rate": 1.9410461701295474e-05, + "loss": 0.2949, + "step": 1593 + }, + { + "epoch": 0.14, + "learning_rate": 1.940952219748344e-05, + "loss": 0.6079, + "step": 1594 + }, + { + "epoch": 0.14, + "learning_rate": 1.940858196843193e-05, + "loss": 0.3245, + "step": 1595 + }, + { + "epoch": 0.14, + "learning_rate": 1.940764101421341e-05, + "loss": 0.3019, + "step": 1596 + }, + { + "epoch": 0.14, + "learning_rate": 1.9406699334900407e-05, + "loss": 0.3044, + "step": 1597 + }, + { + "epoch": 0.14, + "learning_rate": 1.9405756930565496e-05, + "loss": 0.3335, + "step": 1598 + }, + { + "epoch": 0.14, + "learning_rate": 1.940481380128132e-05, + "loss": 0.2662, + "step": 1599 + }, + { + "epoch": 0.14, + "learning_rate": 1.9403869947120563e-05, + "loss": 0.3498, + "step": 1600 + }, + { + "epoch": 0.14, + "learning_rate": 1.9402925368155978e-05, + "loss": 0.3083, + "step": 1601 + }, + { + "epoch": 0.14, + "learning_rate": 1.940198006446037e-05, + "loss": 0.313, + "step": 1602 + }, + { + "epoch": 0.14, + "learning_rate": 1.940103403610659e-05, + "loss": 0.2927, + "step": 1603 + }, + { + "epoch": 0.14, + "learning_rate": 1.940008728316756e-05, + "loss": 0.2975, + "step": 1604 + }, + { + "epoch": 0.14, + "learning_rate": 1.939913980571625e-05, + "loss": 0.3222, + "step": 1605 + }, + { + "epoch": 0.14, + "learning_rate": 1.9398191603825687e-05, + "loss": 0.3059, + "step": 1606 + }, + { + "epoch": 0.14, + "learning_rate": 1.939724267756895e-05, + "loss": 0.3022, + "step": 1607 + }, + { + "epoch": 0.14, + "learning_rate": 1.9396293027019186e-05, + "loss": 0.2991, + "step": 1608 + }, + { + "epoch": 0.14, + "learning_rate": 1.9395342652249578e-05, + "loss": 0.3347, + "step": 1609 + }, + { + "epoch": 0.14, + "learning_rate": 1.9394391553333384e-05, + "loss": 0.3745, + "step": 1610 + }, + { + "epoch": 0.14, + "learning_rate": 1.939343973034391e-05, + "loss": 0.2777, + "step": 1611 + }, + { + "epoch": 0.14, + "learning_rate": 1.9392487183354514e-05, + "loss": 0.2809, + "step": 1612 + }, + { + "epoch": 0.14, + "learning_rate": 1.9391533912438615e-05, + "loss": 0.3855, + "step": 1613 + }, + { + "epoch": 0.14, + "learning_rate": 1.939057991766969e-05, + "loss": 0.3334, + "step": 1614 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389625199121264e-05, + "loss": 0.271, + "step": 1615 + }, + { + "epoch": 0.14, + "learning_rate": 1.938866975686692e-05, + "loss": 0.2993, + "step": 1616 + }, + { + "epoch": 0.14, + "learning_rate": 1.9387713590980305e-05, + "loss": 0.3319, + "step": 1617 + }, + { + "epoch": 0.14, + "learning_rate": 1.9386756701535115e-05, + "loss": 0.3168, + "step": 1618 + }, + { + "epoch": 0.14, + "learning_rate": 1.9385799088605095e-05, + "loss": 0.2659, + "step": 1619 + }, + { + "epoch": 0.14, + "learning_rate": 1.938484075226406e-05, + "loss": 0.3383, + "step": 1620 + }, + { + "epoch": 0.14, + "learning_rate": 1.938388169258587e-05, + "loss": 0.299, + "step": 1621 + }, + { + "epoch": 0.14, + "learning_rate": 1.9382921909644448e-05, + "loss": 0.2908, + "step": 1622 + }, + { + "epoch": 0.14, + "learning_rate": 1.938196140351377e-05, + "loss": 0.3201, + "step": 1623 + }, + { + "epoch": 0.14, + "learning_rate": 1.938100017426786e-05, + "loss": 0.3209, + "step": 1624 + }, + { + "epoch": 0.14, + "learning_rate": 1.938003822198081e-05, + "loss": 0.2896, + "step": 1625 + }, + { + "epoch": 0.14, + "learning_rate": 1.9379075546726764e-05, + "loss": 0.2878, + "step": 1626 + }, + { + "epoch": 0.14, + "learning_rate": 1.9378112148579916e-05, + "loss": 0.3328, + "step": 1627 + }, + { + "epoch": 0.14, + "learning_rate": 1.9377148027614523e-05, + "loss": 0.3584, + "step": 1628 + }, + { + "epoch": 0.14, + "learning_rate": 1.9376183183904896e-05, + "loss": 0.3284, + "step": 1629 + }, + { + "epoch": 0.14, + "learning_rate": 1.9375217617525396e-05, + "loss": 0.3144, + "step": 1630 + }, + { + "epoch": 0.14, + "learning_rate": 1.9374251328550448e-05, + "loss": 0.2917, + "step": 1631 + }, + { + "epoch": 0.14, + "learning_rate": 1.9373284317054525e-05, + "loss": 0.3212, + "step": 1632 + }, + { + "epoch": 0.14, + "learning_rate": 1.9372316583112163e-05, + "loss": 0.3152, + "step": 1633 + }, + { + "epoch": 0.14, + "learning_rate": 1.937134812679795e-05, + "loss": 0.3564, + "step": 1634 + }, + { + "epoch": 0.14, + "learning_rate": 1.9370378948186526e-05, + "loss": 0.3588, + "step": 1635 + }, + { + "epoch": 0.14, + "learning_rate": 1.9369409047352593e-05, + "loss": 0.3024, + "step": 1636 + }, + { + "epoch": 0.14, + "learning_rate": 1.936843842437091e-05, + "loss": 0.3436, + "step": 1637 + }, + { + "epoch": 0.14, + "learning_rate": 1.936746707931628e-05, + "loss": 0.3406, + "step": 1638 + }, + { + "epoch": 0.14, + "learning_rate": 1.9366495012263575e-05, + "loss": 0.3167, + "step": 1639 + }, + { + "epoch": 0.14, + "learning_rate": 1.9365522223287717e-05, + "loss": 0.3439, + "step": 1640 + }, + { + "epoch": 0.14, + "learning_rate": 1.936454871246368e-05, + "loss": 0.364, + "step": 1641 + }, + { + "epoch": 0.14, + "learning_rate": 1.9363574479866504e-05, + "loss": 0.3007, + "step": 1642 + }, + { + "epoch": 0.14, + "learning_rate": 1.936259952557127e-05, + "loss": 0.3105, + "step": 1643 + }, + { + "epoch": 0.14, + "learning_rate": 1.9361623849653126e-05, + "loss": 0.3159, + "step": 1644 + }, + { + "epoch": 0.14, + "learning_rate": 1.9360647452187273e-05, + "loss": 0.3434, + "step": 1645 + }, + { + "epoch": 0.14, + "learning_rate": 1.9359670333248967e-05, + "loss": 0.35, + "step": 1646 + }, + { + "epoch": 0.14, + "learning_rate": 1.935869249291352e-05, + "loss": 0.3565, + "step": 1647 + }, + { + "epoch": 0.14, + "learning_rate": 1.9357713931256298e-05, + "loss": 0.2778, + "step": 1648 + }, + { + "epoch": 0.14, + "learning_rate": 1.9356734648352727e-05, + "loss": 0.3659, + "step": 1649 + }, + { + "epoch": 0.14, + "learning_rate": 1.935575464427828e-05, + "loss": 0.2652, + "step": 1650 + }, + { + "epoch": 0.14, + "learning_rate": 1.935477391910849e-05, + "loss": 0.3193, + "step": 1651 + }, + { + "epoch": 0.14, + "learning_rate": 1.9353792472918954e-05, + "loss": 0.2858, + "step": 1652 + }, + { + "epoch": 0.14, + "learning_rate": 1.9352810305785314e-05, + "loss": 0.2687, + "step": 1653 + }, + { + "epoch": 0.14, + "learning_rate": 1.935182741778326e-05, + "loss": 0.2885, + "step": 1654 + }, + { + "epoch": 0.14, + "learning_rate": 1.9350843808988566e-05, + "loss": 0.3188, + "step": 1655 + }, + { + "epoch": 0.14, + "learning_rate": 1.9349859479477034e-05, + "loss": 0.3995, + "step": 1656 + }, + { + "epoch": 0.14, + "learning_rate": 1.934887442932453e-05, + "loss": 0.308, + "step": 1657 + }, + { + "epoch": 0.14, + "learning_rate": 1.934788865860698e-05, + "loss": 0.3033, + "step": 1658 + }, + { + "epoch": 0.14, + "learning_rate": 1.9346902167400363e-05, + "loss": 0.33, + "step": 1659 + }, + { + "epoch": 0.14, + "learning_rate": 1.9345914955780708e-05, + "loss": 0.2574, + "step": 1660 + }, + { + "epoch": 0.14, + "learning_rate": 1.9344927023824112e-05, + "loss": 0.6281, + "step": 1661 + }, + { + "epoch": 0.14, + "learning_rate": 1.9343938371606714e-05, + "loss": 0.332, + "step": 1662 + }, + { + "epoch": 0.14, + "learning_rate": 1.9342948999204712e-05, + "loss": 0.2936, + "step": 1663 + }, + { + "epoch": 0.14, + "learning_rate": 1.934195890669437e-05, + "loss": 0.4017, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 1.9340968094151997e-05, + "loss": 0.3555, + "step": 1665 + }, + { + "epoch": 0.14, + "learning_rate": 1.9339976561653956e-05, + "loss": 0.317, + "step": 1666 + }, + { + "epoch": 0.14, + "learning_rate": 1.933898430927667e-05, + "loss": 0.3245, + "step": 1667 + }, + { + "epoch": 0.14, + "learning_rate": 1.933799133709662e-05, + "loss": 0.2429, + "step": 1668 + }, + { + "epoch": 0.14, + "learning_rate": 1.9336997645190342e-05, + "loss": 0.3087, + "step": 1669 + }, + { + "epoch": 0.14, + "learning_rate": 1.933600323363442e-05, + "loss": 0.3079, + "step": 1670 + }, + { + "epoch": 0.14, + "learning_rate": 1.9335008102505494e-05, + "loss": 0.314, + "step": 1671 + }, + { + "epoch": 0.14, + "learning_rate": 1.9334012251880274e-05, + "loss": 0.3101, + "step": 1672 + }, + { + "epoch": 0.14, + "learning_rate": 1.933301568183551e-05, + "loss": 0.3582, + "step": 1673 + }, + { + "epoch": 0.14, + "learning_rate": 1.933201839244801e-05, + "loss": 0.3174, + "step": 1674 + }, + { + "epoch": 0.14, + "learning_rate": 1.9331020383794647e-05, + "loss": 0.3832, + "step": 1675 + }, + { + "epoch": 0.14, + "learning_rate": 1.9330021655952333e-05, + "loss": 0.3176, + "step": 1676 + }, + { + "epoch": 0.14, + "learning_rate": 1.9329022208998056e-05, + "loss": 0.35, + "step": 1677 + }, + { + "epoch": 0.14, + "learning_rate": 1.9328022043008842e-05, + "loss": 0.3019, + "step": 1678 + }, + { + "epoch": 0.14, + "learning_rate": 1.9327021158061776e-05, + "loss": 0.3632, + "step": 1679 + }, + { + "epoch": 0.14, + "learning_rate": 1.932601955423401e-05, + "loss": 0.2994, + "step": 1680 + }, + { + "epoch": 0.14, + "learning_rate": 1.9325017231602737e-05, + "loss": 0.3564, + "step": 1681 + }, + { + "epoch": 0.14, + "learning_rate": 1.932401419024521e-05, + "loss": 0.3143, + "step": 1682 + }, + { + "epoch": 0.14, + "learning_rate": 1.9323010430238746e-05, + "loss": 0.3065, + "step": 1683 + }, + { + "epoch": 0.14, + "learning_rate": 1.93220059516607e-05, + "loss": 0.3391, + "step": 1684 + }, + { + "epoch": 0.14, + "learning_rate": 1.9321000754588497e-05, + "loss": 0.3037, + "step": 1685 + }, + { + "epoch": 0.14, + "learning_rate": 1.931999483909961e-05, + "loss": 0.3147, + "step": 1686 + }, + { + "epoch": 0.14, + "learning_rate": 1.9318988205271577e-05, + "loss": 0.3432, + "step": 1687 + }, + { + "epoch": 0.14, + "learning_rate": 1.9317980853181975e-05, + "loss": 0.3123, + "step": 1688 + }, + { + "epoch": 0.14, + "learning_rate": 1.9316972782908455e-05, + "loss": 0.2975, + "step": 1689 + }, + { + "epoch": 0.14, + "learning_rate": 1.9315963994528707e-05, + "loss": 0.3085, + "step": 1690 + }, + { + "epoch": 0.14, + "learning_rate": 1.9314954488120484e-05, + "loss": 0.3236, + "step": 1691 + }, + { + "epoch": 0.15, + "learning_rate": 1.93139442637616e-05, + "loss": 0.306, + "step": 1692 + }, + { + "epoch": 0.15, + "learning_rate": 1.9312933321529912e-05, + "loss": 0.3127, + "step": 1693 + }, + { + "epoch": 0.15, + "learning_rate": 1.9311921661503338e-05, + "loss": 0.2866, + "step": 1694 + }, + { + "epoch": 0.15, + "learning_rate": 1.9310909283759855e-05, + "loss": 0.3159, + "step": 1695 + }, + { + "epoch": 0.15, + "learning_rate": 1.930989618837749e-05, + "loss": 0.3041, + "step": 1696 + }, + { + "epoch": 0.15, + "learning_rate": 1.930888237543433e-05, + "loss": 0.3199, + "step": 1697 + }, + { + "epoch": 0.15, + "learning_rate": 1.9307867845008513e-05, + "loss": 0.3293, + "step": 1698 + }, + { + "epoch": 0.15, + "learning_rate": 1.9306852597178233e-05, + "loss": 0.3046, + "step": 1699 + }, + { + "epoch": 0.15, + "learning_rate": 1.9305836632021744e-05, + "loss": 0.6284, + "step": 1700 + }, + { + "epoch": 0.15, + "learning_rate": 1.930481994961735e-05, + "loss": 0.3041, + "step": 1701 + }, + { + "epoch": 0.15, + "learning_rate": 1.9303802550043404e-05, + "loss": 0.3094, + "step": 1702 + }, + { + "epoch": 0.15, + "learning_rate": 1.9302784433378333e-05, + "loss": 0.32, + "step": 1703 + }, + { + "epoch": 0.15, + "learning_rate": 1.9301765599700604e-05, + "loss": 0.2931, + "step": 1704 + }, + { + "epoch": 0.15, + "learning_rate": 1.9300746049088746e-05, + "loss": 0.3551, + "step": 1705 + }, + { + "epoch": 0.15, + "learning_rate": 1.9299725781621335e-05, + "loss": 0.2919, + "step": 1706 + }, + { + "epoch": 0.15, + "learning_rate": 1.9298704797377013e-05, + "loss": 0.3267, + "step": 1707 + }, + { + "epoch": 0.15, + "learning_rate": 1.929768309643447e-05, + "loss": 0.2783, + "step": 1708 + }, + { + "epoch": 0.15, + "learning_rate": 1.929666067887246e-05, + "loss": 0.2867, + "step": 1709 + }, + { + "epoch": 0.15, + "learning_rate": 1.929563754476978e-05, + "loss": 0.3345, + "step": 1710 + }, + { + "epoch": 0.15, + "learning_rate": 1.9294613694205285e-05, + "loss": 0.3818, + "step": 1711 + }, + { + "epoch": 0.15, + "learning_rate": 1.9293589127257896e-05, + "loss": 0.3317, + "step": 1712 + }, + { + "epoch": 0.15, + "learning_rate": 1.9292563844006578e-05, + "loss": 0.3288, + "step": 1713 + }, + { + "epoch": 0.15, + "learning_rate": 1.9291537844530352e-05, + "loss": 0.316, + "step": 1714 + }, + { + "epoch": 0.15, + "learning_rate": 1.92905111289083e-05, + "loss": 0.3204, + "step": 1715 + }, + { + "epoch": 0.15, + "learning_rate": 1.928948369721956e-05, + "loss": 0.2514, + "step": 1716 + }, + { + "epoch": 0.15, + "learning_rate": 1.9288455549543315e-05, + "loss": 0.2993, + "step": 1717 + }, + { + "epoch": 0.15, + "learning_rate": 1.928742668595881e-05, + "loss": 0.3181, + "step": 1718 + }, + { + "epoch": 0.15, + "learning_rate": 1.9286397106545348e-05, + "loss": 0.3017, + "step": 1719 + }, + { + "epoch": 0.15, + "learning_rate": 1.9285366811382283e-05, + "loss": 0.2833, + "step": 1720 + }, + { + "epoch": 0.15, + "learning_rate": 1.9284335800549026e-05, + "loss": 0.2975, + "step": 1721 + }, + { + "epoch": 0.15, + "learning_rate": 1.928330407412504e-05, + "loss": 0.3507, + "step": 1722 + }, + { + "epoch": 0.15, + "learning_rate": 1.9282271632189844e-05, + "loss": 0.373, + "step": 1723 + }, + { + "epoch": 0.15, + "learning_rate": 1.9281238474823016e-05, + "loss": 0.3446, + "step": 1724 + }, + { + "epoch": 0.15, + "learning_rate": 1.9280204602104185e-05, + "loss": 0.2996, + "step": 1725 + }, + { + "epoch": 0.15, + "learning_rate": 1.927917001411304e-05, + "loss": 0.3392, + "step": 1726 + }, + { + "epoch": 0.15, + "learning_rate": 1.9278134710929322e-05, + "loss": 0.3228, + "step": 1727 + }, + { + "epoch": 0.15, + "learning_rate": 1.9277098692632824e-05, + "loss": 0.3207, + "step": 1728 + }, + { + "epoch": 0.15, + "learning_rate": 1.9276061959303397e-05, + "loss": 0.2594, + "step": 1729 + }, + { + "epoch": 0.15, + "learning_rate": 1.927502451102095e-05, + "loss": 0.3571, + "step": 1730 + }, + { + "epoch": 0.15, + "learning_rate": 1.927398634786544e-05, + "loss": 0.3116, + "step": 1731 + }, + { + "epoch": 0.15, + "learning_rate": 1.927294746991689e-05, + "loss": 0.3369, + "step": 1732 + }, + { + "epoch": 0.15, + "learning_rate": 1.9271907877255364e-05, + "loss": 0.2906, + "step": 1733 + }, + { + "epoch": 0.15, + "learning_rate": 1.9270867569960994e-05, + "loss": 0.325, + "step": 1734 + }, + { + "epoch": 0.15, + "learning_rate": 1.9269826548113964e-05, + "loss": 0.3072, + "step": 1735 + }, + { + "epoch": 0.15, + "learning_rate": 1.9268784811794507e-05, + "loss": 0.3358, + "step": 1736 + }, + { + "epoch": 0.15, + "learning_rate": 1.926774236108291e-05, + "loss": 0.3163, + "step": 1737 + }, + { + "epoch": 0.15, + "learning_rate": 1.926669919605953e-05, + "loss": 0.288, + "step": 1738 + }, + { + "epoch": 0.15, + "learning_rate": 1.926565531680476e-05, + "loss": 0.3553, + "step": 1739 + }, + { + "epoch": 0.15, + "learning_rate": 1.9264610723399065e-05, + "loss": 0.2657, + "step": 1740 + }, + { + "epoch": 0.15, + "learning_rate": 1.926356541592295e-05, + "loss": 0.2704, + "step": 1741 + }, + { + "epoch": 0.15, + "learning_rate": 1.9262519394456985e-05, + "loss": 0.3201, + "step": 1742 + }, + { + "epoch": 0.15, + "learning_rate": 1.9261472659081793e-05, + "loss": 0.3238, + "step": 1743 + }, + { + "epoch": 0.15, + "learning_rate": 1.9260425209878052e-05, + "loss": 0.3533, + "step": 1744 + }, + { + "epoch": 0.15, + "learning_rate": 1.9259377046926494e-05, + "loss": 0.2781, + "step": 1745 + }, + { + "epoch": 0.15, + "learning_rate": 1.9258328170307905e-05, + "loss": 0.3461, + "step": 1746 + }, + { + "epoch": 0.15, + "learning_rate": 1.9257278580103124e-05, + "loss": 0.3197, + "step": 1747 + }, + { + "epoch": 0.15, + "learning_rate": 1.9256228276393055e-05, + "loss": 0.3057, + "step": 1748 + }, + { + "epoch": 0.15, + "learning_rate": 1.9255177259258647e-05, + "loss": 0.3086, + "step": 1749 + }, + { + "epoch": 0.15, + "learning_rate": 1.9254125528780908e-05, + "loss": 0.3104, + "step": 1750 + }, + { + "epoch": 0.15, + "learning_rate": 1.9253073085040895e-05, + "loss": 0.3045, + "step": 1751 + }, + { + "epoch": 0.15, + "learning_rate": 1.9252019928119733e-05, + "loss": 0.2928, + "step": 1752 + }, + { + "epoch": 0.15, + "learning_rate": 1.925096605809859e-05, + "loss": 0.3046, + "step": 1753 + }, + { + "epoch": 0.15, + "learning_rate": 1.924991147505869e-05, + "loss": 0.3116, + "step": 1754 + }, + { + "epoch": 0.15, + "learning_rate": 1.924885617908132e-05, + "loss": 0.3054, + "step": 1755 + }, + { + "epoch": 0.15, + "learning_rate": 1.9247800170247817e-05, + "loss": 0.3149, + "step": 1756 + }, + { + "epoch": 0.15, + "learning_rate": 1.924674344863957e-05, + "loss": 0.2634, + "step": 1757 + }, + { + "epoch": 0.15, + "learning_rate": 1.924568601433803e-05, + "loss": 0.3314, + "step": 1758 + }, + { + "epoch": 0.15, + "learning_rate": 1.9244627867424695e-05, + "loss": 0.3168, + "step": 1759 + }, + { + "epoch": 0.15, + "learning_rate": 1.9243569007981126e-05, + "loss": 0.2955, + "step": 1760 + }, + { + "epoch": 0.15, + "learning_rate": 1.9242509436088928e-05, + "loss": 0.34, + "step": 1761 + }, + { + "epoch": 0.15, + "learning_rate": 1.924144915182977e-05, + "loss": 0.3589, + "step": 1762 + }, + { + "epoch": 0.15, + "learning_rate": 1.9240388155285378e-05, + "loss": 0.5908, + "step": 1763 + }, + { + "epoch": 0.15, + "learning_rate": 1.9239326446537526e-05, + "loss": 0.2861, + "step": 1764 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238264025668043e-05, + "loss": 0.3053, + "step": 1765 + }, + { + "epoch": 0.15, + "learning_rate": 1.9237200892758814e-05, + "loss": 0.2995, + "step": 1766 + }, + { + "epoch": 0.15, + "learning_rate": 1.9236137047891783e-05, + "loss": 0.3101, + "step": 1767 + }, + { + "epoch": 0.15, + "learning_rate": 1.9235072491148946e-05, + "loss": 0.295, + "step": 1768 + }, + { + "epoch": 0.15, + "learning_rate": 1.9234007222612356e-05, + "loss": 0.3499, + "step": 1769 + }, + { + "epoch": 0.15, + "learning_rate": 1.9232941242364114e-05, + "loss": 0.6174, + "step": 1770 + }, + { + "epoch": 0.15, + "learning_rate": 1.923187455048638e-05, + "loss": 0.3262, + "step": 1771 + }, + { + "epoch": 0.15, + "learning_rate": 1.9230807147061374e-05, + "loss": 0.3286, + "step": 1772 + }, + { + "epoch": 0.15, + "learning_rate": 1.9229739032171365e-05, + "loss": 0.3104, + "step": 1773 + }, + { + "epoch": 0.15, + "learning_rate": 1.9228670205898675e-05, + "loss": 0.3065, + "step": 1774 + }, + { + "epoch": 0.15, + "learning_rate": 1.9227600668325687e-05, + "loss": 0.2874, + "step": 1775 + }, + { + "epoch": 0.15, + "learning_rate": 1.9226530419534834e-05, + "loss": 0.2909, + "step": 1776 + }, + { + "epoch": 0.15, + "learning_rate": 1.9225459459608604e-05, + "loss": 0.2921, + "step": 1777 + }, + { + "epoch": 0.15, + "learning_rate": 1.9224387788629547e-05, + "loss": 0.2999, + "step": 1778 + }, + { + "epoch": 0.15, + "learning_rate": 1.9223315406680254e-05, + "loss": 0.2816, + "step": 1779 + }, + { + "epoch": 0.15, + "learning_rate": 1.9222242313843385e-05, + "loss": 0.3271, + "step": 1780 + }, + { + "epoch": 0.15, + "learning_rate": 1.922116851020165e-05, + "loss": 0.2969, + "step": 1781 + }, + { + "epoch": 0.15, + "learning_rate": 1.9220093995837805e-05, + "loss": 0.3345, + "step": 1782 + }, + { + "epoch": 0.15, + "learning_rate": 1.9219018770834676e-05, + "loss": 0.3407, + "step": 1783 + }, + { + "epoch": 0.15, + "learning_rate": 1.921794283527513e-05, + "loss": 0.3454, + "step": 1784 + }, + { + "epoch": 0.15, + "learning_rate": 1.9216866189242095e-05, + "loss": 0.3488, + "step": 1785 + }, + { + "epoch": 0.15, + "learning_rate": 1.921578883281856e-05, + "loss": 0.3583, + "step": 1786 + }, + { + "epoch": 0.15, + "learning_rate": 1.921471076608756e-05, + "loss": 0.3239, + "step": 1787 + }, + { + "epoch": 0.15, + "learning_rate": 1.9213631989132184e-05, + "loss": 0.3118, + "step": 1788 + }, + { + "epoch": 0.15, + "learning_rate": 1.9212552502035576e-05, + "loss": 0.3254, + "step": 1789 + }, + { + "epoch": 0.15, + "learning_rate": 1.9211472304880945e-05, + "loss": 0.316, + "step": 1790 + }, + { + "epoch": 0.15, + "learning_rate": 1.9210391397751544e-05, + "loss": 0.3038, + "step": 1791 + }, + { + "epoch": 0.15, + "learning_rate": 1.920930978073068e-05, + "loss": 0.3088, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 1.920822745390173e-05, + "loss": 0.3186, + "step": 1793 + }, + { + "epoch": 0.15, + "learning_rate": 1.9207144417348103e-05, + "loss": 0.3231, + "step": 1794 + }, + { + "epoch": 0.15, + "learning_rate": 1.920606067115328e-05, + "loss": 0.3181, + "step": 1795 + }, + { + "epoch": 0.15, + "learning_rate": 1.9204976215400788e-05, + "loss": 0.6426, + "step": 1796 + }, + { + "epoch": 0.15, + "learning_rate": 1.920389105017421e-05, + "loss": 0.2999, + "step": 1797 + }, + { + "epoch": 0.15, + "learning_rate": 1.920280517555719e-05, + "loss": 0.3567, + "step": 1798 + }, + { + "epoch": 0.15, + "learning_rate": 1.9201718591633417e-05, + "loss": 0.3383, + "step": 1799 + }, + { + "epoch": 0.15, + "learning_rate": 1.9200631298486647e-05, + "loss": 0.2611, + "step": 1800 + }, + { + "epoch": 0.15, + "learning_rate": 1.9199543296200675e-05, + "loss": 0.3422, + "step": 1801 + }, + { + "epoch": 0.15, + "learning_rate": 1.919845458485936e-05, + "loss": 0.3254, + "step": 1802 + }, + { + "epoch": 0.15, + "learning_rate": 1.919736516454662e-05, + "loss": 0.3239, + "step": 1803 + }, + { + "epoch": 0.15, + "learning_rate": 1.919627503534642e-05, + "loss": 0.3033, + "step": 1804 + }, + { + "epoch": 0.15, + "learning_rate": 1.9195184197342775e-05, + "loss": 0.3134, + "step": 1805 + }, + { + "epoch": 0.15, + "learning_rate": 1.9194092650619767e-05, + "loss": 0.3568, + "step": 1806 + }, + { + "epoch": 0.15, + "learning_rate": 1.9193000395261532e-05, + "loss": 0.3514, + "step": 1807 + }, + { + "epoch": 0.15, + "learning_rate": 1.9191907431352248e-05, + "loss": 0.3237, + "step": 1808 + }, + { + "epoch": 0.16, + "learning_rate": 1.9190813758976156e-05, + "loss": 0.2794, + "step": 1809 + }, + { + "epoch": 0.16, + "learning_rate": 1.9189719378217554e-05, + "loss": 0.3182, + "step": 1810 + }, + { + "epoch": 0.16, + "learning_rate": 1.9188624289160792e-05, + "loss": 0.2975, + "step": 1811 + }, + { + "epoch": 0.16, + "learning_rate": 1.918752849189027e-05, + "loss": 0.3179, + "step": 1812 + }, + { + "epoch": 0.16, + "learning_rate": 1.918643198649045e-05, + "loss": 0.3536, + "step": 1813 + }, + { + "epoch": 0.16, + "learning_rate": 1.918533477304584e-05, + "loss": 0.3471, + "step": 1814 + }, + { + "epoch": 0.16, + "learning_rate": 1.9184236851641017e-05, + "loss": 0.3458, + "step": 1815 + }, + { + "epoch": 0.16, + "learning_rate": 1.9183138222360596e-05, + "loss": 0.3276, + "step": 1816 + }, + { + "epoch": 0.16, + "learning_rate": 1.9182038885289256e-05, + "loss": 0.2862, + "step": 1817 + }, + { + "epoch": 0.16, + "learning_rate": 1.9180938840511727e-05, + "loss": 0.289, + "step": 1818 + }, + { + "epoch": 0.16, + "learning_rate": 1.91798380881128e-05, + "loss": 0.3012, + "step": 1819 + }, + { + "epoch": 0.16, + "learning_rate": 1.9178736628177308e-05, + "loss": 0.2905, + "step": 1820 + }, + { + "epoch": 0.16, + "learning_rate": 1.9177634460790152e-05, + "loss": 0.2877, + "step": 1821 + }, + { + "epoch": 0.16, + "learning_rate": 1.9176531586036282e-05, + "loss": 0.3076, + "step": 1822 + }, + { + "epoch": 0.16, + "learning_rate": 1.9175428004000695e-05, + "loss": 0.3479, + "step": 1823 + }, + { + "epoch": 0.16, + "learning_rate": 1.917432371476846e-05, + "loss": 0.3928, + "step": 1824 + }, + { + "epoch": 0.16, + "learning_rate": 1.9173218718424683e-05, + "loss": 0.3532, + "step": 1825 + }, + { + "epoch": 0.16, + "learning_rate": 1.917211301505453e-05, + "loss": 0.2908, + "step": 1826 + }, + { + "epoch": 0.16, + "learning_rate": 1.917100660474323e-05, + "loss": 0.3114, + "step": 1827 + }, + { + "epoch": 0.16, + "learning_rate": 1.9169899487576056e-05, + "loss": 0.3756, + "step": 1828 + }, + { + "epoch": 0.16, + "learning_rate": 1.9168791663638338e-05, + "loss": 0.2842, + "step": 1829 + }, + { + "epoch": 0.16, + "learning_rate": 1.9167683133015465e-05, + "loss": 0.3354, + "step": 1830 + }, + { + "epoch": 0.16, + "learning_rate": 1.9166573895792873e-05, + "loss": 0.5896, + "step": 1831 + }, + { + "epoch": 0.16, + "learning_rate": 1.916546395205606e-05, + "loss": 0.3014, + "step": 1832 + }, + { + "epoch": 0.16, + "learning_rate": 1.916435330189057e-05, + "loss": 0.2882, + "step": 1833 + }, + { + "epoch": 0.16, + "learning_rate": 1.9163241945382012e-05, + "loss": 0.3216, + "step": 1834 + }, + { + "epoch": 0.16, + "learning_rate": 1.9162129882616046e-05, + "loss": 0.3249, + "step": 1835 + }, + { + "epoch": 0.16, + "learning_rate": 1.9161017113678376e-05, + "loss": 0.3039, + "step": 1836 + }, + { + "epoch": 0.16, + "learning_rate": 1.9159903638654773e-05, + "loss": 0.3002, + "step": 1837 + }, + { + "epoch": 0.16, + "learning_rate": 1.9158789457631054e-05, + "loss": 0.2936, + "step": 1838 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157674570693104e-05, + "loss": 0.3344, + "step": 1839 + }, + { + "epoch": 0.16, + "learning_rate": 1.9156558977926847e-05, + "loss": 0.3528, + "step": 1840 + }, + { + "epoch": 0.16, + "learning_rate": 1.9155442679418267e-05, + "loss": 0.3334, + "step": 1841 + }, + { + "epoch": 0.16, + "learning_rate": 1.91543256752534e-05, + "loss": 0.3035, + "step": 1842 + }, + { + "epoch": 0.16, + "learning_rate": 1.915320796551835e-05, + "loss": 0.3264, + "step": 1843 + }, + { + "epoch": 0.16, + "learning_rate": 1.9152089550299253e-05, + "loss": 0.2941, + "step": 1844 + }, + { + "epoch": 0.16, + "learning_rate": 1.9150970429682316e-05, + "loss": 0.3088, + "step": 1845 + }, + { + "epoch": 0.16, + "learning_rate": 1.9149850603753793e-05, + "loss": 0.3586, + "step": 1846 + }, + { + "epoch": 0.16, + "learning_rate": 1.91487300726e-05, + "loss": 0.3423, + "step": 1847 + }, + { + "epoch": 0.16, + "learning_rate": 1.9147608836307296e-05, + "loss": 0.2903, + "step": 1848 + }, + { + "epoch": 0.16, + "learning_rate": 1.91464868949621e-05, + "loss": 0.3341, + "step": 1849 + }, + { + "epoch": 0.16, + "learning_rate": 1.9145364248650892e-05, + "loss": 0.3488, + "step": 1850 + }, + { + "epoch": 0.16, + "learning_rate": 1.9144240897460195e-05, + "loss": 0.2908, + "step": 1851 + }, + { + "epoch": 0.16, + "learning_rate": 1.914311684147659e-05, + "loss": 0.3142, + "step": 1852 + }, + { + "epoch": 0.16, + "learning_rate": 1.9141992080786718e-05, + "loss": 0.3373, + "step": 1853 + }, + { + "epoch": 0.16, + "learning_rate": 1.9140866615477272e-05, + "loss": 0.2561, + "step": 1854 + }, + { + "epoch": 0.16, + "learning_rate": 1.913974044563499e-05, + "loss": 0.3109, + "step": 1855 + }, + { + "epoch": 0.16, + "learning_rate": 1.9138613571346676e-05, + "loss": 0.272, + "step": 1856 + }, + { + "epoch": 0.16, + "learning_rate": 1.9137485992699182e-05, + "loss": 0.289, + "step": 1857 + }, + { + "epoch": 0.16, + "learning_rate": 1.9136357709779418e-05, + "loss": 0.3573, + "step": 1858 + }, + { + "epoch": 0.16, + "learning_rate": 1.9135228722674347e-05, + "loss": 0.3286, + "step": 1859 + }, + { + "epoch": 0.16, + "learning_rate": 1.913409903147098e-05, + "loss": 0.27, + "step": 1860 + }, + { + "epoch": 0.16, + "learning_rate": 1.91329686362564e-05, + "loss": 0.2725, + "step": 1861 + }, + { + "epoch": 0.16, + "learning_rate": 1.9131837537117724e-05, + "loss": 0.3669, + "step": 1862 + }, + { + "epoch": 0.16, + "learning_rate": 1.9130705734142127e-05, + "loss": 0.3441, + "step": 1863 + }, + { + "epoch": 0.16, + "learning_rate": 1.912957322741685e-05, + "loss": 0.2839, + "step": 1864 + }, + { + "epoch": 0.16, + "learning_rate": 1.912844001702918e-05, + "loss": 0.3696, + "step": 1865 + }, + { + "epoch": 0.16, + "learning_rate": 1.912730610306646e-05, + "loss": 0.2759, + "step": 1866 + }, + { + "epoch": 0.16, + "learning_rate": 1.912617148561608e-05, + "loss": 0.3264, + "step": 1867 + }, + { + "epoch": 0.16, + "learning_rate": 1.9125036164765502e-05, + "loss": 0.2915, + "step": 1868 + }, + { + "epoch": 0.16, + "learning_rate": 1.912390014060222e-05, + "loss": 0.3107, + "step": 1869 + }, + { + "epoch": 0.16, + "learning_rate": 1.91227634132138e-05, + "loss": 0.3126, + "step": 1870 + }, + { + "epoch": 0.16, + "learning_rate": 1.9121625982687854e-05, + "loss": 0.3183, + "step": 1871 + }, + { + "epoch": 0.16, + "learning_rate": 1.9120487849112046e-05, + "loss": 0.3176, + "step": 1872 + }, + { + "epoch": 0.16, + "learning_rate": 1.91193490125741e-05, + "loss": 0.3382, + "step": 1873 + }, + { + "epoch": 0.16, + "learning_rate": 1.9118209473161794e-05, + "loss": 0.2983, + "step": 1874 + }, + { + "epoch": 0.16, + "learning_rate": 1.9117069230962956e-05, + "loss": 0.3129, + "step": 1875 + }, + { + "epoch": 0.16, + "learning_rate": 1.9115928286065467e-05, + "loss": 0.2848, + "step": 1876 + }, + { + "epoch": 0.16, + "learning_rate": 1.9114786638557272e-05, + "loss": 0.2885, + "step": 1877 + }, + { + "epoch": 0.16, + "learning_rate": 1.911364428852636e-05, + "loss": 0.2938, + "step": 1878 + }, + { + "epoch": 0.16, + "learning_rate": 1.9112501236060777e-05, + "loss": 0.2717, + "step": 1879 + }, + { + "epoch": 0.16, + "learning_rate": 1.9111357481248627e-05, + "loss": 0.3409, + "step": 1880 + }, + { + "epoch": 0.16, + "learning_rate": 1.9110213024178062e-05, + "loss": 0.3215, + "step": 1881 + }, + { + "epoch": 0.16, + "learning_rate": 1.9109067864937292e-05, + "loss": 0.3062, + "step": 1882 + }, + { + "epoch": 0.16, + "learning_rate": 1.910792200361458e-05, + "loss": 0.3286, + "step": 1883 + }, + { + "epoch": 0.16, + "learning_rate": 1.9106775440298242e-05, + "loss": 0.2964, + "step": 1884 + }, + { + "epoch": 0.16, + "learning_rate": 1.9105628175076654e-05, + "loss": 0.2551, + "step": 1885 + }, + { + "epoch": 0.16, + "learning_rate": 1.9104480208038236e-05, + "loss": 0.3083, + "step": 1886 + }, + { + "epoch": 0.16, + "learning_rate": 1.9103331539271473e-05, + "loss": 0.3112, + "step": 1887 + }, + { + "epoch": 0.16, + "learning_rate": 1.9102182168864894e-05, + "loss": 0.279, + "step": 1888 + }, + { + "epoch": 0.16, + "learning_rate": 1.9101032096907088e-05, + "loss": 0.2707, + "step": 1889 + }, + { + "epoch": 0.16, + "learning_rate": 1.90998813234867e-05, + "loss": 0.2793, + "step": 1890 + }, + { + "epoch": 0.16, + "learning_rate": 1.909872984869242e-05, + "loss": 0.3558, + "step": 1891 + }, + { + "epoch": 0.16, + "learning_rate": 1.9097577672613002e-05, + "loss": 0.2997, + "step": 1892 + }, + { + "epoch": 0.16, + "learning_rate": 1.909642479533725e-05, + "loss": 0.288, + "step": 1893 + }, + { + "epoch": 0.16, + "learning_rate": 1.9095271216954022e-05, + "loss": 0.2843, + "step": 1894 + }, + { + "epoch": 0.16, + "learning_rate": 1.909411693755223e-05, + "loss": 0.327, + "step": 1895 + }, + { + "epoch": 0.16, + "learning_rate": 1.909296195722084e-05, + "loss": 0.3427, + "step": 1896 + }, + { + "epoch": 0.16, + "learning_rate": 1.909180627604887e-05, + "loss": 0.2953, + "step": 1897 + }, + { + "epoch": 0.16, + "learning_rate": 1.9090649894125395e-05, + "loss": 0.2892, + "step": 1898 + }, + { + "epoch": 0.16, + "learning_rate": 1.908949281153955e-05, + "loss": 0.3901, + "step": 1899 + }, + { + "epoch": 0.16, + "learning_rate": 1.9088335028380504e-05, + "loss": 0.3151, + "step": 1900 + }, + { + "epoch": 0.16, + "learning_rate": 1.9087176544737507e-05, + "loss": 0.3711, + "step": 1901 + }, + { + "epoch": 0.16, + "learning_rate": 1.9086017360699843e-05, + "loss": 0.2921, + "step": 1902 + }, + { + "epoch": 0.16, + "learning_rate": 1.9084857476356852e-05, + "loss": 0.3477, + "step": 1903 + }, + { + "epoch": 0.16, + "learning_rate": 1.908369689179794e-05, + "loss": 0.2933, + "step": 1904 + }, + { + "epoch": 0.16, + "learning_rate": 1.9082535607112554e-05, + "loss": 0.2968, + "step": 1905 + }, + { + "epoch": 0.16, + "learning_rate": 1.9081373622390204e-05, + "loss": 0.3433, + "step": 1906 + }, + { + "epoch": 0.16, + "learning_rate": 1.9080210937720443e-05, + "loss": 0.2717, + "step": 1907 + }, + { + "epoch": 0.16, + "learning_rate": 1.907904755319289e-05, + "loss": 0.3653, + "step": 1908 + }, + { + "epoch": 0.16, + "learning_rate": 1.9077883468897215e-05, + "loss": 0.2932, + "step": 1909 + }, + { + "epoch": 0.16, + "learning_rate": 1.9076718684923136e-05, + "loss": 0.3005, + "step": 1910 + }, + { + "epoch": 0.16, + "learning_rate": 1.9075553201360432e-05, + "loss": 0.2938, + "step": 1911 + }, + { + "epoch": 0.16, + "learning_rate": 1.907438701829893e-05, + "loss": 0.3301, + "step": 1912 + }, + { + "epoch": 0.16, + "learning_rate": 1.9073220135828513e-05, + "loss": 0.3149, + "step": 1913 + }, + { + "epoch": 0.16, + "learning_rate": 1.9072052554039123e-05, + "loss": 0.3231, + "step": 1914 + }, + { + "epoch": 0.16, + "learning_rate": 1.9070884273020745e-05, + "loss": 0.3517, + "step": 1915 + }, + { + "epoch": 0.16, + "learning_rate": 1.906971529286343e-05, + "loss": 0.6, + "step": 1916 + }, + { + "epoch": 0.16, + "learning_rate": 1.9068545613657273e-05, + "loss": 0.3317, + "step": 1917 + }, + { + "epoch": 0.16, + "learning_rate": 1.906737523549243e-05, + "loss": 0.3067, + "step": 1918 + }, + { + "epoch": 0.16, + "learning_rate": 1.906620415845911e-05, + "loss": 0.2936, + "step": 1919 + }, + { + "epoch": 0.16, + "learning_rate": 1.9065032382647566e-05, + "loss": 0.3136, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 1.9063859908148123e-05, + "loss": 0.3419, + "step": 1921 + }, + { + "epoch": 0.16, + "learning_rate": 1.906268673505114e-05, + "loss": 0.2871, + "step": 1922 + }, + { + "epoch": 0.16, + "learning_rate": 1.9061512863447046e-05, + "loss": 0.3053, + "step": 1923 + }, + { + "epoch": 0.16, + "learning_rate": 1.906033829342631e-05, + "loss": 0.2992, + "step": 1924 + }, + { + "epoch": 0.17, + "learning_rate": 1.905916302507947e-05, + "loss": 0.3175, + "step": 1925 + }, + { + "epoch": 0.17, + "learning_rate": 1.9057987058497106e-05, + "loss": 0.3081, + "step": 1926 + }, + { + "epoch": 0.17, + "learning_rate": 1.9056810393769857e-05, + "loss": 0.3236, + "step": 1927 + }, + { + "epoch": 0.17, + "learning_rate": 1.9055633030988417e-05, + "loss": 0.2919, + "step": 1928 + }, + { + "epoch": 0.17, + "learning_rate": 1.9054454970243525e-05, + "loss": 0.3991, + "step": 1929 + }, + { + "epoch": 0.17, + "learning_rate": 1.905327621162598e-05, + "loss": 0.338, + "step": 1930 + }, + { + "epoch": 0.17, + "learning_rate": 1.9052096755226643e-05, + "loss": 0.2745, + "step": 1931 + }, + { + "epoch": 0.17, + "learning_rate": 1.9050916601136418e-05, + "loss": 0.3105, + "step": 1932 + }, + { + "epoch": 0.17, + "learning_rate": 1.904973574944626e-05, + "loss": 0.3568, + "step": 1933 + }, + { + "epoch": 0.17, + "learning_rate": 1.9048554200247184e-05, + "loss": 0.2861, + "step": 1934 + }, + { + "epoch": 0.17, + "learning_rate": 1.9047371953630262e-05, + "loss": 0.3148, + "step": 1935 + }, + { + "epoch": 0.17, + "learning_rate": 1.904618900968662e-05, + "loss": 0.2726, + "step": 1936 + }, + { + "epoch": 0.17, + "learning_rate": 1.9045005368507418e-05, + "loss": 0.3517, + "step": 1937 + }, + { + "epoch": 0.17, + "learning_rate": 1.90438210301839e-05, + "loss": 0.3078, + "step": 1938 + }, + { + "epoch": 0.17, + "learning_rate": 1.9042635994807344e-05, + "loss": 0.2863, + "step": 1939 + }, + { + "epoch": 0.17, + "learning_rate": 1.9041450262469087e-05, + "loss": 0.4042, + "step": 1940 + }, + { + "epoch": 0.17, + "learning_rate": 1.9040263833260513e-05, + "loss": 0.3239, + "step": 1941 + }, + { + "epoch": 0.17, + "learning_rate": 1.903907670727308e-05, + "loss": 0.3154, + "step": 1942 + }, + { + "epoch": 0.17, + "learning_rate": 1.9037888884598272e-05, + "loss": 0.3273, + "step": 1943 + }, + { + "epoch": 0.17, + "learning_rate": 1.9036700365327648e-05, + "loss": 0.3491, + "step": 1944 + }, + { + "epoch": 0.17, + "learning_rate": 1.9035511149552816e-05, + "loss": 0.3012, + "step": 1945 + }, + { + "epoch": 0.17, + "learning_rate": 1.9034321237365424e-05, + "loss": 0.3345, + "step": 1946 + }, + { + "epoch": 0.17, + "learning_rate": 1.9033130628857194e-05, + "loss": 0.6304, + "step": 1947 + }, + { + "epoch": 0.17, + "learning_rate": 1.903193932411989e-05, + "loss": 0.3242, + "step": 1948 + }, + { + "epoch": 0.17, + "learning_rate": 1.903074732324533e-05, + "loss": 0.3118, + "step": 1949 + }, + { + "epoch": 0.17, + "learning_rate": 1.9029554626325386e-05, + "loss": 0.2962, + "step": 1950 + }, + { + "epoch": 0.17, + "learning_rate": 1.902836123345199e-05, + "loss": 0.2737, + "step": 1951 + }, + { + "epoch": 0.17, + "learning_rate": 1.902716714471712e-05, + "loss": 0.3114, + "step": 1952 + }, + { + "epoch": 0.17, + "learning_rate": 1.9025972360212813e-05, + "loss": 0.3246, + "step": 1953 + }, + { + "epoch": 0.17, + "learning_rate": 1.9024776880031154e-05, + "loss": 0.3302, + "step": 1954 + }, + { + "epoch": 0.17, + "learning_rate": 1.9023580704264284e-05, + "loss": 0.2795, + "step": 1955 + }, + { + "epoch": 0.17, + "learning_rate": 1.9022383833004404e-05, + "loss": 0.3148, + "step": 1956 + }, + { + "epoch": 0.17, + "learning_rate": 1.9021186266343756e-05, + "loss": 0.2803, + "step": 1957 + }, + { + "epoch": 0.17, + "learning_rate": 1.9019988004374645e-05, + "loss": 0.291, + "step": 1958 + }, + { + "epoch": 0.17, + "learning_rate": 1.901878904718943e-05, + "loss": 0.3284, + "step": 1959 + }, + { + "epoch": 0.17, + "learning_rate": 1.9017589394880515e-05, + "loss": 0.2957, + "step": 1960 + }, + { + "epoch": 0.17, + "learning_rate": 1.9016389047540368e-05, + "loss": 0.2958, + "step": 1961 + }, + { + "epoch": 0.17, + "learning_rate": 1.9015188005261505e-05, + "loss": 0.3433, + "step": 1962 + }, + { + "epoch": 0.17, + "learning_rate": 1.90139862681365e-05, + "loss": 0.2927, + "step": 1963 + }, + { + "epoch": 0.17, + "learning_rate": 1.901278383625797e-05, + "loss": 0.2879, + "step": 1964 + }, + { + "epoch": 0.17, + "learning_rate": 1.9011580709718594e-05, + "loss": 0.283, + "step": 1965 + }, + { + "epoch": 0.17, + "learning_rate": 1.9010376888611106e-05, + "loss": 0.3064, + "step": 1966 + }, + { + "epoch": 0.17, + "learning_rate": 1.9009172373028286e-05, + "loss": 0.3329, + "step": 1967 + }, + { + "epoch": 0.17, + "learning_rate": 1.9007967163062978e-05, + "loss": 0.2832, + "step": 1968 + }, + { + "epoch": 0.17, + "learning_rate": 1.900676125880807e-05, + "loss": 0.2922, + "step": 1969 + }, + { + "epoch": 0.17, + "learning_rate": 1.9005554660356505e-05, + "loss": 0.3642, + "step": 1970 + }, + { + "epoch": 0.17, + "learning_rate": 1.9004347367801288e-05, + "loss": 0.2974, + "step": 1971 + }, + { + "epoch": 0.17, + "learning_rate": 1.9003139381235467e-05, + "loss": 0.2886, + "step": 1972 + }, + { + "epoch": 0.17, + "learning_rate": 1.900193070075215e-05, + "loss": 0.2899, + "step": 1973 + }, + { + "epoch": 0.17, + "learning_rate": 1.9000721326444492e-05, + "loss": 0.6342, + "step": 1974 + }, + { + "epoch": 0.17, + "learning_rate": 1.899951125840571e-05, + "loss": 0.2646, + "step": 1975 + }, + { + "epoch": 0.17, + "learning_rate": 1.8998300496729066e-05, + "loss": 0.2878, + "step": 1976 + }, + { + "epoch": 0.17, + "learning_rate": 1.8997089041507882e-05, + "loss": 0.2545, + "step": 1977 + }, + { + "epoch": 0.17, + "learning_rate": 1.899587689283553e-05, + "loss": 0.2688, + "step": 1978 + }, + { + "epoch": 0.17, + "learning_rate": 1.8994664050805437e-05, + "loss": 0.3099, + "step": 1979 + }, + { + "epoch": 0.17, + "learning_rate": 1.8993450515511086e-05, + "loss": 0.3057, + "step": 1980 + }, + { + "epoch": 0.17, + "learning_rate": 1.8992236287046008e-05, + "loss": 0.6141, + "step": 1981 + }, + { + "epoch": 0.17, + "learning_rate": 1.8991021365503782e-05, + "loss": 0.3133, + "step": 1982 + }, + { + "epoch": 0.17, + "learning_rate": 1.8989805750978062e-05, + "loss": 0.3036, + "step": 1983 + }, + { + "epoch": 0.17, + "learning_rate": 1.898858944356253e-05, + "loss": 0.2885, + "step": 1984 + }, + { + "epoch": 0.17, + "learning_rate": 1.898737244335094e-05, + "loss": 0.2827, + "step": 1985 + }, + { + "epoch": 0.17, + "learning_rate": 1.898615475043709e-05, + "loss": 0.2995, + "step": 1986 + }, + { + "epoch": 0.17, + "learning_rate": 1.8984936364914835e-05, + "loss": 0.3311, + "step": 1987 + }, + { + "epoch": 0.17, + "learning_rate": 1.8983717286878078e-05, + "loss": 0.2932, + "step": 1988 + }, + { + "epoch": 0.17, + "learning_rate": 1.898249751642078e-05, + "loss": 0.3197, + "step": 1989 + }, + { + "epoch": 0.17, + "learning_rate": 1.8981277053636963e-05, + "loss": 0.3235, + "step": 1990 + }, + { + "epoch": 0.17, + "learning_rate": 1.8980055898620688e-05, + "loss": 0.3088, + "step": 1991 + }, + { + "epoch": 0.17, + "learning_rate": 1.8978834051466073e-05, + "loss": 0.3177, + "step": 1992 + }, + { + "epoch": 0.17, + "learning_rate": 1.8977611512267294e-05, + "loss": 0.2531, + "step": 1993 + }, + { + "epoch": 0.17, + "learning_rate": 1.8976388281118584e-05, + "loss": 0.3004, + "step": 1994 + }, + { + "epoch": 0.17, + "learning_rate": 1.8975164358114216e-05, + "loss": 0.3114, + "step": 1995 + }, + { + "epoch": 0.17, + "learning_rate": 1.8973939743348527e-05, + "loss": 0.3235, + "step": 1996 + }, + { + "epoch": 0.17, + "learning_rate": 1.8972714436915905e-05, + "loss": 0.3309, + "step": 1997 + }, + { + "epoch": 0.17, + "learning_rate": 1.897148843891079e-05, + "loss": 0.3483, + "step": 1998 + }, + { + "epoch": 0.17, + "learning_rate": 1.8970261749427674e-05, + "loss": 0.3162, + "step": 1999 + }, + { + "epoch": 0.17, + "learning_rate": 1.8969034368561105e-05, + "loss": 0.3036, + "step": 2000 + }, + { + "epoch": 0.17, + "learning_rate": 1.8967806296405686e-05, + "loss": 0.2573, + "step": 2001 + }, + { + "epoch": 0.17, + "learning_rate": 1.896657753305607e-05, + "loss": 0.2863, + "step": 2002 + }, + { + "epoch": 0.17, + "learning_rate": 1.8965348078606962e-05, + "loss": 0.317, + "step": 2003 + }, + { + "epoch": 0.17, + "learning_rate": 1.896411793315312e-05, + "loss": 0.3018, + "step": 2004 + }, + { + "epoch": 0.17, + "learning_rate": 1.8962887096789363e-05, + "loss": 0.3557, + "step": 2005 + }, + { + "epoch": 0.17, + "learning_rate": 1.8961655569610557e-05, + "loss": 0.3107, + "step": 2006 + }, + { + "epoch": 0.17, + "learning_rate": 1.8960423351711622e-05, + "loss": 0.3047, + "step": 2007 + }, + { + "epoch": 0.17, + "learning_rate": 1.8959190443187525e-05, + "loss": 0.3517, + "step": 2008 + }, + { + "epoch": 0.17, + "learning_rate": 1.89579568441333e-05, + "loss": 0.3871, + "step": 2009 + }, + { + "epoch": 0.17, + "learning_rate": 1.8956722554644026e-05, + "loss": 0.305, + "step": 2010 + }, + { + "epoch": 0.17, + "learning_rate": 1.895548757481483e-05, + "loss": 0.2921, + "step": 2011 + }, + { + "epoch": 0.17, + "learning_rate": 1.8954251904740904e-05, + "loss": 0.2544, + "step": 2012 + }, + { + "epoch": 0.17, + "learning_rate": 1.8953015544517482e-05, + "loss": 0.2961, + "step": 2013 + }, + { + "epoch": 0.17, + "learning_rate": 1.8951778494239862e-05, + "loss": 0.2822, + "step": 2014 + }, + { + "epoch": 0.17, + "learning_rate": 1.895054075400339e-05, + "loss": 0.2968, + "step": 2015 + }, + { + "epoch": 0.17, + "learning_rate": 1.894930232390346e-05, + "loss": 0.3514, + "step": 2016 + }, + { + "epoch": 0.17, + "learning_rate": 1.894806320403553e-05, + "loss": 0.3347, + "step": 2017 + }, + { + "epoch": 0.17, + "learning_rate": 1.89468233944951e-05, + "loss": 0.2739, + "step": 2018 + }, + { + "epoch": 0.17, + "learning_rate": 1.894558289537773e-05, + "loss": 0.2888, + "step": 2019 + }, + { + "epoch": 0.17, + "learning_rate": 1.8944341706779033e-05, + "loss": 0.3325, + "step": 2020 + }, + { + "epoch": 0.17, + "learning_rate": 1.894309982879467e-05, + "loss": 0.313, + "step": 2021 + }, + { + "epoch": 0.17, + "learning_rate": 1.8941857261520363e-05, + "loss": 0.2653, + "step": 2022 + }, + { + "epoch": 0.17, + "learning_rate": 1.8940614005051882e-05, + "loss": 0.3132, + "step": 2023 + }, + { + "epoch": 0.17, + "learning_rate": 1.893937005948505e-05, + "loss": 0.3065, + "step": 2024 + }, + { + "epoch": 0.17, + "learning_rate": 1.8938125424915744e-05, + "loss": 0.3253, + "step": 2025 + }, + { + "epoch": 0.17, + "learning_rate": 1.8936880101439893e-05, + "loss": 0.3522, + "step": 2026 + }, + { + "epoch": 0.17, + "learning_rate": 1.8935634089153486e-05, + "loss": 0.3019, + "step": 2027 + }, + { + "epoch": 0.17, + "learning_rate": 1.8934387388152554e-05, + "loss": 0.3073, + "step": 2028 + }, + { + "epoch": 0.17, + "learning_rate": 1.893313999853319e-05, + "loss": 0.2911, + "step": 2029 + }, + { + "epoch": 0.17, + "learning_rate": 1.8931891920391533e-05, + "loss": 0.2897, + "step": 2030 + }, + { + "epoch": 0.17, + "learning_rate": 1.8930643153823777e-05, + "loss": 0.3073, + "step": 2031 + }, + { + "epoch": 0.17, + "learning_rate": 1.8929393698926177e-05, + "loss": 0.2939, + "step": 2032 + }, + { + "epoch": 0.17, + "learning_rate": 1.8928143555795034e-05, + "loss": 0.3099, + "step": 2033 + }, + { + "epoch": 0.17, + "learning_rate": 1.89268927245267e-05, + "loss": 0.3145, + "step": 2034 + }, + { + "epoch": 0.17, + "learning_rate": 1.8925641205217583e-05, + "loss": 0.2874, + "step": 2035 + }, + { + "epoch": 0.17, + "learning_rate": 1.8924388997964147e-05, + "loss": 0.3191, + "step": 2036 + }, + { + "epoch": 0.17, + "learning_rate": 1.8923136102862902e-05, + "loss": 0.3145, + "step": 2037 + }, + { + "epoch": 0.17, + "learning_rate": 1.8921882520010416e-05, + "loss": 0.3859, + "step": 2038 + }, + { + "epoch": 0.17, + "learning_rate": 1.892062824950331e-05, + "loss": 0.312, + "step": 2039 + }, + { + "epoch": 0.17, + "learning_rate": 1.8919373291438257e-05, + "loss": 0.2589, + "step": 2040 + }, + { + "epoch": 0.17, + "learning_rate": 1.8918117645911985e-05, + "loss": 0.3284, + "step": 2041 + }, + { + "epoch": 0.18, + "learning_rate": 1.8916861313021268e-05, + "loss": 0.3251, + "step": 2042 + }, + { + "epoch": 0.18, + "learning_rate": 1.891560429286294e-05, + "loss": 0.3199, + "step": 2043 + }, + { + "epoch": 0.18, + "learning_rate": 1.891434658553389e-05, + "loss": 0.3227, + "step": 2044 + }, + { + "epoch": 0.18, + "learning_rate": 1.8913088191131047e-05, + "loss": 0.2993, + "step": 2045 + }, + { + "epoch": 0.18, + "learning_rate": 1.891182910975141e-05, + "loss": 0.3227, + "step": 2046 + }, + { + "epoch": 0.18, + "learning_rate": 1.891056934149202e-05, + "loss": 0.3022, + "step": 2047 + }, + { + "epoch": 0.18, + "learning_rate": 1.890930888644997e-05, + "loss": 0.3165, + "step": 2048 + }, + { + "epoch": 0.18, + "learning_rate": 1.8908047744722417e-05, + "loss": 0.3163, + "step": 2049 + }, + { + "epoch": 0.18, + "learning_rate": 1.890678591640656e-05, + "loss": 0.3312, + "step": 2050 + }, + { + "epoch": 0.18, + "learning_rate": 1.8905523401599655e-05, + "loss": 0.3138, + "step": 2051 + }, + { + "epoch": 0.18, + "learning_rate": 1.890426020039901e-05, + "loss": 0.3002, + "step": 2052 + }, + { + "epoch": 0.18, + "learning_rate": 1.890299631290198e-05, + "loss": 0.3511, + "step": 2053 + }, + { + "epoch": 0.18, + "learning_rate": 1.8901731739205992e-05, + "loss": 0.3078, + "step": 2054 + }, + { + "epoch": 0.18, + "learning_rate": 1.8900466479408505e-05, + "loss": 0.3206, + "step": 2055 + }, + { + "epoch": 0.18, + "learning_rate": 1.8899200533607037e-05, + "loss": 0.3278, + "step": 2056 + }, + { + "epoch": 0.18, + "learning_rate": 1.8897933901899165e-05, + "loss": 0.3536, + "step": 2057 + }, + { + "epoch": 0.18, + "learning_rate": 1.8896666584382516e-05, + "loss": 0.318, + "step": 2058 + }, + { + "epoch": 0.18, + "learning_rate": 1.8895398581154763e-05, + "loss": 0.2911, + "step": 2059 + }, + { + "epoch": 0.18, + "learning_rate": 1.8894129892313643e-05, + "loss": 0.2946, + "step": 2060 + }, + { + "epoch": 0.18, + "learning_rate": 1.8892860517956938e-05, + "loss": 0.287, + "step": 2061 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891590458182486e-05, + "loss": 0.6182, + "step": 2062 + }, + { + "epoch": 0.18, + "learning_rate": 1.8890319713088178e-05, + "loss": 0.3275, + "step": 2063 + }, + { + "epoch": 0.18, + "learning_rate": 1.888904828277195e-05, + "loss": 0.2731, + "step": 2064 + }, + { + "epoch": 0.18, + "learning_rate": 1.8887776167331803e-05, + "loss": 0.3074, + "step": 2065 + }, + { + "epoch": 0.18, + "learning_rate": 1.8886503366865786e-05, + "loss": 0.2817, + "step": 2066 + }, + { + "epoch": 0.18, + "learning_rate": 1.8885229881472002e-05, + "loss": 0.3303, + "step": 2067 + }, + { + "epoch": 0.18, + "learning_rate": 1.88839557112486e-05, + "loss": 0.3032, + "step": 2068 + }, + { + "epoch": 0.18, + "learning_rate": 1.8882680856293785e-05, + "loss": 0.3172, + "step": 2069 + }, + { + "epoch": 0.18, + "learning_rate": 1.8881405316705824e-05, + "loss": 0.2908, + "step": 2070 + }, + { + "epoch": 0.18, + "learning_rate": 1.8880129092583027e-05, + "loss": 0.3937, + "step": 2071 + }, + { + "epoch": 0.18, + "learning_rate": 1.8878852184023754e-05, + "loss": 0.2861, + "step": 2072 + }, + { + "epoch": 0.18, + "learning_rate": 1.8877574591126427e-05, + "loss": 0.3243, + "step": 2073 + }, + { + "epoch": 0.18, + "learning_rate": 1.8876296313989516e-05, + "loss": 0.2978, + "step": 2074 + }, + { + "epoch": 0.18, + "learning_rate": 1.8875017352711547e-05, + "loss": 0.3172, + "step": 2075 + }, + { + "epoch": 0.18, + "learning_rate": 1.887373770739109e-05, + "loss": 0.3356, + "step": 2076 + }, + { + "epoch": 0.18, + "learning_rate": 1.8872457378126778e-05, + "loss": 0.3309, + "step": 2077 + }, + { + "epoch": 0.18, + "learning_rate": 1.8871176365017293e-05, + "loss": 0.3307, + "step": 2078 + }, + { + "epoch": 0.18, + "learning_rate": 1.8869894668161365e-05, + "loss": 0.2736, + "step": 2079 + }, + { + "epoch": 0.18, + "learning_rate": 1.8868612287657783e-05, + "loss": 0.2706, + "step": 2080 + }, + { + "epoch": 0.18, + "learning_rate": 1.886732922360539e-05, + "loss": 0.3145, + "step": 2081 + }, + { + "epoch": 0.18, + "learning_rate": 1.8866045476103073e-05, + "loss": 0.3381, + "step": 2082 + }, + { + "epoch": 0.18, + "learning_rate": 1.8864761045249777e-05, + "loss": 0.3324, + "step": 2083 + }, + { + "epoch": 0.18, + "learning_rate": 1.8863475931144506e-05, + "loss": 0.3838, + "step": 2084 + }, + { + "epoch": 0.18, + "learning_rate": 1.8862190133886303e-05, + "loss": 0.3732, + "step": 2085 + }, + { + "epoch": 0.18, + "learning_rate": 1.8860903653574277e-05, + "loss": 0.2498, + "step": 2086 + }, + { + "epoch": 0.18, + "learning_rate": 1.8859616490307578e-05, + "loss": 0.3399, + "step": 2087 + }, + { + "epoch": 0.18, + "learning_rate": 1.8858328644185414e-05, + "loss": 0.3198, + "step": 2088 + }, + { + "epoch": 0.18, + "learning_rate": 1.8857040115307055e-05, + "loss": 0.3476, + "step": 2089 + }, + { + "epoch": 0.18, + "learning_rate": 1.8855750903771805e-05, + "loss": 0.3046, + "step": 2090 + }, + { + "epoch": 0.18, + "learning_rate": 1.885446100967903e-05, + "loss": 0.3245, + "step": 2091 + }, + { + "epoch": 0.18, + "learning_rate": 1.8853170433128155e-05, + "loss": 0.3167, + "step": 2092 + }, + { + "epoch": 0.18, + "learning_rate": 1.8851879174218645e-05, + "loss": 0.2946, + "step": 2093 + }, + { + "epoch": 0.18, + "learning_rate": 1.885058723305003e-05, + "loss": 0.3139, + "step": 2094 + }, + { + "epoch": 0.18, + "learning_rate": 1.884929460972188e-05, + "loss": 0.3494, + "step": 2095 + }, + { + "epoch": 0.18, + "learning_rate": 1.8848001304333828e-05, + "loss": 0.2764, + "step": 2096 + }, + { + "epoch": 0.18, + "learning_rate": 1.8846707316985556e-05, + "loss": 0.3117, + "step": 2097 + }, + { + "epoch": 0.18, + "learning_rate": 1.8845412647776795e-05, + "loss": 0.2834, + "step": 2098 + }, + { + "epoch": 0.18, + "learning_rate": 1.8844117296807332e-05, + "loss": 0.2692, + "step": 2099 + }, + { + "epoch": 0.18, + "learning_rate": 1.8842821264177012e-05, + "loss": 0.2645, + "step": 2100 + }, + { + "epoch": 0.18, + "learning_rate": 1.884152454998572e-05, + "loss": 0.3059, + "step": 2101 + }, + { + "epoch": 0.18, + "learning_rate": 1.8840227154333405e-05, + "loss": 0.3278, + "step": 2102 + }, + { + "epoch": 0.18, + "learning_rate": 1.883892907732006e-05, + "loss": 0.3282, + "step": 2103 + }, + { + "epoch": 0.18, + "learning_rate": 1.883763031904574e-05, + "loss": 0.6036, + "step": 2104 + }, + { + "epoch": 0.18, + "learning_rate": 1.883633087961054e-05, + "loss": 0.2932, + "step": 2105 + }, + { + "epoch": 0.18, + "learning_rate": 1.8835030759114617e-05, + "loss": 0.2885, + "step": 2106 + }, + { + "epoch": 0.18, + "learning_rate": 1.883372995765818e-05, + "loss": 0.2918, + "step": 2107 + }, + { + "epoch": 0.18, + "learning_rate": 1.8832428475341486e-05, + "loss": 0.2858, + "step": 2108 + }, + { + "epoch": 0.18, + "learning_rate": 1.8831126312264843e-05, + "loss": 0.2882, + "step": 2109 + }, + { + "epoch": 0.18, + "learning_rate": 1.8829823468528624e-05, + "loss": 0.331, + "step": 2110 + }, + { + "epoch": 0.18, + "learning_rate": 1.882851994423324e-05, + "loss": 0.3467, + "step": 2111 + }, + { + "epoch": 0.18, + "learning_rate": 1.8827215739479163e-05, + "loss": 0.2709, + "step": 2112 + }, + { + "epoch": 0.18, + "learning_rate": 1.8825910854366914e-05, + "loss": 0.319, + "step": 2113 + }, + { + "epoch": 0.18, + "learning_rate": 1.8824605288997064e-05, + "loss": 0.306, + "step": 2114 + }, + { + "epoch": 0.18, + "learning_rate": 1.8823299043470243e-05, + "loss": 0.3165, + "step": 2115 + }, + { + "epoch": 0.18, + "learning_rate": 1.882199211788713e-05, + "loss": 0.3169, + "step": 2116 + }, + { + "epoch": 0.18, + "learning_rate": 1.8820684512348455e-05, + "loss": 0.3154, + "step": 2117 + }, + { + "epoch": 0.18, + "learning_rate": 1.8819376226955e-05, + "loss": 0.3041, + "step": 2118 + }, + { + "epoch": 0.18, + "learning_rate": 1.8818067261807606e-05, + "loss": 0.303, + "step": 2119 + }, + { + "epoch": 0.18, + "learning_rate": 1.881675761700716e-05, + "loss": 0.2918, + "step": 2120 + }, + { + "epoch": 0.18, + "learning_rate": 1.8815447292654598e-05, + "loss": 0.3339, + "step": 2121 + }, + { + "epoch": 0.18, + "learning_rate": 1.881413628885092e-05, + "loss": 0.2827, + "step": 2122 + }, + { + "epoch": 0.18, + "learning_rate": 1.8812824605697163e-05, + "loss": 0.3256, + "step": 2123 + }, + { + "epoch": 0.18, + "learning_rate": 1.8811512243294436e-05, + "loss": 0.3273, + "step": 2124 + }, + { + "epoch": 0.18, + "learning_rate": 1.8810199201743884e-05, + "loss": 0.2935, + "step": 2125 + }, + { + "epoch": 0.18, + "learning_rate": 1.880888548114671e-05, + "loss": 0.2778, + "step": 2126 + }, + { + "epoch": 0.18, + "learning_rate": 1.8807571081604167e-05, + "loss": 0.3264, + "step": 2127 + }, + { + "epoch": 0.18, + "learning_rate": 1.8806256003217566e-05, + "loss": 0.3231, + "step": 2128 + }, + { + "epoch": 0.18, + "learning_rate": 1.8804940246088265e-05, + "loss": 0.3083, + "step": 2129 + }, + { + "epoch": 0.18, + "learning_rate": 1.8803623810317678e-05, + "loss": 0.3403, + "step": 2130 + }, + { + "epoch": 0.18, + "learning_rate": 1.8802306696007265e-05, + "loss": 0.2906, + "step": 2131 + }, + { + "epoch": 0.18, + "learning_rate": 1.880098890325855e-05, + "loss": 0.3035, + "step": 2132 + }, + { + "epoch": 0.18, + "learning_rate": 1.8799670432173093e-05, + "loss": 0.2933, + "step": 2133 + }, + { + "epoch": 0.18, + "learning_rate": 1.879835128285252e-05, + "loss": 0.3176, + "step": 2134 + }, + { + "epoch": 0.18, + "learning_rate": 1.8797031455398504e-05, + "loss": 0.2634, + "step": 2135 + }, + { + "epoch": 0.18, + "learning_rate": 1.879571094991277e-05, + "loss": 0.3088, + "step": 2136 + }, + { + "epoch": 0.18, + "learning_rate": 1.87943897664971e-05, + "loss": 0.2938, + "step": 2137 + }, + { + "epoch": 0.18, + "learning_rate": 1.8793067905253318e-05, + "loss": 0.3134, + "step": 2138 + }, + { + "epoch": 0.18, + "learning_rate": 1.8791745366283313e-05, + "loss": 0.2851, + "step": 2139 + }, + { + "epoch": 0.18, + "learning_rate": 1.879042214968901e-05, + "loss": 0.317, + "step": 2140 + }, + { + "epoch": 0.18, + "learning_rate": 1.878909825557241e-05, + "loss": 0.2835, + "step": 2141 + }, + { + "epoch": 0.18, + "learning_rate": 1.878777368403554e-05, + "loss": 0.3166, + "step": 2142 + }, + { + "epoch": 0.18, + "learning_rate": 1.87864484351805e-05, + "loss": 0.2952, + "step": 2143 + }, + { + "epoch": 0.18, + "learning_rate": 1.8785122509109425e-05, + "loss": 0.2638, + "step": 2144 + }, + { + "epoch": 0.18, + "learning_rate": 1.8783795905924516e-05, + "loss": 0.2883, + "step": 2145 + }, + { + "epoch": 0.18, + "learning_rate": 1.8782468625728027e-05, + "loss": 0.3054, + "step": 2146 + }, + { + "epoch": 0.18, + "learning_rate": 1.8781140668622243e-05, + "loss": 0.3151, + "step": 2147 + }, + { + "epoch": 0.18, + "learning_rate": 1.8779812034709532e-05, + "loss": 0.2928, + "step": 2148 + }, + { + "epoch": 0.18, + "learning_rate": 1.877848272409229e-05, + "loss": 0.2657, + "step": 2149 + }, + { + "epoch": 0.18, + "learning_rate": 1.877715273687297e-05, + "loss": 0.3471, + "step": 2150 + }, + { + "epoch": 0.18, + "learning_rate": 1.8775822073154093e-05, + "loss": 0.3234, + "step": 2151 + }, + { + "epoch": 0.18, + "learning_rate": 1.8774490733038214e-05, + "loss": 0.3738, + "step": 2152 + }, + { + "epoch": 0.18, + "learning_rate": 1.877315871662794e-05, + "loss": 0.3361, + "step": 2153 + }, + { + "epoch": 0.18, + "learning_rate": 1.8771826024025944e-05, + "loss": 0.302, + "step": 2154 + }, + { + "epoch": 0.18, + "learning_rate": 1.8770492655334938e-05, + "loss": 0.3044, + "step": 2155 + }, + { + "epoch": 0.18, + "learning_rate": 1.87691586106577e-05, + "loss": 0.2828, + "step": 2156 + }, + { + "epoch": 0.18, + "learning_rate": 1.8767823890097044e-05, + "loss": 0.3239, + "step": 2157 + }, + { + "epoch": 0.18, + "learning_rate": 1.8766488493755845e-05, + "loss": 0.3028, + "step": 2158 + }, + { + "epoch": 0.19, + "learning_rate": 1.876515242173703e-05, + "loss": 0.3709, + "step": 2159 + }, + { + "epoch": 0.19, + "learning_rate": 1.8763815674143574e-05, + "loss": 0.2885, + "step": 2160 + }, + { + "epoch": 0.19, + "learning_rate": 1.8762478251078508e-05, + "loss": 0.2751, + "step": 2161 + }, + { + "epoch": 0.19, + "learning_rate": 1.876114015264492e-05, + "loss": 0.2958, + "step": 2162 + }, + { + "epoch": 0.19, + "learning_rate": 1.8759801378945938e-05, + "loss": 0.3157, + "step": 2163 + }, + { + "epoch": 0.19, + "learning_rate": 1.8758461930084745e-05, + "loss": 0.2953, + "step": 2164 + }, + { + "epoch": 0.19, + "learning_rate": 1.8757121806164588e-05, + "loss": 0.2949, + "step": 2165 + }, + { + "epoch": 0.19, + "learning_rate": 1.875578100728875e-05, + "loss": 0.3612, + "step": 2166 + }, + { + "epoch": 0.19, + "learning_rate": 1.8754439533560576e-05, + "loss": 0.3277, + "step": 2167 + }, + { + "epoch": 0.19, + "learning_rate": 1.875309738508346e-05, + "loss": 0.319, + "step": 2168 + }, + { + "epoch": 0.19, + "learning_rate": 1.8751754561960847e-05, + "loss": 0.286, + "step": 2169 + }, + { + "epoch": 0.19, + "learning_rate": 1.8750411064296237e-05, + "loss": 0.2993, + "step": 2170 + }, + { + "epoch": 0.19, + "learning_rate": 1.874906689219318e-05, + "loss": 0.3162, + "step": 2171 + }, + { + "epoch": 0.19, + "learning_rate": 1.8747722045755273e-05, + "loss": 0.2867, + "step": 2172 + }, + { + "epoch": 0.19, + "learning_rate": 1.874637652508618e-05, + "loss": 0.3014, + "step": 2173 + }, + { + "epoch": 0.19, + "learning_rate": 1.87450303302896e-05, + "loss": 0.2891, + "step": 2174 + }, + { + "epoch": 0.19, + "learning_rate": 1.874368346146929e-05, + "loss": 0.2783, + "step": 2175 + }, + { + "epoch": 0.19, + "learning_rate": 1.8742335918729066e-05, + "loss": 0.3362, + "step": 2176 + }, + { + "epoch": 0.19, + "learning_rate": 1.8740987702172787e-05, + "loss": 0.3497, + "step": 2177 + }, + { + "epoch": 0.19, + "learning_rate": 1.8739638811904363e-05, + "loss": 0.3555, + "step": 2178 + }, + { + "epoch": 0.19, + "learning_rate": 1.8738289248027764e-05, + "loss": 0.2637, + "step": 2179 + }, + { + "epoch": 0.19, + "learning_rate": 1.8736939010647008e-05, + "loss": 0.3163, + "step": 2180 + }, + { + "epoch": 0.19, + "learning_rate": 1.873558809986616e-05, + "loss": 0.3246, + "step": 2181 + }, + { + "epoch": 0.19, + "learning_rate": 1.873423651578935e-05, + "loss": 0.3236, + "step": 2182 + }, + { + "epoch": 0.19, + "learning_rate": 1.8732884258520745e-05, + "loss": 0.3394, + "step": 2183 + }, + { + "epoch": 0.19, + "learning_rate": 1.8731531328164572e-05, + "loss": 0.3182, + "step": 2184 + }, + { + "epoch": 0.19, + "learning_rate": 1.8730177724825107e-05, + "loss": 0.2806, + "step": 2185 + }, + { + "epoch": 0.19, + "learning_rate": 1.872882344860668e-05, + "loss": 0.3619, + "step": 2186 + }, + { + "epoch": 0.19, + "learning_rate": 1.8727468499613675e-05, + "loss": 0.2708, + "step": 2187 + }, + { + "epoch": 0.19, + "learning_rate": 1.8726112877950517e-05, + "loss": 0.3092, + "step": 2188 + }, + { + "epoch": 0.19, + "learning_rate": 1.87247565837217e-05, + "loss": 0.2755, + "step": 2189 + }, + { + "epoch": 0.19, + "learning_rate": 1.8723399617031754e-05, + "loss": 0.3241, + "step": 2190 + }, + { + "epoch": 0.19, + "learning_rate": 1.8722041977985264e-05, + "loss": 0.2998, + "step": 2191 + }, + { + "epoch": 0.19, + "learning_rate": 1.8720683666686882e-05, + "loss": 0.311, + "step": 2192 + }, + { + "epoch": 0.19, + "learning_rate": 1.871932468324129e-05, + "loss": 0.3071, + "step": 2193 + }, + { + "epoch": 0.19, + "learning_rate": 1.8717965027753235e-05, + "loss": 0.283, + "step": 2194 + }, + { + "epoch": 0.19, + "learning_rate": 1.8716604700327516e-05, + "loss": 0.3267, + "step": 2195 + }, + { + "epoch": 0.19, + "learning_rate": 1.871524370106897e-05, + "loss": 0.3349, + "step": 2196 + }, + { + "epoch": 0.19, + "learning_rate": 1.871388203008251e-05, + "loss": 0.3089, + "step": 2197 + }, + { + "epoch": 0.19, + "learning_rate": 1.8712519687473075e-05, + "loss": 0.3096, + "step": 2198 + }, + { + "epoch": 0.19, + "learning_rate": 1.8711156673345675e-05, + "loss": 0.3047, + "step": 2199 + }, + { + "epoch": 0.19, + "learning_rate": 1.8709792987805357e-05, + "loss": 0.3143, + "step": 2200 + }, + { + "epoch": 0.19, + "learning_rate": 1.8708428630957236e-05, + "loss": 0.3167, + "step": 2201 + }, + { + "epoch": 0.19, + "learning_rate": 1.8707063602906466e-05, + "loss": 0.2883, + "step": 2202 + }, + { + "epoch": 0.19, + "learning_rate": 1.8705697903758254e-05, + "loss": 0.2916, + "step": 2203 + }, + { + "epoch": 0.19, + "learning_rate": 1.8704331533617866e-05, + "loss": 0.2983, + "step": 2204 + }, + { + "epoch": 0.19, + "learning_rate": 1.8702964492590613e-05, + "loss": 0.3063, + "step": 2205 + }, + { + "epoch": 0.19, + "learning_rate": 1.8701596780781855e-05, + "loss": 0.2296, + "step": 2206 + }, + { + "epoch": 0.19, + "learning_rate": 1.8700228398297017e-05, + "loss": 0.3269, + "step": 2207 + }, + { + "epoch": 0.19, + "learning_rate": 1.8698859345241562e-05, + "loss": 0.3207, + "step": 2208 + }, + { + "epoch": 0.19, + "learning_rate": 1.8697489621721013e-05, + "loss": 0.3031, + "step": 2209 + }, + { + "epoch": 0.19, + "learning_rate": 1.8696119227840937e-05, + "loss": 0.2792, + "step": 2210 + }, + { + "epoch": 0.19, + "learning_rate": 1.869474816370696e-05, + "loss": 0.5715, + "step": 2211 + }, + { + "epoch": 0.19, + "learning_rate": 1.8693376429424756e-05, + "loss": 0.274, + "step": 2212 + }, + { + "epoch": 0.19, + "learning_rate": 1.8692004025100054e-05, + "loss": 0.2643, + "step": 2213 + }, + { + "epoch": 0.19, + "learning_rate": 1.869063095083863e-05, + "loss": 0.3497, + "step": 2214 + }, + { + "epoch": 0.19, + "learning_rate": 1.8689257206746312e-05, + "loss": 0.2688, + "step": 2215 + }, + { + "epoch": 0.19, + "learning_rate": 1.8687882792928987e-05, + "loss": 0.2903, + "step": 2216 + }, + { + "epoch": 0.19, + "learning_rate": 1.868650770949258e-05, + "loss": 0.3087, + "step": 2217 + }, + { + "epoch": 0.19, + "learning_rate": 1.8685131956543082e-05, + "loss": 0.3093, + "step": 2218 + }, + { + "epoch": 0.19, + "learning_rate": 1.8683755534186528e-05, + "loss": 0.3425, + "step": 2219 + }, + { + "epoch": 0.19, + "learning_rate": 1.8682378442529005e-05, + "loss": 0.3351, + "step": 2220 + }, + { + "epoch": 0.19, + "learning_rate": 1.8681000681676652e-05, + "loss": 0.3079, + "step": 2221 + }, + { + "epoch": 0.19, + "learning_rate": 1.867962225173566e-05, + "loss": 0.2826, + "step": 2222 + }, + { + "epoch": 0.19, + "learning_rate": 1.8678243152812273e-05, + "loss": 0.3386, + "step": 2223 + }, + { + "epoch": 0.19, + "learning_rate": 1.8676863385012785e-05, + "loss": 0.3022, + "step": 2224 + }, + { + "epoch": 0.19, + "learning_rate": 1.867548294844354e-05, + "loss": 0.3186, + "step": 2225 + }, + { + "epoch": 0.19, + "learning_rate": 1.8674101843210935e-05, + "loss": 0.278, + "step": 2226 + }, + { + "epoch": 0.19, + "learning_rate": 1.867272006942142e-05, + "loss": 0.6252, + "step": 2227 + }, + { + "epoch": 0.19, + "learning_rate": 1.8671337627181497e-05, + "loss": 0.3765, + "step": 2228 + }, + { + "epoch": 0.19, + "learning_rate": 1.8669954516597717e-05, + "loss": 0.3115, + "step": 2229 + }, + { + "epoch": 0.19, + "learning_rate": 1.866857073777668e-05, + "loss": 0.3462, + "step": 2230 + }, + { + "epoch": 0.19, + "learning_rate": 1.866718629082504e-05, + "loss": 0.3608, + "step": 2231 + }, + { + "epoch": 0.19, + "learning_rate": 1.866580117584951e-05, + "loss": 0.3091, + "step": 2232 + }, + { + "epoch": 0.19, + "learning_rate": 1.8664415392956848e-05, + "loss": 0.3033, + "step": 2233 + }, + { + "epoch": 0.19, + "learning_rate": 1.8663028942253854e-05, + "loss": 0.326, + "step": 2234 + }, + { + "epoch": 0.19, + "learning_rate": 1.86616418238474e-05, + "loss": 0.3051, + "step": 2235 + }, + { + "epoch": 0.19, + "learning_rate": 1.866025403784439e-05, + "loss": 0.2784, + "step": 2236 + }, + { + "epoch": 0.19, + "learning_rate": 1.8658865584351787e-05, + "loss": 0.273, + "step": 2237 + }, + { + "epoch": 0.19, + "learning_rate": 1.865747646347661e-05, + "loss": 0.338, + "step": 2238 + }, + { + "epoch": 0.19, + "learning_rate": 1.865608667532593e-05, + "loss": 0.3135, + "step": 2239 + }, + { + "epoch": 0.19, + "learning_rate": 1.865469622000686e-05, + "loss": 0.2948, + "step": 2240 + }, + { + "epoch": 0.19, + "learning_rate": 1.8653305097626565e-05, + "loss": 0.3037, + "step": 2241 + }, + { + "epoch": 0.19, + "learning_rate": 1.865191330829227e-05, + "loss": 0.3492, + "step": 2242 + }, + { + "epoch": 0.19, + "learning_rate": 1.865052085211125e-05, + "loss": 0.6414, + "step": 2243 + }, + { + "epoch": 0.19, + "learning_rate": 1.8649127729190825e-05, + "loss": 0.3001, + "step": 2244 + }, + { + "epoch": 0.19, + "learning_rate": 1.8647733939638373e-05, + "loss": 0.3071, + "step": 2245 + }, + { + "epoch": 0.19, + "learning_rate": 1.864633948356132e-05, + "loss": 0.2802, + "step": 2246 + }, + { + "epoch": 0.19, + "learning_rate": 1.864494436106714e-05, + "loss": 0.3492, + "step": 2247 + }, + { + "epoch": 0.19, + "learning_rate": 1.8643548572263364e-05, + "loss": 0.2723, + "step": 2248 + }, + { + "epoch": 0.19, + "learning_rate": 1.8642152117257574e-05, + "loss": 0.3063, + "step": 2249 + }, + { + "epoch": 0.19, + "learning_rate": 1.8640754996157397e-05, + "loss": 0.3141, + "step": 2250 + }, + { + "epoch": 0.19, + "learning_rate": 1.8639357209070526e-05, + "loss": 0.2906, + "step": 2251 + }, + { + "epoch": 0.19, + "learning_rate": 1.8637958756104687e-05, + "loss": 0.2592, + "step": 2252 + }, + { + "epoch": 0.19, + "learning_rate": 1.863655963736767e-05, + "loss": 0.308, + "step": 2253 + }, + { + "epoch": 0.19, + "learning_rate": 1.863515985296731e-05, + "loss": 0.325, + "step": 2254 + }, + { + "epoch": 0.19, + "learning_rate": 1.8633759403011496e-05, + "loss": 0.2852, + "step": 2255 + }, + { + "epoch": 0.19, + "learning_rate": 1.8632358287608167e-05, + "loss": 0.3242, + "step": 2256 + }, + { + "epoch": 0.19, + "learning_rate": 1.8630956506865313e-05, + "loss": 0.2997, + "step": 2257 + }, + { + "epoch": 0.19, + "learning_rate": 1.8629554060890982e-05, + "loss": 0.3124, + "step": 2258 + }, + { + "epoch": 0.19, + "learning_rate": 1.8628150949793262e-05, + "loss": 0.3447, + "step": 2259 + }, + { + "epoch": 0.19, + "learning_rate": 1.8626747173680302e-05, + "loss": 0.3174, + "step": 2260 + }, + { + "epoch": 0.19, + "learning_rate": 1.862534273266029e-05, + "loss": 0.3788, + "step": 2261 + }, + { + "epoch": 0.19, + "learning_rate": 1.8623937626841485e-05, + "loss": 0.3508, + "step": 2262 + }, + { + "epoch": 0.19, + "learning_rate": 1.862253185633218e-05, + "loss": 0.2986, + "step": 2263 + }, + { + "epoch": 0.19, + "learning_rate": 1.8621125421240722e-05, + "loss": 0.2567, + "step": 2264 + }, + { + "epoch": 0.19, + "learning_rate": 1.8619718321675514e-05, + "loss": 0.2867, + "step": 2265 + }, + { + "epoch": 0.19, + "learning_rate": 1.861831055774501e-05, + "loss": 0.2954, + "step": 2266 + }, + { + "epoch": 0.19, + "learning_rate": 1.8616902129557718e-05, + "loss": 0.3412, + "step": 2267 + }, + { + "epoch": 0.19, + "learning_rate": 1.861549303722218e-05, + "loss": 0.3212, + "step": 2268 + }, + { + "epoch": 0.19, + "learning_rate": 1.8614083280847013e-05, + "loss": 0.3067, + "step": 2269 + }, + { + "epoch": 0.19, + "learning_rate": 1.8612672860540865e-05, + "loss": 0.3444, + "step": 2270 + }, + { + "epoch": 0.19, + "learning_rate": 1.8611261776412455e-05, + "loss": 0.308, + "step": 2271 + }, + { + "epoch": 0.19, + "learning_rate": 1.8609850028570536e-05, + "loss": 0.279, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 1.8608437617123917e-05, + "loss": 0.3302, + "step": 2273 + }, + { + "epoch": 0.19, + "learning_rate": 1.8607024542181465e-05, + "loss": 0.3188, + "step": 2274 + }, + { + "epoch": 0.2, + "learning_rate": 1.8605610803852086e-05, + "loss": 0.3171, + "step": 2275 + }, + { + "epoch": 0.2, + "learning_rate": 1.8604196402244752e-05, + "loss": 0.2758, + "step": 2276 + }, + { + "epoch": 0.2, + "learning_rate": 1.8602781337468472e-05, + "loss": 0.2822, + "step": 2277 + }, + { + "epoch": 0.2, + "learning_rate": 1.8601365609632315e-05, + "loss": 0.2995, + "step": 2278 + }, + { + "epoch": 0.2, + "learning_rate": 1.8599949218845394e-05, + "loss": 0.333, + "step": 2279 + }, + { + "epoch": 0.2, + "learning_rate": 1.8598532165216882e-05, + "loss": 0.2821, + "step": 2280 + }, + { + "epoch": 0.2, + "learning_rate": 1.8597114448856e-05, + "loss": 0.3109, + "step": 2281 + }, + { + "epoch": 0.2, + "learning_rate": 1.8595696069872013e-05, + "loss": 0.2556, + "step": 2282 + }, + { + "epoch": 0.2, + "learning_rate": 1.8594277028374245e-05, + "loss": 0.3195, + "step": 2283 + }, + { + "epoch": 0.2, + "learning_rate": 1.8592857324472073e-05, + "loss": 0.2954, + "step": 2284 + }, + { + "epoch": 0.2, + "learning_rate": 1.8591436958274914e-05, + "loss": 0.6501, + "step": 2285 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590015929892245e-05, + "loss": 0.2958, + "step": 2286 + }, + { + "epoch": 0.2, + "learning_rate": 1.85885942394336e-05, + "loss": 0.5961, + "step": 2287 + }, + { + "epoch": 0.2, + "learning_rate": 1.858717188700854e-05, + "loss": 0.274, + "step": 2288 + }, + { + "epoch": 0.2, + "learning_rate": 1.8585748872726704e-05, + "loss": 0.3195, + "step": 2289 + }, + { + "epoch": 0.2, + "learning_rate": 1.8584325196697767e-05, + "loss": 0.2817, + "step": 2290 + }, + { + "epoch": 0.2, + "learning_rate": 1.858290085903146e-05, + "loss": 0.2811, + "step": 2291 + }, + { + "epoch": 0.2, + "learning_rate": 1.858147585983757e-05, + "loss": 0.3346, + "step": 2292 + }, + { + "epoch": 0.2, + "learning_rate": 1.858005019922592e-05, + "loss": 0.2809, + "step": 2293 + }, + { + "epoch": 0.2, + "learning_rate": 1.8578623877306394e-05, + "loss": 0.3316, + "step": 2294 + }, + { + "epoch": 0.2, + "learning_rate": 1.8577196894188926e-05, + "loss": 0.3046, + "step": 2295 + }, + { + "epoch": 0.2, + "learning_rate": 1.857576924998351e-05, + "loss": 0.3535, + "step": 2296 + }, + { + "epoch": 0.2, + "learning_rate": 1.8574340944800165e-05, + "loss": 0.2656, + "step": 2297 + }, + { + "epoch": 0.2, + "learning_rate": 1.8572911978748993e-05, + "loss": 0.3254, + "step": 2298 + }, + { + "epoch": 0.2, + "learning_rate": 1.8571482351940124e-05, + "loss": 0.296, + "step": 2299 + }, + { + "epoch": 0.2, + "learning_rate": 1.857005206448375e-05, + "loss": 0.3737, + "step": 2300 + }, + { + "epoch": 0.2, + "learning_rate": 1.8568621116490108e-05, + "loss": 0.3925, + "step": 2301 + }, + { + "epoch": 0.2, + "learning_rate": 1.856718950806949e-05, + "loss": 0.2954, + "step": 2302 + }, + { + "epoch": 0.2, + "learning_rate": 1.8565757239332232e-05, + "loss": 0.3333, + "step": 2303 + }, + { + "epoch": 0.2, + "learning_rate": 1.8564324310388735e-05, + "loss": 0.3148, + "step": 2304 + }, + { + "epoch": 0.2, + "learning_rate": 1.8562890721349434e-05, + "loss": 0.3179, + "step": 2305 + }, + { + "epoch": 0.2, + "learning_rate": 1.856145647232483e-05, + "loss": 0.3184, + "step": 2306 + }, + { + "epoch": 0.2, + "learning_rate": 1.8560021563425462e-05, + "loss": 0.2841, + "step": 2307 + }, + { + "epoch": 0.2, + "learning_rate": 1.8558585994761932e-05, + "loss": 0.3315, + "step": 2308 + }, + { + "epoch": 0.2, + "learning_rate": 1.855714976644488e-05, + "loss": 0.3156, + "step": 2309 + }, + { + "epoch": 0.2, + "learning_rate": 1.8555712878585005e-05, + "loss": 0.3032, + "step": 2310 + }, + { + "epoch": 0.2, + "learning_rate": 1.855427533129306e-05, + "loss": 0.2994, + "step": 2311 + }, + { + "epoch": 0.2, + "learning_rate": 1.8552837124679835e-05, + "loss": 0.334, + "step": 2312 + }, + { + "epoch": 0.2, + "learning_rate": 1.855139825885619e-05, + "loss": 0.2925, + "step": 2313 + }, + { + "epoch": 0.2, + "learning_rate": 1.854995873393302e-05, + "loss": 0.3335, + "step": 2314 + }, + { + "epoch": 0.2, + "learning_rate": 1.8548518550021274e-05, + "loss": 0.327, + "step": 2315 + }, + { + "epoch": 0.2, + "learning_rate": 1.8547077707231963e-05, + "loss": 0.3067, + "step": 2316 + }, + { + "epoch": 0.2, + "learning_rate": 1.8545636205676133e-05, + "loss": 0.2688, + "step": 2317 + }, + { + "epoch": 0.2, + "learning_rate": 1.8544194045464888e-05, + "loss": 0.2933, + "step": 2318 + }, + { + "epoch": 0.2, + "learning_rate": 1.8542751226709382e-05, + "loss": 0.3052, + "step": 2319 + }, + { + "epoch": 0.2, + "learning_rate": 1.8541307749520828e-05, + "loss": 0.2762, + "step": 2320 + }, + { + "epoch": 0.2, + "learning_rate": 1.853986361401047e-05, + "loss": 0.2846, + "step": 2321 + }, + { + "epoch": 0.2, + "learning_rate": 1.8538418820289628e-05, + "loss": 0.3182, + "step": 2322 + }, + { + "epoch": 0.2, + "learning_rate": 1.853697336846965e-05, + "loss": 0.3107, + "step": 2323 + }, + { + "epoch": 0.2, + "learning_rate": 1.8535527258661944e-05, + "loss": 0.3061, + "step": 2324 + }, + { + "epoch": 0.2, + "learning_rate": 1.8534080490977977e-05, + "loss": 0.3303, + "step": 2325 + }, + { + "epoch": 0.2, + "learning_rate": 1.853263306552925e-05, + "loss": 0.2715, + "step": 2326 + }, + { + "epoch": 0.2, + "learning_rate": 1.853118498242733e-05, + "loss": 0.2714, + "step": 2327 + }, + { + "epoch": 0.2, + "learning_rate": 1.8529736241783825e-05, + "loss": 0.3378, + "step": 2328 + }, + { + "epoch": 0.2, + "learning_rate": 1.8528286843710398e-05, + "loss": 0.2717, + "step": 2329 + }, + { + "epoch": 0.2, + "learning_rate": 1.852683678831876e-05, + "loss": 0.3435, + "step": 2330 + }, + { + "epoch": 0.2, + "learning_rate": 1.8525386075720675e-05, + "loss": 0.3502, + "step": 2331 + }, + { + "epoch": 0.2, + "learning_rate": 1.8523934706027952e-05, + "loss": 0.3094, + "step": 2332 + }, + { + "epoch": 0.2, + "learning_rate": 1.8522482679352464e-05, + "loss": 0.3027, + "step": 2333 + }, + { + "epoch": 0.2, + "learning_rate": 1.8521029995806123e-05, + "loss": 0.3154, + "step": 2334 + }, + { + "epoch": 0.2, + "learning_rate": 1.851957665550089e-05, + "loss": 0.2781, + "step": 2335 + }, + { + "epoch": 0.2, + "learning_rate": 1.851812265854879e-05, + "loss": 0.3049, + "step": 2336 + }, + { + "epoch": 0.2, + "learning_rate": 1.851666800506188e-05, + "loss": 0.2771, + "step": 2337 + }, + { + "epoch": 0.2, + "learning_rate": 1.8515212695152284e-05, + "loss": 0.3619, + "step": 2338 + }, + { + "epoch": 0.2, + "learning_rate": 1.851375672893217e-05, + "loss": 0.311, + "step": 2339 + }, + { + "epoch": 0.2, + "learning_rate": 1.851230010651375e-05, + "loss": 0.6145, + "step": 2340 + }, + { + "epoch": 0.2, + "learning_rate": 1.8510842828009303e-05, + "loss": 0.293, + "step": 2341 + }, + { + "epoch": 0.2, + "learning_rate": 1.850938489353114e-05, + "loss": 0.279, + "step": 2342 + }, + { + "epoch": 0.2, + "learning_rate": 1.850792630319164e-05, + "loss": 0.3413, + "step": 2343 + }, + { + "epoch": 0.2, + "learning_rate": 1.8506467057103217e-05, + "loss": 0.2908, + "step": 2344 + }, + { + "epoch": 0.2, + "learning_rate": 1.8505007155378347e-05, + "loss": 0.2761, + "step": 2345 + }, + { + "epoch": 0.2, + "learning_rate": 1.8503546598129547e-05, + "loss": 0.2833, + "step": 2346 + }, + { + "epoch": 0.2, + "learning_rate": 1.8502085385469396e-05, + "loss": 0.2955, + "step": 2347 + }, + { + "epoch": 0.2, + "learning_rate": 1.850062351751051e-05, + "loss": 0.304, + "step": 2348 + }, + { + "epoch": 0.2, + "learning_rate": 1.8499160994365568e-05, + "loss": 0.2961, + "step": 2349 + }, + { + "epoch": 0.2, + "learning_rate": 1.849769781614729e-05, + "loss": 0.2924, + "step": 2350 + }, + { + "epoch": 0.2, + "learning_rate": 1.849623398296846e-05, + "loss": 0.4272, + "step": 2351 + }, + { + "epoch": 0.2, + "learning_rate": 1.8494769494941883e-05, + "loss": 0.3087, + "step": 2352 + }, + { + "epoch": 0.2, + "learning_rate": 1.8493304352180455e-05, + "loss": 0.3331, + "step": 2353 + }, + { + "epoch": 0.2, + "learning_rate": 1.8491838554797096e-05, + "loss": 0.3338, + "step": 2354 + }, + { + "epoch": 0.2, + "learning_rate": 1.8490372102904778e-05, + "loss": 0.2849, + "step": 2355 + }, + { + "epoch": 0.2, + "learning_rate": 1.848890499661653e-05, + "loss": 0.2883, + "step": 2356 + }, + { + "epoch": 0.2, + "learning_rate": 1.848743723604543e-05, + "loss": 0.3171, + "step": 2357 + }, + { + "epoch": 0.2, + "learning_rate": 1.8485968821304604e-05, + "loss": 0.2959, + "step": 2358 + }, + { + "epoch": 0.2, + "learning_rate": 1.8484499752507234e-05, + "loss": 0.3009, + "step": 2359 + }, + { + "epoch": 0.2, + "learning_rate": 1.8483030029766548e-05, + "loss": 0.3013, + "step": 2360 + }, + { + "epoch": 0.2, + "learning_rate": 1.848155965319582e-05, + "loss": 0.3588, + "step": 2361 + }, + { + "epoch": 0.2, + "learning_rate": 1.8480088622908382e-05, + "loss": 0.3114, + "step": 2362 + }, + { + "epoch": 0.2, + "learning_rate": 1.8478616939017615e-05, + "loss": 0.3317, + "step": 2363 + }, + { + "epoch": 0.2, + "learning_rate": 1.847714460163695e-05, + "loss": 0.3439, + "step": 2364 + }, + { + "epoch": 0.2, + "learning_rate": 1.8475671610879864e-05, + "loss": 0.3274, + "step": 2365 + }, + { + "epoch": 0.2, + "learning_rate": 1.847419796685989e-05, + "loss": 0.3441, + "step": 2366 + }, + { + "epoch": 0.2, + "learning_rate": 1.847272366969061e-05, + "loss": 0.2842, + "step": 2367 + }, + { + "epoch": 0.2, + "learning_rate": 1.8471248719485654e-05, + "loss": 0.2921, + "step": 2368 + }, + { + "epoch": 0.2, + "learning_rate": 1.8469773116358705e-05, + "loss": 0.2912, + "step": 2369 + }, + { + "epoch": 0.2, + "learning_rate": 1.8468296860423494e-05, + "loss": 0.3018, + "step": 2370 + }, + { + "epoch": 0.2, + "learning_rate": 1.8466819951793805e-05, + "loss": 0.2886, + "step": 2371 + }, + { + "epoch": 0.2, + "learning_rate": 1.846534239058347e-05, + "loss": 0.3138, + "step": 2372 + }, + { + "epoch": 0.2, + "learning_rate": 1.846386417690637e-05, + "loss": 0.3499, + "step": 2373 + }, + { + "epoch": 0.2, + "learning_rate": 1.8462385310876444e-05, + "loss": 0.3368, + "step": 2374 + }, + { + "epoch": 0.2, + "learning_rate": 1.8460905792607667e-05, + "loss": 0.2726, + "step": 2375 + }, + { + "epoch": 0.2, + "learning_rate": 1.8459425622214082e-05, + "loss": 0.298, + "step": 2376 + }, + { + "epoch": 0.2, + "learning_rate": 1.8457944799809765e-05, + "loss": 0.2847, + "step": 2377 + }, + { + "epoch": 0.2, + "learning_rate": 1.845646332550886e-05, + "loss": 0.2843, + "step": 2378 + }, + { + "epoch": 0.2, + "learning_rate": 1.8454981199425542e-05, + "loss": 0.2782, + "step": 2379 + }, + { + "epoch": 0.2, + "learning_rate": 1.8453498421674055e-05, + "loss": 0.2857, + "step": 2380 + }, + { + "epoch": 0.2, + "learning_rate": 1.845201499236868e-05, + "loss": 0.3391, + "step": 2381 + }, + { + "epoch": 0.2, + "learning_rate": 1.8450530911623747e-05, + "loss": 0.3425, + "step": 2382 + }, + { + "epoch": 0.2, + "learning_rate": 1.844904617955365e-05, + "loss": 0.264, + "step": 2383 + }, + { + "epoch": 0.2, + "learning_rate": 1.8447560796272817e-05, + "loss": 0.2936, + "step": 2384 + }, + { + "epoch": 0.2, + "learning_rate": 1.8446074761895746e-05, + "loss": 0.2835, + "step": 2385 + }, + { + "epoch": 0.2, + "learning_rate": 1.844458807653696e-05, + "loss": 0.2734, + "step": 2386 + }, + { + "epoch": 0.2, + "learning_rate": 1.844310074031105e-05, + "loss": 0.2825, + "step": 2387 + }, + { + "epoch": 0.2, + "learning_rate": 1.8441612753332658e-05, + "loss": 0.2701, + "step": 2388 + }, + { + "epoch": 0.2, + "learning_rate": 1.8440124115716463e-05, + "loss": 0.373, + "step": 2389 + }, + { + "epoch": 0.2, + "learning_rate": 1.843863482757721e-05, + "loss": 0.3769, + "step": 2390 + }, + { + "epoch": 0.2, + "learning_rate": 1.8437144889029675e-05, + "loss": 0.351, + "step": 2391 + }, + { + "epoch": 0.21, + "learning_rate": 1.8435654300188705e-05, + "loss": 0.3513, + "step": 2392 + }, + { + "epoch": 0.21, + "learning_rate": 1.8434163061169178e-05, + "loss": 0.3262, + "step": 2393 + }, + { + "epoch": 0.21, + "learning_rate": 1.8432671172086044e-05, + "loss": 0.3374, + "step": 2394 + }, + { + "epoch": 0.21, + "learning_rate": 1.8431178633054275e-05, + "loss": 0.2755, + "step": 2395 + }, + { + "epoch": 0.21, + "learning_rate": 1.8429685444188922e-05, + "loss": 0.289, + "step": 2396 + }, + { + "epoch": 0.21, + "learning_rate": 1.8428191605605067e-05, + "loss": 0.307, + "step": 2397 + }, + { + "epoch": 0.21, + "learning_rate": 1.8426697117417848e-05, + "loss": 0.3223, + "step": 2398 + }, + { + "epoch": 0.21, + "learning_rate": 1.8425201979742455e-05, + "loss": 0.2998, + "step": 2399 + }, + { + "epoch": 0.21, + "learning_rate": 1.8423706192694118e-05, + "loss": 0.3238, + "step": 2400 + }, + { + "epoch": 0.21, + "learning_rate": 1.8422209756388132e-05, + "loss": 0.3258, + "step": 2401 + }, + { + "epoch": 0.21, + "learning_rate": 1.8420712670939837e-05, + "loss": 0.3314, + "step": 2402 + }, + { + "epoch": 0.21, + "learning_rate": 1.8419214936464613e-05, + "loss": 0.2272, + "step": 2403 + }, + { + "epoch": 0.21, + "learning_rate": 1.8417716553077903e-05, + "loss": 0.2912, + "step": 2404 + }, + { + "epoch": 0.21, + "learning_rate": 1.8416217520895198e-05, + "loss": 0.259, + "step": 2405 + }, + { + "epoch": 0.21, + "learning_rate": 1.841471784003203e-05, + "loss": 0.3311, + "step": 2406 + }, + { + "epoch": 0.21, + "learning_rate": 1.8413217510603988e-05, + "loss": 0.2832, + "step": 2407 + }, + { + "epoch": 0.21, + "learning_rate": 1.8411716532726707e-05, + "loss": 0.3456, + "step": 2408 + }, + { + "epoch": 0.21, + "learning_rate": 1.8410214906515887e-05, + "loss": 0.2956, + "step": 2409 + }, + { + "epoch": 0.21, + "learning_rate": 1.8408712632087256e-05, + "loss": 0.2783, + "step": 2410 + }, + { + "epoch": 0.21, + "learning_rate": 1.8407209709556603e-05, + "loss": 0.3012, + "step": 2411 + }, + { + "epoch": 0.21, + "learning_rate": 1.8405706139039766e-05, + "loss": 0.3292, + "step": 2412 + }, + { + "epoch": 0.21, + "learning_rate": 1.8404201920652635e-05, + "loss": 0.311, + "step": 2413 + }, + { + "epoch": 0.21, + "learning_rate": 1.8402697054511145e-05, + "loss": 0.2782, + "step": 2414 + }, + { + "epoch": 0.21, + "learning_rate": 1.8401191540731286e-05, + "loss": 0.3084, + "step": 2415 + }, + { + "epoch": 0.21, + "learning_rate": 1.839968537942909e-05, + "loss": 0.251, + "step": 2416 + }, + { + "epoch": 0.21, + "learning_rate": 1.839817857072066e-05, + "loss": 0.3433, + "step": 2417 + }, + { + "epoch": 0.21, + "learning_rate": 1.8396671114722112e-05, + "loss": 0.3035, + "step": 2418 + }, + { + "epoch": 0.21, + "learning_rate": 1.839516301154965e-05, + "loss": 0.2882, + "step": 2419 + }, + { + "epoch": 0.21, + "learning_rate": 1.8393654261319504e-05, + "loss": 0.3397, + "step": 2420 + }, + { + "epoch": 0.21, + "learning_rate": 1.839214486414796e-05, + "loss": 0.2822, + "step": 2421 + }, + { + "epoch": 0.21, + "learning_rate": 1.8390634820151353e-05, + "loss": 0.3478, + "step": 2422 + }, + { + "epoch": 0.21, + "learning_rate": 1.8389124129446078e-05, + "loss": 0.3356, + "step": 2423 + }, + { + "epoch": 0.21, + "learning_rate": 1.8387612792148566e-05, + "loss": 0.2992, + "step": 2424 + }, + { + "epoch": 0.21, + "learning_rate": 1.8386100808375305e-05, + "loss": 0.3185, + "step": 2425 + }, + { + "epoch": 0.21, + "learning_rate": 1.8384588178242828e-05, + "loss": 0.2957, + "step": 2426 + }, + { + "epoch": 0.21, + "learning_rate": 1.8383074901867728e-05, + "loss": 0.3258, + "step": 2427 + }, + { + "epoch": 0.21, + "learning_rate": 1.8381560979366633e-05, + "loss": 0.3, + "step": 2428 + }, + { + "epoch": 0.21, + "learning_rate": 1.8380046410856234e-05, + "loss": 0.2985, + "step": 2429 + }, + { + "epoch": 0.21, + "learning_rate": 1.8378531196453265e-05, + "loss": 0.3295, + "step": 2430 + }, + { + "epoch": 0.21, + "learning_rate": 1.8377015336274507e-05, + "loss": 0.6105, + "step": 2431 + }, + { + "epoch": 0.21, + "learning_rate": 1.8375498830436805e-05, + "loss": 0.3095, + "step": 2432 + }, + { + "epoch": 0.21, + "learning_rate": 1.8373981679057036e-05, + "loss": 0.3124, + "step": 2433 + }, + { + "epoch": 0.21, + "learning_rate": 1.8372463882252133e-05, + "loss": 0.3447, + "step": 2434 + }, + { + "epoch": 0.21, + "learning_rate": 1.837094544013909e-05, + "loss": 0.3058, + "step": 2435 + }, + { + "epoch": 0.21, + "learning_rate": 1.8369426352834927e-05, + "loss": 0.3349, + "step": 2436 + }, + { + "epoch": 0.21, + "learning_rate": 1.8367906620456737e-05, + "loss": 0.2932, + "step": 2437 + }, + { + "epoch": 0.21, + "learning_rate": 1.8366386243121654e-05, + "loss": 0.2755, + "step": 2438 + }, + { + "epoch": 0.21, + "learning_rate": 1.8364865220946856e-05, + "loss": 0.3089, + "step": 2439 + }, + { + "epoch": 0.21, + "learning_rate": 1.8363343554049582e-05, + "loss": 0.3532, + "step": 2440 + }, + { + "epoch": 0.21, + "learning_rate": 1.836182124254711e-05, + "loss": 0.2551, + "step": 2441 + }, + { + "epoch": 0.21, + "learning_rate": 1.8360298286556774e-05, + "loss": 0.361, + "step": 2442 + }, + { + "epoch": 0.21, + "learning_rate": 1.8358774686195956e-05, + "loss": 0.2964, + "step": 2443 + }, + { + "epoch": 0.21, + "learning_rate": 1.8357250441582085e-05, + "loss": 0.3021, + "step": 2444 + }, + { + "epoch": 0.21, + "learning_rate": 1.8355725552832648e-05, + "loss": 0.2558, + "step": 2445 + }, + { + "epoch": 0.21, + "learning_rate": 1.8354200020065168e-05, + "loss": 0.2985, + "step": 2446 + }, + { + "epoch": 0.21, + "learning_rate": 1.8352673843397232e-05, + "loss": 0.2946, + "step": 2447 + }, + { + "epoch": 0.21, + "learning_rate": 1.8351147022946468e-05, + "loss": 0.3085, + "step": 2448 + }, + { + "epoch": 0.21, + "learning_rate": 1.8349619558830553e-05, + "loss": 0.6111, + "step": 2449 + }, + { + "epoch": 0.21, + "learning_rate": 1.8348091451167224e-05, + "loss": 0.3421, + "step": 2450 + }, + { + "epoch": 0.21, + "learning_rate": 1.8346562700074253e-05, + "loss": 0.2653, + "step": 2451 + }, + { + "epoch": 0.21, + "learning_rate": 1.834503330566947e-05, + "loss": 0.293, + "step": 2452 + }, + { + "epoch": 0.21, + "learning_rate": 1.8343503268070752e-05, + "loss": 0.315, + "step": 2453 + }, + { + "epoch": 0.21, + "learning_rate": 1.8341972587396032e-05, + "loss": 0.2822, + "step": 2454 + }, + { + "epoch": 0.21, + "learning_rate": 1.8340441263763282e-05, + "loss": 0.3389, + "step": 2455 + }, + { + "epoch": 0.21, + "learning_rate": 1.833890929729053e-05, + "loss": 0.3104, + "step": 2456 + }, + { + "epoch": 0.21, + "learning_rate": 1.8337376688095854e-05, + "loss": 0.325, + "step": 2457 + }, + { + "epoch": 0.21, + "learning_rate": 1.833584343629738e-05, + "loss": 0.3056, + "step": 2458 + }, + { + "epoch": 0.21, + "learning_rate": 1.8334309542013282e-05, + "loss": 0.2958, + "step": 2459 + }, + { + "epoch": 0.21, + "learning_rate": 1.8332775005361786e-05, + "loss": 0.3148, + "step": 2460 + }, + { + "epoch": 0.21, + "learning_rate": 1.8331239826461165e-05, + "loss": 0.3084, + "step": 2461 + }, + { + "epoch": 0.21, + "learning_rate": 1.8329704005429745e-05, + "loss": 0.3241, + "step": 2462 + }, + { + "epoch": 0.21, + "learning_rate": 1.8328167542385898e-05, + "loss": 0.3234, + "step": 2463 + }, + { + "epoch": 0.21, + "learning_rate": 1.8326630437448045e-05, + "loss": 0.2882, + "step": 2464 + }, + { + "epoch": 0.21, + "learning_rate": 1.8325092690734663e-05, + "loss": 0.3796, + "step": 2465 + }, + { + "epoch": 0.21, + "learning_rate": 1.8323554302364273e-05, + "loss": 0.3371, + "step": 2466 + }, + { + "epoch": 0.21, + "learning_rate": 1.8322015272455445e-05, + "loss": 0.3239, + "step": 2467 + }, + { + "epoch": 0.21, + "learning_rate": 1.83204756011268e-05, + "loss": 0.2818, + "step": 2468 + }, + { + "epoch": 0.21, + "learning_rate": 1.8318935288497007e-05, + "loss": 0.312, + "step": 2469 + }, + { + "epoch": 0.21, + "learning_rate": 1.831739433468479e-05, + "loss": 0.2676, + "step": 2470 + }, + { + "epoch": 0.21, + "learning_rate": 1.8315852739808914e-05, + "loss": 0.3077, + "step": 2471 + }, + { + "epoch": 0.21, + "learning_rate": 1.8314310503988198e-05, + "loss": 0.2911, + "step": 2472 + }, + { + "epoch": 0.21, + "learning_rate": 1.831276762734151e-05, + "loss": 0.3268, + "step": 2473 + }, + { + "epoch": 0.21, + "learning_rate": 1.8311224109987768e-05, + "loss": 0.2565, + "step": 2474 + }, + { + "epoch": 0.21, + "learning_rate": 1.830967995204594e-05, + "loss": 0.3039, + "step": 2475 + }, + { + "epoch": 0.21, + "learning_rate": 1.8308135153635037e-05, + "loss": 0.2855, + "step": 2476 + }, + { + "epoch": 0.21, + "learning_rate": 1.830658971487413e-05, + "loss": 0.6318, + "step": 2477 + }, + { + "epoch": 0.21, + "learning_rate": 1.8305043635882334e-05, + "loss": 0.293, + "step": 2478 + }, + { + "epoch": 0.21, + "learning_rate": 1.830349691677881e-05, + "loss": 0.258, + "step": 2479 + }, + { + "epoch": 0.21, + "learning_rate": 1.830194955768277e-05, + "loss": 0.2902, + "step": 2480 + }, + { + "epoch": 0.21, + "learning_rate": 1.830040155871348e-05, + "loss": 0.3034, + "step": 2481 + }, + { + "epoch": 0.21, + "learning_rate": 1.8298852919990254e-05, + "loss": 0.3347, + "step": 2482 + }, + { + "epoch": 0.21, + "learning_rate": 1.8297303641632448e-05, + "loss": 0.2749, + "step": 2483 + }, + { + "epoch": 0.21, + "learning_rate": 1.829575372375948e-05, + "loss": 0.293, + "step": 2484 + }, + { + "epoch": 0.21, + "learning_rate": 1.8294203166490797e-05, + "loss": 0.3049, + "step": 2485 + }, + { + "epoch": 0.21, + "learning_rate": 1.8292651969945923e-05, + "loss": 0.2784, + "step": 2486 + }, + { + "epoch": 0.21, + "learning_rate": 1.8291100134244407e-05, + "loss": 0.2871, + "step": 2487 + }, + { + "epoch": 0.21, + "learning_rate": 1.8289547659505867e-05, + "loss": 0.3049, + "step": 2488 + }, + { + "epoch": 0.21, + "learning_rate": 1.8287994545849948e-05, + "loss": 0.295, + "step": 2489 + }, + { + "epoch": 0.21, + "learning_rate": 1.828644079339636e-05, + "loss": 0.3129, + "step": 2490 + }, + { + "epoch": 0.21, + "learning_rate": 1.8284886402264864e-05, + "loss": 0.2702, + "step": 2491 + }, + { + "epoch": 0.21, + "learning_rate": 1.8283331372575258e-05, + "loss": 0.3613, + "step": 2492 + }, + { + "epoch": 0.21, + "learning_rate": 1.8281775704447402e-05, + "loss": 0.2748, + "step": 2493 + }, + { + "epoch": 0.21, + "learning_rate": 1.8280219398001192e-05, + "loss": 0.2891, + "step": 2494 + }, + { + "epoch": 0.21, + "learning_rate": 1.8278662453356588e-05, + "loss": 0.3527, + "step": 2495 + }, + { + "epoch": 0.21, + "learning_rate": 1.8277104870633588e-05, + "loss": 0.2952, + "step": 2496 + }, + { + "epoch": 0.21, + "learning_rate": 1.827554664995224e-05, + "loss": 0.306, + "step": 2497 + }, + { + "epoch": 0.21, + "learning_rate": 1.827398779143265e-05, + "loss": 0.2878, + "step": 2498 + }, + { + "epoch": 0.21, + "learning_rate": 1.8272428295194965e-05, + "loss": 0.3098, + "step": 2499 + }, + { + "epoch": 0.21, + "learning_rate": 1.8270868161359377e-05, + "loss": 0.327, + "step": 2500 + }, + { + "epoch": 0.21, + "learning_rate": 1.8269307390046143e-05, + "loss": 0.3403, + "step": 2501 + }, + { + "epoch": 0.21, + "learning_rate": 1.8267745981375555e-05, + "loss": 0.3116, + "step": 2502 + }, + { + "epoch": 0.21, + "learning_rate": 1.826618393546796e-05, + "loss": 0.6324, + "step": 2503 + }, + { + "epoch": 0.21, + "learning_rate": 1.826462125244375e-05, + "loss": 0.5909, + "step": 2504 + }, + { + "epoch": 0.21, + "learning_rate": 1.826305793242337e-05, + "loss": 0.2809, + "step": 2505 + }, + { + "epoch": 0.21, + "learning_rate": 1.8261493975527312e-05, + "loss": 0.2873, + "step": 2506 + }, + { + "epoch": 0.21, + "learning_rate": 1.825992938187612e-05, + "loss": 0.3083, + "step": 2507 + }, + { + "epoch": 0.21, + "learning_rate": 1.8258364151590386e-05, + "loss": 0.2934, + "step": 2508 + }, + { + "epoch": 0.22, + "learning_rate": 1.825679828479075e-05, + "loss": 0.3502, + "step": 2509 + }, + { + "epoch": 0.22, + "learning_rate": 1.82552317815979e-05, + "loss": 0.2945, + "step": 2510 + }, + { + "epoch": 0.22, + "learning_rate": 1.8253664642132576e-05, + "loss": 0.2768, + "step": 2511 + }, + { + "epoch": 0.22, + "learning_rate": 1.825209686651556e-05, + "loss": 0.3544, + "step": 2512 + }, + { + "epoch": 0.22, + "learning_rate": 1.8250528454867695e-05, + "loss": 0.3334, + "step": 2513 + }, + { + "epoch": 0.22, + "learning_rate": 1.8248959407309862e-05, + "loss": 0.2967, + "step": 2514 + }, + { + "epoch": 0.22, + "learning_rate": 1.8247389723962998e-05, + "loss": 0.2784, + "step": 2515 + }, + { + "epoch": 0.22, + "learning_rate": 1.8245819404948088e-05, + "loss": 0.2604, + "step": 2516 + }, + { + "epoch": 0.22, + "learning_rate": 1.8244248450386156e-05, + "loss": 0.265, + "step": 2517 + }, + { + "epoch": 0.22, + "learning_rate": 1.8242676860398295e-05, + "loss": 0.2751, + "step": 2518 + }, + { + "epoch": 0.22, + "learning_rate": 1.8241104635105627e-05, + "loss": 0.2861, + "step": 2519 + }, + { + "epoch": 0.22, + "learning_rate": 1.823953177462934e-05, + "loss": 0.3376, + "step": 2520 + }, + { + "epoch": 0.22, + "learning_rate": 1.823795827909065e-05, + "loss": 0.2474, + "step": 2521 + }, + { + "epoch": 0.22, + "learning_rate": 1.8236384148610843e-05, + "loss": 0.2532, + "step": 2522 + }, + { + "epoch": 0.22, + "learning_rate": 1.823480938331124e-05, + "loss": 0.2949, + "step": 2523 + }, + { + "epoch": 0.22, + "learning_rate": 1.8233233983313224e-05, + "loss": 0.325, + "step": 2524 + }, + { + "epoch": 0.22, + "learning_rate": 1.8231657948738212e-05, + "loss": 0.3331, + "step": 2525 + }, + { + "epoch": 0.22, + "learning_rate": 1.8230081279707675e-05, + "loss": 0.3224, + "step": 2526 + }, + { + "epoch": 0.22, + "learning_rate": 1.8228503976343147e-05, + "loss": 0.3739, + "step": 2527 + }, + { + "epoch": 0.22, + "learning_rate": 1.8226926038766185e-05, + "loss": 0.2748, + "step": 2528 + }, + { + "epoch": 0.22, + "learning_rate": 1.8225347467098418e-05, + "loss": 0.3304, + "step": 2529 + }, + { + "epoch": 0.22, + "learning_rate": 1.822376826146151e-05, + "loss": 0.2908, + "step": 2530 + }, + { + "epoch": 0.22, + "learning_rate": 1.8222188421977178e-05, + "loss": 0.3076, + "step": 2531 + }, + { + "epoch": 0.22, + "learning_rate": 1.8220607948767187e-05, + "loss": 0.3163, + "step": 2532 + }, + { + "epoch": 0.22, + "learning_rate": 1.821902684195336e-05, + "loss": 0.2876, + "step": 2533 + }, + { + "epoch": 0.22, + "learning_rate": 1.8217445101657553e-05, + "loss": 0.3032, + "step": 2534 + }, + { + "epoch": 0.22, + "learning_rate": 1.821586272800168e-05, + "loss": 0.319, + "step": 2535 + }, + { + "epoch": 0.22, + "learning_rate": 1.8214279721107705e-05, + "loss": 0.3348, + "step": 2536 + }, + { + "epoch": 0.22, + "learning_rate": 1.8212696081097636e-05, + "loss": 0.301, + "step": 2537 + }, + { + "epoch": 0.22, + "learning_rate": 1.8211111808093534e-05, + "loss": 0.3395, + "step": 2538 + }, + { + "epoch": 0.22, + "learning_rate": 1.8209526902217506e-05, + "loss": 0.3055, + "step": 2539 + }, + { + "epoch": 0.22, + "learning_rate": 1.820794136359171e-05, + "loss": 0.3011, + "step": 2540 + }, + { + "epoch": 0.22, + "learning_rate": 1.820635519233835e-05, + "loss": 0.3148, + "step": 2541 + }, + { + "epoch": 0.22, + "learning_rate": 1.820476838857968e-05, + "loss": 0.3375, + "step": 2542 + }, + { + "epoch": 0.22, + "learning_rate": 1.8203180952438e-05, + "loss": 0.3148, + "step": 2543 + }, + { + "epoch": 0.22, + "learning_rate": 1.820159288403567e-05, + "loss": 0.3963, + "step": 2544 + }, + { + "epoch": 0.22, + "learning_rate": 1.8200004183495085e-05, + "loss": 0.2939, + "step": 2545 + }, + { + "epoch": 0.22, + "learning_rate": 1.8198414850938694e-05, + "loss": 0.3329, + "step": 2546 + }, + { + "epoch": 0.22, + "learning_rate": 1.8196824886488996e-05, + "loss": 0.3519, + "step": 2547 + }, + { + "epoch": 0.22, + "learning_rate": 1.8195234290268536e-05, + "loss": 0.2755, + "step": 2548 + }, + { + "epoch": 0.22, + "learning_rate": 1.8193643062399913e-05, + "loss": 0.2719, + "step": 2549 + }, + { + "epoch": 0.22, + "learning_rate": 1.8192051203005768e-05, + "loss": 0.3317, + "step": 2550 + }, + { + "epoch": 0.22, + "learning_rate": 1.8190458712208795e-05, + "loss": 0.3207, + "step": 2551 + }, + { + "epoch": 0.22, + "learning_rate": 1.8188865590131733e-05, + "loss": 0.2989, + "step": 2552 + }, + { + "epoch": 0.22, + "learning_rate": 1.8187271836897377e-05, + "loss": 0.295, + "step": 2553 + }, + { + "epoch": 0.22, + "learning_rate": 1.8185677452628557e-05, + "loss": 0.2838, + "step": 2554 + }, + { + "epoch": 0.22, + "learning_rate": 1.818408243744817e-05, + "loss": 0.6395, + "step": 2555 + }, + { + "epoch": 0.22, + "learning_rate": 1.8182486791479145e-05, + "loss": 0.2875, + "step": 2556 + }, + { + "epoch": 0.22, + "learning_rate": 1.818089051484447e-05, + "loss": 0.2908, + "step": 2557 + }, + { + "epoch": 0.22, + "learning_rate": 1.8179293607667177e-05, + "loss": 0.2991, + "step": 2558 + }, + { + "epoch": 0.22, + "learning_rate": 1.817769607007035e-05, + "loss": 0.276, + "step": 2559 + }, + { + "epoch": 0.22, + "learning_rate": 1.8176097902177115e-05, + "loss": 0.2872, + "step": 2560 + }, + { + "epoch": 0.22, + "learning_rate": 1.8174499104110653e-05, + "loss": 0.3059, + "step": 2561 + }, + { + "epoch": 0.22, + "learning_rate": 1.817289967599419e-05, + "loss": 0.3077, + "step": 2562 + }, + { + "epoch": 0.22, + "learning_rate": 1.8171299617951007e-05, + "loss": 0.299, + "step": 2563 + }, + { + "epoch": 0.22, + "learning_rate": 1.816969893010442e-05, + "loss": 0.2706, + "step": 2564 + }, + { + "epoch": 0.22, + "learning_rate": 1.816809761257781e-05, + "loss": 0.3182, + "step": 2565 + }, + { + "epoch": 0.22, + "learning_rate": 1.81664956654946e-05, + "loss": 0.3344, + "step": 2566 + }, + { + "epoch": 0.22, + "learning_rate": 1.816489308897825e-05, + "loss": 0.3079, + "step": 2567 + }, + { + "epoch": 0.22, + "learning_rate": 1.8163289883152285e-05, + "loss": 0.2955, + "step": 2568 + }, + { + "epoch": 0.22, + "learning_rate": 1.8161686048140275e-05, + "loss": 0.2844, + "step": 2569 + }, + { + "epoch": 0.22, + "learning_rate": 1.8160081584065833e-05, + "loss": 0.2552, + "step": 2570 + }, + { + "epoch": 0.22, + "learning_rate": 1.8158476491052616e-05, + "loss": 0.2571, + "step": 2571 + }, + { + "epoch": 0.22, + "learning_rate": 1.815687076922435e-05, + "loss": 0.3284, + "step": 2572 + }, + { + "epoch": 0.22, + "learning_rate": 1.8155264418704785e-05, + "loss": 0.3126, + "step": 2573 + }, + { + "epoch": 0.22, + "learning_rate": 1.8153657439617738e-05, + "loss": 0.3306, + "step": 2574 + }, + { + "epoch": 0.22, + "learning_rate": 1.8152049832087063e-05, + "loss": 0.2721, + "step": 2575 + }, + { + "epoch": 0.22, + "learning_rate": 1.8150441596236667e-05, + "loss": 0.3096, + "step": 2576 + }, + { + "epoch": 0.22, + "learning_rate": 1.8148832732190508e-05, + "loss": 0.3085, + "step": 2577 + }, + { + "epoch": 0.22, + "learning_rate": 1.814722324007258e-05, + "loss": 0.3082, + "step": 2578 + }, + { + "epoch": 0.22, + "learning_rate": 1.8145613120006947e-05, + "loss": 0.3274, + "step": 2579 + }, + { + "epoch": 0.22, + "learning_rate": 1.8144002372117705e-05, + "loss": 0.3143, + "step": 2580 + }, + { + "epoch": 0.22, + "learning_rate": 1.8142390996528994e-05, + "loss": 0.3163, + "step": 2581 + }, + { + "epoch": 0.22, + "learning_rate": 1.814077899336502e-05, + "loss": 0.3377, + "step": 2582 + }, + { + "epoch": 0.22, + "learning_rate": 1.813916636275003e-05, + "loss": 0.2891, + "step": 2583 + }, + { + "epoch": 0.22, + "learning_rate": 1.8137553104808308e-05, + "loss": 0.3035, + "step": 2584 + }, + { + "epoch": 0.22, + "learning_rate": 1.8135939219664205e-05, + "loss": 0.3094, + "step": 2585 + }, + { + "epoch": 0.22, + "learning_rate": 1.813432470744211e-05, + "loss": 0.2516, + "step": 2586 + }, + { + "epoch": 0.22, + "learning_rate": 1.8132709568266457e-05, + "loss": 0.2946, + "step": 2587 + }, + { + "epoch": 0.22, + "learning_rate": 1.8131093802261738e-05, + "loss": 0.3115, + "step": 2588 + }, + { + "epoch": 0.22, + "learning_rate": 1.812947740955248e-05, + "loss": 0.2675, + "step": 2589 + }, + { + "epoch": 0.22, + "learning_rate": 1.8127860390263275e-05, + "loss": 0.3096, + "step": 2590 + }, + { + "epoch": 0.22, + "learning_rate": 1.8126242744518753e-05, + "loss": 0.2975, + "step": 2591 + }, + { + "epoch": 0.22, + "learning_rate": 1.8124624472443592e-05, + "loss": 0.2581, + "step": 2592 + }, + { + "epoch": 0.22, + "learning_rate": 1.812300557416252e-05, + "loss": 0.2588, + "step": 2593 + }, + { + "epoch": 0.22, + "learning_rate": 1.8121386049800317e-05, + "loss": 0.3183, + "step": 2594 + }, + { + "epoch": 0.22, + "learning_rate": 1.8119765899481807e-05, + "loss": 0.2561, + "step": 2595 + }, + { + "epoch": 0.22, + "learning_rate": 1.811814512333186e-05, + "loss": 0.3161, + "step": 2596 + }, + { + "epoch": 0.22, + "learning_rate": 1.81165237214754e-05, + "loss": 0.2949, + "step": 2597 + }, + { + "epoch": 0.22, + "learning_rate": 1.8114901694037402e-05, + "loss": 0.3259, + "step": 2598 + }, + { + "epoch": 0.22, + "learning_rate": 1.811327904114287e-05, + "loss": 0.2724, + "step": 2599 + }, + { + "epoch": 0.22, + "learning_rate": 1.8111655762916885e-05, + "loss": 0.3187, + "step": 2600 + }, + { + "epoch": 0.22, + "learning_rate": 1.8110031859484554e-05, + "loss": 0.309, + "step": 2601 + }, + { + "epoch": 0.22, + "learning_rate": 1.810840733097104e-05, + "loss": 0.3133, + "step": 2602 + }, + { + "epoch": 0.22, + "learning_rate": 1.810678217750155e-05, + "loss": 0.3854, + "step": 2603 + }, + { + "epoch": 0.22, + "learning_rate": 1.810515639920135e-05, + "loss": 0.2874, + "step": 2604 + }, + { + "epoch": 0.22, + "learning_rate": 1.810352999619574e-05, + "loss": 0.2714, + "step": 2605 + }, + { + "epoch": 0.22, + "learning_rate": 1.8101902968610082e-05, + "loss": 0.3195, + "step": 2606 + }, + { + "epoch": 0.22, + "learning_rate": 1.8100275316569774e-05, + "loss": 0.2803, + "step": 2607 + }, + { + "epoch": 0.22, + "learning_rate": 1.809864704020027e-05, + "loss": 0.3149, + "step": 2608 + }, + { + "epoch": 0.22, + "learning_rate": 1.8097018139627068e-05, + "loss": 0.2936, + "step": 2609 + }, + { + "epoch": 0.22, + "learning_rate": 1.809538861497572e-05, + "loss": 0.3329, + "step": 2610 + }, + { + "epoch": 0.22, + "learning_rate": 1.8093758466371812e-05, + "loss": 0.2933, + "step": 2611 + }, + { + "epoch": 0.22, + "learning_rate": 1.8092127693940998e-05, + "loss": 0.2873, + "step": 2612 + }, + { + "epoch": 0.22, + "learning_rate": 1.8090496297808962e-05, + "loss": 0.2746, + "step": 2613 + }, + { + "epoch": 0.22, + "learning_rate": 1.8088864278101452e-05, + "loss": 0.3024, + "step": 2614 + }, + { + "epoch": 0.22, + "learning_rate": 1.808723163494425e-05, + "loss": 0.3531, + "step": 2615 + }, + { + "epoch": 0.22, + "learning_rate": 1.8085598368463194e-05, + "loss": 0.2829, + "step": 2616 + }, + { + "epoch": 0.22, + "learning_rate": 1.8083964478784167e-05, + "loss": 0.2899, + "step": 2617 + }, + { + "epoch": 0.22, + "learning_rate": 1.8082329966033105e-05, + "loss": 0.272, + "step": 2618 + }, + { + "epoch": 0.22, + "learning_rate": 1.8080694830335985e-05, + "loss": 0.2876, + "step": 2619 + }, + { + "epoch": 0.22, + "learning_rate": 1.8079059071818828e-05, + "loss": 0.3261, + "step": 2620 + }, + { + "epoch": 0.22, + "learning_rate": 1.8077422690607725e-05, + "loss": 0.3115, + "step": 2621 + }, + { + "epoch": 0.22, + "learning_rate": 1.807578568682879e-05, + "loss": 0.3017, + "step": 2622 + }, + { + "epoch": 0.22, + "learning_rate": 1.8074148060608197e-05, + "loss": 0.2876, + "step": 2623 + }, + { + "epoch": 0.22, + "learning_rate": 1.807250981207217e-05, + "loss": 0.2832, + "step": 2624 + }, + { + "epoch": 0.23, + "learning_rate": 1.807087094134697e-05, + "loss": 0.3242, + "step": 2625 + }, + { + "epoch": 0.23, + "learning_rate": 1.8069231448558923e-05, + "loss": 0.3752, + "step": 2626 + }, + { + "epoch": 0.23, + "learning_rate": 1.8067591333834382e-05, + "loss": 0.339, + "step": 2627 + }, + { + "epoch": 0.23, + "learning_rate": 1.806595059729977e-05, + "loss": 0.2871, + "step": 2628 + }, + { + "epoch": 0.23, + "learning_rate": 1.8064309239081535e-05, + "loss": 0.3015, + "step": 2629 + }, + { + "epoch": 0.23, + "learning_rate": 1.8062667259306193e-05, + "loss": 0.2793, + "step": 2630 + }, + { + "epoch": 0.23, + "learning_rate": 1.8061024658100298e-05, + "loss": 0.291, + "step": 2631 + }, + { + "epoch": 0.23, + "learning_rate": 1.805938143559045e-05, + "loss": 0.3187, + "step": 2632 + }, + { + "epoch": 0.23, + "learning_rate": 1.8057737591903306e-05, + "loss": 0.2576, + "step": 2633 + }, + { + "epoch": 0.23, + "learning_rate": 1.8056093127165564e-05, + "loss": 0.3286, + "step": 2634 + }, + { + "epoch": 0.23, + "learning_rate": 1.8054448041503966e-05, + "loss": 0.2805, + "step": 2635 + }, + { + "epoch": 0.23, + "learning_rate": 1.8052802335045315e-05, + "loss": 0.2883, + "step": 2636 + }, + { + "epoch": 0.23, + "learning_rate": 1.805115600791645e-05, + "loss": 0.3074, + "step": 2637 + }, + { + "epoch": 0.23, + "learning_rate": 1.804950906024426e-05, + "loss": 0.3187, + "step": 2638 + }, + { + "epoch": 0.23, + "learning_rate": 1.8047861492155687e-05, + "loss": 0.3398, + "step": 2639 + }, + { + "epoch": 0.23, + "learning_rate": 1.8046213303777717e-05, + "loss": 0.312, + "step": 2640 + }, + { + "epoch": 0.23, + "learning_rate": 1.804456449523738e-05, + "loss": 0.3007, + "step": 2641 + }, + { + "epoch": 0.23, + "learning_rate": 1.804291506666176e-05, + "loss": 0.3564, + "step": 2642 + }, + { + "epoch": 0.23, + "learning_rate": 1.804126501817799e-05, + "loss": 0.31, + "step": 2643 + }, + { + "epoch": 0.23, + "learning_rate": 1.8039614349913245e-05, + "loss": 0.3165, + "step": 2644 + }, + { + "epoch": 0.23, + "learning_rate": 1.8037963061994756e-05, + "loss": 0.28, + "step": 2645 + }, + { + "epoch": 0.23, + "learning_rate": 1.8036311154549783e-05, + "loss": 0.3203, + "step": 2646 + }, + { + "epoch": 0.23, + "learning_rate": 1.803465862770566e-05, + "loss": 0.2863, + "step": 2647 + }, + { + "epoch": 0.23, + "learning_rate": 1.8033005481589746e-05, + "loss": 0.2887, + "step": 2648 + }, + { + "epoch": 0.23, + "learning_rate": 1.8031351716329462e-05, + "loss": 0.2966, + "step": 2649 + }, + { + "epoch": 0.23, + "learning_rate": 1.8029697332052277e-05, + "loss": 0.2929, + "step": 2650 + }, + { + "epoch": 0.23, + "learning_rate": 1.8028042328885694e-05, + "loss": 0.29, + "step": 2651 + }, + { + "epoch": 0.23, + "learning_rate": 1.8026386706957278e-05, + "loss": 0.2834, + "step": 2652 + }, + { + "epoch": 0.23, + "learning_rate": 1.8024730466394632e-05, + "loss": 0.3765, + "step": 2653 + }, + { + "epoch": 0.23, + "learning_rate": 1.802307360732541e-05, + "loss": 0.2834, + "step": 2654 + }, + { + "epoch": 0.23, + "learning_rate": 1.8021416129877324e-05, + "loss": 0.3094, + "step": 2655 + }, + { + "epoch": 0.23, + "learning_rate": 1.8019758034178116e-05, + "loss": 0.3654, + "step": 2656 + }, + { + "epoch": 0.23, + "learning_rate": 1.8018099320355586e-05, + "loss": 0.3066, + "step": 2657 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016439988537576e-05, + "loss": 0.3427, + "step": 2658 + }, + { + "epoch": 0.23, + "learning_rate": 1.8014780038851983e-05, + "loss": 0.3094, + "step": 2659 + }, + { + "epoch": 0.23, + "learning_rate": 1.8013119471426748e-05, + "loss": 0.2836, + "step": 2660 + }, + { + "epoch": 0.23, + "learning_rate": 1.8011458286389856e-05, + "loss": 0.3178, + "step": 2661 + }, + { + "epoch": 0.23, + "learning_rate": 1.8009796483869347e-05, + "loss": 0.2842, + "step": 2662 + }, + { + "epoch": 0.23, + "learning_rate": 1.80081340639933e-05, + "loss": 0.2948, + "step": 2663 + }, + { + "epoch": 0.23, + "learning_rate": 1.8006471026889852e-05, + "loss": 0.3229, + "step": 2664 + }, + { + "epoch": 0.23, + "learning_rate": 1.8004807372687175e-05, + "loss": 0.2938, + "step": 2665 + }, + { + "epoch": 0.23, + "learning_rate": 1.8003143101513502e-05, + "loss": 0.3467, + "step": 2666 + }, + { + "epoch": 0.23, + "learning_rate": 1.8001478213497104e-05, + "loss": 0.37, + "step": 2667 + }, + { + "epoch": 0.23, + "learning_rate": 1.79998127087663e-05, + "loss": 0.2593, + "step": 2668 + }, + { + "epoch": 0.23, + "learning_rate": 1.7998146587449457e-05, + "loss": 0.2997, + "step": 2669 + }, + { + "epoch": 0.23, + "learning_rate": 1.7996479849675e-05, + "loss": 0.3116, + "step": 2670 + }, + { + "epoch": 0.23, + "learning_rate": 1.7994812495571387e-05, + "loss": 0.3419, + "step": 2671 + }, + { + "epoch": 0.23, + "learning_rate": 1.799314452526713e-05, + "loss": 0.3844, + "step": 2672 + }, + { + "epoch": 0.23, + "learning_rate": 1.799147593889079e-05, + "loss": 0.3381, + "step": 2673 + }, + { + "epoch": 0.23, + "learning_rate": 1.798980673657097e-05, + "loss": 0.2622, + "step": 2674 + }, + { + "epoch": 0.23, + "learning_rate": 1.7988136918436324e-05, + "loss": 0.3355, + "step": 2675 + }, + { + "epoch": 0.23, + "learning_rate": 1.7986466484615557e-05, + "loss": 0.272, + "step": 2676 + }, + { + "epoch": 0.23, + "learning_rate": 1.7984795435237418e-05, + "loss": 0.3593, + "step": 2677 + }, + { + "epoch": 0.23, + "learning_rate": 1.7983123770430696e-05, + "loss": 0.3203, + "step": 2678 + }, + { + "epoch": 0.23, + "learning_rate": 1.798145149032424e-05, + "loss": 0.3458, + "step": 2679 + }, + { + "epoch": 0.23, + "learning_rate": 1.797977859504694e-05, + "loss": 0.3336, + "step": 2680 + }, + { + "epoch": 0.23, + "learning_rate": 1.797810508472774e-05, + "loss": 0.3352, + "step": 2681 + }, + { + "epoch": 0.23, + "learning_rate": 1.7976430959495617e-05, + "loss": 0.2817, + "step": 2682 + }, + { + "epoch": 0.23, + "learning_rate": 1.797475621947961e-05, + "loss": 0.2972, + "step": 2683 + }, + { + "epoch": 0.23, + "learning_rate": 1.7973080864808795e-05, + "loss": 0.2954, + "step": 2684 + }, + { + "epoch": 0.23, + "learning_rate": 1.797140489561231e-05, + "loss": 0.299, + "step": 2685 + }, + { + "epoch": 0.23, + "learning_rate": 1.7969728312019316e-05, + "loss": 0.3123, + "step": 2686 + }, + { + "epoch": 0.23, + "learning_rate": 1.7968051114159046e-05, + "loss": 0.2874, + "step": 2687 + }, + { + "epoch": 0.23, + "learning_rate": 1.796637330216077e-05, + "loss": 0.3176, + "step": 2688 + }, + { + "epoch": 0.23, + "learning_rate": 1.7964694876153802e-05, + "loss": 0.3162, + "step": 2689 + }, + { + "epoch": 0.23, + "learning_rate": 1.7963015836267502e-05, + "loss": 0.327, + "step": 2690 + }, + { + "epoch": 0.23, + "learning_rate": 1.7961336182631293e-05, + "loss": 0.2814, + "step": 2691 + }, + { + "epoch": 0.23, + "learning_rate": 1.795965591537463e-05, + "loss": 0.2719, + "step": 2692 + }, + { + "epoch": 0.23, + "learning_rate": 1.7957975034627017e-05, + "loss": 0.3038, + "step": 2693 + }, + { + "epoch": 0.23, + "learning_rate": 1.795629354051801e-05, + "loss": 0.3014, + "step": 2694 + }, + { + "epoch": 0.23, + "learning_rate": 1.795461143317721e-05, + "loss": 0.3409, + "step": 2695 + }, + { + "epoch": 0.23, + "learning_rate": 1.7952928712734266e-05, + "loss": 0.3441, + "step": 2696 + }, + { + "epoch": 0.23, + "learning_rate": 1.7951245379318872e-05, + "loss": 0.2802, + "step": 2697 + }, + { + "epoch": 0.23, + "learning_rate": 1.7949561433060775e-05, + "loss": 0.3172, + "step": 2698 + }, + { + "epoch": 0.23, + "learning_rate": 1.794787687408976e-05, + "loss": 0.3266, + "step": 2699 + }, + { + "epoch": 0.23, + "learning_rate": 1.794619170253567e-05, + "loss": 0.2952, + "step": 2700 + }, + { + "epoch": 0.23, + "learning_rate": 1.7944505918528384e-05, + "loss": 0.3082, + "step": 2701 + }, + { + "epoch": 0.23, + "learning_rate": 1.7942819522197837e-05, + "loss": 0.2697, + "step": 2702 + }, + { + "epoch": 0.23, + "learning_rate": 1.794113251367401e-05, + "loss": 0.2893, + "step": 2703 + }, + { + "epoch": 0.23, + "learning_rate": 1.7939444893086925e-05, + "loss": 0.2945, + "step": 2704 + }, + { + "epoch": 0.23, + "learning_rate": 1.793775666056666e-05, + "loss": 0.3228, + "step": 2705 + }, + { + "epoch": 0.23, + "learning_rate": 1.793606781624333e-05, + "loss": 0.2875, + "step": 2706 + }, + { + "epoch": 0.23, + "learning_rate": 1.793437836024711e-05, + "loss": 0.2986, + "step": 2707 + }, + { + "epoch": 0.23, + "learning_rate": 1.793268829270821e-05, + "loss": 0.2694, + "step": 2708 + }, + { + "epoch": 0.23, + "learning_rate": 1.7930997613756892e-05, + "loss": 0.3179, + "step": 2709 + }, + { + "epoch": 0.23, + "learning_rate": 1.7929306323523463e-05, + "loss": 0.3358, + "step": 2710 + }, + { + "epoch": 0.23, + "learning_rate": 1.7927614422138286e-05, + "loss": 0.3011, + "step": 2711 + }, + { + "epoch": 0.23, + "learning_rate": 1.792592190973176e-05, + "loss": 0.3267, + "step": 2712 + }, + { + "epoch": 0.23, + "learning_rate": 1.7924228786434333e-05, + "loss": 0.3232, + "step": 2713 + }, + { + "epoch": 0.23, + "learning_rate": 1.792253505237651e-05, + "loss": 0.307, + "step": 2714 + }, + { + "epoch": 0.23, + "learning_rate": 1.7920840707688833e-05, + "loss": 0.2712, + "step": 2715 + }, + { + "epoch": 0.23, + "learning_rate": 1.791914575250189e-05, + "loss": 0.2921, + "step": 2716 + }, + { + "epoch": 0.23, + "learning_rate": 1.7917450186946323e-05, + "loss": 0.29, + "step": 2717 + }, + { + "epoch": 0.23, + "learning_rate": 1.7915754011152815e-05, + "loss": 0.2966, + "step": 2718 + }, + { + "epoch": 0.23, + "learning_rate": 1.7914057225252103e-05, + "loss": 0.313, + "step": 2719 + }, + { + "epoch": 0.23, + "learning_rate": 1.7912359829374963e-05, + "loss": 0.2825, + "step": 2720 + }, + { + "epoch": 0.23, + "learning_rate": 1.7910661823652223e-05, + "loss": 0.3318, + "step": 2721 + }, + { + "epoch": 0.23, + "learning_rate": 1.790896320821476e-05, + "loss": 0.3194, + "step": 2722 + }, + { + "epoch": 0.23, + "learning_rate": 1.790726398319349e-05, + "loss": 0.329, + "step": 2723 + }, + { + "epoch": 0.23, + "learning_rate": 1.7905564148719383e-05, + "loss": 0.3492, + "step": 2724 + }, + { + "epoch": 0.23, + "learning_rate": 1.7903863704923453e-05, + "loss": 0.2863, + "step": 2725 + }, + { + "epoch": 0.23, + "learning_rate": 1.7902162651936766e-05, + "loss": 0.3461, + "step": 2726 + }, + { + "epoch": 0.23, + "learning_rate": 1.7900460989890424e-05, + "loss": 0.3307, + "step": 2727 + }, + { + "epoch": 0.23, + "learning_rate": 1.789875871891559e-05, + "loss": 0.3148, + "step": 2728 + }, + { + "epoch": 0.23, + "learning_rate": 1.7897055839143457e-05, + "loss": 0.2863, + "step": 2729 + }, + { + "epoch": 0.23, + "learning_rate": 1.7895352350705288e-05, + "loss": 0.305, + "step": 2730 + }, + { + "epoch": 0.23, + "learning_rate": 1.7893648253732364e-05, + "loss": 0.6641, + "step": 2731 + }, + { + "epoch": 0.23, + "learning_rate": 1.7891943548356043e-05, + "loss": 0.3036, + "step": 2732 + }, + { + "epoch": 0.23, + "learning_rate": 1.7890238234707708e-05, + "loss": 0.3477, + "step": 2733 + }, + { + "epoch": 0.23, + "learning_rate": 1.7888532312918793e-05, + "loss": 0.2765, + "step": 2734 + }, + { + "epoch": 0.23, + "learning_rate": 1.7886825783120786e-05, + "loss": 0.3339, + "step": 2735 + }, + { + "epoch": 0.23, + "learning_rate": 1.788511864544522e-05, + "loss": 0.3472, + "step": 2736 + }, + { + "epoch": 0.23, + "learning_rate": 1.7883410900023667e-05, + "loss": 0.3389, + "step": 2737 + }, + { + "epoch": 0.23, + "learning_rate": 1.788170254698776e-05, + "loss": 0.3372, + "step": 2738 + }, + { + "epoch": 0.23, + "learning_rate": 1.787999358646916e-05, + "loss": 0.2847, + "step": 2739 + }, + { + "epoch": 0.23, + "learning_rate": 1.7878284018599594e-05, + "loss": 0.2751, + "step": 2740 + }, + { + "epoch": 0.23, + "learning_rate": 1.7876573843510822e-05, + "loss": 0.2642, + "step": 2741 + }, + { + "epoch": 0.24, + "learning_rate": 1.7874863061334658e-05, + "loss": 0.2982, + "step": 2742 + }, + { + "epoch": 0.24, + "learning_rate": 1.787315167220296e-05, + "loss": 0.2791, + "step": 2743 + }, + { + "epoch": 0.24, + "learning_rate": 1.7871439676247632e-05, + "loss": 0.2979, + "step": 2744 + }, + { + "epoch": 0.24, + "learning_rate": 1.786972707360063e-05, + "loss": 0.2662, + "step": 2745 + }, + { + "epoch": 0.24, + "learning_rate": 1.786801386439395e-05, + "loss": 0.3167, + "step": 2746 + }, + { + "epoch": 0.24, + "learning_rate": 1.786630004875964e-05, + "loss": 0.614, + "step": 2747 + }, + { + "epoch": 0.24, + "learning_rate": 1.7864585626829786e-05, + "loss": 0.2914, + "step": 2748 + }, + { + "epoch": 0.24, + "learning_rate": 1.7862870598736534e-05, + "loss": 0.3242, + "step": 2749 + }, + { + "epoch": 0.24, + "learning_rate": 1.786115496461207e-05, + "loss": 0.3099, + "step": 2750 + }, + { + "epoch": 0.24, + "learning_rate": 1.7859438724588623e-05, + "loss": 0.2908, + "step": 2751 + }, + { + "epoch": 0.24, + "learning_rate": 1.7857721878798476e-05, + "loss": 0.3257, + "step": 2752 + }, + { + "epoch": 0.24, + "learning_rate": 1.785600442737395e-05, + "loss": 0.2869, + "step": 2753 + }, + { + "epoch": 0.24, + "learning_rate": 1.785428637044742e-05, + "loss": 0.2927, + "step": 2754 + }, + { + "epoch": 0.24, + "learning_rate": 1.7852567708151306e-05, + "loss": 0.3333, + "step": 2755 + }, + { + "epoch": 0.24, + "learning_rate": 1.7850848440618075e-05, + "loss": 0.3088, + "step": 2756 + }, + { + "epoch": 0.24, + "learning_rate": 1.7849128567980238e-05, + "loss": 0.3142, + "step": 2757 + }, + { + "epoch": 0.24, + "learning_rate": 1.7847408090370355e-05, + "loss": 0.3382, + "step": 2758 + }, + { + "epoch": 0.24, + "learning_rate": 1.7845687007921034e-05, + "loss": 0.3124, + "step": 2759 + }, + { + "epoch": 0.24, + "learning_rate": 1.784396532076492e-05, + "loss": 0.3123, + "step": 2760 + }, + { + "epoch": 0.24, + "learning_rate": 1.784224302903472e-05, + "loss": 0.2925, + "step": 2761 + }, + { + "epoch": 0.24, + "learning_rate": 1.7840520132863173e-05, + "loss": 0.2896, + "step": 2762 + }, + { + "epoch": 0.24, + "learning_rate": 1.783879663238308e-05, + "loss": 0.2937, + "step": 2763 + }, + { + "epoch": 0.24, + "learning_rate": 1.7837072527727275e-05, + "loss": 0.3057, + "step": 2764 + }, + { + "epoch": 0.24, + "learning_rate": 1.7835347819028642e-05, + "loss": 0.3079, + "step": 2765 + }, + { + "epoch": 0.24, + "learning_rate": 1.7833622506420116e-05, + "loss": 0.2964, + "step": 2766 + }, + { + "epoch": 0.24, + "learning_rate": 1.783189659003467e-05, + "loss": 0.2938, + "step": 2767 + }, + { + "epoch": 0.24, + "learning_rate": 1.783017007000534e-05, + "loss": 0.3206, + "step": 2768 + }, + { + "epoch": 0.24, + "learning_rate": 1.7828442946465188e-05, + "loss": 0.3374, + "step": 2769 + }, + { + "epoch": 0.24, + "learning_rate": 1.7826715219547336e-05, + "loss": 0.3456, + "step": 2770 + }, + { + "epoch": 0.24, + "learning_rate": 1.7824986889384948e-05, + "loss": 0.3585, + "step": 2771 + }, + { + "epoch": 0.24, + "learning_rate": 1.7823257956111233e-05, + "loss": 0.2675, + "step": 2772 + }, + { + "epoch": 0.24, + "learning_rate": 1.782152841985945e-05, + "loss": 0.3141, + "step": 2773 + }, + { + "epoch": 0.24, + "learning_rate": 1.7819798280762907e-05, + "loss": 0.3415, + "step": 2774 + }, + { + "epoch": 0.24, + "learning_rate": 1.781806753895495e-05, + "loss": 0.3244, + "step": 2775 + }, + { + "epoch": 0.24, + "learning_rate": 1.7816336194568976e-05, + "loss": 0.2383, + "step": 2776 + }, + { + "epoch": 0.24, + "learning_rate": 1.781460424773843e-05, + "loss": 0.2368, + "step": 2777 + }, + { + "epoch": 0.24, + "learning_rate": 1.78128716985968e-05, + "loss": 0.3093, + "step": 2778 + }, + { + "epoch": 0.24, + "learning_rate": 1.781113854727763e-05, + "loss": 0.2943, + "step": 2779 + }, + { + "epoch": 0.24, + "learning_rate": 1.780940479391449e-05, + "loss": 0.2877, + "step": 2780 + }, + { + "epoch": 0.24, + "learning_rate": 1.780767043864102e-05, + "loss": 0.308, + "step": 2781 + }, + { + "epoch": 0.24, + "learning_rate": 1.780593548159089e-05, + "loss": 0.269, + "step": 2782 + }, + { + "epoch": 0.24, + "learning_rate": 1.780419992289782e-05, + "loss": 0.2917, + "step": 2783 + }, + { + "epoch": 0.24, + "learning_rate": 1.7802463762695588e-05, + "loss": 0.3575, + "step": 2784 + }, + { + "epoch": 0.24, + "learning_rate": 1.7800727001118e-05, + "loss": 0.2968, + "step": 2785 + }, + { + "epoch": 0.24, + "learning_rate": 1.779898963829892e-05, + "loss": 0.3095, + "step": 2786 + }, + { + "epoch": 0.24, + "learning_rate": 1.7797251674372253e-05, + "loss": 0.3192, + "step": 2787 + }, + { + "epoch": 0.24, + "learning_rate": 1.7795513109471952e-05, + "loss": 0.5713, + "step": 2788 + }, + { + "epoch": 0.24, + "learning_rate": 1.7793773943732026e-05, + "loss": 0.2745, + "step": 2789 + }, + { + "epoch": 0.24, + "learning_rate": 1.7792034177286508e-05, + "loss": 0.368, + "step": 2790 + }, + { + "epoch": 0.24, + "learning_rate": 1.77902938102695e-05, + "loss": 0.2783, + "step": 2791 + }, + { + "epoch": 0.24, + "learning_rate": 1.7788552842815136e-05, + "loss": 0.3057, + "step": 2792 + }, + { + "epoch": 0.24, + "learning_rate": 1.7786811275057606e-05, + "loss": 0.3386, + "step": 2793 + }, + { + "epoch": 0.24, + "learning_rate": 1.778506910713114e-05, + "loss": 0.3011, + "step": 2794 + }, + { + "epoch": 0.24, + "learning_rate": 1.778332633917001e-05, + "loss": 0.3047, + "step": 2795 + }, + { + "epoch": 0.24, + "learning_rate": 1.7781582971308547e-05, + "loss": 0.322, + "step": 2796 + }, + { + "epoch": 0.24, + "learning_rate": 1.777983900368112e-05, + "loss": 0.3665, + "step": 2797 + }, + { + "epoch": 0.24, + "learning_rate": 1.777809443642214e-05, + "loss": 0.2877, + "step": 2798 + }, + { + "epoch": 0.24, + "learning_rate": 1.7776349269666076e-05, + "loss": 0.2598, + "step": 2799 + }, + { + "epoch": 0.24, + "learning_rate": 1.7774603503547432e-05, + "loss": 0.3298, + "step": 2800 + }, + { + "epoch": 0.24, + "learning_rate": 1.7772857138200767e-05, + "loss": 0.3345, + "step": 2801 + }, + { + "epoch": 0.24, + "learning_rate": 1.777111017376068e-05, + "loss": 0.3273, + "step": 2802 + }, + { + "epoch": 0.24, + "learning_rate": 1.7769362610361824e-05, + "loss": 0.3007, + "step": 2803 + }, + { + "epoch": 0.24, + "learning_rate": 1.7767614448138882e-05, + "loss": 0.3427, + "step": 2804 + }, + { + "epoch": 0.24, + "learning_rate": 1.77658656872266e-05, + "loss": 0.3618, + "step": 2805 + }, + { + "epoch": 0.24, + "learning_rate": 1.776411632775976e-05, + "loss": 0.3187, + "step": 2806 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762366369873204e-05, + "loss": 0.2852, + "step": 2807 + }, + { + "epoch": 0.24, + "learning_rate": 1.77606158137018e-05, + "loss": 0.3292, + "step": 2808 + }, + { + "epoch": 0.24, + "learning_rate": 1.7758864659380474e-05, + "loss": 0.314, + "step": 2809 + }, + { + "epoch": 0.24, + "learning_rate": 1.77571129070442e-05, + "loss": 0.3109, + "step": 2810 + }, + { + "epoch": 0.24, + "learning_rate": 1.775536055682799e-05, + "loss": 0.3658, + "step": 2811 + }, + { + "epoch": 0.24, + "learning_rate": 1.775360760886691e-05, + "loss": 0.2958, + "step": 2812 + }, + { + "epoch": 0.24, + "learning_rate": 1.7751854063296068e-05, + "loss": 0.2924, + "step": 2813 + }, + { + "epoch": 0.24, + "learning_rate": 1.7750099920250616e-05, + "loss": 0.2863, + "step": 2814 + }, + { + "epoch": 0.24, + "learning_rate": 1.774834517986576e-05, + "loss": 0.3532, + "step": 2815 + }, + { + "epoch": 0.24, + "learning_rate": 1.774658984227674e-05, + "loss": 0.3488, + "step": 2816 + }, + { + "epoch": 0.24, + "learning_rate": 1.774483390761885e-05, + "loss": 0.308, + "step": 2817 + }, + { + "epoch": 0.24, + "learning_rate": 1.7743077376027433e-05, + "loss": 0.2943, + "step": 2818 + }, + { + "epoch": 0.24, + "learning_rate": 1.7741320247637875e-05, + "loss": 0.2876, + "step": 2819 + }, + { + "epoch": 0.24, + "learning_rate": 1.7739562522585598e-05, + "loss": 0.2805, + "step": 2820 + }, + { + "epoch": 0.24, + "learning_rate": 1.7737804201006084e-05, + "loss": 0.2848, + "step": 2821 + }, + { + "epoch": 0.24, + "learning_rate": 1.773604528303486e-05, + "loss": 0.285, + "step": 2822 + }, + { + "epoch": 0.24, + "learning_rate": 1.773428576880749e-05, + "loss": 0.3236, + "step": 2823 + }, + { + "epoch": 0.24, + "learning_rate": 1.7732525658459586e-05, + "loss": 0.3361, + "step": 2824 + }, + { + "epoch": 0.24, + "learning_rate": 1.7730764952126813e-05, + "loss": 0.2708, + "step": 2825 + }, + { + "epoch": 0.24, + "learning_rate": 1.7729003649944878e-05, + "loss": 0.2791, + "step": 2826 + }, + { + "epoch": 0.24, + "learning_rate": 1.772724175204953e-05, + "loss": 0.2586, + "step": 2827 + }, + { + "epoch": 0.24, + "learning_rate": 1.772547925857657e-05, + "loss": 0.3179, + "step": 2828 + }, + { + "epoch": 0.24, + "learning_rate": 1.7723716169661843e-05, + "loss": 0.2417, + "step": 2829 + }, + { + "epoch": 0.24, + "learning_rate": 1.7721952485441232e-05, + "loss": 0.3043, + "step": 2830 + }, + { + "epoch": 0.24, + "learning_rate": 1.7720188206050682e-05, + "loss": 0.2878, + "step": 2831 + }, + { + "epoch": 0.24, + "learning_rate": 1.7718423331626175e-05, + "loss": 0.2891, + "step": 2832 + }, + { + "epoch": 0.24, + "learning_rate": 1.7716657862303733e-05, + "loss": 0.3344, + "step": 2833 + }, + { + "epoch": 0.24, + "learning_rate": 1.7714891798219432e-05, + "loss": 0.3486, + "step": 2834 + }, + { + "epoch": 0.24, + "learning_rate": 1.771312513950939e-05, + "loss": 0.3138, + "step": 2835 + }, + { + "epoch": 0.24, + "learning_rate": 1.7711357886309777e-05, + "loss": 0.294, + "step": 2836 + }, + { + "epoch": 0.24, + "learning_rate": 1.77095900387568e-05, + "loss": 0.2626, + "step": 2837 + }, + { + "epoch": 0.24, + "learning_rate": 1.7707821596986715e-05, + "loss": 0.3339, + "step": 2838 + }, + { + "epoch": 0.24, + "learning_rate": 1.7706052561135826e-05, + "loss": 0.3048, + "step": 2839 + }, + { + "epoch": 0.24, + "learning_rate": 1.7704282931340488e-05, + "loss": 0.6313, + "step": 2840 + }, + { + "epoch": 0.24, + "learning_rate": 1.7702512707737086e-05, + "loss": 0.2995, + "step": 2841 + }, + { + "epoch": 0.24, + "learning_rate": 1.770074189046206e-05, + "loss": 0.2975, + "step": 2842 + }, + { + "epoch": 0.24, + "learning_rate": 1.7698970479651904e-05, + "loss": 0.2623, + "step": 2843 + }, + { + "epoch": 0.24, + "learning_rate": 1.7697198475443146e-05, + "loss": 0.3027, + "step": 2844 + }, + { + "epoch": 0.24, + "learning_rate": 1.769542587797236e-05, + "loss": 0.3005, + "step": 2845 + }, + { + "epoch": 0.24, + "learning_rate": 1.7693652687376173e-05, + "loss": 0.2664, + "step": 2846 + }, + { + "epoch": 0.24, + "learning_rate": 1.7691878903791252e-05, + "loss": 0.6333, + "step": 2847 + }, + { + "epoch": 0.24, + "learning_rate": 1.7690104527354313e-05, + "loss": 0.3271, + "step": 2848 + }, + { + "epoch": 0.24, + "learning_rate": 1.768832955820211e-05, + "loss": 0.283, + "step": 2849 + }, + { + "epoch": 0.24, + "learning_rate": 1.768655399647146e-05, + "loss": 0.3414, + "step": 2850 + }, + { + "epoch": 0.24, + "learning_rate": 1.7684777842299206e-05, + "loss": 0.3184, + "step": 2851 + }, + { + "epoch": 0.24, + "learning_rate": 1.7683001095822245e-05, + "loss": 0.3103, + "step": 2852 + }, + { + "epoch": 0.24, + "learning_rate": 1.7681223757177526e-05, + "loss": 0.3055, + "step": 2853 + }, + { + "epoch": 0.24, + "learning_rate": 1.7679445826502033e-05, + "loss": 0.306, + "step": 2854 + }, + { + "epoch": 0.24, + "learning_rate": 1.7677667303932797e-05, + "loss": 0.3315, + "step": 2855 + }, + { + "epoch": 0.24, + "learning_rate": 1.7675888189606907e-05, + "loss": 0.3152, + "step": 2856 + }, + { + "epoch": 0.24, + "learning_rate": 1.767410848366148e-05, + "loss": 0.2598, + "step": 2857 + }, + { + "epoch": 0.24, + "learning_rate": 1.7672328186233692e-05, + "loss": 0.3183, + "step": 2858 + }, + { + "epoch": 0.25, + "learning_rate": 1.7670547297460758e-05, + "loss": 0.2806, + "step": 2859 + }, + { + "epoch": 0.25, + "learning_rate": 1.7668765817479937e-05, + "loss": 0.2607, + "step": 2860 + }, + { + "epoch": 0.25, + "learning_rate": 1.766698374642854e-05, + "loss": 0.3216, + "step": 2861 + }, + { + "epoch": 0.25, + "learning_rate": 1.766520108444392e-05, + "loss": 0.262, + "step": 2862 + }, + { + "epoch": 0.25, + "learning_rate": 1.7663417831663474e-05, + "loss": 0.3249, + "step": 2863 + }, + { + "epoch": 0.25, + "learning_rate": 1.766163398822465e-05, + "loss": 0.3141, + "step": 2864 + }, + { + "epoch": 0.25, + "learning_rate": 1.765984955426493e-05, + "loss": 0.3157, + "step": 2865 + }, + { + "epoch": 0.25, + "learning_rate": 1.765806452992186e-05, + "loss": 0.3539, + "step": 2866 + }, + { + "epoch": 0.25, + "learning_rate": 1.7656278915333017e-05, + "loss": 0.2562, + "step": 2867 + }, + { + "epoch": 0.25, + "learning_rate": 1.765449271063602e-05, + "loss": 0.3212, + "step": 2868 + }, + { + "epoch": 0.25, + "learning_rate": 1.7652705915968552e-05, + "loss": 0.3331, + "step": 2869 + }, + { + "epoch": 0.25, + "learning_rate": 1.7650918531468326e-05, + "loss": 0.321, + "step": 2870 + }, + { + "epoch": 0.25, + "learning_rate": 1.76491305572731e-05, + "loss": 0.2733, + "step": 2871 + }, + { + "epoch": 0.25, + "learning_rate": 1.7647341993520687e-05, + "loss": 0.3022, + "step": 2872 + }, + { + "epoch": 0.25, + "learning_rate": 1.764555284034894e-05, + "loss": 0.3089, + "step": 2873 + }, + { + "epoch": 0.25, + "learning_rate": 1.764376309789576e-05, + "loss": 0.3743, + "step": 2874 + }, + { + "epoch": 0.25, + "learning_rate": 1.7641972766299088e-05, + "loss": 0.2706, + "step": 2875 + }, + { + "epoch": 0.25, + "learning_rate": 1.7640181845696914e-05, + "loss": 0.2991, + "step": 2876 + }, + { + "epoch": 0.25, + "learning_rate": 1.7638390336227275e-05, + "loss": 0.3155, + "step": 2877 + }, + { + "epoch": 0.25, + "learning_rate": 1.7636598238028253e-05, + "loss": 0.2684, + "step": 2878 + }, + { + "epoch": 0.25, + "learning_rate": 1.763480555123797e-05, + "loss": 0.2991, + "step": 2879 + }, + { + "epoch": 0.25, + "learning_rate": 1.76330122759946e-05, + "loss": 0.2953, + "step": 2880 + }, + { + "epoch": 0.25, + "learning_rate": 1.7631218412436362e-05, + "loss": 0.2729, + "step": 2881 + }, + { + "epoch": 0.25, + "learning_rate": 1.7629423960701513e-05, + "loss": 0.3105, + "step": 2882 + }, + { + "epoch": 0.25, + "learning_rate": 1.7627628920928366e-05, + "loss": 0.3604, + "step": 2883 + }, + { + "epoch": 0.25, + "learning_rate": 1.7625833293255268e-05, + "loss": 0.311, + "step": 2884 + }, + { + "epoch": 0.25, + "learning_rate": 1.762403707782062e-05, + "loss": 0.3134, + "step": 2885 + }, + { + "epoch": 0.25, + "learning_rate": 1.762224027476287e-05, + "loss": 0.3066, + "step": 2886 + }, + { + "epoch": 0.25, + "learning_rate": 1.7620442884220495e-05, + "loss": 0.2988, + "step": 2887 + }, + { + "epoch": 0.25, + "learning_rate": 1.761864490633204e-05, + "loss": 0.3286, + "step": 2888 + }, + { + "epoch": 0.25, + "learning_rate": 1.7616846341236082e-05, + "loss": 0.2817, + "step": 2889 + }, + { + "epoch": 0.25, + "learning_rate": 1.761504718907124e-05, + "loss": 0.2957, + "step": 2890 + }, + { + "epoch": 0.25, + "learning_rate": 1.7613247449976195e-05, + "loss": 0.2975, + "step": 2891 + }, + { + "epoch": 0.25, + "learning_rate": 1.761144712408965e-05, + "loss": 0.2997, + "step": 2892 + }, + { + "epoch": 0.25, + "learning_rate": 1.760964621155037e-05, + "loss": 0.3035, + "step": 2893 + }, + { + "epoch": 0.25, + "learning_rate": 1.760784471249716e-05, + "loss": 0.3041, + "step": 2894 + }, + { + "epoch": 0.25, + "learning_rate": 1.760604262706887e-05, + "loss": 0.3328, + "step": 2895 + }, + { + "epoch": 0.25, + "learning_rate": 1.76042399554044e-05, + "loss": 0.3155, + "step": 2896 + }, + { + "epoch": 0.25, + "learning_rate": 1.7602436697642686e-05, + "loss": 0.3151, + "step": 2897 + }, + { + "epoch": 0.25, + "learning_rate": 1.7600632853922713e-05, + "loss": 0.265, + "step": 2898 + }, + { + "epoch": 0.25, + "learning_rate": 1.7598828424383522e-05, + "loss": 0.3282, + "step": 2899 + }, + { + "epoch": 0.25, + "learning_rate": 1.759702340916418e-05, + "loss": 0.288, + "step": 2900 + }, + { + "epoch": 0.25, + "learning_rate": 1.7595217808403814e-05, + "loss": 0.2635, + "step": 2901 + }, + { + "epoch": 0.25, + "learning_rate": 1.7593411622241584e-05, + "loss": 0.3057, + "step": 2902 + }, + { + "epoch": 0.25, + "learning_rate": 1.7591604850816705e-05, + "loss": 0.3115, + "step": 2903 + }, + { + "epoch": 0.25, + "learning_rate": 1.758979749426844e-05, + "loss": 0.2652, + "step": 2904 + }, + { + "epoch": 0.25, + "learning_rate": 1.7587989552736085e-05, + "loss": 0.3064, + "step": 2905 + }, + { + "epoch": 0.25, + "learning_rate": 1.7586181026358987e-05, + "loss": 0.3276, + "step": 2906 + }, + { + "epoch": 0.25, + "learning_rate": 1.758437191527654e-05, + "loss": 0.2938, + "step": 2907 + }, + { + "epoch": 0.25, + "learning_rate": 1.7582562219628185e-05, + "loss": 0.2686, + "step": 2908 + }, + { + "epoch": 0.25, + "learning_rate": 1.7580751939553396e-05, + "loss": 0.2975, + "step": 2909 + }, + { + "epoch": 0.25, + "learning_rate": 1.757894107519171e-05, + "loss": 0.3219, + "step": 2910 + }, + { + "epoch": 0.25, + "learning_rate": 1.757712962668269e-05, + "loss": 0.3459, + "step": 2911 + }, + { + "epoch": 0.25, + "learning_rate": 1.7575317594165963e-05, + "loss": 0.3243, + "step": 2912 + }, + { + "epoch": 0.25, + "learning_rate": 1.757350497778118e-05, + "loss": 0.3334, + "step": 2913 + }, + { + "epoch": 0.25, + "learning_rate": 1.757169177766806e-05, + "loss": 0.2744, + "step": 2914 + }, + { + "epoch": 0.25, + "learning_rate": 1.756987799396635e-05, + "loss": 0.2953, + "step": 2915 + }, + { + "epoch": 0.25, + "learning_rate": 1.7568063626815844e-05, + "loss": 0.3466, + "step": 2916 + }, + { + "epoch": 0.25, + "learning_rate": 1.7566248676356394e-05, + "loss": 0.3323, + "step": 2917 + }, + { + "epoch": 0.25, + "learning_rate": 1.7564433142727882e-05, + "loss": 0.3196, + "step": 2918 + }, + { + "epoch": 0.25, + "learning_rate": 1.7562617026070238e-05, + "loss": 0.2961, + "step": 2919 + }, + { + "epoch": 0.25, + "learning_rate": 1.7560800326523442e-05, + "loss": 0.3482, + "step": 2920 + }, + { + "epoch": 0.25, + "learning_rate": 1.7558983044227513e-05, + "loss": 0.2996, + "step": 2921 + }, + { + "epoch": 0.25, + "learning_rate": 1.7557165179322522e-05, + "loss": 0.3297, + "step": 2922 + }, + { + "epoch": 0.25, + "learning_rate": 1.7555346731948587e-05, + "loss": 0.2964, + "step": 2923 + }, + { + "epoch": 0.25, + "learning_rate": 1.755352770224585e-05, + "loss": 0.2969, + "step": 2924 + }, + { + "epoch": 0.25, + "learning_rate": 1.755170809035452e-05, + "loss": 0.2765, + "step": 2925 + }, + { + "epoch": 0.25, + "learning_rate": 1.7549887896414853e-05, + "loss": 0.3168, + "step": 2926 + }, + { + "epoch": 0.25, + "learning_rate": 1.7548067120567126e-05, + "loss": 0.3132, + "step": 2927 + }, + { + "epoch": 0.25, + "learning_rate": 1.754624576295168e-05, + "loss": 0.2957, + "step": 2928 + }, + { + "epoch": 0.25, + "learning_rate": 1.7544423823708903e-05, + "loss": 0.2717, + "step": 2929 + }, + { + "epoch": 0.25, + "learning_rate": 1.7542601302979213e-05, + "loss": 0.3103, + "step": 2930 + }, + { + "epoch": 0.25, + "learning_rate": 1.7540778200903082e-05, + "loss": 0.3256, + "step": 2931 + }, + { + "epoch": 0.25, + "learning_rate": 1.753895451762103e-05, + "loss": 0.2991, + "step": 2932 + }, + { + "epoch": 0.25, + "learning_rate": 1.7537130253273613e-05, + "loss": 0.2603, + "step": 2933 + }, + { + "epoch": 0.25, + "learning_rate": 1.753530540800144e-05, + "loss": 0.3221, + "step": 2934 + }, + { + "epoch": 0.25, + "learning_rate": 1.7533479981945157e-05, + "loss": 0.3535, + "step": 2935 + }, + { + "epoch": 0.25, + "learning_rate": 1.7531653975245463e-05, + "loss": 0.3038, + "step": 2936 + }, + { + "epoch": 0.25, + "learning_rate": 1.7529827388043093e-05, + "loss": 0.2844, + "step": 2937 + }, + { + "epoch": 0.25, + "learning_rate": 1.7528000220478836e-05, + "loss": 0.5901, + "step": 2938 + }, + { + "epoch": 0.25, + "learning_rate": 1.7526172472693518e-05, + "loss": 0.3172, + "step": 2939 + }, + { + "epoch": 0.25, + "learning_rate": 1.7524344144828015e-05, + "loss": 0.327, + "step": 2940 + }, + { + "epoch": 0.25, + "learning_rate": 1.7522515237023242e-05, + "loss": 0.2545, + "step": 2941 + }, + { + "epoch": 0.25, + "learning_rate": 1.7520685749420164e-05, + "loss": 0.274, + "step": 2942 + }, + { + "epoch": 0.25, + "learning_rate": 1.7518855682159793e-05, + "loss": 0.2668, + "step": 2943 + }, + { + "epoch": 0.25, + "learning_rate": 1.7517025035383175e-05, + "loss": 0.2944, + "step": 2944 + }, + { + "epoch": 0.25, + "learning_rate": 1.751519380923141e-05, + "loss": 0.2792, + "step": 2945 + }, + { + "epoch": 0.25, + "learning_rate": 1.751336200384564e-05, + "loss": 0.2963, + "step": 2946 + }, + { + "epoch": 0.25, + "learning_rate": 1.7511529619367055e-05, + "loss": 0.2906, + "step": 2947 + }, + { + "epoch": 0.25, + "learning_rate": 1.7509696655936878e-05, + "loss": 0.2549, + "step": 2948 + }, + { + "epoch": 0.25, + "learning_rate": 1.750786311369639e-05, + "loss": 0.3553, + "step": 2949 + }, + { + "epoch": 0.25, + "learning_rate": 1.7506028992786912e-05, + "loss": 0.2766, + "step": 2950 + }, + { + "epoch": 0.25, + "learning_rate": 1.7504194293349805e-05, + "loss": 0.3689, + "step": 2951 + }, + { + "epoch": 0.25, + "learning_rate": 1.7502359015526488e-05, + "loss": 0.3091, + "step": 2952 + }, + { + "epoch": 0.25, + "learning_rate": 1.75005231594584e-05, + "loss": 0.2937, + "step": 2953 + }, + { + "epoch": 0.25, + "learning_rate": 1.749868672528705e-05, + "loss": 0.3443, + "step": 2954 + }, + { + "epoch": 0.25, + "learning_rate": 1.749684971315398e-05, + "loss": 0.3121, + "step": 2955 + }, + { + "epoch": 0.25, + "learning_rate": 1.749501212320078e-05, + "loss": 0.2676, + "step": 2956 + }, + { + "epoch": 0.25, + "learning_rate": 1.749317395556908e-05, + "loss": 0.2698, + "step": 2957 + }, + { + "epoch": 0.25, + "learning_rate": 1.7491335210400554e-05, + "loss": 0.3367, + "step": 2958 + }, + { + "epoch": 0.25, + "learning_rate": 1.7489495887836922e-05, + "loss": 0.2939, + "step": 2959 + }, + { + "epoch": 0.25, + "learning_rate": 1.7487655988019957e-05, + "loss": 0.2722, + "step": 2960 + }, + { + "epoch": 0.25, + "learning_rate": 1.7485815511091466e-05, + "loss": 0.3036, + "step": 2961 + }, + { + "epoch": 0.25, + "learning_rate": 1.7483974457193307e-05, + "loss": 0.3098, + "step": 2962 + }, + { + "epoch": 0.25, + "learning_rate": 1.748213282646737e-05, + "loss": 0.2928, + "step": 2963 + }, + { + "epoch": 0.25, + "learning_rate": 1.748029061905561e-05, + "loss": 0.295, + "step": 2964 + }, + { + "epoch": 0.25, + "learning_rate": 1.747844783510001e-05, + "loss": 0.3275, + "step": 2965 + }, + { + "epoch": 0.25, + "learning_rate": 1.74766044747426e-05, + "loss": 0.2881, + "step": 2966 + }, + { + "epoch": 0.25, + "learning_rate": 1.747476053812546e-05, + "loss": 0.301, + "step": 2967 + }, + { + "epoch": 0.25, + "learning_rate": 1.7472916025390714e-05, + "loss": 0.2794, + "step": 2968 + }, + { + "epoch": 0.25, + "learning_rate": 1.7471070936680527e-05, + "loss": 0.2684, + "step": 2969 + }, + { + "epoch": 0.25, + "learning_rate": 1.7469225272137104e-05, + "loss": 0.3401, + "step": 2970 + }, + { + "epoch": 0.25, + "learning_rate": 1.7467379031902707e-05, + "loss": 0.3375, + "step": 2971 + }, + { + "epoch": 0.25, + "learning_rate": 1.7465532216119628e-05, + "loss": 0.3051, + "step": 2972 + }, + { + "epoch": 0.25, + "learning_rate": 1.7463684824930215e-05, + "loss": 0.3447, + "step": 2973 + }, + { + "epoch": 0.25, + "learning_rate": 1.7461836858476858e-05, + "loss": 0.2483, + "step": 2974 + }, + { + "epoch": 0.26, + "learning_rate": 1.7459988316901984e-05, + "loss": 0.2654, + "step": 2975 + }, + { + "epoch": 0.26, + "learning_rate": 1.745813920034807e-05, + "loss": 0.6151, + "step": 2976 + }, + { + "epoch": 0.26, + "learning_rate": 1.745628950895764e-05, + "loss": 0.3342, + "step": 2977 + }, + { + "epoch": 0.26, + "learning_rate": 1.7454439242873257e-05, + "loss": 0.2723, + "step": 2978 + }, + { + "epoch": 0.26, + "learning_rate": 1.7452588402237525e-05, + "loss": 0.613, + "step": 2979 + }, + { + "epoch": 0.26, + "learning_rate": 1.7450736987193113e-05, + "loss": 0.3793, + "step": 2980 + }, + { + "epoch": 0.26, + "learning_rate": 1.7448884997882706e-05, + "loss": 0.3612, + "step": 2981 + }, + { + "epoch": 0.26, + "learning_rate": 1.7447032434449045e-05, + "loss": 0.2988, + "step": 2982 + }, + { + "epoch": 0.26, + "learning_rate": 1.7445179297034925e-05, + "loss": 0.3465, + "step": 2983 + }, + { + "epoch": 0.26, + "learning_rate": 1.744332558578317e-05, + "loss": 0.3279, + "step": 2984 + }, + { + "epoch": 0.26, + "learning_rate": 1.744147130083666e-05, + "loss": 0.3226, + "step": 2985 + }, + { + "epoch": 0.26, + "learning_rate": 1.743961644233831e-05, + "loss": 0.3051, + "step": 2986 + }, + { + "epoch": 0.26, + "learning_rate": 1.7437761010431083e-05, + "loss": 0.2554, + "step": 2987 + }, + { + "epoch": 0.26, + "learning_rate": 1.743590500525799e-05, + "loss": 0.2751, + "step": 2988 + }, + { + "epoch": 0.26, + "learning_rate": 1.7434048426962086e-05, + "loss": 0.2797, + "step": 2989 + }, + { + "epoch": 0.26, + "learning_rate": 1.7432191275686454e-05, + "loss": 0.3446, + "step": 2990 + }, + { + "epoch": 0.26, + "learning_rate": 1.7430333551574247e-05, + "loss": 0.6001, + "step": 2991 + }, + { + "epoch": 0.26, + "learning_rate": 1.742847525476864e-05, + "loss": 0.2774, + "step": 2992 + }, + { + "epoch": 0.26, + "learning_rate": 1.742661638541287e-05, + "loss": 0.3793, + "step": 2993 + }, + { + "epoch": 0.26, + "learning_rate": 1.7424756943650203e-05, + "loss": 0.3013, + "step": 2994 + }, + { + "epoch": 0.26, + "learning_rate": 1.7422896929623957e-05, + "loss": 0.2703, + "step": 2995 + }, + { + "epoch": 0.26, + "learning_rate": 1.7421036343477498e-05, + "loss": 0.3277, + "step": 2996 + }, + { + "epoch": 0.26, + "learning_rate": 1.741917518535422e-05, + "loss": 0.4084, + "step": 2997 + }, + { + "epoch": 0.26, + "learning_rate": 1.741731345539758e-05, + "loss": 0.3613, + "step": 2998 + }, + { + "epoch": 0.26, + "learning_rate": 1.7415451153751068e-05, + "loss": 0.3193, + "step": 2999 + }, + { + "epoch": 0.26, + "learning_rate": 1.7413588280558223e-05, + "loss": 0.3335, + "step": 3000 + }, + { + "epoch": 0.26, + "learning_rate": 1.7411724835962623e-05, + "loss": 0.2513, + "step": 3001 + }, + { + "epoch": 0.26, + "learning_rate": 1.74098608201079e-05, + "loss": 0.292, + "step": 3002 + }, + { + "epoch": 0.26, + "learning_rate": 1.7407996233137713e-05, + "loss": 0.2734, + "step": 3003 + }, + { + "epoch": 0.26, + "learning_rate": 1.7406131075195784e-05, + "loss": 0.2944, + "step": 3004 + }, + { + "epoch": 0.26, + "learning_rate": 1.7404265346425867e-05, + "loss": 0.3594, + "step": 3005 + }, + { + "epoch": 0.26, + "learning_rate": 1.740239904697176e-05, + "loss": 0.269, + "step": 3006 + }, + { + "epoch": 0.26, + "learning_rate": 1.740053217697731e-05, + "loss": 0.2882, + "step": 3007 + }, + { + "epoch": 0.26, + "learning_rate": 1.739866473658641e-05, + "loss": 0.2902, + "step": 3008 + }, + { + "epoch": 0.26, + "learning_rate": 1.7396796725942986e-05, + "loss": 0.3079, + "step": 3009 + }, + { + "epoch": 0.26, + "learning_rate": 1.739492814519102e-05, + "loss": 0.2763, + "step": 3010 + }, + { + "epoch": 0.26, + "learning_rate": 1.7393058994474535e-05, + "loss": 0.2678, + "step": 3011 + }, + { + "epoch": 0.26, + "learning_rate": 1.739118927393759e-05, + "loss": 0.3147, + "step": 3012 + }, + { + "epoch": 0.26, + "learning_rate": 1.73893189837243e-05, + "loss": 0.3236, + "step": 3013 + }, + { + "epoch": 0.26, + "learning_rate": 1.7387448123978813e-05, + "loss": 0.2949, + "step": 3014 + }, + { + "epoch": 0.26, + "learning_rate": 1.7385576694845324e-05, + "loss": 0.3339, + "step": 3015 + }, + { + "epoch": 0.26, + "learning_rate": 1.738370469646808e-05, + "loss": 0.307, + "step": 3016 + }, + { + "epoch": 0.26, + "learning_rate": 1.738183212899136e-05, + "loss": 0.2856, + "step": 3017 + }, + { + "epoch": 0.26, + "learning_rate": 1.7379958992559494e-05, + "loss": 0.2893, + "step": 3018 + }, + { + "epoch": 0.26, + "learning_rate": 1.7378085287316853e-05, + "loss": 0.3495, + "step": 3019 + }, + { + "epoch": 0.26, + "learning_rate": 1.737621101340786e-05, + "loss": 0.3138, + "step": 3020 + }, + { + "epoch": 0.26, + "learning_rate": 1.7374336170976964e-05, + "loss": 0.2986, + "step": 3021 + }, + { + "epoch": 0.26, + "learning_rate": 1.7372460760168676e-05, + "loss": 0.3237, + "step": 3022 + }, + { + "epoch": 0.26, + "learning_rate": 1.737058478112754e-05, + "loss": 0.2622, + "step": 3023 + }, + { + "epoch": 0.26, + "learning_rate": 1.7368708233998148e-05, + "loss": 0.2722, + "step": 3024 + }, + { + "epoch": 0.26, + "learning_rate": 1.7366831118925133e-05, + "loss": 0.3397, + "step": 3025 + }, + { + "epoch": 0.26, + "learning_rate": 1.736495343605318e-05, + "loss": 0.3392, + "step": 3026 + }, + { + "epoch": 0.26, + "learning_rate": 1.7363075185527007e-05, + "loss": 0.2745, + "step": 3027 + }, + { + "epoch": 0.26, + "learning_rate": 1.7361196367491378e-05, + "loss": 0.2632, + "step": 3028 + }, + { + "epoch": 0.26, + "learning_rate": 1.735931698209111e-05, + "loss": 0.302, + "step": 3029 + }, + { + "epoch": 0.26, + "learning_rate": 1.735743702947105e-05, + "loss": 0.3304, + "step": 3030 + }, + { + "epoch": 0.26, + "learning_rate": 1.7355556509776093e-05, + "loss": 0.3356, + "step": 3031 + }, + { + "epoch": 0.26, + "learning_rate": 1.7353675423151194e-05, + "loss": 0.2728, + "step": 3032 + }, + { + "epoch": 0.26, + "learning_rate": 1.7351793769741326e-05, + "loss": 0.2748, + "step": 3033 + }, + { + "epoch": 0.26, + "learning_rate": 1.734991154969152e-05, + "loss": 0.33, + "step": 3034 + }, + { + "epoch": 0.26, + "learning_rate": 1.7348028763146843e-05, + "loss": 0.3058, + "step": 3035 + }, + { + "epoch": 0.26, + "learning_rate": 1.7346145410252422e-05, + "loss": 0.3177, + "step": 3036 + }, + { + "epoch": 0.26, + "learning_rate": 1.7344261491153412e-05, + "loss": 0.2975, + "step": 3037 + }, + { + "epoch": 0.26, + "learning_rate": 1.7342377005995014e-05, + "loss": 0.3472, + "step": 3038 + }, + { + "epoch": 0.26, + "learning_rate": 1.7340491954922474e-05, + "loss": 0.2886, + "step": 3039 + }, + { + "epoch": 0.26, + "learning_rate": 1.733860633808109e-05, + "loss": 0.2708, + "step": 3040 + }, + { + "epoch": 0.26, + "learning_rate": 1.7336720155616186e-05, + "loss": 0.2965, + "step": 3041 + }, + { + "epoch": 0.26, + "learning_rate": 1.7334833407673145e-05, + "loss": 0.6443, + "step": 3042 + }, + { + "epoch": 0.26, + "learning_rate": 1.733294609439739e-05, + "loss": 0.2844, + "step": 3043 + }, + { + "epoch": 0.26, + "learning_rate": 1.733105821593438e-05, + "loss": 0.283, + "step": 3044 + }, + { + "epoch": 0.26, + "learning_rate": 1.7329169772429628e-05, + "loss": 0.3101, + "step": 3045 + }, + { + "epoch": 0.26, + "learning_rate": 1.7327280764028683e-05, + "loss": 0.3412, + "step": 3046 + }, + { + "epoch": 0.26, + "learning_rate": 1.7325391190877144e-05, + "loss": 0.6011, + "step": 3047 + }, + { + "epoch": 0.26, + "learning_rate": 1.732350105312065e-05, + "loss": 0.3168, + "step": 3048 + }, + { + "epoch": 0.26, + "learning_rate": 1.7321610350904877e-05, + "loss": 0.2737, + "step": 3049 + }, + { + "epoch": 0.26, + "learning_rate": 1.7319719084375556e-05, + "loss": 0.3342, + "step": 3050 + }, + { + "epoch": 0.26, + "learning_rate": 1.7317827253678456e-05, + "loss": 0.2621, + "step": 3051 + }, + { + "epoch": 0.26, + "learning_rate": 1.731593485895939e-05, + "loss": 0.2642, + "step": 3052 + }, + { + "epoch": 0.26, + "learning_rate": 1.7314041900364215e-05, + "loss": 0.3472, + "step": 3053 + }, + { + "epoch": 0.26, + "learning_rate": 1.731214837803883e-05, + "loss": 0.2865, + "step": 3054 + }, + { + "epoch": 0.26, + "learning_rate": 1.7310254292129175e-05, + "loss": 0.5972, + "step": 3055 + }, + { + "epoch": 0.26, + "learning_rate": 1.730835964278124e-05, + "loss": 0.3008, + "step": 3056 + }, + { + "epoch": 0.26, + "learning_rate": 1.730646443014106e-05, + "loss": 0.2531, + "step": 3057 + }, + { + "epoch": 0.26, + "learning_rate": 1.7304568654354703e-05, + "loss": 0.3138, + "step": 3058 + }, + { + "epoch": 0.26, + "learning_rate": 1.7302672315568284e-05, + "loss": 0.2844, + "step": 3059 + }, + { + "epoch": 0.26, + "learning_rate": 1.730077541392797e-05, + "loss": 0.3058, + "step": 3060 + }, + { + "epoch": 0.26, + "learning_rate": 1.7298877949579962e-05, + "loss": 0.2876, + "step": 3061 + }, + { + "epoch": 0.26, + "learning_rate": 1.7296979922670502e-05, + "loss": 0.324, + "step": 3062 + }, + { + "epoch": 0.26, + "learning_rate": 1.7295081333345887e-05, + "loss": 0.3231, + "step": 3063 + }, + { + "epoch": 0.26, + "learning_rate": 1.729318218175245e-05, + "loss": 0.2648, + "step": 3064 + }, + { + "epoch": 0.26, + "learning_rate": 1.729128246803657e-05, + "loss": 0.317, + "step": 3065 + }, + { + "epoch": 0.26, + "learning_rate": 1.728938219234466e-05, + "loss": 0.2813, + "step": 3066 + }, + { + "epoch": 0.26, + "learning_rate": 1.7287481354823187e-05, + "loss": 0.276, + "step": 3067 + }, + { + "epoch": 0.26, + "learning_rate": 1.7285579955618663e-05, + "loss": 0.3404, + "step": 3068 + }, + { + "epoch": 0.26, + "learning_rate": 1.7283677994877634e-05, + "loss": 0.3178, + "step": 3069 + }, + { + "epoch": 0.26, + "learning_rate": 1.7281775472746695e-05, + "loss": 0.3302, + "step": 3070 + }, + { + "epoch": 0.26, + "learning_rate": 1.7279872389372484e-05, + "loss": 0.2679, + "step": 3071 + }, + { + "epoch": 0.26, + "learning_rate": 1.727796874490168e-05, + "loss": 0.3051, + "step": 3072 + }, + { + "epoch": 0.26, + "learning_rate": 1.7276064539481007e-05, + "loss": 0.3224, + "step": 3073 + }, + { + "epoch": 0.26, + "learning_rate": 1.7274159773257227e-05, + "loss": 0.329, + "step": 3074 + }, + { + "epoch": 0.26, + "learning_rate": 1.727225444637716e-05, + "loss": 0.3153, + "step": 3075 + }, + { + "epoch": 0.26, + "learning_rate": 1.727034855898765e-05, + "loss": 0.335, + "step": 3076 + }, + { + "epoch": 0.26, + "learning_rate": 1.72684421112356e-05, + "loss": 0.3243, + "step": 3077 + }, + { + "epoch": 0.26, + "learning_rate": 1.7266535103267943e-05, + "loss": 0.295, + "step": 3078 + }, + { + "epoch": 0.26, + "learning_rate": 1.7264627535231667e-05, + "loss": 0.3094, + "step": 3079 + }, + { + "epoch": 0.26, + "learning_rate": 1.7262719407273795e-05, + "loss": 0.2799, + "step": 3080 + }, + { + "epoch": 0.26, + "learning_rate": 1.72608107195414e-05, + "loss": 0.3039, + "step": 3081 + }, + { + "epoch": 0.26, + "learning_rate": 1.7258901472181587e-05, + "loss": 0.2769, + "step": 3082 + }, + { + "epoch": 0.26, + "learning_rate": 1.725699166534152e-05, + "loss": 0.3543, + "step": 3083 + }, + { + "epoch": 0.26, + "learning_rate": 1.7255081299168393e-05, + "loss": 0.6128, + "step": 3084 + }, + { + "epoch": 0.26, + "learning_rate": 1.7253170373809447e-05, + "loss": 0.3118, + "step": 3085 + }, + { + "epoch": 0.26, + "learning_rate": 1.7251258889411964e-05, + "loss": 0.3437, + "step": 3086 + }, + { + "epoch": 0.26, + "learning_rate": 1.724934684612328e-05, + "loss": 0.2847, + "step": 3087 + }, + { + "epoch": 0.26, + "learning_rate": 1.724743424409076e-05, + "loss": 0.2795, + "step": 3088 + }, + { + "epoch": 0.26, + "learning_rate": 1.724552108346182e-05, + "loss": 0.297, + "step": 3089 + }, + { + "epoch": 0.26, + "learning_rate": 1.7243607364383916e-05, + "loss": 0.3001, + "step": 3090 + }, + { + "epoch": 0.26, + "learning_rate": 1.7241693087004546e-05, + "loss": 0.303, + "step": 3091 + }, + { + "epoch": 0.27, + "learning_rate": 1.7239778251471255e-05, + "loss": 0.2795, + "step": 3092 + }, + { + "epoch": 0.27, + "learning_rate": 1.723786285793163e-05, + "loss": 0.3063, + "step": 3093 + }, + { + "epoch": 0.27, + "learning_rate": 1.72359469065333e-05, + "loss": 0.3005, + "step": 3094 + }, + { + "epoch": 0.27, + "learning_rate": 1.7234030397423935e-05, + "loss": 0.3278, + "step": 3095 + }, + { + "epoch": 0.27, + "learning_rate": 1.723211333075125e-05, + "loss": 0.2925, + "step": 3096 + }, + { + "epoch": 0.27, + "learning_rate": 1.723019570666301e-05, + "loss": 0.3046, + "step": 3097 + }, + { + "epoch": 0.27, + "learning_rate": 1.7228277525307007e-05, + "loss": 0.2824, + "step": 3098 + }, + { + "epoch": 0.27, + "learning_rate": 1.7226358786831087e-05, + "loss": 0.2944, + "step": 3099 + }, + { + "epoch": 0.27, + "learning_rate": 1.722443949138314e-05, + "loss": 0.3724, + "step": 3100 + }, + { + "epoch": 0.27, + "learning_rate": 1.7222519639111094e-05, + "loss": 0.3059, + "step": 3101 + }, + { + "epoch": 0.27, + "learning_rate": 1.7220599230162917e-05, + "loss": 0.29, + "step": 3102 + }, + { + "epoch": 0.27, + "learning_rate": 1.7218678264686634e-05, + "loss": 0.3082, + "step": 3103 + }, + { + "epoch": 0.27, + "learning_rate": 1.72167567428303e-05, + "loss": 0.2892, + "step": 3104 + }, + { + "epoch": 0.27, + "learning_rate": 1.7214834664742014e-05, + "loss": 0.2596, + "step": 3105 + }, + { + "epoch": 0.27, + "learning_rate": 1.7212912030569923e-05, + "loss": 0.2877, + "step": 3106 + }, + { + "epoch": 0.27, + "learning_rate": 1.7210988840462207e-05, + "loss": 0.2654, + "step": 3107 + }, + { + "epoch": 0.27, + "learning_rate": 1.7209065094567107e-05, + "loss": 0.2755, + "step": 3108 + }, + { + "epoch": 0.27, + "learning_rate": 1.7207140793032892e-05, + "loss": 0.2919, + "step": 3109 + }, + { + "epoch": 0.27, + "learning_rate": 1.720521593600787e-05, + "loss": 0.316, + "step": 3110 + }, + { + "epoch": 0.27, + "learning_rate": 1.720329052364041e-05, + "loss": 0.308, + "step": 3111 + }, + { + "epoch": 0.27, + "learning_rate": 1.7201364556078908e-05, + "loss": 0.2589, + "step": 3112 + }, + { + "epoch": 0.27, + "learning_rate": 1.7199438033471812e-05, + "loss": 0.3171, + "step": 3113 + }, + { + "epoch": 0.27, + "learning_rate": 1.71975109559676e-05, + "loss": 0.2607, + "step": 3114 + }, + { + "epoch": 0.27, + "learning_rate": 1.7195583323714812e-05, + "loss": 0.342, + "step": 3115 + }, + { + "epoch": 0.27, + "learning_rate": 1.7193655136862016e-05, + "loss": 0.3469, + "step": 3116 + }, + { + "epoch": 0.27, + "learning_rate": 1.719172639555782e-05, + "loss": 0.2828, + "step": 3117 + }, + { + "epoch": 0.27, + "learning_rate": 1.7189797099950895e-05, + "loss": 0.3389, + "step": 3118 + }, + { + "epoch": 0.27, + "learning_rate": 1.7187867250189936e-05, + "loss": 0.322, + "step": 3119 + }, + { + "epoch": 0.27, + "learning_rate": 1.7185936846423686e-05, + "loss": 0.6334, + "step": 3120 + }, + { + "epoch": 0.27, + "learning_rate": 1.718400588880093e-05, + "loss": 0.2861, + "step": 3121 + }, + { + "epoch": 0.27, + "learning_rate": 1.7182074377470494e-05, + "loss": 0.2823, + "step": 3122 + }, + { + "epoch": 0.27, + "learning_rate": 1.7180142312581253e-05, + "loss": 0.2714, + "step": 3123 + }, + { + "epoch": 0.27, + "learning_rate": 1.717820969428212e-05, + "loss": 0.3376, + "step": 3124 + }, + { + "epoch": 0.27, + "learning_rate": 1.7176276522722054e-05, + "loss": 0.2822, + "step": 3125 + }, + { + "epoch": 0.27, + "learning_rate": 1.7174342798050056e-05, + "loss": 0.319, + "step": 3126 + }, + { + "epoch": 0.27, + "learning_rate": 1.717240852041516e-05, + "loss": 0.3087, + "step": 3127 + }, + { + "epoch": 0.27, + "learning_rate": 1.717047368996646e-05, + "loss": 0.298, + "step": 3128 + }, + { + "epoch": 0.27, + "learning_rate": 1.7168538306853075e-05, + "loss": 0.2981, + "step": 3129 + }, + { + "epoch": 0.27, + "learning_rate": 1.7166602371224178e-05, + "loss": 0.2862, + "step": 3130 + }, + { + "epoch": 0.27, + "learning_rate": 1.7164665883228982e-05, + "loss": 0.2809, + "step": 3131 + }, + { + "epoch": 0.27, + "learning_rate": 1.716272884301674e-05, + "loss": 0.3267, + "step": 3132 + }, + { + "epoch": 0.27, + "learning_rate": 1.7160791250736754e-05, + "loss": 0.3401, + "step": 3133 + }, + { + "epoch": 0.27, + "learning_rate": 1.7158853106538358e-05, + "loss": 0.27, + "step": 3134 + }, + { + "epoch": 0.27, + "learning_rate": 1.7156914410570937e-05, + "loss": 0.29, + "step": 3135 + }, + { + "epoch": 0.27, + "learning_rate": 1.7154975162983917e-05, + "loss": 0.2925, + "step": 3136 + }, + { + "epoch": 0.27, + "learning_rate": 1.7153035363926766e-05, + "loss": 0.2865, + "step": 3137 + }, + { + "epoch": 0.27, + "learning_rate": 1.7151095013548996e-05, + "loss": 0.3004, + "step": 3138 + }, + { + "epoch": 0.27, + "learning_rate": 1.7149154112000154e-05, + "loss": 0.3009, + "step": 3139 + }, + { + "epoch": 0.27, + "learning_rate": 1.7147212659429837e-05, + "loss": 0.2823, + "step": 3140 + }, + { + "epoch": 0.27, + "learning_rate": 1.7145270655987686e-05, + "loss": 0.2711, + "step": 3141 + }, + { + "epoch": 0.27, + "learning_rate": 1.714332810182338e-05, + "loss": 0.3141, + "step": 3142 + }, + { + "epoch": 0.27, + "learning_rate": 1.7141384997086638e-05, + "loss": 0.299, + "step": 3143 + }, + { + "epoch": 0.27, + "learning_rate": 1.7139441341927224e-05, + "loss": 0.2684, + "step": 3144 + }, + { + "epoch": 0.27, + "learning_rate": 1.7137497136494953e-05, + "loss": 0.2828, + "step": 3145 + }, + { + "epoch": 0.27, + "learning_rate": 1.713555238093967e-05, + "loss": 0.3049, + "step": 3146 + }, + { + "epoch": 0.27, + "learning_rate": 1.7133607075411266e-05, + "loss": 0.3228, + "step": 3147 + }, + { + "epoch": 0.27, + "learning_rate": 1.7131661220059675e-05, + "loss": 0.3062, + "step": 3148 + }, + { + "epoch": 0.27, + "learning_rate": 1.7129714815034876e-05, + "loss": 0.2823, + "step": 3149 + }, + { + "epoch": 0.27, + "learning_rate": 1.7127767860486892e-05, + "loss": 0.3042, + "step": 3150 + }, + { + "epoch": 0.27, + "learning_rate": 1.7125820356565776e-05, + "loss": 0.2822, + "step": 3151 + }, + { + "epoch": 0.27, + "learning_rate": 1.712387230342164e-05, + "loss": 0.308, + "step": 3152 + }, + { + "epoch": 0.27, + "learning_rate": 1.7121923701204623e-05, + "loss": 0.3065, + "step": 3153 + }, + { + "epoch": 0.27, + "learning_rate": 1.711997455006492e-05, + "loss": 0.3438, + "step": 3154 + }, + { + "epoch": 0.27, + "learning_rate": 1.7118024850152763e-05, + "loss": 0.3024, + "step": 3155 + }, + { + "epoch": 0.27, + "learning_rate": 1.7116074601618418e-05, + "loss": 0.2725, + "step": 3156 + }, + { + "epoch": 0.27, + "learning_rate": 1.7114123804612205e-05, + "loss": 0.3035, + "step": 3157 + }, + { + "epoch": 0.27, + "learning_rate": 1.7112172459284478e-05, + "loss": 0.2926, + "step": 3158 + }, + { + "epoch": 0.27, + "learning_rate": 1.7110220565785644e-05, + "loss": 0.2766, + "step": 3159 + }, + { + "epoch": 0.27, + "learning_rate": 1.710826812426614e-05, + "loss": 0.2586, + "step": 3160 + }, + { + "epoch": 0.27, + "learning_rate": 1.710631513487645e-05, + "loss": 0.2908, + "step": 3161 + }, + { + "epoch": 0.27, + "learning_rate": 1.7104361597767107e-05, + "loss": 0.3331, + "step": 3162 + }, + { + "epoch": 0.27, + "learning_rate": 1.7102407513088676e-05, + "loss": 0.2949, + "step": 3163 + }, + { + "epoch": 0.27, + "learning_rate": 1.7100452880991764e-05, + "loss": 0.3762, + "step": 3164 + }, + { + "epoch": 0.27, + "learning_rate": 1.7098497701627027e-05, + "loss": 0.3291, + "step": 3165 + }, + { + "epoch": 0.27, + "learning_rate": 1.709654197514517e-05, + "loss": 0.288, + "step": 3166 + }, + { + "epoch": 0.27, + "learning_rate": 1.7094585701696916e-05, + "loss": 0.2801, + "step": 3167 + }, + { + "epoch": 0.27, + "learning_rate": 1.7092628881433052e-05, + "loss": 0.2711, + "step": 3168 + }, + { + "epoch": 0.27, + "learning_rate": 1.70906715145044e-05, + "loss": 0.2989, + "step": 3169 + }, + { + "epoch": 0.27, + "learning_rate": 1.7088713601061823e-05, + "loss": 0.264, + "step": 3170 + }, + { + "epoch": 0.27, + "learning_rate": 1.708675514125623e-05, + "loss": 0.2844, + "step": 3171 + }, + { + "epoch": 0.27, + "learning_rate": 1.7084796135238566e-05, + "loss": 0.3286, + "step": 3172 + }, + { + "epoch": 0.27, + "learning_rate": 1.7082836583159826e-05, + "loss": 0.304, + "step": 3173 + }, + { + "epoch": 0.27, + "learning_rate": 1.7080876485171035e-05, + "loss": 0.285, + "step": 3174 + }, + { + "epoch": 0.27, + "learning_rate": 1.7078915841423273e-05, + "loss": 0.2709, + "step": 3175 + }, + { + "epoch": 0.27, + "learning_rate": 1.7076954652067657e-05, + "loss": 0.2842, + "step": 3176 + }, + { + "epoch": 0.27, + "learning_rate": 1.7074992917255343e-05, + "loss": 0.3191, + "step": 3177 + }, + { + "epoch": 0.27, + "learning_rate": 1.7073030637137535e-05, + "loss": 0.3039, + "step": 3178 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.2858, + "step": 3179 + }, + { + "epoch": 0.27, + "learning_rate": 1.706910444159045e-05, + "loss": 0.289, + "step": 3180 + }, + { + "epoch": 0.27, + "learning_rate": 1.7067140526463778e-05, + "loss": 0.3314, + "step": 3181 + }, + { + "epoch": 0.27, + "learning_rate": 1.7065176066636836e-05, + "loss": 0.2966, + "step": 3182 + }, + { + "epoch": 0.27, + "learning_rate": 1.7063211062261034e-05, + "loss": 0.3057, + "step": 3183 + }, + { + "epoch": 0.27, + "learning_rate": 1.7061245513487824e-05, + "loss": 0.28, + "step": 3184 + }, + { + "epoch": 0.27, + "learning_rate": 1.70592794204687e-05, + "loss": 0.3273, + "step": 3185 + }, + { + "epoch": 0.27, + "learning_rate": 1.70573127833552e-05, + "loss": 0.3187, + "step": 3186 + }, + { + "epoch": 0.27, + "learning_rate": 1.70553456022989e-05, + "loss": 0.3081, + "step": 3187 + }, + { + "epoch": 0.27, + "learning_rate": 1.7053377877451424e-05, + "loss": 0.365, + "step": 3188 + }, + { + "epoch": 0.27, + "learning_rate": 1.7051409608964433e-05, + "loss": 0.2942, + "step": 3189 + }, + { + "epoch": 0.27, + "learning_rate": 1.704944079698963e-05, + "loss": 0.299, + "step": 3190 + }, + { + "epoch": 0.27, + "learning_rate": 1.7047471441678764e-05, + "loss": 0.2977, + "step": 3191 + }, + { + "epoch": 0.27, + "learning_rate": 1.704550154318362e-05, + "loss": 0.2982, + "step": 3192 + }, + { + "epoch": 0.27, + "learning_rate": 1.704353110165603e-05, + "loss": 0.3495, + "step": 3193 + }, + { + "epoch": 0.27, + "learning_rate": 1.704156011724787e-05, + "loss": 0.2707, + "step": 3194 + }, + { + "epoch": 0.27, + "learning_rate": 1.7039588590111045e-05, + "loss": 0.2767, + "step": 3195 + }, + { + "epoch": 0.27, + "learning_rate": 1.7037616520397515e-05, + "loss": 0.3471, + "step": 3196 + }, + { + "epoch": 0.27, + "learning_rate": 1.7035643908259278e-05, + "loss": 0.2956, + "step": 3197 + }, + { + "epoch": 0.27, + "learning_rate": 1.7033670753848373e-05, + "loss": 0.3131, + "step": 3198 + }, + { + "epoch": 0.27, + "learning_rate": 1.7031697057316883e-05, + "loss": 0.3559, + "step": 3199 + }, + { + "epoch": 0.27, + "learning_rate": 1.702972281881693e-05, + "loss": 0.2711, + "step": 3200 + }, + { + "epoch": 0.27, + "learning_rate": 1.702774803850067e-05, + "loss": 0.2862, + "step": 3201 + }, + { + "epoch": 0.27, + "learning_rate": 1.7025772716520324e-05, + "loss": 0.2958, + "step": 3202 + }, + { + "epoch": 0.27, + "learning_rate": 1.7023796853028125e-05, + "loss": 0.3347, + "step": 3203 + }, + { + "epoch": 0.27, + "learning_rate": 1.7021820448176372e-05, + "loss": 0.2975, + "step": 3204 + }, + { + "epoch": 0.27, + "learning_rate": 1.7019843502117398e-05, + "loss": 0.3403, + "step": 3205 + }, + { + "epoch": 0.27, + "learning_rate": 1.701786601500357e-05, + "loss": 0.2828, + "step": 3206 + }, + { + "epoch": 0.27, + "learning_rate": 1.701588798698731e-05, + "loss": 0.2838, + "step": 3207 + }, + { + "epoch": 0.27, + "learning_rate": 1.7013909418221065e-05, + "loss": 0.3041, + "step": 3208 + }, + { + "epoch": 0.28, + "learning_rate": 1.701193030885734e-05, + "loss": 0.2836, + "step": 3209 + }, + { + "epoch": 0.28, + "learning_rate": 1.7009950659048677e-05, + "loss": 0.3325, + "step": 3210 + }, + { + "epoch": 0.28, + "learning_rate": 1.7007970468947653e-05, + "loss": 0.325, + "step": 3211 + }, + { + "epoch": 0.28, + "learning_rate": 1.7005989738706892e-05, + "loss": 0.3261, + "step": 3212 + }, + { + "epoch": 0.28, + "learning_rate": 1.700400846847906e-05, + "loss": 0.2954, + "step": 3213 + }, + { + "epoch": 0.28, + "learning_rate": 1.7002026658416862e-05, + "loss": 0.3056, + "step": 3214 + }, + { + "epoch": 0.28, + "learning_rate": 1.700004430867305e-05, + "loss": 0.3188, + "step": 3215 + }, + { + "epoch": 0.28, + "learning_rate": 1.6998061419400408e-05, + "loss": 0.2596, + "step": 3216 + }, + { + "epoch": 0.28, + "learning_rate": 1.699607799075177e-05, + "loss": 0.3025, + "step": 3217 + }, + { + "epoch": 0.28, + "learning_rate": 1.699409402288001e-05, + "loss": 0.3573, + "step": 3218 + }, + { + "epoch": 0.28, + "learning_rate": 1.6992109515938042e-05, + "loss": 0.2955, + "step": 3219 + }, + { + "epoch": 0.28, + "learning_rate": 1.699012447007882e-05, + "loss": 0.3124, + "step": 3220 + }, + { + "epoch": 0.28, + "learning_rate": 1.698813888545535e-05, + "loss": 0.2769, + "step": 3221 + }, + { + "epoch": 0.28, + "learning_rate": 1.6986152762220655e-05, + "loss": 0.3116, + "step": 3222 + }, + { + "epoch": 0.28, + "learning_rate": 1.698416610052783e-05, + "loss": 0.3093, + "step": 3223 + }, + { + "epoch": 0.28, + "learning_rate": 1.6982178900529988e-05, + "loss": 0.2751, + "step": 3224 + }, + { + "epoch": 0.28, + "learning_rate": 1.6980191162380298e-05, + "loss": 0.3327, + "step": 3225 + }, + { + "epoch": 0.28, + "learning_rate": 1.6978202886231963e-05, + "loss": 0.287, + "step": 3226 + }, + { + "epoch": 0.28, + "learning_rate": 1.697621407223823e-05, + "loss": 0.3455, + "step": 3227 + }, + { + "epoch": 0.28, + "learning_rate": 1.697422472055239e-05, + "loss": 0.2659, + "step": 3228 + }, + { + "epoch": 0.28, + "learning_rate": 1.6972234831327767e-05, + "loss": 0.2847, + "step": 3229 + }, + { + "epoch": 0.28, + "learning_rate": 1.6970244404717732e-05, + "loss": 0.3066, + "step": 3230 + }, + { + "epoch": 0.28, + "learning_rate": 1.6968253440875702e-05, + "loss": 0.3514, + "step": 3231 + }, + { + "epoch": 0.28, + "learning_rate": 1.6966261939955125e-05, + "loss": 0.2875, + "step": 3232 + }, + { + "epoch": 0.28, + "learning_rate": 1.69642699021095e-05, + "loss": 0.2546, + "step": 3233 + }, + { + "epoch": 0.28, + "learning_rate": 1.6962277327492366e-05, + "loss": 0.3536, + "step": 3234 + }, + { + "epoch": 0.28, + "learning_rate": 1.6960284216257293e-05, + "loss": 0.3553, + "step": 3235 + }, + { + "epoch": 0.28, + "learning_rate": 1.6958290568557905e-05, + "loss": 0.2839, + "step": 3236 + }, + { + "epoch": 0.28, + "learning_rate": 1.695629638454786e-05, + "loss": 0.3006, + "step": 3237 + }, + { + "epoch": 0.28, + "learning_rate": 1.6954301664380867e-05, + "loss": 0.2579, + "step": 3238 + }, + { + "epoch": 0.28, + "learning_rate": 1.6952306408210663e-05, + "loss": 0.284, + "step": 3239 + }, + { + "epoch": 0.28, + "learning_rate": 1.695031061619103e-05, + "loss": 0.2819, + "step": 3240 + }, + { + "epoch": 0.28, + "learning_rate": 1.6948314288475796e-05, + "loss": 0.3775, + "step": 3241 + }, + { + "epoch": 0.28, + "learning_rate": 1.6946317425218834e-05, + "loss": 0.29, + "step": 3242 + }, + { + "epoch": 0.28, + "learning_rate": 1.6944320026574047e-05, + "loss": 0.3154, + "step": 3243 + }, + { + "epoch": 0.28, + "learning_rate": 1.694232209269538e-05, + "loss": 0.3047, + "step": 3244 + }, + { + "epoch": 0.28, + "learning_rate": 1.6940323623736835e-05, + "loss": 0.2916, + "step": 3245 + }, + { + "epoch": 0.28, + "learning_rate": 1.6938324619852435e-05, + "loss": 0.3168, + "step": 3246 + }, + { + "epoch": 0.28, + "learning_rate": 1.693632508119626e-05, + "loss": 0.2517, + "step": 3247 + }, + { + "epoch": 0.28, + "learning_rate": 1.6934325007922418e-05, + "loss": 0.2792, + "step": 3248 + }, + { + "epoch": 0.28, + "learning_rate": 1.6932324400185073e-05, + "loss": 0.3254, + "step": 3249 + }, + { + "epoch": 0.28, + "learning_rate": 1.693032325813841e-05, + "loss": 0.3464, + "step": 3250 + }, + { + "epoch": 0.28, + "learning_rate": 1.6928321581936676e-05, + "loss": 0.2496, + "step": 3251 + }, + { + "epoch": 0.28, + "learning_rate": 1.692631937173415e-05, + "loss": 0.2857, + "step": 3252 + }, + { + "epoch": 0.28, + "learning_rate": 1.692431662768515e-05, + "loss": 0.3168, + "step": 3253 + }, + { + "epoch": 0.28, + "learning_rate": 1.6922313349944037e-05, + "loss": 0.2949, + "step": 3254 + }, + { + "epoch": 0.28, + "learning_rate": 1.6920309538665215e-05, + "loss": 0.3461, + "step": 3255 + }, + { + "epoch": 0.28, + "learning_rate": 1.691830519400313e-05, + "loss": 0.2751, + "step": 3256 + }, + { + "epoch": 0.28, + "learning_rate": 1.6916300316112265e-05, + "loss": 0.3233, + "step": 3257 + }, + { + "epoch": 0.28, + "learning_rate": 1.6914294905147144e-05, + "loss": 0.2827, + "step": 3258 + }, + { + "epoch": 0.28, + "learning_rate": 1.691228896126234e-05, + "loss": 0.2757, + "step": 3259 + }, + { + "epoch": 0.28, + "learning_rate": 1.6910282484612452e-05, + "loss": 0.266, + "step": 3260 + }, + { + "epoch": 0.28, + "learning_rate": 1.690827547535214e-05, + "loss": 0.2808, + "step": 3261 + }, + { + "epoch": 0.28, + "learning_rate": 1.6906267933636087e-05, + "loss": 0.3284, + "step": 3262 + }, + { + "epoch": 0.28, + "learning_rate": 1.6904259859619028e-05, + "loss": 0.2706, + "step": 3263 + }, + { + "epoch": 0.28, + "learning_rate": 1.690225125345573e-05, + "loss": 0.2667, + "step": 3264 + }, + { + "epoch": 0.28, + "learning_rate": 1.6900242115301014e-05, + "loss": 0.2571, + "step": 3265 + }, + { + "epoch": 0.28, + "learning_rate": 1.689823244530973e-05, + "loss": 0.2697, + "step": 3266 + }, + { + "epoch": 0.28, + "learning_rate": 1.6896222243636775e-05, + "loss": 0.3256, + "step": 3267 + }, + { + "epoch": 0.28, + "learning_rate": 1.6894211510437086e-05, + "loss": 0.3239, + "step": 3268 + }, + { + "epoch": 0.28, + "learning_rate": 1.6892200245865635e-05, + "loss": 0.2856, + "step": 3269 + }, + { + "epoch": 0.28, + "learning_rate": 1.6890188450077445e-05, + "loss": 0.2835, + "step": 3270 + }, + { + "epoch": 0.28, + "learning_rate": 1.6888176123227576e-05, + "loss": 0.3461, + "step": 3271 + }, + { + "epoch": 0.28, + "learning_rate": 1.6886163265471127e-05, + "loss": 0.3284, + "step": 3272 + }, + { + "epoch": 0.28, + "learning_rate": 1.688414987696324e-05, + "loss": 0.2874, + "step": 3273 + }, + { + "epoch": 0.28, + "learning_rate": 1.6882135957859095e-05, + "loss": 0.2784, + "step": 3274 + }, + { + "epoch": 0.28, + "learning_rate": 1.6880121508313916e-05, + "loss": 0.3162, + "step": 3275 + }, + { + "epoch": 0.28, + "learning_rate": 1.6878106528482968e-05, + "loss": 0.2836, + "step": 3276 + }, + { + "epoch": 0.28, + "learning_rate": 1.687609101852155e-05, + "loss": 0.2795, + "step": 3277 + }, + { + "epoch": 0.28, + "learning_rate": 1.6874074978585018e-05, + "loss": 0.3029, + "step": 3278 + }, + { + "epoch": 0.28, + "learning_rate": 1.687205840882875e-05, + "loss": 0.265, + "step": 3279 + }, + { + "epoch": 0.28, + "learning_rate": 1.6870041309408174e-05, + "loss": 0.3556, + "step": 3280 + }, + { + "epoch": 0.28, + "learning_rate": 1.6868023680478763e-05, + "loss": 0.2484, + "step": 3281 + }, + { + "epoch": 0.28, + "learning_rate": 1.686600552219602e-05, + "loss": 0.3536, + "step": 3282 + }, + { + "epoch": 0.28, + "learning_rate": 1.6863986834715497e-05, + "loss": 0.2745, + "step": 3283 + }, + { + "epoch": 0.28, + "learning_rate": 1.686196761819279e-05, + "loss": 0.2825, + "step": 3284 + }, + { + "epoch": 0.28, + "learning_rate": 1.6859947872783517e-05, + "loss": 0.2963, + "step": 3285 + }, + { + "epoch": 0.28, + "learning_rate": 1.6857927598643362e-05, + "loss": 0.2841, + "step": 3286 + }, + { + "epoch": 0.28, + "learning_rate": 1.6855906795928033e-05, + "loss": 0.3093, + "step": 3287 + }, + { + "epoch": 0.28, + "learning_rate": 1.6853885464793287e-05, + "loss": 0.2869, + "step": 3288 + }, + { + "epoch": 0.28, + "learning_rate": 1.685186360539491e-05, + "loss": 0.2253, + "step": 3289 + }, + { + "epoch": 0.28, + "learning_rate": 1.6849841217888748e-05, + "loss": 0.2869, + "step": 3290 + }, + { + "epoch": 0.28, + "learning_rate": 1.684781830243067e-05, + "loss": 0.6086, + "step": 3291 + }, + { + "epoch": 0.28, + "learning_rate": 1.684579485917659e-05, + "loss": 0.2919, + "step": 3292 + }, + { + "epoch": 0.28, + "learning_rate": 1.684377088828247e-05, + "loss": 0.3093, + "step": 3293 + }, + { + "epoch": 0.28, + "learning_rate": 1.6841746389904306e-05, + "loss": 0.2957, + "step": 3294 + }, + { + "epoch": 0.28, + "learning_rate": 1.6839721364198134e-05, + "loss": 0.3026, + "step": 3295 + }, + { + "epoch": 0.28, + "learning_rate": 1.683769581132004e-05, + "loss": 0.2643, + "step": 3296 + }, + { + "epoch": 0.28, + "learning_rate": 1.6835669731426137e-05, + "loss": 0.2872, + "step": 3297 + }, + { + "epoch": 0.28, + "learning_rate": 1.6833643124672586e-05, + "loss": 0.3674, + "step": 3298 + }, + { + "epoch": 0.28, + "learning_rate": 1.683161599121559e-05, + "loss": 0.3269, + "step": 3299 + }, + { + "epoch": 0.28, + "learning_rate": 1.682958833121139e-05, + "loss": 0.2796, + "step": 3300 + }, + { + "epoch": 0.28, + "learning_rate": 1.6827560144816266e-05, + "loss": 0.285, + "step": 3301 + }, + { + "epoch": 0.28, + "learning_rate": 1.6825531432186545e-05, + "loss": 0.2755, + "step": 3302 + }, + { + "epoch": 0.28, + "learning_rate": 1.6823502193478583e-05, + "loss": 0.3187, + "step": 3303 + }, + { + "epoch": 0.28, + "learning_rate": 1.6821472428848788e-05, + "loss": 0.283, + "step": 3304 + }, + { + "epoch": 0.28, + "learning_rate": 1.6819442138453605e-05, + "loss": 0.3214, + "step": 3305 + }, + { + "epoch": 0.28, + "learning_rate": 1.681741132244952e-05, + "loss": 0.3398, + "step": 3306 + }, + { + "epoch": 0.28, + "learning_rate": 1.6815379980993055e-05, + "loss": 0.3513, + "step": 3307 + }, + { + "epoch": 0.28, + "learning_rate": 1.6813348114240775e-05, + "loss": 0.2922, + "step": 3308 + }, + { + "epoch": 0.28, + "learning_rate": 1.681131572234929e-05, + "loss": 0.379, + "step": 3309 + }, + { + "epoch": 0.28, + "learning_rate": 1.6809282805475243e-05, + "loss": 0.2955, + "step": 3310 + }, + { + "epoch": 0.28, + "learning_rate": 1.6807249363775326e-05, + "loss": 0.2535, + "step": 3311 + }, + { + "epoch": 0.28, + "learning_rate": 1.6805215397406264e-05, + "loss": 0.3207, + "step": 3312 + }, + { + "epoch": 0.28, + "learning_rate": 1.6803180906524822e-05, + "loss": 0.34, + "step": 3313 + }, + { + "epoch": 0.28, + "learning_rate": 1.680114589128781e-05, + "loss": 0.3007, + "step": 3314 + }, + { + "epoch": 0.28, + "learning_rate": 1.679911035185208e-05, + "loss": 0.3101, + "step": 3315 + }, + { + "epoch": 0.28, + "learning_rate": 1.679707428837452e-05, + "loss": 0.5824, + "step": 3316 + }, + { + "epoch": 0.28, + "learning_rate": 1.679503770101206e-05, + "loss": 0.3018, + "step": 3317 + }, + { + "epoch": 0.28, + "learning_rate": 1.6793000589921666e-05, + "loss": 0.2902, + "step": 3318 + }, + { + "epoch": 0.28, + "learning_rate": 1.679096295526035e-05, + "loss": 0.2965, + "step": 3319 + }, + { + "epoch": 0.28, + "learning_rate": 1.6788924797185174e-05, + "loss": 0.2875, + "step": 3320 + }, + { + "epoch": 0.28, + "learning_rate": 1.6786886115853214e-05, + "loss": 0.3323, + "step": 3321 + }, + { + "epoch": 0.28, + "learning_rate": 1.6784846911421605e-05, + "loss": 0.2855, + "step": 3322 + }, + { + "epoch": 0.28, + "learning_rate": 1.6782807184047524e-05, + "loss": 0.3079, + "step": 3323 + }, + { + "epoch": 0.28, + "learning_rate": 1.678076693388818e-05, + "loss": 0.5862, + "step": 3324 + }, + { + "epoch": 0.29, + "learning_rate": 1.6778726161100825e-05, + "loss": 0.2911, + "step": 3325 + }, + { + "epoch": 0.29, + "learning_rate": 1.6776684865842748e-05, + "loss": 0.2958, + "step": 3326 + }, + { + "epoch": 0.29, + "learning_rate": 1.677464304827129e-05, + "loss": 0.2935, + "step": 3327 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772600708543822e-05, + "loss": 0.3343, + "step": 3328 + }, + { + "epoch": 0.29, + "learning_rate": 1.6770557846817754e-05, + "loss": 0.2747, + "step": 3329 + }, + { + "epoch": 0.29, + "learning_rate": 1.6768514463250544e-05, + "loss": 0.3183, + "step": 3330 + }, + { + "epoch": 0.29, + "learning_rate": 1.676647055799968e-05, + "loss": 0.2773, + "step": 3331 + }, + { + "epoch": 0.29, + "learning_rate": 1.67644261312227e-05, + "loss": 0.3177, + "step": 3332 + }, + { + "epoch": 0.29, + "learning_rate": 1.6762381183077178e-05, + "loss": 0.3218, + "step": 3333 + }, + { + "epoch": 0.29, + "learning_rate": 1.6760335713720727e-05, + "loss": 0.3254, + "step": 3334 + }, + { + "epoch": 0.29, + "learning_rate": 1.6758289723311007e-05, + "loss": 0.278, + "step": 3335 + }, + { + "epoch": 0.29, + "learning_rate": 1.6756243212005704e-05, + "loss": 0.2692, + "step": 3336 + }, + { + "epoch": 0.29, + "learning_rate": 1.6754196179962563e-05, + "loss": 0.3147, + "step": 3337 + }, + { + "epoch": 0.29, + "learning_rate": 1.675214862733935e-05, + "loss": 0.324, + "step": 3338 + }, + { + "epoch": 0.29, + "learning_rate": 1.6750100554293886e-05, + "loss": 0.2687, + "step": 3339 + }, + { + "epoch": 0.29, + "learning_rate": 1.674805196098402e-05, + "loss": 0.2871, + "step": 3340 + }, + { + "epoch": 0.29, + "learning_rate": 1.6746002847567656e-05, + "loss": 0.3069, + "step": 3341 + }, + { + "epoch": 0.29, + "learning_rate": 1.674395321420273e-05, + "loss": 0.2973, + "step": 3342 + }, + { + "epoch": 0.29, + "learning_rate": 1.6741903061047204e-05, + "loss": 0.2627, + "step": 3343 + }, + { + "epoch": 0.29, + "learning_rate": 1.6739852388259107e-05, + "loss": 0.2808, + "step": 3344 + }, + { + "epoch": 0.29, + "learning_rate": 1.6737801195996492e-05, + "loss": 0.3117, + "step": 3345 + }, + { + "epoch": 0.29, + "learning_rate": 1.6735749484417452e-05, + "loss": 0.2991, + "step": 3346 + }, + { + "epoch": 0.29, + "learning_rate": 1.6733697253680124e-05, + "loss": 0.3022, + "step": 3347 + }, + { + "epoch": 0.29, + "learning_rate": 1.6731644503942684e-05, + "loss": 0.2876, + "step": 3348 + }, + { + "epoch": 0.29, + "learning_rate": 1.6729591235363346e-05, + "loss": 0.2958, + "step": 3349 + }, + { + "epoch": 0.29, + "learning_rate": 1.672753744810037e-05, + "loss": 0.2692, + "step": 3350 + }, + { + "epoch": 0.29, + "learning_rate": 1.6725483142312046e-05, + "loss": 0.2693, + "step": 3351 + }, + { + "epoch": 0.29, + "learning_rate": 1.6723428318156715e-05, + "loss": 0.3182, + "step": 3352 + }, + { + "epoch": 0.29, + "learning_rate": 1.6721372975792752e-05, + "loss": 0.3208, + "step": 3353 + }, + { + "epoch": 0.29, + "learning_rate": 1.671931711537857e-05, + "loss": 0.2614, + "step": 3354 + }, + { + "epoch": 0.29, + "learning_rate": 1.6717260737072628e-05, + "loss": 0.3231, + "step": 3355 + }, + { + "epoch": 0.29, + "learning_rate": 1.6715203841033417e-05, + "loss": 0.3244, + "step": 3356 + }, + { + "epoch": 0.29, + "learning_rate": 1.6713146427419473e-05, + "loss": 0.2794, + "step": 3357 + }, + { + "epoch": 0.29, + "learning_rate": 1.6711088496389375e-05, + "loss": 0.3044, + "step": 3358 + }, + { + "epoch": 0.29, + "learning_rate": 1.6709030048101738e-05, + "loss": 0.2933, + "step": 3359 + }, + { + "epoch": 0.29, + "learning_rate": 1.6706971082715212e-05, + "loss": 0.2614, + "step": 3360 + }, + { + "epoch": 0.29, + "learning_rate": 1.6704911600388496e-05, + "loss": 0.3378, + "step": 3361 + }, + { + "epoch": 0.29, + "learning_rate": 1.6702851601280322e-05, + "loss": 0.3207, + "step": 3362 + }, + { + "epoch": 0.29, + "learning_rate": 1.6700791085549474e-05, + "loss": 0.3201, + "step": 3363 + }, + { + "epoch": 0.29, + "learning_rate": 1.669873005335475e-05, + "loss": 0.2818, + "step": 3364 + }, + { + "epoch": 0.29, + "learning_rate": 1.6696668504855016e-05, + "loss": 0.3013, + "step": 3365 + }, + { + "epoch": 0.29, + "learning_rate": 1.6694606440209163e-05, + "loss": 0.2685, + "step": 3366 + }, + { + "epoch": 0.29, + "learning_rate": 1.6692543859576124e-05, + "loss": 0.292, + "step": 3367 + }, + { + "epoch": 0.29, + "learning_rate": 1.6690480763114877e-05, + "loss": 0.2921, + "step": 3368 + }, + { + "epoch": 0.29, + "learning_rate": 1.6688417150984423e-05, + "loss": 0.2457, + "step": 3369 + }, + { + "epoch": 0.29, + "learning_rate": 1.668635302334383e-05, + "loss": 0.326, + "step": 3370 + }, + { + "epoch": 0.29, + "learning_rate": 1.6684288380352182e-05, + "loss": 0.299, + "step": 3371 + }, + { + "epoch": 0.29, + "learning_rate": 1.6682223222168614e-05, + "loss": 0.3047, + "step": 3372 + }, + { + "epoch": 0.29, + "learning_rate": 1.66801575489523e-05, + "loss": 0.3005, + "step": 3373 + }, + { + "epoch": 0.29, + "learning_rate": 1.6678091360862447e-05, + "loss": 0.278, + "step": 3374 + }, + { + "epoch": 0.29, + "learning_rate": 1.667602465805831e-05, + "loss": 0.3246, + "step": 3375 + }, + { + "epoch": 0.29, + "learning_rate": 1.6673957440699183e-05, + "loss": 0.2977, + "step": 3376 + }, + { + "epoch": 0.29, + "learning_rate": 1.667188970894439e-05, + "loss": 0.3099, + "step": 3377 + }, + { + "epoch": 0.29, + "learning_rate": 1.6669821462953303e-05, + "loss": 0.238, + "step": 3378 + }, + { + "epoch": 0.29, + "learning_rate": 1.666775270288534e-05, + "loss": 0.2957, + "step": 3379 + }, + { + "epoch": 0.29, + "learning_rate": 1.666568342889994e-05, + "loss": 0.2776, + "step": 3380 + }, + { + "epoch": 0.29, + "learning_rate": 1.66636136411566e-05, + "loss": 0.3134, + "step": 3381 + }, + { + "epoch": 0.29, + "learning_rate": 1.6661543339814847e-05, + "loss": 0.3475, + "step": 3382 + }, + { + "epoch": 0.29, + "learning_rate": 1.6659472525034245e-05, + "loss": 0.2585, + "step": 3383 + }, + { + "epoch": 0.29, + "learning_rate": 1.6657401196974405e-05, + "loss": 0.3091, + "step": 3384 + }, + { + "epoch": 0.29, + "learning_rate": 1.6655329355794982e-05, + "loss": 0.2728, + "step": 3385 + }, + { + "epoch": 0.29, + "learning_rate": 1.6653257001655652e-05, + "loss": 0.295, + "step": 3386 + }, + { + "epoch": 0.29, + "learning_rate": 1.6651184134716156e-05, + "loss": 0.2575, + "step": 3387 + }, + { + "epoch": 0.29, + "learning_rate": 1.6649110755136242e-05, + "loss": 0.2902, + "step": 3388 + }, + { + "epoch": 0.29, + "learning_rate": 1.664703686307573e-05, + "loss": 0.2437, + "step": 3389 + }, + { + "epoch": 0.29, + "learning_rate": 1.6644962458694457e-05, + "loss": 0.3054, + "step": 3390 + }, + { + "epoch": 0.29, + "learning_rate": 1.6642887542152312e-05, + "loss": 0.3232, + "step": 3391 + }, + { + "epoch": 0.29, + "learning_rate": 1.664081211360922e-05, + "loss": 0.3205, + "step": 3392 + }, + { + "epoch": 0.29, + "learning_rate": 1.663873617322514e-05, + "loss": 0.2703, + "step": 3393 + }, + { + "epoch": 0.29, + "learning_rate": 1.6636659721160088e-05, + "loss": 0.3407, + "step": 3394 + }, + { + "epoch": 0.29, + "learning_rate": 1.663458275757409e-05, + "loss": 0.3225, + "step": 3395 + }, + { + "epoch": 0.29, + "learning_rate": 1.6632505282627238e-05, + "loss": 0.3024, + "step": 3396 + }, + { + "epoch": 0.29, + "learning_rate": 1.663042729647965e-05, + "loss": 0.2923, + "step": 3397 + }, + { + "epoch": 0.29, + "learning_rate": 1.662834879929149e-05, + "loss": 0.2529, + "step": 3398 + }, + { + "epoch": 0.29, + "learning_rate": 1.662626979122295e-05, + "loss": 0.3021, + "step": 3399 + }, + { + "epoch": 0.29, + "learning_rate": 1.6624190272434282e-05, + "loss": 0.321, + "step": 3400 + }, + { + "epoch": 0.29, + "learning_rate": 1.662211024308576e-05, + "loss": 0.3032, + "step": 3401 + }, + { + "epoch": 0.29, + "learning_rate": 1.6620029703337697e-05, + "loss": 0.3085, + "step": 3402 + }, + { + "epoch": 0.29, + "learning_rate": 1.6617948653350455e-05, + "loss": 0.3025, + "step": 3403 + }, + { + "epoch": 0.29, + "learning_rate": 1.6615867093284434e-05, + "loss": 0.308, + "step": 3404 + }, + { + "epoch": 0.29, + "learning_rate": 1.6613785023300063e-05, + "loss": 0.2805, + "step": 3405 + }, + { + "epoch": 0.29, + "learning_rate": 1.6611702443557826e-05, + "loss": 0.259, + "step": 3406 + }, + { + "epoch": 0.29, + "learning_rate": 1.660961935421823e-05, + "loss": 0.3278, + "step": 3407 + }, + { + "epoch": 0.29, + "learning_rate": 1.6607535755441837e-05, + "loss": 0.3074, + "step": 3408 + }, + { + "epoch": 0.29, + "learning_rate": 1.6605451647389238e-05, + "loss": 0.28, + "step": 3409 + }, + { + "epoch": 0.29, + "learning_rate": 1.660336703022106e-05, + "loss": 0.2719, + "step": 3410 + }, + { + "epoch": 0.29, + "learning_rate": 1.6601281904097985e-05, + "loss": 0.6416, + "step": 3411 + }, + { + "epoch": 0.29, + "learning_rate": 1.6599196269180712e-05, + "loss": 0.3318, + "step": 3412 + }, + { + "epoch": 0.29, + "learning_rate": 1.6597110125630004e-05, + "loss": 0.3199, + "step": 3413 + }, + { + "epoch": 0.29, + "learning_rate": 1.659502347360664e-05, + "loss": 0.5948, + "step": 3414 + }, + { + "epoch": 0.29, + "learning_rate": 1.6592936313271456e-05, + "loss": 0.3248, + "step": 3415 + }, + { + "epoch": 0.29, + "learning_rate": 1.659084864478532e-05, + "loss": 0.2456, + "step": 3416 + }, + { + "epoch": 0.29, + "learning_rate": 1.6588760468309134e-05, + "loss": 0.2679, + "step": 3417 + }, + { + "epoch": 0.29, + "learning_rate": 1.6586671784003846e-05, + "loss": 0.2751, + "step": 3418 + }, + { + "epoch": 0.29, + "learning_rate": 1.6584582592030444e-05, + "loss": 0.3278, + "step": 3419 + }, + { + "epoch": 0.29, + "learning_rate": 1.6582492892549954e-05, + "loss": 0.3103, + "step": 3420 + }, + { + "epoch": 0.29, + "learning_rate": 1.6580402685723434e-05, + "loss": 0.3233, + "step": 3421 + }, + { + "epoch": 0.29, + "learning_rate": 1.657831197171199e-05, + "loss": 0.329, + "step": 3422 + }, + { + "epoch": 0.29, + "learning_rate": 1.6576220750676763e-05, + "loss": 0.2979, + "step": 3423 + }, + { + "epoch": 0.29, + "learning_rate": 1.6574129022778936e-05, + "loss": 0.3029, + "step": 3424 + }, + { + "epoch": 0.29, + "learning_rate": 1.6572036788179728e-05, + "loss": 0.259, + "step": 3425 + }, + { + "epoch": 0.29, + "learning_rate": 1.6569944047040394e-05, + "loss": 0.2759, + "step": 3426 + }, + { + "epoch": 0.29, + "learning_rate": 1.6567850799522237e-05, + "loss": 0.3139, + "step": 3427 + }, + { + "epoch": 0.29, + "learning_rate": 1.6565757045786595e-05, + "loss": 0.2843, + "step": 3428 + }, + { + "epoch": 0.29, + "learning_rate": 1.6563662785994843e-05, + "loss": 0.319, + "step": 3429 + }, + { + "epoch": 0.29, + "learning_rate": 1.6561568020308397e-05, + "loss": 0.2744, + "step": 3430 + }, + { + "epoch": 0.29, + "learning_rate": 1.6559472748888703e-05, + "loss": 0.2747, + "step": 3431 + }, + { + "epoch": 0.29, + "learning_rate": 1.655737697189727e-05, + "loss": 0.6115, + "step": 3432 + }, + { + "epoch": 0.29, + "learning_rate": 1.6555280689495613e-05, + "loss": 0.3093, + "step": 3433 + }, + { + "epoch": 0.29, + "learning_rate": 1.6553183901845313e-05, + "loss": 0.3391, + "step": 3434 + }, + { + "epoch": 0.29, + "learning_rate": 1.655108660910798e-05, + "loss": 0.2307, + "step": 3435 + }, + { + "epoch": 0.29, + "learning_rate": 1.654898881144526e-05, + "loss": 0.2841, + "step": 3436 + }, + { + "epoch": 0.29, + "learning_rate": 1.654689050901884e-05, + "loss": 0.292, + "step": 3437 + }, + { + "epoch": 0.29, + "learning_rate": 1.654479170199045e-05, + "loss": 0.3267, + "step": 3438 + }, + { + "epoch": 0.29, + "learning_rate": 1.654269239052186e-05, + "loss": 0.2788, + "step": 3439 + }, + { + "epoch": 0.29, + "learning_rate": 1.654059257477486e-05, + "loss": 0.3215, + "step": 3440 + }, + { + "epoch": 0.29, + "learning_rate": 1.6538492254911305e-05, + "loss": 0.2996, + "step": 3441 + }, + { + "epoch": 0.3, + "learning_rate": 1.6536391431093077e-05, + "loss": 0.3179, + "step": 3442 + }, + { + "epoch": 0.3, + "learning_rate": 1.6534290103482094e-05, + "loss": 0.2978, + "step": 3443 + }, + { + "epoch": 0.3, + "learning_rate": 1.6532188272240314e-05, + "loss": 0.2497, + "step": 3444 + }, + { + "epoch": 0.3, + "learning_rate": 1.6530085937529743e-05, + "loss": 0.3055, + "step": 3445 + }, + { + "epoch": 0.3, + "learning_rate": 1.6527983099512414e-05, + "loss": 0.2488, + "step": 3446 + }, + { + "epoch": 0.3, + "learning_rate": 1.65258797583504e-05, + "loss": 0.2935, + "step": 3447 + }, + { + "epoch": 0.3, + "learning_rate": 1.652377591420582e-05, + "loss": 0.3154, + "step": 3448 + }, + { + "epoch": 0.3, + "learning_rate": 1.6521671567240826e-05, + "loss": 0.2964, + "step": 3449 + }, + { + "epoch": 0.3, + "learning_rate": 1.6519566717617616e-05, + "loss": 0.2855, + "step": 3450 + }, + { + "epoch": 0.3, + "learning_rate": 1.6517461365498416e-05, + "loss": 0.2857, + "step": 3451 + }, + { + "epoch": 0.3, + "learning_rate": 1.6515355511045495e-05, + "loss": 0.3333, + "step": 3452 + }, + { + "epoch": 0.3, + "learning_rate": 1.651324915442117e-05, + "loss": 0.3373, + "step": 3453 + }, + { + "epoch": 0.3, + "learning_rate": 1.651114229578778e-05, + "loss": 0.3002, + "step": 3454 + }, + { + "epoch": 0.3, + "learning_rate": 1.6509034935307716e-05, + "loss": 0.2891, + "step": 3455 + }, + { + "epoch": 0.3, + "learning_rate": 1.6506927073143396e-05, + "loss": 0.2403, + "step": 3456 + }, + { + "epoch": 0.3, + "learning_rate": 1.6504818709457294e-05, + "loss": 0.3245, + "step": 3457 + }, + { + "epoch": 0.3, + "learning_rate": 1.6502709844411907e-05, + "loss": 0.2944, + "step": 3458 + }, + { + "epoch": 0.3, + "learning_rate": 1.6500600478169776e-05, + "loss": 0.2986, + "step": 3459 + }, + { + "epoch": 0.3, + "learning_rate": 1.6498490610893478e-05, + "loss": 0.2884, + "step": 3460 + }, + { + "epoch": 0.3, + "learning_rate": 1.6496380242745635e-05, + "loss": 0.3492, + "step": 3461 + }, + { + "epoch": 0.3, + "learning_rate": 1.6494269373888902e-05, + "loss": 0.3046, + "step": 3462 + }, + { + "epoch": 0.3, + "learning_rate": 1.6492158004485977e-05, + "loss": 0.269, + "step": 3463 + }, + { + "epoch": 0.3, + "learning_rate": 1.6490046134699586e-05, + "loss": 0.2934, + "step": 3464 + }, + { + "epoch": 0.3, + "learning_rate": 1.6487933764692514e-05, + "loss": 0.27, + "step": 3465 + }, + { + "epoch": 0.3, + "learning_rate": 1.648582089462756e-05, + "loss": 0.2817, + "step": 3466 + }, + { + "epoch": 0.3, + "learning_rate": 1.648370752466758e-05, + "loss": 0.2922, + "step": 3467 + }, + { + "epoch": 0.3, + "learning_rate": 1.6481593654975466e-05, + "loss": 0.3057, + "step": 3468 + }, + { + "epoch": 0.3, + "learning_rate": 1.6479479285714133e-05, + "loss": 0.2835, + "step": 3469 + }, + { + "epoch": 0.3, + "learning_rate": 1.647736441704656e-05, + "loss": 0.3217, + "step": 3470 + }, + { + "epoch": 0.3, + "learning_rate": 1.6475249049135736e-05, + "loss": 0.3239, + "step": 3471 + }, + { + "epoch": 0.3, + "learning_rate": 1.6473133182144715e-05, + "loss": 0.2886, + "step": 3472 + }, + { + "epoch": 0.3, + "learning_rate": 1.647101681623657e-05, + "loss": 0.312, + "step": 3473 + }, + { + "epoch": 0.3, + "learning_rate": 1.6468899951574423e-05, + "loss": 0.2891, + "step": 3474 + }, + { + "epoch": 0.3, + "learning_rate": 1.646678258832143e-05, + "loss": 0.2823, + "step": 3475 + }, + { + "epoch": 0.3, + "learning_rate": 1.6464664726640793e-05, + "loss": 0.3081, + "step": 3476 + }, + { + "epoch": 0.3, + "learning_rate": 1.646254636669574e-05, + "loss": 0.2733, + "step": 3477 + }, + { + "epoch": 0.3, + "learning_rate": 1.6460427508649546e-05, + "loss": 0.3147, + "step": 3478 + }, + { + "epoch": 0.3, + "learning_rate": 1.6458308152665522e-05, + "loss": 0.3237, + "step": 3479 + }, + { + "epoch": 0.3, + "learning_rate": 1.6456188298907015e-05, + "loss": 0.3083, + "step": 3480 + }, + { + "epoch": 0.3, + "learning_rate": 1.6454067947537417e-05, + "loss": 0.2651, + "step": 3481 + }, + { + "epoch": 0.3, + "learning_rate": 1.6451947098720148e-05, + "loss": 0.3128, + "step": 3482 + }, + { + "epoch": 0.3, + "learning_rate": 1.644982575261868e-05, + "loss": 0.3183, + "step": 3483 + }, + { + "epoch": 0.3, + "learning_rate": 1.6447703909396514e-05, + "loss": 0.2783, + "step": 3484 + }, + { + "epoch": 0.3, + "learning_rate": 1.6445581569217184e-05, + "loss": 0.3288, + "step": 3485 + }, + { + "epoch": 0.3, + "learning_rate": 1.644345873224428e-05, + "loss": 0.2936, + "step": 3486 + }, + { + "epoch": 0.3, + "learning_rate": 1.6441335398641417e-05, + "loss": 0.3128, + "step": 3487 + }, + { + "epoch": 0.3, + "learning_rate": 1.643921156857224e-05, + "loss": 0.2819, + "step": 3488 + }, + { + "epoch": 0.3, + "learning_rate": 1.6437087242200462e-05, + "loss": 0.3202, + "step": 3489 + }, + { + "epoch": 0.3, + "learning_rate": 1.6434962419689803e-05, + "loss": 0.2733, + "step": 3490 + }, + { + "epoch": 0.3, + "learning_rate": 1.6432837101204036e-05, + "loss": 0.2684, + "step": 3491 + }, + { + "epoch": 0.3, + "learning_rate": 1.6430711286906968e-05, + "loss": 0.2916, + "step": 3492 + }, + { + "epoch": 0.3, + "learning_rate": 1.642858497696245e-05, + "loss": 0.6289, + "step": 3493 + }, + { + "epoch": 0.3, + "learning_rate": 1.642645817153437e-05, + "loss": 0.3239, + "step": 3494 + }, + { + "epoch": 0.3, + "learning_rate": 1.6424330870786648e-05, + "loss": 0.2929, + "step": 3495 + }, + { + "epoch": 0.3, + "learning_rate": 1.6422203074883247e-05, + "loss": 0.2653, + "step": 3496 + }, + { + "epoch": 0.3, + "learning_rate": 1.6420074783988164e-05, + "loss": 0.2754, + "step": 3497 + }, + { + "epoch": 0.3, + "learning_rate": 1.6417945998265436e-05, + "loss": 0.3062, + "step": 3498 + }, + { + "epoch": 0.3, + "learning_rate": 1.6415816717879146e-05, + "loss": 0.3181, + "step": 3499 + }, + { + "epoch": 0.3, + "learning_rate": 1.6413686942993405e-05, + "loss": 0.2699, + "step": 3500 + }, + { + "epoch": 0.3, + "learning_rate": 1.6411556673772365e-05, + "loss": 0.3083, + "step": 3501 + }, + { + "epoch": 0.3, + "learning_rate": 1.6409425910380215e-05, + "loss": 0.2824, + "step": 3502 + }, + { + "epoch": 0.3, + "learning_rate": 1.6407294652981187e-05, + "loss": 0.3623, + "step": 3503 + }, + { + "epoch": 0.3, + "learning_rate": 1.6405162901739545e-05, + "loss": 0.6411, + "step": 3504 + }, + { + "epoch": 0.3, + "learning_rate": 1.6403030656819597e-05, + "loss": 0.3047, + "step": 3505 + }, + { + "epoch": 0.3, + "learning_rate": 1.6400897918385687e-05, + "loss": 0.337, + "step": 3506 + }, + { + "epoch": 0.3, + "learning_rate": 1.6398764686602188e-05, + "loss": 0.5853, + "step": 3507 + }, + { + "epoch": 0.3, + "learning_rate": 1.639663096163353e-05, + "loss": 0.2921, + "step": 3508 + }, + { + "epoch": 0.3, + "learning_rate": 1.639449674364416e-05, + "loss": 0.2695, + "step": 3509 + }, + { + "epoch": 0.3, + "learning_rate": 1.6392362032798578e-05, + "loss": 0.2542, + "step": 3510 + }, + { + "epoch": 0.3, + "learning_rate": 1.6390226829261317e-05, + "loss": 0.2558, + "step": 3511 + }, + { + "epoch": 0.3, + "learning_rate": 1.6388091133196946e-05, + "loss": 0.2843, + "step": 3512 + }, + { + "epoch": 0.3, + "learning_rate": 1.6385954944770076e-05, + "loss": 0.2916, + "step": 3513 + }, + { + "epoch": 0.3, + "learning_rate": 1.638381826414535e-05, + "loss": 0.2812, + "step": 3514 + }, + { + "epoch": 0.3, + "learning_rate": 1.638168109148746e-05, + "loss": 0.2671, + "step": 3515 + }, + { + "epoch": 0.3, + "learning_rate": 1.6379543426961122e-05, + "loss": 0.338, + "step": 3516 + }, + { + "epoch": 0.3, + "learning_rate": 1.6377405270731097e-05, + "loss": 0.2952, + "step": 3517 + }, + { + "epoch": 0.3, + "learning_rate": 1.6375266622962188e-05, + "loss": 0.3425, + "step": 3518 + }, + { + "epoch": 0.3, + "learning_rate": 1.6373127483819227e-05, + "loss": 0.3367, + "step": 3519 + }, + { + "epoch": 0.3, + "learning_rate": 1.637098785346709e-05, + "loss": 0.2607, + "step": 3520 + }, + { + "epoch": 0.3, + "learning_rate": 1.636884773207069e-05, + "loss": 0.308, + "step": 3521 + }, + { + "epoch": 0.3, + "learning_rate": 1.6366707119794978e-05, + "loss": 0.2767, + "step": 3522 + }, + { + "epoch": 0.3, + "learning_rate": 1.636456601680494e-05, + "loss": 0.2756, + "step": 3523 + }, + { + "epoch": 0.3, + "learning_rate": 1.63624244232656e-05, + "loss": 0.2422, + "step": 3524 + }, + { + "epoch": 0.3, + "learning_rate": 1.6360282339342023e-05, + "loss": 0.3226, + "step": 3525 + }, + { + "epoch": 0.3, + "learning_rate": 1.635813976519931e-05, + "loss": 0.2922, + "step": 3526 + }, + { + "epoch": 0.3, + "learning_rate": 1.6355996701002603e-05, + "loss": 0.3536, + "step": 3527 + }, + { + "epoch": 0.3, + "learning_rate": 1.6353853146917073e-05, + "loss": 0.2979, + "step": 3528 + }, + { + "epoch": 0.3, + "learning_rate": 1.635170910310794e-05, + "loss": 0.2936, + "step": 3529 + }, + { + "epoch": 0.3, + "learning_rate": 1.6349564569740454e-05, + "loss": 0.2738, + "step": 3530 + }, + { + "epoch": 0.3, + "learning_rate": 1.6347419546979902e-05, + "loss": 0.3466, + "step": 3531 + }, + { + "epoch": 0.3, + "learning_rate": 1.6345274034991615e-05, + "loss": 0.2838, + "step": 3532 + }, + { + "epoch": 0.3, + "learning_rate": 1.6343128033940962e-05, + "loss": 0.277, + "step": 3533 + }, + { + "epoch": 0.3, + "learning_rate": 1.634098154399334e-05, + "loss": 0.2803, + "step": 3534 + }, + { + "epoch": 0.3, + "learning_rate": 1.6338834565314193e-05, + "loss": 0.2666, + "step": 3535 + }, + { + "epoch": 0.3, + "learning_rate": 1.6336687098069e-05, + "loss": 0.3102, + "step": 3536 + }, + { + "epoch": 0.3, + "learning_rate": 1.6334539142423275e-05, + "loss": 0.3169, + "step": 3537 + }, + { + "epoch": 0.3, + "learning_rate": 1.633239069854257e-05, + "loss": 0.2339, + "step": 3538 + }, + { + "epoch": 0.3, + "learning_rate": 1.6330241766592485e-05, + "loss": 0.3302, + "step": 3539 + }, + { + "epoch": 0.3, + "learning_rate": 1.632809234673864e-05, + "loss": 0.3147, + "step": 3540 + }, + { + "epoch": 0.3, + "learning_rate": 1.6325942439146705e-05, + "loss": 0.2712, + "step": 3541 + }, + { + "epoch": 0.3, + "learning_rate": 1.632379204398238e-05, + "loss": 0.2921, + "step": 3542 + }, + { + "epoch": 0.3, + "learning_rate": 1.632164116141142e-05, + "loss": 0.2944, + "step": 3543 + }, + { + "epoch": 0.3, + "learning_rate": 1.631948979159959e-05, + "loss": 0.2791, + "step": 3544 + }, + { + "epoch": 0.3, + "learning_rate": 1.631733793471271e-05, + "loss": 0.2876, + "step": 3545 + }, + { + "epoch": 0.3, + "learning_rate": 1.6315185590916644e-05, + "loss": 0.2985, + "step": 3546 + }, + { + "epoch": 0.3, + "learning_rate": 1.631303276037727e-05, + "loss": 0.2849, + "step": 3547 + }, + { + "epoch": 0.3, + "learning_rate": 1.631087944326053e-05, + "loss": 0.2609, + "step": 3548 + }, + { + "epoch": 0.3, + "learning_rate": 1.630872563973238e-05, + "loss": 0.269, + "step": 3549 + }, + { + "epoch": 0.3, + "learning_rate": 1.6306571349958833e-05, + "loss": 0.3069, + "step": 3550 + }, + { + "epoch": 0.3, + "learning_rate": 1.630441657410593e-05, + "loss": 0.3502, + "step": 3551 + }, + { + "epoch": 0.3, + "learning_rate": 1.6302261312339745e-05, + "loss": 0.2947, + "step": 3552 + }, + { + "epoch": 0.3, + "learning_rate": 1.63001055648264e-05, + "loss": 0.2584, + "step": 3553 + }, + { + "epoch": 0.3, + "learning_rate": 1.6297949331732047e-05, + "loss": 0.2751, + "step": 3554 + }, + { + "epoch": 0.3, + "learning_rate": 1.629579261322288e-05, + "loss": 0.278, + "step": 3555 + }, + { + "epoch": 0.3, + "learning_rate": 1.6293635409465127e-05, + "loss": 0.2886, + "step": 3556 + }, + { + "epoch": 0.3, + "learning_rate": 1.6291477720625052e-05, + "loss": 0.2904, + "step": 3557 + }, + { + "epoch": 0.3, + "learning_rate": 1.6289319546868966e-05, + "loss": 0.2527, + "step": 3558 + }, + { + "epoch": 0.31, + "learning_rate": 1.62871608883632e-05, + "loss": 0.3637, + "step": 3559 + }, + { + "epoch": 0.31, + "learning_rate": 1.6285001745274143e-05, + "loss": 0.2681, + "step": 3560 + }, + { + "epoch": 0.31, + "learning_rate": 1.62828421177682e-05, + "loss": 0.3171, + "step": 3561 + }, + { + "epoch": 0.31, + "learning_rate": 1.628068200601184e-05, + "loss": 0.256, + "step": 3562 + }, + { + "epoch": 0.31, + "learning_rate": 1.6278521410171538e-05, + "loss": 0.2552, + "step": 3563 + }, + { + "epoch": 0.31, + "learning_rate": 1.627636033041383e-05, + "loss": 0.3043, + "step": 3564 + }, + { + "epoch": 0.31, + "learning_rate": 1.6274198766905286e-05, + "loss": 0.267, + "step": 3565 + }, + { + "epoch": 0.31, + "learning_rate": 1.6272036719812496e-05, + "loss": 0.2993, + "step": 3566 + }, + { + "epoch": 0.31, + "learning_rate": 1.626987418930211e-05, + "loss": 0.3393, + "step": 3567 + }, + { + "epoch": 0.31, + "learning_rate": 1.6267711175540795e-05, + "loss": 0.3575, + "step": 3568 + }, + { + "epoch": 0.31, + "learning_rate": 1.626554767869528e-05, + "loss": 0.3052, + "step": 3569 + }, + { + "epoch": 0.31, + "learning_rate": 1.6263383698932307e-05, + "loss": 0.2604, + "step": 3570 + }, + { + "epoch": 0.31, + "learning_rate": 1.6261219236418667e-05, + "loss": 0.25, + "step": 3571 + }, + { + "epoch": 0.31, + "learning_rate": 1.6259054291321186e-05, + "loss": 0.2901, + "step": 3572 + }, + { + "epoch": 0.31, + "learning_rate": 1.6256888863806724e-05, + "loss": 0.3086, + "step": 3573 + }, + { + "epoch": 0.31, + "learning_rate": 1.625472295404219e-05, + "loss": 0.3322, + "step": 3574 + }, + { + "epoch": 0.31, + "learning_rate": 1.6252556562194514e-05, + "loss": 0.2962, + "step": 3575 + }, + { + "epoch": 0.31, + "learning_rate": 1.625038968843067e-05, + "loss": 0.2575, + "step": 3576 + }, + { + "epoch": 0.31, + "learning_rate": 1.6248222332917672e-05, + "loss": 0.2944, + "step": 3577 + }, + { + "epoch": 0.31, + "learning_rate": 1.6246054495822575e-05, + "loss": 0.2662, + "step": 3578 + }, + { + "epoch": 0.31, + "learning_rate": 1.624388617731246e-05, + "loss": 0.3342, + "step": 3579 + }, + { + "epoch": 0.31, + "learning_rate": 1.624171737755445e-05, + "loss": 0.3099, + "step": 3580 + }, + { + "epoch": 0.31, + "learning_rate": 1.6239548096715703e-05, + "loss": 0.3141, + "step": 3581 + }, + { + "epoch": 0.31, + "learning_rate": 1.6237378334963422e-05, + "loss": 0.2843, + "step": 3582 + }, + { + "epoch": 0.31, + "learning_rate": 1.6235208092464832e-05, + "loss": 0.2454, + "step": 3583 + }, + { + "epoch": 0.31, + "learning_rate": 1.6233037369387223e-05, + "loss": 0.3518, + "step": 3584 + }, + { + "epoch": 0.31, + "learning_rate": 1.6230866165897882e-05, + "loss": 0.2885, + "step": 3585 + }, + { + "epoch": 0.31, + "learning_rate": 1.6228694482164167e-05, + "loss": 0.3163, + "step": 3586 + }, + { + "epoch": 0.31, + "learning_rate": 1.6226522318353462e-05, + "loss": 0.3243, + "step": 3587 + }, + { + "epoch": 0.31, + "learning_rate": 1.6224349674633178e-05, + "loss": 0.2996, + "step": 3588 + }, + { + "epoch": 0.31, + "learning_rate": 1.622217655117078e-05, + "loss": 0.6091, + "step": 3589 + }, + { + "epoch": 0.31, + "learning_rate": 1.6220002948133756e-05, + "loss": 0.3337, + "step": 3590 + }, + { + "epoch": 0.31, + "learning_rate": 1.621782886568964e-05, + "loss": 0.3636, + "step": 3591 + }, + { + "epoch": 0.31, + "learning_rate": 1.6215654304005995e-05, + "loss": 0.2984, + "step": 3592 + }, + { + "epoch": 0.31, + "learning_rate": 1.6213479263250433e-05, + "loss": 0.2817, + "step": 3593 + }, + { + "epoch": 0.31, + "learning_rate": 1.621130374359059e-05, + "loss": 0.2816, + "step": 3594 + }, + { + "epoch": 0.31, + "learning_rate": 1.6209127745194143e-05, + "loss": 0.2599, + "step": 3595 + }, + { + "epoch": 0.31, + "learning_rate": 1.620695126822881e-05, + "loss": 0.3114, + "step": 3596 + }, + { + "epoch": 0.31, + "learning_rate": 1.6204774312862346e-05, + "loss": 0.2878, + "step": 3597 + }, + { + "epoch": 0.31, + "learning_rate": 1.6202596879262536e-05, + "loss": 0.2787, + "step": 3598 + }, + { + "epoch": 0.31, + "learning_rate": 1.6200418967597205e-05, + "loss": 0.3201, + "step": 3599 + }, + { + "epoch": 0.31, + "learning_rate": 1.6198240578034216e-05, + "loss": 0.2548, + "step": 3600 + }, + { + "epoch": 0.31, + "learning_rate": 1.6196061710741472e-05, + "loss": 0.2433, + "step": 3601 + }, + { + "epoch": 0.31, + "learning_rate": 1.6193882365886905e-05, + "loss": 0.3009, + "step": 3602 + }, + { + "epoch": 0.31, + "learning_rate": 1.6191702543638493e-05, + "loss": 0.3177, + "step": 3603 + }, + { + "epoch": 0.31, + "learning_rate": 1.618952224416424e-05, + "loss": 0.2981, + "step": 3604 + }, + { + "epoch": 0.31, + "learning_rate": 1.6187341467632198e-05, + "loss": 0.621, + "step": 3605 + }, + { + "epoch": 0.31, + "learning_rate": 1.6185160214210447e-05, + "loss": 0.337, + "step": 3606 + }, + { + "epoch": 0.31, + "learning_rate": 1.6182978484067106e-05, + "loss": 0.2773, + "step": 3607 + }, + { + "epoch": 0.31, + "learning_rate": 1.618079627737034e-05, + "loss": 0.649, + "step": 3608 + }, + { + "epoch": 0.31, + "learning_rate": 1.617861359428833e-05, + "loss": 0.282, + "step": 3609 + }, + { + "epoch": 0.31, + "learning_rate": 1.617643043498932e-05, + "loss": 0.3079, + "step": 3610 + }, + { + "epoch": 0.31, + "learning_rate": 1.617424679964157e-05, + "loss": 0.2839, + "step": 3611 + }, + { + "epoch": 0.31, + "learning_rate": 1.617206268841338e-05, + "loss": 0.3082, + "step": 3612 + }, + { + "epoch": 0.31, + "learning_rate": 1.61698781014731e-05, + "loss": 0.3101, + "step": 3613 + }, + { + "epoch": 0.31, + "learning_rate": 1.6167693038989098e-05, + "loss": 0.2913, + "step": 3614 + }, + { + "epoch": 0.31, + "learning_rate": 1.6165507501129796e-05, + "loss": 0.2648, + "step": 3615 + }, + { + "epoch": 0.31, + "learning_rate": 1.6163321488063636e-05, + "loss": 0.2505, + "step": 3616 + }, + { + "epoch": 0.31, + "learning_rate": 1.6161134999959115e-05, + "loss": 0.3237, + "step": 3617 + }, + { + "epoch": 0.31, + "learning_rate": 1.615894803698475e-05, + "loss": 0.2932, + "step": 3618 + }, + { + "epoch": 0.31, + "learning_rate": 1.61567605993091e-05, + "loss": 0.2676, + "step": 3619 + }, + { + "epoch": 0.31, + "learning_rate": 1.6154572687100766e-05, + "loss": 0.3368, + "step": 3620 + }, + { + "epoch": 0.31, + "learning_rate": 1.6152384300528375e-05, + "loss": 0.29, + "step": 3621 + }, + { + "epoch": 0.31, + "learning_rate": 1.615019543976061e-05, + "loss": 0.2947, + "step": 3622 + }, + { + "epoch": 0.31, + "learning_rate": 1.6148006104966164e-05, + "loss": 0.3134, + "step": 3623 + }, + { + "epoch": 0.31, + "learning_rate": 1.614581629631379e-05, + "loss": 0.259, + "step": 3624 + }, + { + "epoch": 0.31, + "learning_rate": 1.614362601397226e-05, + "loss": 0.3729, + "step": 3625 + }, + { + "epoch": 0.31, + "learning_rate": 1.6141435258110397e-05, + "loss": 0.2959, + "step": 3626 + }, + { + "epoch": 0.31, + "learning_rate": 1.6139244028897044e-05, + "loss": 0.2861, + "step": 3627 + }, + { + "epoch": 0.31, + "learning_rate": 1.6137052326501098e-05, + "loss": 0.2738, + "step": 3628 + }, + { + "epoch": 0.31, + "learning_rate": 1.613486015109149e-05, + "loss": 0.246, + "step": 3629 + }, + { + "epoch": 0.31, + "learning_rate": 1.6132667502837164e-05, + "loss": 0.3085, + "step": 3630 + }, + { + "epoch": 0.31, + "learning_rate": 1.6130474381907135e-05, + "loss": 0.3641, + "step": 3631 + }, + { + "epoch": 0.31, + "learning_rate": 1.6128280788470432e-05, + "loss": 0.3244, + "step": 3632 + }, + { + "epoch": 0.31, + "learning_rate": 1.6126086722696123e-05, + "loss": 0.2672, + "step": 3633 + }, + { + "epoch": 0.31, + "learning_rate": 1.6123892184753324e-05, + "loss": 0.3281, + "step": 3634 + }, + { + "epoch": 0.31, + "learning_rate": 1.612169717481117e-05, + "loss": 0.3254, + "step": 3635 + }, + { + "epoch": 0.31, + "learning_rate": 1.611950169303885e-05, + "loss": 0.2674, + "step": 3636 + }, + { + "epoch": 0.31, + "learning_rate": 1.6117305739605574e-05, + "loss": 0.3234, + "step": 3637 + }, + { + "epoch": 0.31, + "learning_rate": 1.6115109314680603e-05, + "loss": 0.268, + "step": 3638 + }, + { + "epoch": 0.31, + "learning_rate": 1.6112912418433218e-05, + "loss": 0.2967, + "step": 3639 + }, + { + "epoch": 0.31, + "learning_rate": 1.6110715051032748e-05, + "loss": 0.2777, + "step": 3640 + }, + { + "epoch": 0.31, + "learning_rate": 1.6108517212648556e-05, + "loss": 0.2784, + "step": 3641 + }, + { + "epoch": 0.31, + "learning_rate": 1.6106318903450042e-05, + "loss": 0.2413, + "step": 3642 + }, + { + "epoch": 0.31, + "learning_rate": 1.610412012360664e-05, + "loss": 0.2925, + "step": 3643 + }, + { + "epoch": 0.31, + "learning_rate": 1.6101920873287815e-05, + "loss": 0.2543, + "step": 3644 + }, + { + "epoch": 0.31, + "learning_rate": 1.6099721152663084e-05, + "loss": 0.2842, + "step": 3645 + }, + { + "epoch": 0.31, + "learning_rate": 1.6097520961901983e-05, + "loss": 0.2676, + "step": 3646 + }, + { + "epoch": 0.31, + "learning_rate": 1.6095320301174097e-05, + "loss": 0.2871, + "step": 3647 + }, + { + "epoch": 0.31, + "learning_rate": 1.609311917064904e-05, + "loss": 0.2686, + "step": 3648 + }, + { + "epoch": 0.31, + "learning_rate": 1.6090917570496465e-05, + "loss": 0.3093, + "step": 3649 + }, + { + "epoch": 0.31, + "learning_rate": 1.608871550088606e-05, + "loss": 0.3452, + "step": 3650 + }, + { + "epoch": 0.31, + "learning_rate": 1.6086512961987548e-05, + "loss": 0.3013, + "step": 3651 + }, + { + "epoch": 0.31, + "learning_rate": 1.608430995397069e-05, + "loss": 0.3232, + "step": 3652 + }, + { + "epoch": 0.31, + "learning_rate": 1.608210647700529e-05, + "loss": 0.2588, + "step": 3653 + }, + { + "epoch": 0.31, + "learning_rate": 1.607990253126117e-05, + "loss": 0.2703, + "step": 3654 + }, + { + "epoch": 0.31, + "learning_rate": 1.6077698116908204e-05, + "loss": 0.2668, + "step": 3655 + }, + { + "epoch": 0.31, + "learning_rate": 1.60754932341163e-05, + "loss": 0.32, + "step": 3656 + }, + { + "epoch": 0.31, + "learning_rate": 1.60732878830554e-05, + "loss": 0.3237, + "step": 3657 + }, + { + "epoch": 0.31, + "learning_rate": 1.6071082063895476e-05, + "loss": 0.3441, + "step": 3658 + }, + { + "epoch": 0.31, + "learning_rate": 1.606887577680654e-05, + "loss": 0.3376, + "step": 3659 + }, + { + "epoch": 0.31, + "learning_rate": 1.6066669021958653e-05, + "loss": 0.3409, + "step": 3660 + }, + { + "epoch": 0.31, + "learning_rate": 1.6064461799521892e-05, + "loss": 0.2643, + "step": 3661 + }, + { + "epoch": 0.31, + "learning_rate": 1.6062254109666383e-05, + "loss": 0.2762, + "step": 3662 + }, + { + "epoch": 0.31, + "learning_rate": 1.606004595256228e-05, + "loss": 0.2946, + "step": 3663 + }, + { + "epoch": 0.31, + "learning_rate": 1.6057837328379778e-05, + "loss": 0.2802, + "step": 3664 + }, + { + "epoch": 0.31, + "learning_rate": 1.6055628237289103e-05, + "loss": 0.3182, + "step": 3665 + }, + { + "epoch": 0.31, + "learning_rate": 1.6053418679460534e-05, + "loss": 0.2902, + "step": 3666 + }, + { + "epoch": 0.31, + "learning_rate": 1.605120865506436e-05, + "loss": 0.2957, + "step": 3667 + }, + { + "epoch": 0.31, + "learning_rate": 1.604899816427092e-05, + "loss": 0.2753, + "step": 3668 + }, + { + "epoch": 0.31, + "learning_rate": 1.6046787207250597e-05, + "loss": 0.3199, + "step": 3669 + }, + { + "epoch": 0.31, + "learning_rate": 1.604457578417379e-05, + "loss": 0.3224, + "step": 3670 + }, + { + "epoch": 0.31, + "learning_rate": 1.6042363895210948e-05, + "loss": 0.3215, + "step": 3671 + }, + { + "epoch": 0.31, + "learning_rate": 1.6040151540532553e-05, + "loss": 0.2979, + "step": 3672 + }, + { + "epoch": 0.31, + "learning_rate": 1.6037938720309122e-05, + "loss": 0.3066, + "step": 3673 + }, + { + "epoch": 0.31, + "learning_rate": 1.603572543471121e-05, + "loss": 0.3035, + "step": 3674 + }, + { + "epoch": 0.32, + "learning_rate": 1.6033511683909406e-05, + "loss": 0.2681, + "step": 3675 + }, + { + "epoch": 0.32, + "learning_rate": 1.6031297468074335e-05, + "loss": 0.2849, + "step": 3676 + }, + { + "epoch": 0.32, + "learning_rate": 1.6029082787376653e-05, + "loss": 0.2579, + "step": 3677 + }, + { + "epoch": 0.32, + "learning_rate": 1.602686764198706e-05, + "loss": 0.582, + "step": 3678 + }, + { + "epoch": 0.32, + "learning_rate": 1.6024652032076295e-05, + "loss": 0.2867, + "step": 3679 + }, + { + "epoch": 0.32, + "learning_rate": 1.6022435957815116e-05, + "loss": 0.283, + "step": 3680 + }, + { + "epoch": 0.32, + "learning_rate": 1.602021941937433e-05, + "loss": 0.2457, + "step": 3681 + }, + { + "epoch": 0.32, + "learning_rate": 1.601800241692478e-05, + "loss": 0.3286, + "step": 3682 + }, + { + "epoch": 0.32, + "learning_rate": 1.6015784950637338e-05, + "loss": 0.3573, + "step": 3683 + }, + { + "epoch": 0.32, + "learning_rate": 1.6013567020682917e-05, + "loss": 0.3053, + "step": 3684 + }, + { + "epoch": 0.32, + "learning_rate": 1.6011348627232463e-05, + "loss": 0.2863, + "step": 3685 + }, + { + "epoch": 0.32, + "learning_rate": 1.6009129770456962e-05, + "loss": 0.3561, + "step": 3686 + }, + { + "epoch": 0.32, + "learning_rate": 1.6006910450527428e-05, + "loss": 0.2638, + "step": 3687 + }, + { + "epoch": 0.32, + "learning_rate": 1.600469066761492e-05, + "loss": 0.34, + "step": 3688 + }, + { + "epoch": 0.32, + "learning_rate": 1.6002470421890522e-05, + "loss": 0.2606, + "step": 3689 + }, + { + "epoch": 0.32, + "learning_rate": 1.6000249713525366e-05, + "loss": 0.2829, + "step": 3690 + }, + { + "epoch": 0.32, + "learning_rate": 1.599802854269061e-05, + "loss": 0.3141, + "step": 3691 + }, + { + "epoch": 0.32, + "learning_rate": 1.599580690955745e-05, + "loss": 0.6311, + "step": 3692 + }, + { + "epoch": 0.32, + "learning_rate": 1.599358481429712e-05, + "loss": 0.2985, + "step": 3693 + }, + { + "epoch": 0.32, + "learning_rate": 1.599136225708089e-05, + "loss": 0.3333, + "step": 3694 + }, + { + "epoch": 0.32, + "learning_rate": 1.598913923808006e-05, + "loss": 0.2717, + "step": 3695 + }, + { + "epoch": 0.32, + "learning_rate": 1.5986915757465968e-05, + "loss": 0.3118, + "step": 3696 + }, + { + "epoch": 0.32, + "learning_rate": 1.598469181540999e-05, + "loss": 0.257, + "step": 3697 + }, + { + "epoch": 0.32, + "learning_rate": 1.5982467412083543e-05, + "loss": 0.2535, + "step": 3698 + }, + { + "epoch": 0.32, + "learning_rate": 1.5980242547658068e-05, + "loss": 0.2959, + "step": 3699 + }, + { + "epoch": 0.32, + "learning_rate": 1.5978017222305046e-05, + "loss": 0.3043, + "step": 3700 + }, + { + "epoch": 0.32, + "learning_rate": 1.5975791436195994e-05, + "loss": 0.2791, + "step": 3701 + }, + { + "epoch": 0.32, + "learning_rate": 1.5973565189502463e-05, + "loss": 0.3318, + "step": 3702 + }, + { + "epoch": 0.32, + "learning_rate": 1.597133848239605e-05, + "loss": 0.2953, + "step": 3703 + }, + { + "epoch": 0.32, + "learning_rate": 1.5969111315048365e-05, + "loss": 0.2986, + "step": 3704 + }, + { + "epoch": 0.32, + "learning_rate": 1.5966883687631075e-05, + "loss": 0.2959, + "step": 3705 + }, + { + "epoch": 0.32, + "learning_rate": 1.596465560031588e-05, + "loss": 0.2951, + "step": 3706 + }, + { + "epoch": 0.32, + "learning_rate": 1.5962427053274495e-05, + "loss": 0.2969, + "step": 3707 + }, + { + "epoch": 0.32, + "learning_rate": 1.59601980466787e-05, + "loss": 0.2968, + "step": 3708 + }, + { + "epoch": 0.32, + "learning_rate": 1.595796858070029e-05, + "loss": 0.293, + "step": 3709 + }, + { + "epoch": 0.32, + "learning_rate": 1.5955738655511094e-05, + "loss": 0.3122, + "step": 3710 + }, + { + "epoch": 0.32, + "learning_rate": 1.5953508271282997e-05, + "loss": 0.2566, + "step": 3711 + }, + { + "epoch": 0.32, + "learning_rate": 1.59512774281879e-05, + "loss": 0.2891, + "step": 3712 + }, + { + "epoch": 0.32, + "learning_rate": 1.594904612639774e-05, + "loss": 0.2946, + "step": 3713 + }, + { + "epoch": 0.32, + "learning_rate": 1.5946814366084505e-05, + "loss": 0.317, + "step": 3714 + }, + { + "epoch": 0.32, + "learning_rate": 1.59445821474202e-05, + "loss": 0.2724, + "step": 3715 + }, + { + "epoch": 0.32, + "learning_rate": 1.5942349470576878e-05, + "loss": 0.3065, + "step": 3716 + }, + { + "epoch": 0.32, + "learning_rate": 1.5940116335726615e-05, + "loss": 0.2784, + "step": 3717 + }, + { + "epoch": 0.32, + "learning_rate": 1.5937882743041543e-05, + "loss": 0.3358, + "step": 3718 + }, + { + "epoch": 0.32, + "learning_rate": 1.5935648692693805e-05, + "loss": 0.3549, + "step": 3719 + }, + { + "epoch": 0.32, + "learning_rate": 1.5933414184855597e-05, + "loss": 0.3517, + "step": 3720 + }, + { + "epoch": 0.32, + "learning_rate": 1.5931179219699144e-05, + "loss": 0.2758, + "step": 3721 + }, + { + "epoch": 0.32, + "learning_rate": 1.5928943797396695e-05, + "loss": 0.3008, + "step": 3722 + }, + { + "epoch": 0.32, + "learning_rate": 1.592670791812056e-05, + "loss": 0.3405, + "step": 3723 + }, + { + "epoch": 0.32, + "learning_rate": 1.592447158204306e-05, + "loss": 0.2762, + "step": 3724 + }, + { + "epoch": 0.32, + "learning_rate": 1.5922234789336567e-05, + "loss": 0.3245, + "step": 3725 + }, + { + "epoch": 0.32, + "learning_rate": 1.591999754017348e-05, + "loss": 0.3244, + "step": 3726 + }, + { + "epoch": 0.32, + "learning_rate": 1.5917759834726233e-05, + "loss": 0.2568, + "step": 3727 + }, + { + "epoch": 0.32, + "learning_rate": 1.5915521673167296e-05, + "loss": 0.3459, + "step": 3728 + }, + { + "epoch": 0.32, + "learning_rate": 1.591328305566918e-05, + "loss": 0.3571, + "step": 3729 + }, + { + "epoch": 0.32, + "learning_rate": 1.5911043982404426e-05, + "loss": 0.2926, + "step": 3730 + }, + { + "epoch": 0.32, + "learning_rate": 1.5908804453545608e-05, + "loss": 0.3193, + "step": 3731 + }, + { + "epoch": 0.32, + "learning_rate": 1.590656446926534e-05, + "loss": 0.3146, + "step": 3732 + }, + { + "epoch": 0.32, + "learning_rate": 1.5904324029736266e-05, + "loss": 0.298, + "step": 3733 + }, + { + "epoch": 0.32, + "learning_rate": 1.5902083135131067e-05, + "loss": 0.2803, + "step": 3734 + }, + { + "epoch": 0.32, + "learning_rate": 1.5899841785622468e-05, + "loss": 0.2667, + "step": 3735 + }, + { + "epoch": 0.32, + "learning_rate": 1.5897599981383214e-05, + "loss": 0.2776, + "step": 3736 + }, + { + "epoch": 0.32, + "learning_rate": 1.5895357722586093e-05, + "loss": 0.2954, + "step": 3737 + }, + { + "epoch": 0.32, + "learning_rate": 1.5893115009403932e-05, + "loss": 0.3359, + "step": 3738 + }, + { + "epoch": 0.32, + "learning_rate": 1.5890871842009582e-05, + "loss": 0.3189, + "step": 3739 + }, + { + "epoch": 0.32, + "learning_rate": 1.588862822057594e-05, + "loss": 0.615, + "step": 3740 + }, + { + "epoch": 0.32, + "learning_rate": 1.588638414527593e-05, + "loss": 0.3118, + "step": 3741 + }, + { + "epoch": 0.32, + "learning_rate": 1.5884139616282517e-05, + "loss": 0.3116, + "step": 3742 + }, + { + "epoch": 0.32, + "learning_rate": 1.5881894633768697e-05, + "loss": 0.324, + "step": 3743 + }, + { + "epoch": 0.32, + "learning_rate": 1.58796491979075e-05, + "loss": 0.3676, + "step": 3744 + }, + { + "epoch": 0.32, + "learning_rate": 1.5877403308871997e-05, + "loss": 0.3316, + "step": 3745 + }, + { + "epoch": 0.32, + "learning_rate": 1.5875156966835285e-05, + "loss": 0.2591, + "step": 3746 + }, + { + "epoch": 0.32, + "learning_rate": 1.5872910171970506e-05, + "loss": 0.3391, + "step": 3747 + }, + { + "epoch": 0.32, + "learning_rate": 1.587066292445083e-05, + "loss": 0.5974, + "step": 3748 + }, + { + "epoch": 0.32, + "learning_rate": 1.5868415224449463e-05, + "loss": 0.3163, + "step": 3749 + }, + { + "epoch": 0.32, + "learning_rate": 1.5866167072139645e-05, + "loss": 0.31, + "step": 3750 + }, + { + "epoch": 0.32, + "learning_rate": 1.586391846769466e-05, + "loss": 0.3144, + "step": 3751 + }, + { + "epoch": 0.32, + "learning_rate": 1.586166941128781e-05, + "loss": 0.3107, + "step": 3752 + }, + { + "epoch": 0.32, + "learning_rate": 1.585941990309245e-05, + "loss": 0.2744, + "step": 3753 + }, + { + "epoch": 0.32, + "learning_rate": 1.5857169943281948e-05, + "loss": 0.2595, + "step": 3754 + }, + { + "epoch": 0.32, + "learning_rate": 1.5854919532029734e-05, + "loss": 0.2889, + "step": 3755 + }, + { + "epoch": 0.32, + "learning_rate": 1.5852668669509252e-05, + "loss": 0.3166, + "step": 3756 + }, + { + "epoch": 0.32, + "learning_rate": 1.5850417355893984e-05, + "loss": 0.2836, + "step": 3757 + }, + { + "epoch": 0.32, + "learning_rate": 1.5848165591357458e-05, + "loss": 0.3284, + "step": 3758 + }, + { + "epoch": 0.32, + "learning_rate": 1.5845913376073222e-05, + "loss": 0.2581, + "step": 3759 + }, + { + "epoch": 0.32, + "learning_rate": 1.5843660710214872e-05, + "loss": 0.4052, + "step": 3760 + }, + { + "epoch": 0.32, + "learning_rate": 1.5841407593956026e-05, + "loss": 0.2761, + "step": 3761 + }, + { + "epoch": 0.32, + "learning_rate": 1.5839154027470346e-05, + "loss": 0.3372, + "step": 3762 + }, + { + "epoch": 0.32, + "learning_rate": 1.5836900010931527e-05, + "loss": 0.3278, + "step": 3763 + }, + { + "epoch": 0.32, + "learning_rate": 1.5834645544513296e-05, + "loss": 0.296, + "step": 3764 + }, + { + "epoch": 0.32, + "learning_rate": 1.5832390628389417e-05, + "loss": 0.3231, + "step": 3765 + }, + { + "epoch": 0.32, + "learning_rate": 1.5830135262733684e-05, + "loss": 0.3066, + "step": 3766 + }, + { + "epoch": 0.32, + "learning_rate": 1.5827879447719932e-05, + "loss": 0.2812, + "step": 3767 + }, + { + "epoch": 0.32, + "learning_rate": 1.582562318352203e-05, + "loss": 0.2911, + "step": 3768 + }, + { + "epoch": 0.32, + "learning_rate": 1.582336647031388e-05, + "loss": 0.2672, + "step": 3769 + }, + { + "epoch": 0.32, + "learning_rate": 1.5821109308269416e-05, + "loss": 0.3187, + "step": 3770 + }, + { + "epoch": 0.32, + "learning_rate": 1.581885169756261e-05, + "loss": 0.2862, + "step": 3771 + }, + { + "epoch": 0.32, + "learning_rate": 1.581659363836747e-05, + "loss": 0.2764, + "step": 3772 + }, + { + "epoch": 0.32, + "learning_rate": 1.5814335130858026e-05, + "loss": 0.257, + "step": 3773 + }, + { + "epoch": 0.32, + "learning_rate": 1.581207617520836e-05, + "loss": 0.261, + "step": 3774 + }, + { + "epoch": 0.32, + "learning_rate": 1.5809816771592584e-05, + "loss": 0.2747, + "step": 3775 + }, + { + "epoch": 0.32, + "learning_rate": 1.5807556920184837e-05, + "loss": 0.3214, + "step": 3776 + }, + { + "epoch": 0.32, + "learning_rate": 1.58052966211593e-05, + "loss": 0.2962, + "step": 3777 + }, + { + "epoch": 0.32, + "learning_rate": 1.5803035874690186e-05, + "loss": 0.2794, + "step": 3778 + }, + { + "epoch": 0.32, + "learning_rate": 1.5800774680951736e-05, + "loss": 0.3008, + "step": 3779 + }, + { + "epoch": 0.32, + "learning_rate": 1.579851304011824e-05, + "loss": 0.2864, + "step": 3780 + }, + { + "epoch": 0.32, + "learning_rate": 1.5796250952364008e-05, + "loss": 0.2719, + "step": 3781 + }, + { + "epoch": 0.32, + "learning_rate": 1.579398841786339e-05, + "loss": 0.3212, + "step": 3782 + }, + { + "epoch": 0.32, + "learning_rate": 1.579172543679078e-05, + "loss": 0.2466, + "step": 3783 + }, + { + "epoch": 0.32, + "learning_rate": 1.5789462009320586e-05, + "loss": 0.27, + "step": 3784 + }, + { + "epoch": 0.32, + "learning_rate": 1.578719813562727e-05, + "loss": 0.2386, + "step": 3785 + }, + { + "epoch": 0.32, + "learning_rate": 1.5784933815885315e-05, + "loss": 0.2562, + "step": 3786 + }, + { + "epoch": 0.32, + "learning_rate": 1.5782669050269243e-05, + "loss": 0.3107, + "step": 3787 + }, + { + "epoch": 0.32, + "learning_rate": 1.578040383895362e-05, + "loss": 0.2988, + "step": 3788 + }, + { + "epoch": 0.32, + "learning_rate": 1.5778138182113027e-05, + "loss": 0.3156, + "step": 3789 + }, + { + "epoch": 0.32, + "learning_rate": 1.5775872079922098e-05, + "loss": 0.2906, + "step": 3790 + }, + { + "epoch": 0.32, + "learning_rate": 1.5773605532555484e-05, + "loss": 0.287, + "step": 3791 + }, + { + "epoch": 0.33, + "learning_rate": 1.5771338540187883e-05, + "loss": 0.2913, + "step": 3792 + }, + { + "epoch": 0.33, + "learning_rate": 1.5769071102994024e-05, + "loss": 0.5977, + "step": 3793 + }, + { + "epoch": 0.33, + "learning_rate": 1.5766803221148676e-05, + "loss": 0.3246, + "step": 3794 + }, + { + "epoch": 0.33, + "learning_rate": 1.5764534894826623e-05, + "loss": 0.2846, + "step": 3795 + }, + { + "epoch": 0.33, + "learning_rate": 1.5762266124202708e-05, + "loss": 0.28, + "step": 3796 + }, + { + "epoch": 0.33, + "learning_rate": 1.5759996909451795e-05, + "loss": 0.2986, + "step": 3797 + }, + { + "epoch": 0.33, + "learning_rate": 1.5757727250748773e-05, + "loss": 0.302, + "step": 3798 + }, + { + "epoch": 0.33, + "learning_rate": 1.575545714826859e-05, + "loss": 0.356, + "step": 3799 + }, + { + "epoch": 0.33, + "learning_rate": 1.5753186602186207e-05, + "loss": 0.3202, + "step": 3800 + }, + { + "epoch": 0.33, + "learning_rate": 1.575091561267663e-05, + "loss": 0.307, + "step": 3801 + }, + { + "epoch": 0.33, + "learning_rate": 1.574864417991489e-05, + "loss": 0.2645, + "step": 3802 + }, + { + "epoch": 0.33, + "learning_rate": 1.5746372304076065e-05, + "loss": 0.2705, + "step": 3803 + }, + { + "epoch": 0.33, + "learning_rate": 1.5744099985335255e-05, + "loss": 0.3011, + "step": 3804 + }, + { + "epoch": 0.33, + "learning_rate": 1.5741827223867602e-05, + "loss": 0.6638, + "step": 3805 + }, + { + "epoch": 0.33, + "learning_rate": 1.5739554019848274e-05, + "loss": 0.3149, + "step": 3806 + }, + { + "epoch": 0.33, + "learning_rate": 1.5737280373452487e-05, + "loss": 0.2958, + "step": 3807 + }, + { + "epoch": 0.33, + "learning_rate": 1.5735006284855473e-05, + "loss": 0.3245, + "step": 3808 + }, + { + "epoch": 0.33, + "learning_rate": 1.5732731754232516e-05, + "loss": 0.2841, + "step": 3809 + }, + { + "epoch": 0.33, + "learning_rate": 1.573045678175892e-05, + "loss": 0.3588, + "step": 3810 + }, + { + "epoch": 0.33, + "learning_rate": 1.572818136761003e-05, + "loss": 0.2849, + "step": 3811 + }, + { + "epoch": 0.33, + "learning_rate": 1.5725905511961226e-05, + "loss": 0.3043, + "step": 3812 + }, + { + "epoch": 0.33, + "learning_rate": 1.5723629214987915e-05, + "loss": 0.3508, + "step": 3813 + }, + { + "epoch": 0.33, + "learning_rate": 1.5721352476865546e-05, + "loss": 0.2888, + "step": 3814 + }, + { + "epoch": 0.33, + "learning_rate": 1.5719075297769596e-05, + "loss": 0.2879, + "step": 3815 + }, + { + "epoch": 0.33, + "learning_rate": 1.5716797677875586e-05, + "loss": 0.3232, + "step": 3816 + }, + { + "epoch": 0.33, + "learning_rate": 1.5714519617359054e-05, + "loss": 0.2796, + "step": 3817 + }, + { + "epoch": 0.33, + "learning_rate": 1.571224111639559e-05, + "loss": 0.2831, + "step": 3818 + }, + { + "epoch": 0.33, + "learning_rate": 1.5709962175160806e-05, + "loss": 0.2746, + "step": 3819 + }, + { + "epoch": 0.33, + "learning_rate": 1.5707682793830347e-05, + "loss": 0.2793, + "step": 3820 + }, + { + "epoch": 0.33, + "learning_rate": 1.5705402972579902e-05, + "loss": 0.3566, + "step": 3821 + }, + { + "epoch": 0.33, + "learning_rate": 1.570312271158519e-05, + "loss": 0.2615, + "step": 3822 + }, + { + "epoch": 0.33, + "learning_rate": 1.5700842011021954e-05, + "loss": 0.2676, + "step": 3823 + }, + { + "epoch": 0.33, + "learning_rate": 1.5698560871065986e-05, + "loss": 0.3256, + "step": 3824 + }, + { + "epoch": 0.33, + "learning_rate": 1.5696279291893107e-05, + "loss": 0.2972, + "step": 3825 + }, + { + "epoch": 0.33, + "learning_rate": 1.5693997273679165e-05, + "loss": 0.2914, + "step": 3826 + }, + { + "epoch": 0.33, + "learning_rate": 1.5691714816600045e-05, + "loss": 0.303, + "step": 3827 + }, + { + "epoch": 0.33, + "learning_rate": 1.5689431920831676e-05, + "loss": 0.3151, + "step": 3828 + }, + { + "epoch": 0.33, + "learning_rate": 1.5687148586550003e-05, + "loss": 0.2988, + "step": 3829 + }, + { + "epoch": 0.33, + "learning_rate": 1.568486481393102e-05, + "loss": 0.2723, + "step": 3830 + }, + { + "epoch": 0.33, + "learning_rate": 1.5682580603150742e-05, + "loss": 0.2534, + "step": 3831 + }, + { + "epoch": 0.33, + "learning_rate": 1.5680295954385235e-05, + "loss": 0.2994, + "step": 3832 + }, + { + "epoch": 0.33, + "learning_rate": 1.5678010867810583e-05, + "loss": 0.3505, + "step": 3833 + }, + { + "epoch": 0.33, + "learning_rate": 1.5675725343602904e-05, + "loss": 0.2736, + "step": 3834 + }, + { + "epoch": 0.33, + "learning_rate": 1.5673439381938365e-05, + "loss": 0.2769, + "step": 3835 + }, + { + "epoch": 0.33, + "learning_rate": 1.567115298299315e-05, + "loss": 0.2642, + "step": 3836 + }, + { + "epoch": 0.33, + "learning_rate": 1.5668866146943484e-05, + "loss": 0.2465, + "step": 3837 + }, + { + "epoch": 0.33, + "learning_rate": 1.5666578873965627e-05, + "loss": 0.2972, + "step": 3838 + }, + { + "epoch": 0.33, + "learning_rate": 1.566429116423587e-05, + "loss": 0.293, + "step": 3839 + }, + { + "epoch": 0.33, + "learning_rate": 1.566200301793054e-05, + "loss": 0.2595, + "step": 3840 + }, + { + "epoch": 0.33, + "learning_rate": 1.5659714435225993e-05, + "loss": 0.3223, + "step": 3841 + }, + { + "epoch": 0.33, + "learning_rate": 1.5657425416298623e-05, + "loss": 0.2939, + "step": 3842 + }, + { + "epoch": 0.33, + "learning_rate": 1.5655135961324856e-05, + "loss": 0.2733, + "step": 3843 + }, + { + "epoch": 0.33, + "learning_rate": 1.565284607048115e-05, + "loss": 0.2607, + "step": 3844 + }, + { + "epoch": 0.33, + "learning_rate": 1.5650555743944002e-05, + "loss": 0.3189, + "step": 3845 + }, + { + "epoch": 0.33, + "learning_rate": 1.5648264981889936e-05, + "loss": 0.2677, + "step": 3846 + }, + { + "epoch": 0.33, + "learning_rate": 1.5645973784495517e-05, + "loss": 0.2839, + "step": 3847 + }, + { + "epoch": 0.33, + "learning_rate": 1.5643682151937333e-05, + "loss": 0.296, + "step": 3848 + }, + { + "epoch": 0.33, + "learning_rate": 1.564139008439202e-05, + "loss": 0.2631, + "step": 3849 + }, + { + "epoch": 0.33, + "learning_rate": 1.5639097582036226e-05, + "loss": 0.3555, + "step": 3850 + }, + { + "epoch": 0.33, + "learning_rate": 1.563680464504666e-05, + "loss": 0.2966, + "step": 3851 + }, + { + "epoch": 0.33, + "learning_rate": 1.5634511273600042e-05, + "loss": 0.2507, + "step": 3852 + }, + { + "epoch": 0.33, + "learning_rate": 1.563221746787314e-05, + "loss": 0.2749, + "step": 3853 + }, + { + "epoch": 0.33, + "learning_rate": 1.562992322804274e-05, + "loss": 0.2935, + "step": 3854 + }, + { + "epoch": 0.33, + "learning_rate": 1.5627628554285678e-05, + "loss": 0.3058, + "step": 3855 + }, + { + "epoch": 0.33, + "learning_rate": 1.5625333446778812e-05, + "loss": 0.2646, + "step": 3856 + }, + { + "epoch": 0.33, + "learning_rate": 1.5623037905699043e-05, + "loss": 0.2944, + "step": 3857 + }, + { + "epoch": 0.33, + "learning_rate": 1.5620741931223292e-05, + "loss": 0.2922, + "step": 3858 + }, + { + "epoch": 0.33, + "learning_rate": 1.5618445523528533e-05, + "loss": 0.3265, + "step": 3859 + }, + { + "epoch": 0.33, + "learning_rate": 1.561614868279175e-05, + "loss": 0.287, + "step": 3860 + }, + { + "epoch": 0.33, + "learning_rate": 1.5613851409189974e-05, + "loss": 0.2684, + "step": 3861 + }, + { + "epoch": 0.33, + "learning_rate": 1.5611553702900275e-05, + "loss": 0.301, + "step": 3862 + }, + { + "epoch": 0.33, + "learning_rate": 1.560925556409974e-05, + "loss": 0.2579, + "step": 3863 + }, + { + "epoch": 0.33, + "learning_rate": 1.5606956992965504e-05, + "loss": 0.2759, + "step": 3864 + }, + { + "epoch": 0.33, + "learning_rate": 1.560465798967473e-05, + "loss": 0.2783, + "step": 3865 + }, + { + "epoch": 0.33, + "learning_rate": 1.5602358554404613e-05, + "loss": 0.2875, + "step": 3866 + }, + { + "epoch": 0.33, + "learning_rate": 1.5600058687332375e-05, + "loss": 0.2493, + "step": 3867 + }, + { + "epoch": 0.33, + "learning_rate": 1.5597758388635288e-05, + "loss": 0.2921, + "step": 3868 + }, + { + "epoch": 0.33, + "learning_rate": 1.5595457658490643e-05, + "loss": 0.2942, + "step": 3869 + }, + { + "epoch": 0.33, + "learning_rate": 1.5593156497075767e-05, + "loss": 0.3015, + "step": 3870 + }, + { + "epoch": 0.33, + "learning_rate": 1.559085490456803e-05, + "loss": 0.3109, + "step": 3871 + }, + { + "epoch": 0.33, + "learning_rate": 1.5588552881144814e-05, + "loss": 0.6384, + "step": 3872 + }, + { + "epoch": 0.33, + "learning_rate": 1.5586250426983566e-05, + "loss": 0.333, + "step": 3873 + }, + { + "epoch": 0.33, + "learning_rate": 1.558394754226173e-05, + "loss": 0.3302, + "step": 3874 + }, + { + "epoch": 0.33, + "learning_rate": 1.5581644227156815e-05, + "loss": 0.3281, + "step": 3875 + }, + { + "epoch": 0.33, + "learning_rate": 1.5579340481846338e-05, + "loss": 0.2907, + "step": 3876 + }, + { + "epoch": 0.33, + "learning_rate": 1.5577036306507863e-05, + "loss": 0.3022, + "step": 3877 + }, + { + "epoch": 0.33, + "learning_rate": 1.5574731701318987e-05, + "loss": 0.2982, + "step": 3878 + }, + { + "epoch": 0.33, + "learning_rate": 1.5572426666457342e-05, + "loss": 0.3142, + "step": 3879 + }, + { + "epoch": 0.33, + "learning_rate": 1.557012120210058e-05, + "loss": 0.3098, + "step": 3880 + }, + { + "epoch": 0.33, + "learning_rate": 1.55678153084264e-05, + "loss": 0.2721, + "step": 3881 + }, + { + "epoch": 0.33, + "learning_rate": 1.5565508985612525e-05, + "loss": 0.2855, + "step": 3882 + }, + { + "epoch": 0.33, + "learning_rate": 1.556320223383672e-05, + "loss": 0.3185, + "step": 3883 + }, + { + "epoch": 0.33, + "learning_rate": 1.556089505327677e-05, + "loss": 0.265, + "step": 3884 + }, + { + "epoch": 0.33, + "learning_rate": 1.555858744411052e-05, + "loss": 0.3051, + "step": 3885 + }, + { + "epoch": 0.33, + "learning_rate": 1.5556279406515802e-05, + "loss": 0.2719, + "step": 3886 + }, + { + "epoch": 0.33, + "learning_rate": 1.5553970940670527e-05, + "loss": 0.304, + "step": 3887 + }, + { + "epoch": 0.33, + "learning_rate": 1.5551662046752612e-05, + "loss": 0.2966, + "step": 3888 + }, + { + "epoch": 0.33, + "learning_rate": 1.554935272494002e-05, + "loss": 0.3229, + "step": 3889 + }, + { + "epoch": 0.33, + "learning_rate": 1.554704297541074e-05, + "loss": 0.239, + "step": 3890 + }, + { + "epoch": 0.33, + "learning_rate": 1.5544732798342798e-05, + "loss": 0.2798, + "step": 3891 + }, + { + "epoch": 0.33, + "learning_rate": 1.554242219391425e-05, + "loss": 0.2979, + "step": 3892 + }, + { + "epoch": 0.33, + "learning_rate": 1.554011116230318e-05, + "loss": 0.3093, + "step": 3893 + }, + { + "epoch": 0.33, + "learning_rate": 1.553779970368772e-05, + "loss": 0.332, + "step": 3894 + }, + { + "epoch": 0.33, + "learning_rate": 1.5535487818246023e-05, + "loss": 0.3015, + "step": 3895 + }, + { + "epoch": 0.33, + "learning_rate": 1.553317550615627e-05, + "loss": 0.3169, + "step": 3896 + }, + { + "epoch": 0.33, + "learning_rate": 1.5530862767596697e-05, + "loss": 0.3001, + "step": 3897 + }, + { + "epoch": 0.33, + "learning_rate": 1.5528549602745545e-05, + "loss": 0.3043, + "step": 3898 + }, + { + "epoch": 0.33, + "learning_rate": 1.552623601178111e-05, + "loss": 0.3199, + "step": 3899 + }, + { + "epoch": 0.33, + "learning_rate": 1.552392199488171e-05, + "loss": 0.2902, + "step": 3900 + }, + { + "epoch": 0.33, + "learning_rate": 1.5521607552225698e-05, + "loss": 0.2822, + "step": 3901 + }, + { + "epoch": 0.33, + "learning_rate": 1.5519292683991455e-05, + "loss": 0.2686, + "step": 3902 + }, + { + "epoch": 0.33, + "learning_rate": 1.5516977390357405e-05, + "loss": 0.3297, + "step": 3903 + }, + { + "epoch": 0.33, + "learning_rate": 1.5514661671502e-05, + "loss": 0.2429, + "step": 3904 + }, + { + "epoch": 0.33, + "learning_rate": 1.5512345527603718e-05, + "loss": 0.3002, + "step": 3905 + }, + { + "epoch": 0.33, + "learning_rate": 1.5510028958841085e-05, + "loss": 0.2615, + "step": 3906 + }, + { + "epoch": 0.33, + "learning_rate": 1.5507711965392643e-05, + "loss": 0.2908, + "step": 3907 + }, + { + "epoch": 0.33, + "learning_rate": 1.5505394547436976e-05, + "loss": 0.2653, + "step": 3908 + }, + { + "epoch": 0.34, + "learning_rate": 1.55030767051527e-05, + "loss": 0.2932, + "step": 3909 + }, + { + "epoch": 0.34, + "learning_rate": 1.5500758438718463e-05, + "loss": 0.3299, + "step": 3910 + }, + { + "epoch": 0.34, + "learning_rate": 1.549843974831295e-05, + "loss": 0.3065, + "step": 3911 + }, + { + "epoch": 0.34, + "learning_rate": 1.5496120634114865e-05, + "loss": 0.5555, + "step": 3912 + }, + { + "epoch": 0.34, + "learning_rate": 1.5493801096302964e-05, + "loss": 0.2878, + "step": 3913 + }, + { + "epoch": 0.34, + "learning_rate": 1.5491481135056012e-05, + "loss": 0.2618, + "step": 3914 + }, + { + "epoch": 0.34, + "learning_rate": 1.5489160750552833e-05, + "loss": 0.3099, + "step": 3915 + }, + { + "epoch": 0.34, + "learning_rate": 1.548683994297227e-05, + "loss": 0.3718, + "step": 3916 + }, + { + "epoch": 0.34, + "learning_rate": 1.5484518712493188e-05, + "loss": 0.2573, + "step": 3917 + }, + { + "epoch": 0.34, + "learning_rate": 1.548219705929451e-05, + "loss": 0.3022, + "step": 3918 + }, + { + "epoch": 0.34, + "learning_rate": 1.5479874983555166e-05, + "loss": 0.2458, + "step": 3919 + }, + { + "epoch": 0.34, + "learning_rate": 1.5477552485454136e-05, + "loss": 0.2794, + "step": 3920 + }, + { + "epoch": 0.34, + "learning_rate": 1.5475229565170428e-05, + "loss": 0.3146, + "step": 3921 + }, + { + "epoch": 0.34, + "learning_rate": 1.5472906222883075e-05, + "loss": 0.3178, + "step": 3922 + }, + { + "epoch": 0.34, + "learning_rate": 1.547058245877116e-05, + "loss": 0.2927, + "step": 3923 + }, + { + "epoch": 0.34, + "learning_rate": 1.5468258273013773e-05, + "loss": 0.3034, + "step": 3924 + }, + { + "epoch": 0.34, + "learning_rate": 1.546593366579006e-05, + "loss": 0.3246, + "step": 3925 + }, + { + "epoch": 0.34, + "learning_rate": 1.546360863727919e-05, + "loss": 0.3193, + "step": 3926 + }, + { + "epoch": 0.34, + "learning_rate": 1.546128318766036e-05, + "loss": 0.2811, + "step": 3927 + }, + { + "epoch": 0.34, + "learning_rate": 1.545895731711281e-05, + "loss": 0.2975, + "step": 3928 + }, + { + "epoch": 0.34, + "learning_rate": 1.54566310258158e-05, + "loss": 0.3044, + "step": 3929 + }, + { + "epoch": 0.34, + "learning_rate": 1.5454304313948635e-05, + "loss": 0.2925, + "step": 3930 + }, + { + "epoch": 0.34, + "learning_rate": 1.545197718169064e-05, + "loss": 0.2997, + "step": 3931 + }, + { + "epoch": 0.34, + "learning_rate": 1.544964962922119e-05, + "loss": 0.2923, + "step": 3932 + }, + { + "epoch": 0.34, + "learning_rate": 1.5447321656719668e-05, + "loss": 0.3475, + "step": 3933 + }, + { + "epoch": 0.34, + "learning_rate": 1.544499326436551e-05, + "loss": 0.2952, + "step": 3934 + }, + { + "epoch": 0.34, + "learning_rate": 1.5442664452338178e-05, + "loss": 0.3317, + "step": 3935 + }, + { + "epoch": 0.34, + "learning_rate": 1.544033522081716e-05, + "loss": 0.2699, + "step": 3936 + }, + { + "epoch": 0.34, + "learning_rate": 1.5438005569981986e-05, + "loss": 0.2758, + "step": 3937 + }, + { + "epoch": 0.34, + "learning_rate": 1.5435675500012212e-05, + "loss": 0.2902, + "step": 3938 + }, + { + "epoch": 0.34, + "learning_rate": 1.5433345011087427e-05, + "loss": 0.3263, + "step": 3939 + }, + { + "epoch": 0.34, + "learning_rate": 1.5431014103387263e-05, + "loss": 0.2625, + "step": 3940 + }, + { + "epoch": 0.34, + "learning_rate": 1.542868277709136e-05, + "loss": 0.3054, + "step": 3941 + }, + { + "epoch": 0.34, + "learning_rate": 1.5426351032379418e-05, + "loss": 0.326, + "step": 3942 + }, + { + "epoch": 0.34, + "learning_rate": 1.5424018869431144e-05, + "loss": 0.6332, + "step": 3943 + }, + { + "epoch": 0.34, + "learning_rate": 1.5421686288426303e-05, + "loss": 0.3088, + "step": 3944 + }, + { + "epoch": 0.34, + "learning_rate": 1.541935328954467e-05, + "loss": 0.2856, + "step": 3945 + }, + { + "epoch": 0.34, + "learning_rate": 1.541701987296606e-05, + "loss": 0.3416, + "step": 3946 + }, + { + "epoch": 0.34, + "learning_rate": 1.5414686038870327e-05, + "loss": 0.2977, + "step": 3947 + }, + { + "epoch": 0.34, + "learning_rate": 1.541235178743735e-05, + "loss": 0.2991, + "step": 3948 + }, + { + "epoch": 0.34, + "learning_rate": 1.541001711884704e-05, + "loss": 0.2932, + "step": 3949 + }, + { + "epoch": 0.34, + "learning_rate": 1.540768203327934e-05, + "loss": 0.2958, + "step": 3950 + }, + { + "epoch": 0.34, + "learning_rate": 1.5405346530914233e-05, + "loss": 0.3158, + "step": 3951 + }, + { + "epoch": 0.34, + "learning_rate": 1.5403010611931718e-05, + "loss": 0.308, + "step": 3952 + }, + { + "epoch": 0.34, + "learning_rate": 1.5400674276511848e-05, + "loss": 0.2679, + "step": 3953 + }, + { + "epoch": 0.34, + "learning_rate": 1.5398337524834688e-05, + "loss": 0.2838, + "step": 3954 + }, + { + "epoch": 0.34, + "learning_rate": 1.5396000357080345e-05, + "loss": 0.2902, + "step": 3955 + }, + { + "epoch": 0.34, + "learning_rate": 1.5393662773428956e-05, + "loss": 0.2709, + "step": 3956 + }, + { + "epoch": 0.34, + "learning_rate": 1.5391324774060695e-05, + "loss": 0.2622, + "step": 3957 + }, + { + "epoch": 0.34, + "learning_rate": 1.538898635915576e-05, + "loss": 0.3279, + "step": 3958 + }, + { + "epoch": 0.34, + "learning_rate": 1.5386647528894377e-05, + "loss": 0.3098, + "step": 3959 + }, + { + "epoch": 0.34, + "learning_rate": 1.5384308283456824e-05, + "loss": 0.3135, + "step": 3960 + }, + { + "epoch": 0.34, + "learning_rate": 1.538196862302339e-05, + "loss": 0.3124, + "step": 3961 + }, + { + "epoch": 0.34, + "learning_rate": 1.5379628547774412e-05, + "loss": 0.2665, + "step": 3962 + }, + { + "epoch": 0.34, + "learning_rate": 1.5377288057890246e-05, + "loss": 0.2953, + "step": 3963 + }, + { + "epoch": 0.34, + "learning_rate": 1.5374947153551284e-05, + "loss": 0.2875, + "step": 3964 + }, + { + "epoch": 0.34, + "learning_rate": 1.5372605834937953e-05, + "loss": 0.2756, + "step": 3965 + }, + { + "epoch": 0.34, + "learning_rate": 1.5370264102230716e-05, + "loss": 0.3225, + "step": 3966 + }, + { + "epoch": 0.34, + "learning_rate": 1.5367921955610055e-05, + "loss": 0.2899, + "step": 3967 + }, + { + "epoch": 0.34, + "learning_rate": 1.5365579395256493e-05, + "loss": 0.2505, + "step": 3968 + }, + { + "epoch": 0.34, + "learning_rate": 1.5363236421350584e-05, + "loss": 0.3038, + "step": 3969 + }, + { + "epoch": 0.34, + "learning_rate": 1.536089303407291e-05, + "loss": 0.2897, + "step": 3970 + }, + { + "epoch": 0.34, + "learning_rate": 1.5358549233604093e-05, + "loss": 0.2704, + "step": 3971 + }, + { + "epoch": 0.34, + "learning_rate": 1.535620502012478e-05, + "loss": 0.2425, + "step": 3972 + }, + { + "epoch": 0.34, + "learning_rate": 1.5353860393815642e-05, + "loss": 0.3146, + "step": 3973 + }, + { + "epoch": 0.34, + "learning_rate": 1.5351515354857404e-05, + "loss": 0.2863, + "step": 3974 + }, + { + "epoch": 0.34, + "learning_rate": 1.5349169903430804e-05, + "loss": 0.2706, + "step": 3975 + }, + { + "epoch": 0.34, + "learning_rate": 1.5346824039716622e-05, + "loss": 0.2833, + "step": 3976 + }, + { + "epoch": 0.34, + "learning_rate": 1.5344477763895663e-05, + "loss": 0.3802, + "step": 3977 + }, + { + "epoch": 0.34, + "learning_rate": 1.534213107614876e-05, + "loss": 0.2939, + "step": 3978 + }, + { + "epoch": 0.34, + "learning_rate": 1.5339783976656793e-05, + "loss": 0.3157, + "step": 3979 + }, + { + "epoch": 0.34, + "learning_rate": 1.533743646560066e-05, + "loss": 0.3018, + "step": 3980 + }, + { + "epoch": 0.34, + "learning_rate": 1.53350885431613e-05, + "loss": 0.2964, + "step": 3981 + }, + { + "epoch": 0.34, + "learning_rate": 1.5332740209519674e-05, + "loss": 0.2876, + "step": 3982 + }, + { + "epoch": 0.34, + "learning_rate": 1.5330391464856784e-05, + "loss": 0.3074, + "step": 3983 + }, + { + "epoch": 0.34, + "learning_rate": 1.5328042309353655e-05, + "loss": 0.3092, + "step": 3984 + }, + { + "epoch": 0.34, + "learning_rate": 1.532569274319136e-05, + "loss": 0.59, + "step": 3985 + }, + { + "epoch": 0.34, + "learning_rate": 1.5323342766550978e-05, + "loss": 0.2571, + "step": 3986 + }, + { + "epoch": 0.34, + "learning_rate": 1.5320992379613637e-05, + "loss": 0.2595, + "step": 3987 + }, + { + "epoch": 0.34, + "learning_rate": 1.5318641582560497e-05, + "loss": 0.2527, + "step": 3988 + }, + { + "epoch": 0.34, + "learning_rate": 1.5316290375572746e-05, + "loss": 0.3005, + "step": 3989 + }, + { + "epoch": 0.34, + "learning_rate": 1.5313938758831596e-05, + "loss": 0.2512, + "step": 3990 + }, + { + "epoch": 0.34, + "learning_rate": 1.5311586732518307e-05, + "loss": 0.348, + "step": 3991 + }, + { + "epoch": 0.34, + "learning_rate": 1.530923429681416e-05, + "loss": 0.2892, + "step": 3992 + }, + { + "epoch": 0.34, + "learning_rate": 1.5306881451900462e-05, + "loss": 0.2681, + "step": 3993 + }, + { + "epoch": 0.34, + "learning_rate": 1.5304528197958565e-05, + "loss": 0.3127, + "step": 3994 + }, + { + "epoch": 0.34, + "learning_rate": 1.530217453516985e-05, + "loss": 0.3745, + "step": 3995 + }, + { + "epoch": 0.34, + "learning_rate": 1.5299820463715716e-05, + "loss": 0.2645, + "step": 3996 + }, + { + "epoch": 0.34, + "learning_rate": 1.5297465983777612e-05, + "loss": 0.3236, + "step": 3997 + }, + { + "epoch": 0.34, + "learning_rate": 1.5295111095536997e-05, + "loss": 0.2545, + "step": 3998 + }, + { + "epoch": 0.34, + "learning_rate": 1.529275579917539e-05, + "loss": 0.3099, + "step": 3999 + }, + { + "epoch": 0.34, + "learning_rate": 1.5290400094874316e-05, + "loss": 0.3007, + "step": 4000 + }, + { + "epoch": 0.34, + "learning_rate": 1.5288043982815345e-05, + "loss": 0.2887, + "step": 4001 + }, + { + "epoch": 0.34, + "learning_rate": 1.528568746318007e-05, + "loss": 0.2738, + "step": 4002 + }, + { + "epoch": 0.34, + "learning_rate": 1.5283330536150126e-05, + "loss": 0.2722, + "step": 4003 + }, + { + "epoch": 0.34, + "learning_rate": 1.528097320190717e-05, + "loss": 0.3093, + "step": 4004 + }, + { + "epoch": 0.34, + "learning_rate": 1.5278615460632892e-05, + "loss": 0.3059, + "step": 4005 + }, + { + "epoch": 0.34, + "learning_rate": 1.527625731250901e-05, + "loss": 0.2981, + "step": 4006 + }, + { + "epoch": 0.34, + "learning_rate": 1.5273898757717295e-05, + "loss": 0.2946, + "step": 4007 + }, + { + "epoch": 0.34, + "learning_rate": 1.527153979643952e-05, + "loss": 0.2905, + "step": 4008 + }, + { + "epoch": 0.34, + "learning_rate": 1.5269180428857506e-05, + "loss": 0.2736, + "step": 4009 + }, + { + "epoch": 0.34, + "learning_rate": 1.52668206551531e-05, + "loss": 0.3032, + "step": 4010 + }, + { + "epoch": 0.34, + "learning_rate": 1.526446047550818e-05, + "loss": 0.2682, + "step": 4011 + }, + { + "epoch": 0.34, + "learning_rate": 1.526209989010466e-05, + "loss": 0.2825, + "step": 4012 + }, + { + "epoch": 0.34, + "learning_rate": 1.525973889912448e-05, + "loss": 0.2828, + "step": 4013 + }, + { + "epoch": 0.34, + "learning_rate": 1.5257377502749614e-05, + "loss": 0.3463, + "step": 4014 + }, + { + "epoch": 0.34, + "learning_rate": 1.5255015701162071e-05, + "loss": 0.2434, + "step": 4015 + }, + { + "epoch": 0.34, + "learning_rate": 1.5252653494543883e-05, + "loss": 0.2639, + "step": 4016 + }, + { + "epoch": 0.34, + "learning_rate": 1.5250290883077114e-05, + "loss": 0.2601, + "step": 4017 + }, + { + "epoch": 0.34, + "learning_rate": 1.5247927866943869e-05, + "loss": 0.2635, + "step": 4018 + }, + { + "epoch": 0.34, + "learning_rate": 1.5245564446326273e-05, + "loss": 0.2798, + "step": 4019 + }, + { + "epoch": 0.34, + "learning_rate": 1.5243200621406492e-05, + "loss": 0.284, + "step": 4020 + }, + { + "epoch": 0.34, + "learning_rate": 1.5240836392366713e-05, + "loss": 0.2991, + "step": 4021 + }, + { + "epoch": 0.34, + "learning_rate": 1.523847175938916e-05, + "loss": 0.304, + "step": 4022 + }, + { + "epoch": 0.34, + "learning_rate": 1.523610672265609e-05, + "loss": 0.3409, + "step": 4023 + }, + { + "epoch": 0.34, + "learning_rate": 1.5233741282349783e-05, + "loss": 0.3078, + "step": 4024 + }, + { + "epoch": 0.35, + "learning_rate": 1.523137543865256e-05, + "loss": 0.2864, + "step": 4025 + }, + { + "epoch": 0.35, + "learning_rate": 1.5229009191746769e-05, + "loss": 0.2774, + "step": 4026 + }, + { + "epoch": 0.35, + "learning_rate": 1.5226642541814785e-05, + "loss": 0.2698, + "step": 4027 + }, + { + "epoch": 0.35, + "learning_rate": 1.522427548903902e-05, + "loss": 0.2891, + "step": 4028 + }, + { + "epoch": 0.35, + "learning_rate": 1.5221908033601911e-05, + "loss": 0.3114, + "step": 4029 + }, + { + "epoch": 0.35, + "learning_rate": 1.5219540175685938e-05, + "loss": 0.2944, + "step": 4030 + }, + { + "epoch": 0.35, + "learning_rate": 1.5217171915473592e-05, + "loss": 0.2927, + "step": 4031 + }, + { + "epoch": 0.35, + "learning_rate": 1.5214803253147421e-05, + "loss": 0.348, + "step": 4032 + }, + { + "epoch": 0.35, + "learning_rate": 1.521243418888998e-05, + "loss": 0.2881, + "step": 4033 + }, + { + "epoch": 0.35, + "learning_rate": 1.5210064722883865e-05, + "loss": 0.2661, + "step": 4034 + }, + { + "epoch": 0.35, + "learning_rate": 1.5207694855311707e-05, + "loss": 0.2761, + "step": 4035 + }, + { + "epoch": 0.35, + "learning_rate": 1.5205324586356161e-05, + "loss": 0.2665, + "step": 4036 + }, + { + "epoch": 0.35, + "learning_rate": 1.5202953916199916e-05, + "loss": 0.2978, + "step": 4037 + }, + { + "epoch": 0.35, + "learning_rate": 1.5200582845025688e-05, + "loss": 0.2997, + "step": 4038 + }, + { + "epoch": 0.35, + "learning_rate": 1.5198211373016239e-05, + "loss": 0.3301, + "step": 4039 + }, + { + "epoch": 0.35, + "learning_rate": 1.5195839500354337e-05, + "loss": 0.4129, + "step": 4040 + }, + { + "epoch": 0.35, + "learning_rate": 1.5193467227222803e-05, + "loss": 0.2791, + "step": 4041 + }, + { + "epoch": 0.35, + "learning_rate": 1.5191094553804476e-05, + "loss": 0.2906, + "step": 4042 + }, + { + "epoch": 0.35, + "learning_rate": 1.5188721480282226e-05, + "loss": 0.3106, + "step": 4043 + }, + { + "epoch": 0.35, + "learning_rate": 1.5186348006838966e-05, + "loss": 0.2775, + "step": 4044 + }, + { + "epoch": 0.35, + "learning_rate": 1.5183974133657628e-05, + "loss": 0.288, + "step": 4045 + }, + { + "epoch": 0.35, + "learning_rate": 1.5181599860921182e-05, + "loss": 0.2826, + "step": 4046 + }, + { + "epoch": 0.35, + "learning_rate": 1.5179225188812616e-05, + "loss": 0.6179, + "step": 4047 + }, + { + "epoch": 0.35, + "learning_rate": 1.5176850117514964e-05, + "loss": 0.3457, + "step": 4048 + }, + { + "epoch": 0.35, + "learning_rate": 1.5174474647211291e-05, + "loss": 0.2855, + "step": 4049 + }, + { + "epoch": 0.35, + "learning_rate": 1.5172098778084672e-05, + "loss": 0.3423, + "step": 4050 + }, + { + "epoch": 0.35, + "learning_rate": 1.5169722510318242e-05, + "loss": 0.2305, + "step": 4051 + }, + { + "epoch": 0.35, + "learning_rate": 1.516734584409514e-05, + "loss": 0.3118, + "step": 4052 + }, + { + "epoch": 0.35, + "learning_rate": 1.5164968779598558e-05, + "loss": 0.3058, + "step": 4053 + }, + { + "epoch": 0.35, + "learning_rate": 1.51625913170117e-05, + "loss": 0.2775, + "step": 4054 + }, + { + "epoch": 0.35, + "learning_rate": 1.5160213456517812e-05, + "loss": 0.2543, + "step": 4055 + }, + { + "epoch": 0.35, + "learning_rate": 1.5157835198300169e-05, + "loss": 0.2903, + "step": 4056 + }, + { + "epoch": 0.35, + "learning_rate": 1.5155456542542072e-05, + "loss": 0.2977, + "step": 4057 + }, + { + "epoch": 0.35, + "learning_rate": 1.5153077489426865e-05, + "loss": 0.2975, + "step": 4058 + }, + { + "epoch": 0.35, + "learning_rate": 1.5150698039137903e-05, + "loss": 0.3013, + "step": 4059 + }, + { + "epoch": 0.35, + "learning_rate": 1.5148318191858588e-05, + "loss": 0.2806, + "step": 4060 + }, + { + "epoch": 0.35, + "learning_rate": 1.5145937947772344e-05, + "loss": 0.3035, + "step": 4061 + }, + { + "epoch": 0.35, + "learning_rate": 1.514355730706263e-05, + "loss": 0.3047, + "step": 4062 + }, + { + "epoch": 0.35, + "learning_rate": 1.5141176269912931e-05, + "loss": 0.2446, + "step": 4063 + }, + { + "epoch": 0.35, + "learning_rate": 1.5138794836506772e-05, + "loss": 0.3136, + "step": 4064 + }, + { + "epoch": 0.35, + "learning_rate": 1.5136413007027699e-05, + "loss": 0.2704, + "step": 4065 + }, + { + "epoch": 0.35, + "learning_rate": 1.5134030781659288e-05, + "loss": 0.3084, + "step": 4066 + }, + { + "epoch": 0.35, + "learning_rate": 1.5131648160585155e-05, + "loss": 0.2347, + "step": 4067 + }, + { + "epoch": 0.35, + "learning_rate": 1.512926514398894e-05, + "loss": 0.278, + "step": 4068 + }, + { + "epoch": 0.35, + "learning_rate": 1.5126881732054305e-05, + "loss": 0.2468, + "step": 4069 + }, + { + "epoch": 0.35, + "learning_rate": 1.5124497924964966e-05, + "loss": 0.3073, + "step": 4070 + }, + { + "epoch": 0.35, + "learning_rate": 1.5122113722904646e-05, + "loss": 0.2711, + "step": 4071 + }, + { + "epoch": 0.35, + "learning_rate": 1.5119729126057108e-05, + "loss": 0.2786, + "step": 4072 + }, + { + "epoch": 0.35, + "learning_rate": 1.5117344134606146e-05, + "loss": 0.3053, + "step": 4073 + }, + { + "epoch": 0.35, + "learning_rate": 1.5114958748735584e-05, + "loss": 0.2523, + "step": 4074 + }, + { + "epoch": 0.35, + "learning_rate": 1.5112572968629275e-05, + "loss": 0.2957, + "step": 4075 + }, + { + "epoch": 0.35, + "learning_rate": 1.5110186794471105e-05, + "loss": 0.2795, + "step": 4076 + }, + { + "epoch": 0.35, + "learning_rate": 1.5107800226444988e-05, + "loss": 0.3082, + "step": 4077 + }, + { + "epoch": 0.35, + "learning_rate": 1.5105413264734866e-05, + "loss": 0.2845, + "step": 4078 + }, + { + "epoch": 0.35, + "learning_rate": 1.5103025909524718e-05, + "loss": 0.2731, + "step": 4079 + }, + { + "epoch": 0.35, + "learning_rate": 1.5100638160998544e-05, + "loss": 0.6196, + "step": 4080 + }, + { + "epoch": 0.35, + "learning_rate": 1.5098250019340385e-05, + "loss": 0.3105, + "step": 4081 + }, + { + "epoch": 0.35, + "learning_rate": 1.5095861484734307e-05, + "loss": 0.2779, + "step": 4082 + }, + { + "epoch": 0.35, + "learning_rate": 1.5093472557364404e-05, + "loss": 0.2938, + "step": 4083 + }, + { + "epoch": 0.35, + "learning_rate": 1.5091083237414806e-05, + "loss": 0.2758, + "step": 4084 + }, + { + "epoch": 0.35, + "learning_rate": 1.5088693525069664e-05, + "loss": 0.2797, + "step": 4085 + }, + { + "epoch": 0.35, + "learning_rate": 1.508630342051317e-05, + "loss": 0.3054, + "step": 4086 + }, + { + "epoch": 0.35, + "learning_rate": 1.5083912923929545e-05, + "loss": 0.2538, + "step": 4087 + }, + { + "epoch": 0.35, + "learning_rate": 1.5081522035503025e-05, + "loss": 0.2682, + "step": 4088 + }, + { + "epoch": 0.35, + "learning_rate": 1.50791307554179e-05, + "loss": 0.2856, + "step": 4089 + }, + { + "epoch": 0.35, + "learning_rate": 1.5076739083858472e-05, + "loss": 0.297, + "step": 4090 + }, + { + "epoch": 0.35, + "learning_rate": 1.5074347021009081e-05, + "loss": 0.2659, + "step": 4091 + }, + { + "epoch": 0.35, + "learning_rate": 1.5071954567054094e-05, + "loss": 0.3081, + "step": 4092 + }, + { + "epoch": 0.35, + "learning_rate": 1.506956172217791e-05, + "loss": 0.3233, + "step": 4093 + }, + { + "epoch": 0.35, + "learning_rate": 1.5067168486564959e-05, + "loss": 0.2972, + "step": 4094 + }, + { + "epoch": 0.35, + "learning_rate": 1.5064774860399699e-05, + "loss": 0.3282, + "step": 4095 + }, + { + "epoch": 0.35, + "learning_rate": 1.5062380843866618e-05, + "loss": 0.2553, + "step": 4096 + }, + { + "epoch": 0.35, + "learning_rate": 1.5059986437150233e-05, + "loss": 0.3025, + "step": 4097 + }, + { + "epoch": 0.35, + "learning_rate": 1.5057591640435098e-05, + "loss": 0.2498, + "step": 4098 + }, + { + "epoch": 0.35, + "learning_rate": 1.505519645390579e-05, + "loss": 0.2912, + "step": 4099 + }, + { + "epoch": 0.35, + "learning_rate": 1.5052800877746915e-05, + "loss": 0.2963, + "step": 4100 + }, + { + "epoch": 0.35, + "learning_rate": 1.5050404912143118e-05, + "loss": 0.3167, + "step": 4101 + }, + { + "epoch": 0.35, + "learning_rate": 1.5048008557279064e-05, + "loss": 0.3188, + "step": 4102 + }, + { + "epoch": 0.35, + "learning_rate": 1.5045611813339456e-05, + "loss": 0.3483, + "step": 4103 + }, + { + "epoch": 0.35, + "learning_rate": 1.5043214680509018e-05, + "loss": 0.2523, + "step": 4104 + }, + { + "epoch": 0.35, + "learning_rate": 1.5040817158972509e-05, + "loss": 0.2682, + "step": 4105 + }, + { + "epoch": 0.35, + "learning_rate": 1.5038419248914725e-05, + "loss": 0.3566, + "step": 4106 + }, + { + "epoch": 0.35, + "learning_rate": 1.5036020950520476e-05, + "loss": 0.3027, + "step": 4107 + }, + { + "epoch": 0.35, + "learning_rate": 1.5033622263974621e-05, + "loss": 0.2971, + "step": 4108 + }, + { + "epoch": 0.35, + "learning_rate": 1.5031223189462031e-05, + "loss": 0.2941, + "step": 4109 + }, + { + "epoch": 0.35, + "learning_rate": 1.5028823727167621e-05, + "loss": 0.3111, + "step": 4110 + }, + { + "epoch": 0.35, + "learning_rate": 1.5026423877276322e-05, + "loss": 0.3297, + "step": 4111 + }, + { + "epoch": 0.35, + "learning_rate": 1.5024023639973109e-05, + "loss": 0.3239, + "step": 4112 + }, + { + "epoch": 0.35, + "learning_rate": 1.5021623015442976e-05, + "loss": 0.3541, + "step": 4113 + }, + { + "epoch": 0.35, + "learning_rate": 1.5019222003870954e-05, + "loss": 0.3144, + "step": 4114 + }, + { + "epoch": 0.35, + "learning_rate": 1.5016820605442105e-05, + "loss": 0.2922, + "step": 4115 + }, + { + "epoch": 0.35, + "learning_rate": 1.501441882034151e-05, + "loss": 0.321, + "step": 4116 + }, + { + "epoch": 0.35, + "learning_rate": 1.5012016648754291e-05, + "loss": 0.2629, + "step": 4117 + }, + { + "epoch": 0.35, + "learning_rate": 1.500961409086559e-05, + "loss": 0.2981, + "step": 4118 + }, + { + "epoch": 0.35, + "learning_rate": 1.500721114686059e-05, + "loss": 0.2495, + "step": 4119 + }, + { + "epoch": 0.35, + "learning_rate": 1.5004807816924494e-05, + "loss": 0.3297, + "step": 4120 + }, + { + "epoch": 0.35, + "learning_rate": 1.5002404101242543e-05, + "loss": 0.2517, + "step": 4121 + }, + { + "epoch": 0.35, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.3335, + "step": 4122 + }, + { + "epoch": 0.35, + "learning_rate": 1.4997595513382166e-05, + "loss": 0.3159, + "step": 4123 + }, + { + "epoch": 0.35, + "learning_rate": 1.4995190641574361e-05, + "loss": 0.3029, + "step": 4124 + }, + { + "epoch": 0.35, + "learning_rate": 1.4992785384761945e-05, + "loss": 0.2993, + "step": 4125 + }, + { + "epoch": 0.35, + "learning_rate": 1.49903797431303e-05, + "loss": 0.2734, + "step": 4126 + }, + { + "epoch": 0.35, + "learning_rate": 1.4987973716864843e-05, + "loss": 0.3224, + "step": 4127 + }, + { + "epoch": 0.35, + "learning_rate": 1.4985567306151018e-05, + "loss": 0.3124, + "step": 4128 + }, + { + "epoch": 0.35, + "learning_rate": 1.4983160511174302e-05, + "loss": 0.2655, + "step": 4129 + }, + { + "epoch": 0.35, + "learning_rate": 1.4980753332120193e-05, + "loss": 0.3051, + "step": 4130 + }, + { + "epoch": 0.35, + "learning_rate": 1.497834576917423e-05, + "loss": 0.2711, + "step": 4131 + }, + { + "epoch": 0.35, + "learning_rate": 1.4975937822521972e-05, + "loss": 0.3042, + "step": 4132 + }, + { + "epoch": 0.35, + "learning_rate": 1.4973529492349013e-05, + "loss": 0.3492, + "step": 4133 + }, + { + "epoch": 0.35, + "learning_rate": 1.497112077884098e-05, + "loss": 0.2485, + "step": 4134 + }, + { + "epoch": 0.35, + "learning_rate": 1.4968711682183515e-05, + "loss": 0.2875, + "step": 4135 + }, + { + "epoch": 0.35, + "learning_rate": 1.4966302202562308e-05, + "loss": 0.2903, + "step": 4136 + }, + { + "epoch": 0.35, + "learning_rate": 1.4963892340163067e-05, + "loss": 0.6328, + "step": 4137 + }, + { + "epoch": 0.35, + "learning_rate": 1.4961482095171529e-05, + "loss": 0.2867, + "step": 4138 + }, + { + "epoch": 0.35, + "learning_rate": 1.4959071467773467e-05, + "loss": 0.329, + "step": 4139 + }, + { + "epoch": 0.35, + "learning_rate": 1.4956660458154679e-05, + "loss": 0.2579, + "step": 4140 + }, + { + "epoch": 0.35, + "learning_rate": 1.4954249066501e-05, + "loss": 0.2817, + "step": 4141 + }, + { + "epoch": 0.36, + "learning_rate": 1.4951837292998277e-05, + "loss": 0.3085, + "step": 4142 + }, + { + "epoch": 0.36, + "learning_rate": 1.4949425137832406e-05, + "loss": 0.2899, + "step": 4143 + }, + { + "epoch": 0.36, + "learning_rate": 1.4947012601189299e-05, + "loss": 0.2602, + "step": 4144 + }, + { + "epoch": 0.36, + "learning_rate": 1.4944599683254903e-05, + "loss": 0.3393, + "step": 4145 + }, + { + "epoch": 0.36, + "learning_rate": 1.4942186384215198e-05, + "loss": 0.2863, + "step": 4146 + }, + { + "epoch": 0.36, + "learning_rate": 1.4939772704256187e-05, + "loss": 0.3138, + "step": 4147 + }, + { + "epoch": 0.36, + "learning_rate": 1.4937358643563906e-05, + "loss": 0.3074, + "step": 4148 + }, + { + "epoch": 0.36, + "learning_rate": 1.4934944202324413e-05, + "loss": 0.3161, + "step": 4149 + }, + { + "epoch": 0.36, + "learning_rate": 1.4932529380723806e-05, + "loss": 0.2722, + "step": 4150 + }, + { + "epoch": 0.36, + "learning_rate": 1.4930114178948207e-05, + "loss": 0.2604, + "step": 4151 + }, + { + "epoch": 0.36, + "learning_rate": 1.4927698597183768e-05, + "loss": 0.3197, + "step": 4152 + }, + { + "epoch": 0.36, + "learning_rate": 1.4925282635616671e-05, + "loss": 0.2986, + "step": 4153 + }, + { + "epoch": 0.36, + "learning_rate": 1.4922866294433122e-05, + "loss": 0.2798, + "step": 4154 + }, + { + "epoch": 0.36, + "learning_rate": 1.4920449573819366e-05, + "loss": 0.2947, + "step": 4155 + }, + { + "epoch": 0.36, + "learning_rate": 1.491803247396167e-05, + "loss": 0.2785, + "step": 4156 + }, + { + "epoch": 0.36, + "learning_rate": 1.4915614995046329e-05, + "loss": 0.3163, + "step": 4157 + }, + { + "epoch": 0.36, + "learning_rate": 1.4913197137259675e-05, + "loss": 0.3738, + "step": 4158 + }, + { + "epoch": 0.36, + "learning_rate": 1.4910778900788061e-05, + "loss": 0.2656, + "step": 4159 + }, + { + "epoch": 0.36, + "learning_rate": 1.4908360285817875e-05, + "loss": 0.3268, + "step": 4160 + }, + { + "epoch": 0.36, + "learning_rate": 1.490594129253553e-05, + "loss": 0.344, + "step": 4161 + }, + { + "epoch": 0.36, + "learning_rate": 1.4903521921127472e-05, + "loss": 0.5894, + "step": 4162 + }, + { + "epoch": 0.36, + "learning_rate": 1.4901102171780175e-05, + "loss": 0.3341, + "step": 4163 + }, + { + "epoch": 0.36, + "learning_rate": 1.4898682044680135e-05, + "loss": 0.2888, + "step": 4164 + }, + { + "epoch": 0.36, + "learning_rate": 1.4896261540013894e-05, + "loss": 0.6628, + "step": 4165 + }, + { + "epoch": 0.36, + "learning_rate": 1.4893840657968001e-05, + "loss": 0.2863, + "step": 4166 + }, + { + "epoch": 0.36, + "learning_rate": 1.4891419398729057e-05, + "loss": 0.2621, + "step": 4167 + }, + { + "epoch": 0.36, + "learning_rate": 1.488899776248367e-05, + "loss": 0.2929, + "step": 4168 + }, + { + "epoch": 0.36, + "learning_rate": 1.4886575749418494e-05, + "loss": 0.3234, + "step": 4169 + }, + { + "epoch": 0.36, + "learning_rate": 1.4884153359720205e-05, + "loss": 0.3334, + "step": 4170 + }, + { + "epoch": 0.36, + "learning_rate": 1.488173059357551e-05, + "loss": 0.269, + "step": 4171 + }, + { + "epoch": 0.36, + "learning_rate": 1.4879307451171141e-05, + "loss": 0.2747, + "step": 4172 + }, + { + "epoch": 0.36, + "learning_rate": 1.4876883932693864e-05, + "loss": 0.3294, + "step": 4173 + }, + { + "epoch": 0.36, + "learning_rate": 1.4874460038330469e-05, + "loss": 0.2809, + "step": 4174 + }, + { + "epoch": 0.36, + "learning_rate": 1.487203576826778e-05, + "loss": 0.296, + "step": 4175 + }, + { + "epoch": 0.36, + "learning_rate": 1.4869611122692649e-05, + "loss": 0.3059, + "step": 4176 + }, + { + "epoch": 0.36, + "learning_rate": 1.4867186101791951e-05, + "loss": 0.3608, + "step": 4177 + }, + { + "epoch": 0.36, + "learning_rate": 1.48647607057526e-05, + "loss": 0.2745, + "step": 4178 + }, + { + "epoch": 0.36, + "learning_rate": 1.4862334934761533e-05, + "loss": 0.3418, + "step": 4179 + }, + { + "epoch": 0.36, + "learning_rate": 1.485990878900571e-05, + "loss": 0.2679, + "step": 4180 + }, + { + "epoch": 0.36, + "learning_rate": 1.4857482268672136e-05, + "loss": 0.2737, + "step": 4181 + }, + { + "epoch": 0.36, + "learning_rate": 1.4855055373947829e-05, + "loss": 0.2919, + "step": 4182 + }, + { + "epoch": 0.36, + "learning_rate": 1.485262810501984e-05, + "loss": 0.3055, + "step": 4183 + }, + { + "epoch": 0.36, + "learning_rate": 1.4850200462075255e-05, + "loss": 0.3151, + "step": 4184 + }, + { + "epoch": 0.36, + "learning_rate": 1.4847772445301186e-05, + "loss": 0.2498, + "step": 4185 + }, + { + "epoch": 0.36, + "learning_rate": 1.4845344054884772e-05, + "loss": 0.2716, + "step": 4186 + }, + { + "epoch": 0.36, + "learning_rate": 1.4842915291013176e-05, + "loss": 0.3465, + "step": 4187 + }, + { + "epoch": 0.36, + "learning_rate": 1.4840486153873599e-05, + "loss": 0.2859, + "step": 4188 + }, + { + "epoch": 0.36, + "learning_rate": 1.483805664365327e-05, + "loss": 0.2778, + "step": 4189 + }, + { + "epoch": 0.36, + "learning_rate": 1.4835626760539437e-05, + "loss": 0.3149, + "step": 4190 + }, + { + "epoch": 0.36, + "learning_rate": 1.4833196504719389e-05, + "loss": 0.308, + "step": 4191 + }, + { + "epoch": 0.36, + "learning_rate": 1.4830765876380438e-05, + "loss": 0.2543, + "step": 4192 + }, + { + "epoch": 0.36, + "learning_rate": 1.482833487570992e-05, + "loss": 0.3069, + "step": 4193 + }, + { + "epoch": 0.36, + "learning_rate": 1.4825903502895207e-05, + "loss": 0.3235, + "step": 4194 + }, + { + "epoch": 0.36, + "learning_rate": 1.4823471758123697e-05, + "loss": 0.2879, + "step": 4195 + }, + { + "epoch": 0.36, + "learning_rate": 1.482103964158282e-05, + "loss": 0.2627, + "step": 4196 + }, + { + "epoch": 0.36, + "learning_rate": 1.4818607153460025e-05, + "loss": 0.2626, + "step": 4197 + }, + { + "epoch": 0.36, + "learning_rate": 1.4816174293942804e-05, + "loss": 0.3358, + "step": 4198 + }, + { + "epoch": 0.36, + "learning_rate": 1.4813741063218662e-05, + "loss": 0.2737, + "step": 4199 + }, + { + "epoch": 0.36, + "learning_rate": 1.4811307461475151e-05, + "loss": 0.3, + "step": 4200 + }, + { + "epoch": 0.36, + "learning_rate": 1.4808873488899829e-05, + "loss": 0.2836, + "step": 4201 + }, + { + "epoch": 0.36, + "learning_rate": 1.4806439145680298e-05, + "loss": 0.2899, + "step": 4202 + }, + { + "epoch": 0.36, + "learning_rate": 1.4804004432004191e-05, + "loss": 0.2928, + "step": 4203 + }, + { + "epoch": 0.36, + "learning_rate": 1.4801569348059158e-05, + "loss": 0.2711, + "step": 4204 + }, + { + "epoch": 0.36, + "learning_rate": 1.4799133894032887e-05, + "loss": 0.3206, + "step": 4205 + }, + { + "epoch": 0.36, + "learning_rate": 1.4796698070113084e-05, + "loss": 0.3285, + "step": 4206 + }, + { + "epoch": 0.36, + "learning_rate": 1.4794261876487496e-05, + "loss": 0.2906, + "step": 4207 + }, + { + "epoch": 0.36, + "learning_rate": 1.4791825313343896e-05, + "loss": 0.3313, + "step": 4208 + }, + { + "epoch": 0.36, + "learning_rate": 1.4789388380870074e-05, + "loss": 0.257, + "step": 4209 + }, + { + "epoch": 0.36, + "learning_rate": 1.4786951079253861e-05, + "loss": 0.3441, + "step": 4210 + }, + { + "epoch": 0.36, + "learning_rate": 1.4784513408683115e-05, + "loss": 0.299, + "step": 4211 + }, + { + "epoch": 0.36, + "learning_rate": 1.4782075369345715e-05, + "loss": 0.2814, + "step": 4212 + }, + { + "epoch": 0.36, + "learning_rate": 1.4779636961429573e-05, + "loss": 0.257, + "step": 4213 + }, + { + "epoch": 0.36, + "learning_rate": 1.477719818512263e-05, + "loss": 0.3127, + "step": 4214 + }, + { + "epoch": 0.36, + "learning_rate": 1.4774759040612859e-05, + "loss": 0.2875, + "step": 4215 + }, + { + "epoch": 0.36, + "learning_rate": 1.477231952808825e-05, + "loss": 0.3006, + "step": 4216 + }, + { + "epoch": 0.36, + "learning_rate": 1.4769879647736835e-05, + "loss": 0.3078, + "step": 4217 + }, + { + "epoch": 0.36, + "learning_rate": 1.4767439399746666e-05, + "loss": 0.255, + "step": 4218 + }, + { + "epoch": 0.36, + "learning_rate": 1.4764998784305825e-05, + "loss": 0.2906, + "step": 4219 + }, + { + "epoch": 0.36, + "learning_rate": 1.4762557801602422e-05, + "loss": 0.2764, + "step": 4220 + }, + { + "epoch": 0.36, + "learning_rate": 1.476011645182459e-05, + "loss": 0.2819, + "step": 4221 + }, + { + "epoch": 0.36, + "learning_rate": 1.4757674735160512e-05, + "loss": 0.2874, + "step": 4222 + }, + { + "epoch": 0.36, + "learning_rate": 1.4755232651798368e-05, + "loss": 0.2423, + "step": 4223 + }, + { + "epoch": 0.36, + "learning_rate": 1.475279020192639e-05, + "loss": 0.3406, + "step": 4224 + }, + { + "epoch": 0.36, + "learning_rate": 1.4750347385732826e-05, + "loss": 0.2884, + "step": 4225 + }, + { + "epoch": 0.36, + "learning_rate": 1.4747904203405959e-05, + "loss": 0.3337, + "step": 4226 + }, + { + "epoch": 0.36, + "learning_rate": 1.4745460655134091e-05, + "loss": 0.2497, + "step": 4227 + }, + { + "epoch": 0.36, + "learning_rate": 1.474301674110557e-05, + "loss": 0.3538, + "step": 4228 + }, + { + "epoch": 0.36, + "learning_rate": 1.4740572461508753e-05, + "loss": 0.3043, + "step": 4229 + }, + { + "epoch": 0.36, + "learning_rate": 1.4738127816532034e-05, + "loss": 0.2672, + "step": 4230 + }, + { + "epoch": 0.36, + "learning_rate": 1.4735682806363834e-05, + "loss": 0.3094, + "step": 4231 + }, + { + "epoch": 0.36, + "learning_rate": 1.4733237431192604e-05, + "loss": 0.2589, + "step": 4232 + }, + { + "epoch": 0.36, + "learning_rate": 1.4730791691206818e-05, + "loss": 0.3208, + "step": 4233 + }, + { + "epoch": 0.36, + "learning_rate": 1.4728345586594986e-05, + "loss": 0.3356, + "step": 4234 + }, + { + "epoch": 0.36, + "learning_rate": 1.4725899117545638e-05, + "loss": 0.2686, + "step": 4235 + }, + { + "epoch": 0.36, + "learning_rate": 1.4723452284247341e-05, + "loss": 0.3402, + "step": 4236 + }, + { + "epoch": 0.36, + "learning_rate": 1.4721005086888678e-05, + "loss": 0.2506, + "step": 4237 + }, + { + "epoch": 0.36, + "learning_rate": 1.4718557525658272e-05, + "loss": 0.3091, + "step": 4238 + }, + { + "epoch": 0.36, + "learning_rate": 1.4716109600744766e-05, + "loss": 0.2957, + "step": 4239 + }, + { + "epoch": 0.36, + "learning_rate": 1.4713661312336832e-05, + "loss": 0.2809, + "step": 4240 + }, + { + "epoch": 0.36, + "learning_rate": 1.4711212660623181e-05, + "loss": 0.2968, + "step": 4241 + }, + { + "epoch": 0.36, + "learning_rate": 1.4708763645792531e-05, + "loss": 0.3277, + "step": 4242 + }, + { + "epoch": 0.36, + "learning_rate": 1.4706314268033652e-05, + "loss": 0.2744, + "step": 4243 + }, + { + "epoch": 0.36, + "learning_rate": 1.4703864527535321e-05, + "loss": 0.3157, + "step": 4244 + }, + { + "epoch": 0.36, + "learning_rate": 1.4701414424486353e-05, + "loss": 0.3535, + "step": 4245 + }, + { + "epoch": 0.36, + "learning_rate": 1.4698963959075592e-05, + "loss": 0.3309, + "step": 4246 + }, + { + "epoch": 0.36, + "learning_rate": 1.4696513131491907e-05, + "loss": 0.2908, + "step": 4247 + }, + { + "epoch": 0.36, + "learning_rate": 1.4694061941924199e-05, + "loss": 0.2975, + "step": 4248 + }, + { + "epoch": 0.36, + "learning_rate": 1.4691610390561389e-05, + "loss": 0.286, + "step": 4249 + }, + { + "epoch": 0.36, + "learning_rate": 1.4689158477592433e-05, + "loss": 0.3047, + "step": 4250 + }, + { + "epoch": 0.36, + "learning_rate": 1.4686706203206309e-05, + "loss": 0.2431, + "step": 4251 + }, + { + "epoch": 0.36, + "learning_rate": 1.4684253567592029e-05, + "loss": 0.3116, + "step": 4252 + }, + { + "epoch": 0.36, + "learning_rate": 1.4681800570938628e-05, + "loss": 0.2908, + "step": 4253 + }, + { + "epoch": 0.36, + "learning_rate": 1.4679347213435176e-05, + "loss": 0.2996, + "step": 4254 + }, + { + "epoch": 0.36, + "learning_rate": 1.4676893495270762e-05, + "loss": 0.2726, + "step": 4255 + }, + { + "epoch": 0.36, + "learning_rate": 1.4674439416634505e-05, + "loss": 0.3203, + "step": 4256 + }, + { + "epoch": 0.36, + "learning_rate": 1.4671984977715556e-05, + "loss": 0.329, + "step": 4257 + }, + { + "epoch": 0.36, + "learning_rate": 1.4669530178703089e-05, + "loss": 0.288, + "step": 4258 + }, + { + "epoch": 0.37, + "learning_rate": 1.4667075019786306e-05, + "loss": 0.2962, + "step": 4259 + }, + { + "epoch": 0.37, + "learning_rate": 1.4664619501154445e-05, + "loss": 0.2955, + "step": 4260 + }, + { + "epoch": 0.37, + "learning_rate": 1.4662163622996758e-05, + "loss": 0.296, + "step": 4261 + }, + { + "epoch": 0.37, + "learning_rate": 1.465970738550254e-05, + "loss": 0.2678, + "step": 4262 + }, + { + "epoch": 0.37, + "learning_rate": 1.4657250788861099e-05, + "loss": 0.2631, + "step": 4263 + }, + { + "epoch": 0.37, + "learning_rate": 1.4654793833261777e-05, + "loss": 0.2744, + "step": 4264 + }, + { + "epoch": 0.37, + "learning_rate": 1.4652336518893948e-05, + "loss": 0.2983, + "step": 4265 + }, + { + "epoch": 0.37, + "learning_rate": 1.464987884594701e-05, + "loss": 0.2576, + "step": 4266 + }, + { + "epoch": 0.37, + "learning_rate": 1.4647420814610384e-05, + "loss": 0.2767, + "step": 4267 + }, + { + "epoch": 0.37, + "learning_rate": 1.4644962425073526e-05, + "loss": 0.3093, + "step": 4268 + }, + { + "epoch": 0.37, + "learning_rate": 1.4642503677525917e-05, + "loss": 0.6454, + "step": 4269 + }, + { + "epoch": 0.37, + "learning_rate": 1.4640044572157062e-05, + "loss": 0.3005, + "step": 4270 + }, + { + "epoch": 0.37, + "learning_rate": 1.4637585109156498e-05, + "loss": 0.254, + "step": 4271 + }, + { + "epoch": 0.37, + "learning_rate": 1.4635125288713789e-05, + "loss": 0.3408, + "step": 4272 + }, + { + "epoch": 0.37, + "learning_rate": 1.4632665111018525e-05, + "loss": 0.3262, + "step": 4273 + }, + { + "epoch": 0.37, + "learning_rate": 1.4630204576260328e-05, + "loss": 0.282, + "step": 4274 + }, + { + "epoch": 0.37, + "learning_rate": 1.4627743684628838e-05, + "loss": 0.278, + "step": 4275 + }, + { + "epoch": 0.37, + "learning_rate": 1.4625282436313733e-05, + "loss": 0.2499, + "step": 4276 + }, + { + "epoch": 0.37, + "learning_rate": 1.4622820831504712e-05, + "loss": 0.2925, + "step": 4277 + }, + { + "epoch": 0.37, + "learning_rate": 1.46203588703915e-05, + "loss": 0.2936, + "step": 4278 + }, + { + "epoch": 0.37, + "learning_rate": 1.461789655316386e-05, + "loss": 0.3112, + "step": 4279 + }, + { + "epoch": 0.37, + "learning_rate": 1.461543388001157e-05, + "loss": 0.2738, + "step": 4280 + }, + { + "epoch": 0.37, + "learning_rate": 1.4612970851124442e-05, + "loss": 0.3252, + "step": 4281 + }, + { + "epoch": 0.37, + "learning_rate": 1.4610507466692312e-05, + "loss": 0.2949, + "step": 4282 + }, + { + "epoch": 0.37, + "learning_rate": 1.460804372690505e-05, + "loss": 0.2961, + "step": 4283 + }, + { + "epoch": 0.37, + "learning_rate": 1.4605579631952544e-05, + "loss": 0.2932, + "step": 4284 + }, + { + "epoch": 0.37, + "learning_rate": 1.4603115182024721e-05, + "loss": 0.2913, + "step": 4285 + }, + { + "epoch": 0.37, + "learning_rate": 1.4600650377311523e-05, + "loss": 0.323, + "step": 4286 + }, + { + "epoch": 0.37, + "learning_rate": 1.4598185218002925e-05, + "loss": 0.2891, + "step": 4287 + }, + { + "epoch": 0.37, + "learning_rate": 1.4595719704288932e-05, + "loss": 0.2695, + "step": 4288 + }, + { + "epoch": 0.37, + "learning_rate": 1.4593253836359573e-05, + "loss": 0.2529, + "step": 4289 + }, + { + "epoch": 0.37, + "learning_rate": 1.4590787614404902e-05, + "loss": 0.3531, + "step": 4290 + }, + { + "epoch": 0.37, + "learning_rate": 1.4588321038615005e-05, + "loss": 0.2762, + "step": 4291 + }, + { + "epoch": 0.37, + "learning_rate": 1.4585854109179995e-05, + "loss": 0.2966, + "step": 4292 + }, + { + "epoch": 0.37, + "learning_rate": 1.4583386826290013e-05, + "loss": 0.2784, + "step": 4293 + }, + { + "epoch": 0.37, + "learning_rate": 1.4580919190135219e-05, + "loss": 0.2909, + "step": 4294 + }, + { + "epoch": 0.37, + "learning_rate": 1.457845120090581e-05, + "loss": 0.2937, + "step": 4295 + }, + { + "epoch": 0.37, + "learning_rate": 1.4575982858792002e-05, + "loss": 0.3417, + "step": 4296 + }, + { + "epoch": 0.37, + "learning_rate": 1.4573514163984044e-05, + "loss": 0.3081, + "step": 4297 + }, + { + "epoch": 0.37, + "learning_rate": 1.4571045116672219e-05, + "loss": 0.2868, + "step": 4298 + }, + { + "epoch": 0.37, + "learning_rate": 1.4568575717046819e-05, + "loss": 0.3148, + "step": 4299 + }, + { + "epoch": 0.37, + "learning_rate": 1.4566105965298179e-05, + "loss": 0.2949, + "step": 4300 + }, + { + "epoch": 0.37, + "learning_rate": 1.4563635861616652e-05, + "loss": 0.3117, + "step": 4301 + }, + { + "epoch": 0.37, + "learning_rate": 1.4561165406192622e-05, + "loss": 0.3089, + "step": 4302 + }, + { + "epoch": 0.37, + "learning_rate": 1.4558694599216496e-05, + "loss": 0.2485, + "step": 4303 + }, + { + "epoch": 0.37, + "learning_rate": 1.455622344087872e-05, + "loss": 0.3022, + "step": 4304 + }, + { + "epoch": 0.37, + "learning_rate": 1.4553751931369755e-05, + "loss": 0.3172, + "step": 4305 + }, + { + "epoch": 0.37, + "learning_rate": 1.4551280070880089e-05, + "loss": 0.2928, + "step": 4306 + }, + { + "epoch": 0.37, + "learning_rate": 1.4548807859600248e-05, + "loss": 0.3112, + "step": 4307 + }, + { + "epoch": 0.37, + "learning_rate": 1.4546335297720769e-05, + "loss": 0.2914, + "step": 4308 + }, + { + "epoch": 0.37, + "learning_rate": 1.454386238543223e-05, + "loss": 0.3488, + "step": 4309 + }, + { + "epoch": 0.37, + "learning_rate": 1.4541389122925229e-05, + "loss": 0.3497, + "step": 4310 + }, + { + "epoch": 0.37, + "learning_rate": 1.4538915510390397e-05, + "loss": 0.2661, + "step": 4311 + }, + { + "epoch": 0.37, + "learning_rate": 1.4536441548018385e-05, + "loss": 0.2687, + "step": 4312 + }, + { + "epoch": 0.37, + "learning_rate": 1.4533967235999872e-05, + "loss": 0.3354, + "step": 4313 + }, + { + "epoch": 0.37, + "learning_rate": 1.453149257452557e-05, + "loss": 0.2734, + "step": 4314 + }, + { + "epoch": 0.37, + "learning_rate": 1.4529017563786208e-05, + "loss": 0.2393, + "step": 4315 + }, + { + "epoch": 0.37, + "learning_rate": 1.452654220397255e-05, + "loss": 0.3446, + "step": 4316 + }, + { + "epoch": 0.37, + "learning_rate": 1.4524066495275388e-05, + "loss": 0.2862, + "step": 4317 + }, + { + "epoch": 0.37, + "learning_rate": 1.4521590437885533e-05, + "loss": 0.2607, + "step": 4318 + }, + { + "epoch": 0.37, + "learning_rate": 1.451911403199383e-05, + "loss": 0.2426, + "step": 4319 + }, + { + "epoch": 0.37, + "learning_rate": 1.4516637277791149e-05, + "loss": 0.3161, + "step": 4320 + }, + { + "epoch": 0.37, + "learning_rate": 1.4514160175468379e-05, + "loss": 0.3187, + "step": 4321 + }, + { + "epoch": 0.37, + "learning_rate": 1.451168272521645e-05, + "loss": 0.2853, + "step": 4322 + }, + { + "epoch": 0.37, + "learning_rate": 1.4509204927226307e-05, + "loss": 0.3265, + "step": 4323 + }, + { + "epoch": 0.37, + "learning_rate": 1.4506726781688935e-05, + "loss": 0.3212, + "step": 4324 + }, + { + "epoch": 0.37, + "learning_rate": 1.4504248288795328e-05, + "loss": 0.2377, + "step": 4325 + }, + { + "epoch": 0.37, + "learning_rate": 1.450176944873652e-05, + "loss": 0.3303, + "step": 4326 + }, + { + "epoch": 0.37, + "learning_rate": 1.4499290261703565e-05, + "loss": 0.3043, + "step": 4327 + }, + { + "epoch": 0.37, + "learning_rate": 1.4496810727887547e-05, + "loss": 0.2609, + "step": 4328 + }, + { + "epoch": 0.37, + "learning_rate": 1.449433084747958e-05, + "loss": 0.2778, + "step": 4329 + }, + { + "epoch": 0.37, + "learning_rate": 1.4491850620670798e-05, + "loss": 0.3074, + "step": 4330 + }, + { + "epoch": 0.37, + "learning_rate": 1.4489370047652364e-05, + "loss": 0.2453, + "step": 4331 + }, + { + "epoch": 0.37, + "learning_rate": 1.4486889128615472e-05, + "loss": 0.2794, + "step": 4332 + }, + { + "epoch": 0.37, + "learning_rate": 1.4484407863751335e-05, + "loss": 0.2837, + "step": 4333 + }, + { + "epoch": 0.37, + "learning_rate": 1.4481926253251197e-05, + "loss": 0.2922, + "step": 4334 + }, + { + "epoch": 0.37, + "learning_rate": 1.4479444297306326e-05, + "loss": 0.2958, + "step": 4335 + }, + { + "epoch": 0.37, + "learning_rate": 1.4476961996108027e-05, + "loss": 0.2794, + "step": 4336 + }, + { + "epoch": 0.37, + "learning_rate": 1.4474479349847617e-05, + "loss": 0.2917, + "step": 4337 + }, + { + "epoch": 0.37, + "learning_rate": 1.4471996358716451e-05, + "loss": 0.311, + "step": 4338 + }, + { + "epoch": 0.37, + "learning_rate": 1.4469513022905898e-05, + "loss": 0.3389, + "step": 4339 + }, + { + "epoch": 0.37, + "learning_rate": 1.4467029342607368e-05, + "loss": 0.3246, + "step": 4340 + }, + { + "epoch": 0.37, + "learning_rate": 1.4464545318012286e-05, + "loss": 0.2924, + "step": 4341 + }, + { + "epoch": 0.37, + "learning_rate": 1.4462060949312114e-05, + "loss": 0.3387, + "step": 4342 + }, + { + "epoch": 0.37, + "learning_rate": 1.4459576236698331e-05, + "loss": 0.2733, + "step": 4343 + }, + { + "epoch": 0.37, + "learning_rate": 1.4457091180362445e-05, + "loss": 0.2754, + "step": 4344 + }, + { + "epoch": 0.37, + "learning_rate": 1.4454605780495998e-05, + "loss": 0.2828, + "step": 4345 + }, + { + "epoch": 0.37, + "learning_rate": 1.4452120037290547e-05, + "loss": 0.3292, + "step": 4346 + }, + { + "epoch": 0.37, + "learning_rate": 1.4449633950937678e-05, + "loss": 0.3133, + "step": 4347 + }, + { + "epoch": 0.37, + "learning_rate": 1.4447147521629013e-05, + "loss": 0.2632, + "step": 4348 + }, + { + "epoch": 0.37, + "learning_rate": 1.4444660749556192e-05, + "loss": 0.2484, + "step": 4349 + }, + { + "epoch": 0.37, + "learning_rate": 1.4442173634910881e-05, + "loss": 0.2896, + "step": 4350 + }, + { + "epoch": 0.37, + "learning_rate": 1.4439686177884778e-05, + "loss": 0.2784, + "step": 4351 + }, + { + "epoch": 0.37, + "learning_rate": 1.4437198378669598e-05, + "loss": 0.2953, + "step": 4352 + }, + { + "epoch": 0.37, + "learning_rate": 1.4434710237457094e-05, + "loss": 0.3451, + "step": 4353 + }, + { + "epoch": 0.37, + "learning_rate": 1.4432221754439037e-05, + "loss": 0.2583, + "step": 4354 + }, + { + "epoch": 0.37, + "learning_rate": 1.4429732929807227e-05, + "loss": 0.2822, + "step": 4355 + }, + { + "epoch": 0.37, + "learning_rate": 1.4427243763753488e-05, + "loss": 0.25, + "step": 4356 + }, + { + "epoch": 0.37, + "learning_rate": 1.4424754256469681e-05, + "loss": 0.2886, + "step": 4357 + }, + { + "epoch": 0.37, + "learning_rate": 1.4422264408147676e-05, + "loss": 0.2718, + "step": 4358 + }, + { + "epoch": 0.37, + "learning_rate": 1.4419774218979383e-05, + "loss": 0.283, + "step": 4359 + }, + { + "epoch": 0.37, + "learning_rate": 1.4417283689156731e-05, + "loss": 0.2921, + "step": 4360 + }, + { + "epoch": 0.37, + "learning_rate": 1.4414792818871676e-05, + "loss": 0.3076, + "step": 4361 + }, + { + "epoch": 0.37, + "learning_rate": 1.441230160831621e-05, + "loss": 0.3334, + "step": 4362 + }, + { + "epoch": 0.37, + "learning_rate": 1.4409810057682333e-05, + "loss": 0.2635, + "step": 4363 + }, + { + "epoch": 0.37, + "learning_rate": 1.4407318167162092e-05, + "loss": 0.3124, + "step": 4364 + }, + { + "epoch": 0.37, + "learning_rate": 1.4404825936947539e-05, + "loss": 0.3156, + "step": 4365 + }, + { + "epoch": 0.37, + "learning_rate": 1.440233336723077e-05, + "loss": 0.3383, + "step": 4366 + }, + { + "epoch": 0.37, + "learning_rate": 1.4399840458203896e-05, + "loss": 0.2731, + "step": 4367 + }, + { + "epoch": 0.37, + "learning_rate": 1.4397347210059059e-05, + "loss": 0.2467, + "step": 4368 + }, + { + "epoch": 0.37, + "learning_rate": 1.439485362298843e-05, + "loss": 0.3231, + "step": 4369 + }, + { + "epoch": 0.37, + "learning_rate": 1.4392359697184197e-05, + "loss": 0.2736, + "step": 4370 + }, + { + "epoch": 0.37, + "learning_rate": 1.4389865432838583e-05, + "loss": 0.3091, + "step": 4371 + }, + { + "epoch": 0.37, + "learning_rate": 1.4387370830143832e-05, + "loss": 0.2951, + "step": 4372 + }, + { + "epoch": 0.37, + "learning_rate": 1.4384875889292216e-05, + "loss": 0.2574, + "step": 4373 + }, + { + "epoch": 0.37, + "learning_rate": 1.4382380610476032e-05, + "loss": 0.3156, + "step": 4374 + }, + { + "epoch": 0.38, + "learning_rate": 1.4379884993887605e-05, + "loss": 0.2957, + "step": 4375 + }, + { + "epoch": 0.38, + "learning_rate": 1.4377389039719285e-05, + "loss": 0.3148, + "step": 4376 + }, + { + "epoch": 0.38, + "learning_rate": 1.4374892748163447e-05, + "loss": 0.3152, + "step": 4377 + }, + { + "epoch": 0.38, + "learning_rate": 1.4372396119412493e-05, + "loss": 0.2703, + "step": 4378 + }, + { + "epoch": 0.38, + "learning_rate": 1.4369899153658848e-05, + "loss": 0.2795, + "step": 4379 + }, + { + "epoch": 0.38, + "learning_rate": 1.436740185109497e-05, + "loss": 0.3353, + "step": 4380 + }, + { + "epoch": 0.38, + "learning_rate": 1.436490421191334e-05, + "loss": 0.3352, + "step": 4381 + }, + { + "epoch": 0.38, + "learning_rate": 1.436240623630646e-05, + "loss": 0.3149, + "step": 4382 + }, + { + "epoch": 0.38, + "learning_rate": 1.4359907924466863e-05, + "loss": 0.2802, + "step": 4383 + }, + { + "epoch": 0.38, + "learning_rate": 1.4357409276587105e-05, + "loss": 0.2801, + "step": 4384 + }, + { + "epoch": 0.38, + "learning_rate": 1.4354910292859769e-05, + "loss": 0.2872, + "step": 4385 + }, + { + "epoch": 0.38, + "learning_rate": 1.4352410973477466e-05, + "loss": 0.2979, + "step": 4386 + }, + { + "epoch": 0.38, + "learning_rate": 1.4349911318632832e-05, + "loss": 0.3075, + "step": 4387 + }, + { + "epoch": 0.38, + "learning_rate": 1.434741132851853e-05, + "loss": 0.278, + "step": 4388 + }, + { + "epoch": 0.38, + "learning_rate": 1.434491100332724e-05, + "loss": 0.2346, + "step": 4389 + }, + { + "epoch": 0.38, + "learning_rate": 1.4342410343251683e-05, + "loss": 0.3038, + "step": 4390 + }, + { + "epoch": 0.38, + "learning_rate": 1.4339909348484589e-05, + "loss": 0.2928, + "step": 4391 + }, + { + "epoch": 0.38, + "learning_rate": 1.4337408019218728e-05, + "loss": 0.2984, + "step": 4392 + }, + { + "epoch": 0.38, + "learning_rate": 1.4334906355646887e-05, + "loss": 0.2754, + "step": 4393 + }, + { + "epoch": 0.38, + "learning_rate": 1.4332404357961884e-05, + "loss": 0.2935, + "step": 4394 + }, + { + "epoch": 0.38, + "learning_rate": 1.4329902026356564e-05, + "loss": 0.2723, + "step": 4395 + }, + { + "epoch": 0.38, + "learning_rate": 1.4327399361023785e-05, + "loss": 0.29, + "step": 4396 + }, + { + "epoch": 0.38, + "learning_rate": 1.4324896362156451e-05, + "loss": 0.2786, + "step": 4397 + }, + { + "epoch": 0.38, + "learning_rate": 1.432239302994747e-05, + "loss": 0.3416, + "step": 4398 + }, + { + "epoch": 0.38, + "learning_rate": 1.4319889364589794e-05, + "loss": 0.3063, + "step": 4399 + }, + { + "epoch": 0.38, + "learning_rate": 1.4317385366276393e-05, + "loss": 0.2725, + "step": 4400 + }, + { + "epoch": 0.38, + "learning_rate": 1.4314881035200259e-05, + "loss": 0.2886, + "step": 4401 + }, + { + "epoch": 0.38, + "learning_rate": 1.4312376371554417e-05, + "loss": 0.291, + "step": 4402 + }, + { + "epoch": 0.38, + "learning_rate": 1.430987137553191e-05, + "loss": 0.2755, + "step": 4403 + }, + { + "epoch": 0.38, + "learning_rate": 1.4307366047325814e-05, + "loss": 0.3079, + "step": 4404 + }, + { + "epoch": 0.38, + "learning_rate": 1.4304860387129225e-05, + "loss": 0.2746, + "step": 4405 + }, + { + "epoch": 0.38, + "learning_rate": 1.4302354395135269e-05, + "loss": 0.2892, + "step": 4406 + }, + { + "epoch": 0.38, + "learning_rate": 1.4299848071537097e-05, + "loss": 0.2817, + "step": 4407 + }, + { + "epoch": 0.38, + "learning_rate": 1.4297341416527881e-05, + "loss": 0.2639, + "step": 4408 + }, + { + "epoch": 0.38, + "learning_rate": 1.4294834430300822e-05, + "loss": 0.2954, + "step": 4409 + }, + { + "epoch": 0.38, + "learning_rate": 1.4292327113049145e-05, + "loss": 0.3021, + "step": 4410 + }, + { + "epoch": 0.38, + "learning_rate": 1.4289819464966104e-05, + "loss": 0.3441, + "step": 4411 + }, + { + "epoch": 0.38, + "learning_rate": 1.4287311486244975e-05, + "loss": 0.3043, + "step": 4412 + }, + { + "epoch": 0.38, + "learning_rate": 1.428480317707906e-05, + "loss": 0.2898, + "step": 4413 + }, + { + "epoch": 0.38, + "learning_rate": 1.4282294537661692e-05, + "loss": 0.3163, + "step": 4414 + }, + { + "epoch": 0.38, + "learning_rate": 1.4279785568186217e-05, + "loss": 0.2556, + "step": 4415 + }, + { + "epoch": 0.38, + "learning_rate": 1.4277276268846017e-05, + "loss": 0.3167, + "step": 4416 + }, + { + "epoch": 0.38, + "learning_rate": 1.4274766639834498e-05, + "loss": 0.2766, + "step": 4417 + }, + { + "epoch": 0.38, + "learning_rate": 1.4272256681345087e-05, + "loss": 0.2863, + "step": 4418 + }, + { + "epoch": 0.38, + "learning_rate": 1.4269746393571244e-05, + "loss": 0.2793, + "step": 4419 + }, + { + "epoch": 0.38, + "learning_rate": 1.4267235776706445e-05, + "loss": 0.3325, + "step": 4420 + }, + { + "epoch": 0.38, + "learning_rate": 1.4264724830944198e-05, + "loss": 0.2736, + "step": 4421 + }, + { + "epoch": 0.38, + "learning_rate": 1.4262213556478033e-05, + "loss": 0.2925, + "step": 4422 + }, + { + "epoch": 0.38, + "learning_rate": 1.4259701953501509e-05, + "loss": 0.2755, + "step": 4423 + }, + { + "epoch": 0.38, + "learning_rate": 1.4257190022208203e-05, + "loss": 0.2681, + "step": 4424 + }, + { + "epoch": 0.38, + "learning_rate": 1.4254677762791727e-05, + "loss": 0.3132, + "step": 4425 + }, + { + "epoch": 0.38, + "learning_rate": 1.425216517544571e-05, + "loss": 0.326, + "step": 4426 + }, + { + "epoch": 0.38, + "learning_rate": 1.4249652260363815e-05, + "loss": 0.6044, + "step": 4427 + }, + { + "epoch": 0.38, + "learning_rate": 1.4247139017739722e-05, + "loss": 0.3224, + "step": 4428 + }, + { + "epoch": 0.38, + "learning_rate": 1.4244625447767138e-05, + "loss": 0.311, + "step": 4429 + }, + { + "epoch": 0.38, + "learning_rate": 1.4242111550639797e-05, + "loss": 0.2689, + "step": 4430 + }, + { + "epoch": 0.38, + "learning_rate": 1.4239597326551459e-05, + "loss": 0.3358, + "step": 4431 + }, + { + "epoch": 0.38, + "learning_rate": 1.4237082775695907e-05, + "loss": 0.3406, + "step": 4432 + }, + { + "epoch": 0.38, + "learning_rate": 1.4234567898266954e-05, + "loss": 0.2959, + "step": 4433 + }, + { + "epoch": 0.38, + "learning_rate": 1.423205269445843e-05, + "loss": 0.3246, + "step": 4434 + }, + { + "epoch": 0.38, + "learning_rate": 1.422953716446419e-05, + "loss": 0.6313, + "step": 4435 + }, + { + "epoch": 0.38, + "learning_rate": 1.4227021308478129e-05, + "loss": 0.3161, + "step": 4436 + }, + { + "epoch": 0.38, + "learning_rate": 1.4224505126694153e-05, + "loss": 0.2886, + "step": 4437 + }, + { + "epoch": 0.38, + "learning_rate": 1.4221988619306192e-05, + "loss": 0.3649, + "step": 4438 + }, + { + "epoch": 0.38, + "learning_rate": 1.4219471786508212e-05, + "loss": 0.2962, + "step": 4439 + }, + { + "epoch": 0.38, + "learning_rate": 1.4216954628494195e-05, + "loss": 0.296, + "step": 4440 + }, + { + "epoch": 0.38, + "learning_rate": 1.4214437145458153e-05, + "loss": 0.2706, + "step": 4441 + }, + { + "epoch": 0.38, + "learning_rate": 1.4211919337594118e-05, + "loss": 0.2719, + "step": 4442 + }, + { + "epoch": 0.38, + "learning_rate": 1.420940120509615e-05, + "loss": 0.2982, + "step": 4443 + }, + { + "epoch": 0.38, + "learning_rate": 1.4206882748158341e-05, + "loss": 0.2864, + "step": 4444 + }, + { + "epoch": 0.38, + "learning_rate": 1.4204363966974798e-05, + "loss": 0.29, + "step": 4445 + }, + { + "epoch": 0.38, + "learning_rate": 1.420184486173965e-05, + "loss": 0.2858, + "step": 4446 + }, + { + "epoch": 0.38, + "learning_rate": 1.4199325432647067e-05, + "loss": 0.2722, + "step": 4447 + }, + { + "epoch": 0.38, + "learning_rate": 1.4196805679891225e-05, + "loss": 0.2625, + "step": 4448 + }, + { + "epoch": 0.38, + "learning_rate": 1.4194285603666337e-05, + "loss": 0.3297, + "step": 4449 + }, + { + "epoch": 0.38, + "learning_rate": 1.4191765204166643e-05, + "loss": 0.2593, + "step": 4450 + }, + { + "epoch": 0.38, + "learning_rate": 1.4189244481586398e-05, + "loss": 0.6046, + "step": 4451 + }, + { + "epoch": 0.38, + "learning_rate": 1.4186723436119887e-05, + "loss": 0.3358, + "step": 4452 + }, + { + "epoch": 0.38, + "learning_rate": 1.4184202067961422e-05, + "loss": 0.2955, + "step": 4453 + }, + { + "epoch": 0.38, + "learning_rate": 1.4181680377305336e-05, + "loss": 0.2835, + "step": 4454 + }, + { + "epoch": 0.38, + "learning_rate": 1.4179158364345986e-05, + "loss": 0.3147, + "step": 4455 + }, + { + "epoch": 0.38, + "learning_rate": 1.4176636029277764e-05, + "loss": 0.3148, + "step": 4456 + }, + { + "epoch": 0.38, + "learning_rate": 1.4174113372295071e-05, + "loss": 0.3585, + "step": 4457 + }, + { + "epoch": 0.38, + "learning_rate": 1.4171590393592346e-05, + "loss": 0.2871, + "step": 4458 + }, + { + "epoch": 0.38, + "learning_rate": 1.4169067093364047e-05, + "loss": 0.2687, + "step": 4459 + }, + { + "epoch": 0.38, + "learning_rate": 1.4166543471804653e-05, + "loss": 0.2679, + "step": 4460 + }, + { + "epoch": 0.38, + "learning_rate": 1.4164019529108677e-05, + "loss": 0.2631, + "step": 4461 + }, + { + "epoch": 0.38, + "learning_rate": 1.4161495265470649e-05, + "loss": 0.2864, + "step": 4462 + }, + { + "epoch": 0.38, + "learning_rate": 1.415897068108513e-05, + "loss": 0.3023, + "step": 4463 + }, + { + "epoch": 0.38, + "learning_rate": 1.4156445776146703e-05, + "loss": 0.2661, + "step": 4464 + }, + { + "epoch": 0.38, + "learning_rate": 1.415392055084997e-05, + "loss": 0.3284, + "step": 4465 + }, + { + "epoch": 0.38, + "learning_rate": 1.415139500538957e-05, + "loss": 0.3329, + "step": 4466 + }, + { + "epoch": 0.38, + "learning_rate": 1.4148869139960151e-05, + "loss": 0.3131, + "step": 4467 + }, + { + "epoch": 0.38, + "learning_rate": 1.4146342954756402e-05, + "loss": 0.253, + "step": 4468 + }, + { + "epoch": 0.38, + "learning_rate": 1.4143816449973025e-05, + "loss": 0.2968, + "step": 4469 + }, + { + "epoch": 0.38, + "learning_rate": 1.4141289625804748e-05, + "loss": 0.3346, + "step": 4470 + }, + { + "epoch": 0.38, + "learning_rate": 1.4138762482446335e-05, + "loss": 0.2781, + "step": 4471 + }, + { + "epoch": 0.38, + "learning_rate": 1.4136235020092558e-05, + "loss": 0.2485, + "step": 4472 + }, + { + "epoch": 0.38, + "learning_rate": 1.4133707238938222e-05, + "loss": 0.2839, + "step": 4473 + }, + { + "epoch": 0.38, + "learning_rate": 1.4131179139178157e-05, + "loss": 0.313, + "step": 4474 + }, + { + "epoch": 0.38, + "learning_rate": 1.412865072100722e-05, + "loss": 0.2728, + "step": 4475 + }, + { + "epoch": 0.38, + "learning_rate": 1.4126121984620283e-05, + "loss": 0.2833, + "step": 4476 + }, + { + "epoch": 0.38, + "learning_rate": 1.4123592930212251e-05, + "loss": 0.2365, + "step": 4477 + }, + { + "epoch": 0.38, + "learning_rate": 1.4121063557978051e-05, + "loss": 0.2928, + "step": 4478 + }, + { + "epoch": 0.38, + "learning_rate": 1.4118533868112637e-05, + "loss": 0.343, + "step": 4479 + }, + { + "epoch": 0.38, + "learning_rate": 1.411600386081098e-05, + "loss": 0.3048, + "step": 4480 + }, + { + "epoch": 0.38, + "learning_rate": 1.4113473536268083e-05, + "loss": 0.2992, + "step": 4481 + }, + { + "epoch": 0.38, + "learning_rate": 1.4110942894678971e-05, + "loss": 0.3042, + "step": 4482 + }, + { + "epoch": 0.38, + "learning_rate": 1.41084119362387e-05, + "loss": 0.2762, + "step": 4483 + }, + { + "epoch": 0.38, + "learning_rate": 1.4105880661142331e-05, + "loss": 0.2833, + "step": 4484 + }, + { + "epoch": 0.38, + "learning_rate": 1.4103349069584971e-05, + "loss": 0.2654, + "step": 4485 + }, + { + "epoch": 0.38, + "learning_rate": 1.4100817161761738e-05, + "loss": 0.3018, + "step": 4486 + }, + { + "epoch": 0.38, + "learning_rate": 1.409828493786778e-05, + "loss": 0.257, + "step": 4487 + }, + { + "epoch": 0.38, + "learning_rate": 1.409575239809827e-05, + "loss": 0.273, + "step": 4488 + }, + { + "epoch": 0.38, + "learning_rate": 1.4093219542648405e-05, + "loss": 0.2712, + "step": 4489 + }, + { + "epoch": 0.38, + "learning_rate": 1.4090686371713403e-05, + "loss": 0.2744, + "step": 4490 + }, + { + "epoch": 0.38, + "learning_rate": 1.4088152885488504e-05, + "loss": 0.2911, + "step": 4491 + }, + { + "epoch": 0.39, + "learning_rate": 1.4085619084168983e-05, + "loss": 0.2342, + "step": 4492 + }, + { + "epoch": 0.39, + "learning_rate": 1.4083084967950131e-05, + "loss": 0.2644, + "step": 4493 + }, + { + "epoch": 0.39, + "learning_rate": 1.4080550537027264e-05, + "loss": 0.2812, + "step": 4494 + }, + { + "epoch": 0.39, + "learning_rate": 1.4078015791595724e-05, + "loss": 0.2603, + "step": 4495 + }, + { + "epoch": 0.39, + "learning_rate": 1.407548073185088e-05, + "loss": 0.2922, + "step": 4496 + }, + { + "epoch": 0.39, + "learning_rate": 1.4072945357988118e-05, + "loss": 0.3272, + "step": 4497 + }, + { + "epoch": 0.39, + "learning_rate": 1.4070409670202849e-05, + "loss": 0.3045, + "step": 4498 + }, + { + "epoch": 0.39, + "learning_rate": 1.4067873668690517e-05, + "loss": 0.288, + "step": 4499 + }, + { + "epoch": 0.39, + "learning_rate": 1.4065337353646583e-05, + "loss": 0.3287, + "step": 4500 + }, + { + "epoch": 0.39, + "learning_rate": 1.4062800725266532e-05, + "loss": 0.2673, + "step": 4501 + }, + { + "epoch": 0.39, + "learning_rate": 1.406026378374588e-05, + "loss": 0.301, + "step": 4502 + }, + { + "epoch": 0.39, + "learning_rate": 1.4057726529280154e-05, + "loss": 0.2833, + "step": 4503 + }, + { + "epoch": 0.39, + "learning_rate": 1.4055188962064918e-05, + "loss": 0.3015, + "step": 4504 + }, + { + "epoch": 0.39, + "learning_rate": 1.4052651082295754e-05, + "loss": 0.2495, + "step": 4505 + }, + { + "epoch": 0.39, + "learning_rate": 1.405011289016827e-05, + "loss": 0.3339, + "step": 4506 + }, + { + "epoch": 0.39, + "learning_rate": 1.4047574385878095e-05, + "loss": 0.2712, + "step": 4507 + }, + { + "epoch": 0.39, + "learning_rate": 1.4045035569620886e-05, + "loss": 0.3035, + "step": 4508 + }, + { + "epoch": 0.39, + "learning_rate": 1.4042496441592323e-05, + "loss": 0.3135, + "step": 4509 + }, + { + "epoch": 0.39, + "learning_rate": 1.4039957001988112e-05, + "loss": 0.3093, + "step": 4510 + }, + { + "epoch": 0.39, + "learning_rate": 1.4037417251003972e-05, + "loss": 0.2986, + "step": 4511 + }, + { + "epoch": 0.39, + "learning_rate": 1.4034877188835662e-05, + "loss": 0.3021, + "step": 4512 + }, + { + "epoch": 0.39, + "learning_rate": 1.4032336815678957e-05, + "loss": 0.3362, + "step": 4513 + }, + { + "epoch": 0.39, + "learning_rate": 1.4029796131729652e-05, + "loss": 0.2817, + "step": 4514 + }, + { + "epoch": 0.39, + "learning_rate": 1.4027255137183575e-05, + "loss": 0.2631, + "step": 4515 + }, + { + "epoch": 0.39, + "learning_rate": 1.4024713832236571e-05, + "loss": 0.353, + "step": 4516 + }, + { + "epoch": 0.39, + "learning_rate": 1.4022172217084512e-05, + "loss": 0.2555, + "step": 4517 + }, + { + "epoch": 0.39, + "learning_rate": 1.4019630291923289e-05, + "loss": 0.2841, + "step": 4518 + }, + { + "epoch": 0.39, + "learning_rate": 1.4017088056948826e-05, + "loss": 0.2895, + "step": 4519 + }, + { + "epoch": 0.39, + "learning_rate": 1.4014545512357068e-05, + "loss": 0.2599, + "step": 4520 + }, + { + "epoch": 0.39, + "learning_rate": 1.4012002658343976e-05, + "loss": 0.2904, + "step": 4521 + }, + { + "epoch": 0.39, + "learning_rate": 1.4009459495105542e-05, + "loss": 0.2791, + "step": 4522 + }, + { + "epoch": 0.39, + "learning_rate": 1.4006916022837784e-05, + "loss": 0.2493, + "step": 4523 + }, + { + "epoch": 0.39, + "learning_rate": 1.4004372241736736e-05, + "loss": 0.2982, + "step": 4524 + }, + { + "epoch": 0.39, + "learning_rate": 1.4001828151998462e-05, + "loss": 0.289, + "step": 4525 + }, + { + "epoch": 0.39, + "learning_rate": 1.3999283753819047e-05, + "loss": 0.3278, + "step": 4526 + }, + { + "epoch": 0.39, + "learning_rate": 1.3996739047394601e-05, + "loss": 0.2567, + "step": 4527 + }, + { + "epoch": 0.39, + "learning_rate": 1.399419403292126e-05, + "loss": 0.2598, + "step": 4528 + }, + { + "epoch": 0.39, + "learning_rate": 1.3991648710595179e-05, + "loss": 0.2757, + "step": 4529 + }, + { + "epoch": 0.39, + "learning_rate": 1.3989103080612533e-05, + "loss": 0.2809, + "step": 4530 + }, + { + "epoch": 0.39, + "learning_rate": 1.3986557143169539e-05, + "loss": 0.2926, + "step": 4531 + }, + { + "epoch": 0.39, + "learning_rate": 1.3984010898462417e-05, + "loss": 0.2939, + "step": 4532 + }, + { + "epoch": 0.39, + "learning_rate": 1.3981464346687419e-05, + "loss": 0.2754, + "step": 4533 + }, + { + "epoch": 0.39, + "learning_rate": 1.3978917488040822e-05, + "loss": 0.2684, + "step": 4534 + }, + { + "epoch": 0.39, + "learning_rate": 1.3976370322718928e-05, + "loss": 0.6372, + "step": 4535 + }, + { + "epoch": 0.39, + "learning_rate": 1.3973822850918055e-05, + "loss": 0.2732, + "step": 4536 + }, + { + "epoch": 0.39, + "learning_rate": 1.3971275072834552e-05, + "loss": 0.2924, + "step": 4537 + }, + { + "epoch": 0.39, + "learning_rate": 1.3968726988664788e-05, + "loss": 0.277, + "step": 4538 + }, + { + "epoch": 0.39, + "learning_rate": 1.396617859860516e-05, + "loss": 0.3034, + "step": 4539 + }, + { + "epoch": 0.39, + "learning_rate": 1.3963629902852082e-05, + "loss": 0.2946, + "step": 4540 + }, + { + "epoch": 0.39, + "learning_rate": 1.3961080901601996e-05, + "loss": 0.2529, + "step": 4541 + }, + { + "epoch": 0.39, + "learning_rate": 1.3958531595051367e-05, + "loss": 0.2889, + "step": 4542 + }, + { + "epoch": 0.39, + "learning_rate": 1.3955981983396683e-05, + "loss": 0.2687, + "step": 4543 + }, + { + "epoch": 0.39, + "learning_rate": 1.3953432066834454e-05, + "loss": 0.282, + "step": 4544 + }, + { + "epoch": 0.39, + "learning_rate": 1.3950881845561214e-05, + "loss": 0.2836, + "step": 4545 + }, + { + "epoch": 0.39, + "learning_rate": 1.3948331319773525e-05, + "loss": 0.3462, + "step": 4546 + }, + { + "epoch": 0.39, + "learning_rate": 1.3945780489667968e-05, + "loss": 0.2766, + "step": 4547 + }, + { + "epoch": 0.39, + "learning_rate": 1.3943229355441145e-05, + "loss": 0.3083, + "step": 4548 + }, + { + "epoch": 0.39, + "learning_rate": 1.3940677917289689e-05, + "loss": 0.3028, + "step": 4549 + }, + { + "epoch": 0.39, + "learning_rate": 1.393812617541025e-05, + "loss": 0.2505, + "step": 4550 + }, + { + "epoch": 0.39, + "learning_rate": 1.3935574129999504e-05, + "loss": 0.2574, + "step": 4551 + }, + { + "epoch": 0.39, + "learning_rate": 1.3933021781254152e-05, + "loss": 0.2752, + "step": 4552 + }, + { + "epoch": 0.39, + "learning_rate": 1.3930469129370913e-05, + "loss": 0.3438, + "step": 4553 + }, + { + "epoch": 0.39, + "learning_rate": 1.3927916174546536e-05, + "loss": 0.2852, + "step": 4554 + }, + { + "epoch": 0.39, + "learning_rate": 1.3925362916977787e-05, + "loss": 0.3031, + "step": 4555 + }, + { + "epoch": 0.39, + "learning_rate": 1.3922809356861462e-05, + "loss": 0.3129, + "step": 4556 + }, + { + "epoch": 0.39, + "learning_rate": 1.3920255494394373e-05, + "loss": 0.3022, + "step": 4557 + }, + { + "epoch": 0.39, + "learning_rate": 1.3917701329773364e-05, + "loss": 0.2629, + "step": 4558 + }, + { + "epoch": 0.39, + "learning_rate": 1.3915146863195292e-05, + "loss": 0.2819, + "step": 4559 + }, + { + "epoch": 0.39, + "learning_rate": 1.3912592094857044e-05, + "loss": 0.2679, + "step": 4560 + }, + { + "epoch": 0.39, + "learning_rate": 1.3910037024955534e-05, + "loss": 0.2628, + "step": 4561 + }, + { + "epoch": 0.39, + "learning_rate": 1.3907481653687687e-05, + "loss": 0.2759, + "step": 4562 + }, + { + "epoch": 0.39, + "learning_rate": 1.390492598125046e-05, + "loss": 0.3445, + "step": 4563 + }, + { + "epoch": 0.39, + "learning_rate": 1.3902370007840835e-05, + "loss": 0.2772, + "step": 4564 + }, + { + "epoch": 0.39, + "learning_rate": 1.3899813733655814e-05, + "loss": 0.3097, + "step": 4565 + }, + { + "epoch": 0.39, + "learning_rate": 1.389725715889242e-05, + "loss": 0.2908, + "step": 4566 + }, + { + "epoch": 0.39, + "learning_rate": 1.3894700283747697e-05, + "loss": 0.3004, + "step": 4567 + }, + { + "epoch": 0.39, + "learning_rate": 1.3892143108418723e-05, + "loss": 0.6012, + "step": 4568 + }, + { + "epoch": 0.39, + "learning_rate": 1.388958563310259e-05, + "loss": 0.3231, + "step": 4569 + }, + { + "epoch": 0.39, + "learning_rate": 1.3887027857996416e-05, + "loss": 0.3204, + "step": 4570 + }, + { + "epoch": 0.39, + "learning_rate": 1.3884469783297339e-05, + "loss": 0.6304, + "step": 4571 + }, + { + "epoch": 0.39, + "learning_rate": 1.3881911409202525e-05, + "loss": 0.255, + "step": 4572 + }, + { + "epoch": 0.39, + "learning_rate": 1.3879352735909163e-05, + "loss": 0.3339, + "step": 4573 + }, + { + "epoch": 0.39, + "learning_rate": 1.387679376361446e-05, + "loss": 0.315, + "step": 4574 + }, + { + "epoch": 0.39, + "learning_rate": 1.3874234492515649e-05, + "loss": 0.3082, + "step": 4575 + }, + { + "epoch": 0.39, + "learning_rate": 1.3871674922809985e-05, + "loss": 0.2715, + "step": 4576 + }, + { + "epoch": 0.39, + "learning_rate": 1.386911505469475e-05, + "loss": 0.2662, + "step": 4577 + }, + { + "epoch": 0.39, + "learning_rate": 1.3866554888367243e-05, + "loss": 0.2839, + "step": 4578 + }, + { + "epoch": 0.39, + "learning_rate": 1.3863994424024792e-05, + "loss": 0.2803, + "step": 4579 + }, + { + "epoch": 0.39, + "learning_rate": 1.3861433661864744e-05, + "loss": 0.2603, + "step": 4580 + }, + { + "epoch": 0.39, + "learning_rate": 1.3858872602084467e-05, + "loss": 0.2668, + "step": 4581 + }, + { + "epoch": 0.39, + "learning_rate": 1.385631124488136e-05, + "loss": 0.2881, + "step": 4582 + }, + { + "epoch": 0.39, + "learning_rate": 1.3853749590452834e-05, + "loss": 0.28, + "step": 4583 + }, + { + "epoch": 0.39, + "learning_rate": 1.3851187638996331e-05, + "loss": 0.2723, + "step": 4584 + }, + { + "epoch": 0.39, + "learning_rate": 1.3848625390709315e-05, + "loss": 0.2834, + "step": 4585 + }, + { + "epoch": 0.39, + "learning_rate": 1.3846062845789275e-05, + "loss": 0.2627, + "step": 4586 + }, + { + "epoch": 0.39, + "learning_rate": 1.3843500004433708e-05, + "loss": 0.3146, + "step": 4587 + }, + { + "epoch": 0.39, + "learning_rate": 1.3840936866840155e-05, + "loss": 0.2851, + "step": 4588 + }, + { + "epoch": 0.39, + "learning_rate": 1.3838373433206167e-05, + "loss": 0.3091, + "step": 4589 + }, + { + "epoch": 0.39, + "learning_rate": 1.3835809703729322e-05, + "loss": 0.3173, + "step": 4590 + }, + { + "epoch": 0.39, + "learning_rate": 1.3833245678607215e-05, + "loss": 0.2961, + "step": 4591 + }, + { + "epoch": 0.39, + "learning_rate": 1.3830681358037477e-05, + "loss": 0.2628, + "step": 4592 + }, + { + "epoch": 0.39, + "learning_rate": 1.3828116742217744e-05, + "loss": 0.3027, + "step": 4593 + }, + { + "epoch": 0.39, + "learning_rate": 1.3825551831345685e-05, + "loss": 0.2852, + "step": 4594 + }, + { + "epoch": 0.39, + "learning_rate": 1.3822986625618997e-05, + "loss": 0.2632, + "step": 4595 + }, + { + "epoch": 0.39, + "learning_rate": 1.382042112523539e-05, + "loss": 0.2593, + "step": 4596 + }, + { + "epoch": 0.39, + "learning_rate": 1.38178553303926e-05, + "loss": 0.3315, + "step": 4597 + }, + { + "epoch": 0.39, + "learning_rate": 1.3815289241288383e-05, + "loss": 0.2971, + "step": 4598 + }, + { + "epoch": 0.39, + "learning_rate": 1.3812722858120528e-05, + "loss": 0.2962, + "step": 4599 + }, + { + "epoch": 0.39, + "learning_rate": 1.3810156181086832e-05, + "loss": 0.2778, + "step": 4600 + }, + { + "epoch": 0.39, + "learning_rate": 1.3807589210385123e-05, + "loss": 0.3069, + "step": 4601 + }, + { + "epoch": 0.39, + "learning_rate": 1.3805021946213251e-05, + "loss": 0.2755, + "step": 4602 + }, + { + "epoch": 0.39, + "learning_rate": 1.3802454388769091e-05, + "loss": 0.3104, + "step": 4603 + }, + { + "epoch": 0.39, + "learning_rate": 1.3799886538250534e-05, + "loss": 0.3015, + "step": 4604 + }, + { + "epoch": 0.39, + "learning_rate": 1.3797318394855496e-05, + "loss": 0.2944, + "step": 4605 + }, + { + "epoch": 0.39, + "learning_rate": 1.3794749958781924e-05, + "loss": 0.2846, + "step": 4606 + }, + { + "epoch": 0.39, + "learning_rate": 1.3792181230227773e-05, + "loss": 0.3088, + "step": 4607 + }, + { + "epoch": 0.39, + "learning_rate": 1.3789612209391031e-05, + "loss": 0.3127, + "step": 4608 + }, + { + "epoch": 0.4, + "learning_rate": 1.3787042896469705e-05, + "loss": 0.2922, + "step": 4609 + }, + { + "epoch": 0.4, + "learning_rate": 1.3784473291661824e-05, + "loss": 0.2703, + "step": 4610 + }, + { + "epoch": 0.4, + "learning_rate": 1.3781903395165441e-05, + "loss": 0.309, + "step": 4611 + }, + { + "epoch": 0.4, + "learning_rate": 1.3779333207178632e-05, + "loss": 0.3019, + "step": 4612 + }, + { + "epoch": 0.4, + "learning_rate": 1.3776762727899494e-05, + "loss": 0.2759, + "step": 4613 + }, + { + "epoch": 0.4, + "learning_rate": 1.3774191957526144e-05, + "loss": 0.2824, + "step": 4614 + }, + { + "epoch": 0.4, + "learning_rate": 1.3771620896256732e-05, + "loss": 0.2808, + "step": 4615 + }, + { + "epoch": 0.4, + "learning_rate": 1.3769049544289415e-05, + "loss": 0.2684, + "step": 4616 + }, + { + "epoch": 0.4, + "learning_rate": 1.3766477901822379e-05, + "loss": 0.2996, + "step": 4617 + }, + { + "epoch": 0.4, + "learning_rate": 1.3763905969053841e-05, + "loss": 0.2628, + "step": 4618 + }, + { + "epoch": 0.4, + "learning_rate": 1.3761333746182028e-05, + "loss": 0.2499, + "step": 4619 + }, + { + "epoch": 0.4, + "learning_rate": 1.3758761233405195e-05, + "loss": 0.2328, + "step": 4620 + }, + { + "epoch": 0.4, + "learning_rate": 1.3756188430921618e-05, + "loss": 0.234, + "step": 4621 + }, + { + "epoch": 0.4, + "learning_rate": 1.3753615338929598e-05, + "loss": 0.35, + "step": 4622 + }, + { + "epoch": 0.4, + "learning_rate": 1.3751041957627456e-05, + "loss": 0.3224, + "step": 4623 + }, + { + "epoch": 0.4, + "learning_rate": 1.374846828721353e-05, + "loss": 0.3289, + "step": 4624 + }, + { + "epoch": 0.4, + "learning_rate": 1.3745894327886192e-05, + "loss": 0.2624, + "step": 4625 + }, + { + "epoch": 0.4, + "learning_rate": 1.3743320079843828e-05, + "loss": 0.2701, + "step": 4626 + }, + { + "epoch": 0.4, + "learning_rate": 1.3740745543284852e-05, + "loss": 0.2567, + "step": 4627 + }, + { + "epoch": 0.4, + "learning_rate": 1.3738170718407689e-05, + "loss": 0.3145, + "step": 4628 + }, + { + "epoch": 0.4, + "learning_rate": 1.37355956054108e-05, + "loss": 0.3188, + "step": 4629 + }, + { + "epoch": 0.4, + "learning_rate": 1.373302020449266e-05, + "loss": 0.3017, + "step": 4630 + }, + { + "epoch": 0.4, + "learning_rate": 1.3730444515851766e-05, + "loss": 0.2667, + "step": 4631 + }, + { + "epoch": 0.4, + "learning_rate": 1.3727868539686641e-05, + "loss": 0.3004, + "step": 4632 + }, + { + "epoch": 0.4, + "learning_rate": 1.3725292276195832e-05, + "loss": 0.3397, + "step": 4633 + }, + { + "epoch": 0.4, + "learning_rate": 1.3722715725577902e-05, + "loss": 0.3406, + "step": 4634 + }, + { + "epoch": 0.4, + "learning_rate": 1.3720138888031436e-05, + "loss": 0.2543, + "step": 4635 + }, + { + "epoch": 0.4, + "learning_rate": 1.3717561763755045e-05, + "loss": 0.2812, + "step": 4636 + }, + { + "epoch": 0.4, + "learning_rate": 1.3714984352947365e-05, + "loss": 0.3057, + "step": 4637 + }, + { + "epoch": 0.4, + "learning_rate": 1.3712406655807047e-05, + "loss": 0.3049, + "step": 4638 + }, + { + "epoch": 0.4, + "learning_rate": 1.3709828672532766e-05, + "loss": 0.2825, + "step": 4639 + }, + { + "epoch": 0.4, + "learning_rate": 1.3707250403323222e-05, + "loss": 0.2918, + "step": 4640 + }, + { + "epoch": 0.4, + "learning_rate": 1.3704671848377136e-05, + "loss": 0.272, + "step": 4641 + }, + { + "epoch": 0.4, + "learning_rate": 1.3702093007893249e-05, + "loss": 0.2676, + "step": 4642 + }, + { + "epoch": 0.4, + "learning_rate": 1.3699513882070323e-05, + "loss": 0.2919, + "step": 4643 + }, + { + "epoch": 0.4, + "learning_rate": 1.369693447110715e-05, + "loss": 0.277, + "step": 4644 + }, + { + "epoch": 0.4, + "learning_rate": 1.3694354775202534e-05, + "loss": 0.6479, + "step": 4645 + }, + { + "epoch": 0.4, + "learning_rate": 1.3691774794555306e-05, + "loss": 0.3091, + "step": 4646 + }, + { + "epoch": 0.4, + "learning_rate": 1.368919452936432e-05, + "loss": 0.309, + "step": 4647 + }, + { + "epoch": 0.4, + "learning_rate": 1.3686613979828444e-05, + "loss": 0.2812, + "step": 4648 + }, + { + "epoch": 0.4, + "learning_rate": 1.3684033146146585e-05, + "loss": 0.3076, + "step": 4649 + }, + { + "epoch": 0.4, + "learning_rate": 1.368145202851765e-05, + "loss": 0.2703, + "step": 4650 + }, + { + "epoch": 0.4, + "learning_rate": 1.3678870627140585e-05, + "loss": 0.2853, + "step": 4651 + }, + { + "epoch": 0.4, + "learning_rate": 1.3676288942214348e-05, + "loss": 0.3292, + "step": 4652 + }, + { + "epoch": 0.4, + "learning_rate": 1.3673706973937928e-05, + "loss": 0.3247, + "step": 4653 + }, + { + "epoch": 0.4, + "learning_rate": 1.3671124722510325e-05, + "loss": 0.2856, + "step": 4654 + }, + { + "epoch": 0.4, + "learning_rate": 1.3668542188130567e-05, + "loss": 0.253, + "step": 4655 + }, + { + "epoch": 0.4, + "learning_rate": 1.3665959370997706e-05, + "loss": 0.2986, + "step": 4656 + }, + { + "epoch": 0.4, + "learning_rate": 1.3663376271310809e-05, + "loss": 0.3129, + "step": 4657 + }, + { + "epoch": 0.4, + "learning_rate": 1.3660792889268967e-05, + "loss": 0.2698, + "step": 4658 + }, + { + "epoch": 0.4, + "learning_rate": 1.3658209225071301e-05, + "loss": 0.2264, + "step": 4659 + }, + { + "epoch": 0.4, + "learning_rate": 1.3655625278916947e-05, + "loss": 0.3402, + "step": 4660 + }, + { + "epoch": 0.4, + "learning_rate": 1.3653041051005056e-05, + "loss": 0.2313, + "step": 4661 + }, + { + "epoch": 0.4, + "learning_rate": 1.3650456541534811e-05, + "loss": 0.2849, + "step": 4662 + }, + { + "epoch": 0.4, + "learning_rate": 1.3647871750705412e-05, + "loss": 0.2709, + "step": 4663 + }, + { + "epoch": 0.4, + "learning_rate": 1.3645286678716084e-05, + "loss": 0.2778, + "step": 4664 + }, + { + "epoch": 0.4, + "learning_rate": 1.3642701325766073e-05, + "loss": 0.2761, + "step": 4665 + }, + { + "epoch": 0.4, + "learning_rate": 1.364011569205464e-05, + "loss": 0.2565, + "step": 4666 + }, + { + "epoch": 0.4, + "learning_rate": 1.3637529777781077e-05, + "loss": 0.2918, + "step": 4667 + }, + { + "epoch": 0.4, + "learning_rate": 1.3634943583144693e-05, + "loss": 0.2708, + "step": 4668 + }, + { + "epoch": 0.4, + "learning_rate": 1.3632357108344819e-05, + "loss": 0.2958, + "step": 4669 + }, + { + "epoch": 0.4, + "learning_rate": 1.3629770353580804e-05, + "loss": 0.2606, + "step": 4670 + }, + { + "epoch": 0.4, + "learning_rate": 1.3627183319052026e-05, + "loss": 0.2998, + "step": 4671 + }, + { + "epoch": 0.4, + "learning_rate": 1.3624596004957884e-05, + "loss": 0.2653, + "step": 4672 + }, + { + "epoch": 0.4, + "learning_rate": 1.3622008411497787e-05, + "loss": 0.2742, + "step": 4673 + }, + { + "epoch": 0.4, + "learning_rate": 1.361942053887118e-05, + "loss": 0.3118, + "step": 4674 + }, + { + "epoch": 0.4, + "learning_rate": 1.3616832387277525e-05, + "loss": 0.239, + "step": 4675 + }, + { + "epoch": 0.4, + "learning_rate": 1.3614243956916297e-05, + "loss": 0.2817, + "step": 4676 + }, + { + "epoch": 0.4, + "learning_rate": 1.3611655247987004e-05, + "loss": 0.2899, + "step": 4677 + }, + { + "epoch": 0.4, + "learning_rate": 1.360906626068917e-05, + "loss": 0.2753, + "step": 4678 + }, + { + "epoch": 0.4, + "learning_rate": 1.3606476995222344e-05, + "loss": 0.2676, + "step": 4679 + }, + { + "epoch": 0.4, + "learning_rate": 1.3603887451786088e-05, + "loss": 0.3101, + "step": 4680 + }, + { + "epoch": 0.4, + "learning_rate": 1.3601297630579996e-05, + "loss": 0.3201, + "step": 4681 + }, + { + "epoch": 0.4, + "learning_rate": 1.359870753180368e-05, + "loss": 0.2611, + "step": 4682 + }, + { + "epoch": 0.4, + "learning_rate": 1.3596117155656763e-05, + "loss": 0.2714, + "step": 4683 + }, + { + "epoch": 0.4, + "learning_rate": 1.3593526502338909e-05, + "loss": 0.2728, + "step": 4684 + }, + { + "epoch": 0.4, + "learning_rate": 1.3590935572049787e-05, + "loss": 0.3516, + "step": 4685 + }, + { + "epoch": 0.4, + "learning_rate": 1.3588344364989096e-05, + "loss": 0.2943, + "step": 4686 + }, + { + "epoch": 0.4, + "learning_rate": 1.358575288135655e-05, + "loss": 0.2857, + "step": 4687 + }, + { + "epoch": 0.4, + "learning_rate": 1.358316112135189e-05, + "loss": 0.3317, + "step": 4688 + }, + { + "epoch": 0.4, + "learning_rate": 1.3580569085174877e-05, + "loss": 0.2747, + "step": 4689 + }, + { + "epoch": 0.4, + "learning_rate": 1.357797677302529e-05, + "loss": 0.2361, + "step": 4690 + }, + { + "epoch": 0.4, + "learning_rate": 1.3575384185102933e-05, + "loss": 0.2667, + "step": 4691 + }, + { + "epoch": 0.4, + "learning_rate": 1.357279132160763e-05, + "loss": 0.2779, + "step": 4692 + }, + { + "epoch": 0.4, + "learning_rate": 1.3570198182739222e-05, + "loss": 0.2538, + "step": 4693 + }, + { + "epoch": 0.4, + "learning_rate": 1.3567604768697585e-05, + "loss": 0.2997, + "step": 4694 + }, + { + "epoch": 0.4, + "learning_rate": 1.3565011079682597e-05, + "loss": 0.2556, + "step": 4695 + }, + { + "epoch": 0.4, + "learning_rate": 1.356241711589417e-05, + "loss": 0.2922, + "step": 4696 + }, + { + "epoch": 0.4, + "learning_rate": 1.3559822877532234e-05, + "loss": 0.6287, + "step": 4697 + }, + { + "epoch": 0.4, + "learning_rate": 1.3557228364796742e-05, + "loss": 0.3558, + "step": 4698 + }, + { + "epoch": 0.4, + "learning_rate": 1.3554633577887663e-05, + "loss": 0.2886, + "step": 4699 + }, + { + "epoch": 0.4, + "learning_rate": 1.3552038517004991e-05, + "loss": 0.3183, + "step": 4700 + }, + { + "epoch": 0.4, + "learning_rate": 1.3549443182348743e-05, + "loss": 0.2421, + "step": 4701 + }, + { + "epoch": 0.4, + "learning_rate": 1.3546847574118951e-05, + "loss": 0.3222, + "step": 4702 + }, + { + "epoch": 0.4, + "learning_rate": 1.3544251692515675e-05, + "loss": 0.3259, + "step": 4703 + }, + { + "epoch": 0.4, + "learning_rate": 1.3541655537738992e-05, + "loss": 0.2913, + "step": 4704 + }, + { + "epoch": 0.4, + "learning_rate": 1.3539059109988999e-05, + "loss": 0.2619, + "step": 4705 + }, + { + "epoch": 0.4, + "learning_rate": 1.3536462409465816e-05, + "loss": 0.3214, + "step": 4706 + }, + { + "epoch": 0.4, + "learning_rate": 1.3533865436369584e-05, + "loss": 0.3145, + "step": 4707 + }, + { + "epoch": 0.4, + "learning_rate": 1.3531268190900467e-05, + "loss": 0.3072, + "step": 4708 + }, + { + "epoch": 0.4, + "learning_rate": 1.3528670673258645e-05, + "loss": 0.269, + "step": 4709 + }, + { + "epoch": 0.4, + "learning_rate": 1.3526072883644326e-05, + "loss": 0.2829, + "step": 4710 + }, + { + "epoch": 0.4, + "learning_rate": 1.3523474822257729e-05, + "loss": 0.3028, + "step": 4711 + }, + { + "epoch": 0.4, + "learning_rate": 1.3520876489299104e-05, + "loss": 0.2947, + "step": 4712 + }, + { + "epoch": 0.4, + "learning_rate": 1.3518277884968718e-05, + "loss": 0.2807, + "step": 4713 + }, + { + "epoch": 0.4, + "learning_rate": 1.3515679009466856e-05, + "loss": 0.3173, + "step": 4714 + }, + { + "epoch": 0.4, + "learning_rate": 1.3513079862993825e-05, + "loss": 0.2651, + "step": 4715 + }, + { + "epoch": 0.4, + "learning_rate": 1.3510480445749958e-05, + "loss": 0.2939, + "step": 4716 + }, + { + "epoch": 0.4, + "learning_rate": 1.3507880757935605e-05, + "loss": 0.3118, + "step": 4717 + }, + { + "epoch": 0.4, + "learning_rate": 1.3505280799751134e-05, + "loss": 0.2829, + "step": 4718 + }, + { + "epoch": 0.4, + "learning_rate": 1.3502680571396943e-05, + "loss": 0.3098, + "step": 4719 + }, + { + "epoch": 0.4, + "learning_rate": 1.3500080073073436e-05, + "loss": 0.3193, + "step": 4720 + }, + { + "epoch": 0.4, + "learning_rate": 1.3497479304981053e-05, + "loss": 0.2904, + "step": 4721 + }, + { + "epoch": 0.4, + "learning_rate": 1.349487826732025e-05, + "loss": 0.2912, + "step": 4722 + }, + { + "epoch": 0.4, + "learning_rate": 1.3492276960291495e-05, + "loss": 0.3058, + "step": 4723 + }, + { + "epoch": 0.4, + "learning_rate": 1.3489675384095291e-05, + "loss": 0.2505, + "step": 4724 + }, + { + "epoch": 0.41, + "learning_rate": 1.3487073538932149e-05, + "loss": 0.3296, + "step": 4725 + }, + { + "epoch": 0.41, + "learning_rate": 1.348447142500261e-05, + "loss": 0.5804, + "step": 4726 + }, + { + "epoch": 0.41, + "learning_rate": 1.348186904250723e-05, + "loss": 0.3107, + "step": 4727 + }, + { + "epoch": 0.41, + "learning_rate": 1.3479266391646588e-05, + "loss": 0.2668, + "step": 4728 + }, + { + "epoch": 0.41, + "learning_rate": 1.3476663472621286e-05, + "loss": 0.2903, + "step": 4729 + }, + { + "epoch": 0.41, + "learning_rate": 1.347406028563194e-05, + "loss": 0.2913, + "step": 4730 + }, + { + "epoch": 0.41, + "learning_rate": 1.3471456830879195e-05, + "loss": 0.3231, + "step": 4731 + }, + { + "epoch": 0.41, + "learning_rate": 1.3468853108563709e-05, + "loss": 0.2511, + "step": 4732 + }, + { + "epoch": 0.41, + "learning_rate": 1.3466249118886165e-05, + "loss": 0.2686, + "step": 4733 + }, + { + "epoch": 0.41, + "learning_rate": 1.3463644862047267e-05, + "loss": 0.2515, + "step": 4734 + }, + { + "epoch": 0.41, + "learning_rate": 1.3461040338247737e-05, + "loss": 0.3395, + "step": 4735 + }, + { + "epoch": 0.41, + "learning_rate": 1.3458435547688323e-05, + "loss": 0.2753, + "step": 4736 + }, + { + "epoch": 0.41, + "learning_rate": 1.3455830490569782e-05, + "loss": 0.2785, + "step": 4737 + }, + { + "epoch": 0.41, + "learning_rate": 1.3453225167092902e-05, + "loss": 0.2432, + "step": 4738 + }, + { + "epoch": 0.41, + "learning_rate": 1.3450619577458488e-05, + "loss": 0.2924, + "step": 4739 + }, + { + "epoch": 0.41, + "learning_rate": 1.344801372186737e-05, + "loss": 0.3099, + "step": 4740 + }, + { + "epoch": 0.41, + "learning_rate": 1.344540760052039e-05, + "loss": 0.299, + "step": 4741 + }, + { + "epoch": 0.41, + "learning_rate": 1.3442801213618417e-05, + "loss": 0.3189, + "step": 4742 + }, + { + "epoch": 0.41, + "learning_rate": 1.344019456136234e-05, + "loss": 0.2726, + "step": 4743 + }, + { + "epoch": 0.41, + "learning_rate": 1.3437587643953062e-05, + "loss": 0.3393, + "step": 4744 + }, + { + "epoch": 0.41, + "learning_rate": 1.3434980461591514e-05, + "loss": 0.2943, + "step": 4745 + }, + { + "epoch": 0.41, + "learning_rate": 1.3432373014478644e-05, + "loss": 0.3002, + "step": 4746 + }, + { + "epoch": 0.41, + "learning_rate": 1.3429765302815423e-05, + "loss": 0.2913, + "step": 4747 + }, + { + "epoch": 0.41, + "learning_rate": 1.3427157326802843e-05, + "loss": 0.2818, + "step": 4748 + }, + { + "epoch": 0.41, + "learning_rate": 1.3424549086641905e-05, + "loss": 0.2925, + "step": 4749 + }, + { + "epoch": 0.41, + "learning_rate": 1.3421940582533645e-05, + "loss": 0.3036, + "step": 4750 + }, + { + "epoch": 0.41, + "learning_rate": 1.3419331814679115e-05, + "loss": 0.3173, + "step": 4751 + }, + { + "epoch": 0.41, + "learning_rate": 1.3416722783279386e-05, + "loss": 0.2527, + "step": 4752 + }, + { + "epoch": 0.41, + "learning_rate": 1.3414113488535542e-05, + "loss": 0.343, + "step": 4753 + }, + { + "epoch": 0.41, + "learning_rate": 1.3411503930648704e-05, + "loss": 0.2633, + "step": 4754 + }, + { + "epoch": 0.41, + "learning_rate": 1.3408894109820002e-05, + "loss": 0.2786, + "step": 4755 + }, + { + "epoch": 0.41, + "learning_rate": 1.340628402625058e-05, + "loss": 0.2847, + "step": 4756 + }, + { + "epoch": 0.41, + "learning_rate": 1.340367368014162e-05, + "loss": 0.2754, + "step": 4757 + }, + { + "epoch": 0.41, + "learning_rate": 1.3401063071694309e-05, + "loss": 0.3082, + "step": 4758 + }, + { + "epoch": 0.41, + "learning_rate": 1.339845220110986e-05, + "loss": 0.3127, + "step": 4759 + }, + { + "epoch": 0.41, + "learning_rate": 1.3395841068589513e-05, + "loss": 0.2743, + "step": 4760 + }, + { + "epoch": 0.41, + "learning_rate": 1.3393229674334512e-05, + "loss": 0.2956, + "step": 4761 + }, + { + "epoch": 0.41, + "learning_rate": 1.3390618018546135e-05, + "loss": 0.3106, + "step": 4762 + }, + { + "epoch": 0.41, + "learning_rate": 1.3388006101425674e-05, + "loss": 0.2979, + "step": 4763 + }, + { + "epoch": 0.41, + "learning_rate": 1.3385393923174443e-05, + "loss": 0.3387, + "step": 4764 + }, + { + "epoch": 0.41, + "learning_rate": 1.3382781483993777e-05, + "loss": 0.3207, + "step": 4765 + }, + { + "epoch": 0.41, + "learning_rate": 1.3380168784085028e-05, + "loss": 0.2396, + "step": 4766 + }, + { + "epoch": 0.41, + "learning_rate": 1.3377555823649573e-05, + "loss": 0.3193, + "step": 4767 + }, + { + "epoch": 0.41, + "learning_rate": 1.3374942602888803e-05, + "loss": 0.3192, + "step": 4768 + }, + { + "epoch": 0.41, + "learning_rate": 1.3372329122004135e-05, + "loss": 0.2903, + "step": 4769 + }, + { + "epoch": 0.41, + "learning_rate": 1.3369715381197e-05, + "loss": 0.3156, + "step": 4770 + }, + { + "epoch": 0.41, + "learning_rate": 1.3367101380668852e-05, + "loss": 0.3001, + "step": 4771 + }, + { + "epoch": 0.41, + "learning_rate": 1.3364487120621168e-05, + "loss": 0.2541, + "step": 4772 + }, + { + "epoch": 0.41, + "learning_rate": 1.3361872601255441e-05, + "loss": 0.3005, + "step": 4773 + }, + { + "epoch": 0.41, + "learning_rate": 1.3359257822773187e-05, + "loss": 0.2616, + "step": 4774 + }, + { + "epoch": 0.41, + "learning_rate": 1.3356642785375937e-05, + "loss": 0.2515, + "step": 4775 + }, + { + "epoch": 0.41, + "learning_rate": 1.3354027489265246e-05, + "loss": 0.2652, + "step": 4776 + }, + { + "epoch": 0.41, + "learning_rate": 1.335141193464269e-05, + "loss": 0.2809, + "step": 4777 + }, + { + "epoch": 0.41, + "learning_rate": 1.3348796121709862e-05, + "loss": 0.2805, + "step": 4778 + }, + { + "epoch": 0.41, + "learning_rate": 1.3346180050668376e-05, + "loss": 0.2776, + "step": 4779 + }, + { + "epoch": 0.41, + "learning_rate": 1.3343563721719865e-05, + "loss": 0.2867, + "step": 4780 + }, + { + "epoch": 0.41, + "learning_rate": 1.3340947135065986e-05, + "loss": 0.3054, + "step": 4781 + }, + { + "epoch": 0.41, + "learning_rate": 1.3338330290908408e-05, + "loss": 0.2798, + "step": 4782 + }, + { + "epoch": 0.41, + "learning_rate": 1.3335713189448824e-05, + "loss": 0.3165, + "step": 4783 + }, + { + "epoch": 0.41, + "learning_rate": 1.3333095830888954e-05, + "loss": 0.2949, + "step": 4784 + }, + { + "epoch": 0.41, + "learning_rate": 1.3330478215430523e-05, + "loss": 0.2692, + "step": 4785 + }, + { + "epoch": 0.41, + "learning_rate": 1.332786034327529e-05, + "loss": 0.317, + "step": 4786 + }, + { + "epoch": 0.41, + "learning_rate": 1.3325242214625022e-05, + "loss": 0.2678, + "step": 4787 + }, + { + "epoch": 0.41, + "learning_rate": 1.332262382968152e-05, + "loss": 0.3068, + "step": 4788 + }, + { + "epoch": 0.41, + "learning_rate": 1.3320005188646587e-05, + "loss": 0.3077, + "step": 4789 + }, + { + "epoch": 0.41, + "learning_rate": 1.331738629172206e-05, + "loss": 0.2463, + "step": 4790 + }, + { + "epoch": 0.41, + "learning_rate": 1.3314767139109786e-05, + "loss": 0.2664, + "step": 4791 + }, + { + "epoch": 0.41, + "learning_rate": 1.3312147731011642e-05, + "loss": 0.3241, + "step": 4792 + }, + { + "epoch": 0.41, + "learning_rate": 1.3309528067629518e-05, + "loss": 0.2742, + "step": 4793 + }, + { + "epoch": 0.41, + "learning_rate": 1.330690814916532e-05, + "loss": 0.2962, + "step": 4794 + }, + { + "epoch": 0.41, + "learning_rate": 1.3304287975820985e-05, + "loss": 0.3509, + "step": 4795 + }, + { + "epoch": 0.41, + "learning_rate": 1.3301667547798458e-05, + "loss": 0.2891, + "step": 4796 + }, + { + "epoch": 0.41, + "learning_rate": 1.3299046865299713e-05, + "loss": 0.2622, + "step": 4797 + }, + { + "epoch": 0.41, + "learning_rate": 1.3296425928526735e-05, + "loss": 0.2841, + "step": 4798 + }, + { + "epoch": 0.41, + "learning_rate": 1.3293804737681533e-05, + "loss": 0.3107, + "step": 4799 + }, + { + "epoch": 0.41, + "learning_rate": 1.3291183292966141e-05, + "loss": 0.3146, + "step": 4800 + }, + { + "epoch": 0.41, + "learning_rate": 1.3288561594582599e-05, + "loss": 0.2556, + "step": 4801 + }, + { + "epoch": 0.41, + "learning_rate": 1.3285939642732979e-05, + "loss": 0.3171, + "step": 4802 + }, + { + "epoch": 0.41, + "learning_rate": 1.3283317437619369e-05, + "loss": 0.27, + "step": 4803 + }, + { + "epoch": 0.41, + "learning_rate": 1.3280694979443873e-05, + "loss": 0.3049, + "step": 4804 + }, + { + "epoch": 0.41, + "learning_rate": 1.3278072268408621e-05, + "loss": 0.2706, + "step": 4805 + }, + { + "epoch": 0.41, + "learning_rate": 1.3275449304715753e-05, + "loss": 0.2852, + "step": 4806 + }, + { + "epoch": 0.41, + "learning_rate": 1.3272826088567441e-05, + "loss": 0.2865, + "step": 4807 + }, + { + "epoch": 0.41, + "learning_rate": 1.3270202620165861e-05, + "loss": 0.3076, + "step": 4808 + }, + { + "epoch": 0.41, + "learning_rate": 1.3267578899713223e-05, + "loss": 0.3184, + "step": 4809 + }, + { + "epoch": 0.41, + "learning_rate": 1.3264954927411751e-05, + "loss": 0.2737, + "step": 4810 + }, + { + "epoch": 0.41, + "learning_rate": 1.3262330703463682e-05, + "loss": 0.291, + "step": 4811 + }, + { + "epoch": 0.41, + "learning_rate": 1.3259706228071286e-05, + "loss": 0.2676, + "step": 4812 + }, + { + "epoch": 0.41, + "learning_rate": 1.3257081501436839e-05, + "loss": 0.313, + "step": 4813 + }, + { + "epoch": 0.41, + "learning_rate": 1.3254456523762643e-05, + "loss": 0.2609, + "step": 4814 + }, + { + "epoch": 0.41, + "learning_rate": 1.3251831295251019e-05, + "loss": 0.2407, + "step": 4815 + }, + { + "epoch": 0.41, + "learning_rate": 1.3249205816104307e-05, + "loss": 0.2679, + "step": 4816 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246580086524868e-05, + "loss": 0.3162, + "step": 4817 + }, + { + "epoch": 0.41, + "learning_rate": 1.3243954106715074e-05, + "loss": 0.3313, + "step": 4818 + }, + { + "epoch": 0.41, + "learning_rate": 1.3241327876877328e-05, + "loss": 0.2509, + "step": 4819 + }, + { + "epoch": 0.41, + "learning_rate": 1.3238701397214044e-05, + "loss": 0.6091, + "step": 4820 + }, + { + "epoch": 0.41, + "learning_rate": 1.3236074667927659e-05, + "loss": 0.3187, + "step": 4821 + }, + { + "epoch": 0.41, + "learning_rate": 1.3233447689220629e-05, + "loss": 0.2988, + "step": 4822 + }, + { + "epoch": 0.41, + "learning_rate": 1.3230820461295429e-05, + "loss": 0.2989, + "step": 4823 + }, + { + "epoch": 0.41, + "learning_rate": 1.3228192984354552e-05, + "loss": 0.2506, + "step": 4824 + }, + { + "epoch": 0.41, + "learning_rate": 1.3225565258600507e-05, + "loss": 0.2597, + "step": 4825 + }, + { + "epoch": 0.41, + "learning_rate": 1.3222937284235835e-05, + "loss": 0.2708, + "step": 4826 + }, + { + "epoch": 0.41, + "learning_rate": 1.3220309061463081e-05, + "loss": 0.3536, + "step": 4827 + }, + { + "epoch": 0.41, + "learning_rate": 1.3217680590484813e-05, + "loss": 0.2786, + "step": 4828 + }, + { + "epoch": 0.41, + "learning_rate": 1.3215051871503628e-05, + "loss": 0.2586, + "step": 4829 + }, + { + "epoch": 0.41, + "learning_rate": 1.321242290472213e-05, + "loss": 0.2776, + "step": 4830 + }, + { + "epoch": 0.41, + "learning_rate": 1.3209793690342947e-05, + "loss": 0.2984, + "step": 4831 + }, + { + "epoch": 0.41, + "learning_rate": 1.3207164228568725e-05, + "loss": 0.2921, + "step": 4832 + }, + { + "epoch": 0.41, + "learning_rate": 1.3204534519602133e-05, + "loss": 0.6091, + "step": 4833 + }, + { + "epoch": 0.41, + "learning_rate": 1.3201904563645853e-05, + "loss": 0.2847, + "step": 4834 + }, + { + "epoch": 0.41, + "learning_rate": 1.319927436090259e-05, + "loss": 0.2813, + "step": 4835 + }, + { + "epoch": 0.41, + "learning_rate": 1.3196643911575072e-05, + "loss": 0.2755, + "step": 4836 + }, + { + "epoch": 0.41, + "learning_rate": 1.319401321586603e-05, + "loss": 0.5737, + "step": 4837 + }, + { + "epoch": 0.41, + "learning_rate": 1.3191382273978237e-05, + "loss": 0.3258, + "step": 4838 + }, + { + "epoch": 0.41, + "learning_rate": 1.3188751086114464e-05, + "loss": 0.2497, + "step": 4839 + }, + { + "epoch": 0.41, + "learning_rate": 1.3186119652477514e-05, + "loss": 0.3023, + "step": 4840 + }, + { + "epoch": 0.41, + "learning_rate": 1.3183487973270204e-05, + "loss": 0.2723, + "step": 4841 + }, + { + "epoch": 0.42, + "learning_rate": 1.318085604869537e-05, + "loss": 0.2432, + "step": 4842 + }, + { + "epoch": 0.42, + "learning_rate": 1.3178223878955874e-05, + "loss": 0.254, + "step": 4843 + }, + { + "epoch": 0.42, + "learning_rate": 1.3175591464254581e-05, + "loss": 0.2863, + "step": 4844 + }, + { + "epoch": 0.42, + "learning_rate": 1.317295880479439e-05, + "loss": 0.336, + "step": 4845 + }, + { + "epoch": 0.42, + "learning_rate": 1.3170325900778211e-05, + "loss": 0.3009, + "step": 4846 + }, + { + "epoch": 0.42, + "learning_rate": 1.3167692752408978e-05, + "loss": 0.3116, + "step": 4847 + }, + { + "epoch": 0.42, + "learning_rate": 1.3165059359889639e-05, + "loss": 0.3154, + "step": 4848 + }, + { + "epoch": 0.42, + "learning_rate": 1.3162425723423162e-05, + "loss": 0.31, + "step": 4849 + }, + { + "epoch": 0.42, + "learning_rate": 1.3159791843212542e-05, + "loss": 0.2816, + "step": 4850 + }, + { + "epoch": 0.42, + "learning_rate": 1.3157157719460774e-05, + "loss": 0.3444, + "step": 4851 + }, + { + "epoch": 0.42, + "learning_rate": 1.3154523352370894e-05, + "loss": 0.6122, + "step": 4852 + }, + { + "epoch": 0.42, + "learning_rate": 1.3151888742145932e-05, + "loss": 0.306, + "step": 4853 + }, + { + "epoch": 0.42, + "learning_rate": 1.3149253888988967e-05, + "loss": 0.2873, + "step": 4854 + }, + { + "epoch": 0.42, + "learning_rate": 1.3146618793103074e-05, + "loss": 0.2795, + "step": 4855 + }, + { + "epoch": 0.42, + "learning_rate": 1.3143983454691348e-05, + "loss": 0.3004, + "step": 4856 + }, + { + "epoch": 0.42, + "learning_rate": 1.3141347873956915e-05, + "loss": 0.2885, + "step": 4857 + }, + { + "epoch": 0.42, + "learning_rate": 1.3138712051102908e-05, + "loss": 0.2515, + "step": 4858 + }, + { + "epoch": 0.42, + "learning_rate": 1.3136075986332485e-05, + "loss": 0.3378, + "step": 4859 + }, + { + "epoch": 0.42, + "learning_rate": 1.3133439679848824e-05, + "loss": 0.277, + "step": 4860 + }, + { + "epoch": 0.42, + "learning_rate": 1.3130803131855113e-05, + "loss": 0.3376, + "step": 4861 + }, + { + "epoch": 0.42, + "learning_rate": 1.3128166342554567e-05, + "loss": 0.297, + "step": 4862 + }, + { + "epoch": 0.42, + "learning_rate": 1.3125529312150414e-05, + "loss": 0.3082, + "step": 4863 + }, + { + "epoch": 0.42, + "learning_rate": 1.312289204084591e-05, + "loss": 0.3073, + "step": 4864 + }, + { + "epoch": 0.42, + "learning_rate": 1.3120254528844312e-05, + "loss": 0.3297, + "step": 4865 + }, + { + "epoch": 0.42, + "learning_rate": 1.3117616776348915e-05, + "loss": 0.2859, + "step": 4866 + }, + { + "epoch": 0.42, + "learning_rate": 1.3114978783563022e-05, + "loss": 0.3062, + "step": 4867 + }, + { + "epoch": 0.42, + "learning_rate": 1.3112340550689955e-05, + "loss": 0.2731, + "step": 4868 + }, + { + "epoch": 0.42, + "learning_rate": 1.310970207793306e-05, + "loss": 0.2795, + "step": 4869 + }, + { + "epoch": 0.42, + "learning_rate": 1.3107063365495692e-05, + "loss": 0.2925, + "step": 4870 + }, + { + "epoch": 0.42, + "learning_rate": 1.3104424413581231e-05, + "loss": 0.2915, + "step": 4871 + }, + { + "epoch": 0.42, + "learning_rate": 1.3101785222393075e-05, + "loss": 0.2886, + "step": 4872 + }, + { + "epoch": 0.42, + "learning_rate": 1.3099145792134642e-05, + "loss": 0.3122, + "step": 4873 + }, + { + "epoch": 0.42, + "learning_rate": 1.3096506123009368e-05, + "loss": 0.2537, + "step": 4874 + }, + { + "epoch": 0.42, + "learning_rate": 1.3093866215220698e-05, + "loss": 0.2968, + "step": 4875 + }, + { + "epoch": 0.42, + "learning_rate": 1.309122606897211e-05, + "loss": 0.3199, + "step": 4876 + }, + { + "epoch": 0.42, + "learning_rate": 1.3088585684467088e-05, + "loss": 0.2534, + "step": 4877 + }, + { + "epoch": 0.42, + "learning_rate": 1.3085945061909144e-05, + "loss": 0.2666, + "step": 4878 + }, + { + "epoch": 0.42, + "learning_rate": 1.3083304201501803e-05, + "loss": 0.2632, + "step": 4879 + }, + { + "epoch": 0.42, + "learning_rate": 1.3080663103448607e-05, + "loss": 0.2697, + "step": 4880 + }, + { + "epoch": 0.42, + "learning_rate": 1.3078021767953125e-05, + "loss": 0.256, + "step": 4881 + }, + { + "epoch": 0.42, + "learning_rate": 1.3075380195218931e-05, + "loss": 0.2749, + "step": 4882 + }, + { + "epoch": 0.42, + "learning_rate": 1.307273838544963e-05, + "loss": 0.2484, + "step": 4883 + }, + { + "epoch": 0.42, + "learning_rate": 1.3070096338848835e-05, + "loss": 0.316, + "step": 4884 + }, + { + "epoch": 0.42, + "learning_rate": 1.3067454055620184e-05, + "loss": 0.2657, + "step": 4885 + }, + { + "epoch": 0.42, + "learning_rate": 1.306481153596733e-05, + "loss": 0.2841, + "step": 4886 + }, + { + "epoch": 0.42, + "learning_rate": 1.3062168780093949e-05, + "loss": 0.2512, + "step": 4887 + }, + { + "epoch": 0.42, + "learning_rate": 1.3059525788203728e-05, + "loss": 0.2742, + "step": 4888 + }, + { + "epoch": 0.42, + "learning_rate": 1.3056882560500378e-05, + "loss": 0.2917, + "step": 4889 + }, + { + "epoch": 0.42, + "learning_rate": 1.3054239097187625e-05, + "loss": 0.2507, + "step": 4890 + }, + { + "epoch": 0.42, + "learning_rate": 1.305159539846921e-05, + "loss": 0.3232, + "step": 4891 + }, + { + "epoch": 0.42, + "learning_rate": 1.3048951464548902e-05, + "loss": 0.6477, + "step": 4892 + }, + { + "epoch": 0.42, + "learning_rate": 1.3046307295630482e-05, + "loss": 0.295, + "step": 4893 + }, + { + "epoch": 0.42, + "learning_rate": 1.3043662891917748e-05, + "loss": 0.2845, + "step": 4894 + }, + { + "epoch": 0.42, + "learning_rate": 1.3041018253614518e-05, + "loss": 0.3442, + "step": 4895 + }, + { + "epoch": 0.42, + "learning_rate": 1.3038373380924623e-05, + "loss": 0.3137, + "step": 4896 + }, + { + "epoch": 0.42, + "learning_rate": 1.3035728274051924e-05, + "loss": 0.2985, + "step": 4897 + }, + { + "epoch": 0.42, + "learning_rate": 1.3033082933200287e-05, + "loss": 0.2945, + "step": 4898 + }, + { + "epoch": 0.42, + "learning_rate": 1.3030437358573606e-05, + "loss": 0.2918, + "step": 4899 + }, + { + "epoch": 0.42, + "learning_rate": 1.302779155037579e-05, + "loss": 0.3008, + "step": 4900 + }, + { + "epoch": 0.42, + "learning_rate": 1.302514550881076e-05, + "loss": 0.2714, + "step": 4901 + }, + { + "epoch": 0.42, + "learning_rate": 1.3022499234082463e-05, + "loss": 0.3484, + "step": 4902 + }, + { + "epoch": 0.42, + "learning_rate": 1.3019852726394857e-05, + "loss": 0.3029, + "step": 4903 + }, + { + "epoch": 0.42, + "learning_rate": 1.3017205985951926e-05, + "loss": 0.2746, + "step": 4904 + }, + { + "epoch": 0.42, + "learning_rate": 1.3014559012957665e-05, + "loss": 0.2798, + "step": 4905 + }, + { + "epoch": 0.42, + "learning_rate": 1.3011911807616091e-05, + "loss": 0.2724, + "step": 4906 + }, + { + "epoch": 0.42, + "learning_rate": 1.3009264370131239e-05, + "loss": 0.3561, + "step": 4907 + }, + { + "epoch": 0.42, + "learning_rate": 1.3006616700707156e-05, + "loss": 0.2535, + "step": 4908 + }, + { + "epoch": 0.42, + "learning_rate": 1.3003968799547915e-05, + "loss": 0.291, + "step": 4909 + }, + { + "epoch": 0.42, + "learning_rate": 1.30013206668576e-05, + "loss": 0.3071, + "step": 4910 + }, + { + "epoch": 0.42, + "learning_rate": 1.2998672302840318e-05, + "loss": 0.2776, + "step": 4911 + }, + { + "epoch": 0.42, + "learning_rate": 1.2996023707700197e-05, + "loss": 0.2747, + "step": 4912 + }, + { + "epoch": 0.42, + "learning_rate": 1.2993374881641367e-05, + "loss": 0.2949, + "step": 4913 + }, + { + "epoch": 0.42, + "learning_rate": 1.2990725824867995e-05, + "loss": 0.3315, + "step": 4914 + }, + { + "epoch": 0.42, + "learning_rate": 1.2988076537584254e-05, + "loss": 0.2838, + "step": 4915 + }, + { + "epoch": 0.42, + "learning_rate": 1.2985427019994335e-05, + "loss": 0.2912, + "step": 4916 + }, + { + "epoch": 0.42, + "learning_rate": 1.2982777272302454e-05, + "loss": 0.3068, + "step": 4917 + }, + { + "epoch": 0.42, + "learning_rate": 1.2980127294712839e-05, + "loss": 0.6093, + "step": 4918 + }, + { + "epoch": 0.42, + "learning_rate": 1.2977477087429739e-05, + "loss": 0.2781, + "step": 4919 + }, + { + "epoch": 0.42, + "learning_rate": 1.2974826650657418e-05, + "loss": 0.2736, + "step": 4920 + }, + { + "epoch": 0.42, + "learning_rate": 1.2972175984600157e-05, + "loss": 0.3271, + "step": 4921 + }, + { + "epoch": 0.42, + "learning_rate": 1.2969525089462253e-05, + "loss": 0.3005, + "step": 4922 + }, + { + "epoch": 0.42, + "learning_rate": 1.2966873965448032e-05, + "loss": 0.2636, + "step": 4923 + }, + { + "epoch": 0.42, + "learning_rate": 1.2964222612761825e-05, + "loss": 0.2827, + "step": 4924 + }, + { + "epoch": 0.42, + "learning_rate": 1.2961571031607985e-05, + "loss": 0.3504, + "step": 4925 + }, + { + "epoch": 0.42, + "learning_rate": 1.2958919222190885e-05, + "loss": 0.3324, + "step": 4926 + }, + { + "epoch": 0.42, + "learning_rate": 1.295626718471491e-05, + "loss": 0.3079, + "step": 4927 + }, + { + "epoch": 0.42, + "learning_rate": 1.295361491938447e-05, + "loss": 0.2971, + "step": 4928 + }, + { + "epoch": 0.42, + "learning_rate": 1.2950962426403981e-05, + "loss": 0.3065, + "step": 4929 + }, + { + "epoch": 0.42, + "learning_rate": 1.2948309705977893e-05, + "loss": 0.6127, + "step": 4930 + }, + { + "epoch": 0.42, + "learning_rate": 1.2945656758310663e-05, + "loss": 0.2986, + "step": 4931 + }, + { + "epoch": 0.42, + "learning_rate": 1.294300358360676e-05, + "loss": 0.3096, + "step": 4932 + }, + { + "epoch": 0.42, + "learning_rate": 1.294035018207069e-05, + "loss": 0.3211, + "step": 4933 + }, + { + "epoch": 0.42, + "learning_rate": 1.2937696553906949e-05, + "loss": 0.3181, + "step": 4934 + }, + { + "epoch": 0.42, + "learning_rate": 1.2935042699320075e-05, + "loss": 0.2803, + "step": 4935 + }, + { + "epoch": 0.42, + "learning_rate": 1.2932388618514616e-05, + "loss": 0.2709, + "step": 4936 + }, + { + "epoch": 0.42, + "learning_rate": 1.2929734311695125e-05, + "loss": 0.2866, + "step": 4937 + }, + { + "epoch": 0.42, + "learning_rate": 1.2927079779066196e-05, + "loss": 0.3434, + "step": 4938 + }, + { + "epoch": 0.42, + "learning_rate": 1.2924425020832419e-05, + "loss": 0.3034, + "step": 4939 + }, + { + "epoch": 0.42, + "learning_rate": 1.292177003719841e-05, + "loss": 0.2884, + "step": 4940 + }, + { + "epoch": 0.42, + "learning_rate": 1.2919114828368806e-05, + "loss": 0.2755, + "step": 4941 + }, + { + "epoch": 0.42, + "learning_rate": 1.291645939454825e-05, + "loss": 0.2856, + "step": 4942 + }, + { + "epoch": 0.42, + "learning_rate": 1.2913803735941417e-05, + "loss": 0.2612, + "step": 4943 + }, + { + "epoch": 0.42, + "learning_rate": 1.291114785275299e-05, + "loss": 0.3066, + "step": 4944 + }, + { + "epoch": 0.42, + "learning_rate": 1.2908491745187673e-05, + "loss": 0.269, + "step": 4945 + }, + { + "epoch": 0.42, + "learning_rate": 1.290583541345018e-05, + "loss": 0.3151, + "step": 4946 + }, + { + "epoch": 0.42, + "learning_rate": 1.2903178857745256e-05, + "loss": 0.3021, + "step": 4947 + }, + { + "epoch": 0.42, + "learning_rate": 1.2900522078277645e-05, + "loss": 0.3053, + "step": 4948 + }, + { + "epoch": 0.42, + "learning_rate": 1.2897865075252125e-05, + "loss": 0.2658, + "step": 4949 + }, + { + "epoch": 0.42, + "learning_rate": 1.2895207848873488e-05, + "loss": 0.2978, + "step": 4950 + }, + { + "epoch": 0.42, + "learning_rate": 1.2892550399346533e-05, + "loss": 0.2809, + "step": 4951 + }, + { + "epoch": 0.42, + "learning_rate": 1.2889892726876085e-05, + "loss": 0.2707, + "step": 4952 + }, + { + "epoch": 0.42, + "learning_rate": 1.2887234831666987e-05, + "loss": 0.299, + "step": 4953 + }, + { + "epoch": 0.42, + "learning_rate": 1.2884576713924093e-05, + "loss": 0.3063, + "step": 4954 + }, + { + "epoch": 0.42, + "learning_rate": 1.2881918373852278e-05, + "loss": 0.2911, + "step": 4955 + }, + { + "epoch": 0.42, + "learning_rate": 1.2879259811656435e-05, + "loss": 0.2944, + "step": 4956 + }, + { + "epoch": 0.42, + "learning_rate": 1.2876601027541475e-05, + "loss": 0.3289, + "step": 4957 + }, + { + "epoch": 0.42, + "learning_rate": 1.287394202171232e-05, + "loss": 0.2614, + "step": 4958 + }, + { + "epoch": 0.43, + "learning_rate": 1.2871282794373916e-05, + "loss": 0.2828, + "step": 4959 + }, + { + "epoch": 0.43, + "learning_rate": 1.286862334573122e-05, + "loss": 0.3165, + "step": 4960 + }, + { + "epoch": 0.43, + "learning_rate": 1.286596367598921e-05, + "loss": 0.238, + "step": 4961 + }, + { + "epoch": 0.43, + "learning_rate": 1.2863303785352883e-05, + "loss": 0.2675, + "step": 4962 + }, + { + "epoch": 0.43, + "learning_rate": 1.2860643674027246e-05, + "loss": 0.2759, + "step": 4963 + }, + { + "epoch": 0.43, + "learning_rate": 1.2857983342217333e-05, + "loss": 0.2704, + "step": 4964 + }, + { + "epoch": 0.43, + "learning_rate": 1.2855322790128182e-05, + "loss": 0.3199, + "step": 4965 + }, + { + "epoch": 0.43, + "learning_rate": 1.2852662017964863e-05, + "loss": 0.2951, + "step": 4966 + }, + { + "epoch": 0.43, + "learning_rate": 1.2850001025932444e-05, + "loss": 0.2819, + "step": 4967 + }, + { + "epoch": 0.43, + "learning_rate": 1.2847339814236033e-05, + "loss": 0.2407, + "step": 4968 + }, + { + "epoch": 0.43, + "learning_rate": 1.284467838308074e-05, + "loss": 0.2733, + "step": 4969 + }, + { + "epoch": 0.43, + "learning_rate": 1.2842016732671689e-05, + "loss": 0.2523, + "step": 4970 + }, + { + "epoch": 0.43, + "learning_rate": 1.2839354863214035e-05, + "loss": 0.2656, + "step": 4971 + }, + { + "epoch": 0.43, + "learning_rate": 1.2836692774912935e-05, + "loss": 0.2934, + "step": 4972 + }, + { + "epoch": 0.43, + "learning_rate": 1.2834030467973572e-05, + "loss": 0.3209, + "step": 4973 + }, + { + "epoch": 0.43, + "learning_rate": 1.2831367942601146e-05, + "loss": 0.2899, + "step": 4974 + }, + { + "epoch": 0.43, + "learning_rate": 1.2828705199000869e-05, + "loss": 0.2903, + "step": 4975 + }, + { + "epoch": 0.43, + "learning_rate": 1.2826042237377972e-05, + "loss": 0.3071, + "step": 4976 + }, + { + "epoch": 0.43, + "learning_rate": 1.2823379057937702e-05, + "loss": 0.3071, + "step": 4977 + }, + { + "epoch": 0.43, + "learning_rate": 1.2820715660885328e-05, + "loss": 0.2733, + "step": 4978 + }, + { + "epoch": 0.43, + "learning_rate": 1.2818052046426125e-05, + "loss": 0.3013, + "step": 4979 + }, + { + "epoch": 0.43, + "learning_rate": 1.2815388214765397e-05, + "loss": 0.2896, + "step": 4980 + }, + { + "epoch": 0.43, + "learning_rate": 1.2812724166108454e-05, + "loss": 0.6068, + "step": 4981 + }, + { + "epoch": 0.43, + "learning_rate": 1.281005990066063e-05, + "loss": 0.2501, + "step": 4982 + }, + { + "epoch": 0.43, + "learning_rate": 1.2807395418627278e-05, + "loss": 0.3109, + "step": 4983 + }, + { + "epoch": 0.43, + "learning_rate": 1.2804730720213756e-05, + "loss": 0.3362, + "step": 4984 + }, + { + "epoch": 0.43, + "learning_rate": 1.280206580562545e-05, + "loss": 0.2627, + "step": 4985 + }, + { + "epoch": 0.43, + "learning_rate": 1.2799400675067754e-05, + "loss": 0.3087, + "step": 4986 + }, + { + "epoch": 0.43, + "learning_rate": 1.2796735328746089e-05, + "loss": 0.2274, + "step": 4987 + }, + { + "epoch": 0.43, + "learning_rate": 1.2794069766865884e-05, + "loss": 0.2737, + "step": 4988 + }, + { + "epoch": 0.43, + "learning_rate": 1.2791403989632586e-05, + "loss": 0.3082, + "step": 4989 + }, + { + "epoch": 0.43, + "learning_rate": 1.2788737997251665e-05, + "loss": 0.2284, + "step": 4990 + }, + { + "epoch": 0.43, + "learning_rate": 1.2786071789928593e-05, + "loss": 0.3211, + "step": 4991 + }, + { + "epoch": 0.43, + "learning_rate": 1.2783405367868878e-05, + "loss": 0.3515, + "step": 4992 + }, + { + "epoch": 0.43, + "learning_rate": 1.2780738731278028e-05, + "loss": 0.2344, + "step": 4993 + }, + { + "epoch": 0.43, + "learning_rate": 1.2778071880361577e-05, + "loss": 0.2803, + "step": 4994 + }, + { + "epoch": 0.43, + "learning_rate": 1.2775404815325074e-05, + "loss": 0.2524, + "step": 4995 + }, + { + "epoch": 0.43, + "learning_rate": 1.277273753637408e-05, + "loss": 0.3319, + "step": 4996 + }, + { + "epoch": 0.43, + "learning_rate": 1.277007004371418e-05, + "loss": 0.2653, + "step": 4997 + }, + { + "epoch": 0.43, + "learning_rate": 1.2767402337550966e-05, + "loss": 0.3116, + "step": 4998 + }, + { + "epoch": 0.43, + "learning_rate": 1.2764734418090052e-05, + "loss": 0.3344, + "step": 4999 + }, + { + "epoch": 0.43, + "learning_rate": 1.2762066285537071e-05, + "loss": 0.3002, + "step": 5000 + }, + { + "epoch": 0.43, + "learning_rate": 1.275939794009767e-05, + "loss": 0.267, + "step": 5001 + }, + { + "epoch": 0.43, + "learning_rate": 1.275672938197751e-05, + "loss": 0.2889, + "step": 5002 + }, + { + "epoch": 0.43, + "learning_rate": 1.275406061138227e-05, + "loss": 0.2973, + "step": 5003 + }, + { + "epoch": 0.43, + "learning_rate": 1.2751391628517649e-05, + "loss": 0.3354, + "step": 5004 + }, + { + "epoch": 0.43, + "learning_rate": 1.2748722433589351e-05, + "loss": 0.3162, + "step": 5005 + }, + { + "epoch": 0.43, + "learning_rate": 1.2746053026803114e-05, + "loss": 0.2952, + "step": 5006 + }, + { + "epoch": 0.43, + "learning_rate": 1.2743383408364678e-05, + "loss": 0.3312, + "step": 5007 + }, + { + "epoch": 0.43, + "learning_rate": 1.2740713578479802e-05, + "loss": 0.2976, + "step": 5008 + }, + { + "epoch": 0.43, + "learning_rate": 1.2738043537354268e-05, + "loss": 0.2833, + "step": 5009 + }, + { + "epoch": 0.43, + "learning_rate": 1.2735373285193867e-05, + "loss": 0.2736, + "step": 5010 + }, + { + "epoch": 0.43, + "learning_rate": 1.2732702822204406e-05, + "loss": 0.2896, + "step": 5011 + }, + { + "epoch": 0.43, + "learning_rate": 1.2730032148591716e-05, + "loss": 0.6, + "step": 5012 + }, + { + "epoch": 0.43, + "learning_rate": 1.2727361264561637e-05, + "loss": 0.3231, + "step": 5013 + }, + { + "epoch": 0.43, + "learning_rate": 1.2724690170320031e-05, + "loss": 0.2505, + "step": 5014 + }, + { + "epoch": 0.43, + "learning_rate": 1.2722018866072768e-05, + "loss": 0.2782, + "step": 5015 + }, + { + "epoch": 0.43, + "learning_rate": 1.2719347352025741e-05, + "loss": 0.2721, + "step": 5016 + }, + { + "epoch": 0.43, + "learning_rate": 1.2716675628384856e-05, + "loss": 0.3256, + "step": 5017 + }, + { + "epoch": 0.43, + "learning_rate": 1.2714003695356037e-05, + "loss": 0.3231, + "step": 5018 + }, + { + "epoch": 0.43, + "learning_rate": 1.2711331553145223e-05, + "loss": 0.2431, + "step": 5019 + }, + { + "epoch": 0.43, + "learning_rate": 1.270865920195837e-05, + "loss": 0.3104, + "step": 5020 + }, + { + "epoch": 0.43, + "learning_rate": 1.2705986642001451e-05, + "loss": 0.2838, + "step": 5021 + }, + { + "epoch": 0.43, + "learning_rate": 1.2703313873480451e-05, + "loss": 0.3663, + "step": 5022 + }, + { + "epoch": 0.43, + "learning_rate": 1.2700640896601377e-05, + "loss": 0.3014, + "step": 5023 + }, + { + "epoch": 0.43, + "learning_rate": 1.2697967711570243e-05, + "loss": 0.3015, + "step": 5024 + }, + { + "epoch": 0.43, + "learning_rate": 1.2695294318593092e-05, + "loss": 0.2964, + "step": 5025 + }, + { + "epoch": 0.43, + "learning_rate": 1.2692620717875972e-05, + "loss": 0.2776, + "step": 5026 + }, + { + "epoch": 0.43, + "learning_rate": 1.2689946909624951e-05, + "loss": 0.3105, + "step": 5027 + }, + { + "epoch": 0.43, + "learning_rate": 1.2687272894046116e-05, + "loss": 0.251, + "step": 5028 + }, + { + "epoch": 0.43, + "learning_rate": 1.2684598671345563e-05, + "loss": 0.2552, + "step": 5029 + }, + { + "epoch": 0.43, + "learning_rate": 1.2681924241729409e-05, + "loss": 0.2737, + "step": 5030 + }, + { + "epoch": 0.43, + "learning_rate": 1.2679249605403786e-05, + "loss": 0.2887, + "step": 5031 + }, + { + "epoch": 0.43, + "learning_rate": 1.2676574762574842e-05, + "loss": 0.2668, + "step": 5032 + }, + { + "epoch": 0.43, + "learning_rate": 1.2673899713448743e-05, + "loss": 0.288, + "step": 5033 + }, + { + "epoch": 0.43, + "learning_rate": 1.2671224458231664e-05, + "loss": 0.3062, + "step": 5034 + }, + { + "epoch": 0.43, + "learning_rate": 1.2668548997129807e-05, + "loss": 0.2898, + "step": 5035 + }, + { + "epoch": 0.43, + "learning_rate": 1.2665873330349371e-05, + "loss": 0.2919, + "step": 5036 + }, + { + "epoch": 0.43, + "learning_rate": 1.2663197458096597e-05, + "loss": 0.2697, + "step": 5037 + }, + { + "epoch": 0.43, + "learning_rate": 1.266052138057772e-05, + "loss": 0.2736, + "step": 5038 + }, + { + "epoch": 0.43, + "learning_rate": 1.2657845097999e-05, + "loss": 0.2621, + "step": 5039 + }, + { + "epoch": 0.43, + "learning_rate": 1.2655168610566715e-05, + "loss": 0.287, + "step": 5040 + }, + { + "epoch": 0.43, + "learning_rate": 1.265249191848715e-05, + "loss": 0.2751, + "step": 5041 + }, + { + "epoch": 0.43, + "learning_rate": 1.264981502196662e-05, + "loss": 0.311, + "step": 5042 + }, + { + "epoch": 0.43, + "learning_rate": 1.2647137921211435e-05, + "loss": 0.2762, + "step": 5043 + }, + { + "epoch": 0.43, + "learning_rate": 1.264446061642794e-05, + "loss": 0.3188, + "step": 5044 + }, + { + "epoch": 0.43, + "learning_rate": 1.2641783107822491e-05, + "loss": 0.2769, + "step": 5045 + }, + { + "epoch": 0.43, + "learning_rate": 1.2639105395601452e-05, + "loss": 0.297, + "step": 5046 + }, + { + "epoch": 0.43, + "learning_rate": 1.263642747997121e-05, + "loss": 0.2922, + "step": 5047 + }, + { + "epoch": 0.43, + "learning_rate": 1.2633749361138162e-05, + "loss": 0.2924, + "step": 5048 + }, + { + "epoch": 0.43, + "learning_rate": 1.2631071039308728e-05, + "loss": 0.2505, + "step": 5049 + }, + { + "epoch": 0.43, + "learning_rate": 1.2628392514689339e-05, + "loss": 0.317, + "step": 5050 + }, + { + "epoch": 0.43, + "learning_rate": 1.2625713787486442e-05, + "loss": 0.2849, + "step": 5051 + }, + { + "epoch": 0.43, + "learning_rate": 1.2623034857906501e-05, + "loss": 0.2952, + "step": 5052 + }, + { + "epoch": 0.43, + "learning_rate": 1.2620355726155995e-05, + "loss": 0.5823, + "step": 5053 + }, + { + "epoch": 0.43, + "learning_rate": 1.2617676392441419e-05, + "loss": 0.3296, + "step": 5054 + }, + { + "epoch": 0.43, + "learning_rate": 1.2614996856969275e-05, + "loss": 0.2885, + "step": 5055 + }, + { + "epoch": 0.43, + "learning_rate": 1.2612317119946099e-05, + "loss": 0.2552, + "step": 5056 + }, + { + "epoch": 0.43, + "learning_rate": 1.2609637181578424e-05, + "loss": 0.3136, + "step": 5057 + }, + { + "epoch": 0.43, + "learning_rate": 1.260695704207281e-05, + "loss": 0.3046, + "step": 5058 + }, + { + "epoch": 0.43, + "learning_rate": 1.2604276701635832e-05, + "loss": 0.2936, + "step": 5059 + }, + { + "epoch": 0.43, + "learning_rate": 1.260159616047407e-05, + "loss": 0.325, + "step": 5060 + }, + { + "epoch": 0.43, + "learning_rate": 1.2598915418794136e-05, + "loss": 0.3215, + "step": 5061 + }, + { + "epoch": 0.43, + "learning_rate": 1.2596234476802636e-05, + "loss": 0.2955, + "step": 5062 + }, + { + "epoch": 0.43, + "learning_rate": 1.2593553334706212e-05, + "loss": 0.2825, + "step": 5063 + }, + { + "epoch": 0.43, + "learning_rate": 1.2590871992711517e-05, + "loss": 0.3372, + "step": 5064 + }, + { + "epoch": 0.43, + "learning_rate": 1.2588190451025209e-05, + "loss": 0.2682, + "step": 5065 + }, + { + "epoch": 0.43, + "learning_rate": 1.2585508709853971e-05, + "loss": 0.2754, + "step": 5066 + }, + { + "epoch": 0.43, + "learning_rate": 1.2582826769404492e-05, + "loss": 0.3116, + "step": 5067 + }, + { + "epoch": 0.43, + "learning_rate": 1.2580144629883494e-05, + "loss": 0.3187, + "step": 5068 + }, + { + "epoch": 0.43, + "learning_rate": 1.257746229149769e-05, + "loss": 0.3624, + "step": 5069 + }, + { + "epoch": 0.43, + "learning_rate": 1.2574779754453831e-05, + "loss": 0.3005, + "step": 5070 + }, + { + "epoch": 0.43, + "learning_rate": 1.2572097018958674e-05, + "loss": 0.3066, + "step": 5071 + }, + { + "epoch": 0.43, + "learning_rate": 1.2569414085218986e-05, + "loss": 0.2709, + "step": 5072 + }, + { + "epoch": 0.43, + "learning_rate": 1.2566730953441554e-05, + "loss": 0.2873, + "step": 5073 + }, + { + "epoch": 0.43, + "learning_rate": 1.2564047623833186e-05, + "loss": 0.5846, + "step": 5074 + }, + { + "epoch": 0.44, + "learning_rate": 1.2561364096600694e-05, + "loss": 0.2693, + "step": 5075 + }, + { + "epoch": 0.44, + "learning_rate": 1.2558680371950913e-05, + "loss": 0.2677, + "step": 5076 + }, + { + "epoch": 0.44, + "learning_rate": 1.2555996450090693e-05, + "loss": 0.2746, + "step": 5077 + }, + { + "epoch": 0.44, + "learning_rate": 1.2553312331226896e-05, + "loss": 0.5933, + "step": 5078 + }, + { + "epoch": 0.44, + "learning_rate": 1.2550628015566402e-05, + "loss": 0.2944, + "step": 5079 + }, + { + "epoch": 0.44, + "learning_rate": 1.2547943503316105e-05, + "loss": 0.3016, + "step": 5080 + }, + { + "epoch": 0.44, + "learning_rate": 1.2545258794682906e-05, + "loss": 0.3448, + "step": 5081 + }, + { + "epoch": 0.44, + "learning_rate": 1.2542573889873741e-05, + "loss": 0.2834, + "step": 5082 + }, + { + "epoch": 0.44, + "learning_rate": 1.2539888789095542e-05, + "loss": 0.2861, + "step": 5083 + }, + { + "epoch": 0.44, + "learning_rate": 1.2537203492555265e-05, + "loss": 0.3148, + "step": 5084 + }, + { + "epoch": 0.44, + "learning_rate": 1.2534518000459884e-05, + "loss": 0.319, + "step": 5085 + }, + { + "epoch": 0.44, + "learning_rate": 1.2531832313016374e-05, + "loss": 0.2494, + "step": 5086 + }, + { + "epoch": 0.44, + "learning_rate": 1.252914643043174e-05, + "loss": 0.6106, + "step": 5087 + }, + { + "epoch": 0.44, + "learning_rate": 1.2526460352912994e-05, + "loss": 0.3207, + "step": 5088 + }, + { + "epoch": 0.44, + "learning_rate": 1.252377408066717e-05, + "loss": 0.3076, + "step": 5089 + }, + { + "epoch": 0.44, + "learning_rate": 1.2521087613901313e-05, + "loss": 0.3112, + "step": 5090 + }, + { + "epoch": 0.44, + "learning_rate": 1.2518400952822475e-05, + "loss": 0.3275, + "step": 5091 + }, + { + "epoch": 0.44, + "learning_rate": 1.251571409763774e-05, + "loss": 0.3105, + "step": 5092 + }, + { + "epoch": 0.44, + "learning_rate": 1.251302704855419e-05, + "loss": 0.3038, + "step": 5093 + }, + { + "epoch": 0.44, + "learning_rate": 1.2510339805778932e-05, + "loss": 0.3319, + "step": 5094 + }, + { + "epoch": 0.44, + "learning_rate": 1.2507652369519085e-05, + "loss": 0.3226, + "step": 5095 + }, + { + "epoch": 0.44, + "learning_rate": 1.2504964739981787e-05, + "loss": 0.2773, + "step": 5096 + }, + { + "epoch": 0.44, + "learning_rate": 1.2502276917374183e-05, + "loss": 0.2769, + "step": 5097 + }, + { + "epoch": 0.44, + "learning_rate": 1.2499588901903437e-05, + "loss": 0.2853, + "step": 5098 + }, + { + "epoch": 0.44, + "learning_rate": 1.2496900693776732e-05, + "loss": 0.2924, + "step": 5099 + }, + { + "epoch": 0.44, + "learning_rate": 1.2494212293201255e-05, + "loss": 0.257, + "step": 5100 + }, + { + "epoch": 0.44, + "learning_rate": 1.2491523700384222e-05, + "loss": 0.3021, + "step": 5101 + }, + { + "epoch": 0.44, + "learning_rate": 1.2488834915532852e-05, + "loss": 0.3013, + "step": 5102 + }, + { + "epoch": 0.44, + "learning_rate": 1.2486145938854384e-05, + "loss": 0.2578, + "step": 5103 + }, + { + "epoch": 0.44, + "learning_rate": 1.2483456770556073e-05, + "loss": 0.2798, + "step": 5104 + }, + { + "epoch": 0.44, + "learning_rate": 1.2480767410845185e-05, + "loss": 0.326, + "step": 5105 + }, + { + "epoch": 0.44, + "learning_rate": 1.2478077859929e-05, + "loss": 0.3029, + "step": 5106 + }, + { + "epoch": 0.44, + "learning_rate": 1.247538811801482e-05, + "loss": 0.2978, + "step": 5107 + }, + { + "epoch": 0.44, + "learning_rate": 1.2472698185309954e-05, + "loss": 0.3493, + "step": 5108 + }, + { + "epoch": 0.44, + "learning_rate": 1.2470008062021732e-05, + "loss": 0.3455, + "step": 5109 + }, + { + "epoch": 0.44, + "learning_rate": 1.2467317748357493e-05, + "loss": 0.2995, + "step": 5110 + }, + { + "epoch": 0.44, + "learning_rate": 1.2464627244524595e-05, + "loss": 0.3005, + "step": 5111 + }, + { + "epoch": 0.44, + "learning_rate": 1.2461936550730402e-05, + "loss": 0.3138, + "step": 5112 + }, + { + "epoch": 0.44, + "learning_rate": 1.2459245667182307e-05, + "loss": 0.3345, + "step": 5113 + }, + { + "epoch": 0.44, + "learning_rate": 1.2456554594087709e-05, + "loss": 0.2744, + "step": 5114 + }, + { + "epoch": 0.44, + "learning_rate": 1.2453863331654019e-05, + "loss": 0.276, + "step": 5115 + }, + { + "epoch": 0.44, + "learning_rate": 1.245117188008867e-05, + "loss": 0.288, + "step": 5116 + }, + { + "epoch": 0.44, + "learning_rate": 1.2448480239599103e-05, + "loss": 0.2554, + "step": 5117 + }, + { + "epoch": 0.44, + "learning_rate": 1.2445788410392778e-05, + "loss": 0.2917, + "step": 5118 + }, + { + "epoch": 0.44, + "learning_rate": 1.2443096392677165e-05, + "loss": 0.3129, + "step": 5119 + }, + { + "epoch": 0.44, + "learning_rate": 1.2440404186659757e-05, + "loss": 0.2881, + "step": 5120 + }, + { + "epoch": 0.44, + "learning_rate": 1.243771179254805e-05, + "loss": 0.2708, + "step": 5121 + }, + { + "epoch": 0.44, + "learning_rate": 1.2435019210549564e-05, + "loss": 0.5685, + "step": 5122 + }, + { + "epoch": 0.44, + "learning_rate": 1.2432326440871832e-05, + "loss": 0.2533, + "step": 5123 + }, + { + "epoch": 0.44, + "learning_rate": 1.2429633483722392e-05, + "loss": 0.2368, + "step": 5124 + }, + { + "epoch": 0.44, + "learning_rate": 1.242694033930881e-05, + "loss": 0.2086, + "step": 5125 + }, + { + "epoch": 0.44, + "learning_rate": 1.2424247007838659e-05, + "loss": 0.2679, + "step": 5126 + }, + { + "epoch": 0.44, + "learning_rate": 1.2421553489519527e-05, + "loss": 0.2884, + "step": 5127 + }, + { + "epoch": 0.44, + "learning_rate": 1.2418859784559016e-05, + "loss": 0.2932, + "step": 5128 + }, + { + "epoch": 0.44, + "learning_rate": 1.2416165893164746e-05, + "loss": 0.3159, + "step": 5129 + }, + { + "epoch": 0.44, + "learning_rate": 1.241347181554435e-05, + "loss": 0.2684, + "step": 5130 + }, + { + "epoch": 0.44, + "learning_rate": 1.2410777551905469e-05, + "loss": 0.3151, + "step": 5131 + }, + { + "epoch": 0.44, + "learning_rate": 1.2408083102455766e-05, + "loss": 0.3011, + "step": 5132 + }, + { + "epoch": 0.44, + "learning_rate": 1.2405388467402915e-05, + "loss": 0.2405, + "step": 5133 + }, + { + "epoch": 0.44, + "learning_rate": 1.2402693646954607e-05, + "loss": 0.2706, + "step": 5134 + }, + { + "epoch": 0.44, + "learning_rate": 1.2399998641318547e-05, + "loss": 0.3016, + "step": 5135 + }, + { + "epoch": 0.44, + "learning_rate": 1.2397303450702449e-05, + "loss": 0.3594, + "step": 5136 + }, + { + "epoch": 0.44, + "learning_rate": 1.2394608075314048e-05, + "loss": 0.3301, + "step": 5137 + }, + { + "epoch": 0.44, + "learning_rate": 1.2391912515361085e-05, + "loss": 0.2711, + "step": 5138 + }, + { + "epoch": 0.44, + "learning_rate": 1.238921677105133e-05, + "loss": 0.2408, + "step": 5139 + }, + { + "epoch": 0.44, + "learning_rate": 1.2386520842592544e-05, + "loss": 0.2906, + "step": 5140 + }, + { + "epoch": 0.44, + "learning_rate": 1.2383824730192529e-05, + "loss": 0.3082, + "step": 5141 + }, + { + "epoch": 0.44, + "learning_rate": 1.2381128434059082e-05, + "loss": 0.2953, + "step": 5142 + }, + { + "epoch": 0.44, + "learning_rate": 1.2378431954400021e-05, + "loss": 0.3179, + "step": 5143 + }, + { + "epoch": 0.44, + "learning_rate": 1.2375735291423174e-05, + "loss": 0.3124, + "step": 5144 + }, + { + "epoch": 0.44, + "learning_rate": 1.2373038445336392e-05, + "loss": 0.256, + "step": 5145 + }, + { + "epoch": 0.44, + "learning_rate": 1.237034141634753e-05, + "loss": 0.2609, + "step": 5146 + }, + { + "epoch": 0.44, + "learning_rate": 1.2367644204664468e-05, + "loss": 0.2864, + "step": 5147 + }, + { + "epoch": 0.44, + "learning_rate": 1.2364946810495088e-05, + "loss": 0.2834, + "step": 5148 + }, + { + "epoch": 0.44, + "learning_rate": 1.2362249234047295e-05, + "loss": 0.2791, + "step": 5149 + }, + { + "epoch": 0.44, + "learning_rate": 1.2359551475529e-05, + "loss": 0.2488, + "step": 5150 + }, + { + "epoch": 0.44, + "learning_rate": 1.2356853535148137e-05, + "loss": 0.2633, + "step": 5151 + }, + { + "epoch": 0.44, + "learning_rate": 1.2354155413112647e-05, + "loss": 0.2738, + "step": 5152 + }, + { + "epoch": 0.44, + "learning_rate": 1.2351457109630493e-05, + "loss": 0.2678, + "step": 5153 + }, + { + "epoch": 0.44, + "learning_rate": 1.2348758624909644e-05, + "loss": 0.3131, + "step": 5154 + }, + { + "epoch": 0.44, + "learning_rate": 1.2346059959158085e-05, + "loss": 0.2427, + "step": 5155 + }, + { + "epoch": 0.44, + "learning_rate": 1.2343361112583819e-05, + "loss": 0.297, + "step": 5156 + }, + { + "epoch": 0.44, + "learning_rate": 1.234066208539485e-05, + "loss": 0.2605, + "step": 5157 + }, + { + "epoch": 0.44, + "learning_rate": 1.233796287779922e-05, + "loss": 0.265, + "step": 5158 + }, + { + "epoch": 0.44, + "learning_rate": 1.2335263490004961e-05, + "loss": 0.5809, + "step": 5159 + }, + { + "epoch": 0.44, + "learning_rate": 1.2332563922220132e-05, + "loss": 0.3103, + "step": 5160 + }, + { + "epoch": 0.44, + "learning_rate": 1.2329864174652802e-05, + "loss": 0.3275, + "step": 5161 + }, + { + "epoch": 0.44, + "learning_rate": 1.2327164247511051e-05, + "loss": 0.2773, + "step": 5162 + }, + { + "epoch": 0.44, + "learning_rate": 1.2324464141002981e-05, + "loss": 0.3287, + "step": 5163 + }, + { + "epoch": 0.44, + "learning_rate": 1.23217638553367e-05, + "loss": 0.2722, + "step": 5164 + }, + { + "epoch": 0.44, + "learning_rate": 1.2319063390720331e-05, + "loss": 0.2862, + "step": 5165 + }, + { + "epoch": 0.44, + "learning_rate": 1.2316362747362019e-05, + "loss": 0.2706, + "step": 5166 + }, + { + "epoch": 0.44, + "learning_rate": 1.2313661925469908e-05, + "loss": 0.27, + "step": 5167 + }, + { + "epoch": 0.44, + "learning_rate": 1.2310960925252171e-05, + "loss": 0.2825, + "step": 5168 + }, + { + "epoch": 0.44, + "learning_rate": 1.2308259746916982e-05, + "loss": 0.2767, + "step": 5169 + }, + { + "epoch": 0.44, + "learning_rate": 1.2305558390672539e-05, + "loss": 0.27, + "step": 5170 + }, + { + "epoch": 0.44, + "learning_rate": 1.2302856856727043e-05, + "loss": 0.2974, + "step": 5171 + }, + { + "epoch": 0.44, + "learning_rate": 1.2300155145288724e-05, + "loss": 0.2477, + "step": 5172 + }, + { + "epoch": 0.44, + "learning_rate": 1.2297453256565812e-05, + "loss": 0.2212, + "step": 5173 + }, + { + "epoch": 0.44, + "learning_rate": 1.2294751190766552e-05, + "loss": 0.2523, + "step": 5174 + }, + { + "epoch": 0.44, + "learning_rate": 1.2292048948099214e-05, + "loss": 0.2648, + "step": 5175 + }, + { + "epoch": 0.44, + "learning_rate": 1.228934652877206e-05, + "loss": 0.2862, + "step": 5176 + }, + { + "epoch": 0.44, + "learning_rate": 1.2286643932993396e-05, + "loss": 0.2982, + "step": 5177 + }, + { + "epoch": 0.44, + "learning_rate": 1.2283941160971512e-05, + "loss": 0.2551, + "step": 5178 + }, + { + "epoch": 0.44, + "learning_rate": 1.2281238212914727e-05, + "loss": 0.2878, + "step": 5179 + }, + { + "epoch": 0.44, + "learning_rate": 1.2278535089031377e-05, + "loss": 0.2994, + "step": 5180 + }, + { + "epoch": 0.44, + "learning_rate": 1.22758317895298e-05, + "loss": 0.2975, + "step": 5181 + }, + { + "epoch": 0.44, + "learning_rate": 1.2273128314618353e-05, + "loss": 0.2916, + "step": 5182 + }, + { + "epoch": 0.44, + "learning_rate": 1.2270424664505405e-05, + "loss": 0.2406, + "step": 5183 + }, + { + "epoch": 0.44, + "learning_rate": 1.2267720839399347e-05, + "loss": 0.2346, + "step": 5184 + }, + { + "epoch": 0.44, + "learning_rate": 1.2265016839508568e-05, + "loss": 0.3267, + "step": 5185 + }, + { + "epoch": 0.44, + "learning_rate": 1.2262312665041482e-05, + "loss": 0.3231, + "step": 5186 + }, + { + "epoch": 0.44, + "learning_rate": 1.2259608316206519e-05, + "loss": 0.2839, + "step": 5187 + }, + { + "epoch": 0.44, + "learning_rate": 1.2256903793212107e-05, + "loss": 0.2876, + "step": 5188 + }, + { + "epoch": 0.44, + "learning_rate": 1.2254199096266705e-05, + "loss": 0.2913, + "step": 5189 + }, + { + "epoch": 0.44, + "learning_rate": 1.2251494225578775e-05, + "loss": 0.29, + "step": 5190 + }, + { + "epoch": 0.44, + "learning_rate": 1.2248789181356793e-05, + "loss": 0.2804, + "step": 5191 + }, + { + "epoch": 0.45, + "learning_rate": 1.2246083963809256e-05, + "loss": 0.339, + "step": 5192 + }, + { + "epoch": 0.45, + "learning_rate": 1.2243378573144663e-05, + "loss": 0.3062, + "step": 5193 + }, + { + "epoch": 0.45, + "learning_rate": 1.2240673009571536e-05, + "loss": 0.2581, + "step": 5194 + }, + { + "epoch": 0.45, + "learning_rate": 1.2237967273298401e-05, + "loss": 0.2989, + "step": 5195 + }, + { + "epoch": 0.45, + "learning_rate": 1.223526136453381e-05, + "loss": 0.282, + "step": 5196 + }, + { + "epoch": 0.45, + "learning_rate": 1.2232555283486319e-05, + "loss": 0.2779, + "step": 5197 + }, + { + "epoch": 0.45, + "learning_rate": 1.2229849030364496e-05, + "loss": 0.3223, + "step": 5198 + }, + { + "epoch": 0.45, + "learning_rate": 1.2227142605376928e-05, + "loss": 0.3434, + "step": 5199 + }, + { + "epoch": 0.45, + "learning_rate": 1.2224436008732213e-05, + "loss": 0.2767, + "step": 5200 + }, + { + "epoch": 0.45, + "learning_rate": 1.222172924063896e-05, + "loss": 0.3041, + "step": 5201 + }, + { + "epoch": 0.45, + "learning_rate": 1.2219022301305796e-05, + "loss": 0.6394, + "step": 5202 + }, + { + "epoch": 0.45, + "learning_rate": 1.221631519094136e-05, + "loss": 0.2842, + "step": 5203 + }, + { + "epoch": 0.45, + "learning_rate": 1.2213607909754297e-05, + "loss": 0.2719, + "step": 5204 + }, + { + "epoch": 0.45, + "learning_rate": 1.2210900457953274e-05, + "loss": 0.2858, + "step": 5205 + }, + { + "epoch": 0.45, + "learning_rate": 1.2208192835746973e-05, + "loss": 0.3089, + "step": 5206 + }, + { + "epoch": 0.45, + "learning_rate": 1.2205485043344074e-05, + "loss": 0.2872, + "step": 5207 + }, + { + "epoch": 0.45, + "learning_rate": 1.2202777080953285e-05, + "loss": 0.3708, + "step": 5208 + }, + { + "epoch": 0.45, + "learning_rate": 1.2200068948783325e-05, + "loss": 0.2668, + "step": 5209 + }, + { + "epoch": 0.45, + "learning_rate": 1.2197360647042922e-05, + "loss": 0.2662, + "step": 5210 + }, + { + "epoch": 0.45, + "learning_rate": 1.2194652175940817e-05, + "loss": 0.264, + "step": 5211 + }, + { + "epoch": 0.45, + "learning_rate": 1.2191943535685766e-05, + "loss": 0.3219, + "step": 5212 + }, + { + "epoch": 0.45, + "learning_rate": 1.218923472648654e-05, + "loss": 0.3112, + "step": 5213 + }, + { + "epoch": 0.45, + "learning_rate": 1.2186525748551914e-05, + "loss": 0.2975, + "step": 5214 + }, + { + "epoch": 0.45, + "learning_rate": 1.2183816602090693e-05, + "loss": 0.3105, + "step": 5215 + }, + { + "epoch": 0.45, + "learning_rate": 1.2181107287311675e-05, + "loss": 0.2872, + "step": 5216 + }, + { + "epoch": 0.45, + "learning_rate": 1.2178397804423685e-05, + "loss": 0.284, + "step": 5217 + }, + { + "epoch": 0.45, + "learning_rate": 1.217568815363556e-05, + "loss": 0.2841, + "step": 5218 + }, + { + "epoch": 0.45, + "learning_rate": 1.2172978335156136e-05, + "loss": 0.2945, + "step": 5219 + }, + { + "epoch": 0.45, + "learning_rate": 1.2170268349194281e-05, + "loss": 0.3187, + "step": 5220 + }, + { + "epoch": 0.45, + "learning_rate": 1.2167558195958867e-05, + "loss": 0.2859, + "step": 5221 + }, + { + "epoch": 0.45, + "learning_rate": 1.2164847875658776e-05, + "loss": 0.3312, + "step": 5222 + }, + { + "epoch": 0.45, + "learning_rate": 1.2162137388502908e-05, + "loss": 0.2891, + "step": 5223 + }, + { + "epoch": 0.45, + "learning_rate": 1.2159426734700175e-05, + "loss": 0.2958, + "step": 5224 + }, + { + "epoch": 0.45, + "learning_rate": 1.2156715914459498e-05, + "loss": 0.2496, + "step": 5225 + }, + { + "epoch": 0.45, + "learning_rate": 1.2154004927989815e-05, + "loss": 0.2876, + "step": 5226 + }, + { + "epoch": 0.45, + "learning_rate": 1.2151293775500076e-05, + "loss": 0.337, + "step": 5227 + }, + { + "epoch": 0.45, + "learning_rate": 1.214858245719924e-05, + "loss": 0.2776, + "step": 5228 + }, + { + "epoch": 0.45, + "learning_rate": 1.2145870973296288e-05, + "loss": 0.2784, + "step": 5229 + }, + { + "epoch": 0.45, + "learning_rate": 1.2143159324000204e-05, + "loss": 0.2466, + "step": 5230 + }, + { + "epoch": 0.45, + "learning_rate": 1.2140447509519988e-05, + "loss": 0.272, + "step": 5231 + }, + { + "epoch": 0.45, + "learning_rate": 1.2137735530064653e-05, + "loss": 0.3425, + "step": 5232 + }, + { + "epoch": 0.45, + "learning_rate": 1.2135023385843228e-05, + "loss": 0.3345, + "step": 5233 + }, + { + "epoch": 0.45, + "learning_rate": 1.2132311077064749e-05, + "loss": 0.2472, + "step": 5234 + }, + { + "epoch": 0.45, + "learning_rate": 1.2129598603938269e-05, + "loss": 0.2385, + "step": 5235 + }, + { + "epoch": 0.45, + "learning_rate": 1.212688596667285e-05, + "loss": 0.2809, + "step": 5236 + }, + { + "epoch": 0.45, + "learning_rate": 1.2124173165477572e-05, + "loss": 0.3051, + "step": 5237 + }, + { + "epoch": 0.45, + "learning_rate": 1.2121460200561521e-05, + "loss": 0.3031, + "step": 5238 + }, + { + "epoch": 0.45, + "learning_rate": 1.21187470721338e-05, + "loss": 0.3035, + "step": 5239 + }, + { + "epoch": 0.45, + "learning_rate": 1.2116033780403524e-05, + "loss": 0.2922, + "step": 5240 + }, + { + "epoch": 0.45, + "learning_rate": 1.211332032557982e-05, + "loss": 0.311, + "step": 5241 + }, + { + "epoch": 0.45, + "learning_rate": 1.2110606707871828e-05, + "loss": 0.2844, + "step": 5242 + }, + { + "epoch": 0.45, + "learning_rate": 1.2107892927488698e-05, + "loss": 0.2643, + "step": 5243 + }, + { + "epoch": 0.45, + "learning_rate": 1.2105178984639601e-05, + "loss": 0.2689, + "step": 5244 + }, + { + "epoch": 0.45, + "learning_rate": 1.2102464879533704e-05, + "loss": 0.3397, + "step": 5245 + }, + { + "epoch": 0.45, + "learning_rate": 1.2099750612380205e-05, + "loss": 0.3127, + "step": 5246 + }, + { + "epoch": 0.45, + "learning_rate": 1.2097036183388305e-05, + "loss": 0.3026, + "step": 5247 + }, + { + "epoch": 0.45, + "learning_rate": 1.2094321592767217e-05, + "loss": 0.3033, + "step": 5248 + }, + { + "epoch": 0.45, + "learning_rate": 1.209160684072617e-05, + "loss": 0.2864, + "step": 5249 + }, + { + "epoch": 0.45, + "learning_rate": 1.20888919274744e-05, + "loss": 0.2635, + "step": 5250 + }, + { + "epoch": 0.45, + "learning_rate": 1.2086176853221166e-05, + "loss": 0.6339, + "step": 5251 + }, + { + "epoch": 0.45, + "learning_rate": 1.2083461618175723e-05, + "loss": 0.2958, + "step": 5252 + }, + { + "epoch": 0.45, + "learning_rate": 1.2080746222547356e-05, + "loss": 0.5901, + "step": 5253 + }, + { + "epoch": 0.45, + "learning_rate": 1.2078030666545351e-05, + "loss": 0.2417, + "step": 5254 + }, + { + "epoch": 0.45, + "learning_rate": 1.207531495037901e-05, + "loss": 0.281, + "step": 5255 + }, + { + "epoch": 0.45, + "learning_rate": 1.207259907425765e-05, + "loss": 0.276, + "step": 5256 + }, + { + "epoch": 0.45, + "learning_rate": 1.206988303839059e-05, + "loss": 0.2995, + "step": 5257 + }, + { + "epoch": 0.45, + "learning_rate": 1.2067166842987175e-05, + "loss": 0.2656, + "step": 5258 + }, + { + "epoch": 0.45, + "learning_rate": 1.2064450488256751e-05, + "loss": 0.2841, + "step": 5259 + }, + { + "epoch": 0.45, + "learning_rate": 1.206173397440869e-05, + "loss": 0.3264, + "step": 5260 + }, + { + "epoch": 0.45, + "learning_rate": 1.2059017301652359e-05, + "loss": 0.3846, + "step": 5261 + }, + { + "epoch": 0.45, + "learning_rate": 1.2056300470197144e-05, + "loss": 0.2714, + "step": 5262 + }, + { + "epoch": 0.45, + "learning_rate": 1.2053583480252456e-05, + "loss": 0.3253, + "step": 5263 + }, + { + "epoch": 0.45, + "learning_rate": 1.2050866332027695e-05, + "loss": 0.2585, + "step": 5264 + }, + { + "epoch": 0.45, + "learning_rate": 1.2048149025732295e-05, + "loss": 0.2858, + "step": 5265 + }, + { + "epoch": 0.45, + "learning_rate": 1.2045431561575685e-05, + "loss": 0.3005, + "step": 5266 + }, + { + "epoch": 0.45, + "learning_rate": 1.2042713939767318e-05, + "loss": 0.2812, + "step": 5267 + }, + { + "epoch": 0.45, + "learning_rate": 1.2039996160516654e-05, + "loss": 0.2631, + "step": 5268 + }, + { + "epoch": 0.45, + "learning_rate": 1.2037278224033166e-05, + "loss": 0.2798, + "step": 5269 + }, + { + "epoch": 0.45, + "learning_rate": 1.2034560130526341e-05, + "loss": 0.239, + "step": 5270 + }, + { + "epoch": 0.45, + "learning_rate": 1.203184188020567e-05, + "loss": 0.3223, + "step": 5271 + }, + { + "epoch": 0.45, + "learning_rate": 1.2029123473280668e-05, + "loss": 0.2745, + "step": 5272 + }, + { + "epoch": 0.45, + "learning_rate": 1.2026404909960856e-05, + "loss": 0.2731, + "step": 5273 + }, + { + "epoch": 0.45, + "learning_rate": 1.2023686190455766e-05, + "loss": 0.2752, + "step": 5274 + }, + { + "epoch": 0.45, + "learning_rate": 1.2020967314974945e-05, + "loss": 0.2837, + "step": 5275 + }, + { + "epoch": 0.45, + "learning_rate": 1.2018248283727947e-05, + "loss": 0.2931, + "step": 5276 + }, + { + "epoch": 0.45, + "learning_rate": 1.2015529096924343e-05, + "loss": 0.2755, + "step": 5277 + }, + { + "epoch": 0.45, + "learning_rate": 1.2012809754773718e-05, + "loss": 0.2768, + "step": 5278 + }, + { + "epoch": 0.45, + "learning_rate": 1.2010090257485663e-05, + "loss": 0.3058, + "step": 5279 + }, + { + "epoch": 0.45, + "learning_rate": 1.2007370605269782e-05, + "loss": 0.2674, + "step": 5280 + }, + { + "epoch": 0.45, + "learning_rate": 1.200465079833569e-05, + "loss": 0.2902, + "step": 5281 + }, + { + "epoch": 0.45, + "learning_rate": 1.2001930836893026e-05, + "loss": 0.3199, + "step": 5282 + }, + { + "epoch": 0.45, + "learning_rate": 1.1999210721151421e-05, + "loss": 0.3159, + "step": 5283 + }, + { + "epoch": 0.45, + "learning_rate": 1.1996490451320532e-05, + "loss": 0.3105, + "step": 5284 + }, + { + "epoch": 0.45, + "learning_rate": 1.1993770027610023e-05, + "loss": 0.61, + "step": 5285 + }, + { + "epoch": 0.45, + "learning_rate": 1.1991049450229577e-05, + "loss": 0.3182, + "step": 5286 + }, + { + "epoch": 0.45, + "learning_rate": 1.1988328719388873e-05, + "loss": 0.274, + "step": 5287 + }, + { + "epoch": 0.45, + "learning_rate": 1.1985607835297618e-05, + "loss": 0.3, + "step": 5288 + }, + { + "epoch": 0.45, + "learning_rate": 1.1982886798165521e-05, + "loss": 0.288, + "step": 5289 + }, + { + "epoch": 0.45, + "learning_rate": 1.1980165608202303e-05, + "loss": 0.2626, + "step": 5290 + }, + { + "epoch": 0.45, + "learning_rate": 1.1977444265617713e-05, + "loss": 0.3077, + "step": 5291 + }, + { + "epoch": 0.45, + "learning_rate": 1.1974722770621483e-05, + "loss": 0.26, + "step": 5292 + }, + { + "epoch": 0.45, + "learning_rate": 1.1972001123423382e-05, + "loss": 0.2897, + "step": 5293 + }, + { + "epoch": 0.45, + "learning_rate": 1.1969279324233179e-05, + "loss": 0.2702, + "step": 5294 + }, + { + "epoch": 0.45, + "learning_rate": 1.1966557373260654e-05, + "loss": 0.2867, + "step": 5295 + }, + { + "epoch": 0.45, + "learning_rate": 1.1963835270715604e-05, + "loss": 0.2562, + "step": 5296 + }, + { + "epoch": 0.45, + "learning_rate": 1.1961113016807838e-05, + "loss": 0.2557, + "step": 5297 + }, + { + "epoch": 0.45, + "learning_rate": 1.1958390611747167e-05, + "loss": 0.2822, + "step": 5298 + }, + { + "epoch": 0.45, + "learning_rate": 1.1955668055743429e-05, + "loss": 0.2919, + "step": 5299 + }, + { + "epoch": 0.45, + "learning_rate": 1.1952945349006455e-05, + "loss": 0.2864, + "step": 5300 + }, + { + "epoch": 0.45, + "learning_rate": 1.1950222491746109e-05, + "loss": 0.3005, + "step": 5301 + }, + { + "epoch": 0.45, + "learning_rate": 1.1947499484172245e-05, + "loss": 0.2961, + "step": 5302 + }, + { + "epoch": 0.45, + "learning_rate": 1.1944776326494745e-05, + "loss": 0.3578, + "step": 5303 + }, + { + "epoch": 0.45, + "learning_rate": 1.1942053018923494e-05, + "loss": 0.2657, + "step": 5304 + }, + { + "epoch": 0.45, + "learning_rate": 1.1939329561668396e-05, + "loss": 0.3127, + "step": 5305 + }, + { + "epoch": 0.45, + "learning_rate": 1.1936605954939355e-05, + "loss": 0.2883, + "step": 5306 + }, + { + "epoch": 0.45, + "learning_rate": 1.1933882198946296e-05, + "loss": 0.3423, + "step": 5307 + }, + { + "epoch": 0.45, + "learning_rate": 1.1931158293899154e-05, + "loss": 0.2751, + "step": 5308 + }, + { + "epoch": 0.46, + "learning_rate": 1.1928434240007869e-05, + "loss": 0.3028, + "step": 5309 + }, + { + "epoch": 0.46, + "learning_rate": 1.1925710037482405e-05, + "loss": 0.3023, + "step": 5310 + }, + { + "epoch": 0.46, + "learning_rate": 1.1922985686532726e-05, + "loss": 0.6001, + "step": 5311 + }, + { + "epoch": 0.46, + "learning_rate": 1.192026118736881e-05, + "loss": 0.28, + "step": 5312 + }, + { + "epoch": 0.46, + "learning_rate": 1.1917536540200655e-05, + "loss": 0.3248, + "step": 5313 + }, + { + "epoch": 0.46, + "learning_rate": 1.1914811745238256e-05, + "loss": 0.2647, + "step": 5314 + }, + { + "epoch": 0.46, + "learning_rate": 1.1912086802691627e-05, + "loss": 0.2952, + "step": 5315 + }, + { + "epoch": 0.46, + "learning_rate": 1.1909361712770796e-05, + "loss": 0.268, + "step": 5316 + }, + { + "epoch": 0.46, + "learning_rate": 1.19066364756858e-05, + "loss": 0.2814, + "step": 5317 + }, + { + "epoch": 0.46, + "learning_rate": 1.1903911091646684e-05, + "loss": 0.2876, + "step": 5318 + }, + { + "epoch": 0.46, + "learning_rate": 1.190118556086351e-05, + "loss": 0.3043, + "step": 5319 + }, + { + "epoch": 0.46, + "learning_rate": 1.1898459883546346e-05, + "loss": 0.3002, + "step": 5320 + }, + { + "epoch": 0.46, + "learning_rate": 1.1895734059905275e-05, + "loss": 0.2681, + "step": 5321 + }, + { + "epoch": 0.46, + "learning_rate": 1.1893008090150389e-05, + "loss": 0.2776, + "step": 5322 + }, + { + "epoch": 0.46, + "learning_rate": 1.1890281974491794e-05, + "loss": 0.2712, + "step": 5323 + }, + { + "epoch": 0.46, + "learning_rate": 1.1887555713139605e-05, + "loss": 0.3186, + "step": 5324 + }, + { + "epoch": 0.46, + "learning_rate": 1.1884829306303947e-05, + "loss": 0.2463, + "step": 5325 + }, + { + "epoch": 0.46, + "learning_rate": 1.188210275419496e-05, + "loss": 0.2753, + "step": 5326 + }, + { + "epoch": 0.46, + "learning_rate": 1.1879376057022793e-05, + "loss": 0.2542, + "step": 5327 + }, + { + "epoch": 0.46, + "learning_rate": 1.1876649214997602e-05, + "loss": 0.2787, + "step": 5328 + }, + { + "epoch": 0.46, + "learning_rate": 1.187392222832957e-05, + "loss": 0.2897, + "step": 5329 + }, + { + "epoch": 0.46, + "learning_rate": 1.1871195097228864e-05, + "loss": 0.2876, + "step": 5330 + }, + { + "epoch": 0.46, + "learning_rate": 1.186846782190569e-05, + "loss": 0.2892, + "step": 5331 + }, + { + "epoch": 0.46, + "learning_rate": 1.186574040257025e-05, + "loss": 0.3063, + "step": 5332 + }, + { + "epoch": 0.46, + "learning_rate": 1.1863012839432755e-05, + "loss": 0.2781, + "step": 5333 + }, + { + "epoch": 0.46, + "learning_rate": 1.1860285132703435e-05, + "loss": 0.256, + "step": 5334 + }, + { + "epoch": 0.46, + "learning_rate": 1.1857557282592531e-05, + "loss": 0.2599, + "step": 5335 + }, + { + "epoch": 0.46, + "learning_rate": 1.1854829289310293e-05, + "loss": 0.2654, + "step": 5336 + }, + { + "epoch": 0.46, + "learning_rate": 1.1852101153066976e-05, + "loss": 0.3451, + "step": 5337 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849372874072852e-05, + "loss": 0.6012, + "step": 5338 + }, + { + "epoch": 0.46, + "learning_rate": 1.1846644452538207e-05, + "loss": 0.289, + "step": 5339 + }, + { + "epoch": 0.46, + "learning_rate": 1.184391588867333e-05, + "loss": 0.2855, + "step": 5340 + }, + { + "epoch": 0.46, + "learning_rate": 1.1841187182688527e-05, + "loss": 0.2746, + "step": 5341 + }, + { + "epoch": 0.46, + "learning_rate": 1.1838458334794116e-05, + "loss": 0.291, + "step": 5342 + }, + { + "epoch": 0.46, + "learning_rate": 1.1835729345200422e-05, + "loss": 0.3144, + "step": 5343 + }, + { + "epoch": 0.46, + "learning_rate": 1.1833000214117776e-05, + "loss": 0.3074, + "step": 5344 + }, + { + "epoch": 0.46, + "learning_rate": 1.1830270941756532e-05, + "loss": 0.2659, + "step": 5345 + }, + { + "epoch": 0.46, + "learning_rate": 1.1827541528327052e-05, + "loss": 0.2766, + "step": 5346 + }, + { + "epoch": 0.46, + "learning_rate": 1.1824811974039694e-05, + "loss": 0.2963, + "step": 5347 + }, + { + "epoch": 0.46, + "learning_rate": 1.182208227910485e-05, + "loss": 0.2395, + "step": 5348 + }, + { + "epoch": 0.46, + "learning_rate": 1.1819352443732908e-05, + "loss": 0.2582, + "step": 5349 + }, + { + "epoch": 0.46, + "learning_rate": 1.181662246813427e-05, + "loss": 0.2554, + "step": 5350 + }, + { + "epoch": 0.46, + "learning_rate": 1.1813892352519343e-05, + "loss": 0.2722, + "step": 5351 + }, + { + "epoch": 0.46, + "learning_rate": 1.1811162097098559e-05, + "loss": 0.2477, + "step": 5352 + }, + { + "epoch": 0.46, + "learning_rate": 1.180843170208235e-05, + "loss": 0.2471, + "step": 5353 + }, + { + "epoch": 0.46, + "learning_rate": 1.180570116768116e-05, + "loss": 0.2558, + "step": 5354 + }, + { + "epoch": 0.46, + "learning_rate": 1.180297049410545e-05, + "loss": 0.3052, + "step": 5355 + }, + { + "epoch": 0.46, + "learning_rate": 1.1800239681565679e-05, + "loss": 0.2889, + "step": 5356 + }, + { + "epoch": 0.46, + "learning_rate": 1.1797508730272329e-05, + "loss": 0.2632, + "step": 5357 + }, + { + "epoch": 0.46, + "learning_rate": 1.179477764043589e-05, + "loss": 0.2872, + "step": 5358 + }, + { + "epoch": 0.46, + "learning_rate": 1.1792046412266857e-05, + "loss": 0.2975, + "step": 5359 + }, + { + "epoch": 0.46, + "learning_rate": 1.1789315045975742e-05, + "loss": 0.2983, + "step": 5360 + }, + { + "epoch": 0.46, + "learning_rate": 1.1786583541773064e-05, + "loss": 0.2526, + "step": 5361 + }, + { + "epoch": 0.46, + "learning_rate": 1.1783851899869357e-05, + "loss": 0.3323, + "step": 5362 + }, + { + "epoch": 0.46, + "learning_rate": 1.1781120120475156e-05, + "loss": 0.2587, + "step": 5363 + }, + { + "epoch": 0.46, + "learning_rate": 1.1778388203801019e-05, + "loss": 0.3342, + "step": 5364 + }, + { + "epoch": 0.46, + "learning_rate": 1.1775656150057507e-05, + "loss": 0.3036, + "step": 5365 + }, + { + "epoch": 0.46, + "learning_rate": 1.1772923959455188e-05, + "loss": 0.266, + "step": 5366 + }, + { + "epoch": 0.46, + "learning_rate": 1.1770191632204656e-05, + "loss": 0.2959, + "step": 5367 + }, + { + "epoch": 0.46, + "learning_rate": 1.1767459168516497e-05, + "loss": 0.2777, + "step": 5368 + }, + { + "epoch": 0.46, + "learning_rate": 1.176472656860132e-05, + "loss": 0.338, + "step": 5369 + }, + { + "epoch": 0.46, + "learning_rate": 1.176199383266974e-05, + "loss": 0.3168, + "step": 5370 + }, + { + "epoch": 0.46, + "learning_rate": 1.175926096093238e-05, + "loss": 0.2654, + "step": 5371 + }, + { + "epoch": 0.46, + "learning_rate": 1.1756527953599877e-05, + "loss": 0.5797, + "step": 5372 + }, + { + "epoch": 0.46, + "learning_rate": 1.175379481088288e-05, + "loss": 0.2917, + "step": 5373 + }, + { + "epoch": 0.46, + "learning_rate": 1.1751061532992045e-05, + "loss": 0.6058, + "step": 5374 + }, + { + "epoch": 0.46, + "learning_rate": 1.1748328120138038e-05, + "loss": 0.3195, + "step": 5375 + }, + { + "epoch": 0.46, + "learning_rate": 1.1745594572531538e-05, + "loss": 0.3198, + "step": 5376 + }, + { + "epoch": 0.46, + "learning_rate": 1.1742860890383234e-05, + "loss": 0.2981, + "step": 5377 + }, + { + "epoch": 0.46, + "learning_rate": 1.1740127073903826e-05, + "loss": 0.3001, + "step": 5378 + }, + { + "epoch": 0.46, + "learning_rate": 1.1737393123304019e-05, + "loss": 0.343, + "step": 5379 + }, + { + "epoch": 0.46, + "learning_rate": 1.1734659038794535e-05, + "loss": 0.2911, + "step": 5380 + }, + { + "epoch": 0.46, + "learning_rate": 1.1731924820586106e-05, + "loss": 0.2665, + "step": 5381 + }, + { + "epoch": 0.46, + "learning_rate": 1.1729190468889466e-05, + "loss": 0.3011, + "step": 5382 + }, + { + "epoch": 0.46, + "learning_rate": 1.172645598391537e-05, + "loss": 0.3096, + "step": 5383 + }, + { + "epoch": 0.46, + "learning_rate": 1.172372136587458e-05, + "loss": 0.3055, + "step": 5384 + }, + { + "epoch": 0.46, + "learning_rate": 1.1720986614977861e-05, + "loss": 0.28, + "step": 5385 + }, + { + "epoch": 0.46, + "learning_rate": 1.1718251731436001e-05, + "loss": 0.2758, + "step": 5386 + }, + { + "epoch": 0.46, + "learning_rate": 1.1715516715459784e-05, + "loss": 0.2798, + "step": 5387 + }, + { + "epoch": 0.46, + "learning_rate": 1.1712781567260018e-05, + "loss": 0.2996, + "step": 5388 + }, + { + "epoch": 0.46, + "learning_rate": 1.171004628704751e-05, + "loss": 0.2979, + "step": 5389 + }, + { + "epoch": 0.46, + "learning_rate": 1.1707310875033085e-05, + "loss": 0.2941, + "step": 5390 + }, + { + "epoch": 0.46, + "learning_rate": 1.170457533142757e-05, + "loss": 0.2889, + "step": 5391 + }, + { + "epoch": 0.46, + "learning_rate": 1.1701839656441813e-05, + "loss": 0.2754, + "step": 5392 + }, + { + "epoch": 0.46, + "learning_rate": 1.1699103850286668e-05, + "loss": 0.274, + "step": 5393 + }, + { + "epoch": 0.46, + "learning_rate": 1.169636791317299e-05, + "loss": 0.3447, + "step": 5394 + }, + { + "epoch": 0.46, + "learning_rate": 1.1693631845311657e-05, + "loss": 0.2928, + "step": 5395 + }, + { + "epoch": 0.46, + "learning_rate": 1.1690895646913551e-05, + "loss": 0.293, + "step": 5396 + }, + { + "epoch": 0.46, + "learning_rate": 1.168815931818956e-05, + "loss": 0.2974, + "step": 5397 + }, + { + "epoch": 0.46, + "learning_rate": 1.1685422859350592e-05, + "loss": 0.3529, + "step": 5398 + }, + { + "epoch": 0.46, + "learning_rate": 1.1682686270607558e-05, + "loss": 0.2847, + "step": 5399 + }, + { + "epoch": 0.46, + "learning_rate": 1.1679949552171382e-05, + "loss": 0.3314, + "step": 5400 + }, + { + "epoch": 0.46, + "learning_rate": 1.1677212704252994e-05, + "loss": 0.285, + "step": 5401 + }, + { + "epoch": 0.46, + "learning_rate": 1.1674475727063337e-05, + "loss": 0.3358, + "step": 5402 + }, + { + "epoch": 0.46, + "learning_rate": 1.1671738620813367e-05, + "loss": 0.325, + "step": 5403 + }, + { + "epoch": 0.46, + "learning_rate": 1.1669001385714041e-05, + "loss": 0.2756, + "step": 5404 + }, + { + "epoch": 0.46, + "learning_rate": 1.166626402197634e-05, + "loss": 0.3076, + "step": 5405 + }, + { + "epoch": 0.46, + "learning_rate": 1.1663526529811235e-05, + "loss": 0.2558, + "step": 5406 + }, + { + "epoch": 0.46, + "learning_rate": 1.166078890942973e-05, + "loss": 0.2808, + "step": 5407 + }, + { + "epoch": 0.46, + "learning_rate": 1.165805116104282e-05, + "loss": 0.2755, + "step": 5408 + }, + { + "epoch": 0.46, + "learning_rate": 1.1655313284861521e-05, + "loss": 0.3037, + "step": 5409 + }, + { + "epoch": 0.46, + "learning_rate": 1.165257528109685e-05, + "loss": 0.298, + "step": 5410 + }, + { + "epoch": 0.46, + "learning_rate": 1.1649837149959844e-05, + "loss": 0.2918, + "step": 5411 + }, + { + "epoch": 0.46, + "learning_rate": 1.1647098891661543e-05, + "loss": 0.3099, + "step": 5412 + }, + { + "epoch": 0.46, + "learning_rate": 1.1644360506412997e-05, + "loss": 0.2764, + "step": 5413 + }, + { + "epoch": 0.46, + "learning_rate": 1.164162199442527e-05, + "loss": 0.2953, + "step": 5414 + }, + { + "epoch": 0.46, + "learning_rate": 1.1638883355909429e-05, + "loss": 0.2708, + "step": 5415 + }, + { + "epoch": 0.46, + "learning_rate": 1.1636144591076557e-05, + "loss": 0.288, + "step": 5416 + }, + { + "epoch": 0.46, + "learning_rate": 1.1633405700137744e-05, + "loss": 0.2725, + "step": 5417 + }, + { + "epoch": 0.46, + "learning_rate": 1.163066668330409e-05, + "loss": 0.2901, + "step": 5418 + }, + { + "epoch": 0.46, + "learning_rate": 1.162792754078671e-05, + "loss": 0.5836, + "step": 5419 + }, + { + "epoch": 0.46, + "learning_rate": 1.1625188272796714e-05, + "loss": 0.289, + "step": 5420 + }, + { + "epoch": 0.46, + "learning_rate": 1.1622448879545238e-05, + "loss": 0.2429, + "step": 5421 + }, + { + "epoch": 0.46, + "learning_rate": 1.161970936124342e-05, + "loss": 0.2787, + "step": 5422 + }, + { + "epoch": 0.46, + "learning_rate": 1.1616969718102404e-05, + "loss": 0.2667, + "step": 5423 + }, + { + "epoch": 0.46, + "learning_rate": 1.1614229950333358e-05, + "loss": 0.2722, + "step": 5424 + }, + { + "epoch": 0.47, + "learning_rate": 1.161149005814744e-05, + "loss": 0.2934, + "step": 5425 + }, + { + "epoch": 0.47, + "learning_rate": 1.1608750041755832e-05, + "loss": 0.249, + "step": 5426 + }, + { + "epoch": 0.47, + "learning_rate": 1.1606009901369718e-05, + "loss": 0.2656, + "step": 5427 + }, + { + "epoch": 0.47, + "learning_rate": 1.1603269637200296e-05, + "loss": 0.6384, + "step": 5428 + }, + { + "epoch": 0.47, + "learning_rate": 1.1600529249458773e-05, + "loss": 0.3127, + "step": 5429 + }, + { + "epoch": 0.47, + "learning_rate": 1.1597788738356365e-05, + "loss": 0.2834, + "step": 5430 + }, + { + "epoch": 0.47, + "learning_rate": 1.1595048104104296e-05, + "loss": 0.3002, + "step": 5431 + }, + { + "epoch": 0.47, + "learning_rate": 1.15923073469138e-05, + "loss": 0.2895, + "step": 5432 + }, + { + "epoch": 0.47, + "learning_rate": 1.1589566466996124e-05, + "loss": 0.6115, + "step": 5433 + }, + { + "epoch": 0.47, + "learning_rate": 1.1586825464562515e-05, + "loss": 0.311, + "step": 5434 + }, + { + "epoch": 0.47, + "learning_rate": 1.1584084339824242e-05, + "loss": 0.2893, + "step": 5435 + }, + { + "epoch": 0.47, + "learning_rate": 1.1581343092992574e-05, + "loss": 0.3311, + "step": 5436 + }, + { + "epoch": 0.47, + "learning_rate": 1.1578601724278794e-05, + "loss": 0.291, + "step": 5437 + }, + { + "epoch": 0.47, + "learning_rate": 1.1575860233894195e-05, + "loss": 0.2982, + "step": 5438 + }, + { + "epoch": 0.47, + "learning_rate": 1.1573118622050075e-05, + "loss": 0.3025, + "step": 5439 + }, + { + "epoch": 0.47, + "learning_rate": 1.1570376888957742e-05, + "loss": 0.3111, + "step": 5440 + }, + { + "epoch": 0.47, + "learning_rate": 1.1567635034828521e-05, + "loss": 0.2822, + "step": 5441 + }, + { + "epoch": 0.47, + "learning_rate": 1.1564893059873734e-05, + "loss": 0.2585, + "step": 5442 + }, + { + "epoch": 0.47, + "learning_rate": 1.1562150964304727e-05, + "loss": 0.2985, + "step": 5443 + }, + { + "epoch": 0.47, + "learning_rate": 1.1559408748332841e-05, + "loss": 0.3049, + "step": 5444 + }, + { + "epoch": 0.47, + "learning_rate": 1.1556666412169435e-05, + "loss": 0.3305, + "step": 5445 + }, + { + "epoch": 0.47, + "learning_rate": 1.1553923956025871e-05, + "loss": 0.2461, + "step": 5446 + }, + { + "epoch": 0.47, + "learning_rate": 1.1551181380113528e-05, + "loss": 0.287, + "step": 5447 + }, + { + "epoch": 0.47, + "learning_rate": 1.1548438684643789e-05, + "loss": 0.282, + "step": 5448 + }, + { + "epoch": 0.47, + "learning_rate": 1.1545695869828044e-05, + "loss": 0.3109, + "step": 5449 + }, + { + "epoch": 0.47, + "learning_rate": 1.1542952935877703e-05, + "loss": 0.2466, + "step": 5450 + }, + { + "epoch": 0.47, + "learning_rate": 1.1540209883004171e-05, + "loss": 0.2564, + "step": 5451 + }, + { + "epoch": 0.47, + "learning_rate": 1.1537466711418874e-05, + "loss": 0.2736, + "step": 5452 + }, + { + "epoch": 0.47, + "learning_rate": 1.1534723421333239e-05, + "loss": 0.2795, + "step": 5453 + }, + { + "epoch": 0.47, + "learning_rate": 1.1531980012958706e-05, + "loss": 0.2607, + "step": 5454 + }, + { + "epoch": 0.47, + "learning_rate": 1.152923648650672e-05, + "loss": 0.2598, + "step": 5455 + }, + { + "epoch": 0.47, + "learning_rate": 1.1526492842188746e-05, + "loss": 0.2688, + "step": 5456 + }, + { + "epoch": 0.47, + "learning_rate": 1.1523749080216246e-05, + "loss": 0.2988, + "step": 5457 + }, + { + "epoch": 0.47, + "learning_rate": 1.1521005200800694e-05, + "loss": 0.2971, + "step": 5458 + }, + { + "epoch": 0.47, + "learning_rate": 1.1518261204153578e-05, + "loss": 0.2606, + "step": 5459 + }, + { + "epoch": 0.47, + "learning_rate": 1.151551709048639e-05, + "loss": 0.2984, + "step": 5460 + }, + { + "epoch": 0.47, + "learning_rate": 1.1512772860010633e-05, + "loss": 0.3318, + "step": 5461 + }, + { + "epoch": 0.47, + "learning_rate": 1.1510028512937818e-05, + "loss": 0.3378, + "step": 5462 + }, + { + "epoch": 0.47, + "learning_rate": 1.1507284049479467e-05, + "loss": 0.2794, + "step": 5463 + }, + { + "epoch": 0.47, + "learning_rate": 1.150453946984711e-05, + "loss": 0.3212, + "step": 5464 + }, + { + "epoch": 0.47, + "learning_rate": 1.1501794774252284e-05, + "loss": 0.2617, + "step": 5465 + }, + { + "epoch": 0.47, + "learning_rate": 1.149904996290654e-05, + "loss": 0.6039, + "step": 5466 + }, + { + "epoch": 0.47, + "learning_rate": 1.1496305036021427e-05, + "loss": 0.2834, + "step": 5467 + }, + { + "epoch": 0.47, + "learning_rate": 1.1493559993808518e-05, + "loss": 0.3163, + "step": 5468 + }, + { + "epoch": 0.47, + "learning_rate": 1.1490814836479384e-05, + "loss": 0.2886, + "step": 5469 + }, + { + "epoch": 0.47, + "learning_rate": 1.148806956424561e-05, + "loss": 0.2926, + "step": 5470 + }, + { + "epoch": 0.47, + "learning_rate": 1.148532417731879e-05, + "loss": 0.3063, + "step": 5471 + }, + { + "epoch": 0.47, + "learning_rate": 1.1482578675910514e-05, + "loss": 0.2313, + "step": 5472 + }, + { + "epoch": 0.47, + "learning_rate": 1.1479833060232401e-05, + "loss": 0.2841, + "step": 5473 + }, + { + "epoch": 0.47, + "learning_rate": 1.1477087330496071e-05, + "loss": 0.2817, + "step": 5474 + }, + { + "epoch": 0.47, + "learning_rate": 1.1474341486913146e-05, + "loss": 0.2673, + "step": 5475 + }, + { + "epoch": 0.47, + "learning_rate": 1.1471595529695266e-05, + "loss": 0.2932, + "step": 5476 + }, + { + "epoch": 0.47, + "learning_rate": 1.1468849459054073e-05, + "loss": 0.2584, + "step": 5477 + }, + { + "epoch": 0.47, + "learning_rate": 1.146610327520122e-05, + "loss": 0.2826, + "step": 5478 + }, + { + "epoch": 0.47, + "learning_rate": 1.1463356978348373e-05, + "loss": 0.3217, + "step": 5479 + }, + { + "epoch": 0.47, + "learning_rate": 1.14606105687072e-05, + "loss": 0.2513, + "step": 5480 + }, + { + "epoch": 0.47, + "learning_rate": 1.145786404648938e-05, + "loss": 0.3086, + "step": 5481 + }, + { + "epoch": 0.47, + "learning_rate": 1.1455117411906604e-05, + "loss": 0.3024, + "step": 5482 + }, + { + "epoch": 0.47, + "learning_rate": 1.145237066517057e-05, + "loss": 0.3093, + "step": 5483 + }, + { + "epoch": 0.47, + "learning_rate": 1.1449623806492977e-05, + "loss": 0.296, + "step": 5484 + }, + { + "epoch": 0.47, + "learning_rate": 1.1446876836085546e-05, + "loss": 0.2818, + "step": 5485 + }, + { + "epoch": 0.47, + "learning_rate": 1.1444129754159998e-05, + "loss": 0.2956, + "step": 5486 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441382560928063e-05, + "loss": 0.2988, + "step": 5487 + }, + { + "epoch": 0.47, + "learning_rate": 1.1438635256601484e-05, + "loss": 0.3042, + "step": 5488 + }, + { + "epoch": 0.47, + "learning_rate": 1.1435887841392009e-05, + "loss": 0.3234, + "step": 5489 + }, + { + "epoch": 0.47, + "learning_rate": 1.1433140315511392e-05, + "loss": 0.2617, + "step": 5490 + }, + { + "epoch": 0.47, + "learning_rate": 1.14303926791714e-05, + "loss": 0.3085, + "step": 5491 + }, + { + "epoch": 0.47, + "learning_rate": 1.142764493258381e-05, + "loss": 0.2785, + "step": 5492 + }, + { + "epoch": 0.47, + "learning_rate": 1.1424897075960402e-05, + "loss": 0.2516, + "step": 5493 + }, + { + "epoch": 0.47, + "learning_rate": 1.1422149109512967e-05, + "loss": 0.3103, + "step": 5494 + }, + { + "epoch": 0.47, + "learning_rate": 1.1419401033453308e-05, + "loss": 0.2817, + "step": 5495 + }, + { + "epoch": 0.47, + "learning_rate": 1.1416652847993231e-05, + "loss": 0.2701, + "step": 5496 + }, + { + "epoch": 0.47, + "learning_rate": 1.1413904553344551e-05, + "loss": 0.2699, + "step": 5497 + }, + { + "epoch": 0.47, + "learning_rate": 1.1411156149719094e-05, + "loss": 0.6191, + "step": 5498 + }, + { + "epoch": 0.47, + "learning_rate": 1.1408407637328694e-05, + "loss": 0.2776, + "step": 5499 + }, + { + "epoch": 0.47, + "learning_rate": 1.1405659016385191e-05, + "loss": 0.2715, + "step": 5500 + }, + { + "epoch": 0.47, + "learning_rate": 1.1402910287100436e-05, + "loss": 0.2797, + "step": 5501 + }, + { + "epoch": 0.47, + "learning_rate": 1.1400161449686293e-05, + "loss": 0.2726, + "step": 5502 + }, + { + "epoch": 0.47, + "learning_rate": 1.1397412504354621e-05, + "loss": 0.3041, + "step": 5503 + }, + { + "epoch": 0.47, + "learning_rate": 1.1394663451317296e-05, + "loss": 0.3093, + "step": 5504 + }, + { + "epoch": 0.47, + "learning_rate": 1.1391914290786206e-05, + "loss": 0.2958, + "step": 5505 + }, + { + "epoch": 0.47, + "learning_rate": 1.1389165022973238e-05, + "loss": 0.2703, + "step": 5506 + }, + { + "epoch": 0.47, + "learning_rate": 1.1386415648090296e-05, + "loss": 0.2881, + "step": 5507 + }, + { + "epoch": 0.47, + "learning_rate": 1.1383666166349286e-05, + "loss": 0.3035, + "step": 5508 + }, + { + "epoch": 0.47, + "learning_rate": 1.1380916577962127e-05, + "loss": 0.3101, + "step": 5509 + }, + { + "epoch": 0.47, + "learning_rate": 1.1378166883140738e-05, + "loss": 0.3297, + "step": 5510 + }, + { + "epoch": 0.47, + "learning_rate": 1.1375417082097055e-05, + "loss": 0.2722, + "step": 5511 + }, + { + "epoch": 0.47, + "learning_rate": 1.1372667175043021e-05, + "loss": 0.3062, + "step": 5512 + }, + { + "epoch": 0.47, + "learning_rate": 1.1369917162190585e-05, + "loss": 0.2759, + "step": 5513 + }, + { + "epoch": 0.47, + "learning_rate": 1.1367167043751701e-05, + "loss": 0.2617, + "step": 5514 + }, + { + "epoch": 0.47, + "learning_rate": 1.1364416819938338e-05, + "loss": 0.2924, + "step": 5515 + }, + { + "epoch": 0.47, + "learning_rate": 1.1361666490962468e-05, + "loss": 0.2826, + "step": 5516 + }, + { + "epoch": 0.47, + "learning_rate": 1.1358916057036074e-05, + "loss": 0.2865, + "step": 5517 + }, + { + "epoch": 0.47, + "learning_rate": 1.1356165518371142e-05, + "loss": 0.2792, + "step": 5518 + }, + { + "epoch": 0.47, + "learning_rate": 1.1353414875179673e-05, + "loss": 0.2476, + "step": 5519 + }, + { + "epoch": 0.47, + "learning_rate": 1.1350664127673675e-05, + "loss": 0.3047, + "step": 5520 + }, + { + "epoch": 0.47, + "learning_rate": 1.1347913276065162e-05, + "loss": 0.2776, + "step": 5521 + }, + { + "epoch": 0.47, + "learning_rate": 1.134516232056615e-05, + "loss": 0.261, + "step": 5522 + }, + { + "epoch": 0.47, + "learning_rate": 1.1342411261388672e-05, + "loss": 0.3355, + "step": 5523 + }, + { + "epoch": 0.47, + "learning_rate": 1.133966009874477e-05, + "loss": 0.2893, + "step": 5524 + }, + { + "epoch": 0.47, + "learning_rate": 1.1336908832846485e-05, + "loss": 0.2431, + "step": 5525 + }, + { + "epoch": 0.47, + "learning_rate": 1.1334157463905876e-05, + "loss": 0.3019, + "step": 5526 + }, + { + "epoch": 0.47, + "learning_rate": 1.1331405992134999e-05, + "loss": 0.2971, + "step": 5527 + }, + { + "epoch": 0.47, + "learning_rate": 1.1328654417745931e-05, + "loss": 0.2949, + "step": 5528 + }, + { + "epoch": 0.47, + "learning_rate": 1.132590274095074e-05, + "loss": 0.5835, + "step": 5529 + }, + { + "epoch": 0.47, + "learning_rate": 1.132315096196152e-05, + "loss": 0.27, + "step": 5530 + }, + { + "epoch": 0.47, + "learning_rate": 1.1320399080990361e-05, + "loss": 0.3137, + "step": 5531 + }, + { + "epoch": 0.47, + "learning_rate": 1.1317647098249364e-05, + "loss": 0.2603, + "step": 5532 + }, + { + "epoch": 0.47, + "learning_rate": 1.1314895013950645e-05, + "loss": 0.2945, + "step": 5533 + }, + { + "epoch": 0.47, + "learning_rate": 1.1312142828306309e-05, + "loss": 0.2939, + "step": 5534 + }, + { + "epoch": 0.47, + "learning_rate": 1.1309390541528492e-05, + "loss": 0.3442, + "step": 5535 + }, + { + "epoch": 0.47, + "learning_rate": 1.130663815382932e-05, + "loss": 0.3185, + "step": 5536 + }, + { + "epoch": 0.47, + "learning_rate": 1.1303885665420932e-05, + "loss": 0.2907, + "step": 5537 + }, + { + "epoch": 0.47, + "learning_rate": 1.1301133076515482e-05, + "loss": 0.2894, + "step": 5538 + }, + { + "epoch": 0.47, + "learning_rate": 1.1298380387325124e-05, + "loss": 0.257, + "step": 5539 + }, + { + "epoch": 0.47, + "learning_rate": 1.1295627598062025e-05, + "loss": 0.2973, + "step": 5540 + }, + { + "epoch": 0.47, + "learning_rate": 1.1292874708938346e-05, + "loss": 0.2645, + "step": 5541 + }, + { + "epoch": 0.48, + "learning_rate": 1.1290121720166277e-05, + "loss": 0.2813, + "step": 5542 + }, + { + "epoch": 0.48, + "learning_rate": 1.1287368631957998e-05, + "loss": 0.2355, + "step": 5543 + }, + { + "epoch": 0.48, + "learning_rate": 1.1284615444525706e-05, + "loss": 0.2487, + "step": 5544 + }, + { + "epoch": 0.48, + "learning_rate": 1.1281862158081605e-05, + "loss": 0.2957, + "step": 5545 + }, + { + "epoch": 0.48, + "learning_rate": 1.1279108772837901e-05, + "loss": 0.3004, + "step": 5546 + }, + { + "epoch": 0.48, + "learning_rate": 1.1276355289006818e-05, + "loss": 0.3286, + "step": 5547 + }, + { + "epoch": 0.48, + "learning_rate": 1.127360170680057e-05, + "loss": 0.2992, + "step": 5548 + }, + { + "epoch": 0.48, + "learning_rate": 1.1270848026431396e-05, + "loss": 0.5975, + "step": 5549 + }, + { + "epoch": 0.48, + "learning_rate": 1.1268094248111536e-05, + "loss": 0.2502, + "step": 5550 + }, + { + "epoch": 0.48, + "learning_rate": 1.1265340372053237e-05, + "loss": 0.3278, + "step": 5551 + }, + { + "epoch": 0.48, + "learning_rate": 1.1262586398468759e-05, + "loss": 0.3055, + "step": 5552 + }, + { + "epoch": 0.48, + "learning_rate": 1.1259832327570354e-05, + "loss": 0.2346, + "step": 5553 + }, + { + "epoch": 0.48, + "learning_rate": 1.1257078159570303e-05, + "loss": 0.3109, + "step": 5554 + }, + { + "epoch": 0.48, + "learning_rate": 1.1254323894680876e-05, + "loss": 0.2964, + "step": 5555 + }, + { + "epoch": 0.48, + "learning_rate": 1.125156953311436e-05, + "loss": 0.2648, + "step": 5556 + }, + { + "epoch": 0.48, + "learning_rate": 1.1248815075083051e-05, + "loss": 0.269, + "step": 5557 + }, + { + "epoch": 0.48, + "learning_rate": 1.1246060520799244e-05, + "loss": 0.2689, + "step": 5558 + }, + { + "epoch": 0.48, + "learning_rate": 1.1243305870475255e-05, + "loss": 0.305, + "step": 5559 + }, + { + "epoch": 0.48, + "learning_rate": 1.1240551124323386e-05, + "loss": 0.2968, + "step": 5560 + }, + { + "epoch": 0.48, + "learning_rate": 1.1237796282555968e-05, + "loss": 0.2881, + "step": 5561 + }, + { + "epoch": 0.48, + "learning_rate": 1.1235041345385328e-05, + "loss": 0.2873, + "step": 5562 + }, + { + "epoch": 0.48, + "learning_rate": 1.1232286313023807e-05, + "loss": 0.3195, + "step": 5563 + }, + { + "epoch": 0.48, + "learning_rate": 1.1229531185683743e-05, + "loss": 0.3179, + "step": 5564 + }, + { + "epoch": 0.48, + "learning_rate": 1.1226775963577492e-05, + "loss": 0.2891, + "step": 5565 + }, + { + "epoch": 0.48, + "learning_rate": 1.1224020646917413e-05, + "loss": 0.3074, + "step": 5566 + }, + { + "epoch": 0.48, + "learning_rate": 1.1221265235915865e-05, + "loss": 0.2572, + "step": 5567 + }, + { + "epoch": 0.48, + "learning_rate": 1.1218509730785231e-05, + "loss": 0.2892, + "step": 5568 + }, + { + "epoch": 0.48, + "learning_rate": 1.1215754131737889e-05, + "loss": 0.303, + "step": 5569 + }, + { + "epoch": 0.48, + "learning_rate": 1.1212998438986223e-05, + "loss": 0.2982, + "step": 5570 + }, + { + "epoch": 0.48, + "learning_rate": 1.1210242652742632e-05, + "loss": 0.2475, + "step": 5571 + }, + { + "epoch": 0.48, + "learning_rate": 1.1207486773219515e-05, + "loss": 0.2832, + "step": 5572 + }, + { + "epoch": 0.48, + "learning_rate": 1.1204730800629289e-05, + "loss": 0.2927, + "step": 5573 + }, + { + "epoch": 0.48, + "learning_rate": 1.1201974735184362e-05, + "loss": 0.2658, + "step": 5574 + }, + { + "epoch": 0.48, + "learning_rate": 1.1199218577097163e-05, + "loss": 0.3198, + "step": 5575 + }, + { + "epoch": 0.48, + "learning_rate": 1.119646232658012e-05, + "loss": 0.2575, + "step": 5576 + }, + { + "epoch": 0.48, + "learning_rate": 1.1193705983845673e-05, + "loss": 0.3147, + "step": 5577 + }, + { + "epoch": 0.48, + "learning_rate": 1.119094954910627e-05, + "loss": 0.3093, + "step": 5578 + }, + { + "epoch": 0.48, + "learning_rate": 1.1188193022574356e-05, + "loss": 0.2626, + "step": 5579 + }, + { + "epoch": 0.48, + "learning_rate": 1.1185436404462398e-05, + "loss": 0.3241, + "step": 5580 + }, + { + "epoch": 0.48, + "learning_rate": 1.1182679694982857e-05, + "loss": 0.2479, + "step": 5581 + }, + { + "epoch": 0.48, + "learning_rate": 1.1179922894348207e-05, + "loss": 0.2788, + "step": 5582 + }, + { + "epoch": 0.48, + "learning_rate": 1.1177166002770937e-05, + "loss": 0.3054, + "step": 5583 + }, + { + "epoch": 0.48, + "learning_rate": 1.1174409020463524e-05, + "loss": 0.5701, + "step": 5584 + }, + { + "epoch": 0.48, + "learning_rate": 1.1171651947638468e-05, + "loss": 0.2875, + "step": 5585 + }, + { + "epoch": 0.48, + "learning_rate": 1.1168894784508268e-05, + "loss": 0.2573, + "step": 5586 + }, + { + "epoch": 0.48, + "learning_rate": 1.1166137531285435e-05, + "loss": 0.2822, + "step": 5587 + }, + { + "epoch": 0.48, + "learning_rate": 1.1163380188182482e-05, + "loss": 0.2219, + "step": 5588 + }, + { + "epoch": 0.48, + "learning_rate": 1.1160622755411932e-05, + "loss": 0.2692, + "step": 5589 + }, + { + "epoch": 0.48, + "learning_rate": 1.1157865233186315e-05, + "loss": 0.3212, + "step": 5590 + }, + { + "epoch": 0.48, + "learning_rate": 1.1155107621718168e-05, + "loss": 0.304, + "step": 5591 + }, + { + "epoch": 0.48, + "learning_rate": 1.1152349921220036e-05, + "loss": 0.302, + "step": 5592 + }, + { + "epoch": 0.48, + "learning_rate": 1.114959213190446e-05, + "loss": 0.2794, + "step": 5593 + }, + { + "epoch": 0.48, + "learning_rate": 1.1146834253984008e-05, + "loss": 0.2825, + "step": 5594 + }, + { + "epoch": 0.48, + "learning_rate": 1.1144076287671232e-05, + "loss": 0.2881, + "step": 5595 + }, + { + "epoch": 0.48, + "learning_rate": 1.1141318233178713e-05, + "loss": 0.6, + "step": 5596 + }, + { + "epoch": 0.48, + "learning_rate": 1.1138560090719025e-05, + "loss": 0.3169, + "step": 5597 + }, + { + "epoch": 0.48, + "learning_rate": 1.113580186050475e-05, + "loss": 0.3161, + "step": 5598 + }, + { + "epoch": 0.48, + "learning_rate": 1.1133043542748481e-05, + "loss": 0.3279, + "step": 5599 + }, + { + "epoch": 0.48, + "learning_rate": 1.1130285137662811e-05, + "loss": 0.2771, + "step": 5600 + }, + { + "epoch": 0.48, + "learning_rate": 1.1127526645460347e-05, + "loss": 0.3382, + "step": 5601 + }, + { + "epoch": 0.48, + "learning_rate": 1.1124768066353705e-05, + "loss": 0.3408, + "step": 5602 + }, + { + "epoch": 0.48, + "learning_rate": 1.1122009400555495e-05, + "loss": 0.3117, + "step": 5603 + }, + { + "epoch": 0.48, + "learning_rate": 1.1119250648278345e-05, + "loss": 0.2899, + "step": 5604 + }, + { + "epoch": 0.48, + "learning_rate": 1.1116491809734886e-05, + "loss": 0.3174, + "step": 5605 + }, + { + "epoch": 0.48, + "learning_rate": 1.1113732885137755e-05, + "loss": 0.2377, + "step": 5606 + }, + { + "epoch": 0.48, + "learning_rate": 1.1110973874699595e-05, + "loss": 0.2486, + "step": 5607 + }, + { + "epoch": 0.48, + "learning_rate": 1.110821477863306e-05, + "loss": 0.2451, + "step": 5608 + }, + { + "epoch": 0.48, + "learning_rate": 1.1105455597150805e-05, + "loss": 0.3058, + "step": 5609 + }, + { + "epoch": 0.48, + "learning_rate": 1.1102696330465495e-05, + "loss": 0.3296, + "step": 5610 + }, + { + "epoch": 0.48, + "learning_rate": 1.10999369787898e-05, + "loss": 0.2969, + "step": 5611 + }, + { + "epoch": 0.48, + "learning_rate": 1.1097177542336399e-05, + "loss": 0.2857, + "step": 5612 + }, + { + "epoch": 0.48, + "learning_rate": 1.1094418021317972e-05, + "loss": 0.2979, + "step": 5613 + }, + { + "epoch": 0.48, + "learning_rate": 1.109165841594721e-05, + "loss": 0.2902, + "step": 5614 + }, + { + "epoch": 0.48, + "learning_rate": 1.1088898726436814e-05, + "loss": 0.2621, + "step": 5615 + }, + { + "epoch": 0.48, + "learning_rate": 1.1086138952999487e-05, + "loss": 0.2905, + "step": 5616 + }, + { + "epoch": 0.48, + "learning_rate": 1.1083379095847933e-05, + "loss": 0.2766, + "step": 5617 + }, + { + "epoch": 0.48, + "learning_rate": 1.1080619155194873e-05, + "loss": 0.2778, + "step": 5618 + }, + { + "epoch": 0.48, + "learning_rate": 1.1077859131253026e-05, + "loss": 0.3027, + "step": 5619 + }, + { + "epoch": 0.48, + "learning_rate": 1.1075099024235123e-05, + "loss": 0.2944, + "step": 5620 + }, + { + "epoch": 0.48, + "learning_rate": 1.1072338834353902e-05, + "loss": 0.298, + "step": 5621 + }, + { + "epoch": 0.48, + "learning_rate": 1.10695785618221e-05, + "loss": 0.3042, + "step": 5622 + }, + { + "epoch": 0.48, + "learning_rate": 1.1066818206852472e-05, + "loss": 0.35, + "step": 5623 + }, + { + "epoch": 0.48, + "learning_rate": 1.1064057769657764e-05, + "loss": 0.2962, + "step": 5624 + }, + { + "epoch": 0.48, + "learning_rate": 1.106129725045074e-05, + "loss": 0.3158, + "step": 5625 + }, + { + "epoch": 0.48, + "learning_rate": 1.1058536649444167e-05, + "loss": 0.2773, + "step": 5626 + }, + { + "epoch": 0.48, + "learning_rate": 1.1055775966850821e-05, + "loss": 0.3075, + "step": 5627 + }, + { + "epoch": 0.48, + "learning_rate": 1.1053015202883483e-05, + "loss": 0.2625, + "step": 5628 + }, + { + "epoch": 0.48, + "learning_rate": 1.1050254357754932e-05, + "loss": 0.2972, + "step": 5629 + }, + { + "epoch": 0.48, + "learning_rate": 1.104749343167797e-05, + "loss": 0.2762, + "step": 5630 + }, + { + "epoch": 0.48, + "learning_rate": 1.1044732424865386e-05, + "loss": 0.2307, + "step": 5631 + }, + { + "epoch": 0.48, + "learning_rate": 1.104197133752999e-05, + "loss": 0.3394, + "step": 5632 + }, + { + "epoch": 0.48, + "learning_rate": 1.1039210169884589e-05, + "loss": 0.3195, + "step": 5633 + }, + { + "epoch": 0.48, + "learning_rate": 1.1036448922142004e-05, + "loss": 0.3192, + "step": 5634 + }, + { + "epoch": 0.48, + "learning_rate": 1.1033687594515062e-05, + "loss": 0.3105, + "step": 5635 + }, + { + "epoch": 0.48, + "learning_rate": 1.1030926187216581e-05, + "loss": 0.2896, + "step": 5636 + }, + { + "epoch": 0.48, + "learning_rate": 1.1028164700459409e-05, + "loss": 0.2466, + "step": 5637 + }, + { + "epoch": 0.48, + "learning_rate": 1.1025403134456378e-05, + "loss": 0.2784, + "step": 5638 + }, + { + "epoch": 0.48, + "learning_rate": 1.1022641489420342e-05, + "loss": 0.2577, + "step": 5639 + }, + { + "epoch": 0.48, + "learning_rate": 1.1019879765564155e-05, + "loss": 0.2577, + "step": 5640 + }, + { + "epoch": 0.48, + "learning_rate": 1.1017117963100672e-05, + "loss": 0.2607, + "step": 5641 + }, + { + "epoch": 0.48, + "learning_rate": 1.1014356082242766e-05, + "loss": 0.3171, + "step": 5642 + }, + { + "epoch": 0.48, + "learning_rate": 1.1011594123203302e-05, + "loss": 0.2711, + "step": 5643 + }, + { + "epoch": 0.48, + "learning_rate": 1.100883208619516e-05, + "loss": 0.2964, + "step": 5644 + }, + { + "epoch": 0.48, + "learning_rate": 1.1006069971431228e-05, + "loss": 0.282, + "step": 5645 + }, + { + "epoch": 0.48, + "learning_rate": 1.1003307779124392e-05, + "loss": 0.2993, + "step": 5646 + }, + { + "epoch": 0.48, + "learning_rate": 1.1000545509487555e-05, + "loss": 0.3268, + "step": 5647 + }, + { + "epoch": 0.48, + "learning_rate": 1.0997783162733608e-05, + "loss": 0.296, + "step": 5648 + }, + { + "epoch": 0.48, + "learning_rate": 1.0995020739075468e-05, + "loss": 0.2925, + "step": 5649 + }, + { + "epoch": 0.48, + "learning_rate": 1.0992258238726046e-05, + "loss": 0.2502, + "step": 5650 + }, + { + "epoch": 0.48, + "learning_rate": 1.0989495661898259e-05, + "loss": 0.3224, + "step": 5651 + }, + { + "epoch": 0.48, + "learning_rate": 1.0986733008805038e-05, + "loss": 0.27, + "step": 5652 + }, + { + "epoch": 0.48, + "learning_rate": 1.0983970279659311e-05, + "loss": 0.3111, + "step": 5653 + }, + { + "epoch": 0.48, + "learning_rate": 1.0981207474674021e-05, + "loss": 0.2794, + "step": 5654 + }, + { + "epoch": 0.48, + "learning_rate": 1.0978444594062104e-05, + "loss": 0.2609, + "step": 5655 + }, + { + "epoch": 0.48, + "learning_rate": 1.0975681638036513e-05, + "loss": 0.2902, + "step": 5656 + }, + { + "epoch": 0.48, + "learning_rate": 1.0972918606810198e-05, + "loss": 0.2939, + "step": 5657 + }, + { + "epoch": 0.48, + "learning_rate": 1.0970155500596127e-05, + "loss": 0.3184, + "step": 5658 + }, + { + "epoch": 0.49, + "learning_rate": 1.0967392319607267e-05, + "loss": 0.2737, + "step": 5659 + }, + { + "epoch": 0.49, + "learning_rate": 1.0964629064056583e-05, + "loss": 0.3092, + "step": 5660 + }, + { + "epoch": 0.49, + "learning_rate": 1.096186573415706e-05, + "loss": 0.3104, + "step": 5661 + }, + { + "epoch": 0.49, + "learning_rate": 1.0959102330121676e-05, + "loss": 0.2681, + "step": 5662 + }, + { + "epoch": 0.49, + "learning_rate": 1.0956338852163424e-05, + "loss": 0.2479, + "step": 5663 + }, + { + "epoch": 0.49, + "learning_rate": 1.0953575300495299e-05, + "loss": 0.3253, + "step": 5664 + }, + { + "epoch": 0.49, + "learning_rate": 1.0950811675330303e-05, + "loss": 0.3229, + "step": 5665 + }, + { + "epoch": 0.49, + "learning_rate": 1.0948047976881439e-05, + "loss": 0.2815, + "step": 5666 + }, + { + "epoch": 0.49, + "learning_rate": 1.0945284205361723e-05, + "loss": 0.2849, + "step": 5667 + }, + { + "epoch": 0.49, + "learning_rate": 1.0942520360984172e-05, + "loss": 0.2917, + "step": 5668 + }, + { + "epoch": 0.49, + "learning_rate": 1.0939756443961809e-05, + "loss": 0.3042, + "step": 5669 + }, + { + "epoch": 0.49, + "learning_rate": 1.093699245450766e-05, + "loss": 0.2601, + "step": 5670 + }, + { + "epoch": 0.49, + "learning_rate": 1.0934228392834763e-05, + "loss": 0.2871, + "step": 5671 + }, + { + "epoch": 0.49, + "learning_rate": 1.0931464259156158e-05, + "loss": 0.2992, + "step": 5672 + }, + { + "epoch": 0.49, + "learning_rate": 1.0928700053684893e-05, + "loss": 0.2832, + "step": 5673 + }, + { + "epoch": 0.49, + "learning_rate": 1.0925935776634014e-05, + "loss": 0.2737, + "step": 5674 + }, + { + "epoch": 0.49, + "learning_rate": 1.0923171428216581e-05, + "loss": 0.2444, + "step": 5675 + }, + { + "epoch": 0.49, + "learning_rate": 1.0920407008645656e-05, + "loss": 0.3256, + "step": 5676 + }, + { + "epoch": 0.49, + "learning_rate": 1.0917642518134304e-05, + "loss": 0.225, + "step": 5677 + }, + { + "epoch": 0.49, + "learning_rate": 1.0914877956895604e-05, + "loss": 0.2691, + "step": 5678 + }, + { + "epoch": 0.49, + "learning_rate": 1.0912113325142632e-05, + "loss": 0.2818, + "step": 5679 + }, + { + "epoch": 0.49, + "learning_rate": 1.0909348623088472e-05, + "loss": 0.3021, + "step": 5680 + }, + { + "epoch": 0.49, + "learning_rate": 1.090658385094621e-05, + "loss": 0.311, + "step": 5681 + }, + { + "epoch": 0.49, + "learning_rate": 1.0903819008928948e-05, + "loss": 0.2844, + "step": 5682 + }, + { + "epoch": 0.49, + "learning_rate": 1.090105409724978e-05, + "loss": 0.2791, + "step": 5683 + }, + { + "epoch": 0.49, + "learning_rate": 1.0898289116121817e-05, + "loss": 0.2706, + "step": 5684 + }, + { + "epoch": 0.49, + "learning_rate": 1.0895524065758164e-05, + "loss": 0.3007, + "step": 5685 + }, + { + "epoch": 0.49, + "learning_rate": 1.0892758946371943e-05, + "loss": 0.2937, + "step": 5686 + }, + { + "epoch": 0.49, + "learning_rate": 1.0889993758176276e-05, + "loss": 0.307, + "step": 5687 + }, + { + "epoch": 0.49, + "learning_rate": 1.0887228501384287e-05, + "loss": 0.3426, + "step": 5688 + }, + { + "epoch": 0.49, + "learning_rate": 1.0884463176209105e-05, + "loss": 0.2866, + "step": 5689 + }, + { + "epoch": 0.49, + "learning_rate": 1.0881697782863874e-05, + "loss": 0.2861, + "step": 5690 + }, + { + "epoch": 0.49, + "learning_rate": 1.0878932321561734e-05, + "loss": 0.2752, + "step": 5691 + }, + { + "epoch": 0.49, + "learning_rate": 1.0876166792515836e-05, + "loss": 0.2538, + "step": 5692 + }, + { + "epoch": 0.49, + "learning_rate": 1.0873401195939328e-05, + "loss": 0.2941, + "step": 5693 + }, + { + "epoch": 0.49, + "learning_rate": 1.0870635532045375e-05, + "loss": 0.3277, + "step": 5694 + }, + { + "epoch": 0.49, + "learning_rate": 1.086786980104713e-05, + "loss": 0.2917, + "step": 5695 + }, + { + "epoch": 0.49, + "learning_rate": 1.0865104003157774e-05, + "loss": 0.278, + "step": 5696 + }, + { + "epoch": 0.49, + "learning_rate": 1.0862338138590479e-05, + "loss": 0.2872, + "step": 5697 + }, + { + "epoch": 0.49, + "learning_rate": 1.0859572207558416e-05, + "loss": 0.3156, + "step": 5698 + }, + { + "epoch": 0.49, + "learning_rate": 1.085680621027478e-05, + "loss": 0.2991, + "step": 5699 + }, + { + "epoch": 0.49, + "learning_rate": 1.085404014695275e-05, + "loss": 0.2949, + "step": 5700 + }, + { + "epoch": 0.49, + "learning_rate": 1.0851274017805525e-05, + "loss": 0.2551, + "step": 5701 + }, + { + "epoch": 0.49, + "learning_rate": 1.0848507823046306e-05, + "loss": 0.3223, + "step": 5702 + }, + { + "epoch": 0.49, + "learning_rate": 1.0845741562888297e-05, + "loss": 0.292, + "step": 5703 + }, + { + "epoch": 0.49, + "learning_rate": 1.084297523754471e-05, + "loss": 0.2451, + "step": 5704 + }, + { + "epoch": 0.49, + "learning_rate": 1.0840208847228753e-05, + "loss": 0.3699, + "step": 5705 + }, + { + "epoch": 0.49, + "learning_rate": 1.0837442392153651e-05, + "loss": 0.2649, + "step": 5706 + }, + { + "epoch": 0.49, + "learning_rate": 1.083467587253263e-05, + "loss": 0.2656, + "step": 5707 + }, + { + "epoch": 0.49, + "learning_rate": 1.0831909288578913e-05, + "loss": 0.2858, + "step": 5708 + }, + { + "epoch": 0.49, + "learning_rate": 1.082914264050574e-05, + "loss": 0.314, + "step": 5709 + }, + { + "epoch": 0.49, + "learning_rate": 1.082637592852635e-05, + "loss": 0.2957, + "step": 5710 + }, + { + "epoch": 0.49, + "learning_rate": 1.0823609152853987e-05, + "loss": 0.2497, + "step": 5711 + }, + { + "epoch": 0.49, + "learning_rate": 1.0820842313701898e-05, + "loss": 0.3034, + "step": 5712 + }, + { + "epoch": 0.49, + "learning_rate": 1.0818075411283341e-05, + "loss": 0.2827, + "step": 5713 + }, + { + "epoch": 0.49, + "learning_rate": 1.081530844581157e-05, + "loss": 0.2714, + "step": 5714 + }, + { + "epoch": 0.49, + "learning_rate": 1.0812541417499855e-05, + "loss": 0.2974, + "step": 5715 + }, + { + "epoch": 0.49, + "learning_rate": 1.0809774326561464e-05, + "loss": 0.2956, + "step": 5716 + }, + { + "epoch": 0.49, + "learning_rate": 1.080700717320967e-05, + "loss": 0.2864, + "step": 5717 + }, + { + "epoch": 0.49, + "learning_rate": 1.080423995765775e-05, + "loss": 0.3052, + "step": 5718 + }, + { + "epoch": 0.49, + "learning_rate": 1.0801472680118984e-05, + "loss": 0.3207, + "step": 5719 + }, + { + "epoch": 0.49, + "learning_rate": 1.0798705340806668e-05, + "loss": 0.2517, + "step": 5720 + }, + { + "epoch": 0.49, + "learning_rate": 1.0795937939934088e-05, + "loss": 0.28, + "step": 5721 + }, + { + "epoch": 0.49, + "learning_rate": 1.0793170477714546e-05, + "loss": 0.2748, + "step": 5722 + }, + { + "epoch": 0.49, + "learning_rate": 1.0790402954361344e-05, + "loss": 0.2958, + "step": 5723 + }, + { + "epoch": 0.49, + "learning_rate": 1.0787635370087786e-05, + "loss": 0.2582, + "step": 5724 + }, + { + "epoch": 0.49, + "learning_rate": 1.0784867725107187e-05, + "loss": 0.3481, + "step": 5725 + }, + { + "epoch": 0.49, + "learning_rate": 1.078210001963286e-05, + "loss": 0.2842, + "step": 5726 + }, + { + "epoch": 0.49, + "learning_rate": 1.0779332253878127e-05, + "loss": 0.2576, + "step": 5727 + }, + { + "epoch": 0.49, + "learning_rate": 1.0776564428056317e-05, + "loss": 0.2343, + "step": 5728 + }, + { + "epoch": 0.49, + "learning_rate": 1.0773796542380757e-05, + "loss": 0.2874, + "step": 5729 + }, + { + "epoch": 0.49, + "learning_rate": 1.0771028597064785e-05, + "loss": 0.2637, + "step": 5730 + }, + { + "epoch": 0.49, + "learning_rate": 1.0768260592321735e-05, + "loss": 0.2996, + "step": 5731 + }, + { + "epoch": 0.49, + "learning_rate": 1.076549252836496e-05, + "loss": 0.3318, + "step": 5732 + }, + { + "epoch": 0.49, + "learning_rate": 1.0762724405407795e-05, + "loss": 0.5643, + "step": 5733 + }, + { + "epoch": 0.49, + "learning_rate": 1.0759956223663608e-05, + "loss": 0.2958, + "step": 5734 + }, + { + "epoch": 0.49, + "learning_rate": 1.075718798334575e-05, + "loss": 0.2444, + "step": 5735 + }, + { + "epoch": 0.49, + "learning_rate": 1.075441968466758e-05, + "loss": 0.6011, + "step": 5736 + }, + { + "epoch": 0.49, + "learning_rate": 1.0751651327842474e-05, + "loss": 0.2748, + "step": 5737 + }, + { + "epoch": 0.49, + "learning_rate": 1.0748882913083794e-05, + "loss": 0.2813, + "step": 5738 + }, + { + "epoch": 0.49, + "learning_rate": 1.074611444060492e-05, + "loss": 0.3068, + "step": 5739 + }, + { + "epoch": 0.49, + "learning_rate": 1.074334591061923e-05, + "loss": 0.2938, + "step": 5740 + }, + { + "epoch": 0.49, + "learning_rate": 1.0740577323340112e-05, + "loss": 0.2624, + "step": 5741 + }, + { + "epoch": 0.49, + "learning_rate": 1.0737808678980954e-05, + "loss": 0.3124, + "step": 5742 + }, + { + "epoch": 0.49, + "learning_rate": 1.0735039977755147e-05, + "loss": 0.2632, + "step": 5743 + }, + { + "epoch": 0.49, + "learning_rate": 1.0732271219876092e-05, + "loss": 0.2575, + "step": 5744 + }, + { + "epoch": 0.49, + "learning_rate": 1.0729502405557188e-05, + "loss": 0.3065, + "step": 5745 + }, + { + "epoch": 0.49, + "learning_rate": 1.0726733535011844e-05, + "loss": 0.2885, + "step": 5746 + }, + { + "epoch": 0.49, + "learning_rate": 1.0723964608453467e-05, + "loss": 0.2744, + "step": 5747 + }, + { + "epoch": 0.49, + "learning_rate": 1.0721195626095477e-05, + "loss": 0.2945, + "step": 5748 + }, + { + "epoch": 0.49, + "learning_rate": 1.0718426588151296e-05, + "loss": 0.2759, + "step": 5749 + }, + { + "epoch": 0.49, + "learning_rate": 1.071565749483434e-05, + "loss": 0.3277, + "step": 5750 + }, + { + "epoch": 0.49, + "learning_rate": 1.0712888346358041e-05, + "loss": 0.2669, + "step": 5751 + }, + { + "epoch": 0.49, + "learning_rate": 1.0710119142935829e-05, + "loss": 0.2228, + "step": 5752 + }, + { + "epoch": 0.49, + "learning_rate": 1.0707349884781142e-05, + "loss": 0.2715, + "step": 5753 + }, + { + "epoch": 0.49, + "learning_rate": 1.0704580572107424e-05, + "loss": 0.3269, + "step": 5754 + }, + { + "epoch": 0.49, + "learning_rate": 1.0701811205128115e-05, + "loss": 0.2429, + "step": 5755 + }, + { + "epoch": 0.49, + "learning_rate": 1.0699041784056667e-05, + "loss": 0.311, + "step": 5756 + }, + { + "epoch": 0.49, + "learning_rate": 1.0696272309106532e-05, + "loss": 0.2752, + "step": 5757 + }, + { + "epoch": 0.49, + "learning_rate": 1.0693502780491168e-05, + "loss": 0.2734, + "step": 5758 + }, + { + "epoch": 0.49, + "learning_rate": 1.0690733198424035e-05, + "loss": 0.3068, + "step": 5759 + }, + { + "epoch": 0.49, + "learning_rate": 1.06879635631186e-05, + "loss": 0.3011, + "step": 5760 + }, + { + "epoch": 0.49, + "learning_rate": 1.0685193874788335e-05, + "loss": 0.2622, + "step": 5761 + }, + { + "epoch": 0.49, + "learning_rate": 1.0682424133646712e-05, + "loss": 0.2632, + "step": 5762 + }, + { + "epoch": 0.49, + "learning_rate": 1.0679654339907208e-05, + "loss": 0.2817, + "step": 5763 + }, + { + "epoch": 0.49, + "learning_rate": 1.0676884493783304e-05, + "loss": 0.62, + "step": 5764 + }, + { + "epoch": 0.49, + "learning_rate": 1.0674114595488489e-05, + "loss": 0.2929, + "step": 5765 + }, + { + "epoch": 0.49, + "learning_rate": 1.0671344645236253e-05, + "loss": 0.3076, + "step": 5766 + }, + { + "epoch": 0.49, + "learning_rate": 1.0668574643240087e-05, + "loss": 0.2897, + "step": 5767 + }, + { + "epoch": 0.49, + "learning_rate": 1.0665804589713494e-05, + "loss": 0.3101, + "step": 5768 + }, + { + "epoch": 0.49, + "learning_rate": 1.066303448486997e-05, + "loss": 0.3101, + "step": 5769 + }, + { + "epoch": 0.49, + "learning_rate": 1.0660264328923024e-05, + "loss": 0.2764, + "step": 5770 + }, + { + "epoch": 0.49, + "learning_rate": 1.0657494122086165e-05, + "loss": 0.2588, + "step": 5771 + }, + { + "epoch": 0.49, + "learning_rate": 1.0654723864572909e-05, + "loss": 0.2718, + "step": 5772 + }, + { + "epoch": 0.49, + "learning_rate": 1.0651953556596777e-05, + "loss": 0.3024, + "step": 5773 + }, + { + "epoch": 0.49, + "learning_rate": 1.064918319837128e-05, + "loss": 0.2729, + "step": 5774 + }, + { + "epoch": 0.5, + "learning_rate": 1.0646412790109954e-05, + "loss": 0.2581, + "step": 5775 + }, + { + "epoch": 0.5, + "learning_rate": 1.0643642332026323e-05, + "loss": 0.3369, + "step": 5776 + }, + { + "epoch": 0.5, + "learning_rate": 1.064087182433392e-05, + "loss": 0.2566, + "step": 5777 + }, + { + "epoch": 0.5, + "learning_rate": 1.0638101267246283e-05, + "loss": 0.2369, + "step": 5778 + }, + { + "epoch": 0.5, + "learning_rate": 1.0635330660976955e-05, + "loss": 0.2759, + "step": 5779 + }, + { + "epoch": 0.5, + "learning_rate": 1.0632560005739481e-05, + "loss": 0.3203, + "step": 5780 + }, + { + "epoch": 0.5, + "learning_rate": 1.0629789301747404e-05, + "loss": 0.2769, + "step": 5781 + }, + { + "epoch": 0.5, + "learning_rate": 1.0627018549214284e-05, + "loss": 0.2582, + "step": 5782 + }, + { + "epoch": 0.5, + "learning_rate": 1.0624247748353666e-05, + "loss": 0.2404, + "step": 5783 + }, + { + "epoch": 0.5, + "learning_rate": 1.062147689937912e-05, + "loss": 0.3202, + "step": 5784 + }, + { + "epoch": 0.5, + "learning_rate": 1.0618706002504202e-05, + "loss": 0.2665, + "step": 5785 + }, + { + "epoch": 0.5, + "learning_rate": 1.0615935057942485e-05, + "loss": 0.3236, + "step": 5786 + }, + { + "epoch": 0.5, + "learning_rate": 1.0613164065907539e-05, + "loss": 0.2955, + "step": 5787 + }, + { + "epoch": 0.5, + "learning_rate": 1.0610393026612933e-05, + "loss": 0.6254, + "step": 5788 + }, + { + "epoch": 0.5, + "learning_rate": 1.0607621940272253e-05, + "loss": 0.2924, + "step": 5789 + }, + { + "epoch": 0.5, + "learning_rate": 1.060485080709907e-05, + "loss": 0.3092, + "step": 5790 + }, + { + "epoch": 0.5, + "learning_rate": 1.0602079627306979e-05, + "loss": 0.265, + "step": 5791 + }, + { + "epoch": 0.5, + "learning_rate": 1.0599308401109564e-05, + "loss": 0.2678, + "step": 5792 + }, + { + "epoch": 0.5, + "learning_rate": 1.0596537128720421e-05, + "loss": 0.3041, + "step": 5793 + }, + { + "epoch": 0.5, + "learning_rate": 1.0593765810353142e-05, + "loss": 0.2837, + "step": 5794 + }, + { + "epoch": 0.5, + "learning_rate": 1.059099444622133e-05, + "loss": 0.2853, + "step": 5795 + }, + { + "epoch": 0.5, + "learning_rate": 1.0588223036538583e-05, + "loss": 0.2825, + "step": 5796 + }, + { + "epoch": 0.5, + "learning_rate": 1.0585451581518513e-05, + "loss": 0.2611, + "step": 5797 + }, + { + "epoch": 0.5, + "learning_rate": 1.0582680081374728e-05, + "loss": 0.3231, + "step": 5798 + }, + { + "epoch": 0.5, + "learning_rate": 1.0579908536320842e-05, + "loss": 0.3279, + "step": 5799 + }, + { + "epoch": 0.5, + "learning_rate": 1.057713694657047e-05, + "loss": 0.2585, + "step": 5800 + }, + { + "epoch": 0.5, + "learning_rate": 1.0574365312337235e-05, + "loss": 0.2878, + "step": 5801 + }, + { + "epoch": 0.5, + "learning_rate": 1.0571593633834758e-05, + "loss": 0.2378, + "step": 5802 + }, + { + "epoch": 0.5, + "learning_rate": 1.056882191127667e-05, + "loss": 0.294, + "step": 5803 + }, + { + "epoch": 0.5, + "learning_rate": 1.0566050144876599e-05, + "loss": 0.296, + "step": 5804 + }, + { + "epoch": 0.5, + "learning_rate": 1.0563278334848178e-05, + "loss": 0.2853, + "step": 5805 + }, + { + "epoch": 0.5, + "learning_rate": 1.0560506481405048e-05, + "loss": 0.3073, + "step": 5806 + }, + { + "epoch": 0.5, + "learning_rate": 1.0557734584760849e-05, + "loss": 0.2751, + "step": 5807 + }, + { + "epoch": 0.5, + "learning_rate": 1.0554962645129223e-05, + "loss": 0.2862, + "step": 5808 + }, + { + "epoch": 0.5, + "learning_rate": 1.0552190662723816e-05, + "loss": 0.287, + "step": 5809 + }, + { + "epoch": 0.5, + "learning_rate": 1.0549418637758284e-05, + "loss": 0.2589, + "step": 5810 + }, + { + "epoch": 0.5, + "learning_rate": 1.0546646570446277e-05, + "loss": 0.3345, + "step": 5811 + }, + { + "epoch": 0.5, + "learning_rate": 1.0543874461001456e-05, + "loss": 0.36, + "step": 5812 + }, + { + "epoch": 0.5, + "learning_rate": 1.0541102309637477e-05, + "loss": 0.2893, + "step": 5813 + }, + { + "epoch": 0.5, + "learning_rate": 1.0538330116568006e-05, + "loss": 0.2719, + "step": 5814 + }, + { + "epoch": 0.5, + "learning_rate": 1.0535557882006708e-05, + "loss": 0.2906, + "step": 5815 + }, + { + "epoch": 0.5, + "learning_rate": 1.0532785606167256e-05, + "loss": 0.3034, + "step": 5816 + }, + { + "epoch": 0.5, + "learning_rate": 1.0530013289263318e-05, + "loss": 0.266, + "step": 5817 + }, + { + "epoch": 0.5, + "learning_rate": 1.0527240931508582e-05, + "loss": 0.278, + "step": 5818 + }, + { + "epoch": 0.5, + "learning_rate": 1.0524468533116716e-05, + "loss": 0.3, + "step": 5819 + }, + { + "epoch": 0.5, + "learning_rate": 1.052169609430141e-05, + "loss": 0.2811, + "step": 5820 + }, + { + "epoch": 0.5, + "learning_rate": 1.0518923615276342e-05, + "loss": 0.2853, + "step": 5821 + }, + { + "epoch": 0.5, + "learning_rate": 1.051615109625521e-05, + "loss": 0.2915, + "step": 5822 + }, + { + "epoch": 0.5, + "learning_rate": 1.0513378537451697e-05, + "loss": 0.3004, + "step": 5823 + }, + { + "epoch": 0.5, + "learning_rate": 1.0510605939079505e-05, + "loss": 0.2691, + "step": 5824 + }, + { + "epoch": 0.5, + "learning_rate": 1.0507833301352335e-05, + "loss": 0.2947, + "step": 5825 + }, + { + "epoch": 0.5, + "learning_rate": 1.0505060624483878e-05, + "loss": 0.2592, + "step": 5826 + }, + { + "epoch": 0.5, + "learning_rate": 1.0502287908687847e-05, + "loss": 0.2948, + "step": 5827 + }, + { + "epoch": 0.5, + "learning_rate": 1.0499515154177941e-05, + "loss": 0.2931, + "step": 5828 + }, + { + "epoch": 0.5, + "learning_rate": 1.049674236116788e-05, + "loss": 0.288, + "step": 5829 + }, + { + "epoch": 0.5, + "learning_rate": 1.049396952987137e-05, + "loss": 0.6166, + "step": 5830 + }, + { + "epoch": 0.5, + "learning_rate": 1.049119666050213e-05, + "loss": 0.31, + "step": 5831 + }, + { + "epoch": 0.5, + "learning_rate": 1.048842375327388e-05, + "loss": 0.277, + "step": 5832 + }, + { + "epoch": 0.5, + "learning_rate": 1.0485650808400339e-05, + "loss": 0.29, + "step": 5833 + }, + { + "epoch": 0.5, + "learning_rate": 1.0482877826095233e-05, + "loss": 0.2904, + "step": 5834 + }, + { + "epoch": 0.5, + "learning_rate": 1.0480104806572288e-05, + "loss": 0.337, + "step": 5835 + }, + { + "epoch": 0.5, + "learning_rate": 1.0477331750045239e-05, + "loss": 0.2444, + "step": 5836 + }, + { + "epoch": 0.5, + "learning_rate": 1.047455865672782e-05, + "loss": 0.2681, + "step": 5837 + }, + { + "epoch": 0.5, + "learning_rate": 1.0471785526833762e-05, + "loss": 0.2759, + "step": 5838 + }, + { + "epoch": 0.5, + "learning_rate": 1.0469012360576807e-05, + "loss": 0.2717, + "step": 5839 + }, + { + "epoch": 0.5, + "learning_rate": 1.0466239158170697e-05, + "loss": 0.3179, + "step": 5840 + }, + { + "epoch": 0.5, + "learning_rate": 1.0463465919829175e-05, + "loss": 0.3065, + "step": 5841 + }, + { + "epoch": 0.5, + "learning_rate": 1.046069264576599e-05, + "loss": 0.3027, + "step": 5842 + }, + { + "epoch": 0.5, + "learning_rate": 1.0457919336194892e-05, + "loss": 0.2836, + "step": 5843 + }, + { + "epoch": 0.5, + "learning_rate": 1.0455145991329639e-05, + "loss": 0.3084, + "step": 5844 + }, + { + "epoch": 0.5, + "learning_rate": 1.0452372611383977e-05, + "loss": 0.2818, + "step": 5845 + }, + { + "epoch": 0.5, + "learning_rate": 1.0449599196571671e-05, + "loss": 0.2429, + "step": 5846 + }, + { + "epoch": 0.5, + "learning_rate": 1.044682574710648e-05, + "loss": 0.2764, + "step": 5847 + }, + { + "epoch": 0.5, + "learning_rate": 1.0444052263202169e-05, + "loss": 0.2596, + "step": 5848 + }, + { + "epoch": 0.5, + "learning_rate": 1.04412787450725e-05, + "loss": 0.2999, + "step": 5849 + }, + { + "epoch": 0.5, + "learning_rate": 1.043850519293125e-05, + "loss": 0.3206, + "step": 5850 + }, + { + "epoch": 0.5, + "learning_rate": 1.0435731606992188e-05, + "loss": 0.2603, + "step": 5851 + }, + { + "epoch": 0.5, + "learning_rate": 1.0432957987469081e-05, + "loss": 0.2875, + "step": 5852 + }, + { + "epoch": 0.5, + "learning_rate": 1.0430184334575715e-05, + "loss": 0.2653, + "step": 5853 + }, + { + "epoch": 0.5, + "learning_rate": 1.0427410648525863e-05, + "loss": 0.2701, + "step": 5854 + }, + { + "epoch": 0.5, + "learning_rate": 1.0424636929533315e-05, + "loss": 0.3293, + "step": 5855 + }, + { + "epoch": 0.5, + "learning_rate": 1.0421863177811848e-05, + "loss": 0.2586, + "step": 5856 + }, + { + "epoch": 0.5, + "learning_rate": 1.0419089393575253e-05, + "loss": 0.2738, + "step": 5857 + }, + { + "epoch": 0.5, + "learning_rate": 1.041631557703732e-05, + "loss": 0.2709, + "step": 5858 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413541728411836e-05, + "loss": 0.2313, + "step": 5859 + }, + { + "epoch": 0.5, + "learning_rate": 1.04107678479126e-05, + "loss": 0.2802, + "step": 5860 + }, + { + "epoch": 0.5, + "learning_rate": 1.0407993935753406e-05, + "loss": 0.2375, + "step": 5861 + }, + { + "epoch": 0.5, + "learning_rate": 1.0405219992148057e-05, + "loss": 0.3206, + "step": 5862 + }, + { + "epoch": 0.5, + "learning_rate": 1.0402446017310355e-05, + "loss": 0.2903, + "step": 5863 + }, + { + "epoch": 0.5, + "learning_rate": 1.0399672011454101e-05, + "loss": 0.2972, + "step": 5864 + }, + { + "epoch": 0.5, + "learning_rate": 1.0396897974793102e-05, + "loss": 0.2703, + "step": 5865 + }, + { + "epoch": 0.5, + "learning_rate": 1.039412390754117e-05, + "loss": 0.3087, + "step": 5866 + }, + { + "epoch": 0.5, + "learning_rate": 1.0391349809912115e-05, + "loss": 0.2697, + "step": 5867 + }, + { + "epoch": 0.5, + "learning_rate": 1.0388575682119748e-05, + "loss": 0.2578, + "step": 5868 + }, + { + "epoch": 0.5, + "learning_rate": 1.0385801524377888e-05, + "loss": 0.3052, + "step": 5869 + }, + { + "epoch": 0.5, + "learning_rate": 1.0383027336900356e-05, + "loss": 0.3104, + "step": 5870 + }, + { + "epoch": 0.5, + "learning_rate": 1.0380253119900967e-05, + "loss": 0.2667, + "step": 5871 + }, + { + "epoch": 0.5, + "learning_rate": 1.0377478873593546e-05, + "loss": 0.2794, + "step": 5872 + }, + { + "epoch": 0.5, + "learning_rate": 1.0374704598191918e-05, + "loss": 0.3147, + "step": 5873 + }, + { + "epoch": 0.5, + "learning_rate": 1.0371930293909911e-05, + "loss": 0.2961, + "step": 5874 + }, + { + "epoch": 0.5, + "learning_rate": 1.0369155960961356e-05, + "loss": 0.2938, + "step": 5875 + }, + { + "epoch": 0.5, + "learning_rate": 1.0366381599560086e-05, + "loss": 0.2693, + "step": 5876 + }, + { + "epoch": 0.5, + "learning_rate": 1.036360720991993e-05, + "loss": 0.2653, + "step": 5877 + }, + { + "epoch": 0.5, + "learning_rate": 1.0360832792254727e-05, + "loss": 0.3221, + "step": 5878 + }, + { + "epoch": 0.5, + "learning_rate": 1.0358058346778314e-05, + "loss": 0.2729, + "step": 5879 + }, + { + "epoch": 0.5, + "learning_rate": 1.0355283873704533e-05, + "loss": 0.2859, + "step": 5880 + }, + { + "epoch": 0.5, + "learning_rate": 1.0352509373247227e-05, + "loss": 0.2946, + "step": 5881 + }, + { + "epoch": 0.5, + "learning_rate": 1.0349734845620244e-05, + "loss": 0.2281, + "step": 5882 + }, + { + "epoch": 0.5, + "learning_rate": 1.0346960291037424e-05, + "loss": 0.3042, + "step": 5883 + }, + { + "epoch": 0.5, + "learning_rate": 1.034418570971262e-05, + "loss": 0.5502, + "step": 5884 + }, + { + "epoch": 0.5, + "learning_rate": 1.034141110185968e-05, + "loss": 0.2851, + "step": 5885 + }, + { + "epoch": 0.5, + "learning_rate": 1.0338636467692462e-05, + "loss": 0.328, + "step": 5886 + }, + { + "epoch": 0.5, + "learning_rate": 1.0335861807424816e-05, + "loss": 0.2556, + "step": 5887 + }, + { + "epoch": 0.5, + "learning_rate": 1.0333087121270602e-05, + "loss": 0.3177, + "step": 5888 + }, + { + "epoch": 0.5, + "learning_rate": 1.0330312409443681e-05, + "loss": 0.293, + "step": 5889 + }, + { + "epoch": 0.5, + "learning_rate": 1.0327537672157908e-05, + "loss": 0.2901, + "step": 5890 + }, + { + "epoch": 0.5, + "learning_rate": 1.0324762909627151e-05, + "loss": 0.2851, + "step": 5891 + }, + { + "epoch": 0.51, + "learning_rate": 1.0321988122065274e-05, + "loss": 0.2742, + "step": 5892 + }, + { + "epoch": 0.51, + "learning_rate": 1.0319213309686145e-05, + "loss": 0.3129, + "step": 5893 + }, + { + "epoch": 0.51, + "learning_rate": 1.031643847270363e-05, + "loss": 0.2683, + "step": 5894 + }, + { + "epoch": 0.51, + "learning_rate": 1.03136636113316e-05, + "loss": 0.2502, + "step": 5895 + }, + { + "epoch": 0.51, + "learning_rate": 1.031088872578393e-05, + "loss": 0.264, + "step": 5896 + }, + { + "epoch": 0.51, + "learning_rate": 1.0308113816274492e-05, + "loss": 0.2546, + "step": 5897 + }, + { + "epoch": 0.51, + "learning_rate": 1.0305338883017163e-05, + "loss": 0.2793, + "step": 5898 + }, + { + "epoch": 0.51, + "learning_rate": 1.0302563926225824e-05, + "loss": 0.2556, + "step": 5899 + }, + { + "epoch": 0.51, + "learning_rate": 1.0299788946114352e-05, + "loss": 0.3304, + "step": 5900 + }, + { + "epoch": 0.51, + "learning_rate": 1.0297013942896628e-05, + "loss": 0.2787, + "step": 5901 + }, + { + "epoch": 0.51, + "learning_rate": 1.0294238916786537e-05, + "loss": 0.3022, + "step": 5902 + }, + { + "epoch": 0.51, + "learning_rate": 1.0291463867997967e-05, + "loss": 0.2983, + "step": 5903 + }, + { + "epoch": 0.51, + "learning_rate": 1.0288688796744797e-05, + "loss": 0.3044, + "step": 5904 + }, + { + "epoch": 0.51, + "learning_rate": 1.0285913703240927e-05, + "loss": 0.2782, + "step": 5905 + }, + { + "epoch": 0.51, + "learning_rate": 1.0283138587700236e-05, + "loss": 0.2772, + "step": 5906 + }, + { + "epoch": 0.51, + "learning_rate": 1.0280363450336623e-05, + "loss": 0.2653, + "step": 5907 + }, + { + "epoch": 0.51, + "learning_rate": 1.0277588291363984e-05, + "loss": 0.2966, + "step": 5908 + }, + { + "epoch": 0.51, + "learning_rate": 1.027481311099621e-05, + "loss": 0.2208, + "step": 5909 + }, + { + "epoch": 0.51, + "learning_rate": 1.0272037909447197e-05, + "loss": 0.3234, + "step": 5910 + }, + { + "epoch": 0.51, + "learning_rate": 1.0269262686930845e-05, + "loss": 0.2675, + "step": 5911 + }, + { + "epoch": 0.51, + "learning_rate": 1.026648744366106e-05, + "loss": 0.248, + "step": 5912 + }, + { + "epoch": 0.51, + "learning_rate": 1.0263712179851736e-05, + "loss": 0.2637, + "step": 5913 + }, + { + "epoch": 0.51, + "learning_rate": 1.0260936895716781e-05, + "loss": 0.2807, + "step": 5914 + }, + { + "epoch": 0.51, + "learning_rate": 1.0258161591470105e-05, + "loss": 0.2745, + "step": 5915 + }, + { + "epoch": 0.51, + "learning_rate": 1.0255386267325602e-05, + "loss": 0.2509, + "step": 5916 + }, + { + "epoch": 0.51, + "learning_rate": 1.0252610923497188e-05, + "loss": 0.2764, + "step": 5917 + }, + { + "epoch": 0.51, + "learning_rate": 1.0249835560198772e-05, + "loss": 0.29, + "step": 5918 + }, + { + "epoch": 0.51, + "learning_rate": 1.0247060177644267e-05, + "loss": 0.2548, + "step": 5919 + }, + { + "epoch": 0.51, + "learning_rate": 1.0244284776047589e-05, + "loss": 0.2979, + "step": 5920 + }, + { + "epoch": 0.51, + "learning_rate": 1.024150935562264e-05, + "loss": 0.2521, + "step": 5921 + }, + { + "epoch": 0.51, + "learning_rate": 1.023873391658335e-05, + "loss": 0.2816, + "step": 5922 + }, + { + "epoch": 0.51, + "learning_rate": 1.0235958459143623e-05, + "loss": 0.2781, + "step": 5923 + }, + { + "epoch": 0.51, + "learning_rate": 1.0233182983517387e-05, + "loss": 0.2908, + "step": 5924 + }, + { + "epoch": 0.51, + "learning_rate": 1.0230407489918556e-05, + "loss": 0.2952, + "step": 5925 + }, + { + "epoch": 0.51, + "learning_rate": 1.0227631978561057e-05, + "loss": 0.2786, + "step": 5926 + }, + { + "epoch": 0.51, + "learning_rate": 1.0224856449658811e-05, + "loss": 0.2825, + "step": 5927 + }, + { + "epoch": 0.51, + "learning_rate": 1.022208090342574e-05, + "loss": 0.2765, + "step": 5928 + }, + { + "epoch": 0.51, + "learning_rate": 1.0219305340075767e-05, + "loss": 0.2783, + "step": 5929 + }, + { + "epoch": 0.51, + "learning_rate": 1.0216529759822823e-05, + "loss": 0.2986, + "step": 5930 + }, + { + "epoch": 0.51, + "learning_rate": 1.021375416288084e-05, + "loss": 0.3079, + "step": 5931 + }, + { + "epoch": 0.51, + "learning_rate": 1.021097854946374e-05, + "loss": 0.291, + "step": 5932 + }, + { + "epoch": 0.51, + "learning_rate": 1.0208202919785453e-05, + "loss": 0.2977, + "step": 5933 + }, + { + "epoch": 0.51, + "learning_rate": 1.0205427274059915e-05, + "loss": 0.2658, + "step": 5934 + }, + { + "epoch": 0.51, + "learning_rate": 1.020265161250106e-05, + "loss": 0.2443, + "step": 5935 + }, + { + "epoch": 0.51, + "learning_rate": 1.0199875935322815e-05, + "loss": 0.2748, + "step": 5936 + }, + { + "epoch": 0.51, + "learning_rate": 1.0197100242739123e-05, + "loss": 0.316, + "step": 5937 + }, + { + "epoch": 0.51, + "learning_rate": 1.019432453496392e-05, + "loss": 0.2752, + "step": 5938 + }, + { + "epoch": 0.51, + "learning_rate": 1.0191548812211143e-05, + "loss": 0.2738, + "step": 5939 + }, + { + "epoch": 0.51, + "learning_rate": 1.0188773074694727e-05, + "loss": 0.3325, + "step": 5940 + }, + { + "epoch": 0.51, + "learning_rate": 1.0185997322628618e-05, + "loss": 0.599, + "step": 5941 + }, + { + "epoch": 0.51, + "learning_rate": 1.018322155622675e-05, + "loss": 0.2682, + "step": 5942 + }, + { + "epoch": 0.51, + "learning_rate": 1.0180445775703074e-05, + "loss": 0.3058, + "step": 5943 + }, + { + "epoch": 0.51, + "learning_rate": 1.0177669981271528e-05, + "loss": 0.2665, + "step": 5944 + }, + { + "epoch": 0.51, + "learning_rate": 1.0174894173146055e-05, + "loss": 0.3224, + "step": 5945 + }, + { + "epoch": 0.51, + "learning_rate": 1.0172118351540608e-05, + "loss": 0.2926, + "step": 5946 + }, + { + "epoch": 0.51, + "learning_rate": 1.0169342516669125e-05, + "loss": 0.2706, + "step": 5947 + }, + { + "epoch": 0.51, + "learning_rate": 1.0166566668745558e-05, + "loss": 0.2663, + "step": 5948 + }, + { + "epoch": 0.51, + "learning_rate": 1.0163790807983858e-05, + "loss": 0.2739, + "step": 5949 + }, + { + "epoch": 0.51, + "learning_rate": 1.016101493459797e-05, + "loss": 0.254, + "step": 5950 + }, + { + "epoch": 0.51, + "learning_rate": 1.0158239048801848e-05, + "loss": 0.29, + "step": 5951 + }, + { + "epoch": 0.51, + "learning_rate": 1.0155463150809439e-05, + "loss": 0.2756, + "step": 5952 + }, + { + "epoch": 0.51, + "learning_rate": 1.0152687240834702e-05, + "loss": 0.3024, + "step": 5953 + }, + { + "epoch": 0.51, + "learning_rate": 1.0149911319091583e-05, + "loss": 0.2458, + "step": 5954 + }, + { + "epoch": 0.51, + "learning_rate": 1.0147135385794043e-05, + "loss": 0.2426, + "step": 5955 + }, + { + "epoch": 0.51, + "learning_rate": 1.0144359441156033e-05, + "loss": 0.2494, + "step": 5956 + }, + { + "epoch": 0.51, + "learning_rate": 1.014158348539151e-05, + "loss": 0.303, + "step": 5957 + }, + { + "epoch": 0.51, + "learning_rate": 1.0138807518714435e-05, + "loss": 0.2877, + "step": 5958 + }, + { + "epoch": 0.51, + "learning_rate": 1.013603154133876e-05, + "loss": 0.2545, + "step": 5959 + }, + { + "epoch": 0.51, + "learning_rate": 1.0133255553478447e-05, + "loss": 0.3298, + "step": 5960 + }, + { + "epoch": 0.51, + "learning_rate": 1.013047955534745e-05, + "loss": 0.2941, + "step": 5961 + }, + { + "epoch": 0.51, + "learning_rate": 1.012770354715974e-05, + "loss": 0.2681, + "step": 5962 + }, + { + "epoch": 0.51, + "learning_rate": 1.0124927529129267e-05, + "loss": 0.277, + "step": 5963 + }, + { + "epoch": 0.51, + "learning_rate": 1.0122151501469999e-05, + "loss": 0.3475, + "step": 5964 + }, + { + "epoch": 0.51, + "learning_rate": 1.01193754643959e-05, + "loss": 0.2628, + "step": 5965 + }, + { + "epoch": 0.51, + "learning_rate": 1.0116599418120924e-05, + "loss": 0.2421, + "step": 5966 + }, + { + "epoch": 0.51, + "learning_rate": 1.0113823362859042e-05, + "loss": 0.2729, + "step": 5967 + }, + { + "epoch": 0.51, + "learning_rate": 1.0111047298824222e-05, + "loss": 0.2359, + "step": 5968 + }, + { + "epoch": 0.51, + "learning_rate": 1.0108271226230423e-05, + "loss": 0.2687, + "step": 5969 + }, + { + "epoch": 0.51, + "learning_rate": 1.0105495145291612e-05, + "loss": 0.267, + "step": 5970 + }, + { + "epoch": 0.51, + "learning_rate": 1.0102719056221757e-05, + "loss": 0.2715, + "step": 5971 + }, + { + "epoch": 0.51, + "learning_rate": 1.0099942959234826e-05, + "loss": 0.3027, + "step": 5972 + }, + { + "epoch": 0.51, + "learning_rate": 1.0097166854544782e-05, + "loss": 0.3155, + "step": 5973 + }, + { + "epoch": 0.51, + "learning_rate": 1.0094390742365598e-05, + "loss": 0.2945, + "step": 5974 + }, + { + "epoch": 0.51, + "learning_rate": 1.0091614622911243e-05, + "loss": 0.2979, + "step": 5975 + }, + { + "epoch": 0.51, + "learning_rate": 1.0088838496395688e-05, + "loss": 0.3051, + "step": 5976 + }, + { + "epoch": 0.51, + "learning_rate": 1.0086062363032896e-05, + "loss": 0.2727, + "step": 5977 + }, + { + "epoch": 0.51, + "learning_rate": 1.0083286223036845e-05, + "loss": 0.2344, + "step": 5978 + }, + { + "epoch": 0.51, + "learning_rate": 1.0080510076621503e-05, + "loss": 0.3022, + "step": 5979 + }, + { + "epoch": 0.51, + "learning_rate": 1.0077733924000841e-05, + "loss": 0.2423, + "step": 5980 + }, + { + "epoch": 0.51, + "learning_rate": 1.0074957765388832e-05, + "loss": 0.2502, + "step": 5981 + }, + { + "epoch": 0.51, + "learning_rate": 1.007218160099945e-05, + "loss": 0.3272, + "step": 5982 + }, + { + "epoch": 0.51, + "learning_rate": 1.0069405431046669e-05, + "loss": 0.2727, + "step": 5983 + }, + { + "epoch": 0.51, + "learning_rate": 1.0066629255744458e-05, + "loss": 0.2729, + "step": 5984 + }, + { + "epoch": 0.51, + "learning_rate": 1.0063853075306792e-05, + "loss": 0.247, + "step": 5985 + }, + { + "epoch": 0.51, + "learning_rate": 1.006107688994765e-05, + "loss": 0.2372, + "step": 5986 + }, + { + "epoch": 0.51, + "learning_rate": 1.0058300699880998e-05, + "loss": 0.2842, + "step": 5987 + }, + { + "epoch": 0.51, + "learning_rate": 1.0055524505320821e-05, + "loss": 0.2956, + "step": 5988 + }, + { + "epoch": 0.51, + "learning_rate": 1.0052748306481088e-05, + "loss": 0.2763, + "step": 5989 + }, + { + "epoch": 0.51, + "learning_rate": 1.0049972103575775e-05, + "loss": 0.356, + "step": 5990 + }, + { + "epoch": 0.51, + "learning_rate": 1.0047195896818863e-05, + "loss": 0.259, + "step": 5991 + }, + { + "epoch": 0.51, + "learning_rate": 1.004441968642432e-05, + "loss": 0.2883, + "step": 5992 + }, + { + "epoch": 0.51, + "learning_rate": 1.0041643472606129e-05, + "loss": 0.2598, + "step": 5993 + }, + { + "epoch": 0.51, + "learning_rate": 1.0038867255578261e-05, + "loss": 0.2783, + "step": 5994 + }, + { + "epoch": 0.51, + "learning_rate": 1.0036091035554703e-05, + "loss": 0.2498, + "step": 5995 + }, + { + "epoch": 0.51, + "learning_rate": 1.0033314812749423e-05, + "loss": 0.324, + "step": 5996 + }, + { + "epoch": 0.51, + "learning_rate": 1.0030538587376402e-05, + "loss": 0.2609, + "step": 5997 + }, + { + "epoch": 0.51, + "learning_rate": 1.002776235964962e-05, + "loss": 0.2521, + "step": 5998 + }, + { + "epoch": 0.51, + "learning_rate": 1.0024986129783047e-05, + "loss": 0.2919, + "step": 5999 + }, + { + "epoch": 0.51, + "learning_rate": 1.0022209897990673e-05, + "loss": 0.2731, + "step": 6000 + }, + { + "epoch": 0.51, + "learning_rate": 1.0019433664486466e-05, + "loss": 0.2726, + "step": 6001 + }, + { + "epoch": 0.51, + "learning_rate": 1.001665742948441e-05, + "loss": 0.29, + "step": 6002 + }, + { + "epoch": 0.51, + "learning_rate": 1.001388119319848e-05, + "loss": 0.3542, + "step": 6003 + }, + { + "epoch": 0.51, + "learning_rate": 1.0011104955842656e-05, + "loss": 0.2651, + "step": 6004 + }, + { + "epoch": 0.51, + "learning_rate": 1.000832871763092e-05, + "loss": 0.2708, + "step": 6005 + }, + { + "epoch": 0.51, + "learning_rate": 1.0005552478777244e-05, + "loss": 0.2897, + "step": 6006 + }, + { + "epoch": 0.51, + "learning_rate": 1.0002776239495613e-05, + "loss": 0.2759, + "step": 6007 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.2853, + "step": 6008 + }, + { + "epoch": 0.52, + "learning_rate": 9.997223760504392e-06, + "loss": 0.3304, + "step": 6009 + }, + { + "epoch": 0.52, + "learning_rate": 9.994447521222758e-06, + "loss": 0.2999, + "step": 6010 + }, + { + "epoch": 0.52, + "learning_rate": 9.991671282369084e-06, + "loss": 0.2718, + "step": 6011 + }, + { + "epoch": 0.52, + "learning_rate": 9.988895044157345e-06, + "loss": 0.2897, + "step": 6012 + }, + { + "epoch": 0.52, + "learning_rate": 9.986118806801525e-06, + "loss": 0.2991, + "step": 6013 + }, + { + "epoch": 0.52, + "learning_rate": 9.983342570515592e-06, + "loss": 0.2459, + "step": 6014 + }, + { + "epoch": 0.52, + "learning_rate": 9.980566335513538e-06, + "loss": 0.2974, + "step": 6015 + }, + { + "epoch": 0.52, + "learning_rate": 9.977790102009332e-06, + "loss": 0.3033, + "step": 6016 + }, + { + "epoch": 0.52, + "learning_rate": 9.975013870216953e-06, + "loss": 0.3109, + "step": 6017 + }, + { + "epoch": 0.52, + "learning_rate": 9.972237640350383e-06, + "loss": 0.2469, + "step": 6018 + }, + { + "epoch": 0.52, + "learning_rate": 9.969461412623601e-06, + "loss": 0.2933, + "step": 6019 + }, + { + "epoch": 0.52, + "learning_rate": 9.966685187250582e-06, + "loss": 0.302, + "step": 6020 + }, + { + "epoch": 0.52, + "learning_rate": 9.963908964445298e-06, + "loss": 0.3113, + "step": 6021 + }, + { + "epoch": 0.52, + "learning_rate": 9.96113274442174e-06, + "loss": 0.2988, + "step": 6022 + }, + { + "epoch": 0.52, + "learning_rate": 9.958356527393876e-06, + "loss": 0.2715, + "step": 6023 + }, + { + "epoch": 0.52, + "learning_rate": 9.955580313575683e-06, + "loss": 0.3322, + "step": 6024 + }, + { + "epoch": 0.52, + "learning_rate": 9.95280410318114e-06, + "loss": 0.2742, + "step": 6025 + }, + { + "epoch": 0.52, + "learning_rate": 9.950027896424228e-06, + "loss": 0.2628, + "step": 6026 + }, + { + "epoch": 0.52, + "learning_rate": 9.947251693518917e-06, + "loss": 0.2476, + "step": 6027 + }, + { + "epoch": 0.52, + "learning_rate": 9.94447549467918e-06, + "loss": 0.2836, + "step": 6028 + }, + { + "epoch": 0.52, + "learning_rate": 9.941699300119004e-06, + "loss": 0.2685, + "step": 6029 + }, + { + "epoch": 0.52, + "learning_rate": 9.938923110052353e-06, + "loss": 0.3553, + "step": 6030 + }, + { + "epoch": 0.52, + "learning_rate": 9.93614692469321e-06, + "loss": 0.2778, + "step": 6031 + }, + { + "epoch": 0.52, + "learning_rate": 9.933370744255543e-06, + "loss": 0.2977, + "step": 6032 + }, + { + "epoch": 0.52, + "learning_rate": 9.930594568953335e-06, + "loss": 0.3034, + "step": 6033 + }, + { + "epoch": 0.52, + "learning_rate": 9.92781839900055e-06, + "loss": 0.2576, + "step": 6034 + }, + { + "epoch": 0.52, + "learning_rate": 9.92504223461117e-06, + "loss": 0.2761, + "step": 6035 + }, + { + "epoch": 0.52, + "learning_rate": 9.922266075999162e-06, + "loss": 0.2675, + "step": 6036 + }, + { + "epoch": 0.52, + "learning_rate": 9.9194899233785e-06, + "loss": 0.284, + "step": 6037 + }, + { + "epoch": 0.52, + "learning_rate": 9.916713776963156e-06, + "loss": 0.5876, + "step": 6038 + }, + { + "epoch": 0.52, + "learning_rate": 9.913937636967106e-06, + "loss": 0.3221, + "step": 6039 + }, + { + "epoch": 0.52, + "learning_rate": 9.911161503604317e-06, + "loss": 0.2642, + "step": 6040 + }, + { + "epoch": 0.52, + "learning_rate": 9.908385377088757e-06, + "loss": 0.2617, + "step": 6041 + }, + { + "epoch": 0.52, + "learning_rate": 9.905609257634404e-06, + "loss": 0.2643, + "step": 6042 + }, + { + "epoch": 0.52, + "learning_rate": 9.902833145455221e-06, + "loss": 0.2636, + "step": 6043 + }, + { + "epoch": 0.52, + "learning_rate": 9.90005704076518e-06, + "loss": 0.3041, + "step": 6044 + }, + { + "epoch": 0.52, + "learning_rate": 9.897280943778245e-06, + "loss": 0.2446, + "step": 6045 + }, + { + "epoch": 0.52, + "learning_rate": 9.894504854708391e-06, + "loss": 0.3168, + "step": 6046 + }, + { + "epoch": 0.52, + "learning_rate": 9.891728773769582e-06, + "loss": 0.2867, + "step": 6047 + }, + { + "epoch": 0.52, + "learning_rate": 9.88895270117578e-06, + "loss": 0.2738, + "step": 6048 + }, + { + "epoch": 0.52, + "learning_rate": 9.886176637140959e-06, + "loss": 0.2551, + "step": 6049 + }, + { + "epoch": 0.52, + "learning_rate": 9.883400581879077e-06, + "loss": 0.2531, + "step": 6050 + }, + { + "epoch": 0.52, + "learning_rate": 9.880624535604107e-06, + "loss": 0.2666, + "step": 6051 + }, + { + "epoch": 0.52, + "learning_rate": 9.877848498530001e-06, + "loss": 0.318, + "step": 6052 + }, + { + "epoch": 0.52, + "learning_rate": 9.875072470870735e-06, + "loss": 0.2891, + "step": 6053 + }, + { + "epoch": 0.52, + "learning_rate": 9.872296452840266e-06, + "loss": 0.3179, + "step": 6054 + }, + { + "epoch": 0.52, + "learning_rate": 9.86952044465255e-06, + "loss": 0.2405, + "step": 6055 + }, + { + "epoch": 0.52, + "learning_rate": 9.866744446521555e-06, + "loss": 0.2767, + "step": 6056 + }, + { + "epoch": 0.52, + "learning_rate": 9.863968458661244e-06, + "loss": 0.3212, + "step": 6057 + }, + { + "epoch": 0.52, + "learning_rate": 9.86119248128557e-06, + "loss": 0.2786, + "step": 6058 + }, + { + "epoch": 0.52, + "learning_rate": 9.85841651460849e-06, + "loss": 0.2606, + "step": 6059 + }, + { + "epoch": 0.52, + "learning_rate": 9.85564055884397e-06, + "loss": 0.2619, + "step": 6060 + }, + { + "epoch": 0.52, + "learning_rate": 9.85286461420596e-06, + "loss": 0.2863, + "step": 6061 + }, + { + "epoch": 0.52, + "learning_rate": 9.85008868090842e-06, + "loss": 0.3206, + "step": 6062 + }, + { + "epoch": 0.52, + "learning_rate": 9.847312759165301e-06, + "loss": 0.2991, + "step": 6063 + }, + { + "epoch": 0.52, + "learning_rate": 9.844536849190564e-06, + "loss": 0.2839, + "step": 6064 + }, + { + "epoch": 0.52, + "learning_rate": 9.841760951198159e-06, + "loss": 0.3005, + "step": 6065 + }, + { + "epoch": 0.52, + "learning_rate": 9.838985065402032e-06, + "loss": 0.2558, + "step": 6066 + }, + { + "epoch": 0.52, + "learning_rate": 9.836209192016146e-06, + "loss": 0.2752, + "step": 6067 + }, + { + "epoch": 0.52, + "learning_rate": 9.833433331254443e-06, + "loss": 0.2789, + "step": 6068 + }, + { + "epoch": 0.52, + "learning_rate": 9.830657483330877e-06, + "loss": 0.2729, + "step": 6069 + }, + { + "epoch": 0.52, + "learning_rate": 9.827881648459396e-06, + "loss": 0.2756, + "step": 6070 + }, + { + "epoch": 0.52, + "learning_rate": 9.825105826853946e-06, + "loss": 0.28, + "step": 6071 + }, + { + "epoch": 0.52, + "learning_rate": 9.822330018728474e-06, + "loss": 0.2957, + "step": 6072 + }, + { + "epoch": 0.52, + "learning_rate": 9.819554224296929e-06, + "loss": 0.251, + "step": 6073 + }, + { + "epoch": 0.52, + "learning_rate": 9.816778443773253e-06, + "loss": 0.2593, + "step": 6074 + }, + { + "epoch": 0.52, + "learning_rate": 9.814002677371387e-06, + "loss": 0.2833, + "step": 6075 + }, + { + "epoch": 0.52, + "learning_rate": 9.811226925305273e-06, + "loss": 0.2727, + "step": 6076 + }, + { + "epoch": 0.52, + "learning_rate": 9.80845118778886e-06, + "loss": 0.252, + "step": 6077 + }, + { + "epoch": 0.52, + "learning_rate": 9.805675465036084e-06, + "loss": 0.2828, + "step": 6078 + }, + { + "epoch": 0.52, + "learning_rate": 9.802899757260875e-06, + "loss": 0.2953, + "step": 6079 + }, + { + "epoch": 0.52, + "learning_rate": 9.800124064677186e-06, + "loss": 0.3062, + "step": 6080 + }, + { + "epoch": 0.52, + "learning_rate": 9.797348387498944e-06, + "loss": 0.2557, + "step": 6081 + }, + { + "epoch": 0.52, + "learning_rate": 9.794572725940088e-06, + "loss": 0.2673, + "step": 6082 + }, + { + "epoch": 0.52, + "learning_rate": 9.791797080214547e-06, + "loss": 0.2929, + "step": 6083 + }, + { + "epoch": 0.52, + "learning_rate": 9.789021450536265e-06, + "loss": 0.2668, + "step": 6084 + }, + { + "epoch": 0.52, + "learning_rate": 9.786245837119165e-06, + "loss": 0.2434, + "step": 6085 + }, + { + "epoch": 0.52, + "learning_rate": 9.783470240177175e-06, + "loss": 0.3055, + "step": 6086 + }, + { + "epoch": 0.52, + "learning_rate": 9.780694659924235e-06, + "loss": 0.3118, + "step": 6087 + }, + { + "epoch": 0.52, + "learning_rate": 9.777919096574264e-06, + "loss": 0.2756, + "step": 6088 + }, + { + "epoch": 0.52, + "learning_rate": 9.775143550341194e-06, + "loss": 0.3181, + "step": 6089 + }, + { + "epoch": 0.52, + "learning_rate": 9.772368021438943e-06, + "loss": 0.2761, + "step": 6090 + }, + { + "epoch": 0.52, + "learning_rate": 9.769592510081445e-06, + "loss": 0.2732, + "step": 6091 + }, + { + "epoch": 0.52, + "learning_rate": 9.766817016482618e-06, + "loss": 0.3576, + "step": 6092 + }, + { + "epoch": 0.52, + "learning_rate": 9.764041540856379e-06, + "loss": 0.3076, + "step": 6093 + }, + { + "epoch": 0.52, + "learning_rate": 9.761266083416655e-06, + "loss": 0.27, + "step": 6094 + }, + { + "epoch": 0.52, + "learning_rate": 9.758490644377363e-06, + "loss": 0.2689, + "step": 6095 + }, + { + "epoch": 0.52, + "learning_rate": 9.755715223952418e-06, + "loss": 0.2906, + "step": 6096 + }, + { + "epoch": 0.52, + "learning_rate": 9.752939822355733e-06, + "loss": 0.308, + "step": 6097 + }, + { + "epoch": 0.52, + "learning_rate": 9.75016443980123e-06, + "loss": 0.256, + "step": 6098 + }, + { + "epoch": 0.52, + "learning_rate": 9.747389076502814e-06, + "loss": 0.2881, + "step": 6099 + }, + { + "epoch": 0.52, + "learning_rate": 9.744613732674401e-06, + "loss": 0.2219, + "step": 6100 + }, + { + "epoch": 0.52, + "learning_rate": 9.7418384085299e-06, + "loss": 0.2842, + "step": 6101 + }, + { + "epoch": 0.52, + "learning_rate": 9.73906310428322e-06, + "loss": 0.2794, + "step": 6102 + }, + { + "epoch": 0.52, + "learning_rate": 9.736287820148269e-06, + "loss": 0.2455, + "step": 6103 + }, + { + "epoch": 0.52, + "learning_rate": 9.733512556338941e-06, + "loss": 0.278, + "step": 6104 + }, + { + "epoch": 0.52, + "learning_rate": 9.730737313069157e-06, + "loss": 0.2773, + "step": 6105 + }, + { + "epoch": 0.52, + "learning_rate": 9.727962090552808e-06, + "loss": 0.3433, + "step": 6106 + }, + { + "epoch": 0.52, + "learning_rate": 9.725186889003795e-06, + "loss": 0.2629, + "step": 6107 + }, + { + "epoch": 0.52, + "learning_rate": 9.722411708636018e-06, + "loss": 0.2755, + "step": 6108 + }, + { + "epoch": 0.52, + "learning_rate": 9.719636549663379e-06, + "loss": 0.6019, + "step": 6109 + }, + { + "epoch": 0.52, + "learning_rate": 9.716861412299769e-06, + "loss": 0.3077, + "step": 6110 + }, + { + "epoch": 0.52, + "learning_rate": 9.714086296759078e-06, + "loss": 0.263, + "step": 6111 + }, + { + "epoch": 0.52, + "learning_rate": 9.711311203255207e-06, + "loss": 0.3013, + "step": 6112 + }, + { + "epoch": 0.52, + "learning_rate": 9.708536132002038e-06, + "loss": 0.2523, + "step": 6113 + }, + { + "epoch": 0.52, + "learning_rate": 9.705761083213463e-06, + "loss": 0.2492, + "step": 6114 + }, + { + "epoch": 0.52, + "learning_rate": 9.702986057103375e-06, + "loss": 0.6278, + "step": 6115 + }, + { + "epoch": 0.52, + "learning_rate": 9.700211053885653e-06, + "loss": 0.3215, + "step": 6116 + }, + { + "epoch": 0.52, + "learning_rate": 9.697436073774178e-06, + "loss": 0.2739, + "step": 6117 + }, + { + "epoch": 0.52, + "learning_rate": 9.694661116982838e-06, + "loss": 0.2698, + "step": 6118 + }, + { + "epoch": 0.52, + "learning_rate": 9.691886183725512e-06, + "loss": 0.2839, + "step": 6119 + }, + { + "epoch": 0.52, + "learning_rate": 9.689111274216075e-06, + "loss": 0.3051, + "step": 6120 + }, + { + "epoch": 0.52, + "learning_rate": 9.6863363886684e-06, + "loss": 0.2514, + "step": 6121 + }, + { + "epoch": 0.52, + "learning_rate": 9.683561527296375e-06, + "loss": 0.2735, + "step": 6122 + }, + { + "epoch": 0.52, + "learning_rate": 9.68078669031386e-06, + "loss": 0.2988, + "step": 6123 + }, + { + "epoch": 0.52, + "learning_rate": 9.678011877934728e-06, + "loss": 0.2815, + "step": 6124 + }, + { + "epoch": 0.53, + "learning_rate": 9.675237090372852e-06, + "loss": 0.296, + "step": 6125 + }, + { + "epoch": 0.53, + "learning_rate": 9.672462327842095e-06, + "loss": 0.2869, + "step": 6126 + }, + { + "epoch": 0.53, + "learning_rate": 9.669687590556325e-06, + "loss": 0.2947, + "step": 6127 + }, + { + "epoch": 0.53, + "learning_rate": 9.666912878729398e-06, + "loss": 0.2599, + "step": 6128 + }, + { + "epoch": 0.53, + "learning_rate": 9.664138192575187e-06, + "loss": 0.264, + "step": 6129 + }, + { + "epoch": 0.53, + "learning_rate": 9.661363532307543e-06, + "loss": 0.2852, + "step": 6130 + }, + { + "epoch": 0.53, + "learning_rate": 9.658588898140322e-06, + "loss": 0.2942, + "step": 6131 + }, + { + "epoch": 0.53, + "learning_rate": 9.655814290287381e-06, + "loss": 0.3217, + "step": 6132 + }, + { + "epoch": 0.53, + "learning_rate": 9.65303970896258e-06, + "loss": 0.3466, + "step": 6133 + }, + { + "epoch": 0.53, + "learning_rate": 9.650265154379761e-06, + "loss": 0.3008, + "step": 6134 + }, + { + "epoch": 0.53, + "learning_rate": 9.647490626752773e-06, + "loss": 0.2971, + "step": 6135 + }, + { + "epoch": 0.53, + "learning_rate": 9.644716126295468e-06, + "loss": 0.3315, + "step": 6136 + }, + { + "epoch": 0.53, + "learning_rate": 9.641941653221687e-06, + "loss": 0.3229, + "step": 6137 + }, + { + "epoch": 0.53, + "learning_rate": 9.639167207745276e-06, + "loss": 0.3083, + "step": 6138 + }, + { + "epoch": 0.53, + "learning_rate": 9.636392790080073e-06, + "loss": 0.3123, + "step": 6139 + }, + { + "epoch": 0.53, + "learning_rate": 9.633618400439918e-06, + "loss": 0.2985, + "step": 6140 + }, + { + "epoch": 0.53, + "learning_rate": 9.630844039038647e-06, + "loss": 0.3315, + "step": 6141 + }, + { + "epoch": 0.53, + "learning_rate": 9.628069706090089e-06, + "loss": 0.2172, + "step": 6142 + }, + { + "epoch": 0.53, + "learning_rate": 9.625295401808085e-06, + "loss": 0.2834, + "step": 6143 + }, + { + "epoch": 0.53, + "learning_rate": 9.622521126406458e-06, + "loss": 0.2465, + "step": 6144 + }, + { + "epoch": 0.53, + "learning_rate": 9.619746880099036e-06, + "loss": 0.2546, + "step": 6145 + }, + { + "epoch": 0.53, + "learning_rate": 9.616972663099648e-06, + "loss": 0.2654, + "step": 6146 + }, + { + "epoch": 0.53, + "learning_rate": 9.614198475622113e-06, + "loss": 0.2982, + "step": 6147 + }, + { + "epoch": 0.53, + "learning_rate": 9.611424317880257e-06, + "loss": 0.2773, + "step": 6148 + }, + { + "epoch": 0.53, + "learning_rate": 9.608650190087888e-06, + "loss": 0.2869, + "step": 6149 + }, + { + "epoch": 0.53, + "learning_rate": 9.605876092458835e-06, + "loss": 0.3239, + "step": 6150 + }, + { + "epoch": 0.53, + "learning_rate": 9.603102025206901e-06, + "loss": 0.2952, + "step": 6151 + }, + { + "epoch": 0.53, + "learning_rate": 9.6003279885459e-06, + "loss": 0.28, + "step": 6152 + }, + { + "epoch": 0.53, + "learning_rate": 9.597553982689649e-06, + "loss": 0.3323, + "step": 6153 + }, + { + "epoch": 0.53, + "learning_rate": 9.594780007851947e-06, + "loss": 0.2784, + "step": 6154 + }, + { + "epoch": 0.53, + "learning_rate": 9.592006064246596e-06, + "loss": 0.2544, + "step": 6155 + }, + { + "epoch": 0.53, + "learning_rate": 9.589232152087404e-06, + "loss": 0.315, + "step": 6156 + }, + { + "epoch": 0.53, + "learning_rate": 9.586458271588167e-06, + "loss": 0.2506, + "step": 6157 + }, + { + "epoch": 0.53, + "learning_rate": 9.583684422962686e-06, + "loss": 0.2751, + "step": 6158 + }, + { + "epoch": 0.53, + "learning_rate": 9.580910606424747e-06, + "loss": 0.289, + "step": 6159 + }, + { + "epoch": 0.53, + "learning_rate": 9.578136822188154e-06, + "loss": 0.2582, + "step": 6160 + }, + { + "epoch": 0.53, + "learning_rate": 9.575363070466689e-06, + "loss": 0.2971, + "step": 6161 + }, + { + "epoch": 0.53, + "learning_rate": 9.572589351474135e-06, + "loss": 0.2603, + "step": 6162 + }, + { + "epoch": 0.53, + "learning_rate": 9.569815665424288e-06, + "loss": 0.2667, + "step": 6163 + }, + { + "epoch": 0.53, + "learning_rate": 9.56704201253092e-06, + "loss": 0.2909, + "step": 6164 + }, + { + "epoch": 0.53, + "learning_rate": 9.564268393007819e-06, + "loss": 0.28, + "step": 6165 + }, + { + "epoch": 0.53, + "learning_rate": 9.56149480706875e-06, + "loss": 0.2992, + "step": 6166 + }, + { + "epoch": 0.53, + "learning_rate": 9.558721254927501e-06, + "loss": 0.6023, + "step": 6167 + }, + { + "epoch": 0.53, + "learning_rate": 9.555947736797836e-06, + "loss": 0.2901, + "step": 6168 + }, + { + "epoch": 0.53, + "learning_rate": 9.553174252893522e-06, + "loss": 0.2755, + "step": 6169 + }, + { + "epoch": 0.53, + "learning_rate": 9.55040080342833e-06, + "loss": 0.3389, + "step": 6170 + }, + { + "epoch": 0.53, + "learning_rate": 9.547627388616026e-06, + "loss": 0.2678, + "step": 6171 + }, + { + "epoch": 0.53, + "learning_rate": 9.544854008670366e-06, + "loss": 0.2711, + "step": 6172 + }, + { + "epoch": 0.53, + "learning_rate": 9.542080663805108e-06, + "loss": 0.2574, + "step": 6173 + }, + { + "epoch": 0.53, + "learning_rate": 9.539307354234013e-06, + "loss": 0.2719, + "step": 6174 + }, + { + "epoch": 0.53, + "learning_rate": 9.536534080170827e-06, + "loss": 0.2614, + "step": 6175 + }, + { + "epoch": 0.53, + "learning_rate": 9.533760841829306e-06, + "loss": 0.2811, + "step": 6176 + }, + { + "epoch": 0.53, + "learning_rate": 9.530987639423195e-06, + "loss": 0.3169, + "step": 6177 + }, + { + "epoch": 0.53, + "learning_rate": 9.528214473166241e-06, + "loss": 0.2661, + "step": 6178 + }, + { + "epoch": 0.53, + "learning_rate": 9.525441343272185e-06, + "loss": 0.6213, + "step": 6179 + }, + { + "epoch": 0.53, + "learning_rate": 9.522668249954761e-06, + "loss": 0.3129, + "step": 6180 + }, + { + "epoch": 0.53, + "learning_rate": 9.519895193427713e-06, + "loss": 0.3321, + "step": 6181 + }, + { + "epoch": 0.53, + "learning_rate": 9.51712217390477e-06, + "loss": 0.5764, + "step": 6182 + }, + { + "epoch": 0.53, + "learning_rate": 9.514349191599665e-06, + "loss": 0.3439, + "step": 6183 + }, + { + "epoch": 0.53, + "learning_rate": 9.511576246726123e-06, + "loss": 0.2642, + "step": 6184 + }, + { + "epoch": 0.53, + "learning_rate": 9.508803339497872e-06, + "loss": 0.2975, + "step": 6185 + }, + { + "epoch": 0.53, + "learning_rate": 9.506030470128635e-06, + "loss": 0.3195, + "step": 6186 + }, + { + "epoch": 0.53, + "learning_rate": 9.503257638832122e-06, + "loss": 0.3178, + "step": 6187 + }, + { + "epoch": 0.53, + "learning_rate": 9.50048484582206e-06, + "loss": 0.2945, + "step": 6188 + }, + { + "epoch": 0.53, + "learning_rate": 9.497712091312158e-06, + "loss": 0.3174, + "step": 6189 + }, + { + "epoch": 0.53, + "learning_rate": 9.494939375516122e-06, + "loss": 0.293, + "step": 6190 + }, + { + "epoch": 0.53, + "learning_rate": 9.49216669864767e-06, + "loss": 0.2802, + "step": 6191 + }, + { + "epoch": 0.53, + "learning_rate": 9.489394060920496e-06, + "loss": 0.3081, + "step": 6192 + }, + { + "epoch": 0.53, + "learning_rate": 9.486621462548307e-06, + "loss": 0.308, + "step": 6193 + }, + { + "epoch": 0.53, + "learning_rate": 9.483848903744795e-06, + "loss": 0.2646, + "step": 6194 + }, + { + "epoch": 0.53, + "learning_rate": 9.48107638472366e-06, + "loss": 0.2584, + "step": 6195 + }, + { + "epoch": 0.53, + "learning_rate": 9.478303905698595e-06, + "loss": 0.2626, + "step": 6196 + }, + { + "epoch": 0.53, + "learning_rate": 9.475531466883284e-06, + "loss": 0.2735, + "step": 6197 + }, + { + "epoch": 0.53, + "learning_rate": 9.472759068491421e-06, + "loss": 0.2536, + "step": 6198 + }, + { + "epoch": 0.53, + "learning_rate": 9.469986710736683e-06, + "loss": 0.2912, + "step": 6199 + }, + { + "epoch": 0.53, + "learning_rate": 9.467214393832746e-06, + "loss": 0.2922, + "step": 6200 + }, + { + "epoch": 0.53, + "learning_rate": 9.464442117993296e-06, + "loss": 0.2674, + "step": 6201 + }, + { + "epoch": 0.53, + "learning_rate": 9.461669883431997e-06, + "loss": 0.2382, + "step": 6202 + }, + { + "epoch": 0.53, + "learning_rate": 9.458897690362528e-06, + "loss": 0.2496, + "step": 6203 + }, + { + "epoch": 0.53, + "learning_rate": 9.456125538998546e-06, + "loss": 0.2491, + "step": 6204 + }, + { + "epoch": 0.53, + "learning_rate": 9.453353429553724e-06, + "loss": 0.281, + "step": 6205 + }, + { + "epoch": 0.53, + "learning_rate": 9.45058136224172e-06, + "loss": 0.2861, + "step": 6206 + }, + { + "epoch": 0.53, + "learning_rate": 9.447809337276184e-06, + "loss": 0.2622, + "step": 6207 + }, + { + "epoch": 0.53, + "learning_rate": 9.44503735487078e-06, + "loss": 0.255, + "step": 6208 + }, + { + "epoch": 0.53, + "learning_rate": 9.442265415239154e-06, + "loss": 0.2674, + "step": 6209 + }, + { + "epoch": 0.53, + "learning_rate": 9.439493518594957e-06, + "loss": 0.2699, + "step": 6210 + }, + { + "epoch": 0.53, + "learning_rate": 9.436721665151824e-06, + "loss": 0.2976, + "step": 6211 + }, + { + "epoch": 0.53, + "learning_rate": 9.433949855123405e-06, + "loss": 0.3428, + "step": 6212 + }, + { + "epoch": 0.53, + "learning_rate": 9.431178088723334e-06, + "loss": 0.2438, + "step": 6213 + }, + { + "epoch": 0.53, + "learning_rate": 9.428406366165244e-06, + "loss": 0.2827, + "step": 6214 + }, + { + "epoch": 0.53, + "learning_rate": 9.425634687662768e-06, + "loss": 0.2614, + "step": 6215 + }, + { + "epoch": 0.53, + "learning_rate": 9.422863053429534e-06, + "loss": 0.2676, + "step": 6216 + }, + { + "epoch": 0.53, + "learning_rate": 9.420091463679164e-06, + "loss": 0.3492, + "step": 6217 + }, + { + "epoch": 0.53, + "learning_rate": 9.417319918625274e-06, + "loss": 0.2786, + "step": 6218 + }, + { + "epoch": 0.53, + "learning_rate": 9.41454841848149e-06, + "loss": 0.3083, + "step": 6219 + }, + { + "epoch": 0.53, + "learning_rate": 9.41177696346142e-06, + "loss": 0.3035, + "step": 6220 + }, + { + "epoch": 0.53, + "learning_rate": 9.409005553778673e-06, + "loss": 0.27, + "step": 6221 + }, + { + "epoch": 0.53, + "learning_rate": 9.40623418964686e-06, + "loss": 0.2797, + "step": 6222 + }, + { + "epoch": 0.53, + "learning_rate": 9.403462871279582e-06, + "loss": 0.2883, + "step": 6223 + }, + { + "epoch": 0.53, + "learning_rate": 9.40069159889044e-06, + "loss": 0.2992, + "step": 6224 + }, + { + "epoch": 0.53, + "learning_rate": 9.397920372693023e-06, + "loss": 0.2802, + "step": 6225 + }, + { + "epoch": 0.53, + "learning_rate": 9.395149192900934e-06, + "loss": 0.6013, + "step": 6226 + }, + { + "epoch": 0.53, + "learning_rate": 9.392378059727752e-06, + "loss": 0.2268, + "step": 6227 + }, + { + "epoch": 0.53, + "learning_rate": 9.389606973387067e-06, + "loss": 0.3, + "step": 6228 + }, + { + "epoch": 0.53, + "learning_rate": 9.386835934092464e-06, + "loss": 0.2955, + "step": 6229 + }, + { + "epoch": 0.53, + "learning_rate": 9.384064942057518e-06, + "loss": 0.3259, + "step": 6230 + }, + { + "epoch": 0.53, + "learning_rate": 9.381293997495801e-06, + "loss": 0.3099, + "step": 6231 + }, + { + "epoch": 0.53, + "learning_rate": 9.378523100620883e-06, + "loss": 0.2547, + "step": 6232 + }, + { + "epoch": 0.53, + "learning_rate": 9.375752251646336e-06, + "loss": 0.2751, + "step": 6233 + }, + { + "epoch": 0.53, + "learning_rate": 9.372981450785723e-06, + "loss": 0.2932, + "step": 6234 + }, + { + "epoch": 0.53, + "learning_rate": 9.370210698252597e-06, + "loss": 0.3113, + "step": 6235 + }, + { + "epoch": 0.53, + "learning_rate": 9.367439994260522e-06, + "loss": 0.3331, + "step": 6236 + }, + { + "epoch": 0.53, + "learning_rate": 9.364669339023047e-06, + "loss": 0.2291, + "step": 6237 + }, + { + "epoch": 0.53, + "learning_rate": 9.361898732753715e-06, + "loss": 0.2784, + "step": 6238 + }, + { + "epoch": 0.53, + "learning_rate": 9.359128175666083e-06, + "loss": 0.2957, + "step": 6239 + }, + { + "epoch": 0.53, + "learning_rate": 9.356357667973679e-06, + "loss": 0.262, + "step": 6240 + }, + { + "epoch": 0.53, + "learning_rate": 9.353587209890049e-06, + "loss": 0.3329, + "step": 6241 + }, + { + "epoch": 0.54, + "learning_rate": 9.35081680162872e-06, + "loss": 0.2634, + "step": 6242 + }, + { + "epoch": 0.54, + "learning_rate": 9.348046443403227e-06, + "loss": 0.3326, + "step": 6243 + }, + { + "epoch": 0.54, + "learning_rate": 9.345276135427093e-06, + "loss": 0.6267, + "step": 6244 + }, + { + "epoch": 0.54, + "learning_rate": 9.342505877913835e-06, + "loss": 0.2528, + "step": 6245 + }, + { + "epoch": 0.54, + "learning_rate": 9.339735671076978e-06, + "loss": 0.3056, + "step": 6246 + }, + { + "epoch": 0.54, + "learning_rate": 9.336965515130034e-06, + "loss": 0.2305, + "step": 6247 + }, + { + "epoch": 0.54, + "learning_rate": 9.334195410286513e-06, + "loss": 0.2986, + "step": 6248 + }, + { + "epoch": 0.54, + "learning_rate": 9.331425356759915e-06, + "loss": 0.2968, + "step": 6249 + }, + { + "epoch": 0.54, + "learning_rate": 9.32865535476375e-06, + "loss": 0.231, + "step": 6250 + }, + { + "epoch": 0.54, + "learning_rate": 9.325885404511513e-06, + "loss": 0.313, + "step": 6251 + }, + { + "epoch": 0.54, + "learning_rate": 9.323115506216698e-06, + "loss": 0.2585, + "step": 6252 + }, + { + "epoch": 0.54, + "learning_rate": 9.320345660092794e-06, + "loss": 0.2469, + "step": 6253 + }, + { + "epoch": 0.54, + "learning_rate": 9.317575866353293e-06, + "loss": 0.2513, + "step": 6254 + }, + { + "epoch": 0.54, + "learning_rate": 9.314806125211669e-06, + "loss": 0.2708, + "step": 6255 + }, + { + "epoch": 0.54, + "learning_rate": 9.312036436881402e-06, + "loss": 0.3019, + "step": 6256 + }, + { + "epoch": 0.54, + "learning_rate": 9.309266801575968e-06, + "loss": 0.2678, + "step": 6257 + }, + { + "epoch": 0.54, + "learning_rate": 9.306497219508835e-06, + "loss": 0.2935, + "step": 6258 + }, + { + "epoch": 0.54, + "learning_rate": 9.30372769089347e-06, + "loss": 0.2889, + "step": 6259 + }, + { + "epoch": 0.54, + "learning_rate": 9.300958215943335e-06, + "loss": 0.2858, + "step": 6260 + }, + { + "epoch": 0.54, + "learning_rate": 9.298188794871888e-06, + "loss": 0.3174, + "step": 6261 + }, + { + "epoch": 0.54, + "learning_rate": 9.29541942789258e-06, + "loss": 0.288, + "step": 6262 + }, + { + "epoch": 0.54, + "learning_rate": 9.29265011521886e-06, + "loss": 0.2868, + "step": 6263 + }, + { + "epoch": 0.54, + "learning_rate": 9.289880857064175e-06, + "loss": 0.2936, + "step": 6264 + }, + { + "epoch": 0.54, + "learning_rate": 9.287111653641964e-06, + "loss": 0.2488, + "step": 6265 + }, + { + "epoch": 0.54, + "learning_rate": 9.28434250516566e-06, + "loss": 0.322, + "step": 6266 + }, + { + "epoch": 0.54, + "learning_rate": 9.281573411848707e-06, + "loss": 0.2982, + "step": 6267 + }, + { + "epoch": 0.54, + "learning_rate": 9.278804373904525e-06, + "loss": 0.2462, + "step": 6268 + }, + { + "epoch": 0.54, + "learning_rate": 9.276035391546538e-06, + "loss": 0.2831, + "step": 6269 + }, + { + "epoch": 0.54, + "learning_rate": 9.27326646498816e-06, + "loss": 0.2899, + "step": 6270 + }, + { + "epoch": 0.54, + "learning_rate": 9.270497594442815e-06, + "loss": 0.2493, + "step": 6271 + }, + { + "epoch": 0.54, + "learning_rate": 9.267728780123913e-06, + "loss": 0.2828, + "step": 6272 + }, + { + "epoch": 0.54, + "learning_rate": 9.264960022244855e-06, + "loss": 0.248, + "step": 6273 + }, + { + "epoch": 0.54, + "learning_rate": 9.262191321019049e-06, + "loss": 0.2837, + "step": 6274 + }, + { + "epoch": 0.54, + "learning_rate": 9.259422676659892e-06, + "loss": 0.2541, + "step": 6275 + }, + { + "epoch": 0.54, + "learning_rate": 9.256654089380773e-06, + "loss": 0.2232, + "step": 6276 + }, + { + "epoch": 0.54, + "learning_rate": 9.253885559395084e-06, + "loss": 0.2437, + "step": 6277 + }, + { + "epoch": 0.54, + "learning_rate": 9.251117086916209e-06, + "loss": 0.3179, + "step": 6278 + }, + { + "epoch": 0.54, + "learning_rate": 9.24834867215753e-06, + "loss": 0.2921, + "step": 6279 + }, + { + "epoch": 0.54, + "learning_rate": 9.245580315332418e-06, + "loss": 0.3156, + "step": 6280 + }, + { + "epoch": 0.54, + "learning_rate": 9.242812016654252e-06, + "loss": 0.2936, + "step": 6281 + }, + { + "epoch": 0.54, + "learning_rate": 9.240043776336397e-06, + "loss": 0.3064, + "step": 6282 + }, + { + "epoch": 0.54, + "learning_rate": 9.237275594592205e-06, + "loss": 0.2792, + "step": 6283 + }, + { + "epoch": 0.54, + "learning_rate": 9.234507471635043e-06, + "loss": 0.2784, + "step": 6284 + }, + { + "epoch": 0.54, + "learning_rate": 9.231739407678267e-06, + "loss": 0.2673, + "step": 6285 + }, + { + "epoch": 0.54, + "learning_rate": 9.22897140293522e-06, + "loss": 0.2992, + "step": 6286 + }, + { + "epoch": 0.54, + "learning_rate": 9.226203457619245e-06, + "loss": 0.5927, + "step": 6287 + }, + { + "epoch": 0.54, + "learning_rate": 9.223435571943685e-06, + "loss": 0.2769, + "step": 6288 + }, + { + "epoch": 0.54, + "learning_rate": 9.220667746121875e-06, + "loss": 0.2463, + "step": 6289 + }, + { + "epoch": 0.54, + "learning_rate": 9.217899980367142e-06, + "loss": 0.3136, + "step": 6290 + }, + { + "epoch": 0.54, + "learning_rate": 9.215132274892817e-06, + "loss": 0.2569, + "step": 6291 + }, + { + "epoch": 0.54, + "learning_rate": 9.212364629912218e-06, + "loss": 0.2844, + "step": 6292 + }, + { + "epoch": 0.54, + "learning_rate": 9.209597045638661e-06, + "loss": 0.2471, + "step": 6293 + }, + { + "epoch": 0.54, + "learning_rate": 9.206829522285456e-06, + "loss": 0.3096, + "step": 6294 + }, + { + "epoch": 0.54, + "learning_rate": 9.204062060065915e-06, + "loss": 0.2534, + "step": 6295 + }, + { + "epoch": 0.54, + "learning_rate": 9.201294659193337e-06, + "loss": 0.2936, + "step": 6296 + }, + { + "epoch": 0.54, + "learning_rate": 9.198527319881017e-06, + "loss": 0.2944, + "step": 6297 + }, + { + "epoch": 0.54, + "learning_rate": 9.195760042342254e-06, + "loss": 0.2858, + "step": 6298 + }, + { + "epoch": 0.54, + "learning_rate": 9.192992826790335e-06, + "loss": 0.2599, + "step": 6299 + }, + { + "epoch": 0.54, + "learning_rate": 9.19022567343854e-06, + "loss": 0.2722, + "step": 6300 + }, + { + "epoch": 0.54, + "learning_rate": 9.187458582500145e-06, + "loss": 0.2665, + "step": 6301 + }, + { + "epoch": 0.54, + "learning_rate": 9.184691554188432e-06, + "loss": 0.2485, + "step": 6302 + }, + { + "epoch": 0.54, + "learning_rate": 9.181924588716664e-06, + "loss": 0.2971, + "step": 6303 + }, + { + "epoch": 0.54, + "learning_rate": 9.179157686298104e-06, + "loss": 0.2433, + "step": 6304 + }, + { + "epoch": 0.54, + "learning_rate": 9.176390847146018e-06, + "loss": 0.2442, + "step": 6305 + }, + { + "epoch": 0.54, + "learning_rate": 9.173624071473655e-06, + "loss": 0.2957, + "step": 6306 + }, + { + "epoch": 0.54, + "learning_rate": 9.170857359494265e-06, + "loss": 0.2916, + "step": 6307 + }, + { + "epoch": 0.54, + "learning_rate": 9.168090711421089e-06, + "loss": 0.2991, + "step": 6308 + }, + { + "epoch": 0.54, + "learning_rate": 9.165324127467375e-06, + "loss": 0.2511, + "step": 6309 + }, + { + "epoch": 0.54, + "learning_rate": 9.162557607846352e-06, + "loss": 0.2816, + "step": 6310 + }, + { + "epoch": 0.54, + "learning_rate": 9.159791152771247e-06, + "loss": 0.2364, + "step": 6311 + }, + { + "epoch": 0.54, + "learning_rate": 9.157024762455292e-06, + "loss": 0.2662, + "step": 6312 + }, + { + "epoch": 0.54, + "learning_rate": 9.154258437111706e-06, + "loss": 0.2716, + "step": 6313 + }, + { + "epoch": 0.54, + "learning_rate": 9.151492176953697e-06, + "loss": 0.267, + "step": 6314 + }, + { + "epoch": 0.54, + "learning_rate": 9.148725982194477e-06, + "loss": 0.2935, + "step": 6315 + }, + { + "epoch": 0.54, + "learning_rate": 9.145959853047254e-06, + "loss": 0.6002, + "step": 6316 + }, + { + "epoch": 0.54, + "learning_rate": 9.143193789725227e-06, + "loss": 0.2593, + "step": 6317 + }, + { + "epoch": 0.54, + "learning_rate": 9.140427792441584e-06, + "loss": 0.324, + "step": 6318 + }, + { + "epoch": 0.54, + "learning_rate": 9.137661861409525e-06, + "loss": 0.2525, + "step": 6319 + }, + { + "epoch": 0.54, + "learning_rate": 9.134895996842228e-06, + "loss": 0.3224, + "step": 6320 + }, + { + "epoch": 0.54, + "learning_rate": 9.13213019895287e-06, + "loss": 0.3298, + "step": 6321 + }, + { + "epoch": 0.54, + "learning_rate": 9.129364467954628e-06, + "loss": 0.2899, + "step": 6322 + }, + { + "epoch": 0.54, + "learning_rate": 9.126598804060675e-06, + "loss": 0.3036, + "step": 6323 + }, + { + "epoch": 0.54, + "learning_rate": 9.123833207484169e-06, + "loss": 0.2993, + "step": 6324 + }, + { + "epoch": 0.54, + "learning_rate": 9.121067678438267e-06, + "loss": 0.3029, + "step": 6325 + }, + { + "epoch": 0.54, + "learning_rate": 9.11830221713613e-06, + "loss": 0.2406, + "step": 6326 + }, + { + "epoch": 0.54, + "learning_rate": 9.115536823790896e-06, + "loss": 0.2467, + "step": 6327 + }, + { + "epoch": 0.54, + "learning_rate": 9.112771498615717e-06, + "loss": 0.3025, + "step": 6328 + }, + { + "epoch": 0.54, + "learning_rate": 9.110006241823726e-06, + "loss": 0.2587, + "step": 6329 + }, + { + "epoch": 0.54, + "learning_rate": 9.107241053628058e-06, + "loss": 0.348, + "step": 6330 + }, + { + "epoch": 0.54, + "learning_rate": 9.104475934241839e-06, + "loss": 0.2704, + "step": 6331 + }, + { + "epoch": 0.54, + "learning_rate": 9.101710883878185e-06, + "loss": 0.2437, + "step": 6332 + }, + { + "epoch": 0.54, + "learning_rate": 9.098945902750222e-06, + "loss": 0.2874, + "step": 6333 + }, + { + "epoch": 0.54, + "learning_rate": 9.096180991071055e-06, + "loss": 0.2757, + "step": 6334 + }, + { + "epoch": 0.54, + "learning_rate": 9.093416149053791e-06, + "loss": 0.3019, + "step": 6335 + }, + { + "epoch": 0.54, + "learning_rate": 9.090651376911532e-06, + "loss": 0.2715, + "step": 6336 + }, + { + "epoch": 0.54, + "learning_rate": 9.087886674857371e-06, + "loss": 0.2633, + "step": 6337 + }, + { + "epoch": 0.54, + "learning_rate": 9.0851220431044e-06, + "loss": 0.3062, + "step": 6338 + }, + { + "epoch": 0.54, + "learning_rate": 9.082357481865697e-06, + "loss": 0.2756, + "step": 6339 + }, + { + "epoch": 0.54, + "learning_rate": 9.07959299135435e-06, + "loss": 0.2933, + "step": 6340 + }, + { + "epoch": 0.54, + "learning_rate": 9.076828571783422e-06, + "loss": 0.2599, + "step": 6341 + }, + { + "epoch": 0.54, + "learning_rate": 9.074064223365986e-06, + "loss": 0.2508, + "step": 6342 + }, + { + "epoch": 0.54, + "learning_rate": 9.07129994631511e-06, + "loss": 0.2957, + "step": 6343 + }, + { + "epoch": 0.54, + "learning_rate": 9.068535740843844e-06, + "loss": 0.2809, + "step": 6344 + }, + { + "epoch": 0.54, + "learning_rate": 9.065771607165242e-06, + "loss": 0.2898, + "step": 6345 + }, + { + "epoch": 0.54, + "learning_rate": 9.063007545492342e-06, + "loss": 0.3552, + "step": 6346 + }, + { + "epoch": 0.54, + "learning_rate": 9.060243556038195e-06, + "loss": 0.2702, + "step": 6347 + }, + { + "epoch": 0.54, + "learning_rate": 9.057479639015832e-06, + "loss": 0.2979, + "step": 6348 + }, + { + "epoch": 0.54, + "learning_rate": 9.054715794638277e-06, + "loss": 0.3071, + "step": 6349 + }, + { + "epoch": 0.54, + "learning_rate": 9.051952023118563e-06, + "loss": 0.2598, + "step": 6350 + }, + { + "epoch": 0.54, + "learning_rate": 9.0491883246697e-06, + "loss": 0.2349, + "step": 6351 + }, + { + "epoch": 0.54, + "learning_rate": 9.046424699504703e-06, + "loss": 0.278, + "step": 6352 + }, + { + "epoch": 0.54, + "learning_rate": 9.043661147836578e-06, + "loss": 0.2801, + "step": 6353 + }, + { + "epoch": 0.54, + "learning_rate": 9.040897669878327e-06, + "loss": 0.2824, + "step": 6354 + }, + { + "epoch": 0.54, + "learning_rate": 9.038134265842946e-06, + "loss": 0.3063, + "step": 6355 + }, + { + "epoch": 0.54, + "learning_rate": 9.035370935943419e-06, + "loss": 0.2936, + "step": 6356 + }, + { + "epoch": 0.54, + "learning_rate": 9.032607680392738e-06, + "loss": 0.2664, + "step": 6357 + }, + { + "epoch": 0.55, + "learning_rate": 9.029844499403876e-06, + "loss": 0.3663, + "step": 6358 + }, + { + "epoch": 0.55, + "learning_rate": 9.027081393189804e-06, + "loss": 0.2704, + "step": 6359 + }, + { + "epoch": 0.55, + "learning_rate": 9.02431836196349e-06, + "loss": 0.2867, + "step": 6360 + }, + { + "epoch": 0.55, + "learning_rate": 9.021555405937901e-06, + "loss": 0.2859, + "step": 6361 + }, + { + "epoch": 0.55, + "learning_rate": 9.018792525325986e-06, + "loss": 0.2525, + "step": 6362 + }, + { + "epoch": 0.55, + "learning_rate": 9.016029720340688e-06, + "loss": 0.2542, + "step": 6363 + }, + { + "epoch": 0.55, + "learning_rate": 9.013266991194964e-06, + "loss": 0.3002, + "step": 6364 + }, + { + "epoch": 0.55, + "learning_rate": 9.010504338101743e-06, + "loss": 0.304, + "step": 6365 + }, + { + "epoch": 0.55, + "learning_rate": 9.007741761273957e-06, + "loss": 0.287, + "step": 6366 + }, + { + "epoch": 0.55, + "learning_rate": 9.004979260924534e-06, + "loss": 0.2683, + "step": 6367 + }, + { + "epoch": 0.55, + "learning_rate": 9.002216837266394e-06, + "loss": 0.2769, + "step": 6368 + }, + { + "epoch": 0.55, + "learning_rate": 8.999454490512451e-06, + "loss": 0.3268, + "step": 6369 + }, + { + "epoch": 0.55, + "learning_rate": 8.996692220875608e-06, + "loss": 0.2604, + "step": 6370 + }, + { + "epoch": 0.55, + "learning_rate": 8.993930028568775e-06, + "loss": 0.2933, + "step": 6371 + }, + { + "epoch": 0.55, + "learning_rate": 8.991167913804842e-06, + "loss": 0.2385, + "step": 6372 + }, + { + "epoch": 0.55, + "learning_rate": 8.988405876796701e-06, + "loss": 0.2835, + "step": 6373 + }, + { + "epoch": 0.55, + "learning_rate": 8.985643917757237e-06, + "loss": 0.2943, + "step": 6374 + }, + { + "epoch": 0.55, + "learning_rate": 8.98288203689933e-06, + "loss": 0.2914, + "step": 6375 + }, + { + "epoch": 0.55, + "learning_rate": 8.98012023443585e-06, + "loss": 0.2491, + "step": 6376 + }, + { + "epoch": 0.55, + "learning_rate": 8.977358510579658e-06, + "loss": 0.2793, + "step": 6377 + }, + { + "epoch": 0.55, + "learning_rate": 8.974596865543624e-06, + "loss": 0.267, + "step": 6378 + }, + { + "epoch": 0.55, + "learning_rate": 8.971835299540595e-06, + "loss": 0.3075, + "step": 6379 + }, + { + "epoch": 0.55, + "learning_rate": 8.969073812783417e-06, + "loss": 0.2628, + "step": 6380 + }, + { + "epoch": 0.55, + "learning_rate": 8.966312405484942e-06, + "loss": 0.3061, + "step": 6381 + }, + { + "epoch": 0.55, + "learning_rate": 8.963551077857999e-06, + "loss": 0.6271, + "step": 6382 + }, + { + "epoch": 0.55, + "learning_rate": 8.960789830115416e-06, + "loss": 0.3176, + "step": 6383 + }, + { + "epoch": 0.55, + "learning_rate": 8.958028662470014e-06, + "loss": 0.2692, + "step": 6384 + }, + { + "epoch": 0.55, + "learning_rate": 8.955267575134618e-06, + "loss": 0.2994, + "step": 6385 + }, + { + "epoch": 0.55, + "learning_rate": 8.952506568322036e-06, + "loss": 0.2919, + "step": 6386 + }, + { + "epoch": 0.55, + "learning_rate": 8.949745642245066e-06, + "loss": 0.262, + "step": 6387 + }, + { + "epoch": 0.55, + "learning_rate": 8.94698479711652e-06, + "loss": 0.2734, + "step": 6388 + }, + { + "epoch": 0.55, + "learning_rate": 8.944224033149182e-06, + "loss": 0.3241, + "step": 6389 + }, + { + "epoch": 0.55, + "learning_rate": 8.941463350555835e-06, + "loss": 0.3006, + "step": 6390 + }, + { + "epoch": 0.55, + "learning_rate": 8.938702749549264e-06, + "loss": 0.2688, + "step": 6391 + }, + { + "epoch": 0.55, + "learning_rate": 8.93594223034224e-06, + "loss": 0.2596, + "step": 6392 + }, + { + "epoch": 0.55, + "learning_rate": 8.933181793147534e-06, + "loss": 0.2806, + "step": 6393 + }, + { + "epoch": 0.55, + "learning_rate": 8.9304214381779e-06, + "loss": 0.2568, + "step": 6394 + }, + { + "epoch": 0.55, + "learning_rate": 8.9276611656461e-06, + "loss": 0.2725, + "step": 6395 + }, + { + "epoch": 0.55, + "learning_rate": 8.924900975764879e-06, + "loss": 0.269, + "step": 6396 + }, + { + "epoch": 0.55, + "learning_rate": 8.922140868746977e-06, + "loss": 0.2949, + "step": 6397 + }, + { + "epoch": 0.55, + "learning_rate": 8.919380844805129e-06, + "loss": 0.338, + "step": 6398 + }, + { + "epoch": 0.55, + "learning_rate": 8.916620904152069e-06, + "loss": 0.2419, + "step": 6399 + }, + { + "epoch": 0.55, + "learning_rate": 8.913861047000518e-06, + "loss": 0.2715, + "step": 6400 + }, + { + "epoch": 0.55, + "learning_rate": 8.911101273563185e-06, + "loss": 0.3061, + "step": 6401 + }, + { + "epoch": 0.55, + "learning_rate": 8.908341584052791e-06, + "loss": 0.2795, + "step": 6402 + }, + { + "epoch": 0.55, + "learning_rate": 8.905581978682031e-06, + "loss": 0.2607, + "step": 6403 + }, + { + "epoch": 0.55, + "learning_rate": 8.902822457663605e-06, + "loss": 0.2841, + "step": 6404 + }, + { + "epoch": 0.55, + "learning_rate": 8.900063021210203e-06, + "loss": 0.3083, + "step": 6405 + }, + { + "epoch": 0.55, + "learning_rate": 8.897303669534508e-06, + "loss": 0.2467, + "step": 6406 + }, + { + "epoch": 0.55, + "learning_rate": 8.8945444028492e-06, + "loss": 0.2706, + "step": 6407 + }, + { + "epoch": 0.55, + "learning_rate": 8.891785221366942e-06, + "loss": 0.2778, + "step": 6408 + }, + { + "epoch": 0.55, + "learning_rate": 8.889026125300407e-06, + "loss": 0.259, + "step": 6409 + }, + { + "epoch": 0.55, + "learning_rate": 8.886267114862248e-06, + "loss": 0.2808, + "step": 6410 + }, + { + "epoch": 0.55, + "learning_rate": 8.883508190265117e-06, + "loss": 0.277, + "step": 6411 + }, + { + "epoch": 0.55, + "learning_rate": 8.880749351721656e-06, + "loss": 0.6112, + "step": 6412 + }, + { + "epoch": 0.55, + "learning_rate": 8.877990599444508e-06, + "loss": 0.2902, + "step": 6413 + }, + { + "epoch": 0.55, + "learning_rate": 8.8752319336463e-06, + "loss": 0.3396, + "step": 6414 + }, + { + "epoch": 0.55, + "learning_rate": 8.872473354539653e-06, + "loss": 0.2559, + "step": 6415 + }, + { + "epoch": 0.55, + "learning_rate": 8.869714862337194e-06, + "loss": 0.3209, + "step": 6416 + }, + { + "epoch": 0.55, + "learning_rate": 8.866956457251524e-06, + "loss": 0.2571, + "step": 6417 + }, + { + "epoch": 0.55, + "learning_rate": 8.86419813949525e-06, + "loss": 0.311, + "step": 6418 + }, + { + "epoch": 0.55, + "learning_rate": 8.861439909280976e-06, + "loss": 0.265, + "step": 6419 + }, + { + "epoch": 0.55, + "learning_rate": 8.858681766821289e-06, + "loss": 0.2789, + "step": 6420 + }, + { + "epoch": 0.55, + "learning_rate": 8.855923712328771e-06, + "loss": 0.2816, + "step": 6421 + }, + { + "epoch": 0.55, + "learning_rate": 8.853165746015997e-06, + "loss": 0.25, + "step": 6422 + }, + { + "epoch": 0.55, + "learning_rate": 8.850407868095541e-06, + "loss": 0.2814, + "step": 6423 + }, + { + "epoch": 0.55, + "learning_rate": 8.84765007877997e-06, + "loss": 0.2615, + "step": 6424 + }, + { + "epoch": 0.55, + "learning_rate": 8.844892378281831e-06, + "loss": 0.3049, + "step": 6425 + }, + { + "epoch": 0.55, + "learning_rate": 8.842134766813687e-06, + "loss": 0.3261, + "step": 6426 + }, + { + "epoch": 0.55, + "learning_rate": 8.839377244588073e-06, + "loss": 0.2766, + "step": 6427 + }, + { + "epoch": 0.55, + "learning_rate": 8.836619811817522e-06, + "loss": 0.2922, + "step": 6428 + }, + { + "epoch": 0.55, + "learning_rate": 8.83386246871457e-06, + "loss": 0.3375, + "step": 6429 + }, + { + "epoch": 0.55, + "learning_rate": 8.831105215491734e-06, + "loss": 0.314, + "step": 6430 + }, + { + "epoch": 0.55, + "learning_rate": 8.828348052361537e-06, + "loss": 0.2966, + "step": 6431 + }, + { + "epoch": 0.55, + "learning_rate": 8.825590979536476e-06, + "loss": 0.2787, + "step": 6432 + }, + { + "epoch": 0.55, + "learning_rate": 8.822833997229065e-06, + "loss": 0.2784, + "step": 6433 + }, + { + "epoch": 0.55, + "learning_rate": 8.820077105651794e-06, + "loss": 0.3019, + "step": 6434 + }, + { + "epoch": 0.55, + "learning_rate": 8.817320305017148e-06, + "loss": 0.2717, + "step": 6435 + }, + { + "epoch": 0.55, + "learning_rate": 8.814563595537604e-06, + "loss": 0.2824, + "step": 6436 + }, + { + "epoch": 0.55, + "learning_rate": 8.811806977425647e-06, + "loss": 0.2773, + "step": 6437 + }, + { + "epoch": 0.55, + "learning_rate": 8.809050450893737e-06, + "loss": 0.2692, + "step": 6438 + }, + { + "epoch": 0.55, + "learning_rate": 8.806294016154329e-06, + "loss": 0.2727, + "step": 6439 + }, + { + "epoch": 0.55, + "learning_rate": 8.803537673419885e-06, + "loss": 0.3005, + "step": 6440 + }, + { + "epoch": 0.55, + "learning_rate": 8.80078142290284e-06, + "loss": 0.3063, + "step": 6441 + }, + { + "epoch": 0.55, + "learning_rate": 8.798025264815643e-06, + "loss": 0.2875, + "step": 6442 + }, + { + "epoch": 0.55, + "learning_rate": 8.795269199370714e-06, + "loss": 0.2943, + "step": 6443 + }, + { + "epoch": 0.55, + "learning_rate": 8.792513226780486e-06, + "loss": 0.2686, + "step": 6444 + }, + { + "epoch": 0.55, + "learning_rate": 8.789757347257373e-06, + "loss": 0.3112, + "step": 6445 + }, + { + "epoch": 0.55, + "learning_rate": 8.787001561013779e-06, + "loss": 0.2678, + "step": 6446 + }, + { + "epoch": 0.55, + "learning_rate": 8.784245868262117e-06, + "loss": 0.2623, + "step": 6447 + }, + { + "epoch": 0.55, + "learning_rate": 8.781490269214772e-06, + "loss": 0.3001, + "step": 6448 + }, + { + "epoch": 0.55, + "learning_rate": 8.778734764084136e-06, + "loss": 0.2631, + "step": 6449 + }, + { + "epoch": 0.55, + "learning_rate": 8.77597935308259e-06, + "loss": 0.3328, + "step": 6450 + }, + { + "epoch": 0.55, + "learning_rate": 8.773224036422512e-06, + "loss": 0.2712, + "step": 6451 + }, + { + "epoch": 0.55, + "learning_rate": 8.770468814316262e-06, + "loss": 0.2569, + "step": 6452 + }, + { + "epoch": 0.55, + "learning_rate": 8.767713686976195e-06, + "loss": 0.2609, + "step": 6453 + }, + { + "epoch": 0.55, + "learning_rate": 8.764958654614673e-06, + "loss": 0.2792, + "step": 6454 + }, + { + "epoch": 0.55, + "learning_rate": 8.762203717444034e-06, + "loss": 0.2847, + "step": 6455 + }, + { + "epoch": 0.55, + "learning_rate": 8.759448875676616e-06, + "loss": 0.2769, + "step": 6456 + }, + { + "epoch": 0.55, + "learning_rate": 8.75669412952475e-06, + "loss": 0.2563, + "step": 6457 + }, + { + "epoch": 0.55, + "learning_rate": 8.753939479200758e-06, + "loss": 0.2811, + "step": 6458 + }, + { + "epoch": 0.55, + "learning_rate": 8.751184924916954e-06, + "loss": 0.2651, + "step": 6459 + }, + { + "epoch": 0.55, + "learning_rate": 8.748430466885643e-06, + "loss": 0.2924, + "step": 6460 + }, + { + "epoch": 0.55, + "learning_rate": 8.745676105319126e-06, + "loss": 0.3098, + "step": 6461 + }, + { + "epoch": 0.55, + "learning_rate": 8.742921840429702e-06, + "loss": 0.3028, + "step": 6462 + }, + { + "epoch": 0.55, + "learning_rate": 8.740167672429646e-06, + "loss": 0.2536, + "step": 6463 + }, + { + "epoch": 0.55, + "learning_rate": 8.737413601531245e-06, + "loss": 0.2317, + "step": 6464 + }, + { + "epoch": 0.55, + "learning_rate": 8.734659627946765e-06, + "loss": 0.2729, + "step": 6465 + }, + { + "epoch": 0.55, + "learning_rate": 8.731905751888466e-06, + "loss": 0.292, + "step": 6466 + }, + { + "epoch": 0.55, + "learning_rate": 8.729151973568608e-06, + "loss": 0.2749, + "step": 6467 + }, + { + "epoch": 0.55, + "learning_rate": 8.726398293199434e-06, + "loss": 0.2852, + "step": 6468 + }, + { + "epoch": 0.55, + "learning_rate": 8.723644710993189e-06, + "loss": 0.5852, + "step": 6469 + }, + { + "epoch": 0.55, + "learning_rate": 8.720891227162099e-06, + "loss": 0.2871, + "step": 6470 + }, + { + "epoch": 0.55, + "learning_rate": 8.718137841918396e-06, + "loss": 0.2972, + "step": 6471 + }, + { + "epoch": 0.55, + "learning_rate": 8.715384555474297e-06, + "loss": 0.2758, + "step": 6472 + }, + { + "epoch": 0.55, + "learning_rate": 8.712631368042006e-06, + "loss": 0.2944, + "step": 6473 + }, + { + "epoch": 0.55, + "learning_rate": 8.709878279833725e-06, + "loss": 0.2717, + "step": 6474 + }, + { + "epoch": 0.56, + "learning_rate": 8.707125291061656e-06, + "loss": 0.278, + "step": 6475 + }, + { + "epoch": 0.56, + "learning_rate": 8.704372401937982e-06, + "loss": 0.289, + "step": 6476 + }, + { + "epoch": 0.56, + "learning_rate": 8.701619612674877e-06, + "loss": 0.28, + "step": 6477 + }, + { + "epoch": 0.56, + "learning_rate": 8.698866923484521e-06, + "loss": 0.3257, + "step": 6478 + }, + { + "epoch": 0.56, + "learning_rate": 8.69611433457907e-06, + "loss": 0.2626, + "step": 6479 + }, + { + "epoch": 0.56, + "learning_rate": 8.693361846170687e-06, + "loss": 0.2358, + "step": 6480 + }, + { + "epoch": 0.56, + "learning_rate": 8.690609458471512e-06, + "loss": 0.3358, + "step": 6481 + }, + { + "epoch": 0.56, + "learning_rate": 8.687857171693693e-06, + "loss": 0.5938, + "step": 6482 + }, + { + "epoch": 0.56, + "learning_rate": 8.685104986049362e-06, + "loss": 0.2931, + "step": 6483 + }, + { + "epoch": 0.56, + "learning_rate": 8.682352901750636e-06, + "loss": 0.2775, + "step": 6484 + }, + { + "epoch": 0.56, + "learning_rate": 8.679600919009642e-06, + "loss": 0.2616, + "step": 6485 + }, + { + "epoch": 0.56, + "learning_rate": 8.676849038038483e-06, + "loss": 0.2341, + "step": 6486 + }, + { + "epoch": 0.56, + "learning_rate": 8.674097259049263e-06, + "loss": 0.2853, + "step": 6487 + }, + { + "epoch": 0.56, + "learning_rate": 8.671345582254072e-06, + "loss": 0.2344, + "step": 6488 + }, + { + "epoch": 0.56, + "learning_rate": 8.668594007865003e-06, + "loss": 0.2463, + "step": 6489 + }, + { + "epoch": 0.56, + "learning_rate": 8.66584253609413e-06, + "loss": 0.5797, + "step": 6490 + }, + { + "epoch": 0.56, + "learning_rate": 8.663091167153516e-06, + "loss": 0.2997, + "step": 6491 + }, + { + "epoch": 0.56, + "learning_rate": 8.660339901255234e-06, + "loss": 0.3104, + "step": 6492 + }, + { + "epoch": 0.56, + "learning_rate": 8.65758873861133e-06, + "loss": 0.2821, + "step": 6493 + }, + { + "epoch": 0.56, + "learning_rate": 8.654837679433852e-06, + "loss": 0.2617, + "step": 6494 + }, + { + "epoch": 0.56, + "learning_rate": 8.652086723934841e-06, + "loss": 0.291, + "step": 6495 + }, + { + "epoch": 0.56, + "learning_rate": 8.649335872326328e-06, + "loss": 0.275, + "step": 6496 + }, + { + "epoch": 0.56, + "learning_rate": 8.64658512482033e-06, + "loss": 0.2801, + "step": 6497 + }, + { + "epoch": 0.56, + "learning_rate": 8.643834481628861e-06, + "loss": 0.2419, + "step": 6498 + }, + { + "epoch": 0.56, + "learning_rate": 8.641083942963929e-06, + "loss": 0.2874, + "step": 6499 + }, + { + "epoch": 0.56, + "learning_rate": 8.638333509037537e-06, + "loss": 0.2811, + "step": 6500 + }, + { + "epoch": 0.56, + "learning_rate": 8.635583180061664e-06, + "loss": 0.2966, + "step": 6501 + }, + { + "epoch": 0.56, + "learning_rate": 8.6328329562483e-06, + "loss": 0.3436, + "step": 6502 + }, + { + "epoch": 0.56, + "learning_rate": 8.630082837809419e-06, + "loss": 0.3058, + "step": 6503 + }, + { + "epoch": 0.56, + "learning_rate": 8.62733282495698e-06, + "loss": 0.236, + "step": 6504 + }, + { + "epoch": 0.56, + "learning_rate": 8.624582917902947e-06, + "loss": 0.2879, + "step": 6505 + }, + { + "epoch": 0.56, + "learning_rate": 8.621833116859264e-06, + "loss": 0.2872, + "step": 6506 + }, + { + "epoch": 0.56, + "learning_rate": 8.619083422037878e-06, + "loss": 0.3298, + "step": 6507 + }, + { + "epoch": 0.56, + "learning_rate": 8.616333833650714e-06, + "loss": 0.2851, + "step": 6508 + }, + { + "epoch": 0.56, + "learning_rate": 8.613584351909705e-06, + "loss": 0.3307, + "step": 6509 + }, + { + "epoch": 0.56, + "learning_rate": 8.610834977026765e-06, + "loss": 0.2833, + "step": 6510 + }, + { + "epoch": 0.56, + "learning_rate": 8.608085709213797e-06, + "loss": 0.2859, + "step": 6511 + }, + { + "epoch": 0.56, + "learning_rate": 8.605336548682704e-06, + "loss": 0.3217, + "step": 6512 + }, + { + "epoch": 0.56, + "learning_rate": 8.602587495645382e-06, + "loss": 0.322, + "step": 6513 + }, + { + "epoch": 0.56, + "learning_rate": 8.599838550313714e-06, + "loss": 0.2433, + "step": 6514 + }, + { + "epoch": 0.56, + "learning_rate": 8.597089712899564e-06, + "loss": 0.3004, + "step": 6515 + }, + { + "epoch": 0.56, + "learning_rate": 8.594340983614812e-06, + "loss": 0.3048, + "step": 6516 + }, + { + "epoch": 0.56, + "learning_rate": 8.591592362671311e-06, + "loss": 0.2905, + "step": 6517 + }, + { + "epoch": 0.56, + "learning_rate": 8.588843850280911e-06, + "loss": 0.2921, + "step": 6518 + }, + { + "epoch": 0.56, + "learning_rate": 8.586095446655452e-06, + "loss": 0.2819, + "step": 6519 + }, + { + "epoch": 0.56, + "learning_rate": 8.583347152006774e-06, + "loss": 0.2658, + "step": 6520 + }, + { + "epoch": 0.56, + "learning_rate": 8.580598966546697e-06, + "loss": 0.2552, + "step": 6521 + }, + { + "epoch": 0.56, + "learning_rate": 8.577850890487035e-06, + "loss": 0.2912, + "step": 6522 + }, + { + "epoch": 0.56, + "learning_rate": 8.575102924039602e-06, + "loss": 0.2716, + "step": 6523 + }, + { + "epoch": 0.56, + "learning_rate": 8.572355067416194e-06, + "loss": 0.6274, + "step": 6524 + }, + { + "epoch": 0.56, + "learning_rate": 8.569607320828604e-06, + "loss": 0.2807, + "step": 6525 + }, + { + "epoch": 0.56, + "learning_rate": 8.566859684488611e-06, + "loss": 0.2546, + "step": 6526 + }, + { + "epoch": 0.56, + "learning_rate": 8.564112158607996e-06, + "loss": 0.2858, + "step": 6527 + }, + { + "epoch": 0.56, + "learning_rate": 8.561364743398521e-06, + "loss": 0.2894, + "step": 6528 + }, + { + "epoch": 0.56, + "learning_rate": 8.558617439071938e-06, + "loss": 0.2871, + "step": 6529 + }, + { + "epoch": 0.56, + "learning_rate": 8.555870245840005e-06, + "loss": 0.3452, + "step": 6530 + }, + { + "epoch": 0.56, + "learning_rate": 8.553123163914456e-06, + "loss": 0.3228, + "step": 6531 + }, + { + "epoch": 0.56, + "learning_rate": 8.550376193507023e-06, + "loss": 0.2663, + "step": 6532 + }, + { + "epoch": 0.56, + "learning_rate": 8.547629334829434e-06, + "loss": 0.2761, + "step": 6533 + }, + { + "epoch": 0.56, + "learning_rate": 8.544882588093399e-06, + "loss": 0.2941, + "step": 6534 + }, + { + "epoch": 0.56, + "learning_rate": 8.542135953510625e-06, + "loss": 0.2724, + "step": 6535 + }, + { + "epoch": 0.56, + "learning_rate": 8.539389431292803e-06, + "loss": 0.2615, + "step": 6536 + }, + { + "epoch": 0.56, + "learning_rate": 8.536643021651629e-06, + "loss": 0.2693, + "step": 6537 + }, + { + "epoch": 0.56, + "learning_rate": 8.533896724798784e-06, + "loss": 0.6083, + "step": 6538 + }, + { + "epoch": 0.56, + "learning_rate": 8.531150540945929e-06, + "loss": 0.2809, + "step": 6539 + }, + { + "epoch": 0.56, + "learning_rate": 8.528404470304736e-06, + "loss": 0.2509, + "step": 6540 + }, + { + "epoch": 0.56, + "learning_rate": 8.525658513086857e-06, + "loss": 0.3068, + "step": 6541 + }, + { + "epoch": 0.56, + "learning_rate": 8.522912669503932e-06, + "loss": 0.2273, + "step": 6542 + }, + { + "epoch": 0.56, + "learning_rate": 8.5201669397676e-06, + "loss": 0.2704, + "step": 6543 + }, + { + "epoch": 0.56, + "learning_rate": 8.517421324089488e-06, + "loss": 0.301, + "step": 6544 + }, + { + "epoch": 0.56, + "learning_rate": 8.514675822681218e-06, + "loss": 0.314, + "step": 6545 + }, + { + "epoch": 0.56, + "learning_rate": 8.511930435754391e-06, + "loss": 0.2621, + "step": 6546 + }, + { + "epoch": 0.56, + "learning_rate": 8.509185163520617e-06, + "loss": 0.2892, + "step": 6547 + }, + { + "epoch": 0.56, + "learning_rate": 8.506440006191485e-06, + "loss": 0.2894, + "step": 6548 + }, + { + "epoch": 0.56, + "learning_rate": 8.503694963978576e-06, + "loss": 0.3623, + "step": 6549 + }, + { + "epoch": 0.56, + "learning_rate": 8.500950037093462e-06, + "loss": 0.3357, + "step": 6550 + }, + { + "epoch": 0.56, + "learning_rate": 8.498205225747717e-06, + "loss": 0.6342, + "step": 6551 + }, + { + "epoch": 0.56, + "learning_rate": 8.495460530152893e-06, + "loss": 0.2982, + "step": 6552 + }, + { + "epoch": 0.56, + "learning_rate": 8.492715950520534e-06, + "loss": 0.2757, + "step": 6553 + }, + { + "epoch": 0.56, + "learning_rate": 8.489971487062184e-06, + "loss": 0.2718, + "step": 6554 + }, + { + "epoch": 0.56, + "learning_rate": 8.487227139989372e-06, + "loss": 0.318, + "step": 6555 + }, + { + "epoch": 0.56, + "learning_rate": 8.484482909513613e-06, + "loss": 0.2923, + "step": 6556 + }, + { + "epoch": 0.56, + "learning_rate": 8.481738795846424e-06, + "loss": 0.2532, + "step": 6557 + }, + { + "epoch": 0.56, + "learning_rate": 8.478994799199308e-06, + "loss": 0.2848, + "step": 6558 + }, + { + "epoch": 0.56, + "learning_rate": 8.476250919783759e-06, + "loss": 0.2385, + "step": 6559 + }, + { + "epoch": 0.56, + "learning_rate": 8.473507157811254e-06, + "loss": 0.31, + "step": 6560 + }, + { + "epoch": 0.56, + "learning_rate": 8.470763513493281e-06, + "loss": 0.3047, + "step": 6561 + }, + { + "epoch": 0.56, + "learning_rate": 8.468019987041298e-06, + "loss": 0.2968, + "step": 6562 + }, + { + "epoch": 0.56, + "learning_rate": 8.465276578666766e-06, + "loss": 0.2969, + "step": 6563 + }, + { + "epoch": 0.56, + "learning_rate": 8.462533288581128e-06, + "loss": 0.2549, + "step": 6564 + }, + { + "epoch": 0.56, + "learning_rate": 8.45979011699583e-06, + "loss": 0.2863, + "step": 6565 + }, + { + "epoch": 0.56, + "learning_rate": 8.4570470641223e-06, + "loss": 0.2489, + "step": 6566 + }, + { + "epoch": 0.56, + "learning_rate": 8.454304130171956e-06, + "loss": 0.2518, + "step": 6567 + }, + { + "epoch": 0.56, + "learning_rate": 8.451561315356216e-06, + "loss": 0.5439, + "step": 6568 + }, + { + "epoch": 0.56, + "learning_rate": 8.448818619886477e-06, + "loss": 0.2722, + "step": 6569 + }, + { + "epoch": 0.56, + "learning_rate": 8.44607604397413e-06, + "loss": 0.2662, + "step": 6570 + }, + { + "epoch": 0.56, + "learning_rate": 8.443333587830568e-06, + "loss": 0.2607, + "step": 6571 + }, + { + "epoch": 0.56, + "learning_rate": 8.440591251667164e-06, + "loss": 0.2697, + "step": 6572 + }, + { + "epoch": 0.56, + "learning_rate": 8.437849035695278e-06, + "loss": 0.2972, + "step": 6573 + }, + { + "epoch": 0.56, + "learning_rate": 8.435106940126266e-06, + "loss": 0.303, + "step": 6574 + }, + { + "epoch": 0.56, + "learning_rate": 8.43236496517148e-06, + "loss": 0.2728, + "step": 6575 + }, + { + "epoch": 0.56, + "learning_rate": 8.42962311104226e-06, + "loss": 0.3177, + "step": 6576 + }, + { + "epoch": 0.56, + "learning_rate": 8.426881377949927e-06, + "loss": 0.2699, + "step": 6577 + }, + { + "epoch": 0.56, + "learning_rate": 8.424139766105808e-06, + "loss": 0.2378, + "step": 6578 + }, + { + "epoch": 0.56, + "learning_rate": 8.421398275721208e-06, + "loss": 0.278, + "step": 6579 + }, + { + "epoch": 0.56, + "learning_rate": 8.41865690700743e-06, + "loss": 0.2327, + "step": 6580 + }, + { + "epoch": 0.56, + "learning_rate": 8.415915660175763e-06, + "loss": 0.2849, + "step": 6581 + }, + { + "epoch": 0.56, + "learning_rate": 8.413174535437486e-06, + "loss": 0.2592, + "step": 6582 + }, + { + "epoch": 0.56, + "learning_rate": 8.410433533003881e-06, + "loss": 0.285, + "step": 6583 + }, + { + "epoch": 0.56, + "learning_rate": 8.4076926530862e-06, + "loss": 0.2789, + "step": 6584 + }, + { + "epoch": 0.56, + "learning_rate": 8.404951895895706e-06, + "loss": 0.3026, + "step": 6585 + }, + { + "epoch": 0.56, + "learning_rate": 8.402211261643638e-06, + "loss": 0.2661, + "step": 6586 + }, + { + "epoch": 0.56, + "learning_rate": 8.399470750541228e-06, + "loss": 0.2972, + "step": 6587 + }, + { + "epoch": 0.56, + "learning_rate": 8.396730362799704e-06, + "loss": 0.302, + "step": 6588 + }, + { + "epoch": 0.56, + "learning_rate": 8.393990098630284e-06, + "loss": 0.2769, + "step": 6589 + }, + { + "epoch": 0.56, + "learning_rate": 8.391249958244173e-06, + "loss": 0.2604, + "step": 6590 + }, + { + "epoch": 0.56, + "learning_rate": 8.388509941852562e-06, + "loss": 0.2522, + "step": 6591 + }, + { + "epoch": 0.57, + "learning_rate": 8.385770049666646e-06, + "loss": 0.2576, + "step": 6592 + }, + { + "epoch": 0.57, + "learning_rate": 8.383030281897598e-06, + "loss": 0.2767, + "step": 6593 + }, + { + "epoch": 0.57, + "learning_rate": 8.380290638756584e-06, + "loss": 0.3027, + "step": 6594 + }, + { + "epoch": 0.57, + "learning_rate": 8.377551120454762e-06, + "loss": 0.2668, + "step": 6595 + }, + { + "epoch": 0.57, + "learning_rate": 8.374811727203288e-06, + "loss": 0.2876, + "step": 6596 + }, + { + "epoch": 0.57, + "learning_rate": 8.372072459213296e-06, + "loss": 0.252, + "step": 6597 + }, + { + "epoch": 0.57, + "learning_rate": 8.369333316695909e-06, + "loss": 0.278, + "step": 6598 + }, + { + "epoch": 0.57, + "learning_rate": 8.366594299862258e-06, + "loss": 0.2847, + "step": 6599 + }, + { + "epoch": 0.57, + "learning_rate": 8.363855408923446e-06, + "loss": 0.2445, + "step": 6600 + }, + { + "epoch": 0.57, + "learning_rate": 8.361116644090576e-06, + "loss": 0.2927, + "step": 6601 + }, + { + "epoch": 0.57, + "learning_rate": 8.358378005574731e-06, + "loss": 0.2589, + "step": 6602 + }, + { + "epoch": 0.57, + "learning_rate": 8.355639493587005e-06, + "loss": 0.2673, + "step": 6603 + }, + { + "epoch": 0.57, + "learning_rate": 8.352901108338462e-06, + "loss": 0.251, + "step": 6604 + }, + { + "epoch": 0.57, + "learning_rate": 8.350162850040158e-06, + "loss": 0.2782, + "step": 6605 + }, + { + "epoch": 0.57, + "learning_rate": 8.347424718903152e-06, + "loss": 0.2795, + "step": 6606 + }, + { + "epoch": 0.57, + "learning_rate": 8.344686715138482e-06, + "loss": 0.2858, + "step": 6607 + }, + { + "epoch": 0.57, + "learning_rate": 8.341948838957185e-06, + "loss": 0.2376, + "step": 6608 + }, + { + "epoch": 0.57, + "learning_rate": 8.339211090570272e-06, + "loss": 0.2654, + "step": 6609 + }, + { + "epoch": 0.57, + "learning_rate": 8.336473470188767e-06, + "loss": 0.2508, + "step": 6610 + }, + { + "epoch": 0.57, + "learning_rate": 8.333735978023667e-06, + "loss": 0.254, + "step": 6611 + }, + { + "epoch": 0.57, + "learning_rate": 8.33099861428596e-06, + "loss": 0.276, + "step": 6612 + }, + { + "epoch": 0.57, + "learning_rate": 8.328261379186636e-06, + "loss": 0.3005, + "step": 6613 + }, + { + "epoch": 0.57, + "learning_rate": 8.325524272936668e-06, + "loss": 0.3035, + "step": 6614 + }, + { + "epoch": 0.57, + "learning_rate": 8.322787295747007e-06, + "loss": 0.2884, + "step": 6615 + }, + { + "epoch": 0.57, + "learning_rate": 8.320050447828622e-06, + "loss": 0.2643, + "step": 6616 + }, + { + "epoch": 0.57, + "learning_rate": 8.317313729392446e-06, + "loss": 0.2758, + "step": 6617 + }, + { + "epoch": 0.57, + "learning_rate": 8.31457714064941e-06, + "loss": 0.2894, + "step": 6618 + }, + { + "epoch": 0.57, + "learning_rate": 8.311840681810441e-06, + "loss": 0.3061, + "step": 6619 + }, + { + "epoch": 0.57, + "learning_rate": 8.309104353086452e-06, + "loss": 0.2903, + "step": 6620 + }, + { + "epoch": 0.57, + "learning_rate": 8.306368154688347e-06, + "loss": 0.2937, + "step": 6621 + }, + { + "epoch": 0.57, + "learning_rate": 8.30363208682701e-06, + "loss": 0.3031, + "step": 6622 + }, + { + "epoch": 0.57, + "learning_rate": 8.300896149713334e-06, + "loss": 0.3235, + "step": 6623 + }, + { + "epoch": 0.57, + "learning_rate": 8.298160343558188e-06, + "loss": 0.329, + "step": 6624 + }, + { + "epoch": 0.57, + "learning_rate": 8.295424668572432e-06, + "loss": 0.2549, + "step": 6625 + }, + { + "epoch": 0.57, + "learning_rate": 8.292689124966917e-06, + "loss": 0.3129, + "step": 6626 + }, + { + "epoch": 0.57, + "learning_rate": 8.289953712952494e-06, + "loss": 0.2688, + "step": 6627 + }, + { + "epoch": 0.57, + "learning_rate": 8.287218432739987e-06, + "loss": 0.2543, + "step": 6628 + }, + { + "epoch": 0.57, + "learning_rate": 8.284483284540217e-06, + "loss": 0.2755, + "step": 6629 + }, + { + "epoch": 0.57, + "learning_rate": 8.281748268564002e-06, + "loss": 0.3182, + "step": 6630 + }, + { + "epoch": 0.57, + "learning_rate": 8.279013385022142e-06, + "loss": 0.3085, + "step": 6631 + }, + { + "epoch": 0.57, + "learning_rate": 8.276278634125424e-06, + "loss": 0.2692, + "step": 6632 + }, + { + "epoch": 0.57, + "learning_rate": 8.273544016084629e-06, + "loss": 0.2648, + "step": 6633 + }, + { + "epoch": 0.57, + "learning_rate": 8.270809531110536e-06, + "loss": 0.2913, + "step": 6634 + }, + { + "epoch": 0.57, + "learning_rate": 8.268075179413899e-06, + "loss": 0.2413, + "step": 6635 + }, + { + "epoch": 0.57, + "learning_rate": 8.265340961205467e-06, + "loss": 0.2721, + "step": 6636 + }, + { + "epoch": 0.57, + "learning_rate": 8.262606876695984e-06, + "loss": 0.2873, + "step": 6637 + }, + { + "epoch": 0.57, + "learning_rate": 8.259872926096177e-06, + "loss": 0.2712, + "step": 6638 + }, + { + "epoch": 0.57, + "learning_rate": 8.257139109616769e-06, + "loss": 0.2597, + "step": 6639 + }, + { + "epoch": 0.57, + "learning_rate": 8.254405427468464e-06, + "loss": 0.2756, + "step": 6640 + }, + { + "epoch": 0.57, + "learning_rate": 8.251671879861966e-06, + "loss": 0.2834, + "step": 6641 + }, + { + "epoch": 0.57, + "learning_rate": 8.24893846700796e-06, + "loss": 0.2936, + "step": 6642 + }, + { + "epoch": 0.57, + "learning_rate": 8.246205189117122e-06, + "loss": 0.3012, + "step": 6643 + }, + { + "epoch": 0.57, + "learning_rate": 8.243472046400126e-06, + "loss": 0.274, + "step": 6644 + }, + { + "epoch": 0.57, + "learning_rate": 8.240739039067623e-06, + "loss": 0.274, + "step": 6645 + }, + { + "epoch": 0.57, + "learning_rate": 8.238006167330266e-06, + "loss": 0.2891, + "step": 6646 + }, + { + "epoch": 0.57, + "learning_rate": 8.235273431398681e-06, + "loss": 0.2826, + "step": 6647 + }, + { + "epoch": 0.57, + "learning_rate": 8.232540831483505e-06, + "loss": 0.2602, + "step": 6648 + }, + { + "epoch": 0.57, + "learning_rate": 8.229808367795349e-06, + "loss": 0.3093, + "step": 6649 + }, + { + "epoch": 0.57, + "learning_rate": 8.227076040544813e-06, + "loss": 0.2997, + "step": 6650 + }, + { + "epoch": 0.57, + "learning_rate": 8.224343849942496e-06, + "loss": 0.2674, + "step": 6651 + }, + { + "epoch": 0.57, + "learning_rate": 8.221611796198984e-06, + "loss": 0.3036, + "step": 6652 + }, + { + "epoch": 0.57, + "learning_rate": 8.218879879524844e-06, + "loss": 0.2232, + "step": 6653 + }, + { + "epoch": 0.57, + "learning_rate": 8.216148100130647e-06, + "loss": 0.5638, + "step": 6654 + }, + { + "epoch": 0.57, + "learning_rate": 8.213416458226939e-06, + "loss": 0.3172, + "step": 6655 + }, + { + "epoch": 0.57, + "learning_rate": 8.210684954024261e-06, + "loss": 0.244, + "step": 6656 + }, + { + "epoch": 0.57, + "learning_rate": 8.207953587733145e-06, + "loss": 0.2872, + "step": 6657 + }, + { + "epoch": 0.57, + "learning_rate": 8.205222359564113e-06, + "loss": 0.2779, + "step": 6658 + }, + { + "epoch": 0.57, + "learning_rate": 8.202491269727674e-06, + "loss": 0.3121, + "step": 6659 + }, + { + "epoch": 0.57, + "learning_rate": 8.199760318434323e-06, + "loss": 0.2393, + "step": 6660 + }, + { + "epoch": 0.57, + "learning_rate": 8.197029505894553e-06, + "loss": 0.2648, + "step": 6661 + }, + { + "epoch": 0.57, + "learning_rate": 8.194298832318843e-06, + "loss": 0.3467, + "step": 6662 + }, + { + "epoch": 0.57, + "learning_rate": 8.191568297917654e-06, + "loss": 0.2256, + "step": 6663 + }, + { + "epoch": 0.57, + "learning_rate": 8.188837902901441e-06, + "loss": 0.3017, + "step": 6664 + }, + { + "epoch": 0.57, + "learning_rate": 8.186107647480659e-06, + "loss": 0.3411, + "step": 6665 + }, + { + "epoch": 0.57, + "learning_rate": 8.183377531865737e-06, + "loss": 0.256, + "step": 6666 + }, + { + "epoch": 0.57, + "learning_rate": 8.180647556267093e-06, + "loss": 0.2773, + "step": 6667 + }, + { + "epoch": 0.57, + "learning_rate": 8.177917720895152e-06, + "loss": 0.2856, + "step": 6668 + }, + { + "epoch": 0.57, + "learning_rate": 8.17518802596031e-06, + "loss": 0.3012, + "step": 6669 + }, + { + "epoch": 0.57, + "learning_rate": 8.172458471672953e-06, + "loss": 0.2455, + "step": 6670 + }, + { + "epoch": 0.57, + "learning_rate": 8.169729058243468e-06, + "loss": 0.2645, + "step": 6671 + }, + { + "epoch": 0.57, + "learning_rate": 8.166999785882226e-06, + "loss": 0.2902, + "step": 6672 + }, + { + "epoch": 0.57, + "learning_rate": 8.164270654799584e-06, + "loss": 0.2939, + "step": 6673 + }, + { + "epoch": 0.57, + "learning_rate": 8.161541665205885e-06, + "loss": 0.2874, + "step": 6674 + }, + { + "epoch": 0.57, + "learning_rate": 8.158812817311474e-06, + "loss": 0.3354, + "step": 6675 + }, + { + "epoch": 0.57, + "learning_rate": 8.156084111326673e-06, + "loss": 0.3188, + "step": 6676 + }, + { + "epoch": 0.57, + "learning_rate": 8.1533555474618e-06, + "loss": 0.3161, + "step": 6677 + }, + { + "epoch": 0.57, + "learning_rate": 8.15062712592715e-06, + "loss": 0.2739, + "step": 6678 + }, + { + "epoch": 0.57, + "learning_rate": 8.14789884693303e-06, + "loss": 0.2619, + "step": 6679 + }, + { + "epoch": 0.57, + "learning_rate": 8.145170710689712e-06, + "loss": 0.2914, + "step": 6680 + }, + { + "epoch": 0.57, + "learning_rate": 8.142442717407469e-06, + "loss": 0.2628, + "step": 6681 + }, + { + "epoch": 0.57, + "learning_rate": 8.139714867296567e-06, + "loss": 0.3356, + "step": 6682 + }, + { + "epoch": 0.57, + "learning_rate": 8.13698716056725e-06, + "loss": 0.2923, + "step": 6683 + }, + { + "epoch": 0.57, + "learning_rate": 8.134259597429757e-06, + "loss": 0.3265, + "step": 6684 + }, + { + "epoch": 0.57, + "learning_rate": 8.13153217809431e-06, + "loss": 0.2641, + "step": 6685 + }, + { + "epoch": 0.57, + "learning_rate": 8.128804902771137e-06, + "loss": 0.275, + "step": 6686 + }, + { + "epoch": 0.57, + "learning_rate": 8.126077771670438e-06, + "loss": 0.3011, + "step": 6687 + }, + { + "epoch": 0.57, + "learning_rate": 8.123350785002398e-06, + "loss": 0.2404, + "step": 6688 + }, + { + "epoch": 0.57, + "learning_rate": 8.120623942977209e-06, + "loss": 0.2519, + "step": 6689 + }, + { + "epoch": 0.57, + "learning_rate": 8.117897245805044e-06, + "loss": 0.2847, + "step": 6690 + }, + { + "epoch": 0.57, + "learning_rate": 8.115170693696058e-06, + "loss": 0.3049, + "step": 6691 + }, + { + "epoch": 0.57, + "learning_rate": 8.112444286860397e-06, + "loss": 0.2795, + "step": 6692 + }, + { + "epoch": 0.57, + "learning_rate": 8.109718025508208e-06, + "loss": 0.3232, + "step": 6693 + }, + { + "epoch": 0.57, + "learning_rate": 8.106991909849613e-06, + "loss": 0.2578, + "step": 6694 + }, + { + "epoch": 0.57, + "learning_rate": 8.104265940094726e-06, + "loss": 0.2699, + "step": 6695 + }, + { + "epoch": 0.57, + "learning_rate": 8.101540116453655e-06, + "loss": 0.6116, + "step": 6696 + }, + { + "epoch": 0.57, + "learning_rate": 8.098814439136492e-06, + "loss": 0.2864, + "step": 6697 + }, + { + "epoch": 0.57, + "learning_rate": 8.096088908353316e-06, + "loss": 0.3403, + "step": 6698 + }, + { + "epoch": 0.57, + "learning_rate": 8.093363524314202e-06, + "loss": 0.2563, + "step": 6699 + }, + { + "epoch": 0.57, + "learning_rate": 8.090638287229207e-06, + "loss": 0.3217, + "step": 6700 + }, + { + "epoch": 0.57, + "learning_rate": 8.087913197308376e-06, + "loss": 0.2843, + "step": 6701 + }, + { + "epoch": 0.57, + "learning_rate": 8.085188254761744e-06, + "loss": 0.2822, + "step": 6702 + }, + { + "epoch": 0.57, + "learning_rate": 8.082463459799346e-06, + "loss": 0.2934, + "step": 6703 + }, + { + "epoch": 0.57, + "learning_rate": 8.07973881263119e-06, + "loss": 0.2802, + "step": 6704 + }, + { + "epoch": 0.57, + "learning_rate": 8.077014313467274e-06, + "loss": 0.27, + "step": 6705 + }, + { + "epoch": 0.57, + "learning_rate": 8.074289962517597e-06, + "loss": 0.2915, + "step": 6706 + }, + { + "epoch": 0.57, + "learning_rate": 8.071565759992133e-06, + "loss": 0.2678, + "step": 6707 + }, + { + "epoch": 0.58, + "learning_rate": 8.068841706100851e-06, + "loss": 0.3052, + "step": 6708 + }, + { + "epoch": 0.58, + "learning_rate": 8.066117801053706e-06, + "loss": 0.2889, + "step": 6709 + }, + { + "epoch": 0.58, + "learning_rate": 8.063394045060648e-06, + "loss": 0.2965, + "step": 6710 + }, + { + "epoch": 0.58, + "learning_rate": 8.06067043833161e-06, + "loss": 0.2789, + "step": 6711 + }, + { + "epoch": 0.58, + "learning_rate": 8.057946981076506e-06, + "loss": 0.2478, + "step": 6712 + }, + { + "epoch": 0.58, + "learning_rate": 8.055223673505258e-06, + "loss": 0.2797, + "step": 6713 + }, + { + "epoch": 0.58, + "learning_rate": 8.052500515827759e-06, + "loss": 0.2854, + "step": 6714 + }, + { + "epoch": 0.58, + "learning_rate": 8.049777508253898e-06, + "loss": 0.2609, + "step": 6715 + }, + { + "epoch": 0.58, + "learning_rate": 8.047054650993545e-06, + "loss": 0.3015, + "step": 6716 + }, + { + "epoch": 0.58, + "learning_rate": 8.044331944256576e-06, + "loss": 0.261, + "step": 6717 + }, + { + "epoch": 0.58, + "learning_rate": 8.041609388252836e-06, + "loss": 0.2498, + "step": 6718 + }, + { + "epoch": 0.58, + "learning_rate": 8.038886983192164e-06, + "loss": 0.2546, + "step": 6719 + }, + { + "epoch": 0.58, + "learning_rate": 8.036164729284398e-06, + "loss": 0.2996, + "step": 6720 + }, + { + "epoch": 0.58, + "learning_rate": 8.033442626739347e-06, + "loss": 0.3373, + "step": 6721 + }, + { + "epoch": 0.58, + "learning_rate": 8.030720675766825e-06, + "loss": 0.2789, + "step": 6722 + }, + { + "epoch": 0.58, + "learning_rate": 8.02799887657662e-06, + "loss": 0.261, + "step": 6723 + }, + { + "epoch": 0.58, + "learning_rate": 8.025277229378519e-06, + "loss": 0.2935, + "step": 6724 + }, + { + "epoch": 0.58, + "learning_rate": 8.022555734382294e-06, + "loss": 0.2772, + "step": 6725 + }, + { + "epoch": 0.58, + "learning_rate": 8.019834391797696e-06, + "loss": 0.2841, + "step": 6726 + }, + { + "epoch": 0.58, + "learning_rate": 8.017113201834482e-06, + "loss": 0.2551, + "step": 6727 + }, + { + "epoch": 0.58, + "learning_rate": 8.014392164702387e-06, + "loss": 0.305, + "step": 6728 + }, + { + "epoch": 0.58, + "learning_rate": 8.011671280611132e-06, + "loss": 0.2997, + "step": 6729 + }, + { + "epoch": 0.58, + "learning_rate": 8.008950549770426e-06, + "loss": 0.2642, + "step": 6730 + }, + { + "epoch": 0.58, + "learning_rate": 8.006229972389979e-06, + "loss": 0.2518, + "step": 6731 + }, + { + "epoch": 0.58, + "learning_rate": 8.003509548679471e-06, + "loss": 0.3026, + "step": 6732 + }, + { + "epoch": 0.58, + "learning_rate": 8.000789278848582e-06, + "loss": 0.298, + "step": 6733 + }, + { + "epoch": 0.58, + "learning_rate": 7.998069163106977e-06, + "loss": 0.2606, + "step": 6734 + }, + { + "epoch": 0.58, + "learning_rate": 7.995349201664311e-06, + "loss": 0.2728, + "step": 6735 + }, + { + "epoch": 0.58, + "learning_rate": 7.992629394730225e-06, + "loss": 0.3022, + "step": 6736 + }, + { + "epoch": 0.58, + "learning_rate": 7.98990974251434e-06, + "loss": 0.2748, + "step": 6737 + }, + { + "epoch": 0.58, + "learning_rate": 7.987190245226285e-06, + "loss": 0.2649, + "step": 6738 + }, + { + "epoch": 0.58, + "learning_rate": 7.984470903075658e-06, + "loss": 0.2667, + "step": 6739 + }, + { + "epoch": 0.58, + "learning_rate": 7.981751716272054e-06, + "loss": 0.2701, + "step": 6740 + }, + { + "epoch": 0.58, + "learning_rate": 7.979032685025057e-06, + "loss": 0.288, + "step": 6741 + }, + { + "epoch": 0.58, + "learning_rate": 7.976313809544237e-06, + "loss": 0.3501, + "step": 6742 + }, + { + "epoch": 0.58, + "learning_rate": 7.973595090039144e-06, + "loss": 0.2673, + "step": 6743 + }, + { + "epoch": 0.58, + "learning_rate": 7.970876526719333e-06, + "loss": 0.274, + "step": 6744 + }, + { + "epoch": 0.58, + "learning_rate": 7.968158119794334e-06, + "loss": 0.2609, + "step": 6745 + }, + { + "epoch": 0.58, + "learning_rate": 7.965439869473664e-06, + "loss": 0.3143, + "step": 6746 + }, + { + "epoch": 0.58, + "learning_rate": 7.962721775966836e-06, + "loss": 0.2941, + "step": 6747 + }, + { + "epoch": 0.58, + "learning_rate": 7.960003839483348e-06, + "loss": 0.2725, + "step": 6748 + }, + { + "epoch": 0.58, + "learning_rate": 7.957286060232687e-06, + "loss": 0.6223, + "step": 6749 + }, + { + "epoch": 0.58, + "learning_rate": 7.954568438424315e-06, + "loss": 0.2475, + "step": 6750 + }, + { + "epoch": 0.58, + "learning_rate": 7.95185097426771e-06, + "loss": 0.2858, + "step": 6751 + }, + { + "epoch": 0.58, + "learning_rate": 7.949133667972307e-06, + "loss": 0.5742, + "step": 6752 + }, + { + "epoch": 0.58, + "learning_rate": 7.946416519747549e-06, + "loss": 0.6016, + "step": 6753 + }, + { + "epoch": 0.58, + "learning_rate": 7.943699529802854e-06, + "loss": 0.2668, + "step": 6754 + }, + { + "epoch": 0.58, + "learning_rate": 7.940982698347646e-06, + "loss": 0.2445, + "step": 6755 + }, + { + "epoch": 0.58, + "learning_rate": 7.938266025591315e-06, + "loss": 0.2624, + "step": 6756 + }, + { + "epoch": 0.58, + "learning_rate": 7.935549511743249e-06, + "loss": 0.2715, + "step": 6757 + }, + { + "epoch": 0.58, + "learning_rate": 7.932833157012829e-06, + "loss": 0.3148, + "step": 6758 + }, + { + "epoch": 0.58, + "learning_rate": 7.930116961609413e-06, + "loss": 0.2615, + "step": 6759 + }, + { + "epoch": 0.58, + "learning_rate": 7.927400925742357e-06, + "loss": 0.2903, + "step": 6760 + }, + { + "epoch": 0.58, + "learning_rate": 7.92468504962099e-06, + "loss": 0.2978, + "step": 6761 + }, + { + "epoch": 0.58, + "learning_rate": 7.921969333454652e-06, + "loss": 0.2657, + "step": 6762 + }, + { + "epoch": 0.58, + "learning_rate": 7.919253777452649e-06, + "loss": 0.2697, + "step": 6763 + }, + { + "epoch": 0.58, + "learning_rate": 7.916538381824279e-06, + "loss": 0.2648, + "step": 6764 + }, + { + "epoch": 0.58, + "learning_rate": 7.913823146778838e-06, + "loss": 0.2726, + "step": 6765 + }, + { + "epoch": 0.58, + "learning_rate": 7.911108072525603e-06, + "loss": 0.2791, + "step": 6766 + }, + { + "epoch": 0.58, + "learning_rate": 7.908393159273835e-06, + "loss": 0.3007, + "step": 6767 + }, + { + "epoch": 0.58, + "learning_rate": 7.905678407232785e-06, + "loss": 0.3189, + "step": 6768 + }, + { + "epoch": 0.58, + "learning_rate": 7.902963816611699e-06, + "loss": 0.2844, + "step": 6769 + }, + { + "epoch": 0.58, + "learning_rate": 7.900249387619797e-06, + "loss": 0.2731, + "step": 6770 + }, + { + "epoch": 0.58, + "learning_rate": 7.897535120466297e-06, + "loss": 0.312, + "step": 6771 + }, + { + "epoch": 0.58, + "learning_rate": 7.894821015360404e-06, + "loss": 0.275, + "step": 6772 + }, + { + "epoch": 0.58, + "learning_rate": 7.892107072511303e-06, + "loss": 0.2603, + "step": 6773 + }, + { + "epoch": 0.58, + "learning_rate": 7.889393292128177e-06, + "loss": 0.2852, + "step": 6774 + }, + { + "epoch": 0.58, + "learning_rate": 7.886679674420182e-06, + "loss": 0.2716, + "step": 6775 + }, + { + "epoch": 0.58, + "learning_rate": 7.88396621959648e-06, + "loss": 0.2916, + "step": 6776 + }, + { + "epoch": 0.58, + "learning_rate": 7.881252927866203e-06, + "loss": 0.2948, + "step": 6777 + }, + { + "epoch": 0.58, + "learning_rate": 7.878539799438479e-06, + "loss": 0.2668, + "step": 6778 + }, + { + "epoch": 0.58, + "learning_rate": 7.87582683452243e-06, + "loss": 0.2807, + "step": 6779 + }, + { + "epoch": 0.58, + "learning_rate": 7.873114033327153e-06, + "loss": 0.289, + "step": 6780 + }, + { + "epoch": 0.58, + "learning_rate": 7.870401396061731e-06, + "loss": 0.3131, + "step": 6781 + }, + { + "epoch": 0.58, + "learning_rate": 7.867688922935253e-06, + "loss": 0.2866, + "step": 6782 + }, + { + "epoch": 0.58, + "learning_rate": 7.864976614156776e-06, + "loss": 0.3062, + "step": 6783 + }, + { + "epoch": 0.58, + "learning_rate": 7.86226446993535e-06, + "loss": 0.27, + "step": 6784 + }, + { + "epoch": 0.58, + "learning_rate": 7.859552490480014e-06, + "loss": 0.2682, + "step": 6785 + }, + { + "epoch": 0.58, + "learning_rate": 7.856840675999799e-06, + "loss": 0.3036, + "step": 6786 + }, + { + "epoch": 0.58, + "learning_rate": 7.854129026703716e-06, + "loss": 0.3032, + "step": 6787 + }, + { + "epoch": 0.58, + "learning_rate": 7.85141754280076e-06, + "loss": 0.2999, + "step": 6788 + }, + { + "epoch": 0.58, + "learning_rate": 7.848706224499928e-06, + "loss": 0.2505, + "step": 6789 + }, + { + "epoch": 0.58, + "learning_rate": 7.845995072010188e-06, + "loss": 0.2905, + "step": 6790 + }, + { + "epoch": 0.58, + "learning_rate": 7.843284085540505e-06, + "loss": 0.308, + "step": 6791 + }, + { + "epoch": 0.58, + "learning_rate": 7.840573265299827e-06, + "loss": 0.283, + "step": 6792 + }, + { + "epoch": 0.58, + "learning_rate": 7.837862611497094e-06, + "loss": 0.2396, + "step": 6793 + }, + { + "epoch": 0.58, + "learning_rate": 7.835152124341228e-06, + "loss": 0.2682, + "step": 6794 + }, + { + "epoch": 0.58, + "learning_rate": 7.832441804041135e-06, + "loss": 0.3159, + "step": 6795 + }, + { + "epoch": 0.58, + "learning_rate": 7.829731650805722e-06, + "loss": 0.2722, + "step": 6796 + }, + { + "epoch": 0.58, + "learning_rate": 7.827021664843867e-06, + "loss": 0.2944, + "step": 6797 + }, + { + "epoch": 0.58, + "learning_rate": 7.824311846364448e-06, + "loss": 0.2573, + "step": 6798 + }, + { + "epoch": 0.58, + "learning_rate": 7.821602195576316e-06, + "loss": 0.2988, + "step": 6799 + }, + { + "epoch": 0.58, + "learning_rate": 7.818892712688328e-06, + "loss": 0.2433, + "step": 6800 + }, + { + "epoch": 0.58, + "learning_rate": 7.816183397909312e-06, + "loss": 0.3422, + "step": 6801 + }, + { + "epoch": 0.58, + "learning_rate": 7.813474251448086e-06, + "loss": 0.3299, + "step": 6802 + }, + { + "epoch": 0.58, + "learning_rate": 7.810765273513463e-06, + "loss": 0.2822, + "step": 6803 + }, + { + "epoch": 0.58, + "learning_rate": 7.808056464314236e-06, + "loss": 0.2115, + "step": 6804 + }, + { + "epoch": 0.58, + "learning_rate": 7.805347824059188e-06, + "loss": 0.3323, + "step": 6805 + }, + { + "epoch": 0.58, + "learning_rate": 7.80263935295708e-06, + "loss": 0.2448, + "step": 6806 + }, + { + "epoch": 0.58, + "learning_rate": 7.799931051216677e-06, + "loss": 0.3411, + "step": 6807 + }, + { + "epoch": 0.58, + "learning_rate": 7.797222919046717e-06, + "loss": 0.3226, + "step": 6808 + }, + { + "epoch": 0.58, + "learning_rate": 7.794514956655929e-06, + "loss": 0.265, + "step": 6809 + }, + { + "epoch": 0.58, + "learning_rate": 7.79180716425303e-06, + "loss": 0.2964, + "step": 6810 + }, + { + "epoch": 0.58, + "learning_rate": 7.789099542046727e-06, + "loss": 0.2639, + "step": 6811 + }, + { + "epoch": 0.58, + "learning_rate": 7.786392090245708e-06, + "loss": 0.2372, + "step": 6812 + }, + { + "epoch": 0.58, + "learning_rate": 7.783684809058642e-06, + "loss": 0.2568, + "step": 6813 + }, + { + "epoch": 0.58, + "learning_rate": 7.780977698694206e-06, + "loss": 0.2457, + "step": 6814 + }, + { + "epoch": 0.58, + "learning_rate": 7.778270759361044e-06, + "loss": 0.2863, + "step": 6815 + }, + { + "epoch": 0.58, + "learning_rate": 7.775563991267789e-06, + "loss": 0.3049, + "step": 6816 + }, + { + "epoch": 0.58, + "learning_rate": 7.772857394623074e-06, + "loss": 0.2921, + "step": 6817 + }, + { + "epoch": 0.58, + "learning_rate": 7.770150969635509e-06, + "loss": 0.2805, + "step": 6818 + }, + { + "epoch": 0.58, + "learning_rate": 7.767444716513686e-06, + "loss": 0.3014, + "step": 6819 + }, + { + "epoch": 0.58, + "learning_rate": 7.764738635466192e-06, + "loss": 0.2441, + "step": 6820 + }, + { + "epoch": 0.58, + "learning_rate": 7.762032726701602e-06, + "loss": 0.2899, + "step": 6821 + }, + { + "epoch": 0.58, + "learning_rate": 7.759326990428468e-06, + "loss": 0.2731, + "step": 6822 + }, + { + "epoch": 0.58, + "learning_rate": 7.756621426855337e-06, + "loss": 0.3071, + "step": 6823 + }, + { + "epoch": 0.58, + "learning_rate": 7.753916036190747e-06, + "loss": 0.2357, + "step": 6824 + }, + { + "epoch": 0.59, + "learning_rate": 7.751210818643209e-06, + "loss": 0.2946, + "step": 6825 + }, + { + "epoch": 0.59, + "learning_rate": 7.748505774421227e-06, + "loss": 0.2566, + "step": 6826 + }, + { + "epoch": 0.59, + "learning_rate": 7.745800903733298e-06, + "loss": 0.2599, + "step": 6827 + }, + { + "epoch": 0.59, + "learning_rate": 7.743096206787894e-06, + "loss": 0.2734, + "step": 6828 + }, + { + "epoch": 0.59, + "learning_rate": 7.740391683793486e-06, + "loss": 0.2442, + "step": 6829 + }, + { + "epoch": 0.59, + "learning_rate": 7.737687334958518e-06, + "loss": 0.2748, + "step": 6830 + }, + { + "epoch": 0.59, + "learning_rate": 7.734983160491435e-06, + "loss": 0.242, + "step": 6831 + }, + { + "epoch": 0.59, + "learning_rate": 7.73227916060066e-06, + "loss": 0.2683, + "step": 6832 + }, + { + "epoch": 0.59, + "learning_rate": 7.729575335494595e-06, + "loss": 0.2817, + "step": 6833 + }, + { + "epoch": 0.59, + "learning_rate": 7.726871685381652e-06, + "loss": 0.29, + "step": 6834 + }, + { + "epoch": 0.59, + "learning_rate": 7.724168210470203e-06, + "loss": 0.2422, + "step": 6835 + }, + { + "epoch": 0.59, + "learning_rate": 7.721464910968628e-06, + "loss": 0.2679, + "step": 6836 + }, + { + "epoch": 0.59, + "learning_rate": 7.718761787085271e-06, + "loss": 0.2892, + "step": 6837 + }, + { + "epoch": 0.59, + "learning_rate": 7.71605883902849e-06, + "loss": 0.2853, + "step": 6838 + }, + { + "epoch": 0.59, + "learning_rate": 7.713356067006609e-06, + "loss": 0.2867, + "step": 6839 + }, + { + "epoch": 0.59, + "learning_rate": 7.710653471227939e-06, + "loss": 0.3144, + "step": 6840 + }, + { + "epoch": 0.59, + "learning_rate": 7.70795105190079e-06, + "loss": 0.2573, + "step": 6841 + }, + { + "epoch": 0.59, + "learning_rate": 7.70524880923345e-06, + "loss": 0.2829, + "step": 6842 + }, + { + "epoch": 0.59, + "learning_rate": 7.702546743434193e-06, + "loss": 0.3174, + "step": 6843 + }, + { + "epoch": 0.59, + "learning_rate": 7.699844854711276e-06, + "loss": 0.2766, + "step": 6844 + }, + { + "epoch": 0.59, + "learning_rate": 7.697143143272959e-06, + "loss": 0.261, + "step": 6845 + }, + { + "epoch": 0.59, + "learning_rate": 7.694441609327465e-06, + "loss": 0.2397, + "step": 6846 + }, + { + "epoch": 0.59, + "learning_rate": 7.691740253083022e-06, + "loss": 0.264, + "step": 6847 + }, + { + "epoch": 0.59, + "learning_rate": 7.689039074747832e-06, + "loss": 0.2981, + "step": 6848 + }, + { + "epoch": 0.59, + "learning_rate": 7.686338074530095e-06, + "loss": 0.2701, + "step": 6849 + }, + { + "epoch": 0.59, + "learning_rate": 7.683637252637988e-06, + "loss": 0.3522, + "step": 6850 + }, + { + "epoch": 0.59, + "learning_rate": 7.68093660927967e-06, + "loss": 0.2596, + "step": 6851 + }, + { + "epoch": 0.59, + "learning_rate": 7.678236144663304e-06, + "loss": 0.2336, + "step": 6852 + }, + { + "epoch": 0.59, + "learning_rate": 7.675535858997024e-06, + "loss": 0.2643, + "step": 6853 + }, + { + "epoch": 0.59, + "learning_rate": 7.67283575248895e-06, + "loss": 0.2522, + "step": 6854 + }, + { + "epoch": 0.59, + "learning_rate": 7.670135825347202e-06, + "loss": 0.2463, + "step": 6855 + }, + { + "epoch": 0.59, + "learning_rate": 7.667436077779872e-06, + "loss": 0.2444, + "step": 6856 + }, + { + "epoch": 0.59, + "learning_rate": 7.664736509995042e-06, + "loss": 0.2952, + "step": 6857 + }, + { + "epoch": 0.59, + "learning_rate": 7.662037122200783e-06, + "loss": 0.2639, + "step": 6858 + }, + { + "epoch": 0.59, + "learning_rate": 7.659337914605152e-06, + "loss": 0.282, + "step": 6859 + }, + { + "epoch": 0.59, + "learning_rate": 7.656638887416186e-06, + "loss": 0.2481, + "step": 6860 + }, + { + "epoch": 0.59, + "learning_rate": 7.653940040841917e-06, + "loss": 0.2653, + "step": 6861 + }, + { + "epoch": 0.59, + "learning_rate": 7.651241375090358e-06, + "loss": 0.2657, + "step": 6862 + }, + { + "epoch": 0.59, + "learning_rate": 7.64854289036951e-06, + "loss": 0.2699, + "step": 6863 + }, + { + "epoch": 0.59, + "learning_rate": 7.645844586887353e-06, + "loss": 0.2776, + "step": 6864 + }, + { + "epoch": 0.59, + "learning_rate": 7.643146464851867e-06, + "loss": 0.2554, + "step": 6865 + }, + { + "epoch": 0.59, + "learning_rate": 7.640448524471002e-06, + "loss": 0.3297, + "step": 6866 + }, + { + "epoch": 0.59, + "learning_rate": 7.63775076595271e-06, + "loss": 0.2556, + "step": 6867 + }, + { + "epoch": 0.59, + "learning_rate": 7.635053189504913e-06, + "loss": 0.2626, + "step": 6868 + }, + { + "epoch": 0.59, + "learning_rate": 7.632355795335533e-06, + "loss": 0.2658, + "step": 6869 + }, + { + "epoch": 0.59, + "learning_rate": 7.629658583652471e-06, + "loss": 0.2875, + "step": 6870 + }, + { + "epoch": 0.59, + "learning_rate": 7.626961554663609e-06, + "loss": 0.241, + "step": 6871 + }, + { + "epoch": 0.59, + "learning_rate": 7.624264708576827e-06, + "loss": 0.6245, + "step": 6872 + }, + { + "epoch": 0.59, + "learning_rate": 7.621568045599983e-06, + "loss": 0.2819, + "step": 6873 + }, + { + "epoch": 0.59, + "learning_rate": 7.6188715659409216e-06, + "loss": 0.2404, + "step": 6874 + }, + { + "epoch": 0.59, + "learning_rate": 7.616175269807472e-06, + "loss": 0.3386, + "step": 6875 + }, + { + "epoch": 0.59, + "learning_rate": 7.613479157407457e-06, + "loss": 0.2772, + "step": 6876 + }, + { + "epoch": 0.59, + "learning_rate": 7.6107832289486775e-06, + "loss": 0.2893, + "step": 6877 + }, + { + "epoch": 0.59, + "learning_rate": 7.608087484638915e-06, + "loss": 0.2455, + "step": 6878 + }, + { + "epoch": 0.59, + "learning_rate": 7.605391924685954e-06, + "loss": 0.2654, + "step": 6879 + }, + { + "epoch": 0.59, + "learning_rate": 7.6026965492975535e-06, + "loss": 0.2864, + "step": 6880 + }, + { + "epoch": 0.59, + "learning_rate": 7.600001358681457e-06, + "loss": 0.2517, + "step": 6881 + }, + { + "epoch": 0.59, + "learning_rate": 7.597306353045393e-06, + "loss": 0.2388, + "step": 6882 + }, + { + "epoch": 0.59, + "learning_rate": 7.594611532597087e-06, + "loss": 0.3116, + "step": 6883 + }, + { + "epoch": 0.59, + "learning_rate": 7.591916897544238e-06, + "loss": 0.2532, + "step": 6884 + }, + { + "epoch": 0.59, + "learning_rate": 7.589222448094535e-06, + "loss": 0.2809, + "step": 6885 + }, + { + "epoch": 0.59, + "learning_rate": 7.586528184455653e-06, + "loss": 0.2706, + "step": 6886 + }, + { + "epoch": 0.59, + "learning_rate": 7.583834106835256e-06, + "loss": 0.6118, + "step": 6887 + }, + { + "epoch": 0.59, + "learning_rate": 7.581140215440987e-06, + "loss": 0.248, + "step": 6888 + }, + { + "epoch": 0.59, + "learning_rate": 7.578446510480475e-06, + "loss": 0.2797, + "step": 6889 + }, + { + "epoch": 0.59, + "learning_rate": 7.575752992161345e-06, + "loss": 0.2577, + "step": 6890 + }, + { + "epoch": 0.59, + "learning_rate": 7.573059660691192e-06, + "loss": 0.2634, + "step": 6891 + }, + { + "epoch": 0.59, + "learning_rate": 7.570366516277607e-06, + "loss": 0.288, + "step": 6892 + }, + { + "epoch": 0.59, + "learning_rate": 7.567673559128171e-06, + "loss": 0.2911, + "step": 6893 + }, + { + "epoch": 0.59, + "learning_rate": 7.564980789450438e-06, + "loss": 0.2475, + "step": 6894 + }, + { + "epoch": 0.59, + "learning_rate": 7.5622882074519544e-06, + "loss": 0.3536, + "step": 6895 + }, + { + "epoch": 0.59, + "learning_rate": 7.559595813340246e-06, + "loss": 0.2318, + "step": 6896 + }, + { + "epoch": 0.59, + "learning_rate": 7.556903607322839e-06, + "loss": 0.285, + "step": 6897 + }, + { + "epoch": 0.59, + "learning_rate": 7.554211589607227e-06, + "loss": 0.2857, + "step": 6898 + }, + { + "epoch": 0.59, + "learning_rate": 7.551519760400898e-06, + "loss": 0.2746, + "step": 6899 + }, + { + "epoch": 0.59, + "learning_rate": 7.548828119911333e-06, + "loss": 0.2625, + "step": 6900 + }, + { + "epoch": 0.59, + "learning_rate": 7.546136668345985e-06, + "loss": 0.2668, + "step": 6901 + }, + { + "epoch": 0.59, + "learning_rate": 7.543445405912298e-06, + "loss": 0.2488, + "step": 6902 + }, + { + "epoch": 0.59, + "learning_rate": 7.540754332817695e-06, + "loss": 0.2884, + "step": 6903 + }, + { + "epoch": 0.59, + "learning_rate": 7.538063449269599e-06, + "loss": 0.3168, + "step": 6904 + }, + { + "epoch": 0.59, + "learning_rate": 7.535372755475411e-06, + "loss": 0.2642, + "step": 6905 + }, + { + "epoch": 0.59, + "learning_rate": 7.532682251642508e-06, + "loss": 0.2701, + "step": 6906 + }, + { + "epoch": 0.59, + "learning_rate": 7.5299919379782695e-06, + "loss": 0.3134, + "step": 6907 + }, + { + "epoch": 0.59, + "learning_rate": 7.527301814690048e-06, + "loss": 0.2495, + "step": 6908 + }, + { + "epoch": 0.59, + "learning_rate": 7.524611881985181e-06, + "loss": 0.3099, + "step": 6909 + }, + { + "epoch": 0.59, + "learning_rate": 7.521922140071003e-06, + "loss": 0.2658, + "step": 6910 + }, + { + "epoch": 0.59, + "learning_rate": 7.519232589154819e-06, + "loss": 0.2795, + "step": 6911 + }, + { + "epoch": 0.59, + "learning_rate": 7.516543229443931e-06, + "loss": 0.2455, + "step": 6912 + }, + { + "epoch": 0.59, + "learning_rate": 7.513854061145617e-06, + "loss": 0.265, + "step": 6913 + }, + { + "epoch": 0.59, + "learning_rate": 7.5111650844671515e-06, + "loss": 0.3412, + "step": 6914 + }, + { + "epoch": 0.59, + "learning_rate": 7.5084762996157835e-06, + "loss": 0.2528, + "step": 6915 + }, + { + "epoch": 0.59, + "learning_rate": 7.5057877067987464e-06, + "loss": 0.2936, + "step": 6916 + }, + { + "epoch": 0.59, + "learning_rate": 7.503099306223271e-06, + "loss": 0.3046, + "step": 6917 + }, + { + "epoch": 0.59, + "learning_rate": 7.5004110980965664e-06, + "loss": 0.2598, + "step": 6918 + }, + { + "epoch": 0.59, + "learning_rate": 7.4977230826258226e-06, + "loss": 0.3021, + "step": 6919 + }, + { + "epoch": 0.59, + "learning_rate": 7.495035260018215e-06, + "loss": 0.3339, + "step": 6920 + }, + { + "epoch": 0.59, + "learning_rate": 7.492347630480917e-06, + "loss": 0.3074, + "step": 6921 + }, + { + "epoch": 0.59, + "learning_rate": 7.489660194221071e-06, + "loss": 0.2566, + "step": 6922 + }, + { + "epoch": 0.59, + "learning_rate": 7.486972951445812e-06, + "loss": 0.2756, + "step": 6923 + }, + { + "epoch": 0.59, + "learning_rate": 7.484285902362263e-06, + "loss": 0.2369, + "step": 6924 + }, + { + "epoch": 0.59, + "learning_rate": 7.481599047177527e-06, + "loss": 0.2578, + "step": 6925 + }, + { + "epoch": 0.59, + "learning_rate": 7.478912386098692e-06, + "loss": 0.3085, + "step": 6926 + }, + { + "epoch": 0.59, + "learning_rate": 7.47622591933283e-06, + "loss": 0.2516, + "step": 6927 + }, + { + "epoch": 0.59, + "learning_rate": 7.473539647087007e-06, + "loss": 0.2854, + "step": 6928 + }, + { + "epoch": 0.59, + "learning_rate": 7.470853569568264e-06, + "loss": 0.2673, + "step": 6929 + }, + { + "epoch": 0.59, + "learning_rate": 7.468167686983627e-06, + "loss": 0.2991, + "step": 6930 + }, + { + "epoch": 0.59, + "learning_rate": 7.46548199954012e-06, + "loss": 0.2335, + "step": 6931 + }, + { + "epoch": 0.59, + "learning_rate": 7.462796507444736e-06, + "loss": 0.2685, + "step": 6932 + }, + { + "epoch": 0.59, + "learning_rate": 7.4601112109044615e-06, + "loss": 0.3016, + "step": 6933 + }, + { + "epoch": 0.59, + "learning_rate": 7.4574261101262604e-06, + "loss": 0.3145, + "step": 6934 + }, + { + "epoch": 0.59, + "learning_rate": 7.4547412053170955e-06, + "loss": 0.2549, + "step": 6935 + }, + { + "epoch": 0.59, + "learning_rate": 7.4520564966839e-06, + "loss": 0.2838, + "step": 6936 + }, + { + "epoch": 0.59, + "learning_rate": 7.449371984433598e-06, + "loss": 0.2552, + "step": 6937 + }, + { + "epoch": 0.59, + "learning_rate": 7.446687668773105e-06, + "loss": 0.2852, + "step": 6938 + }, + { + "epoch": 0.59, + "learning_rate": 7.44400354990931e-06, + "loss": 0.2657, + "step": 6939 + }, + { + "epoch": 0.59, + "learning_rate": 7.4413196280490905e-06, + "loss": 0.2704, + "step": 6940 + }, + { + "epoch": 0.59, + "learning_rate": 7.43863590339931e-06, + "loss": 0.2847, + "step": 6941 + }, + { + "epoch": 0.6, + "learning_rate": 7.435952376166818e-06, + "loss": 0.3054, + "step": 6942 + }, + { + "epoch": 0.6, + "learning_rate": 7.433269046558449e-06, + "loss": 0.3448, + "step": 6943 + }, + { + "epoch": 0.6, + "learning_rate": 7.430585914781017e-06, + "loss": 0.2815, + "step": 6944 + }, + { + "epoch": 0.6, + "learning_rate": 7.427902981041329e-06, + "loss": 0.2787, + "step": 6945 + }, + { + "epoch": 0.6, + "learning_rate": 7.425220245546172e-06, + "loss": 0.2469, + "step": 6946 + }, + { + "epoch": 0.6, + "learning_rate": 7.4225377085023105e-06, + "loss": 0.2617, + "step": 6947 + }, + { + "epoch": 0.6, + "learning_rate": 7.419855370116511e-06, + "loss": 0.3333, + "step": 6948 + }, + { + "epoch": 0.6, + "learning_rate": 7.4171732305955095e-06, + "loss": 0.3027, + "step": 6949 + }, + { + "epoch": 0.6, + "learning_rate": 7.4144912901460355e-06, + "loss": 0.2921, + "step": 6950 + }, + { + "epoch": 0.6, + "learning_rate": 7.411809548974792e-06, + "loss": 0.289, + "step": 6951 + }, + { + "epoch": 0.6, + "learning_rate": 7.4091280072884854e-06, + "loss": 0.2651, + "step": 6952 + }, + { + "epoch": 0.6, + "learning_rate": 7.406446665293789e-06, + "loss": 0.262, + "step": 6953 + }, + { + "epoch": 0.6, + "learning_rate": 7.403765523197365e-06, + "loss": 0.2734, + "step": 6954 + }, + { + "epoch": 0.6, + "learning_rate": 7.401084581205869e-06, + "loss": 0.3026, + "step": 6955 + }, + { + "epoch": 0.6, + "learning_rate": 7.3984038395259315e-06, + "loss": 0.3073, + "step": 6956 + }, + { + "epoch": 0.6, + "learning_rate": 7.395723298364174e-06, + "loss": 0.3115, + "step": 6957 + }, + { + "epoch": 0.6, + "learning_rate": 7.39304295792719e-06, + "loss": 0.2886, + "step": 6958 + }, + { + "epoch": 0.6, + "learning_rate": 7.390362818421579e-06, + "loss": 0.2398, + "step": 6959 + }, + { + "epoch": 0.6, + "learning_rate": 7.387682880053906e-06, + "loss": 0.2817, + "step": 6960 + }, + { + "epoch": 0.6, + "learning_rate": 7.385003143030727e-06, + "loss": 0.2376, + "step": 6961 + }, + { + "epoch": 0.6, + "learning_rate": 7.382323607558585e-06, + "loss": 0.2755, + "step": 6962 + }, + { + "epoch": 0.6, + "learning_rate": 7.379644273844008e-06, + "loss": 0.2515, + "step": 6963 + }, + { + "epoch": 0.6, + "learning_rate": 7.376965142093502e-06, + "loss": 0.2986, + "step": 6964 + }, + { + "epoch": 0.6, + "learning_rate": 7.374286212513558e-06, + "loss": 0.3193, + "step": 6965 + }, + { + "epoch": 0.6, + "learning_rate": 7.3716074853106635e-06, + "loss": 0.2745, + "step": 6966 + }, + { + "epoch": 0.6, + "learning_rate": 7.368928960691275e-06, + "loss": 0.2859, + "step": 6967 + }, + { + "epoch": 0.6, + "learning_rate": 7.366250638861838e-06, + "loss": 0.2816, + "step": 6968 + }, + { + "epoch": 0.6, + "learning_rate": 7.3635725200287936e-06, + "loss": 0.3256, + "step": 6969 + }, + { + "epoch": 0.6, + "learning_rate": 7.3608946043985515e-06, + "loss": 0.2828, + "step": 6970 + }, + { + "epoch": 0.6, + "learning_rate": 7.358216892177514e-06, + "loss": 0.2491, + "step": 6971 + }, + { + "epoch": 0.6, + "learning_rate": 7.355539383572059e-06, + "loss": 0.239, + "step": 6972 + }, + { + "epoch": 0.6, + "learning_rate": 7.3528620787885676e-06, + "loss": 0.2664, + "step": 6973 + }, + { + "epoch": 0.6, + "learning_rate": 7.350184978033386e-06, + "loss": 0.2633, + "step": 6974 + }, + { + "epoch": 0.6, + "learning_rate": 7.347508081512848e-06, + "loss": 0.3063, + "step": 6975 + }, + { + "epoch": 0.6, + "learning_rate": 7.344831389433287e-06, + "loss": 0.2521, + "step": 6976 + }, + { + "epoch": 0.6, + "learning_rate": 7.342154902001003e-06, + "loss": 0.2835, + "step": 6977 + }, + { + "epoch": 0.6, + "learning_rate": 7.339478619422287e-06, + "loss": 0.2777, + "step": 6978 + }, + { + "epoch": 0.6, + "learning_rate": 7.336802541903408e-06, + "loss": 0.3508, + "step": 6979 + }, + { + "epoch": 0.6, + "learning_rate": 7.3341266696506304e-06, + "loss": 0.2777, + "step": 6980 + }, + { + "epoch": 0.6, + "learning_rate": 7.3314510028702e-06, + "loss": 0.2792, + "step": 6981 + }, + { + "epoch": 0.6, + "learning_rate": 7.328775541768336e-06, + "loss": 0.2884, + "step": 6982 + }, + { + "epoch": 0.6, + "learning_rate": 7.32610028655126e-06, + "loss": 0.2721, + "step": 6983 + }, + { + "epoch": 0.6, + "learning_rate": 7.3234252374251614e-06, + "loss": 0.2717, + "step": 6984 + }, + { + "epoch": 0.6, + "learning_rate": 7.320750394596217e-06, + "loss": 0.2684, + "step": 6985 + }, + { + "epoch": 0.6, + "learning_rate": 7.318075758270593e-06, + "loss": 0.2686, + "step": 6986 + }, + { + "epoch": 0.6, + "learning_rate": 7.315401328654439e-06, + "loss": 0.2759, + "step": 6987 + }, + { + "epoch": 0.6, + "learning_rate": 7.312727105953888e-06, + "loss": 0.3135, + "step": 6988 + }, + { + "epoch": 0.6, + "learning_rate": 7.310053090375049e-06, + "loss": 0.3065, + "step": 6989 + }, + { + "epoch": 0.6, + "learning_rate": 7.30737928212403e-06, + "loss": 0.2645, + "step": 6990 + }, + { + "epoch": 0.6, + "learning_rate": 7.3047056814069115e-06, + "loss": 0.2267, + "step": 6991 + }, + { + "epoch": 0.6, + "learning_rate": 7.3020322884297565e-06, + "loss": 0.2621, + "step": 6992 + }, + { + "epoch": 0.6, + "learning_rate": 7.299359103398626e-06, + "loss": 0.2657, + "step": 6993 + }, + { + "epoch": 0.6, + "learning_rate": 7.296686126519552e-06, + "loss": 0.241, + "step": 6994 + }, + { + "epoch": 0.6, + "learning_rate": 7.294013357998554e-06, + "loss": 0.3229, + "step": 6995 + }, + { + "epoch": 0.6, + "learning_rate": 7.291340798041631e-06, + "loss": 0.2821, + "step": 6996 + }, + { + "epoch": 0.6, + "learning_rate": 7.288668446854781e-06, + "loss": 0.3004, + "step": 6997 + }, + { + "epoch": 0.6, + "learning_rate": 7.2859963046439665e-06, + "loss": 0.3038, + "step": 6998 + }, + { + "epoch": 0.6, + "learning_rate": 7.283324371615147e-06, + "loss": 0.2745, + "step": 6999 + }, + { + "epoch": 0.6, + "learning_rate": 7.280652647974263e-06, + "loss": 0.2761, + "step": 7000 + }, + { + "epoch": 0.6, + "learning_rate": 7.277981133927236e-06, + "loss": 0.2981, + "step": 7001 + }, + { + "epoch": 0.6, + "learning_rate": 7.275309829679973e-06, + "loss": 0.2807, + "step": 7002 + }, + { + "epoch": 0.6, + "learning_rate": 7.2726387354383625e-06, + "loss": 0.2733, + "step": 7003 + }, + { + "epoch": 0.6, + "learning_rate": 7.269967851408286e-06, + "loss": 0.2708, + "step": 7004 + }, + { + "epoch": 0.6, + "learning_rate": 7.267297177795596e-06, + "loss": 0.285, + "step": 7005 + }, + { + "epoch": 0.6, + "learning_rate": 7.264626714806135e-06, + "loss": 0.2902, + "step": 7006 + }, + { + "epoch": 0.6, + "learning_rate": 7.261956462645734e-06, + "loss": 0.2422, + "step": 7007 + }, + { + "epoch": 0.6, + "learning_rate": 7.259286421520201e-06, + "loss": 0.278, + "step": 7008 + }, + { + "epoch": 0.6, + "learning_rate": 7.256616591635328e-06, + "loss": 0.2925, + "step": 7009 + }, + { + "epoch": 0.6, + "learning_rate": 7.253946973196888e-06, + "loss": 0.3397, + "step": 7010 + }, + { + "epoch": 0.6, + "learning_rate": 7.251277566410651e-06, + "loss": 0.3224, + "step": 7011 + }, + { + "epoch": 0.6, + "learning_rate": 7.248608371482355e-06, + "loss": 0.2954, + "step": 7012 + }, + { + "epoch": 0.6, + "learning_rate": 7.24593938861773e-06, + "loss": 0.2712, + "step": 7013 + }, + { + "epoch": 0.6, + "learning_rate": 7.243270618022492e-06, + "loss": 0.2603, + "step": 7014 + }, + { + "epoch": 0.6, + "learning_rate": 7.240602059902333e-06, + "loss": 0.3207, + "step": 7015 + }, + { + "epoch": 0.6, + "learning_rate": 7.237933714462932e-06, + "loss": 0.3207, + "step": 7016 + }, + { + "epoch": 0.6, + "learning_rate": 7.23526558190995e-06, + "loss": 0.28, + "step": 7017 + }, + { + "epoch": 0.6, + "learning_rate": 7.232597662449038e-06, + "loss": 0.2385, + "step": 7018 + }, + { + "epoch": 0.6, + "learning_rate": 7.229929956285826e-06, + "loss": 0.2546, + "step": 7019 + }, + { + "epoch": 0.6, + "learning_rate": 7.22726246362592e-06, + "loss": 0.2901, + "step": 7020 + }, + { + "epoch": 0.6, + "learning_rate": 7.224595184674928e-06, + "loss": 0.2338, + "step": 7021 + }, + { + "epoch": 0.6, + "learning_rate": 7.221928119638426e-06, + "loss": 0.3267, + "step": 7022 + }, + { + "epoch": 0.6, + "learning_rate": 7.2192612687219755e-06, + "loss": 0.2772, + "step": 7023 + }, + { + "epoch": 0.6, + "learning_rate": 7.2165946321311254e-06, + "loss": 0.2852, + "step": 7024 + }, + { + "epoch": 0.6, + "learning_rate": 7.213928210071408e-06, + "loss": 0.2764, + "step": 7025 + }, + { + "epoch": 0.6, + "learning_rate": 7.211262002748341e-06, + "loss": 0.3029, + "step": 7026 + }, + { + "epoch": 0.6, + "learning_rate": 7.2085960103674146e-06, + "loss": 0.2753, + "step": 7027 + }, + { + "epoch": 0.6, + "learning_rate": 7.205930233134117e-06, + "loss": 0.2527, + "step": 7028 + }, + { + "epoch": 0.6, + "learning_rate": 7.203264671253915e-06, + "loss": 0.2659, + "step": 7029 + }, + { + "epoch": 0.6, + "learning_rate": 7.200599324932246e-06, + "loss": 0.312, + "step": 7030 + }, + { + "epoch": 0.6, + "learning_rate": 7.1979341943745515e-06, + "loss": 0.3139, + "step": 7031 + }, + { + "epoch": 0.6, + "learning_rate": 7.195269279786247e-06, + "loss": 0.3, + "step": 7032 + }, + { + "epoch": 0.6, + "learning_rate": 7.192604581372727e-06, + "loss": 0.2886, + "step": 7033 + }, + { + "epoch": 0.6, + "learning_rate": 7.18994009933937e-06, + "loss": 0.2709, + "step": 7034 + }, + { + "epoch": 0.6, + "learning_rate": 7.187275833891549e-06, + "loss": 0.2518, + "step": 7035 + }, + { + "epoch": 0.6, + "learning_rate": 7.1846117852346075e-06, + "loss": 0.2731, + "step": 7036 + }, + { + "epoch": 0.6, + "learning_rate": 7.181947953573878e-06, + "loss": 0.2617, + "step": 7037 + }, + { + "epoch": 0.6, + "learning_rate": 7.179284339114676e-06, + "loss": 0.2572, + "step": 7038 + }, + { + "epoch": 0.6, + "learning_rate": 7.1766209420623e-06, + "loss": 0.2774, + "step": 7039 + }, + { + "epoch": 0.6, + "learning_rate": 7.173957762622032e-06, + "loss": 0.2656, + "step": 7040 + }, + { + "epoch": 0.6, + "learning_rate": 7.171294800999134e-06, + "loss": 0.2863, + "step": 7041 + }, + { + "epoch": 0.6, + "learning_rate": 7.168632057398857e-06, + "loss": 0.2738, + "step": 7042 + }, + { + "epoch": 0.6, + "learning_rate": 7.16596953202643e-06, + "loss": 0.3048, + "step": 7043 + }, + { + "epoch": 0.6, + "learning_rate": 7.1633072250870665e-06, + "loss": 0.2772, + "step": 7044 + }, + { + "epoch": 0.6, + "learning_rate": 7.160645136785968e-06, + "loss": 0.2604, + "step": 7045 + }, + { + "epoch": 0.6, + "learning_rate": 7.157983267328314e-06, + "loss": 0.2914, + "step": 7046 + }, + { + "epoch": 0.6, + "learning_rate": 7.155321616919267e-06, + "loss": 0.2443, + "step": 7047 + }, + { + "epoch": 0.6, + "learning_rate": 7.152660185763969e-06, + "loss": 0.283, + "step": 7048 + }, + { + "epoch": 0.6, + "learning_rate": 7.149998974067558e-06, + "loss": 0.2876, + "step": 7049 + }, + { + "epoch": 0.6, + "learning_rate": 7.147337982035143e-06, + "loss": 0.2795, + "step": 7050 + }, + { + "epoch": 0.6, + "learning_rate": 7.144677209871819e-06, + "loss": 0.2578, + "step": 7051 + }, + { + "epoch": 0.6, + "learning_rate": 7.142016657782671e-06, + "loss": 0.285, + "step": 7052 + }, + { + "epoch": 0.6, + "learning_rate": 7.139356325972757e-06, + "loss": 0.2953, + "step": 7053 + }, + { + "epoch": 0.6, + "learning_rate": 7.136696214647123e-06, + "loss": 0.2733, + "step": 7054 + }, + { + "epoch": 0.6, + "learning_rate": 7.134036324010791e-06, + "loss": 0.2737, + "step": 7055 + }, + { + "epoch": 0.6, + "learning_rate": 7.1313766542687824e-06, + "loss": 0.2746, + "step": 7056 + }, + { + "epoch": 0.6, + "learning_rate": 7.1287172056260875e-06, + "loss": 0.2905, + "step": 7057 + }, + { + "epoch": 0.61, + "learning_rate": 7.12605797828768e-06, + "loss": 0.2798, + "step": 7058 + }, + { + "epoch": 0.61, + "learning_rate": 7.123398972458526e-06, + "loss": 0.3279, + "step": 7059 + }, + { + "epoch": 0.61, + "learning_rate": 7.120740188343567e-06, + "loss": 0.2869, + "step": 7060 + }, + { + "epoch": 0.61, + "learning_rate": 7.118081626147724e-06, + "loss": 0.2475, + "step": 7061 + }, + { + "epoch": 0.61, + "learning_rate": 7.11542328607591e-06, + "loss": 0.2903, + "step": 7062 + }, + { + "epoch": 0.61, + "learning_rate": 7.112765168333016e-06, + "loss": 0.2856, + "step": 7063 + }, + { + "epoch": 0.61, + "learning_rate": 7.110107273123919e-06, + "loss": 0.2883, + "step": 7064 + }, + { + "epoch": 0.61, + "learning_rate": 7.107449600653468e-06, + "loss": 0.2548, + "step": 7065 + }, + { + "epoch": 0.61, + "learning_rate": 7.104792151126515e-06, + "loss": 0.2606, + "step": 7066 + }, + { + "epoch": 0.61, + "learning_rate": 7.102134924747877e-06, + "loss": 0.2535, + "step": 7067 + }, + { + "epoch": 0.61, + "learning_rate": 7.099477921722358e-06, + "loss": 0.6453, + "step": 7068 + }, + { + "epoch": 0.61, + "learning_rate": 7.096821142254747e-06, + "loss": 0.2961, + "step": 7069 + }, + { + "epoch": 0.61, + "learning_rate": 7.094164586549821e-06, + "loss": 0.2875, + "step": 7070 + }, + { + "epoch": 0.61, + "learning_rate": 7.091508254812331e-06, + "loss": 0.2695, + "step": 7071 + }, + { + "epoch": 0.61, + "learning_rate": 7.088852147247009e-06, + "loss": 0.2967, + "step": 7072 + }, + { + "epoch": 0.61, + "learning_rate": 7.086196264058584e-06, + "loss": 0.2778, + "step": 7073 + }, + { + "epoch": 0.61, + "learning_rate": 7.0835406054517505e-06, + "loss": 0.2772, + "step": 7074 + }, + { + "epoch": 0.61, + "learning_rate": 7.080885171631198e-06, + "loss": 0.264, + "step": 7075 + }, + { + "epoch": 0.61, + "learning_rate": 7.078229962801592e-06, + "loss": 0.2643, + "step": 7076 + }, + { + "epoch": 0.61, + "learning_rate": 7.075574979167585e-06, + "loss": 0.2778, + "step": 7077 + }, + { + "epoch": 0.61, + "learning_rate": 7.072920220933808e-06, + "loss": 0.298, + "step": 7078 + }, + { + "epoch": 0.61, + "learning_rate": 7.070265688304873e-06, + "loss": 0.6022, + "step": 7079 + }, + { + "epoch": 0.61, + "learning_rate": 7.067611381485388e-06, + "loss": 0.3085, + "step": 7080 + }, + { + "epoch": 0.61, + "learning_rate": 7.064957300679927e-06, + "loss": 0.2512, + "step": 7081 + }, + { + "epoch": 0.61, + "learning_rate": 7.062303446093051e-06, + "loss": 0.2683, + "step": 7082 + }, + { + "epoch": 0.61, + "learning_rate": 7.059649817929315e-06, + "loss": 0.2716, + "step": 7083 + }, + { + "epoch": 0.61, + "learning_rate": 7.056996416393241e-06, + "loss": 0.3134, + "step": 7084 + }, + { + "epoch": 0.61, + "learning_rate": 7.054343241689343e-06, + "loss": 0.2478, + "step": 7085 + }, + { + "epoch": 0.61, + "learning_rate": 7.051690294022108e-06, + "loss": 0.2875, + "step": 7086 + }, + { + "epoch": 0.61, + "learning_rate": 7.049037573596021e-06, + "loss": 0.2996, + "step": 7087 + }, + { + "epoch": 0.61, + "learning_rate": 7.0463850806155355e-06, + "loss": 0.2771, + "step": 7088 + }, + { + "epoch": 0.61, + "learning_rate": 7.043732815285091e-06, + "loss": 0.2462, + "step": 7089 + }, + { + "epoch": 0.61, + "learning_rate": 7.041080777809118e-06, + "loss": 0.3026, + "step": 7090 + }, + { + "epoch": 0.61, + "learning_rate": 7.038428968392018e-06, + "loss": 0.247, + "step": 7091 + }, + { + "epoch": 0.61, + "learning_rate": 7.0357773872381804e-06, + "loss": 0.2667, + "step": 7092 + }, + { + "epoch": 0.61, + "learning_rate": 7.0331260345519705e-06, + "loss": 0.2906, + "step": 7093 + }, + { + "epoch": 0.61, + "learning_rate": 7.030474910537748e-06, + "loss": 0.3019, + "step": 7094 + }, + { + "epoch": 0.61, + "learning_rate": 7.027824015399849e-06, + "loss": 0.3752, + "step": 7095 + }, + { + "epoch": 0.61, + "learning_rate": 7.025173349342584e-06, + "loss": 0.2483, + "step": 7096 + }, + { + "epoch": 0.61, + "learning_rate": 7.022522912570262e-06, + "loss": 0.2938, + "step": 7097 + }, + { + "epoch": 0.61, + "learning_rate": 7.019872705287163e-06, + "loss": 0.5854, + "step": 7098 + }, + { + "epoch": 0.61, + "learning_rate": 7.017222727697548e-06, + "loss": 0.5742, + "step": 7099 + }, + { + "epoch": 0.61, + "learning_rate": 7.014572980005667e-06, + "loss": 0.2359, + "step": 7100 + }, + { + "epoch": 0.61, + "learning_rate": 7.01192346241575e-06, + "loss": 0.2411, + "step": 7101 + }, + { + "epoch": 0.61, + "learning_rate": 7.009274175132009e-06, + "loss": 0.277, + "step": 7102 + }, + { + "epoch": 0.61, + "learning_rate": 7.006625118358633e-06, + "loss": 0.274, + "step": 7103 + }, + { + "epoch": 0.61, + "learning_rate": 7.003976292299807e-06, + "loss": 0.2981, + "step": 7104 + }, + { + "epoch": 0.61, + "learning_rate": 7.001327697159684e-06, + "loss": 0.2745, + "step": 7105 + }, + { + "epoch": 0.61, + "learning_rate": 6.998679333142403e-06, + "loss": 0.2916, + "step": 7106 + }, + { + "epoch": 0.61, + "learning_rate": 6.996031200452087e-06, + "loss": 0.2646, + "step": 7107 + }, + { + "epoch": 0.61, + "learning_rate": 6.9933832992928476e-06, + "loss": 0.269, + "step": 7108 + }, + { + "epoch": 0.61, + "learning_rate": 6.990735629868768e-06, + "loss": 0.27, + "step": 7109 + }, + { + "epoch": 0.61, + "learning_rate": 6.9880881923839105e-06, + "loss": 0.2866, + "step": 7110 + }, + { + "epoch": 0.61, + "learning_rate": 6.985440987042339e-06, + "loss": 0.272, + "step": 7111 + }, + { + "epoch": 0.61, + "learning_rate": 6.9827940140480776e-06, + "loss": 0.2966, + "step": 7112 + }, + { + "epoch": 0.61, + "learning_rate": 6.980147273605146e-06, + "loss": 0.2431, + "step": 7113 + }, + { + "epoch": 0.61, + "learning_rate": 6.97750076591754e-06, + "loss": 0.259, + "step": 7114 + }, + { + "epoch": 0.61, + "learning_rate": 6.974854491189243e-06, + "loss": 0.2808, + "step": 7115 + }, + { + "epoch": 0.61, + "learning_rate": 6.9722084496242146e-06, + "loss": 0.2999, + "step": 7116 + }, + { + "epoch": 0.61, + "learning_rate": 6.969562641426394e-06, + "loss": 0.2514, + "step": 7117 + }, + { + "epoch": 0.61, + "learning_rate": 6.966917066799714e-06, + "loss": 0.2651, + "step": 7118 + }, + { + "epoch": 0.61, + "learning_rate": 6.96427172594808e-06, + "loss": 0.2206, + "step": 7119 + }, + { + "epoch": 0.61, + "learning_rate": 6.961626619075377e-06, + "loss": 0.253, + "step": 7120 + }, + { + "epoch": 0.61, + "learning_rate": 6.958981746385486e-06, + "loss": 0.3116, + "step": 7121 + }, + { + "epoch": 0.61, + "learning_rate": 6.956337108082256e-06, + "loss": 0.332, + "step": 7122 + }, + { + "epoch": 0.61, + "learning_rate": 6.953692704369522e-06, + "loss": 0.261, + "step": 7123 + }, + { + "epoch": 0.61, + "learning_rate": 6.951048535451099e-06, + "loss": 0.2593, + "step": 7124 + }, + { + "epoch": 0.61, + "learning_rate": 6.948404601530793e-06, + "loss": 0.2974, + "step": 7125 + }, + { + "epoch": 0.61, + "learning_rate": 6.9457609028123795e-06, + "loss": 0.3338, + "step": 7126 + }, + { + "epoch": 0.61, + "learning_rate": 6.943117439499622e-06, + "loss": 0.3351, + "step": 7127 + }, + { + "epoch": 0.61, + "learning_rate": 6.940474211796273e-06, + "loss": 0.3165, + "step": 7128 + }, + { + "epoch": 0.61, + "learning_rate": 6.937831219906055e-06, + "loss": 0.2773, + "step": 7129 + }, + { + "epoch": 0.61, + "learning_rate": 6.935188464032674e-06, + "loss": 0.3149, + "step": 7130 + }, + { + "epoch": 0.61, + "learning_rate": 6.932545944379818e-06, + "loss": 0.2656, + "step": 7131 + }, + { + "epoch": 0.61, + "learning_rate": 6.929903661151167e-06, + "loss": 0.2434, + "step": 7132 + }, + { + "epoch": 0.61, + "learning_rate": 6.927261614550375e-06, + "loss": 0.3065, + "step": 7133 + }, + { + "epoch": 0.61, + "learning_rate": 6.924619804781069e-06, + "loss": 0.2697, + "step": 7134 + }, + { + "epoch": 0.61, + "learning_rate": 6.921978232046878e-06, + "loss": 0.3273, + "step": 7135 + }, + { + "epoch": 0.61, + "learning_rate": 6.919336896551396e-06, + "loss": 0.3043, + "step": 7136 + }, + { + "epoch": 0.61, + "learning_rate": 6.916695798498201e-06, + "loss": 0.2528, + "step": 7137 + }, + { + "epoch": 0.61, + "learning_rate": 6.91405493809086e-06, + "loss": 0.298, + "step": 7138 + }, + { + "epoch": 0.61, + "learning_rate": 6.911414315532914e-06, + "loss": 0.2842, + "step": 7139 + }, + { + "epoch": 0.61, + "learning_rate": 6.9087739310278956e-06, + "loss": 0.2744, + "step": 7140 + }, + { + "epoch": 0.61, + "learning_rate": 6.906133784779303e-06, + "loss": 0.2769, + "step": 7141 + }, + { + "epoch": 0.61, + "learning_rate": 6.903493876990637e-06, + "loss": 0.2692, + "step": 7142 + }, + { + "epoch": 0.61, + "learning_rate": 6.900854207865361e-06, + "loss": 0.5897, + "step": 7143 + }, + { + "epoch": 0.61, + "learning_rate": 6.898214777606927e-06, + "loss": 0.2576, + "step": 7144 + }, + { + "epoch": 0.61, + "learning_rate": 6.89557558641877e-06, + "loss": 0.2808, + "step": 7145 + }, + { + "epoch": 0.61, + "learning_rate": 6.892936634504313e-06, + "loss": 0.3228, + "step": 7146 + }, + { + "epoch": 0.61, + "learning_rate": 6.890297922066947e-06, + "loss": 0.2793, + "step": 7147 + }, + { + "epoch": 0.61, + "learning_rate": 6.887659449310045e-06, + "loss": 0.3448, + "step": 7148 + }, + { + "epoch": 0.61, + "learning_rate": 6.885021216436981e-06, + "loss": 0.2498, + "step": 7149 + }, + { + "epoch": 0.61, + "learning_rate": 6.882383223651088e-06, + "loss": 0.268, + "step": 7150 + }, + { + "epoch": 0.61, + "learning_rate": 6.879745471155692e-06, + "loss": 0.2971, + "step": 7151 + }, + { + "epoch": 0.61, + "learning_rate": 6.877107959154094e-06, + "loss": 0.275, + "step": 7152 + }, + { + "epoch": 0.61, + "learning_rate": 6.8744706878495885e-06, + "loss": 0.2622, + "step": 7153 + }, + { + "epoch": 0.61, + "learning_rate": 6.871833657445438e-06, + "loss": 0.235, + "step": 7154 + }, + { + "epoch": 0.61, + "learning_rate": 6.8691968681448895e-06, + "loss": 0.2673, + "step": 7155 + }, + { + "epoch": 0.61, + "learning_rate": 6.866560320151179e-06, + "loss": 0.2803, + "step": 7156 + }, + { + "epoch": 0.61, + "learning_rate": 6.8639240136675156e-06, + "loss": 0.3454, + "step": 7157 + }, + { + "epoch": 0.61, + "learning_rate": 6.861287948897091e-06, + "loss": 0.2888, + "step": 7158 + }, + { + "epoch": 0.61, + "learning_rate": 6.858652126043086e-06, + "loss": 0.3086, + "step": 7159 + }, + { + "epoch": 0.61, + "learning_rate": 6.856016545308655e-06, + "loss": 0.2526, + "step": 7160 + }, + { + "epoch": 0.61, + "learning_rate": 6.853381206896932e-06, + "loss": 0.2421, + "step": 7161 + }, + { + "epoch": 0.61, + "learning_rate": 6.850746111011034e-06, + "loss": 0.2998, + "step": 7162 + }, + { + "epoch": 0.61, + "learning_rate": 6.848111257854069e-06, + "loss": 0.2838, + "step": 7163 + }, + { + "epoch": 0.61, + "learning_rate": 6.845476647629112e-06, + "loss": 0.3314, + "step": 7164 + }, + { + "epoch": 0.61, + "learning_rate": 6.842842280539226e-06, + "loss": 0.2663, + "step": 7165 + }, + { + "epoch": 0.61, + "learning_rate": 6.8402081567874625e-06, + "loss": 0.2894, + "step": 7166 + }, + { + "epoch": 0.61, + "learning_rate": 6.8375742765768394e-06, + "loss": 0.3058, + "step": 7167 + }, + { + "epoch": 0.61, + "learning_rate": 6.8349406401103655e-06, + "loss": 0.2712, + "step": 7168 + }, + { + "epoch": 0.61, + "learning_rate": 6.832307247591026e-06, + "loss": 0.2624, + "step": 7169 + }, + { + "epoch": 0.61, + "learning_rate": 6.8296740992217915e-06, + "loss": 0.2549, + "step": 7170 + }, + { + "epoch": 0.61, + "learning_rate": 6.827041195205615e-06, + "loss": 0.2476, + "step": 7171 + }, + { + "epoch": 0.61, + "learning_rate": 6.824408535745422e-06, + "loss": 0.2554, + "step": 7172 + }, + { + "epoch": 0.61, + "learning_rate": 6.82177612104413e-06, + "loss": 0.2761, + "step": 7173 + }, + { + "epoch": 0.61, + "learning_rate": 6.819143951304632e-06, + "loss": 0.2582, + "step": 7174 + }, + { + "epoch": 0.62, + "learning_rate": 6.8165120267297994e-06, + "loss": 0.3132, + "step": 7175 + }, + { + "epoch": 0.62, + "learning_rate": 6.813880347522489e-06, + "loss": 0.2543, + "step": 7176 + }, + { + "epoch": 0.62, + "learning_rate": 6.811248913885539e-06, + "loss": 0.2748, + "step": 7177 + }, + { + "epoch": 0.62, + "learning_rate": 6.8086177260217675e-06, + "loss": 0.2635, + "step": 7178 + }, + { + "epoch": 0.62, + "learning_rate": 6.80598678413397e-06, + "loss": 0.2427, + "step": 7179 + }, + { + "epoch": 0.62, + "learning_rate": 6.803356088424933e-06, + "loss": 0.276, + "step": 7180 + }, + { + "epoch": 0.62, + "learning_rate": 6.800725639097412e-06, + "loss": 0.2837, + "step": 7181 + }, + { + "epoch": 0.62, + "learning_rate": 6.7980954363541506e-06, + "loss": 0.2981, + "step": 7182 + }, + { + "epoch": 0.62, + "learning_rate": 6.795465480397868e-06, + "loss": 0.2987, + "step": 7183 + }, + { + "epoch": 0.62, + "learning_rate": 6.792835771431278e-06, + "loss": 0.2744, + "step": 7184 + }, + { + "epoch": 0.62, + "learning_rate": 6.790206309657058e-06, + "loss": 0.2968, + "step": 7185 + }, + { + "epoch": 0.62, + "learning_rate": 6.787577095277873e-06, + "loss": 0.2891, + "step": 7186 + }, + { + "epoch": 0.62, + "learning_rate": 6.784948128496376e-06, + "loss": 0.2675, + "step": 7187 + }, + { + "epoch": 0.62, + "learning_rate": 6.782319409515188e-06, + "loss": 0.2592, + "step": 7188 + }, + { + "epoch": 0.62, + "learning_rate": 6.7796909385369245e-06, + "loss": 0.2728, + "step": 7189 + }, + { + "epoch": 0.62, + "learning_rate": 6.777062715764166e-06, + "loss": 0.2361, + "step": 7190 + }, + { + "epoch": 0.62, + "learning_rate": 6.774434741399493e-06, + "loss": 0.2347, + "step": 7191 + }, + { + "epoch": 0.62, + "learning_rate": 6.771807015645453e-06, + "loss": 0.2879, + "step": 7192 + }, + { + "epoch": 0.62, + "learning_rate": 6.7691795387045735e-06, + "loss": 0.2632, + "step": 7193 + }, + { + "epoch": 0.62, + "learning_rate": 6.766552310779374e-06, + "loss": 0.2704, + "step": 7194 + }, + { + "epoch": 0.62, + "learning_rate": 6.763925332072343e-06, + "loss": 0.2549, + "step": 7195 + }, + { + "epoch": 0.62, + "learning_rate": 6.761298602785957e-06, + "loss": 0.2593, + "step": 7196 + }, + { + "epoch": 0.62, + "learning_rate": 6.758672123122675e-06, + "loss": 0.2761, + "step": 7197 + }, + { + "epoch": 0.62, + "learning_rate": 6.7560458932849306e-06, + "loss": 0.2642, + "step": 7198 + }, + { + "epoch": 0.62, + "learning_rate": 6.753419913475139e-06, + "loss": 0.2402, + "step": 7199 + }, + { + "epoch": 0.62, + "learning_rate": 6.7507941838956946e-06, + "loss": 0.3021, + "step": 7200 + }, + { + "epoch": 0.62, + "learning_rate": 6.748168704748984e-06, + "loss": 0.2784, + "step": 7201 + }, + { + "epoch": 0.62, + "learning_rate": 6.74554347623736e-06, + "loss": 0.2567, + "step": 7202 + }, + { + "epoch": 0.62, + "learning_rate": 6.742918498563163e-06, + "loss": 0.3071, + "step": 7203 + }, + { + "epoch": 0.62, + "learning_rate": 6.740293771928717e-06, + "loss": 0.2748, + "step": 7204 + }, + { + "epoch": 0.62, + "learning_rate": 6.7376692965363196e-06, + "loss": 0.3159, + "step": 7205 + }, + { + "epoch": 0.62, + "learning_rate": 6.735045072588256e-06, + "loss": 0.2794, + "step": 7206 + }, + { + "epoch": 0.62, + "learning_rate": 6.732421100286779e-06, + "loss": 0.259, + "step": 7207 + }, + { + "epoch": 0.62, + "learning_rate": 6.7297973798341405e-06, + "loss": 0.2919, + "step": 7208 + }, + { + "epoch": 0.62, + "learning_rate": 6.727173911432565e-06, + "loss": 0.3078, + "step": 7209 + }, + { + "epoch": 0.62, + "learning_rate": 6.724550695284247e-06, + "loss": 0.2858, + "step": 7210 + }, + { + "epoch": 0.62, + "learning_rate": 6.721927731591382e-06, + "loss": 0.2748, + "step": 7211 + }, + { + "epoch": 0.62, + "learning_rate": 6.71930502055613e-06, + "loss": 0.2647, + "step": 7212 + }, + { + "epoch": 0.62, + "learning_rate": 6.716682562380634e-06, + "loss": 0.2778, + "step": 7213 + }, + { + "epoch": 0.62, + "learning_rate": 6.714060357267023e-06, + "loss": 0.2635, + "step": 7214 + }, + { + "epoch": 0.62, + "learning_rate": 6.711438405417403e-06, + "loss": 0.3018, + "step": 7215 + }, + { + "epoch": 0.62, + "learning_rate": 6.708816707033865e-06, + "loss": 0.5948, + "step": 7216 + }, + { + "epoch": 0.62, + "learning_rate": 6.706195262318467e-06, + "loss": 0.2814, + "step": 7217 + }, + { + "epoch": 0.62, + "learning_rate": 6.703574071473269e-06, + "loss": 0.2925, + "step": 7218 + }, + { + "epoch": 0.62, + "learning_rate": 6.7009531347002924e-06, + "loss": 0.298, + "step": 7219 + }, + { + "epoch": 0.62, + "learning_rate": 6.698332452201545e-06, + "loss": 0.2256, + "step": 7220 + }, + { + "epoch": 0.62, + "learning_rate": 6.695712024179015e-06, + "loss": 0.3104, + "step": 7221 + }, + { + "epoch": 0.62, + "learning_rate": 6.693091850834681e-06, + "loss": 0.264, + "step": 7222 + }, + { + "epoch": 0.62, + "learning_rate": 6.690471932370487e-06, + "loss": 0.2758, + "step": 7223 + }, + { + "epoch": 0.62, + "learning_rate": 6.6878522689883596e-06, + "loss": 0.2579, + "step": 7224 + }, + { + "epoch": 0.62, + "learning_rate": 6.6852328608902165e-06, + "loss": 0.2759, + "step": 7225 + }, + { + "epoch": 0.62, + "learning_rate": 6.682613708277945e-06, + "loss": 0.2706, + "step": 7226 + }, + { + "epoch": 0.62, + "learning_rate": 6.679994811353419e-06, + "loss": 0.2902, + "step": 7227 + }, + { + "epoch": 0.62, + "learning_rate": 6.677376170318484e-06, + "loss": 0.2663, + "step": 7228 + }, + { + "epoch": 0.62, + "learning_rate": 6.674757785374979e-06, + "loss": 0.2781, + "step": 7229 + }, + { + "epoch": 0.62, + "learning_rate": 6.672139656724715e-06, + "loss": 0.3488, + "step": 7230 + }, + { + "epoch": 0.62, + "learning_rate": 6.669521784569479e-06, + "loss": 0.3009, + "step": 7231 + }, + { + "epoch": 0.62, + "learning_rate": 6.66690416911105e-06, + "loss": 0.3086, + "step": 7232 + }, + { + "epoch": 0.62, + "learning_rate": 6.664286810551177e-06, + "loss": 0.2546, + "step": 7233 + }, + { + "epoch": 0.62, + "learning_rate": 6.6616697090915975e-06, + "loss": 0.277, + "step": 7234 + }, + { + "epoch": 0.62, + "learning_rate": 6.659052864934017e-06, + "loss": 0.2794, + "step": 7235 + }, + { + "epoch": 0.62, + "learning_rate": 6.656436278280136e-06, + "loss": 0.2183, + "step": 7236 + }, + { + "epoch": 0.62, + "learning_rate": 6.653819949331628e-06, + "loss": 0.3157, + "step": 7237 + }, + { + "epoch": 0.62, + "learning_rate": 6.651203878290139e-06, + "loss": 0.3036, + "step": 7238 + }, + { + "epoch": 0.62, + "learning_rate": 6.648588065357313e-06, + "loss": 0.3045, + "step": 7239 + }, + { + "epoch": 0.62, + "learning_rate": 6.645972510734756e-06, + "loss": 0.3218, + "step": 7240 + }, + { + "epoch": 0.62, + "learning_rate": 6.643357214624064e-06, + "loss": 0.2398, + "step": 7241 + }, + { + "epoch": 0.62, + "learning_rate": 6.640742177226816e-06, + "loss": 0.2417, + "step": 7242 + }, + { + "epoch": 0.62, + "learning_rate": 6.638127398744563e-06, + "loss": 0.2916, + "step": 7243 + }, + { + "epoch": 0.62, + "learning_rate": 6.635512879378837e-06, + "loss": 0.2627, + "step": 7244 + }, + { + "epoch": 0.62, + "learning_rate": 6.632898619331151e-06, + "loss": 0.3278, + "step": 7245 + }, + { + "epoch": 0.62, + "learning_rate": 6.630284618803003e-06, + "loss": 0.3138, + "step": 7246 + }, + { + "epoch": 0.62, + "learning_rate": 6.6276708779958696e-06, + "loss": 0.2462, + "step": 7247 + }, + { + "epoch": 0.62, + "learning_rate": 6.6250573971111975e-06, + "loss": 0.2772, + "step": 7248 + }, + { + "epoch": 0.62, + "learning_rate": 6.622444176350429e-06, + "loss": 0.2576, + "step": 7249 + }, + { + "epoch": 0.62, + "learning_rate": 6.619831215914974e-06, + "loss": 0.2695, + "step": 7250 + }, + { + "epoch": 0.62, + "learning_rate": 6.6172185160062255e-06, + "loss": 0.3051, + "step": 7251 + }, + { + "epoch": 0.62, + "learning_rate": 6.6146060768255596e-06, + "loss": 0.3267, + "step": 7252 + }, + { + "epoch": 0.62, + "learning_rate": 6.611993898574329e-06, + "loss": 0.3086, + "step": 7253 + }, + { + "epoch": 0.62, + "learning_rate": 6.609381981453869e-06, + "loss": 0.2958, + "step": 7254 + }, + { + "epoch": 0.62, + "learning_rate": 6.60677032566549e-06, + "loss": 0.2664, + "step": 7255 + }, + { + "epoch": 0.62, + "learning_rate": 6.604158931410491e-06, + "loss": 0.3224, + "step": 7256 + }, + { + "epoch": 0.62, + "learning_rate": 6.601547798890142e-06, + "loss": 0.2673, + "step": 7257 + }, + { + "epoch": 0.62, + "learning_rate": 6.598936928305695e-06, + "loss": 0.3295, + "step": 7258 + }, + { + "epoch": 0.62, + "learning_rate": 6.596326319858382e-06, + "loss": 0.2814, + "step": 7259 + }, + { + "epoch": 0.62, + "learning_rate": 6.593715973749422e-06, + "loss": 0.3036, + "step": 7260 + }, + { + "epoch": 0.62, + "learning_rate": 6.591105890180005e-06, + "loss": 0.3146, + "step": 7261 + }, + { + "epoch": 0.62, + "learning_rate": 6.5884960693512965e-06, + "loss": 0.2615, + "step": 7262 + }, + { + "epoch": 0.62, + "learning_rate": 6.5858865114644584e-06, + "loss": 0.269, + "step": 7263 + }, + { + "epoch": 0.62, + "learning_rate": 6.583277216720618e-06, + "loss": 0.2655, + "step": 7264 + }, + { + "epoch": 0.62, + "learning_rate": 6.580668185320889e-06, + "loss": 0.295, + "step": 7265 + }, + { + "epoch": 0.62, + "learning_rate": 6.578059417466356e-06, + "loss": 0.2618, + "step": 7266 + }, + { + "epoch": 0.62, + "learning_rate": 6.575450913358099e-06, + "loss": 0.2559, + "step": 7267 + }, + { + "epoch": 0.62, + "learning_rate": 6.572842673197164e-06, + "loss": 0.3317, + "step": 7268 + }, + { + "epoch": 0.62, + "learning_rate": 6.570234697184578e-06, + "loss": 0.2997, + "step": 7269 + }, + { + "epoch": 0.62, + "learning_rate": 6.5676269855213585e-06, + "loss": 0.3006, + "step": 7270 + }, + { + "epoch": 0.62, + "learning_rate": 6.565019538408488e-06, + "loss": 0.3048, + "step": 7271 + }, + { + "epoch": 0.62, + "learning_rate": 6.562412356046943e-06, + "loss": 0.3724, + "step": 7272 + }, + { + "epoch": 0.62, + "learning_rate": 6.559805438637663e-06, + "loss": 0.2789, + "step": 7273 + }, + { + "epoch": 0.62, + "learning_rate": 6.557198786381584e-06, + "loss": 0.2672, + "step": 7274 + }, + { + "epoch": 0.62, + "learning_rate": 6.554592399479614e-06, + "loss": 0.2433, + "step": 7275 + }, + { + "epoch": 0.62, + "learning_rate": 6.5519862781326315e-06, + "loss": 0.269, + "step": 7276 + }, + { + "epoch": 0.62, + "learning_rate": 6.549380422541514e-06, + "loss": 0.2672, + "step": 7277 + }, + { + "epoch": 0.62, + "learning_rate": 6.546774832907101e-06, + "loss": 0.274, + "step": 7278 + }, + { + "epoch": 0.62, + "learning_rate": 6.544169509430219e-06, + "loss": 0.2919, + "step": 7279 + }, + { + "epoch": 0.62, + "learning_rate": 6.541564452311681e-06, + "loss": 0.3206, + "step": 7280 + }, + { + "epoch": 0.62, + "learning_rate": 6.538959661752264e-06, + "loss": 0.3083, + "step": 7281 + }, + { + "epoch": 0.62, + "learning_rate": 6.536355137952737e-06, + "loss": 0.2836, + "step": 7282 + }, + { + "epoch": 0.62, + "learning_rate": 6.533750881113836e-06, + "loss": 0.2808, + "step": 7283 + }, + { + "epoch": 0.62, + "learning_rate": 6.531146891436293e-06, + "loss": 0.3727, + "step": 7284 + }, + { + "epoch": 0.62, + "learning_rate": 6.528543169120809e-06, + "loss": 0.2753, + "step": 7285 + }, + { + "epoch": 0.62, + "learning_rate": 6.52593971436806e-06, + "loss": 0.2754, + "step": 7286 + }, + { + "epoch": 0.62, + "learning_rate": 6.523336527378716e-06, + "loss": 0.2632, + "step": 7287 + }, + { + "epoch": 0.62, + "learning_rate": 6.520733608353415e-06, + "loss": 0.269, + "step": 7288 + }, + { + "epoch": 0.62, + "learning_rate": 6.518130957492774e-06, + "loss": 0.2343, + "step": 7289 + }, + { + "epoch": 0.62, + "learning_rate": 6.515528574997394e-06, + "loss": 0.3079, + "step": 7290 + }, + { + "epoch": 0.62, + "learning_rate": 6.512926461067853e-06, + "loss": 0.2656, + "step": 7291 + }, + { + "epoch": 0.63, + "learning_rate": 6.510324615904713e-06, + "loss": 0.2411, + "step": 7292 + }, + { + "epoch": 0.63, + "learning_rate": 6.507723039708505e-06, + "loss": 0.2785, + "step": 7293 + }, + { + "epoch": 0.63, + "learning_rate": 6.5051217326797535e-06, + "loss": 0.3478, + "step": 7294 + }, + { + "epoch": 0.63, + "learning_rate": 6.5025206950189475e-06, + "loss": 0.258, + "step": 7295 + }, + { + "epoch": 0.63, + "learning_rate": 6.499919926926566e-06, + "loss": 0.2908, + "step": 7296 + }, + { + "epoch": 0.63, + "learning_rate": 6.49731942860306e-06, + "loss": 0.2934, + "step": 7297 + }, + { + "epoch": 0.63, + "learning_rate": 6.494719200248867e-06, + "loss": 0.275, + "step": 7298 + }, + { + "epoch": 0.63, + "learning_rate": 6.492119242064398e-06, + "loss": 0.2389, + "step": 7299 + }, + { + "epoch": 0.63, + "learning_rate": 6.489519554250043e-06, + "loss": 0.2653, + "step": 7300 + }, + { + "epoch": 0.63, + "learning_rate": 6.4869201370061785e-06, + "loss": 0.2875, + "step": 7301 + }, + { + "epoch": 0.63, + "learning_rate": 6.484320990533148e-06, + "loss": 0.3289, + "step": 7302 + }, + { + "epoch": 0.63, + "learning_rate": 6.481722115031287e-06, + "loss": 0.3198, + "step": 7303 + }, + { + "epoch": 0.63, + "learning_rate": 6.479123510700896e-06, + "loss": 0.2668, + "step": 7304 + }, + { + "epoch": 0.63, + "learning_rate": 6.476525177742272e-06, + "loss": 0.2445, + "step": 7305 + }, + { + "epoch": 0.63, + "learning_rate": 6.473927116355678e-06, + "loss": 0.3007, + "step": 7306 + }, + { + "epoch": 0.63, + "learning_rate": 6.4713293267413555e-06, + "loss": 0.29, + "step": 7307 + }, + { + "epoch": 0.63, + "learning_rate": 6.468731809099536e-06, + "loss": 0.2662, + "step": 7308 + }, + { + "epoch": 0.63, + "learning_rate": 6.466134563630418e-06, + "loss": 0.2924, + "step": 7309 + }, + { + "epoch": 0.63, + "learning_rate": 6.463537590534188e-06, + "loss": 0.2774, + "step": 7310 + }, + { + "epoch": 0.63, + "learning_rate": 6.460940890011004e-06, + "loss": 0.2723, + "step": 7311 + }, + { + "epoch": 0.63, + "learning_rate": 6.4583444622610126e-06, + "loss": 0.3069, + "step": 7312 + }, + { + "epoch": 0.63, + "learning_rate": 6.455748307484328e-06, + "loss": 0.2598, + "step": 7313 + }, + { + "epoch": 0.63, + "learning_rate": 6.453152425881051e-06, + "loss": 0.3437, + "step": 7314 + }, + { + "epoch": 0.63, + "learning_rate": 6.450556817651261e-06, + "loss": 0.288, + "step": 7315 + }, + { + "epoch": 0.63, + "learning_rate": 6.447961482995011e-06, + "loss": 0.2327, + "step": 7316 + }, + { + "epoch": 0.63, + "learning_rate": 6.4453664221123425e-06, + "loss": 0.2458, + "step": 7317 + }, + { + "epoch": 0.63, + "learning_rate": 6.44277163520326e-06, + "loss": 0.2919, + "step": 7318 + }, + { + "epoch": 0.63, + "learning_rate": 6.440177122467769e-06, + "loss": 0.288, + "step": 7319 + }, + { + "epoch": 0.63, + "learning_rate": 6.437582884105835e-06, + "loss": 0.2764, + "step": 7320 + }, + { + "epoch": 0.63, + "learning_rate": 6.434988920317407e-06, + "loss": 0.2935, + "step": 7321 + }, + { + "epoch": 0.63, + "learning_rate": 6.432395231302418e-06, + "loss": 0.2794, + "step": 7322 + }, + { + "epoch": 0.63, + "learning_rate": 6.429801817260779e-06, + "loss": 0.2689, + "step": 7323 + }, + { + "epoch": 0.63, + "learning_rate": 6.4272086783923715e-06, + "loss": 0.2647, + "step": 7324 + }, + { + "epoch": 0.63, + "learning_rate": 6.424615814897068e-06, + "loss": 0.267, + "step": 7325 + }, + { + "epoch": 0.63, + "learning_rate": 6.422023226974713e-06, + "loss": 0.3537, + "step": 7326 + }, + { + "epoch": 0.63, + "learning_rate": 6.419430914825125e-06, + "loss": 0.5598, + "step": 7327 + }, + { + "epoch": 0.63, + "learning_rate": 6.4168388786481106e-06, + "loss": 0.5558, + "step": 7328 + }, + { + "epoch": 0.63, + "learning_rate": 6.414247118643451e-06, + "loss": 0.2962, + "step": 7329 + }, + { + "epoch": 0.63, + "learning_rate": 6.411655635010907e-06, + "loss": 0.2992, + "step": 7330 + }, + { + "epoch": 0.63, + "learning_rate": 6.409064427950213e-06, + "loss": 0.2709, + "step": 7331 + }, + { + "epoch": 0.63, + "learning_rate": 6.406473497661092e-06, + "loss": 0.2609, + "step": 7332 + }, + { + "epoch": 0.63, + "learning_rate": 6.403882844343239e-06, + "loss": 0.3163, + "step": 7333 + }, + { + "epoch": 0.63, + "learning_rate": 6.4012924681963255e-06, + "loss": 0.2516, + "step": 7334 + }, + { + "epoch": 0.63, + "learning_rate": 6.3987023694200045e-06, + "loss": 0.2801, + "step": 7335 + }, + { + "epoch": 0.63, + "learning_rate": 6.396112548213913e-06, + "loss": 0.2522, + "step": 7336 + }, + { + "epoch": 0.63, + "learning_rate": 6.393523004777661e-06, + "loss": 0.2338, + "step": 7337 + }, + { + "epoch": 0.63, + "learning_rate": 6.39093373931083e-06, + "loss": 0.2737, + "step": 7338 + }, + { + "epoch": 0.63, + "learning_rate": 6.388344752012999e-06, + "loss": 0.2892, + "step": 7339 + }, + { + "epoch": 0.63, + "learning_rate": 6.385756043083706e-06, + "loss": 0.3145, + "step": 7340 + }, + { + "epoch": 0.63, + "learning_rate": 6.383167612722481e-06, + "loss": 0.2906, + "step": 7341 + }, + { + "epoch": 0.63, + "learning_rate": 6.38057946112882e-06, + "loss": 0.3256, + "step": 7342 + }, + { + "epoch": 0.63, + "learning_rate": 6.3779915885022145e-06, + "loss": 0.2568, + "step": 7343 + }, + { + "epoch": 0.63, + "learning_rate": 6.375403995042122e-06, + "loss": 0.3112, + "step": 7344 + }, + { + "epoch": 0.63, + "learning_rate": 6.3728166809479744e-06, + "loss": 0.254, + "step": 7345 + }, + { + "epoch": 0.63, + "learning_rate": 6.370229646419199e-06, + "loss": 0.2955, + "step": 7346 + }, + { + "epoch": 0.63, + "learning_rate": 6.3676428916551856e-06, + "loss": 0.2528, + "step": 7347 + }, + { + "epoch": 0.63, + "learning_rate": 6.365056416855311e-06, + "loss": 0.2457, + "step": 7348 + }, + { + "epoch": 0.63, + "learning_rate": 6.3624702222189235e-06, + "loss": 0.2742, + "step": 7349 + }, + { + "epoch": 0.63, + "learning_rate": 6.359884307945363e-06, + "loss": 0.2371, + "step": 7350 + }, + { + "epoch": 0.63, + "learning_rate": 6.357298674233931e-06, + "loss": 0.2926, + "step": 7351 + }, + { + "epoch": 0.63, + "learning_rate": 6.354713321283916e-06, + "loss": 0.2774, + "step": 7352 + }, + { + "epoch": 0.63, + "learning_rate": 6.352128249294591e-06, + "loss": 0.2715, + "step": 7353 + }, + { + "epoch": 0.63, + "learning_rate": 6.349543458465193e-06, + "loss": 0.2508, + "step": 7354 + }, + { + "epoch": 0.63, + "learning_rate": 6.3469589489949504e-06, + "loss": 0.254, + "step": 7355 + }, + { + "epoch": 0.63, + "learning_rate": 6.3443747210830565e-06, + "loss": 0.2607, + "step": 7356 + }, + { + "epoch": 0.63, + "learning_rate": 6.3417907749287e-06, + "loss": 0.285, + "step": 7357 + }, + { + "epoch": 0.63, + "learning_rate": 6.339207110731036e-06, + "loss": 0.3003, + "step": 7358 + }, + { + "epoch": 0.63, + "learning_rate": 6.336623728689195e-06, + "loss": 0.2698, + "step": 7359 + }, + { + "epoch": 0.63, + "learning_rate": 6.3340406290022986e-06, + "loss": 0.2584, + "step": 7360 + }, + { + "epoch": 0.63, + "learning_rate": 6.331457811869437e-06, + "loss": 0.3047, + "step": 7361 + }, + { + "epoch": 0.63, + "learning_rate": 6.328875277489677e-06, + "loss": 0.2912, + "step": 7362 + }, + { + "epoch": 0.63, + "learning_rate": 6.326293026062075e-06, + "loss": 0.3234, + "step": 7363 + }, + { + "epoch": 0.63, + "learning_rate": 6.3237110577856534e-06, + "loss": 0.2498, + "step": 7364 + }, + { + "epoch": 0.63, + "learning_rate": 6.321129372859418e-06, + "loss": 0.2927, + "step": 7365 + }, + { + "epoch": 0.63, + "learning_rate": 6.318547971482352e-06, + "loss": 0.2851, + "step": 7366 + }, + { + "epoch": 0.63, + "learning_rate": 6.315966853853417e-06, + "loss": 0.2859, + "step": 7367 + }, + { + "epoch": 0.63, + "learning_rate": 6.313386020171557e-06, + "loss": 0.2474, + "step": 7368 + }, + { + "epoch": 0.63, + "learning_rate": 6.310805470635682e-06, + "loss": 0.2416, + "step": 7369 + }, + { + "epoch": 0.63, + "learning_rate": 6.3082252054446955e-06, + "loss": 0.3397, + "step": 7370 + }, + { + "epoch": 0.63, + "learning_rate": 6.30564522479747e-06, + "loss": 0.2685, + "step": 7371 + }, + { + "epoch": 0.63, + "learning_rate": 6.303065528892853e-06, + "loss": 0.2521, + "step": 7372 + }, + { + "epoch": 0.63, + "learning_rate": 6.300486117929676e-06, + "loss": 0.2812, + "step": 7373 + }, + { + "epoch": 0.63, + "learning_rate": 6.297906992106755e-06, + "loss": 0.644, + "step": 7374 + }, + { + "epoch": 0.63, + "learning_rate": 6.295328151622868e-06, + "loss": 0.3009, + "step": 7375 + }, + { + "epoch": 0.63, + "learning_rate": 6.292749596676779e-06, + "loss": 0.2661, + "step": 7376 + }, + { + "epoch": 0.63, + "learning_rate": 6.290171327467238e-06, + "loss": 0.2781, + "step": 7377 + }, + { + "epoch": 0.63, + "learning_rate": 6.287593344192957e-06, + "loss": 0.2819, + "step": 7378 + }, + { + "epoch": 0.63, + "learning_rate": 6.285015647052639e-06, + "loss": 0.2754, + "step": 7379 + }, + { + "epoch": 0.63, + "learning_rate": 6.282438236244956e-06, + "loss": 0.2919, + "step": 7380 + }, + { + "epoch": 0.63, + "learning_rate": 6.2798611119685685e-06, + "loss": 0.2772, + "step": 7381 + }, + { + "epoch": 0.63, + "learning_rate": 6.277284274422104e-06, + "loss": 0.3116, + "step": 7382 + }, + { + "epoch": 0.63, + "learning_rate": 6.27470772380417e-06, + "loss": 0.302, + "step": 7383 + }, + { + "epoch": 0.63, + "learning_rate": 6.27213146031336e-06, + "loss": 0.2711, + "step": 7384 + }, + { + "epoch": 0.63, + "learning_rate": 6.269555484148237e-06, + "loss": 0.2593, + "step": 7385 + }, + { + "epoch": 0.63, + "learning_rate": 6.266979795507346e-06, + "loss": 0.3012, + "step": 7386 + }, + { + "epoch": 0.63, + "learning_rate": 6.264404394589202e-06, + "loss": 0.2141, + "step": 7387 + }, + { + "epoch": 0.63, + "learning_rate": 6.261829281592313e-06, + "loss": 0.2646, + "step": 7388 + }, + { + "epoch": 0.63, + "learning_rate": 6.259254456715154e-06, + "loss": 0.2444, + "step": 7389 + }, + { + "epoch": 0.63, + "learning_rate": 6.256679920156172e-06, + "loss": 0.2916, + "step": 7390 + }, + { + "epoch": 0.63, + "learning_rate": 6.25410567211381e-06, + "loss": 0.2636, + "step": 7391 + }, + { + "epoch": 0.63, + "learning_rate": 6.251531712786473e-06, + "loss": 0.2279, + "step": 7392 + }, + { + "epoch": 0.63, + "learning_rate": 6.24895804237255e-06, + "loss": 0.2864, + "step": 7393 + }, + { + "epoch": 0.63, + "learning_rate": 6.246384661070404e-06, + "loss": 0.2712, + "step": 7394 + }, + { + "epoch": 0.63, + "learning_rate": 6.243811569078384e-06, + "loss": 0.2708, + "step": 7395 + }, + { + "epoch": 0.63, + "learning_rate": 6.24123876659481e-06, + "loss": 0.2595, + "step": 7396 + }, + { + "epoch": 0.63, + "learning_rate": 6.238666253817974e-06, + "loss": 0.2948, + "step": 7397 + }, + { + "epoch": 0.63, + "learning_rate": 6.23609403094616e-06, + "loss": 0.2607, + "step": 7398 + }, + { + "epoch": 0.63, + "learning_rate": 6.233522098177622e-06, + "loss": 0.2733, + "step": 7399 + }, + { + "epoch": 0.63, + "learning_rate": 6.230950455710592e-06, + "loss": 0.3316, + "step": 7400 + }, + { + "epoch": 0.63, + "learning_rate": 6.228379103743272e-06, + "loss": 0.2439, + "step": 7401 + }, + { + "epoch": 0.63, + "learning_rate": 6.225808042473857e-06, + "loss": 0.2556, + "step": 7402 + }, + { + "epoch": 0.63, + "learning_rate": 6.223237272100509e-06, + "loss": 0.2602, + "step": 7403 + }, + { + "epoch": 0.63, + "learning_rate": 6.220666792821371e-06, + "loss": 0.2871, + "step": 7404 + }, + { + "epoch": 0.63, + "learning_rate": 6.21809660483456e-06, + "loss": 0.2881, + "step": 7405 + }, + { + "epoch": 0.63, + "learning_rate": 6.2155267083381795e-06, + "loss": 0.2601, + "step": 7406 + }, + { + "epoch": 0.63, + "learning_rate": 6.212957103530297e-06, + "loss": 0.292, + "step": 7407 + }, + { + "epoch": 0.64, + "learning_rate": 6.210387790608972e-06, + "loss": 0.2257, + "step": 7408 + }, + { + "epoch": 0.64, + "learning_rate": 6.207818769772231e-06, + "loss": 0.2697, + "step": 7409 + }, + { + "epoch": 0.64, + "learning_rate": 6.2052500412180805e-06, + "loss": 0.3264, + "step": 7410 + }, + { + "epoch": 0.64, + "learning_rate": 6.202681605144503e-06, + "loss": 0.2346, + "step": 7411 + }, + { + "epoch": 0.64, + "learning_rate": 6.200113461749469e-06, + "loss": 0.2527, + "step": 7412 + }, + { + "epoch": 0.64, + "learning_rate": 6.197545611230913e-06, + "loss": 0.267, + "step": 7413 + }, + { + "epoch": 0.64, + "learning_rate": 6.194978053786749e-06, + "loss": 0.2754, + "step": 7414 + }, + { + "epoch": 0.64, + "learning_rate": 6.19241078961488e-06, + "loss": 0.2548, + "step": 7415 + }, + { + "epoch": 0.64, + "learning_rate": 6.189843818913172e-06, + "loss": 0.2764, + "step": 7416 + }, + { + "epoch": 0.64, + "learning_rate": 6.187277141879476e-06, + "loss": 0.271, + "step": 7417 + }, + { + "epoch": 0.64, + "learning_rate": 6.184710758711616e-06, + "loss": 0.2069, + "step": 7418 + }, + { + "epoch": 0.64, + "learning_rate": 6.182144669607403e-06, + "loss": 0.2579, + "step": 7419 + }, + { + "epoch": 0.64, + "learning_rate": 6.179578874764614e-06, + "loss": 0.2655, + "step": 7420 + }, + { + "epoch": 0.64, + "learning_rate": 6.177013374381005e-06, + "loss": 0.2969, + "step": 7421 + }, + { + "epoch": 0.64, + "learning_rate": 6.174448168654317e-06, + "loss": 0.3246, + "step": 7422 + }, + { + "epoch": 0.64, + "learning_rate": 6.171883257782261e-06, + "loss": 0.3112, + "step": 7423 + }, + { + "epoch": 0.64, + "learning_rate": 6.16931864196253e-06, + "loss": 0.2684, + "step": 7424 + }, + { + "epoch": 0.64, + "learning_rate": 6.166754321392785e-06, + "loss": 0.248, + "step": 7425 + }, + { + "epoch": 0.64, + "learning_rate": 6.164190296270683e-06, + "loss": 0.2527, + "step": 7426 + }, + { + "epoch": 0.64, + "learning_rate": 6.161626566793837e-06, + "loss": 0.2909, + "step": 7427 + }, + { + "epoch": 0.64, + "learning_rate": 6.159063133159846e-06, + "loss": 0.2526, + "step": 7428 + }, + { + "epoch": 0.64, + "learning_rate": 6.156499995566294e-06, + "loss": 0.2944, + "step": 7429 + }, + { + "epoch": 0.64, + "learning_rate": 6.1539371542107295e-06, + "loss": 0.2761, + "step": 7430 + }, + { + "epoch": 0.64, + "learning_rate": 6.151374609290688e-06, + "loss": 0.2386, + "step": 7431 + }, + { + "epoch": 0.64, + "learning_rate": 6.148812361003669e-06, + "loss": 0.309, + "step": 7432 + }, + { + "epoch": 0.64, + "learning_rate": 6.14625040954717e-06, + "loss": 0.2401, + "step": 7433 + }, + { + "epoch": 0.64, + "learning_rate": 6.1436887551186466e-06, + "loss": 0.2916, + "step": 7434 + }, + { + "epoch": 0.64, + "learning_rate": 6.141127397915534e-06, + "loss": 0.3116, + "step": 7435 + }, + { + "epoch": 0.64, + "learning_rate": 6.138566338135259e-06, + "loss": 0.2842, + "step": 7436 + }, + { + "epoch": 0.64, + "learning_rate": 6.13600557597521e-06, + "loss": 0.2538, + "step": 7437 + }, + { + "epoch": 0.64, + "learning_rate": 6.133445111632761e-06, + "loss": 0.283, + "step": 7438 + }, + { + "epoch": 0.64, + "learning_rate": 6.130884945305252e-06, + "loss": 0.2909, + "step": 7439 + }, + { + "epoch": 0.64, + "learning_rate": 6.128325077190018e-06, + "loss": 0.2332, + "step": 7440 + }, + { + "epoch": 0.64, + "learning_rate": 6.125765507484356e-06, + "loss": 0.2864, + "step": 7441 + }, + { + "epoch": 0.64, + "learning_rate": 6.123206236385543e-06, + "loss": 0.2509, + "step": 7442 + }, + { + "epoch": 0.64, + "learning_rate": 6.120647264090839e-06, + "loss": 0.2197, + "step": 7443 + }, + { + "epoch": 0.64, + "learning_rate": 6.1180885907974775e-06, + "loss": 0.3011, + "step": 7444 + }, + { + "epoch": 0.64, + "learning_rate": 6.115530216702661e-06, + "loss": 0.2401, + "step": 7445 + }, + { + "epoch": 0.64, + "learning_rate": 6.112972142003587e-06, + "loss": 0.3022, + "step": 7446 + }, + { + "epoch": 0.64, + "learning_rate": 6.110414366897413e-06, + "loss": 0.2604, + "step": 7447 + }, + { + "epoch": 0.64, + "learning_rate": 6.107856891581281e-06, + "loss": 0.285, + "step": 7448 + }, + { + "epoch": 0.64, + "learning_rate": 6.105299716252303e-06, + "loss": 0.2635, + "step": 7449 + }, + { + "epoch": 0.64, + "learning_rate": 6.102742841107585e-06, + "loss": 0.2951, + "step": 7450 + }, + { + "epoch": 0.64, + "learning_rate": 6.1001862663441906e-06, + "loss": 0.2982, + "step": 7451 + }, + { + "epoch": 0.64, + "learning_rate": 6.0976299921591645e-06, + "loss": 0.2589, + "step": 7452 + }, + { + "epoch": 0.64, + "learning_rate": 6.095074018749542e-06, + "loss": 0.2439, + "step": 7453 + }, + { + "epoch": 0.64, + "learning_rate": 6.092518346312317e-06, + "loss": 0.2491, + "step": 7454 + }, + { + "epoch": 0.64, + "learning_rate": 6.089962975044472e-06, + "loss": 0.2677, + "step": 7455 + }, + { + "epoch": 0.64, + "learning_rate": 6.087407905142957e-06, + "loss": 0.3097, + "step": 7456 + }, + { + "epoch": 0.64, + "learning_rate": 6.084853136804711e-06, + "loss": 0.2666, + "step": 7457 + }, + { + "epoch": 0.64, + "learning_rate": 6.082298670226642e-06, + "loss": 0.2827, + "step": 7458 + }, + { + "epoch": 0.64, + "learning_rate": 6.079744505605628e-06, + "loss": 0.2264, + "step": 7459 + }, + { + "epoch": 0.64, + "learning_rate": 6.077190643138542e-06, + "loss": 0.3216, + "step": 7460 + }, + { + "epoch": 0.64, + "learning_rate": 6.0746370830222145e-06, + "loss": 0.2244, + "step": 7461 + }, + { + "epoch": 0.64, + "learning_rate": 6.0720838254534675e-06, + "loss": 0.2762, + "step": 7462 + }, + { + "epoch": 0.64, + "learning_rate": 6.069530870629088e-06, + "loss": 0.3168, + "step": 7463 + }, + { + "epoch": 0.64, + "learning_rate": 6.0669782187458515e-06, + "loss": 0.306, + "step": 7464 + }, + { + "epoch": 0.64, + "learning_rate": 6.064425870000499e-06, + "loss": 0.2759, + "step": 7465 + }, + { + "epoch": 0.64, + "learning_rate": 6.061873824589751e-06, + "loss": 0.2559, + "step": 7466 + }, + { + "epoch": 0.64, + "learning_rate": 6.059322082710315e-06, + "loss": 0.2316, + "step": 7467 + }, + { + "epoch": 0.64, + "learning_rate": 6.056770644558858e-06, + "loss": 0.2603, + "step": 7468 + }, + { + "epoch": 0.64, + "learning_rate": 6.054219510332038e-06, + "loss": 0.2874, + "step": 7469 + }, + { + "epoch": 0.64, + "learning_rate": 6.051668680226477e-06, + "loss": 0.2875, + "step": 7470 + }, + { + "epoch": 0.64, + "learning_rate": 6.049118154438789e-06, + "loss": 0.281, + "step": 7471 + }, + { + "epoch": 0.64, + "learning_rate": 6.046567933165552e-06, + "loss": 0.2738, + "step": 7472 + }, + { + "epoch": 0.64, + "learning_rate": 6.044018016603321e-06, + "loss": 0.2448, + "step": 7473 + }, + { + "epoch": 0.64, + "learning_rate": 6.0414684049486335e-06, + "loss": 0.2523, + "step": 7474 + }, + { + "epoch": 0.64, + "learning_rate": 6.038919098398006e-06, + "loss": 0.2759, + "step": 7475 + }, + { + "epoch": 0.64, + "learning_rate": 6.036370097147922e-06, + "loss": 0.2946, + "step": 7476 + }, + { + "epoch": 0.64, + "learning_rate": 6.033821401394842e-06, + "loss": 0.2922, + "step": 7477 + }, + { + "epoch": 0.64, + "learning_rate": 6.031273011335215e-06, + "loss": 0.2779, + "step": 7478 + }, + { + "epoch": 0.64, + "learning_rate": 6.028724927165452e-06, + "loss": 0.2715, + "step": 7479 + }, + { + "epoch": 0.64, + "learning_rate": 6.026177149081949e-06, + "loss": 0.3035, + "step": 7480 + }, + { + "epoch": 0.64, + "learning_rate": 6.023629677281075e-06, + "loss": 0.2566, + "step": 7481 + }, + { + "epoch": 0.64, + "learning_rate": 6.0210825119591806e-06, + "loss": 0.2521, + "step": 7482 + }, + { + "epoch": 0.64, + "learning_rate": 6.018535653312586e-06, + "loss": 0.3024, + "step": 7483 + }, + { + "epoch": 0.64, + "learning_rate": 6.015989101537586e-06, + "loss": 0.2773, + "step": 7484 + }, + { + "epoch": 0.64, + "learning_rate": 6.0134428568304645e-06, + "loss": 0.2871, + "step": 7485 + }, + { + "epoch": 0.64, + "learning_rate": 6.0108969193874675e-06, + "loss": 0.2777, + "step": 7486 + }, + { + "epoch": 0.64, + "learning_rate": 6.008351289404824e-06, + "loss": 0.2424, + "step": 7487 + }, + { + "epoch": 0.64, + "learning_rate": 6.005805967078741e-06, + "loss": 0.3023, + "step": 7488 + }, + { + "epoch": 0.64, + "learning_rate": 6.003260952605401e-06, + "loss": 0.2704, + "step": 7489 + }, + { + "epoch": 0.64, + "learning_rate": 6.000716246180953e-06, + "loss": 0.2845, + "step": 7490 + }, + { + "epoch": 0.64, + "learning_rate": 5.9981718480015416e-06, + "loss": 0.3026, + "step": 7491 + }, + { + "epoch": 0.64, + "learning_rate": 5.995627758263267e-06, + "loss": 0.2785, + "step": 7492 + }, + { + "epoch": 0.64, + "learning_rate": 5.9930839771622196e-06, + "loss": 0.2854, + "step": 7493 + }, + { + "epoch": 0.64, + "learning_rate": 5.9905405048944575e-06, + "loss": 0.2971, + "step": 7494 + }, + { + "epoch": 0.64, + "learning_rate": 5.987997341656027e-06, + "loss": 0.2958, + "step": 7495 + }, + { + "epoch": 0.64, + "learning_rate": 5.9854544876429364e-06, + "loss": 0.2897, + "step": 7496 + }, + { + "epoch": 0.64, + "learning_rate": 5.982911943051173e-06, + "loss": 0.2921, + "step": 7497 + }, + { + "epoch": 0.64, + "learning_rate": 5.980369708076713e-06, + "loss": 0.288, + "step": 7498 + }, + { + "epoch": 0.64, + "learning_rate": 5.977827782915493e-06, + "loss": 0.3124, + "step": 7499 + }, + { + "epoch": 0.64, + "learning_rate": 5.975286167763433e-06, + "loss": 0.2576, + "step": 7500 + }, + { + "epoch": 0.64, + "learning_rate": 5.972744862816426e-06, + "loss": 0.2802, + "step": 7501 + }, + { + "epoch": 0.64, + "learning_rate": 5.97020386827035e-06, + "loss": 0.2531, + "step": 7502 + }, + { + "epoch": 0.64, + "learning_rate": 5.967663184321047e-06, + "loss": 0.2457, + "step": 7503 + }, + { + "epoch": 0.64, + "learning_rate": 5.9651228111643385e-06, + "loss": 0.2734, + "step": 7504 + }, + { + "epoch": 0.64, + "learning_rate": 5.962582748996031e-06, + "loss": 0.2679, + "step": 7505 + }, + { + "epoch": 0.64, + "learning_rate": 5.960042998011892e-06, + "loss": 0.3422, + "step": 7506 + }, + { + "epoch": 0.64, + "learning_rate": 5.95750355840768e-06, + "loss": 0.2347, + "step": 7507 + }, + { + "epoch": 0.64, + "learning_rate": 5.954964430379115e-06, + "loss": 0.2913, + "step": 7508 + }, + { + "epoch": 0.64, + "learning_rate": 5.952425614121908e-06, + "loss": 0.3139, + "step": 7509 + }, + { + "epoch": 0.64, + "learning_rate": 5.949887109831736e-06, + "loss": 0.3038, + "step": 7510 + }, + { + "epoch": 0.64, + "learning_rate": 5.947348917704248e-06, + "loss": 0.3193, + "step": 7511 + }, + { + "epoch": 0.64, + "learning_rate": 5.944811037935083e-06, + "loss": 0.2969, + "step": 7512 + }, + { + "epoch": 0.64, + "learning_rate": 5.94227347071985e-06, + "loss": 0.2983, + "step": 7513 + }, + { + "epoch": 0.64, + "learning_rate": 5.939736216254126e-06, + "loss": 0.2701, + "step": 7514 + }, + { + "epoch": 0.64, + "learning_rate": 5.937199274733468e-06, + "loss": 0.2819, + "step": 7515 + }, + { + "epoch": 0.64, + "learning_rate": 5.93466264635342e-06, + "loss": 0.3, + "step": 7516 + }, + { + "epoch": 0.64, + "learning_rate": 5.932126331309486e-06, + "loss": 0.2663, + "step": 7517 + }, + { + "epoch": 0.64, + "learning_rate": 5.929590329797154e-06, + "loss": 0.2562, + "step": 7518 + }, + { + "epoch": 0.64, + "learning_rate": 5.9270546420118855e-06, + "loss": 0.2665, + "step": 7519 + }, + { + "epoch": 0.64, + "learning_rate": 5.924519268149123e-06, + "loss": 0.2967, + "step": 7520 + }, + { + "epoch": 0.64, + "learning_rate": 5.921984208404279e-06, + "loss": 0.3071, + "step": 7521 + }, + { + "epoch": 0.64, + "learning_rate": 5.919449462972737e-06, + "loss": 0.2625, + "step": 7522 + }, + { + "epoch": 0.64, + "learning_rate": 5.916915032049873e-06, + "loss": 0.2537, + "step": 7523 + }, + { + "epoch": 0.64, + "learning_rate": 5.91438091583102e-06, + "loss": 0.2715, + "step": 7524 + }, + { + "epoch": 0.65, + "learning_rate": 5.911847114511497e-06, + "loss": 0.3113, + "step": 7525 + }, + { + "epoch": 0.65, + "learning_rate": 5.9093136282866014e-06, + "loss": 0.3052, + "step": 7526 + }, + { + "epoch": 0.65, + "learning_rate": 5.9067804573516e-06, + "loss": 0.2943, + "step": 7527 + }, + { + "epoch": 0.65, + "learning_rate": 5.9042476019017304e-06, + "loss": 0.2795, + "step": 7528 + }, + { + "epoch": 0.65, + "learning_rate": 5.901715062132223e-06, + "loss": 0.2703, + "step": 7529 + }, + { + "epoch": 0.65, + "learning_rate": 5.899182838238265e-06, + "loss": 0.2491, + "step": 7530 + }, + { + "epoch": 0.65, + "learning_rate": 5.8966509304150354e-06, + "loss": 0.597, + "step": 7531 + }, + { + "epoch": 0.65, + "learning_rate": 5.894119338857671e-06, + "loss": 0.2837, + "step": 7532 + }, + { + "epoch": 0.65, + "learning_rate": 5.891588063761304e-06, + "loss": 0.2944, + "step": 7533 + }, + { + "epoch": 0.65, + "learning_rate": 5.8890571053210295e-06, + "loss": 0.259, + "step": 7534 + }, + { + "epoch": 0.65, + "learning_rate": 5.886526463731916e-06, + "loss": 0.3053, + "step": 7535 + }, + { + "epoch": 0.65, + "learning_rate": 5.8839961391890234e-06, + "loss": 0.252, + "step": 7536 + }, + { + "epoch": 0.65, + "learning_rate": 5.8814661318873665e-06, + "loss": 0.6045, + "step": 7537 + }, + { + "epoch": 0.65, + "learning_rate": 5.878936442021952e-06, + "loss": 0.3126, + "step": 7538 + }, + { + "epoch": 0.65, + "learning_rate": 5.87640706978775e-06, + "loss": 0.2875, + "step": 7539 + }, + { + "epoch": 0.65, + "learning_rate": 5.873878015379722e-06, + "loss": 0.2729, + "step": 7540 + }, + { + "epoch": 0.65, + "learning_rate": 5.871349278992786e-06, + "loss": 0.2519, + "step": 7541 + }, + { + "epoch": 0.65, + "learning_rate": 5.868820860821844e-06, + "loss": 0.2601, + "step": 7542 + }, + { + "epoch": 0.65, + "learning_rate": 5.866292761061781e-06, + "loss": 0.2795, + "step": 7543 + }, + { + "epoch": 0.65, + "learning_rate": 5.863764979907446e-06, + "loss": 0.2809, + "step": 7544 + }, + { + "epoch": 0.65, + "learning_rate": 5.861237517553669e-06, + "loss": 0.2676, + "step": 7545 + }, + { + "epoch": 0.65, + "learning_rate": 5.858710374195251e-06, + "loss": 0.311, + "step": 7546 + }, + { + "epoch": 0.65, + "learning_rate": 5.8561835500269795e-06, + "loss": 0.2461, + "step": 7547 + }, + { + "epoch": 0.65, + "learning_rate": 5.853657045243604e-06, + "loss": 0.3074, + "step": 7548 + }, + { + "epoch": 0.65, + "learning_rate": 5.851130860039851e-06, + "loss": 0.2747, + "step": 7549 + }, + { + "epoch": 0.65, + "learning_rate": 5.848604994610434e-06, + "loss": 0.351, + "step": 7550 + }, + { + "epoch": 0.65, + "learning_rate": 5.8460794491500325e-06, + "loss": 0.2626, + "step": 7551 + }, + { + "epoch": 0.65, + "learning_rate": 5.843554223853303e-06, + "loss": 0.3003, + "step": 7552 + }, + { + "epoch": 0.65, + "learning_rate": 5.8410293189148704e-06, + "loss": 0.299, + "step": 7553 + }, + { + "epoch": 0.65, + "learning_rate": 5.838504734529353e-06, + "loss": 0.2704, + "step": 7554 + }, + { + "epoch": 0.65, + "learning_rate": 5.835980470891327e-06, + "loss": 0.2344, + "step": 7555 + }, + { + "epoch": 0.65, + "learning_rate": 5.83345652819535e-06, + "loss": 0.2782, + "step": 7556 + }, + { + "epoch": 0.65, + "learning_rate": 5.8309329066359575e-06, + "loss": 0.288, + "step": 7557 + }, + { + "epoch": 0.65, + "learning_rate": 5.828409606407659e-06, + "loss": 0.2661, + "step": 7558 + }, + { + "epoch": 0.65, + "learning_rate": 5.825886627704933e-06, + "loss": 0.2749, + "step": 7559 + }, + { + "epoch": 0.65, + "learning_rate": 5.823363970722237e-06, + "loss": 0.258, + "step": 7560 + }, + { + "epoch": 0.65, + "learning_rate": 5.820841635654015e-06, + "loss": 0.3162, + "step": 7561 + }, + { + "epoch": 0.65, + "learning_rate": 5.818319622694668e-06, + "loss": 0.5818, + "step": 7562 + }, + { + "epoch": 0.65, + "learning_rate": 5.815797932038581e-06, + "loss": 0.2512, + "step": 7563 + }, + { + "epoch": 0.65, + "learning_rate": 5.813276563880114e-06, + "loss": 0.2894, + "step": 7564 + }, + { + "epoch": 0.65, + "learning_rate": 5.810755518413605e-06, + "loss": 0.2413, + "step": 7565 + }, + { + "epoch": 0.65, + "learning_rate": 5.8082347958333625e-06, + "loss": 0.2911, + "step": 7566 + }, + { + "epoch": 0.65, + "learning_rate": 5.805714396333662e-06, + "loss": 0.2607, + "step": 7567 + }, + { + "epoch": 0.65, + "learning_rate": 5.8031943201087805e-06, + "loss": 0.2923, + "step": 7568 + }, + { + "epoch": 0.65, + "learning_rate": 5.800674567352938e-06, + "loss": 0.3039, + "step": 7569 + }, + { + "epoch": 0.65, + "learning_rate": 5.798155138260352e-06, + "loss": 0.2632, + "step": 7570 + }, + { + "epoch": 0.65, + "learning_rate": 5.795636033025205e-06, + "loss": 0.2619, + "step": 7571 + }, + { + "epoch": 0.65, + "learning_rate": 5.793117251841659e-06, + "loss": 0.2997, + "step": 7572 + }, + { + "epoch": 0.65, + "learning_rate": 5.790598794903847e-06, + "loss": 0.3199, + "step": 7573 + }, + { + "epoch": 0.65, + "learning_rate": 5.788080662405881e-06, + "loss": 0.2531, + "step": 7574 + }, + { + "epoch": 0.65, + "learning_rate": 5.7855628545418515e-06, + "loss": 0.2776, + "step": 7575 + }, + { + "epoch": 0.65, + "learning_rate": 5.783045371505809e-06, + "loss": 0.295, + "step": 7576 + }, + { + "epoch": 0.65, + "learning_rate": 5.780528213491792e-06, + "loss": 0.2551, + "step": 7577 + }, + { + "epoch": 0.65, + "learning_rate": 5.7780113806938095e-06, + "loss": 0.2497, + "step": 7578 + }, + { + "epoch": 0.65, + "learning_rate": 5.77549487330585e-06, + "loss": 0.2665, + "step": 7579 + }, + { + "epoch": 0.65, + "learning_rate": 5.772978691521871e-06, + "loss": 0.2389, + "step": 7580 + }, + { + "epoch": 0.65, + "learning_rate": 5.770462835535809e-06, + "loss": 0.3396, + "step": 7581 + }, + { + "epoch": 0.65, + "learning_rate": 5.767947305541577e-06, + "loss": 0.2678, + "step": 7582 + }, + { + "epoch": 0.65, + "learning_rate": 5.7654321017330505e-06, + "loss": 0.3085, + "step": 7583 + }, + { + "epoch": 0.65, + "learning_rate": 5.762917224304094e-06, + "loss": 0.2903, + "step": 7584 + }, + { + "epoch": 0.65, + "learning_rate": 5.760402673448544e-06, + "loss": 0.2639, + "step": 7585 + }, + { + "epoch": 0.65, + "learning_rate": 5.757888449360205e-06, + "loss": 0.3151, + "step": 7586 + }, + { + "epoch": 0.65, + "learning_rate": 5.755374552232864e-06, + "loss": 0.2701, + "step": 7587 + }, + { + "epoch": 0.65, + "learning_rate": 5.75286098226028e-06, + "loss": 0.3142, + "step": 7588 + }, + { + "epoch": 0.65, + "learning_rate": 5.750347739636188e-06, + "loss": 0.3275, + "step": 7589 + }, + { + "epoch": 0.65, + "learning_rate": 5.747834824554293e-06, + "loss": 0.2758, + "step": 7590 + }, + { + "epoch": 0.65, + "learning_rate": 5.745322237208273e-06, + "loss": 0.3183, + "step": 7591 + }, + { + "epoch": 0.65, + "learning_rate": 5.7428099777918e-06, + "loss": 0.3133, + "step": 7592 + }, + { + "epoch": 0.65, + "learning_rate": 5.740298046498496e-06, + "loss": 0.2865, + "step": 7593 + }, + { + "epoch": 0.65, + "learning_rate": 5.737786443521968e-06, + "loss": 0.3123, + "step": 7594 + }, + { + "epoch": 0.65, + "learning_rate": 5.7352751690558025e-06, + "loss": 0.2478, + "step": 7595 + }, + { + "epoch": 0.65, + "learning_rate": 5.732764223293559e-06, + "loss": 0.2595, + "step": 7596 + }, + { + "epoch": 0.65, + "learning_rate": 5.730253606428759e-06, + "loss": 0.2635, + "step": 7597 + }, + { + "epoch": 0.65, + "learning_rate": 5.727743318654911e-06, + "loss": 0.2667, + "step": 7598 + }, + { + "epoch": 0.65, + "learning_rate": 5.725233360165505e-06, + "loss": 0.2758, + "step": 7599 + }, + { + "epoch": 0.65, + "learning_rate": 5.722723731153986e-06, + "loss": 0.2652, + "step": 7600 + }, + { + "epoch": 0.65, + "learning_rate": 5.720214431813786e-06, + "loss": 0.3037, + "step": 7601 + }, + { + "epoch": 0.65, + "learning_rate": 5.717705462338311e-06, + "loss": 0.2795, + "step": 7602 + }, + { + "epoch": 0.65, + "learning_rate": 5.71519682292094e-06, + "loss": 0.2213, + "step": 7603 + }, + { + "epoch": 0.65, + "learning_rate": 5.71268851375503e-06, + "loss": 0.2542, + "step": 7604 + }, + { + "epoch": 0.65, + "learning_rate": 5.710180535033897e-06, + "loss": 0.3026, + "step": 7605 + }, + { + "epoch": 0.65, + "learning_rate": 5.707672886950859e-06, + "loss": 0.3079, + "step": 7606 + }, + { + "epoch": 0.65, + "learning_rate": 5.7051655696991825e-06, + "loss": 0.2803, + "step": 7607 + }, + { + "epoch": 0.65, + "learning_rate": 5.7026585834721225e-06, + "loss": 0.2959, + "step": 7608 + }, + { + "epoch": 0.65, + "learning_rate": 5.7001519284629045e-06, + "loss": 0.2888, + "step": 7609 + }, + { + "epoch": 0.65, + "learning_rate": 5.697645604864732e-06, + "loss": 0.2676, + "step": 7610 + }, + { + "epoch": 0.65, + "learning_rate": 5.6951396128707745e-06, + "loss": 0.6045, + "step": 7611 + }, + { + "epoch": 0.65, + "learning_rate": 5.692633952674187e-06, + "loss": 0.2894, + "step": 7612 + }, + { + "epoch": 0.65, + "learning_rate": 5.6901286244680946e-06, + "loss": 0.2874, + "step": 7613 + }, + { + "epoch": 0.65, + "learning_rate": 5.687623628445588e-06, + "loss": 0.2729, + "step": 7614 + }, + { + "epoch": 0.65, + "learning_rate": 5.685118964799743e-06, + "loss": 0.2735, + "step": 7615 + }, + { + "epoch": 0.65, + "learning_rate": 5.682614633723609e-06, + "loss": 0.2899, + "step": 7616 + }, + { + "epoch": 0.65, + "learning_rate": 5.680110635410205e-06, + "loss": 0.2418, + "step": 7617 + }, + { + "epoch": 0.65, + "learning_rate": 5.67760697005253e-06, + "loss": 0.2729, + "step": 7618 + }, + { + "epoch": 0.65, + "learning_rate": 5.675103637843551e-06, + "loss": 0.2515, + "step": 7619 + }, + { + "epoch": 0.65, + "learning_rate": 5.672600638976218e-06, + "loss": 0.2402, + "step": 7620 + }, + { + "epoch": 0.65, + "learning_rate": 5.67009797364344e-06, + "loss": 0.2917, + "step": 7621 + }, + { + "epoch": 0.65, + "learning_rate": 5.667595642038117e-06, + "loss": 0.5909, + "step": 7622 + }, + { + "epoch": 0.65, + "learning_rate": 5.665093644353115e-06, + "loss": 0.2448, + "step": 7623 + }, + { + "epoch": 0.65, + "learning_rate": 5.662591980781276e-06, + "loss": 0.2631, + "step": 7624 + }, + { + "epoch": 0.65, + "learning_rate": 5.660090651515413e-06, + "loss": 0.2591, + "step": 7625 + }, + { + "epoch": 0.65, + "learning_rate": 5.657589656748321e-06, + "loss": 0.2198, + "step": 7626 + }, + { + "epoch": 0.65, + "learning_rate": 5.655088996672764e-06, + "loss": 0.2787, + "step": 7627 + }, + { + "epoch": 0.65, + "learning_rate": 5.652588671481475e-06, + "loss": 0.2795, + "step": 7628 + }, + { + "epoch": 0.65, + "learning_rate": 5.650088681367166e-06, + "loss": 0.2906, + "step": 7629 + }, + { + "epoch": 0.65, + "learning_rate": 5.647589026522535e-06, + "loss": 0.3565, + "step": 7630 + }, + { + "epoch": 0.65, + "learning_rate": 5.645089707140234e-06, + "loss": 0.2897, + "step": 7631 + }, + { + "epoch": 0.65, + "learning_rate": 5.642590723412898e-06, + "loss": 0.3113, + "step": 7632 + }, + { + "epoch": 0.65, + "learning_rate": 5.64009207553314e-06, + "loss": 0.6165, + "step": 7633 + }, + { + "epoch": 0.65, + "learning_rate": 5.637593763693545e-06, + "loss": 0.298, + "step": 7634 + }, + { + "epoch": 0.65, + "learning_rate": 5.635095788086664e-06, + "loss": 0.2778, + "step": 7635 + }, + { + "epoch": 0.65, + "learning_rate": 5.632598148905027e-06, + "loss": 0.3043, + "step": 7636 + }, + { + "epoch": 0.65, + "learning_rate": 5.630100846341153e-06, + "loss": 0.2673, + "step": 7637 + }, + { + "epoch": 0.65, + "learning_rate": 5.627603880587511e-06, + "loss": 0.2756, + "step": 7638 + }, + { + "epoch": 0.65, + "learning_rate": 5.625107251836556e-06, + "loss": 0.2678, + "step": 7639 + }, + { + "epoch": 0.65, + "learning_rate": 5.622610960280717e-06, + "loss": 0.3, + "step": 7640 + }, + { + "epoch": 0.65, + "learning_rate": 5.620115006112396e-06, + "loss": 0.2802, + "step": 7641 + }, + { + "epoch": 0.66, + "learning_rate": 5.617619389523973e-06, + "loss": 0.2824, + "step": 7642 + }, + { + "epoch": 0.66, + "learning_rate": 5.615124110707786e-06, + "loss": 0.3045, + "step": 7643 + }, + { + "epoch": 0.66, + "learning_rate": 5.612629169856172e-06, + "loss": 0.2823, + "step": 7644 + }, + { + "epoch": 0.66, + "learning_rate": 5.61013456716142e-06, + "loss": 0.243, + "step": 7645 + }, + { + "epoch": 0.66, + "learning_rate": 5.607640302815806e-06, + "loss": 0.2424, + "step": 7646 + }, + { + "epoch": 0.66, + "learning_rate": 5.605146377011572e-06, + "loss": 0.2728, + "step": 7647 + }, + { + "epoch": 0.66, + "learning_rate": 5.602652789940941e-06, + "loss": 0.3058, + "step": 7648 + }, + { + "epoch": 0.66, + "learning_rate": 5.60015954179611e-06, + "loss": 0.3168, + "step": 7649 + }, + { + "epoch": 0.66, + "learning_rate": 5.597666632769232e-06, + "loss": 0.2645, + "step": 7650 + }, + { + "epoch": 0.66, + "learning_rate": 5.595174063052465e-06, + "loss": 0.2794, + "step": 7651 + }, + { + "epoch": 0.66, + "learning_rate": 5.592681832837913e-06, + "loss": 0.303, + "step": 7652 + }, + { + "epoch": 0.66, + "learning_rate": 5.5901899423176674e-06, + "loss": 0.2482, + "step": 7653 + }, + { + "epoch": 0.66, + "learning_rate": 5.587698391683792e-06, + "loss": 0.2643, + "step": 7654 + }, + { + "epoch": 0.66, + "learning_rate": 5.585207181128323e-06, + "loss": 0.2599, + "step": 7655 + }, + { + "epoch": 0.66, + "learning_rate": 5.5827163108432704e-06, + "loss": 0.247, + "step": 7656 + }, + { + "epoch": 0.66, + "learning_rate": 5.580225781020618e-06, + "loss": 0.3073, + "step": 7657 + }, + { + "epoch": 0.66, + "learning_rate": 5.577735591852327e-06, + "loss": 0.2609, + "step": 7658 + }, + { + "epoch": 0.66, + "learning_rate": 5.575245743530322e-06, + "loss": 0.2872, + "step": 7659 + }, + { + "epoch": 0.66, + "learning_rate": 5.572756236246512e-06, + "loss": 0.3033, + "step": 7660 + }, + { + "epoch": 0.66, + "learning_rate": 5.570267070192776e-06, + "loss": 0.2974, + "step": 7661 + }, + { + "epoch": 0.66, + "learning_rate": 5.567778245560966e-06, + "loss": 0.2669, + "step": 7662 + }, + { + "epoch": 0.66, + "learning_rate": 5.565289762542908e-06, + "loss": 0.2764, + "step": 7663 + }, + { + "epoch": 0.66, + "learning_rate": 5.562801621330402e-06, + "loss": 0.2768, + "step": 7664 + }, + { + "epoch": 0.66, + "learning_rate": 5.560313822115229e-06, + "loss": 0.3415, + "step": 7665 + }, + { + "epoch": 0.66, + "learning_rate": 5.5578263650891225e-06, + "loss": 0.2858, + "step": 7666 + }, + { + "epoch": 0.66, + "learning_rate": 5.555339250443808e-06, + "loss": 0.2777, + "step": 7667 + }, + { + "epoch": 0.66, + "learning_rate": 5.552852478370989e-06, + "loss": 0.2498, + "step": 7668 + }, + { + "epoch": 0.66, + "learning_rate": 5.550366049062323e-06, + "loss": 0.2642, + "step": 7669 + }, + { + "epoch": 0.66, + "learning_rate": 5.547879962709457e-06, + "loss": 0.2096, + "step": 7670 + }, + { + "epoch": 0.66, + "learning_rate": 5.545394219504005e-06, + "loss": 0.281, + "step": 7671 + }, + { + "epoch": 0.66, + "learning_rate": 5.542908819637558e-06, + "loss": 0.308, + "step": 7672 + }, + { + "epoch": 0.66, + "learning_rate": 5.540423763301674e-06, + "loss": 0.2758, + "step": 7673 + }, + { + "epoch": 0.66, + "learning_rate": 5.537939050687886e-06, + "loss": 0.3063, + "step": 7674 + }, + { + "epoch": 0.66, + "learning_rate": 5.535454681987715e-06, + "loss": 0.2714, + "step": 7675 + }, + { + "epoch": 0.66, + "learning_rate": 5.532970657392635e-06, + "loss": 0.2983, + "step": 7676 + }, + { + "epoch": 0.66, + "learning_rate": 5.530486977094104e-06, + "loss": 0.2545, + "step": 7677 + }, + { + "epoch": 0.66, + "learning_rate": 5.528003641283552e-06, + "loss": 0.2699, + "step": 7678 + }, + { + "epoch": 0.66, + "learning_rate": 5.525520650152383e-06, + "loss": 0.2586, + "step": 7679 + }, + { + "epoch": 0.66, + "learning_rate": 5.523038003891976e-06, + "loss": 0.2681, + "step": 7680 + }, + { + "epoch": 0.66, + "learning_rate": 5.5205557026936714e-06, + "loss": 0.2833, + "step": 7681 + }, + { + "epoch": 0.66, + "learning_rate": 5.5180737467488085e-06, + "loss": 0.2584, + "step": 7682 + }, + { + "epoch": 0.66, + "learning_rate": 5.51559213624867e-06, + "loss": 0.2679, + "step": 7683 + }, + { + "epoch": 0.66, + "learning_rate": 5.513110871384532e-06, + "loss": 0.2529, + "step": 7684 + }, + { + "epoch": 0.66, + "learning_rate": 5.510629952347637e-06, + "loss": 0.2674, + "step": 7685 + }, + { + "epoch": 0.66, + "learning_rate": 5.508149379329204e-06, + "loss": 0.243, + "step": 7686 + }, + { + "epoch": 0.66, + "learning_rate": 5.505669152520425e-06, + "loss": 0.2602, + "step": 7687 + }, + { + "epoch": 0.66, + "learning_rate": 5.503189272112452e-06, + "loss": 0.2945, + "step": 7688 + }, + { + "epoch": 0.66, + "learning_rate": 5.50070973829644e-06, + "loss": 0.2533, + "step": 7689 + }, + { + "epoch": 0.66, + "learning_rate": 5.4982305512634845e-06, + "loss": 0.2582, + "step": 7690 + }, + { + "epoch": 0.66, + "learning_rate": 5.495751711204675e-06, + "loss": 0.2778, + "step": 7691 + }, + { + "epoch": 0.66, + "learning_rate": 5.493273218311067e-06, + "loss": 0.2935, + "step": 7692 + }, + { + "epoch": 0.66, + "learning_rate": 5.490795072773692e-06, + "loss": 0.283, + "step": 7693 + }, + { + "epoch": 0.66, + "learning_rate": 5.48831727478355e-06, + "loss": 0.262, + "step": 7694 + }, + { + "epoch": 0.66, + "learning_rate": 5.485839824531621e-06, + "loss": 0.3019, + "step": 7695 + }, + { + "epoch": 0.66, + "learning_rate": 5.483362722208858e-06, + "loss": 0.2448, + "step": 7696 + }, + { + "epoch": 0.66, + "learning_rate": 5.4808859680061734e-06, + "loss": 0.2444, + "step": 7697 + }, + { + "epoch": 0.66, + "learning_rate": 5.478409562114469e-06, + "loss": 0.2437, + "step": 7698 + }, + { + "epoch": 0.66, + "learning_rate": 5.4759335047246154e-06, + "loss": 0.2203, + "step": 7699 + }, + { + "epoch": 0.66, + "learning_rate": 5.4734577960274515e-06, + "loss": 0.2722, + "step": 7700 + }, + { + "epoch": 0.66, + "learning_rate": 5.4709824362137945e-06, + "loss": 0.2975, + "step": 7701 + }, + { + "epoch": 0.66, + "learning_rate": 5.4685074254744346e-06, + "loss": 0.3494, + "step": 7702 + }, + { + "epoch": 0.66, + "learning_rate": 5.4660327640001335e-06, + "loss": 0.2834, + "step": 7703 + }, + { + "epoch": 0.66, + "learning_rate": 5.4635584519816195e-06, + "loss": 0.2932, + "step": 7704 + }, + { + "epoch": 0.66, + "learning_rate": 5.461084489609603e-06, + "loss": 0.2642, + "step": 7705 + }, + { + "epoch": 0.66, + "learning_rate": 5.458610877074773e-06, + "loss": 0.2798, + "step": 7706 + }, + { + "epoch": 0.66, + "learning_rate": 5.456137614567773e-06, + "loss": 0.2859, + "step": 7707 + }, + { + "epoch": 0.66, + "learning_rate": 5.453664702279235e-06, + "loss": 0.2778, + "step": 7708 + }, + { + "epoch": 0.66, + "learning_rate": 5.451192140399757e-06, + "loss": 0.2631, + "step": 7709 + }, + { + "epoch": 0.66, + "learning_rate": 5.448719929119916e-06, + "loss": 0.3259, + "step": 7710 + }, + { + "epoch": 0.66, + "learning_rate": 5.446248068630251e-06, + "loss": 0.2751, + "step": 7711 + }, + { + "epoch": 0.66, + "learning_rate": 5.443776559121279e-06, + "loss": 0.2667, + "step": 7712 + }, + { + "epoch": 0.66, + "learning_rate": 5.4413054007835055e-06, + "loss": 0.2521, + "step": 7713 + }, + { + "epoch": 0.66, + "learning_rate": 5.4388345938073824e-06, + "loss": 0.2806, + "step": 7714 + }, + { + "epoch": 0.66, + "learning_rate": 5.436364138383352e-06, + "loss": 0.2941, + "step": 7715 + }, + { + "epoch": 0.66, + "learning_rate": 5.433894034701824e-06, + "loss": 0.319, + "step": 7716 + }, + { + "epoch": 0.66, + "learning_rate": 5.431424282953181e-06, + "loss": 0.277, + "step": 7717 + }, + { + "epoch": 0.66, + "learning_rate": 5.4289548833277865e-06, + "loss": 0.6084, + "step": 7718 + }, + { + "epoch": 0.66, + "learning_rate": 5.426485836015953e-06, + "loss": 0.6002, + "step": 7719 + }, + { + "epoch": 0.66, + "learning_rate": 5.424017141208002e-06, + "loss": 0.2696, + "step": 7720 + }, + { + "epoch": 0.66, + "learning_rate": 5.421548799094196e-06, + "loss": 0.2503, + "step": 7721 + }, + { + "epoch": 0.66, + "learning_rate": 5.419080809864785e-06, + "loss": 0.3042, + "step": 7722 + }, + { + "epoch": 0.66, + "learning_rate": 5.41661317370999e-06, + "loss": 0.3444, + "step": 7723 + }, + { + "epoch": 0.66, + "learning_rate": 5.414145890820004e-06, + "loss": 0.2747, + "step": 7724 + }, + { + "epoch": 0.66, + "learning_rate": 5.411678961384998e-06, + "loss": 0.2639, + "step": 7725 + }, + { + "epoch": 0.66, + "learning_rate": 5.409212385595098e-06, + "loss": 0.2527, + "step": 7726 + }, + { + "epoch": 0.66, + "learning_rate": 5.406746163640432e-06, + "loss": 0.2514, + "step": 7727 + }, + { + "epoch": 0.66, + "learning_rate": 5.404280295711071e-06, + "loss": 0.2802, + "step": 7728 + }, + { + "epoch": 0.66, + "learning_rate": 5.401814781997077e-06, + "loss": 0.2713, + "step": 7729 + }, + { + "epoch": 0.66, + "learning_rate": 5.399349622688479e-06, + "loss": 0.2701, + "step": 7730 + }, + { + "epoch": 0.66, + "learning_rate": 5.396884817975281e-06, + "loss": 0.2708, + "step": 7731 + }, + { + "epoch": 0.66, + "learning_rate": 5.394420368047459e-06, + "loss": 0.2332, + "step": 7732 + }, + { + "epoch": 0.66, + "learning_rate": 5.391956273094952e-06, + "loss": 0.308, + "step": 7733 + }, + { + "epoch": 0.66, + "learning_rate": 5.389492533307692e-06, + "loss": 0.28, + "step": 7734 + }, + { + "epoch": 0.66, + "learning_rate": 5.387029148875563e-06, + "loss": 0.2489, + "step": 7735 + }, + { + "epoch": 0.66, + "learning_rate": 5.384566119988435e-06, + "loss": 0.2212, + "step": 7736 + }, + { + "epoch": 0.66, + "learning_rate": 5.382103446836144e-06, + "loss": 0.2369, + "step": 7737 + }, + { + "epoch": 0.66, + "learning_rate": 5.379641129608501e-06, + "loss": 0.2687, + "step": 7738 + }, + { + "epoch": 0.66, + "learning_rate": 5.377179168495292e-06, + "loss": 0.2842, + "step": 7739 + }, + { + "epoch": 0.66, + "learning_rate": 5.374717563686269e-06, + "loss": 0.226, + "step": 7740 + }, + { + "epoch": 0.66, + "learning_rate": 5.372256315371167e-06, + "loss": 0.2617, + "step": 7741 + }, + { + "epoch": 0.66, + "learning_rate": 5.3697954237396764e-06, + "loss": 0.2941, + "step": 7742 + }, + { + "epoch": 0.66, + "learning_rate": 5.367334888981474e-06, + "loss": 0.269, + "step": 7743 + }, + { + "epoch": 0.66, + "learning_rate": 5.3648747112862145e-06, + "loss": 0.2697, + "step": 7744 + }, + { + "epoch": 0.66, + "learning_rate": 5.362414890843504e-06, + "loss": 0.3158, + "step": 7745 + }, + { + "epoch": 0.66, + "learning_rate": 5.3599554278429415e-06, + "loss": 0.3206, + "step": 7746 + }, + { + "epoch": 0.66, + "learning_rate": 5.357496322474086e-06, + "loss": 0.2864, + "step": 7747 + }, + { + "epoch": 0.66, + "learning_rate": 5.355037574926478e-06, + "loss": 0.2402, + "step": 7748 + }, + { + "epoch": 0.66, + "learning_rate": 5.35257918538962e-06, + "loss": 0.2917, + "step": 7749 + }, + { + "epoch": 0.66, + "learning_rate": 5.35012115405299e-06, + "loss": 0.2921, + "step": 7750 + }, + { + "epoch": 0.66, + "learning_rate": 5.347663481106053e-06, + "loss": 0.3192, + "step": 7751 + }, + { + "epoch": 0.66, + "learning_rate": 5.345206166738225e-06, + "loss": 0.3267, + "step": 7752 + }, + { + "epoch": 0.66, + "learning_rate": 5.3427492111389045e-06, + "loss": 0.2687, + "step": 7753 + }, + { + "epoch": 0.66, + "learning_rate": 5.3402926144974625e-06, + "loss": 0.2976, + "step": 7754 + }, + { + "epoch": 0.66, + "learning_rate": 5.337836377003242e-06, + "loss": 0.2578, + "step": 7755 + }, + { + "epoch": 0.66, + "learning_rate": 5.335380498845559e-06, + "loss": 0.2601, + "step": 7756 + }, + { + "epoch": 0.66, + "learning_rate": 5.332924980213694e-06, + "loss": 0.2804, + "step": 7757 + }, + { + "epoch": 0.67, + "learning_rate": 5.330469821296916e-06, + "loss": 0.2832, + "step": 7758 + }, + { + "epoch": 0.67, + "learning_rate": 5.32801502228445e-06, + "loss": 0.292, + "step": 7759 + }, + { + "epoch": 0.67, + "learning_rate": 5.325560583365499e-06, + "loss": 0.2629, + "step": 7760 + }, + { + "epoch": 0.67, + "learning_rate": 5.323106504729241e-06, + "loss": 0.2498, + "step": 7761 + }, + { + "epoch": 0.67, + "learning_rate": 5.320652786564826e-06, + "loss": 0.2895, + "step": 7762 + }, + { + "epoch": 0.67, + "learning_rate": 5.318199429061375e-06, + "loss": 0.2509, + "step": 7763 + }, + { + "epoch": 0.67, + "learning_rate": 5.315746432407972e-06, + "loss": 0.2601, + "step": 7764 + }, + { + "epoch": 0.67, + "learning_rate": 5.313293796793696e-06, + "loss": 0.2717, + "step": 7765 + }, + { + "epoch": 0.67, + "learning_rate": 5.3108415224075725e-06, + "loss": 0.2889, + "step": 7766 + }, + { + "epoch": 0.67, + "learning_rate": 5.308389609438615e-06, + "loss": 0.2663, + "step": 7767 + }, + { + "epoch": 0.67, + "learning_rate": 5.305938058075804e-06, + "loss": 0.3458, + "step": 7768 + }, + { + "epoch": 0.67, + "learning_rate": 5.303486868508093e-06, + "loss": 0.2789, + "step": 7769 + }, + { + "epoch": 0.67, + "learning_rate": 5.301036040924412e-06, + "loss": 0.26, + "step": 7770 + }, + { + "epoch": 0.67, + "learning_rate": 5.298585575513649e-06, + "loss": 0.3165, + "step": 7771 + }, + { + "epoch": 0.67, + "learning_rate": 5.296135472464686e-06, + "loss": 0.2643, + "step": 7772 + }, + { + "epoch": 0.67, + "learning_rate": 5.293685731966353e-06, + "loss": 0.2613, + "step": 7773 + }, + { + "epoch": 0.67, + "learning_rate": 5.2912363542074695e-06, + "loss": 0.2805, + "step": 7774 + }, + { + "epoch": 0.67, + "learning_rate": 5.288787339376822e-06, + "loss": 0.3005, + "step": 7775 + }, + { + "epoch": 0.67, + "learning_rate": 5.2863386876631674e-06, + "loss": 0.2213, + "step": 7776 + }, + { + "epoch": 0.67, + "learning_rate": 5.2838903992552355e-06, + "loss": 0.3118, + "step": 7777 + }, + { + "epoch": 0.67, + "learning_rate": 5.281442474341729e-06, + "loss": 0.2983, + "step": 7778 + }, + { + "epoch": 0.67, + "learning_rate": 5.278994913111326e-06, + "loss": 0.2526, + "step": 7779 + }, + { + "epoch": 0.67, + "learning_rate": 5.276547715752663e-06, + "loss": 0.2634, + "step": 7780 + }, + { + "epoch": 0.67, + "learning_rate": 5.274100882454364e-06, + "loss": 0.2818, + "step": 7781 + }, + { + "epoch": 0.67, + "learning_rate": 5.271654413405016e-06, + "loss": 0.3231, + "step": 7782 + }, + { + "epoch": 0.67, + "learning_rate": 5.269208308793183e-06, + "loss": 0.2935, + "step": 7783 + }, + { + "epoch": 0.67, + "learning_rate": 5.2667625688074e-06, + "loss": 0.2848, + "step": 7784 + }, + { + "epoch": 0.67, + "learning_rate": 5.264317193636168e-06, + "loss": 0.2892, + "step": 7785 + }, + { + "epoch": 0.67, + "learning_rate": 5.261872183467972e-06, + "loss": 0.2807, + "step": 7786 + }, + { + "epoch": 0.67, + "learning_rate": 5.2594275384912526e-06, + "loss": 0.2913, + "step": 7787 + }, + { + "epoch": 0.67, + "learning_rate": 5.25698325889443e-06, + "loss": 0.2629, + "step": 7788 + }, + { + "epoch": 0.67, + "learning_rate": 5.25453934486591e-06, + "loss": 0.2546, + "step": 7789 + }, + { + "epoch": 0.67, + "learning_rate": 5.252095796594046e-06, + "loss": 0.2908, + "step": 7790 + }, + { + "epoch": 0.67, + "learning_rate": 5.249652614267178e-06, + "loss": 0.2775, + "step": 7791 + }, + { + "epoch": 0.67, + "learning_rate": 5.247209798073614e-06, + "loss": 0.2923, + "step": 7792 + }, + { + "epoch": 0.67, + "learning_rate": 5.2447673482016335e-06, + "loss": 0.2929, + "step": 7793 + }, + { + "epoch": 0.67, + "learning_rate": 5.242325264839494e-06, + "loss": 0.2772, + "step": 7794 + }, + { + "epoch": 0.67, + "learning_rate": 5.239883548175407e-06, + "loss": 0.6228, + "step": 7795 + }, + { + "epoch": 0.67, + "learning_rate": 5.2374421983975846e-06, + "loss": 0.3563, + "step": 7796 + }, + { + "epoch": 0.67, + "learning_rate": 5.23500121569418e-06, + "loss": 0.2564, + "step": 7797 + }, + { + "epoch": 0.67, + "learning_rate": 5.232560600253336e-06, + "loss": 0.2518, + "step": 7798 + }, + { + "epoch": 0.67, + "learning_rate": 5.230120352263166e-06, + "loss": 0.2386, + "step": 7799 + }, + { + "epoch": 0.67, + "learning_rate": 5.2276804719117504e-06, + "loss": 0.3195, + "step": 7800 + }, + { + "epoch": 0.67, + "learning_rate": 5.225240959387147e-06, + "loss": 0.2998, + "step": 7801 + }, + { + "epoch": 0.67, + "learning_rate": 5.22280181487737e-06, + "loss": 0.2983, + "step": 7802 + }, + { + "epoch": 0.67, + "learning_rate": 5.220363038570432e-06, + "loss": 0.2548, + "step": 7803 + }, + { + "epoch": 0.67, + "learning_rate": 5.21792463065429e-06, + "loss": 0.2581, + "step": 7804 + }, + { + "epoch": 0.67, + "learning_rate": 5.215486591316888e-06, + "loss": 0.3034, + "step": 7805 + }, + { + "epoch": 0.67, + "learning_rate": 5.21304892074614e-06, + "loss": 0.2719, + "step": 7806 + }, + { + "epoch": 0.67, + "learning_rate": 5.210611619129927e-06, + "loss": 0.2829, + "step": 7807 + }, + { + "epoch": 0.67, + "learning_rate": 5.20817468665611e-06, + "loss": 0.2479, + "step": 7808 + }, + { + "epoch": 0.67, + "learning_rate": 5.205738123512503e-06, + "loss": 0.2667, + "step": 7809 + }, + { + "epoch": 0.67, + "learning_rate": 5.20330192988692e-06, + "loss": 0.2583, + "step": 7810 + }, + { + "epoch": 0.67, + "learning_rate": 5.200866105967119e-06, + "loss": 0.2859, + "step": 7811 + }, + { + "epoch": 0.67, + "learning_rate": 5.198430651940846e-06, + "loss": 0.2289, + "step": 7812 + }, + { + "epoch": 0.67, + "learning_rate": 5.195995567995813e-06, + "loss": 0.2433, + "step": 7813 + }, + { + "epoch": 0.67, + "learning_rate": 5.1935608543197035e-06, + "loss": 0.3417, + "step": 7814 + }, + { + "epoch": 0.67, + "learning_rate": 5.191126511100179e-06, + "loss": 0.2767, + "step": 7815 + }, + { + "epoch": 0.67, + "learning_rate": 5.188692538524854e-06, + "loss": 0.2842, + "step": 7816 + }, + { + "epoch": 0.67, + "learning_rate": 5.186258936781341e-06, + "loss": 0.3012, + "step": 7817 + }, + { + "epoch": 0.67, + "learning_rate": 5.183825706057199e-06, + "loss": 0.2468, + "step": 7818 + }, + { + "epoch": 0.67, + "learning_rate": 5.1813928465399765e-06, + "loss": 0.3082, + "step": 7819 + }, + { + "epoch": 0.67, + "learning_rate": 5.178960358417184e-06, + "loss": 0.2372, + "step": 7820 + }, + { + "epoch": 0.67, + "learning_rate": 5.1765282418763045e-06, + "loss": 0.3051, + "step": 7821 + }, + { + "epoch": 0.67, + "learning_rate": 5.1740964971047945e-06, + "loss": 0.3117, + "step": 7822 + }, + { + "epoch": 0.67, + "learning_rate": 5.171665124290082e-06, + "loss": 0.2623, + "step": 7823 + }, + { + "epoch": 0.67, + "learning_rate": 5.169234123619569e-06, + "loss": 0.2814, + "step": 7824 + }, + { + "epoch": 0.67, + "learning_rate": 5.166803495280614e-06, + "loss": 0.2733, + "step": 7825 + }, + { + "epoch": 0.67, + "learning_rate": 5.164373239460561e-06, + "loss": 0.2468, + "step": 7826 + }, + { + "epoch": 0.67, + "learning_rate": 5.161943356346734e-06, + "loss": 0.2553, + "step": 7827 + }, + { + "epoch": 0.67, + "learning_rate": 5.159513846126403e-06, + "loss": 0.2561, + "step": 7828 + }, + { + "epoch": 0.67, + "learning_rate": 5.157084708986826e-06, + "loss": 0.2724, + "step": 7829 + }, + { + "epoch": 0.67, + "learning_rate": 5.154655945115233e-06, + "loss": 0.2518, + "step": 7830 + }, + { + "epoch": 0.67, + "learning_rate": 5.152227554698814e-06, + "loss": 0.2718, + "step": 7831 + }, + { + "epoch": 0.67, + "learning_rate": 5.149799537924749e-06, + "loss": 0.2728, + "step": 7832 + }, + { + "epoch": 0.67, + "learning_rate": 5.14737189498016e-06, + "loss": 0.3082, + "step": 7833 + }, + { + "epoch": 0.67, + "learning_rate": 5.144944626052178e-06, + "loss": 0.2567, + "step": 7834 + }, + { + "epoch": 0.67, + "learning_rate": 5.142517731327868e-06, + "loss": 0.2644, + "step": 7835 + }, + { + "epoch": 0.67, + "learning_rate": 5.1400912109942915e-06, + "loss": 0.3019, + "step": 7836 + }, + { + "epoch": 0.67, + "learning_rate": 5.137665065238471e-06, + "loss": 0.2584, + "step": 7837 + }, + { + "epoch": 0.67, + "learning_rate": 5.1352392942474005e-06, + "loss": 0.2751, + "step": 7838 + }, + { + "epoch": 0.67, + "learning_rate": 5.132813898208053e-06, + "loss": 0.2803, + "step": 7839 + }, + { + "epoch": 0.67, + "learning_rate": 5.130388877307353e-06, + "loss": 0.2812, + "step": 7840 + }, + { + "epoch": 0.67, + "learning_rate": 5.127964231732223e-06, + "loss": 0.3066, + "step": 7841 + }, + { + "epoch": 0.67, + "learning_rate": 5.1255399616695345e-06, + "loss": 0.2782, + "step": 7842 + }, + { + "epoch": 0.67, + "learning_rate": 5.12311606730614e-06, + "loss": 0.2733, + "step": 7843 + }, + { + "epoch": 0.67, + "learning_rate": 5.12069254882886e-06, + "loss": 0.2774, + "step": 7844 + }, + { + "epoch": 0.67, + "learning_rate": 5.118269406424492e-06, + "loss": 0.275, + "step": 7845 + }, + { + "epoch": 0.67, + "learning_rate": 5.115846640279798e-06, + "loss": 0.2661, + "step": 7846 + }, + { + "epoch": 0.67, + "learning_rate": 5.113424250581505e-06, + "loss": 0.3121, + "step": 7847 + }, + { + "epoch": 0.67, + "learning_rate": 5.111002237516334e-06, + "loss": 0.3069, + "step": 7848 + }, + { + "epoch": 0.67, + "learning_rate": 5.108580601270947e-06, + "loss": 0.2819, + "step": 7849 + }, + { + "epoch": 0.67, + "learning_rate": 5.106159342032e-06, + "loss": 0.272, + "step": 7850 + }, + { + "epoch": 0.67, + "learning_rate": 5.10373845998611e-06, + "loss": 0.2482, + "step": 7851 + }, + { + "epoch": 0.67, + "learning_rate": 5.101317955319866e-06, + "loss": 0.2791, + "step": 7852 + }, + { + "epoch": 0.67, + "learning_rate": 5.098897828219831e-06, + "loss": 0.2651, + "step": 7853 + }, + { + "epoch": 0.67, + "learning_rate": 5.096478078872528e-06, + "loss": 0.2767, + "step": 7854 + }, + { + "epoch": 0.67, + "learning_rate": 5.094058707464474e-06, + "loss": 0.2966, + "step": 7855 + }, + { + "epoch": 0.67, + "learning_rate": 5.091639714182129e-06, + "loss": 0.2748, + "step": 7856 + }, + { + "epoch": 0.67, + "learning_rate": 5.089221099211943e-06, + "loss": 0.3063, + "step": 7857 + }, + { + "epoch": 0.67, + "learning_rate": 5.08680286274033e-06, + "loss": 0.253, + "step": 7858 + }, + { + "epoch": 0.67, + "learning_rate": 5.084385004953674e-06, + "loss": 0.2944, + "step": 7859 + }, + { + "epoch": 0.67, + "learning_rate": 5.081967526038334e-06, + "loss": 0.2981, + "step": 7860 + }, + { + "epoch": 0.67, + "learning_rate": 5.079550426180635e-06, + "loss": 0.2892, + "step": 7861 + }, + { + "epoch": 0.67, + "learning_rate": 5.0771337055668826e-06, + "loss": 0.287, + "step": 7862 + }, + { + "epoch": 0.67, + "learning_rate": 5.074717364383335e-06, + "loss": 0.2927, + "step": 7863 + }, + { + "epoch": 0.67, + "learning_rate": 5.072301402816231e-06, + "loss": 0.2622, + "step": 7864 + }, + { + "epoch": 0.67, + "learning_rate": 5.069885821051796e-06, + "loss": 0.2474, + "step": 7865 + }, + { + "epoch": 0.67, + "learning_rate": 5.067470619276196e-06, + "loss": 0.2961, + "step": 7866 + }, + { + "epoch": 0.67, + "learning_rate": 5.06505579767559e-06, + "loss": 0.2751, + "step": 7867 + }, + { + "epoch": 0.67, + "learning_rate": 5.062641356436098e-06, + "loss": 0.3049, + "step": 7868 + }, + { + "epoch": 0.67, + "learning_rate": 5.060227295743813e-06, + "loss": 0.2701, + "step": 7869 + }, + { + "epoch": 0.67, + "learning_rate": 5.057813615784806e-06, + "loss": 0.5499, + "step": 7870 + }, + { + "epoch": 0.67, + "learning_rate": 5.055400316745096e-06, + "loss": 0.2789, + "step": 7871 + }, + { + "epoch": 0.67, + "learning_rate": 5.052987398810706e-06, + "loss": 0.2699, + "step": 7872 + }, + { + "epoch": 0.67, + "learning_rate": 5.0505748621676e-06, + "loss": 0.2661, + "step": 7873 + }, + { + "epoch": 0.67, + "learning_rate": 5.048162707001727e-06, + "loss": 0.3229, + "step": 7874 + }, + { + "epoch": 0.68, + "learning_rate": 5.045750933499005e-06, + "loss": 0.2847, + "step": 7875 + }, + { + "epoch": 0.68, + "learning_rate": 5.043339541845321e-06, + "loss": 0.295, + "step": 7876 + }, + { + "epoch": 0.68, + "learning_rate": 5.040928532226539e-06, + "loss": 0.2955, + "step": 7877 + }, + { + "epoch": 0.68, + "learning_rate": 5.038517904828473e-06, + "loss": 0.2385, + "step": 7878 + }, + { + "epoch": 0.68, + "learning_rate": 5.03610765983694e-06, + "loss": 0.2499, + "step": 7879 + }, + { + "epoch": 0.68, + "learning_rate": 5.033697797437695e-06, + "loss": 0.2856, + "step": 7880 + }, + { + "epoch": 0.68, + "learning_rate": 5.0312883178164875e-06, + "loss": 0.3056, + "step": 7881 + }, + { + "epoch": 0.68, + "learning_rate": 5.028879221159025e-06, + "loss": 0.2779, + "step": 7882 + }, + { + "epoch": 0.68, + "learning_rate": 5.026470507650988e-06, + "loss": 0.2896, + "step": 7883 + }, + { + "epoch": 0.68, + "learning_rate": 5.024062177478033e-06, + "loss": 0.2896, + "step": 7884 + }, + { + "epoch": 0.68, + "learning_rate": 5.021654230825772e-06, + "loss": 0.2329, + "step": 7885 + }, + { + "epoch": 0.68, + "learning_rate": 5.0192466678798116e-06, + "loss": 0.2853, + "step": 7886 + }, + { + "epoch": 0.68, + "learning_rate": 5.016839488825703e-06, + "loss": 0.2591, + "step": 7887 + }, + { + "epoch": 0.68, + "learning_rate": 5.014432693848985e-06, + "loss": 0.2347, + "step": 7888 + }, + { + "epoch": 0.68, + "learning_rate": 5.0120262831351595e-06, + "loss": 0.2668, + "step": 7889 + }, + { + "epoch": 0.68, + "learning_rate": 5.009620256869703e-06, + "loss": 0.2458, + "step": 7890 + }, + { + "epoch": 0.68, + "learning_rate": 5.007214615238061e-06, + "loss": 0.2581, + "step": 7891 + }, + { + "epoch": 0.68, + "learning_rate": 5.004809358425639e-06, + "loss": 0.2809, + "step": 7892 + }, + { + "epoch": 0.68, + "learning_rate": 5.002404486617839e-06, + "loss": 0.2753, + "step": 7893 + }, + { + "epoch": 0.68, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2739, + "step": 7894 + }, + { + "epoch": 0.68, + "learning_rate": 4.9975958987574604e-06, + "loss": 0.3065, + "step": 7895 + }, + { + "epoch": 0.68, + "learning_rate": 4.995192183075509e-06, + "loss": 0.3094, + "step": 7896 + }, + { + "epoch": 0.68, + "learning_rate": 4.992788853139414e-06, + "loss": 0.3174, + "step": 7897 + }, + { + "epoch": 0.68, + "learning_rate": 4.9903859091344175e-06, + "loss": 0.257, + "step": 7898 + }, + { + "epoch": 0.68, + "learning_rate": 4.987983351245713e-06, + "loss": 0.2749, + "step": 7899 + }, + { + "epoch": 0.68, + "learning_rate": 4.985581179658495e-06, + "loss": 0.2996, + "step": 7900 + }, + { + "epoch": 0.68, + "learning_rate": 4.9831793945578995e-06, + "loss": 0.3027, + "step": 7901 + }, + { + "epoch": 0.68, + "learning_rate": 4.980777996129043e-06, + "loss": 0.283, + "step": 7902 + }, + { + "epoch": 0.68, + "learning_rate": 4.978376984557026e-06, + "loss": 0.2543, + "step": 7903 + }, + { + "epoch": 0.68, + "learning_rate": 4.975976360026894e-06, + "loss": 0.5981, + "step": 7904 + }, + { + "epoch": 0.68, + "learning_rate": 4.97357612272368e-06, + "loss": 0.3047, + "step": 7905 + }, + { + "epoch": 0.68, + "learning_rate": 4.971176272832382e-06, + "loss": 0.2738, + "step": 7906 + }, + { + "epoch": 0.68, + "learning_rate": 4.9687768105379685e-06, + "loss": 0.2667, + "step": 7907 + }, + { + "epoch": 0.68, + "learning_rate": 4.966377736025383e-06, + "loss": 0.284, + "step": 7908 + }, + { + "epoch": 0.68, + "learning_rate": 4.963979049479522e-06, + "loss": 0.2688, + "step": 7909 + }, + { + "epoch": 0.68, + "learning_rate": 4.9615807510852795e-06, + "loss": 0.287, + "step": 7910 + }, + { + "epoch": 0.68, + "learning_rate": 4.959182841027494e-06, + "loss": 0.3025, + "step": 7911 + }, + { + "epoch": 0.68, + "learning_rate": 4.956785319490986e-06, + "loss": 0.2818, + "step": 7912 + }, + { + "epoch": 0.68, + "learning_rate": 4.954388186660548e-06, + "loss": 0.2675, + "step": 7913 + }, + { + "epoch": 0.68, + "learning_rate": 4.951991442720937e-06, + "loss": 0.2991, + "step": 7914 + }, + { + "epoch": 0.68, + "learning_rate": 4.949595087856887e-06, + "loss": 0.2449, + "step": 7915 + }, + { + "epoch": 0.68, + "learning_rate": 4.947199122253083e-06, + "loss": 0.3584, + "step": 7916 + }, + { + "epoch": 0.68, + "learning_rate": 4.944803546094214e-06, + "loss": 0.2966, + "step": 7917 + }, + { + "epoch": 0.68, + "learning_rate": 4.942408359564906e-06, + "loss": 0.2963, + "step": 7918 + }, + { + "epoch": 0.68, + "learning_rate": 4.940013562849769e-06, + "loss": 0.2566, + "step": 7919 + }, + { + "epoch": 0.68, + "learning_rate": 4.937619156133385e-06, + "loss": 0.2581, + "step": 7920 + }, + { + "epoch": 0.68, + "learning_rate": 4.935225139600304e-06, + "loss": 0.2724, + "step": 7921 + }, + { + "epoch": 0.68, + "learning_rate": 4.932831513435045e-06, + "loss": 0.5986, + "step": 7922 + }, + { + "epoch": 0.68, + "learning_rate": 4.93043827782209e-06, + "loss": 0.2997, + "step": 7923 + }, + { + "epoch": 0.68, + "learning_rate": 4.928045432945909e-06, + "loss": 0.2913, + "step": 7924 + }, + { + "epoch": 0.68, + "learning_rate": 4.925652978990921e-06, + "loss": 0.2437, + "step": 7925 + }, + { + "epoch": 0.68, + "learning_rate": 4.92326091614153e-06, + "loss": 0.576, + "step": 7926 + }, + { + "epoch": 0.68, + "learning_rate": 4.920869244582102e-06, + "loss": 0.2755, + "step": 7927 + }, + { + "epoch": 0.68, + "learning_rate": 4.918477964496975e-06, + "loss": 0.2682, + "step": 7928 + }, + { + "epoch": 0.68, + "learning_rate": 4.916087076070462e-06, + "loss": 0.2874, + "step": 7929 + }, + { + "epoch": 0.68, + "learning_rate": 4.913696579486829e-06, + "loss": 0.25, + "step": 7930 + }, + { + "epoch": 0.68, + "learning_rate": 4.91130647493034e-06, + "loss": 0.2695, + "step": 7931 + }, + { + "epoch": 0.68, + "learning_rate": 4.9089167625852e-06, + "loss": 0.2504, + "step": 7932 + }, + { + "epoch": 0.68, + "learning_rate": 4.906527442635599e-06, + "loss": 0.2741, + "step": 7933 + }, + { + "epoch": 0.68, + "learning_rate": 4.904138515265696e-06, + "loss": 0.2186, + "step": 7934 + }, + { + "epoch": 0.68, + "learning_rate": 4.901749980659617e-06, + "loss": 0.2565, + "step": 7935 + }, + { + "epoch": 0.68, + "learning_rate": 4.899361839001462e-06, + "loss": 0.2446, + "step": 7936 + }, + { + "epoch": 0.68, + "learning_rate": 4.896974090475286e-06, + "loss": 0.2684, + "step": 7937 + }, + { + "epoch": 0.68, + "learning_rate": 4.89458673526514e-06, + "loss": 0.2877, + "step": 7938 + }, + { + "epoch": 0.68, + "learning_rate": 4.892199773555018e-06, + "loss": 0.6144, + "step": 7939 + }, + { + "epoch": 0.68, + "learning_rate": 4.889813205528895e-06, + "loss": 0.2814, + "step": 7940 + }, + { + "epoch": 0.68, + "learning_rate": 4.887427031370727e-06, + "loss": 0.2676, + "step": 7941 + }, + { + "epoch": 0.68, + "learning_rate": 4.885041251264419e-06, + "loss": 0.2526, + "step": 7942 + }, + { + "epoch": 0.68, + "learning_rate": 4.882655865393856e-06, + "loss": 0.2839, + "step": 7943 + }, + { + "epoch": 0.68, + "learning_rate": 4.880270873942895e-06, + "loss": 0.2771, + "step": 7944 + }, + { + "epoch": 0.68, + "learning_rate": 4.877886277095356e-06, + "loss": 0.3147, + "step": 7945 + }, + { + "epoch": 0.68, + "learning_rate": 4.875502075035039e-06, + "loss": 0.3027, + "step": 7946 + }, + { + "epoch": 0.68, + "learning_rate": 4.873118267945692e-06, + "loss": 0.2527, + "step": 7947 + }, + { + "epoch": 0.68, + "learning_rate": 4.870734856011066e-06, + "loss": 0.3033, + "step": 7948 + }, + { + "epoch": 0.68, + "learning_rate": 4.868351839414848e-06, + "loss": 0.2677, + "step": 7949 + }, + { + "epoch": 0.68, + "learning_rate": 4.8659692183407135e-06, + "loss": 0.3055, + "step": 7950 + }, + { + "epoch": 0.68, + "learning_rate": 4.863586992972304e-06, + "loss": 0.2398, + "step": 7951 + }, + { + "epoch": 0.68, + "learning_rate": 4.861205163493229e-06, + "loss": 0.2754, + "step": 7952 + }, + { + "epoch": 0.68, + "learning_rate": 4.858823730087072e-06, + "loss": 0.3032, + "step": 7953 + }, + { + "epoch": 0.68, + "learning_rate": 4.856442692937372e-06, + "loss": 0.302, + "step": 7954 + }, + { + "epoch": 0.68, + "learning_rate": 4.854062052227662e-06, + "loss": 0.2488, + "step": 7955 + }, + { + "epoch": 0.68, + "learning_rate": 4.851681808141418e-06, + "loss": 0.2108, + "step": 7956 + }, + { + "epoch": 0.68, + "learning_rate": 4.849301960862101e-06, + "loss": 0.2521, + "step": 7957 + }, + { + "epoch": 0.68, + "learning_rate": 4.846922510573139e-06, + "loss": 0.2726, + "step": 7958 + }, + { + "epoch": 0.68, + "learning_rate": 4.8445434574579275e-06, + "loss": 0.3016, + "step": 7959 + }, + { + "epoch": 0.68, + "learning_rate": 4.842164801699836e-06, + "loss": 0.254, + "step": 7960 + }, + { + "epoch": 0.68, + "learning_rate": 4.8397865434821886e-06, + "loss": 0.294, + "step": 7961 + }, + { + "epoch": 0.68, + "learning_rate": 4.837408682988305e-06, + "loss": 0.2191, + "step": 7962 + }, + { + "epoch": 0.68, + "learning_rate": 4.8350312204014475e-06, + "loss": 0.3165, + "step": 7963 + }, + { + "epoch": 0.68, + "learning_rate": 4.832654155904863e-06, + "loss": 0.2407, + "step": 7964 + }, + { + "epoch": 0.68, + "learning_rate": 4.830277489681762e-06, + "loss": 0.2639, + "step": 7965 + }, + { + "epoch": 0.68, + "learning_rate": 4.8279012219153284e-06, + "loss": 0.3093, + "step": 7966 + }, + { + "epoch": 0.68, + "learning_rate": 4.825525352788716e-06, + "loss": 0.2513, + "step": 7967 + }, + { + "epoch": 0.68, + "learning_rate": 4.8231498824850356e-06, + "loss": 0.2851, + "step": 7968 + }, + { + "epoch": 0.68, + "learning_rate": 4.820774811187389e-06, + "loss": 0.2749, + "step": 7969 + }, + { + "epoch": 0.68, + "learning_rate": 4.818400139078824e-06, + "loss": 0.271, + "step": 7970 + }, + { + "epoch": 0.68, + "learning_rate": 4.816025866342374e-06, + "loss": 0.2677, + "step": 7971 + }, + { + "epoch": 0.68, + "learning_rate": 4.813651993161036e-06, + "loss": 0.2302, + "step": 7972 + }, + { + "epoch": 0.68, + "learning_rate": 4.811278519717775e-06, + "loss": 0.2811, + "step": 7973 + }, + { + "epoch": 0.68, + "learning_rate": 4.808905446195532e-06, + "loss": 0.2969, + "step": 7974 + }, + { + "epoch": 0.68, + "learning_rate": 4.8065327727772005e-06, + "loss": 0.3164, + "step": 7975 + }, + { + "epoch": 0.68, + "learning_rate": 4.804160499645667e-06, + "loss": 0.2782, + "step": 7976 + }, + { + "epoch": 0.68, + "learning_rate": 4.8017886269837665e-06, + "loss": 0.2904, + "step": 7977 + }, + { + "epoch": 0.68, + "learning_rate": 4.7994171549743085e-06, + "loss": 0.2961, + "step": 7978 + }, + { + "epoch": 0.68, + "learning_rate": 4.797046083800087e-06, + "loss": 0.3427, + "step": 7979 + }, + { + "epoch": 0.68, + "learning_rate": 4.794675413643842e-06, + "loss": 0.2944, + "step": 7980 + }, + { + "epoch": 0.68, + "learning_rate": 4.7923051446883e-06, + "loss": 0.278, + "step": 7981 + }, + { + "epoch": 0.68, + "learning_rate": 4.7899352771161355e-06, + "loss": 0.2711, + "step": 7982 + }, + { + "epoch": 0.68, + "learning_rate": 4.787565811110022e-06, + "loss": 0.306, + "step": 7983 + }, + { + "epoch": 0.68, + "learning_rate": 4.785196746852584e-06, + "loss": 0.2742, + "step": 7984 + }, + { + "epoch": 0.68, + "learning_rate": 4.7828280845264056e-06, + "loss": 0.2942, + "step": 7985 + }, + { + "epoch": 0.68, + "learning_rate": 4.7804598243140664e-06, + "loss": 0.2606, + "step": 7986 + }, + { + "epoch": 0.68, + "learning_rate": 4.778091966398091e-06, + "loss": 0.2525, + "step": 7987 + }, + { + "epoch": 0.68, + "learning_rate": 4.775724510960984e-06, + "loss": 0.2756, + "step": 7988 + }, + { + "epoch": 0.68, + "learning_rate": 4.7733574581852185e-06, + "loss": 0.274, + "step": 7989 + }, + { + "epoch": 0.68, + "learning_rate": 4.770990808253234e-06, + "loss": 0.2756, + "step": 7990 + }, + { + "epoch": 0.68, + "learning_rate": 4.7686245613474445e-06, + "loss": 0.2682, + "step": 7991 + }, + { + "epoch": 0.69, + "learning_rate": 4.766258717650218e-06, + "loss": 0.2888, + "step": 7992 + }, + { + "epoch": 0.69, + "learning_rate": 4.763893277343915e-06, + "loss": 0.2914, + "step": 7993 + }, + { + "epoch": 0.69, + "learning_rate": 4.761528240610842e-06, + "loss": 0.608, + "step": 7994 + }, + { + "epoch": 0.69, + "learning_rate": 4.759163607633289e-06, + "loss": 0.2693, + "step": 7995 + }, + { + "epoch": 0.69, + "learning_rate": 4.756799378593509e-06, + "loss": 0.2768, + "step": 7996 + }, + { + "epoch": 0.69, + "learning_rate": 4.754435553673726e-06, + "loss": 0.2953, + "step": 7997 + }, + { + "epoch": 0.69, + "learning_rate": 4.752072133056135e-06, + "loss": 0.2979, + "step": 7998 + }, + { + "epoch": 0.69, + "learning_rate": 4.749709116922886e-06, + "loss": 0.2753, + "step": 7999 + }, + { + "epoch": 0.69, + "learning_rate": 4.747346505456123e-06, + "loss": 0.2813, + "step": 8000 + }, + { + "epoch": 0.69, + "learning_rate": 4.744984298837933e-06, + "loss": 0.2518, + "step": 8001 + }, + { + "epoch": 0.69, + "learning_rate": 4.742622497250389e-06, + "loss": 0.3054, + "step": 8002 + }, + { + "epoch": 0.69, + "learning_rate": 4.740261100875524e-06, + "loss": 0.2849, + "step": 8003 + }, + { + "epoch": 0.69, + "learning_rate": 4.737900109895345e-06, + "loss": 0.2591, + "step": 8004 + }, + { + "epoch": 0.69, + "learning_rate": 4.735539524491828e-06, + "loss": 0.2394, + "step": 8005 + }, + { + "epoch": 0.69, + "learning_rate": 4.7331793448469045e-06, + "loss": 0.2416, + "step": 8006 + }, + { + "epoch": 0.69, + "learning_rate": 4.7308195711425e-06, + "loss": 0.2706, + "step": 8007 + }, + { + "epoch": 0.69, + "learning_rate": 4.7284602035604845e-06, + "loss": 0.2851, + "step": 8008 + }, + { + "epoch": 0.69, + "learning_rate": 4.726101242282708e-06, + "loss": 0.2498, + "step": 8009 + }, + { + "epoch": 0.69, + "learning_rate": 4.723742687490988e-06, + "loss": 0.3086, + "step": 8010 + }, + { + "epoch": 0.69, + "learning_rate": 4.7213845393671136e-06, + "loss": 0.2362, + "step": 8011 + }, + { + "epoch": 0.69, + "learning_rate": 4.719026798092838e-06, + "loss": 0.2952, + "step": 8012 + }, + { + "epoch": 0.69, + "learning_rate": 4.7166694638498755e-06, + "loss": 0.2143, + "step": 8013 + }, + { + "epoch": 0.69, + "learning_rate": 4.7143125368199335e-06, + "loss": 0.2373, + "step": 8014 + }, + { + "epoch": 0.69, + "learning_rate": 4.71195601718466e-06, + "loss": 0.2871, + "step": 8015 + }, + { + "epoch": 0.69, + "learning_rate": 4.709599905125683e-06, + "loss": 0.2692, + "step": 8016 + }, + { + "epoch": 0.69, + "learning_rate": 4.7072442008246135e-06, + "loss": 0.2756, + "step": 8017 + }, + { + "epoch": 0.69, + "learning_rate": 4.704888904463003e-06, + "loss": 0.252, + "step": 8018 + }, + { + "epoch": 0.69, + "learning_rate": 4.702534016222398e-06, + "loss": 0.2737, + "step": 8019 + }, + { + "epoch": 0.69, + "learning_rate": 4.700179536284286e-06, + "loss": 0.2926, + "step": 8020 + }, + { + "epoch": 0.69, + "learning_rate": 4.697825464830153e-06, + "loss": 0.2598, + "step": 8021 + }, + { + "epoch": 0.69, + "learning_rate": 4.695471802041437e-06, + "loss": 0.2375, + "step": 8022 + }, + { + "epoch": 0.69, + "learning_rate": 4.693118548099538e-06, + "loss": 0.2793, + "step": 8023 + }, + { + "epoch": 0.69, + "learning_rate": 4.690765703185846e-06, + "loss": 0.3109, + "step": 8024 + }, + { + "epoch": 0.69, + "learning_rate": 4.6884132674816964e-06, + "loss": 0.2486, + "step": 8025 + }, + { + "epoch": 0.69, + "learning_rate": 4.686061241168406e-06, + "loss": 0.2942, + "step": 8026 + }, + { + "epoch": 0.69, + "learning_rate": 4.6837096244272586e-06, + "loss": 0.2729, + "step": 8027 + }, + { + "epoch": 0.69, + "learning_rate": 4.681358417439505e-06, + "loss": 0.5831, + "step": 8028 + }, + { + "epoch": 0.69, + "learning_rate": 4.679007620386369e-06, + "loss": 0.3109, + "step": 8029 + }, + { + "epoch": 0.69, + "learning_rate": 4.676657233449025e-06, + "loss": 0.2721, + "step": 8030 + }, + { + "epoch": 0.69, + "learning_rate": 4.674307256808646e-06, + "loss": 0.2817, + "step": 8031 + }, + { + "epoch": 0.69, + "learning_rate": 4.6719576906463445e-06, + "loss": 0.2647, + "step": 8032 + }, + { + "epoch": 0.69, + "learning_rate": 4.669608535143218e-06, + "loss": 0.3099, + "step": 8033 + }, + { + "epoch": 0.69, + "learning_rate": 4.667259790480327e-06, + "loss": 0.3391, + "step": 8034 + }, + { + "epoch": 0.69, + "learning_rate": 4.6649114568387024e-06, + "loss": 0.2907, + "step": 8035 + }, + { + "epoch": 0.69, + "learning_rate": 4.662563534399345e-06, + "loss": 0.2653, + "step": 8036 + }, + { + "epoch": 0.69, + "learning_rate": 4.6602160233432085e-06, + "loss": 0.2228, + "step": 8037 + }, + { + "epoch": 0.69, + "learning_rate": 4.657868923851244e-06, + "loss": 0.252, + "step": 8038 + }, + { + "epoch": 0.69, + "learning_rate": 4.655522236104344e-06, + "loss": 0.2551, + "step": 8039 + }, + { + "epoch": 0.69, + "learning_rate": 4.6531759602833815e-06, + "loss": 0.345, + "step": 8040 + }, + { + "epoch": 0.69, + "learning_rate": 4.650830096569196e-06, + "loss": 0.2882, + "step": 8041 + }, + { + "epoch": 0.69, + "learning_rate": 4.648484645142597e-06, + "loss": 0.2986, + "step": 8042 + }, + { + "epoch": 0.69, + "learning_rate": 4.646139606184362e-06, + "loss": 0.2451, + "step": 8043 + }, + { + "epoch": 0.69, + "learning_rate": 4.643794979875225e-06, + "loss": 0.2682, + "step": 8044 + }, + { + "epoch": 0.69, + "learning_rate": 4.6414507663959115e-06, + "loss": 0.286, + "step": 8045 + }, + { + "epoch": 0.69, + "learning_rate": 4.639106965927093e-06, + "loss": 0.2797, + "step": 8046 + }, + { + "epoch": 0.69, + "learning_rate": 4.636763578649419e-06, + "loss": 0.3387, + "step": 8047 + }, + { + "epoch": 0.69, + "learning_rate": 4.634420604743509e-06, + "loss": 0.2574, + "step": 8048 + }, + { + "epoch": 0.69, + "learning_rate": 4.632078044389947e-06, + "loss": 0.2545, + "step": 8049 + }, + { + "epoch": 0.69, + "learning_rate": 4.629735897769289e-06, + "loss": 0.3049, + "step": 8050 + }, + { + "epoch": 0.69, + "learning_rate": 4.627394165062045e-06, + "loss": 0.2468, + "step": 8051 + }, + { + "epoch": 0.69, + "learning_rate": 4.62505284644872e-06, + "loss": 0.3224, + "step": 8052 + }, + { + "epoch": 0.69, + "learning_rate": 4.622711942109759e-06, + "loss": 0.2697, + "step": 8053 + }, + { + "epoch": 0.69, + "learning_rate": 4.620371452225587e-06, + "loss": 0.2632, + "step": 8054 + }, + { + "epoch": 0.69, + "learning_rate": 4.61803137697661e-06, + "loss": 0.2638, + "step": 8055 + }, + { + "epoch": 0.69, + "learning_rate": 4.615691716543179e-06, + "loss": 0.2703, + "step": 8056 + }, + { + "epoch": 0.69, + "learning_rate": 4.613352471105627e-06, + "loss": 0.2521, + "step": 8057 + }, + { + "epoch": 0.69, + "learning_rate": 4.611013640844245e-06, + "loss": 0.2441, + "step": 8058 + }, + { + "epoch": 0.69, + "learning_rate": 4.608675225939308e-06, + "loss": 0.2761, + "step": 8059 + }, + { + "epoch": 0.69, + "learning_rate": 4.606337226571047e-06, + "loss": 0.2935, + "step": 8060 + }, + { + "epoch": 0.69, + "learning_rate": 4.6039996429196555e-06, + "loss": 0.285, + "step": 8061 + }, + { + "epoch": 0.69, + "learning_rate": 4.601662475165316e-06, + "loss": 0.2796, + "step": 8062 + }, + { + "epoch": 0.69, + "learning_rate": 4.5993257234881565e-06, + "loss": 0.2687, + "step": 8063 + }, + { + "epoch": 0.69, + "learning_rate": 4.596989388068283e-06, + "loss": 0.2836, + "step": 8064 + }, + { + "epoch": 0.69, + "learning_rate": 4.5946534690857705e-06, + "loss": 0.2661, + "step": 8065 + }, + { + "epoch": 0.69, + "learning_rate": 4.592317966720661e-06, + "loss": 0.2847, + "step": 8066 + }, + { + "epoch": 0.69, + "learning_rate": 4.589982881152966e-06, + "loss": 0.3113, + "step": 8067 + }, + { + "epoch": 0.69, + "learning_rate": 4.587648212562651e-06, + "loss": 0.2533, + "step": 8068 + }, + { + "epoch": 0.69, + "learning_rate": 4.585313961129676e-06, + "loss": 0.2955, + "step": 8069 + }, + { + "epoch": 0.69, + "learning_rate": 4.582980127033943e-06, + "loss": 0.252, + "step": 8070 + }, + { + "epoch": 0.69, + "learning_rate": 4.5806467104553345e-06, + "loss": 0.2835, + "step": 8071 + }, + { + "epoch": 0.69, + "learning_rate": 4.5783137115737e-06, + "loss": 0.2668, + "step": 8072 + }, + { + "epoch": 0.69, + "learning_rate": 4.575981130568856e-06, + "loss": 0.3032, + "step": 8073 + }, + { + "epoch": 0.69, + "learning_rate": 4.573648967620589e-06, + "loss": 0.2485, + "step": 8074 + }, + { + "epoch": 0.69, + "learning_rate": 4.57131722290864e-06, + "loss": 0.312, + "step": 8075 + }, + { + "epoch": 0.69, + "learning_rate": 4.568985896612742e-06, + "loss": 0.2449, + "step": 8076 + }, + { + "epoch": 0.69, + "learning_rate": 4.5666549889125726e-06, + "loss": 0.2367, + "step": 8077 + }, + { + "epoch": 0.69, + "learning_rate": 4.56432449998779e-06, + "loss": 0.2734, + "step": 8078 + }, + { + "epoch": 0.69, + "learning_rate": 4.561994430018016e-06, + "loss": 0.2636, + "step": 8079 + }, + { + "epoch": 0.69, + "learning_rate": 4.559664779182842e-06, + "loss": 0.2562, + "step": 8080 + }, + { + "epoch": 0.69, + "learning_rate": 4.557335547661828e-06, + "loss": 0.2773, + "step": 8081 + }, + { + "epoch": 0.69, + "learning_rate": 4.55500673563449e-06, + "loss": 0.2729, + "step": 8082 + }, + { + "epoch": 0.69, + "learning_rate": 4.552678343280337e-06, + "loss": 0.2978, + "step": 8083 + }, + { + "epoch": 0.69, + "learning_rate": 4.550350370778815e-06, + "loss": 0.2931, + "step": 8084 + }, + { + "epoch": 0.69, + "learning_rate": 4.548022818309361e-06, + "loss": 0.2603, + "step": 8085 + }, + { + "epoch": 0.69, + "learning_rate": 4.545695686051369e-06, + "loss": 0.2605, + "step": 8086 + }, + { + "epoch": 0.69, + "learning_rate": 4.5433689741842024e-06, + "loss": 0.2934, + "step": 8087 + }, + { + "epoch": 0.69, + "learning_rate": 4.5410426828871965e-06, + "loss": 0.2527, + "step": 8088 + }, + { + "epoch": 0.69, + "learning_rate": 4.5387168123396406e-06, + "loss": 0.6045, + "step": 8089 + }, + { + "epoch": 0.69, + "learning_rate": 4.536391362720816e-06, + "loss": 0.2437, + "step": 8090 + }, + { + "epoch": 0.69, + "learning_rate": 4.5340663342099435e-06, + "loss": 0.2952, + "step": 8091 + }, + { + "epoch": 0.69, + "learning_rate": 4.531741726986226e-06, + "loss": 0.2439, + "step": 8092 + }, + { + "epoch": 0.69, + "learning_rate": 4.529417541228846e-06, + "loss": 0.2712, + "step": 8093 + }, + { + "epoch": 0.69, + "learning_rate": 4.527093777116925e-06, + "loss": 0.2842, + "step": 8094 + }, + { + "epoch": 0.69, + "learning_rate": 4.5247704348295785e-06, + "loss": 0.3024, + "step": 8095 + }, + { + "epoch": 0.69, + "learning_rate": 4.522447514545865e-06, + "loss": 0.2585, + "step": 8096 + }, + { + "epoch": 0.69, + "learning_rate": 4.520125016444835e-06, + "loss": 0.2802, + "step": 8097 + }, + { + "epoch": 0.69, + "learning_rate": 4.5178029407054965e-06, + "loss": 0.2325, + "step": 8098 + }, + { + "epoch": 0.69, + "learning_rate": 4.515481287506811e-06, + "loss": 0.3073, + "step": 8099 + }, + { + "epoch": 0.69, + "learning_rate": 4.513160057027736e-06, + "loss": 0.2603, + "step": 8100 + }, + { + "epoch": 0.69, + "learning_rate": 4.510839249447169e-06, + "loss": 0.3051, + "step": 8101 + }, + { + "epoch": 0.69, + "learning_rate": 4.508518864943989e-06, + "loss": 0.3133, + "step": 8102 + }, + { + "epoch": 0.69, + "learning_rate": 4.506198903697041e-06, + "loss": 0.2342, + "step": 8103 + }, + { + "epoch": 0.69, + "learning_rate": 4.5038793658851365e-06, + "loss": 0.3011, + "step": 8104 + }, + { + "epoch": 0.69, + "learning_rate": 4.501560251687056e-06, + "loss": 0.2777, + "step": 8105 + }, + { + "epoch": 0.69, + "learning_rate": 4.4992415612815355e-06, + "loss": 0.2684, + "step": 8106 + }, + { + "epoch": 0.69, + "learning_rate": 4.496923294847303e-06, + "loss": 0.2896, + "step": 8107 + }, + { + "epoch": 0.7, + "learning_rate": 4.494605452563028e-06, + "loss": 0.3073, + "step": 8108 + }, + { + "epoch": 0.7, + "learning_rate": 4.492288034607361e-06, + "loss": 0.2422, + "step": 8109 + }, + { + "epoch": 0.7, + "learning_rate": 4.489971041158919e-06, + "loss": 0.2971, + "step": 8110 + }, + { + "epoch": 0.7, + "learning_rate": 4.487654472396284e-06, + "loss": 0.251, + "step": 8111 + }, + { + "epoch": 0.7, + "learning_rate": 4.4853383284980064e-06, + "loss": 0.3059, + "step": 8112 + }, + { + "epoch": 0.7, + "learning_rate": 4.483022609642596e-06, + "loss": 0.2935, + "step": 8113 + }, + { + "epoch": 0.7, + "learning_rate": 4.480707316008549e-06, + "loss": 0.2644, + "step": 8114 + }, + { + "epoch": 0.7, + "learning_rate": 4.478392447774307e-06, + "loss": 0.2598, + "step": 8115 + }, + { + "epoch": 0.7, + "learning_rate": 4.476078005118293e-06, + "loss": 0.2622, + "step": 8116 + }, + { + "epoch": 0.7, + "learning_rate": 4.473763988218891e-06, + "loss": 0.299, + "step": 8117 + }, + { + "epoch": 0.7, + "learning_rate": 4.4714503972544545e-06, + "loss": 0.289, + "step": 8118 + }, + { + "epoch": 0.7, + "learning_rate": 4.469137232403308e-06, + "loss": 0.2335, + "step": 8119 + }, + { + "epoch": 0.7, + "learning_rate": 4.466824493843728e-06, + "loss": 0.2941, + "step": 8120 + }, + { + "epoch": 0.7, + "learning_rate": 4.464512181753982e-06, + "loss": 0.257, + "step": 8121 + }, + { + "epoch": 0.7, + "learning_rate": 4.462200296312284e-06, + "loss": 0.291, + "step": 8122 + }, + { + "epoch": 0.7, + "learning_rate": 4.459888837696822e-06, + "loss": 0.2661, + "step": 8123 + }, + { + "epoch": 0.7, + "learning_rate": 4.457577806085754e-06, + "loss": 0.2953, + "step": 8124 + }, + { + "epoch": 0.7, + "learning_rate": 4.455267201657203e-06, + "loss": 0.2645, + "step": 8125 + }, + { + "epoch": 0.7, + "learning_rate": 4.4529570245892625e-06, + "loss": 0.2548, + "step": 8126 + }, + { + "epoch": 0.7, + "learning_rate": 4.450647275059979e-06, + "loss": 0.2951, + "step": 8127 + }, + { + "epoch": 0.7, + "learning_rate": 4.4483379532473906e-06, + "loss": 0.2943, + "step": 8128 + }, + { + "epoch": 0.7, + "learning_rate": 4.446029059329477e-06, + "loss": 0.3195, + "step": 8129 + }, + { + "epoch": 0.7, + "learning_rate": 4.443720593484198e-06, + "loss": 0.2298, + "step": 8130 + }, + { + "epoch": 0.7, + "learning_rate": 4.441412555889487e-06, + "loss": 0.2728, + "step": 8131 + }, + { + "epoch": 0.7, + "learning_rate": 4.439104946723228e-06, + "loss": 0.2642, + "step": 8132 + }, + { + "epoch": 0.7, + "learning_rate": 4.436797766163285e-06, + "loss": 0.2825, + "step": 8133 + }, + { + "epoch": 0.7, + "learning_rate": 4.4344910143874755e-06, + "loss": 0.3176, + "step": 8134 + }, + { + "epoch": 0.7, + "learning_rate": 4.432184691573602e-06, + "loss": 0.2866, + "step": 8135 + }, + { + "epoch": 0.7, + "learning_rate": 4.429878797899424e-06, + "loss": 0.2526, + "step": 8136 + }, + { + "epoch": 0.7, + "learning_rate": 4.42757333354266e-06, + "loss": 0.2643, + "step": 8137 + }, + { + "epoch": 0.7, + "learning_rate": 4.425268298681015e-06, + "loss": 0.2716, + "step": 8138 + }, + { + "epoch": 0.7, + "learning_rate": 4.422963693492141e-06, + "loss": 0.2578, + "step": 8139 + }, + { + "epoch": 0.7, + "learning_rate": 4.420659518153667e-06, + "loss": 0.3008, + "step": 8140 + }, + { + "epoch": 0.7, + "learning_rate": 4.41835577284319e-06, + "loss": 0.2524, + "step": 8141 + }, + { + "epoch": 0.7, + "learning_rate": 4.416052457738271e-06, + "loss": 0.2606, + "step": 8142 + }, + { + "epoch": 0.7, + "learning_rate": 4.41374957301644e-06, + "loss": 0.2066, + "step": 8143 + }, + { + "epoch": 0.7, + "learning_rate": 4.411447118855183e-06, + "loss": 0.2757, + "step": 8144 + }, + { + "epoch": 0.7, + "learning_rate": 4.409145095431976e-06, + "loss": 0.2831, + "step": 8145 + }, + { + "epoch": 0.7, + "learning_rate": 4.406843502924235e-06, + "loss": 0.3225, + "step": 8146 + }, + { + "epoch": 0.7, + "learning_rate": 4.40454234150936e-06, + "loss": 0.5675, + "step": 8147 + }, + { + "epoch": 0.7, + "learning_rate": 4.402241611364715e-06, + "loss": 0.2445, + "step": 8148 + }, + { + "epoch": 0.7, + "learning_rate": 4.399941312667626e-06, + "loss": 0.2208, + "step": 8149 + }, + { + "epoch": 0.7, + "learning_rate": 4.397641445595393e-06, + "loss": 0.28, + "step": 8150 + }, + { + "epoch": 0.7, + "learning_rate": 4.39534201032527e-06, + "loss": 0.2852, + "step": 8151 + }, + { + "epoch": 0.7, + "learning_rate": 4.393043007034496e-06, + "loss": 0.3055, + "step": 8152 + }, + { + "epoch": 0.7, + "learning_rate": 4.390744435900262e-06, + "loss": 0.251, + "step": 8153 + }, + { + "epoch": 0.7, + "learning_rate": 4.388446297099728e-06, + "loss": 0.3126, + "step": 8154 + }, + { + "epoch": 0.7, + "learning_rate": 4.386148590810027e-06, + "loss": 0.3075, + "step": 8155 + }, + { + "epoch": 0.7, + "learning_rate": 4.383851317208253e-06, + "loss": 0.2547, + "step": 8156 + }, + { + "epoch": 0.7, + "learning_rate": 4.381554476471473e-06, + "loss": 0.2348, + "step": 8157 + }, + { + "epoch": 0.7, + "learning_rate": 4.379258068776706e-06, + "loss": 0.2521, + "step": 8158 + }, + { + "epoch": 0.7, + "learning_rate": 4.3769620943009615e-06, + "loss": 0.3021, + "step": 8159 + }, + { + "epoch": 0.7, + "learning_rate": 4.374666553221191e-06, + "loss": 0.2736, + "step": 8160 + }, + { + "epoch": 0.7, + "learning_rate": 4.372371445714325e-06, + "loss": 0.262, + "step": 8161 + }, + { + "epoch": 0.7, + "learning_rate": 4.370076771957264e-06, + "loss": 0.3145, + "step": 8162 + }, + { + "epoch": 0.7, + "learning_rate": 4.367782532126864e-06, + "loss": 0.3156, + "step": 8163 + }, + { + "epoch": 0.7, + "learning_rate": 4.365488726399962e-06, + "loss": 0.2725, + "step": 8164 + }, + { + "epoch": 0.7, + "learning_rate": 4.363195354953341e-06, + "loss": 0.2673, + "step": 8165 + }, + { + "epoch": 0.7, + "learning_rate": 4.360902417963777e-06, + "loss": 0.2756, + "step": 8166 + }, + { + "epoch": 0.7, + "learning_rate": 4.358609915607987e-06, + "loss": 0.2993, + "step": 8167 + }, + { + "epoch": 0.7, + "learning_rate": 4.3563178480626665e-06, + "loss": 0.2783, + "step": 8168 + }, + { + "epoch": 0.7, + "learning_rate": 4.354026215504488e-06, + "loss": 0.2764, + "step": 8169 + }, + { + "epoch": 0.7, + "learning_rate": 4.351735018110066e-06, + "loss": 0.2222, + "step": 8170 + }, + { + "epoch": 0.7, + "learning_rate": 4.349444256056005e-06, + "loss": 0.2797, + "step": 8171 + }, + { + "epoch": 0.7, + "learning_rate": 4.347153929518852e-06, + "loss": 0.244, + "step": 8172 + }, + { + "epoch": 0.7, + "learning_rate": 4.344864038675147e-06, + "loss": 0.2549, + "step": 8173 + }, + { + "epoch": 0.7, + "learning_rate": 4.342574583701382e-06, + "loss": 0.2751, + "step": 8174 + }, + { + "epoch": 0.7, + "learning_rate": 4.340285564774007e-06, + "loss": 0.2625, + "step": 8175 + }, + { + "epoch": 0.7, + "learning_rate": 4.3379969820694636e-06, + "loss": 0.2565, + "step": 8176 + }, + { + "epoch": 0.7, + "learning_rate": 4.335708835764131e-06, + "loss": 0.2946, + "step": 8177 + }, + { + "epoch": 0.7, + "learning_rate": 4.333421126034374e-06, + "loss": 0.2819, + "step": 8178 + }, + { + "epoch": 0.7, + "learning_rate": 4.331133853056516e-06, + "loss": 0.2598, + "step": 8179 + }, + { + "epoch": 0.7, + "learning_rate": 4.3288470170068505e-06, + "loss": 0.2549, + "step": 8180 + }, + { + "epoch": 0.7, + "learning_rate": 4.326560618061639e-06, + "loss": 0.2265, + "step": 8181 + }, + { + "epoch": 0.7, + "learning_rate": 4.324274656397095e-06, + "loss": 0.2296, + "step": 8182 + }, + { + "epoch": 0.7, + "learning_rate": 4.321989132189422e-06, + "loss": 0.2867, + "step": 8183 + }, + { + "epoch": 0.7, + "learning_rate": 4.319704045614768e-06, + "loss": 0.2393, + "step": 8184 + }, + { + "epoch": 0.7, + "learning_rate": 4.317419396849258e-06, + "loss": 0.304, + "step": 8185 + }, + { + "epoch": 0.7, + "learning_rate": 4.315135186068984e-06, + "loss": 0.2891, + "step": 8186 + }, + { + "epoch": 0.7, + "learning_rate": 4.312851413449999e-06, + "loss": 0.2687, + "step": 8187 + }, + { + "epoch": 0.7, + "learning_rate": 4.31056807916833e-06, + "loss": 0.2867, + "step": 8188 + }, + { + "epoch": 0.7, + "learning_rate": 4.308285183399954e-06, + "loss": 0.2703, + "step": 8189 + }, + { + "epoch": 0.7, + "learning_rate": 4.306002726320839e-06, + "loss": 0.3225, + "step": 8190 + }, + { + "epoch": 0.7, + "learning_rate": 4.3037207081068965e-06, + "loss": 0.2919, + "step": 8191 + }, + { + "epoch": 0.7, + "learning_rate": 4.301439128934015e-06, + "loss": 0.2985, + "step": 8192 + }, + { + "epoch": 0.7, + "learning_rate": 4.299157988978048e-06, + "loss": 0.297, + "step": 8193 + }, + { + "epoch": 0.7, + "learning_rate": 4.296877288414815e-06, + "loss": 0.6129, + "step": 8194 + }, + { + "epoch": 0.7, + "learning_rate": 4.2945970274201045e-06, + "loss": 0.2766, + "step": 8195 + }, + { + "epoch": 0.7, + "learning_rate": 4.292317206169655e-06, + "loss": 0.2724, + "step": 8196 + }, + { + "epoch": 0.7, + "learning_rate": 4.290037824839202e-06, + "loss": 0.2853, + "step": 8197 + }, + { + "epoch": 0.7, + "learning_rate": 4.287758883604415e-06, + "loss": 0.2875, + "step": 8198 + }, + { + "epoch": 0.7, + "learning_rate": 4.285480382640947e-06, + "loss": 0.2808, + "step": 8199 + }, + { + "epoch": 0.7, + "learning_rate": 4.283202322124417e-06, + "loss": 0.2679, + "step": 8200 + }, + { + "epoch": 0.7, + "learning_rate": 4.280924702230403e-06, + "loss": 0.2629, + "step": 8201 + }, + { + "epoch": 0.7, + "learning_rate": 4.278647523134459e-06, + "loss": 0.3494, + "step": 8202 + }, + { + "epoch": 0.7, + "learning_rate": 4.276370785012086e-06, + "loss": 0.2757, + "step": 8203 + }, + { + "epoch": 0.7, + "learning_rate": 4.2740944880387795e-06, + "loss": 0.262, + "step": 8204 + }, + { + "epoch": 0.7, + "learning_rate": 4.271818632389973e-06, + "loss": 0.28, + "step": 8205 + }, + { + "epoch": 0.7, + "learning_rate": 4.269543218241079e-06, + "loss": 0.2706, + "step": 8206 + }, + { + "epoch": 0.7, + "learning_rate": 4.267268245767486e-06, + "loss": 0.2501, + "step": 8207 + }, + { + "epoch": 0.7, + "learning_rate": 4.2649937151445275e-06, + "loss": 0.2593, + "step": 8208 + }, + { + "epoch": 0.7, + "learning_rate": 4.262719626547519e-06, + "loss": 0.2443, + "step": 8209 + }, + { + "epoch": 0.7, + "learning_rate": 4.260445980151725e-06, + "loss": 0.2626, + "step": 8210 + }, + { + "epoch": 0.7, + "learning_rate": 4.2581727761324e-06, + "loss": 0.2586, + "step": 8211 + }, + { + "epoch": 0.7, + "learning_rate": 4.2559000146647485e-06, + "loss": 0.2744, + "step": 8212 + }, + { + "epoch": 0.7, + "learning_rate": 4.253627695923934e-06, + "loss": 0.2627, + "step": 8213 + }, + { + "epoch": 0.7, + "learning_rate": 4.2513558200851115e-06, + "loss": 0.2453, + "step": 8214 + }, + { + "epoch": 0.7, + "learning_rate": 4.249084387323373e-06, + "loss": 0.3441, + "step": 8215 + }, + { + "epoch": 0.7, + "learning_rate": 4.2468133978137945e-06, + "loss": 0.5818, + "step": 8216 + }, + { + "epoch": 0.7, + "learning_rate": 4.2445428517314116e-06, + "loss": 0.2794, + "step": 8217 + }, + { + "epoch": 0.7, + "learning_rate": 4.242272749251228e-06, + "loss": 0.3177, + "step": 8218 + }, + { + "epoch": 0.7, + "learning_rate": 4.240003090548213e-06, + "loss": 0.304, + "step": 8219 + }, + { + "epoch": 0.7, + "learning_rate": 4.237733875797293e-06, + "loss": 0.2581, + "step": 8220 + }, + { + "epoch": 0.7, + "learning_rate": 4.2354651051733795e-06, + "loss": 0.3061, + "step": 8221 + }, + { + "epoch": 0.7, + "learning_rate": 4.2331967788513295e-06, + "loss": 0.3313, + "step": 8222 + }, + { + "epoch": 0.7, + "learning_rate": 4.230928897005978e-06, + "loss": 0.3079, + "step": 8223 + }, + { + "epoch": 0.7, + "learning_rate": 4.22866145981212e-06, + "loss": 0.2622, + "step": 8224 + }, + { + "epoch": 0.71, + "learning_rate": 4.22639446744452e-06, + "loss": 0.2564, + "step": 8225 + }, + { + "epoch": 0.71, + "learning_rate": 4.2241279200779105e-06, + "loss": 0.2591, + "step": 8226 + }, + { + "epoch": 0.71, + "learning_rate": 4.221861817886973e-06, + "loss": 0.2531, + "step": 8227 + }, + { + "epoch": 0.71, + "learning_rate": 4.2195961610463845e-06, + "loss": 0.2885, + "step": 8228 + }, + { + "epoch": 0.71, + "learning_rate": 4.217330949730758e-06, + "loss": 0.2784, + "step": 8229 + }, + { + "epoch": 0.71, + "learning_rate": 4.215066184114689e-06, + "loss": 0.3102, + "step": 8230 + }, + { + "epoch": 0.71, + "learning_rate": 4.212801864372734e-06, + "loss": 0.5885, + "step": 8231 + }, + { + "epoch": 0.71, + "learning_rate": 4.210537990679417e-06, + "loss": 0.2969, + "step": 8232 + }, + { + "epoch": 0.71, + "learning_rate": 4.208274563209227e-06, + "loss": 0.2432, + "step": 8233 + }, + { + "epoch": 0.71, + "learning_rate": 4.2060115821366085e-06, + "loss": 0.2612, + "step": 8234 + }, + { + "epoch": 0.71, + "learning_rate": 4.203749047635998e-06, + "loss": 0.5912, + "step": 8235 + }, + { + "epoch": 0.71, + "learning_rate": 4.201486959881766e-06, + "loss": 0.3296, + "step": 8236 + }, + { + "epoch": 0.71, + "learning_rate": 4.199225319048267e-06, + "loss": 0.2829, + "step": 8237 + }, + { + "epoch": 0.71, + "learning_rate": 4.196964125309818e-06, + "loss": 0.2664, + "step": 8238 + }, + { + "epoch": 0.71, + "learning_rate": 4.194703378840701e-06, + "loss": 0.257, + "step": 8239 + }, + { + "epoch": 0.71, + "learning_rate": 4.192443079815166e-06, + "loss": 0.2762, + "step": 8240 + }, + { + "epoch": 0.71, + "learning_rate": 4.190183228407416e-06, + "loss": 0.2682, + "step": 8241 + }, + { + "epoch": 0.71, + "learning_rate": 4.187923824791642e-06, + "loss": 0.2657, + "step": 8242 + }, + { + "epoch": 0.71, + "learning_rate": 4.185664869141979e-06, + "loss": 0.2876, + "step": 8243 + }, + { + "epoch": 0.71, + "learning_rate": 4.183406361632534e-06, + "loss": 0.2501, + "step": 8244 + }, + { + "epoch": 0.71, + "learning_rate": 4.181148302437392e-06, + "loss": 0.2637, + "step": 8245 + }, + { + "epoch": 0.71, + "learning_rate": 4.178890691730585e-06, + "loss": 0.2749, + "step": 8246 + }, + { + "epoch": 0.71, + "learning_rate": 4.176633529686124e-06, + "loss": 0.2677, + "step": 8247 + }, + { + "epoch": 0.71, + "learning_rate": 4.174376816477969e-06, + "loss": 0.2653, + "step": 8248 + }, + { + "epoch": 0.71, + "learning_rate": 4.172120552280067e-06, + "loss": 0.2803, + "step": 8249 + }, + { + "epoch": 0.71, + "learning_rate": 4.169864737266321e-06, + "loss": 0.2349, + "step": 8250 + }, + { + "epoch": 0.71, + "learning_rate": 4.1676093716105845e-06, + "loss": 0.2369, + "step": 8251 + }, + { + "epoch": 0.71, + "learning_rate": 4.165354455486707e-06, + "loss": 0.2574, + "step": 8252 + }, + { + "epoch": 0.71, + "learning_rate": 4.163099989068476e-06, + "loss": 0.234, + "step": 8253 + }, + { + "epoch": 0.71, + "learning_rate": 4.160845972529656e-06, + "loss": 0.2217, + "step": 8254 + }, + { + "epoch": 0.71, + "learning_rate": 4.1585924060439755e-06, + "loss": 0.2908, + "step": 8255 + }, + { + "epoch": 0.71, + "learning_rate": 4.156339289785129e-06, + "loss": 0.2633, + "step": 8256 + }, + { + "epoch": 0.71, + "learning_rate": 4.154086623926781e-06, + "loss": 0.2374, + "step": 8257 + }, + { + "epoch": 0.71, + "learning_rate": 4.151834408642542e-06, + "loss": 0.261, + "step": 8258 + }, + { + "epoch": 0.71, + "learning_rate": 4.149582644106018e-06, + "loss": 0.2505, + "step": 8259 + }, + { + "epoch": 0.71, + "learning_rate": 4.147331330490752e-06, + "loss": 0.2325, + "step": 8260 + }, + { + "epoch": 0.71, + "learning_rate": 4.1450804679702685e-06, + "loss": 0.2436, + "step": 8261 + }, + { + "epoch": 0.71, + "learning_rate": 4.142830056718052e-06, + "loss": 0.2469, + "step": 8262 + }, + { + "epoch": 0.71, + "learning_rate": 4.140580096907554e-06, + "loss": 0.2831, + "step": 8263 + }, + { + "epoch": 0.71, + "learning_rate": 4.138330588712194e-06, + "loss": 0.29, + "step": 8264 + }, + { + "epoch": 0.71, + "learning_rate": 4.1360815323053406e-06, + "loss": 0.306, + "step": 8265 + }, + { + "epoch": 0.71, + "learning_rate": 4.133832927860356e-06, + "loss": 0.2546, + "step": 8266 + }, + { + "epoch": 0.71, + "learning_rate": 4.13158477555054e-06, + "loss": 0.25, + "step": 8267 + }, + { + "epoch": 0.71, + "learning_rate": 4.1293370755491725e-06, + "loss": 0.2916, + "step": 8268 + }, + { + "epoch": 0.71, + "learning_rate": 4.127089828029496e-06, + "loss": 0.3303, + "step": 8269 + }, + { + "epoch": 0.71, + "learning_rate": 4.124843033164716e-06, + "loss": 0.3043, + "step": 8270 + }, + { + "epoch": 0.71, + "learning_rate": 4.122596691128009e-06, + "loss": 0.2563, + "step": 8271 + }, + { + "epoch": 0.71, + "learning_rate": 4.120350802092501e-06, + "loss": 0.3027, + "step": 8272 + }, + { + "epoch": 0.71, + "learning_rate": 4.1181053662313075e-06, + "loss": 0.2294, + "step": 8273 + }, + { + "epoch": 0.71, + "learning_rate": 4.115860383717486e-06, + "loss": 0.2878, + "step": 8274 + }, + { + "epoch": 0.71, + "learning_rate": 4.113615854724071e-06, + "loss": 0.2259, + "step": 8275 + }, + { + "epoch": 0.71, + "learning_rate": 4.1113717794240615e-06, + "loss": 0.2839, + "step": 8276 + }, + { + "epoch": 0.71, + "learning_rate": 4.109128157990418e-06, + "loss": 0.2674, + "step": 8277 + }, + { + "epoch": 0.71, + "learning_rate": 4.106884990596073e-06, + "loss": 0.2569, + "step": 8278 + }, + { + "epoch": 0.71, + "learning_rate": 4.1046422774139065e-06, + "loss": 0.2555, + "step": 8279 + }, + { + "epoch": 0.71, + "learning_rate": 4.10240001861679e-06, + "loss": 0.2634, + "step": 8280 + }, + { + "epoch": 0.71, + "learning_rate": 4.100158214377536e-06, + "loss": 0.2503, + "step": 8281 + }, + { + "epoch": 0.71, + "learning_rate": 4.097916864868932e-06, + "loss": 0.2673, + "step": 8282 + }, + { + "epoch": 0.71, + "learning_rate": 4.095675970263738e-06, + "loss": 0.265, + "step": 8283 + }, + { + "epoch": 0.71, + "learning_rate": 4.093435530734664e-06, + "loss": 0.2879, + "step": 8284 + }, + { + "epoch": 0.71, + "learning_rate": 4.091195546454398e-06, + "loss": 0.3255, + "step": 8285 + }, + { + "epoch": 0.71, + "learning_rate": 4.088956017595575e-06, + "loss": 0.3093, + "step": 8286 + }, + { + "epoch": 0.71, + "learning_rate": 4.0867169443308196e-06, + "loss": 0.2482, + "step": 8287 + }, + { + "epoch": 0.71, + "learning_rate": 4.084478326832706e-06, + "loss": 0.2867, + "step": 8288 + }, + { + "epoch": 0.71, + "learning_rate": 4.082240165273767e-06, + "loss": 0.2518, + "step": 8289 + }, + { + "epoch": 0.71, + "learning_rate": 4.080002459826523e-06, + "loss": 0.2887, + "step": 8290 + }, + { + "epoch": 0.71, + "learning_rate": 4.0777652106634334e-06, + "loss": 0.3162, + "step": 8291 + }, + { + "epoch": 0.71, + "learning_rate": 4.07552841795694e-06, + "loss": 0.2935, + "step": 8292 + }, + { + "epoch": 0.71, + "learning_rate": 4.073292081879442e-06, + "loss": 0.3007, + "step": 8293 + }, + { + "epoch": 0.71, + "learning_rate": 4.071056202603305e-06, + "loss": 0.2941, + "step": 8294 + }, + { + "epoch": 0.71, + "learning_rate": 4.068820780300864e-06, + "loss": 0.283, + "step": 8295 + }, + { + "epoch": 0.71, + "learning_rate": 4.066585815144404e-06, + "loss": 0.2927, + "step": 8296 + }, + { + "epoch": 0.71, + "learning_rate": 4.0643513073061966e-06, + "loss": 0.2428, + "step": 8297 + }, + { + "epoch": 0.71, + "learning_rate": 4.06211725695846e-06, + "loss": 0.2556, + "step": 8298 + }, + { + "epoch": 0.71, + "learning_rate": 4.059883664273385e-06, + "loss": 0.2646, + "step": 8299 + }, + { + "epoch": 0.71, + "learning_rate": 4.057650529423126e-06, + "loss": 0.2399, + "step": 8300 + }, + { + "epoch": 0.71, + "learning_rate": 4.055417852579802e-06, + "loss": 0.288, + "step": 8301 + }, + { + "epoch": 0.71, + "learning_rate": 4.053185633915501e-06, + "loss": 0.305, + "step": 8302 + }, + { + "epoch": 0.71, + "learning_rate": 4.050953873602259e-06, + "loss": 0.3251, + "step": 8303 + }, + { + "epoch": 0.71, + "learning_rate": 4.048722571812105e-06, + "loss": 0.2581, + "step": 8304 + }, + { + "epoch": 0.71, + "learning_rate": 4.0464917287170055e-06, + "loss": 0.2963, + "step": 8305 + }, + { + "epoch": 0.71, + "learning_rate": 4.0442613444889065e-06, + "loss": 0.2631, + "step": 8306 + }, + { + "epoch": 0.71, + "learning_rate": 4.042031419299714e-06, + "loss": 0.2841, + "step": 8307 + }, + { + "epoch": 0.71, + "learning_rate": 4.039801953321302e-06, + "loss": 0.3099, + "step": 8308 + }, + { + "epoch": 0.71, + "learning_rate": 4.0375729467255074e-06, + "loss": 0.2529, + "step": 8309 + }, + { + "epoch": 0.71, + "learning_rate": 4.035344399684124e-06, + "loss": 0.2891, + "step": 8310 + }, + { + "epoch": 0.71, + "learning_rate": 4.033116312368926e-06, + "loss": 0.2452, + "step": 8311 + }, + { + "epoch": 0.71, + "learning_rate": 4.030888684951638e-06, + "loss": 0.2714, + "step": 8312 + }, + { + "epoch": 0.71, + "learning_rate": 4.028661517603956e-06, + "loss": 0.2919, + "step": 8313 + }, + { + "epoch": 0.71, + "learning_rate": 4.026434810497538e-06, + "loss": 0.2911, + "step": 8314 + }, + { + "epoch": 0.71, + "learning_rate": 4.024208563804008e-06, + "loss": 0.2867, + "step": 8315 + }, + { + "epoch": 0.71, + "learning_rate": 4.02198277769496e-06, + "loss": 0.253, + "step": 8316 + }, + { + "epoch": 0.71, + "learning_rate": 4.019757452341934e-06, + "loss": 0.2496, + "step": 8317 + }, + { + "epoch": 0.71, + "learning_rate": 4.017532587916461e-06, + "loss": 0.2726, + "step": 8318 + }, + { + "epoch": 0.71, + "learning_rate": 4.015308184590011e-06, + "loss": 0.2758, + "step": 8319 + }, + { + "epoch": 0.71, + "learning_rate": 4.013084242534032e-06, + "loss": 0.2532, + "step": 8320 + }, + { + "epoch": 0.71, + "learning_rate": 4.010860761919946e-06, + "loss": 0.2753, + "step": 8321 + }, + { + "epoch": 0.71, + "learning_rate": 4.008637742919114e-06, + "loss": 0.2874, + "step": 8322 + }, + { + "epoch": 0.71, + "learning_rate": 4.006415185702885e-06, + "loss": 0.2992, + "step": 8323 + }, + { + "epoch": 0.71, + "learning_rate": 4.004193090442551e-06, + "loss": 0.2872, + "step": 8324 + }, + { + "epoch": 0.71, + "learning_rate": 4.001971457309391e-06, + "loss": 0.2856, + "step": 8325 + }, + { + "epoch": 0.71, + "learning_rate": 3.999750286474637e-06, + "loss": 0.2642, + "step": 8326 + }, + { + "epoch": 0.71, + "learning_rate": 3.997529578109476e-06, + "loss": 0.2352, + "step": 8327 + }, + { + "epoch": 0.71, + "learning_rate": 3.995309332385083e-06, + "loss": 0.2813, + "step": 8328 + }, + { + "epoch": 0.71, + "learning_rate": 3.993089549472574e-06, + "loss": 0.3192, + "step": 8329 + }, + { + "epoch": 0.71, + "learning_rate": 3.99087022954304e-06, + "loss": 0.3025, + "step": 8330 + }, + { + "epoch": 0.71, + "learning_rate": 3.988651372767538e-06, + "loss": 0.2973, + "step": 8331 + }, + { + "epoch": 0.71, + "learning_rate": 3.986432979317085e-06, + "loss": 0.3057, + "step": 8332 + }, + { + "epoch": 0.71, + "learning_rate": 3.984215049362667e-06, + "loss": 0.2794, + "step": 8333 + }, + { + "epoch": 0.71, + "learning_rate": 3.981997583075222e-06, + "loss": 0.3143, + "step": 8334 + }, + { + "epoch": 0.71, + "learning_rate": 3.979780580625674e-06, + "loss": 0.2734, + "step": 8335 + }, + { + "epoch": 0.71, + "learning_rate": 3.977564042184888e-06, + "loss": 0.2735, + "step": 8336 + }, + { + "epoch": 0.71, + "learning_rate": 3.975347967923708e-06, + "loss": 0.2598, + "step": 8337 + }, + { + "epoch": 0.71, + "learning_rate": 3.973132358012939e-06, + "loss": 0.3055, + "step": 8338 + }, + { + "epoch": 0.71, + "learning_rate": 3.970917212623347e-06, + "loss": 0.2514, + "step": 8339 + }, + { + "epoch": 0.71, + "learning_rate": 3.96870253192567e-06, + "loss": 0.2948, + "step": 8340 + }, + { + "epoch": 0.71, + "learning_rate": 3.966488316090593e-06, + "loss": 0.2532, + "step": 8341 + }, + { + "epoch": 0.72, + "learning_rate": 3.964274565288792e-06, + "loss": 0.283, + "step": 8342 + }, + { + "epoch": 0.72, + "learning_rate": 3.9620612796908794e-06, + "loss": 0.2628, + "step": 8343 + }, + { + "epoch": 0.72, + "learning_rate": 3.95984845946745e-06, + "loss": 0.3083, + "step": 8344 + }, + { + "epoch": 0.72, + "learning_rate": 3.957636104789056e-06, + "loss": 0.2852, + "step": 8345 + }, + { + "epoch": 0.72, + "learning_rate": 3.9554242158262134e-06, + "loss": 0.2607, + "step": 8346 + }, + { + "epoch": 0.72, + "learning_rate": 3.95321279274941e-06, + "loss": 0.312, + "step": 8347 + }, + { + "epoch": 0.72, + "learning_rate": 3.951001835729079e-06, + "loss": 0.2365, + "step": 8348 + }, + { + "epoch": 0.72, + "learning_rate": 3.9487913449356454e-06, + "loss": 0.2803, + "step": 8349 + }, + { + "epoch": 0.72, + "learning_rate": 3.94658132053947e-06, + "loss": 0.2827, + "step": 8350 + }, + { + "epoch": 0.72, + "learning_rate": 3.944371762710897e-06, + "loss": 0.3231, + "step": 8351 + }, + { + "epoch": 0.72, + "learning_rate": 3.942162671620225e-06, + "loss": 0.3166, + "step": 8352 + }, + { + "epoch": 0.72, + "learning_rate": 3.939954047437723e-06, + "loss": 0.2458, + "step": 8353 + }, + { + "epoch": 0.72, + "learning_rate": 3.937745890333623e-06, + "loss": 0.2772, + "step": 8354 + }, + { + "epoch": 0.72, + "learning_rate": 3.935538200478108e-06, + "loss": 0.2287, + "step": 8355 + }, + { + "epoch": 0.72, + "learning_rate": 3.933330978041351e-06, + "loss": 0.2947, + "step": 8356 + }, + { + "epoch": 0.72, + "learning_rate": 3.931124223193461e-06, + "loss": 0.2973, + "step": 8357 + }, + { + "epoch": 0.72, + "learning_rate": 3.928917936104529e-06, + "loss": 0.265, + "step": 8358 + }, + { + "epoch": 0.72, + "learning_rate": 3.9267121169446056e-06, + "loss": 0.2961, + "step": 8359 + }, + { + "epoch": 0.72, + "learning_rate": 3.924506765883701e-06, + "loss": 0.2796, + "step": 8360 + }, + { + "epoch": 0.72, + "learning_rate": 3.9223018830918004e-06, + "loss": 0.2588, + "step": 8361 + }, + { + "epoch": 0.72, + "learning_rate": 3.920097468738833e-06, + "loss": 0.3008, + "step": 8362 + }, + { + "epoch": 0.72, + "learning_rate": 3.917893522994713e-06, + "loss": 0.2415, + "step": 8363 + }, + { + "epoch": 0.72, + "learning_rate": 3.9156900460293126e-06, + "loss": 0.5614, + "step": 8364 + }, + { + "epoch": 0.72, + "learning_rate": 3.913487038012451e-06, + "loss": 0.2741, + "step": 8365 + }, + { + "epoch": 0.72, + "learning_rate": 3.911284499113943e-06, + "loss": 0.237, + "step": 8366 + }, + { + "epoch": 0.72, + "learning_rate": 3.909082429503537e-06, + "loss": 0.2568, + "step": 8367 + }, + { + "epoch": 0.72, + "learning_rate": 3.906880829350961e-06, + "loss": 0.2489, + "step": 8368 + }, + { + "epoch": 0.72, + "learning_rate": 3.904679698825903e-06, + "loss": 0.2836, + "step": 8369 + }, + { + "epoch": 0.72, + "learning_rate": 3.902479038098017e-06, + "loss": 0.2688, + "step": 8370 + }, + { + "epoch": 0.72, + "learning_rate": 3.90027884733692e-06, + "loss": 0.3223, + "step": 8371 + }, + { + "epoch": 0.72, + "learning_rate": 3.898079126712184e-06, + "loss": 0.2689, + "step": 8372 + }, + { + "epoch": 0.72, + "learning_rate": 3.895879876393366e-06, + "loss": 0.2696, + "step": 8373 + }, + { + "epoch": 0.72, + "learning_rate": 3.893681096549961e-06, + "loss": 0.2723, + "step": 8374 + }, + { + "epoch": 0.72, + "learning_rate": 3.891482787351446e-06, + "loss": 0.2868, + "step": 8375 + }, + { + "epoch": 0.72, + "learning_rate": 3.889284948967253e-06, + "loss": 0.2629, + "step": 8376 + }, + { + "epoch": 0.72, + "learning_rate": 3.887087581566784e-06, + "loss": 0.2762, + "step": 8377 + }, + { + "epoch": 0.72, + "learning_rate": 3.884890685319402e-06, + "loss": 0.2468, + "step": 8378 + }, + { + "epoch": 0.72, + "learning_rate": 3.882694260394424e-06, + "loss": 0.282, + "step": 8379 + }, + { + "epoch": 0.72, + "learning_rate": 3.880498306961153e-06, + "loss": 0.2477, + "step": 8380 + }, + { + "epoch": 0.72, + "learning_rate": 3.87830282518883e-06, + "loss": 0.2911, + "step": 8381 + }, + { + "epoch": 0.72, + "learning_rate": 3.876107815246678e-06, + "loss": 0.3041, + "step": 8382 + }, + { + "epoch": 0.72, + "learning_rate": 3.8739132773038765e-06, + "loss": 0.2927, + "step": 8383 + }, + { + "epoch": 0.72, + "learning_rate": 3.871719211529571e-06, + "loss": 0.2674, + "step": 8384 + }, + { + "epoch": 0.72, + "learning_rate": 3.86952561809287e-06, + "loss": 0.2781, + "step": 8385 + }, + { + "epoch": 0.72, + "learning_rate": 3.867332497162836e-06, + "loss": 0.2375, + "step": 8386 + }, + { + "epoch": 0.72, + "learning_rate": 3.8651398489085176e-06, + "loss": 0.278, + "step": 8387 + }, + { + "epoch": 0.72, + "learning_rate": 3.862947673498904e-06, + "loss": 0.3192, + "step": 8388 + }, + { + "epoch": 0.72, + "learning_rate": 3.8607559711029586e-06, + "loss": 0.2703, + "step": 8389 + }, + { + "epoch": 0.72, + "learning_rate": 3.858564741889608e-06, + "loss": 0.2401, + "step": 8390 + }, + { + "epoch": 0.72, + "learning_rate": 3.8563739860277415e-06, + "loss": 0.282, + "step": 8391 + }, + { + "epoch": 0.72, + "learning_rate": 3.854183703686216e-06, + "loss": 0.2543, + "step": 8392 + }, + { + "epoch": 0.72, + "learning_rate": 3.851993895033836e-06, + "loss": 0.2818, + "step": 8393 + }, + { + "epoch": 0.72, + "learning_rate": 3.849804560239394e-06, + "loss": 0.2881, + "step": 8394 + }, + { + "epoch": 0.72, + "learning_rate": 3.847615699471625e-06, + "loss": 0.2687, + "step": 8395 + }, + { + "epoch": 0.72, + "learning_rate": 3.845427312899238e-06, + "loss": 0.2484, + "step": 8396 + }, + { + "epoch": 0.72, + "learning_rate": 3.843239400690903e-06, + "loss": 0.5842, + "step": 8397 + }, + { + "epoch": 0.72, + "learning_rate": 3.841051963015254e-06, + "loss": 0.2821, + "step": 8398 + }, + { + "epoch": 0.72, + "learning_rate": 3.83886500004089e-06, + "loss": 0.2713, + "step": 8399 + }, + { + "epoch": 0.72, + "learning_rate": 3.8366785119363624e-06, + "loss": 0.2335, + "step": 8400 + }, + { + "epoch": 0.72, + "learning_rate": 3.834492498870205e-06, + "loss": 0.2289, + "step": 8401 + }, + { + "epoch": 0.72, + "learning_rate": 3.8323069610109046e-06, + "loss": 0.2355, + "step": 8402 + }, + { + "epoch": 0.72, + "learning_rate": 3.830121898526901e-06, + "loss": 0.3112, + "step": 8403 + }, + { + "epoch": 0.72, + "learning_rate": 3.827937311586622e-06, + "loss": 0.2593, + "step": 8404 + }, + { + "epoch": 0.72, + "learning_rate": 3.825753200358434e-06, + "loss": 0.3228, + "step": 8405 + }, + { + "epoch": 0.72, + "learning_rate": 3.823569565010682e-06, + "loss": 0.2648, + "step": 8406 + }, + { + "epoch": 0.72, + "learning_rate": 3.821386405711669e-06, + "loss": 0.2551, + "step": 8407 + }, + { + "epoch": 0.72, + "learning_rate": 3.819203722629663e-06, + "loss": 0.3141, + "step": 8408 + }, + { + "epoch": 0.72, + "learning_rate": 3.817021515932897e-06, + "loss": 0.2149, + "step": 8409 + }, + { + "epoch": 0.72, + "learning_rate": 3.814839785789555e-06, + "loss": 0.2427, + "step": 8410 + }, + { + "epoch": 0.72, + "learning_rate": 3.8126585323678066e-06, + "loss": 0.263, + "step": 8411 + }, + { + "epoch": 0.72, + "learning_rate": 3.8104777558357632e-06, + "loss": 0.2725, + "step": 8412 + }, + { + "epoch": 0.72, + "learning_rate": 3.8082974563615104e-06, + "loss": 0.2427, + "step": 8413 + }, + { + "epoch": 0.72, + "learning_rate": 3.8061176341130955e-06, + "loss": 0.2381, + "step": 8414 + }, + { + "epoch": 0.72, + "learning_rate": 3.8039382892585297e-06, + "loss": 0.2234, + "step": 8415 + }, + { + "epoch": 0.72, + "learning_rate": 3.8017594219657872e-06, + "loss": 0.2787, + "step": 8416 + }, + { + "epoch": 0.72, + "learning_rate": 3.799581032402796e-06, + "loss": 0.3348, + "step": 8417 + }, + { + "epoch": 0.72, + "learning_rate": 3.7974031207374685e-06, + "loss": 0.2542, + "step": 8418 + }, + { + "epoch": 0.72, + "learning_rate": 3.795225687137657e-06, + "loss": 0.2748, + "step": 8419 + }, + { + "epoch": 0.72, + "learning_rate": 3.7930487317711907e-06, + "loss": 0.2807, + "step": 8420 + }, + { + "epoch": 0.72, + "learning_rate": 3.7908722548058586e-06, + "loss": 0.2467, + "step": 8421 + }, + { + "epoch": 0.72, + "learning_rate": 3.788696256409412e-06, + "loss": 0.2634, + "step": 8422 + }, + { + "epoch": 0.72, + "learning_rate": 3.7865207367495716e-06, + "loss": 0.2833, + "step": 8423 + }, + { + "epoch": 0.72, + "learning_rate": 3.784345695994004e-06, + "loss": 0.3292, + "step": 8424 + }, + { + "epoch": 0.72, + "learning_rate": 3.7821711343103652e-06, + "loss": 0.3304, + "step": 8425 + }, + { + "epoch": 0.72, + "learning_rate": 3.7799970518662477e-06, + "loss": 0.3697, + "step": 8426 + }, + { + "epoch": 0.72, + "learning_rate": 3.777823448829224e-06, + "loss": 0.338, + "step": 8427 + }, + { + "epoch": 0.72, + "learning_rate": 3.7756503253668244e-06, + "loss": 0.2753, + "step": 8428 + }, + { + "epoch": 0.72, + "learning_rate": 3.7734776816465413e-06, + "loss": 0.2821, + "step": 8429 + }, + { + "epoch": 0.72, + "learning_rate": 3.771305517835837e-06, + "loss": 0.2437, + "step": 8430 + }, + { + "epoch": 0.72, + "learning_rate": 3.769133834102119e-06, + "loss": 0.2803, + "step": 8431 + }, + { + "epoch": 0.72, + "learning_rate": 3.766962630612785e-06, + "loss": 0.3024, + "step": 8432 + }, + { + "epoch": 0.72, + "learning_rate": 3.764791907535168e-06, + "loss": 0.3219, + "step": 8433 + }, + { + "epoch": 0.72, + "learning_rate": 3.7626216650365833e-06, + "loss": 0.2435, + "step": 8434 + }, + { + "epoch": 0.72, + "learning_rate": 3.7604519032843e-06, + "loss": 0.2917, + "step": 8435 + }, + { + "epoch": 0.72, + "learning_rate": 3.7582826224455537e-06, + "loss": 0.2458, + "step": 8436 + }, + { + "epoch": 0.72, + "learning_rate": 3.756113822687546e-06, + "loss": 0.2998, + "step": 8437 + }, + { + "epoch": 0.72, + "learning_rate": 3.7539455041774255e-06, + "loss": 0.2598, + "step": 8438 + }, + { + "epoch": 0.72, + "learning_rate": 3.751777667082326e-06, + "loss": 0.2447, + "step": 8439 + }, + { + "epoch": 0.72, + "learning_rate": 3.749610311569334e-06, + "loss": 0.2561, + "step": 8440 + }, + { + "epoch": 0.72, + "learning_rate": 3.7474434378054913e-06, + "loss": 0.2655, + "step": 8441 + }, + { + "epoch": 0.72, + "learning_rate": 3.7452770459578134e-06, + "loss": 0.3026, + "step": 8442 + }, + { + "epoch": 0.72, + "learning_rate": 3.743111136193277e-06, + "loss": 0.2927, + "step": 8443 + }, + { + "epoch": 0.72, + "learning_rate": 3.740945708678817e-06, + "loss": 0.2655, + "step": 8444 + }, + { + "epoch": 0.72, + "learning_rate": 3.7387807635813343e-06, + "loss": 0.275, + "step": 8445 + }, + { + "epoch": 0.72, + "learning_rate": 3.7366163010676937e-06, + "loss": 0.3058, + "step": 8446 + }, + { + "epoch": 0.72, + "learning_rate": 3.7344523213047236e-06, + "loss": 0.3077, + "step": 8447 + }, + { + "epoch": 0.72, + "learning_rate": 3.7322888244592028e-06, + "loss": 0.2726, + "step": 8448 + }, + { + "epoch": 0.72, + "learning_rate": 3.7301258106978953e-06, + "loss": 0.2393, + "step": 8449 + }, + { + "epoch": 0.72, + "learning_rate": 3.7279632801875076e-06, + "loss": 0.285, + "step": 8450 + }, + { + "epoch": 0.72, + "learning_rate": 3.725801233094719e-06, + "loss": 0.2836, + "step": 8451 + }, + { + "epoch": 0.72, + "learning_rate": 3.7236396695861697e-06, + "loss": 0.2501, + "step": 8452 + }, + { + "epoch": 0.72, + "learning_rate": 3.7214785898284635e-06, + "loss": 0.2289, + "step": 8453 + }, + { + "epoch": 0.72, + "learning_rate": 3.7193179939881665e-06, + "loss": 0.5929, + "step": 8454 + }, + { + "epoch": 0.72, + "learning_rate": 3.717157882231798e-06, + "loss": 0.2479, + "step": 8455 + }, + { + "epoch": 0.72, + "learning_rate": 3.714998254725862e-06, + "loss": 0.3182, + "step": 8456 + }, + { + "epoch": 0.72, + "learning_rate": 3.7128391116368035e-06, + "loss": 0.2921, + "step": 8457 + }, + { + "epoch": 0.73, + "learning_rate": 3.710680453131039e-06, + "loss": 0.2568, + "step": 8458 + }, + { + "epoch": 0.73, + "learning_rate": 3.7085222793749486e-06, + "loss": 0.2676, + "step": 8459 + }, + { + "epoch": 0.73, + "learning_rate": 3.7063645905348753e-06, + "loss": 0.2787, + "step": 8460 + }, + { + "epoch": 0.73, + "learning_rate": 3.7042073867771243e-06, + "loss": 0.3097, + "step": 8461 + }, + { + "epoch": 0.73, + "learning_rate": 3.7020506682679524e-06, + "loss": 0.2638, + "step": 8462 + }, + { + "epoch": 0.73, + "learning_rate": 3.6998944351736034e-06, + "loss": 0.3032, + "step": 8463 + }, + { + "epoch": 0.73, + "learning_rate": 3.697738687660257e-06, + "loss": 0.3312, + "step": 8464 + }, + { + "epoch": 0.73, + "learning_rate": 3.6955834258940726e-06, + "loss": 0.2757, + "step": 8465 + }, + { + "epoch": 0.73, + "learning_rate": 3.6934286500411675e-06, + "loss": 0.259, + "step": 8466 + }, + { + "epoch": 0.73, + "learning_rate": 3.6912743602676195e-06, + "loss": 0.2856, + "step": 8467 + }, + { + "epoch": 0.73, + "learning_rate": 3.689120556739475e-06, + "loss": 0.259, + "step": 8468 + }, + { + "epoch": 0.73, + "learning_rate": 3.6869672396227283e-06, + "loss": 0.2773, + "step": 8469 + }, + { + "epoch": 0.73, + "learning_rate": 3.6848144090833602e-06, + "loss": 0.2533, + "step": 8470 + }, + { + "epoch": 0.73, + "learning_rate": 3.68266206528729e-06, + "loss": 0.2699, + "step": 8471 + }, + { + "epoch": 0.73, + "learning_rate": 3.680510208400413e-06, + "loss": 0.2847, + "step": 8472 + }, + { + "epoch": 0.73, + "learning_rate": 3.6783588385885826e-06, + "loss": 0.2516, + "step": 8473 + }, + { + "epoch": 0.73, + "learning_rate": 3.676207956017618e-06, + "loss": 0.2451, + "step": 8474 + }, + { + "epoch": 0.73, + "learning_rate": 3.6740575608533e-06, + "loss": 0.2958, + "step": 8475 + }, + { + "epoch": 0.73, + "learning_rate": 3.6719076532613606e-06, + "loss": 0.2924, + "step": 8476 + }, + { + "epoch": 0.73, + "learning_rate": 3.6697582334075156e-06, + "loss": 0.329, + "step": 8477 + }, + { + "epoch": 0.73, + "learning_rate": 3.667609301457431e-06, + "loss": 0.3004, + "step": 8478 + }, + { + "epoch": 0.73, + "learning_rate": 3.665460857576728e-06, + "loss": 0.3032, + "step": 8479 + }, + { + "epoch": 0.73, + "learning_rate": 3.663312901931002e-06, + "loss": 0.3025, + "step": 8480 + }, + { + "epoch": 0.73, + "learning_rate": 3.6611654346858074e-06, + "loss": 0.3052, + "step": 8481 + }, + { + "epoch": 0.73, + "learning_rate": 3.65901845600666e-06, + "loss": 0.2354, + "step": 8482 + }, + { + "epoch": 0.73, + "learning_rate": 3.656871966059038e-06, + "loss": 0.2615, + "step": 8483 + }, + { + "epoch": 0.73, + "learning_rate": 3.654725965008383e-06, + "loss": 0.2446, + "step": 8484 + }, + { + "epoch": 0.73, + "learning_rate": 3.6525804530201015e-06, + "loss": 0.3176, + "step": 8485 + }, + { + "epoch": 0.73, + "learning_rate": 3.650435430259548e-06, + "loss": 0.2448, + "step": 8486 + }, + { + "epoch": 0.73, + "learning_rate": 3.6482908968920636e-06, + "loss": 0.2935, + "step": 8487 + }, + { + "epoch": 0.73, + "learning_rate": 3.6461468530829293e-06, + "loss": 0.2845, + "step": 8488 + }, + { + "epoch": 0.73, + "learning_rate": 3.6440032989974004e-06, + "loss": 0.2479, + "step": 8489 + }, + { + "epoch": 0.73, + "learning_rate": 3.6418602348006903e-06, + "loss": 0.2798, + "step": 8490 + }, + { + "epoch": 0.73, + "learning_rate": 3.6397176606579775e-06, + "loss": 0.2497, + "step": 8491 + }, + { + "epoch": 0.73, + "learning_rate": 3.6375755767344047e-06, + "loss": 0.3129, + "step": 8492 + }, + { + "epoch": 0.73, + "learning_rate": 3.6354339831950603e-06, + "loss": 0.2673, + "step": 8493 + }, + { + "epoch": 0.73, + "learning_rate": 3.633292880205024e-06, + "loss": 0.2958, + "step": 8494 + }, + { + "epoch": 0.73, + "learning_rate": 3.6311522679293097e-06, + "loss": 0.2579, + "step": 8495 + }, + { + "epoch": 0.73, + "learning_rate": 3.6290121465329096e-06, + "loss": 0.2701, + "step": 8496 + }, + { + "epoch": 0.73, + "learning_rate": 3.626872516180774e-06, + "loss": 0.2783, + "step": 8497 + }, + { + "epoch": 0.73, + "learning_rate": 3.6247333770378133e-06, + "loss": 0.2784, + "step": 8498 + }, + { + "epoch": 0.73, + "learning_rate": 3.622594729268907e-06, + "loss": 0.2903, + "step": 8499 + }, + { + "epoch": 0.73, + "learning_rate": 3.6204565730388795e-06, + "loss": 0.2931, + "step": 8500 + }, + { + "epoch": 0.73, + "learning_rate": 3.618318908512545e-06, + "loss": 0.2687, + "step": 8501 + }, + { + "epoch": 0.73, + "learning_rate": 3.6161817358546513e-06, + "loss": 0.2609, + "step": 8502 + }, + { + "epoch": 0.73, + "learning_rate": 3.6140450552299268e-06, + "loss": 0.2789, + "step": 8503 + }, + { + "epoch": 0.73, + "learning_rate": 3.6119088668030557e-06, + "loss": 0.3287, + "step": 8504 + }, + { + "epoch": 0.73, + "learning_rate": 3.609773170738685e-06, + "loss": 0.2912, + "step": 8505 + }, + { + "epoch": 0.73, + "learning_rate": 3.6076379672014263e-06, + "loss": 0.2817, + "step": 8506 + }, + { + "epoch": 0.73, + "learning_rate": 3.6055032563558402e-06, + "loss": 0.2711, + "step": 8507 + }, + { + "epoch": 0.73, + "learning_rate": 3.6033690383664745e-06, + "loss": 0.2748, + "step": 8508 + }, + { + "epoch": 0.73, + "learning_rate": 3.601235313397813e-06, + "loss": 0.2513, + "step": 8509 + }, + { + "epoch": 0.73, + "learning_rate": 3.5991020816143164e-06, + "loss": 0.2733, + "step": 8510 + }, + { + "epoch": 0.73, + "learning_rate": 3.596969343180403e-06, + "loss": 0.2764, + "step": 8511 + }, + { + "epoch": 0.73, + "learning_rate": 3.594837098260454e-06, + "loss": 0.264, + "step": 8512 + }, + { + "epoch": 0.73, + "learning_rate": 3.5927053470188176e-06, + "loss": 0.2702, + "step": 8513 + }, + { + "epoch": 0.73, + "learning_rate": 3.590574089619786e-06, + "loss": 0.2794, + "step": 8514 + }, + { + "epoch": 0.73, + "learning_rate": 3.5884433262276376e-06, + "loss": 0.2744, + "step": 8515 + }, + { + "epoch": 0.73, + "learning_rate": 3.5863130570065998e-06, + "loss": 0.2683, + "step": 8516 + }, + { + "epoch": 0.73, + "learning_rate": 3.5841832821208577e-06, + "loss": 0.27, + "step": 8517 + }, + { + "epoch": 0.73, + "learning_rate": 3.5820540017345663e-06, + "loss": 0.2721, + "step": 8518 + }, + { + "epoch": 0.73, + "learning_rate": 3.5799252160118405e-06, + "loss": 0.2531, + "step": 8519 + }, + { + "epoch": 0.73, + "learning_rate": 3.5777969251167568e-06, + "loss": 0.2673, + "step": 8520 + }, + { + "epoch": 0.73, + "learning_rate": 3.575669129213353e-06, + "loss": 0.2453, + "step": 8521 + }, + { + "epoch": 0.73, + "learning_rate": 3.5735418284656287e-06, + "loss": 0.2398, + "step": 8522 + }, + { + "epoch": 0.73, + "learning_rate": 3.57141502303755e-06, + "loss": 0.254, + "step": 8523 + }, + { + "epoch": 0.73, + "learning_rate": 3.569288713093034e-06, + "loss": 0.2614, + "step": 8524 + }, + { + "epoch": 0.73, + "learning_rate": 3.5671628987959685e-06, + "loss": 0.3077, + "step": 8525 + }, + { + "epoch": 0.73, + "learning_rate": 3.565037580310201e-06, + "loss": 0.2505, + "step": 8526 + }, + { + "epoch": 0.73, + "learning_rate": 3.5629127577995405e-06, + "loss": 0.2555, + "step": 8527 + }, + { + "epoch": 0.73, + "learning_rate": 3.5607884314277583e-06, + "loss": 0.2615, + "step": 8528 + }, + { + "epoch": 0.73, + "learning_rate": 3.558664601358587e-06, + "loss": 0.2531, + "step": 8529 + }, + { + "epoch": 0.73, + "learning_rate": 3.5565412677557233e-06, + "loss": 0.2532, + "step": 8530 + }, + { + "epoch": 0.73, + "learning_rate": 3.5544184307828142e-06, + "loss": 0.2345, + "step": 8531 + }, + { + "epoch": 0.73, + "learning_rate": 3.552296090603491e-06, + "loss": 0.2524, + "step": 8532 + }, + { + "epoch": 0.73, + "learning_rate": 3.5501742473813226e-06, + "loss": 0.2336, + "step": 8533 + }, + { + "epoch": 0.73, + "learning_rate": 3.548052901279854e-06, + "loss": 0.246, + "step": 8534 + }, + { + "epoch": 0.73, + "learning_rate": 3.545932052462587e-06, + "loss": 0.2652, + "step": 8535 + }, + { + "epoch": 0.73, + "learning_rate": 3.5438117010929875e-06, + "loss": 0.2462, + "step": 8536 + }, + { + "epoch": 0.73, + "learning_rate": 3.541691847334484e-06, + "loss": 0.2732, + "step": 8537 + }, + { + "epoch": 0.73, + "learning_rate": 3.5395724913504546e-06, + "loss": 0.3038, + "step": 8538 + }, + { + "epoch": 0.73, + "learning_rate": 3.537453633304263e-06, + "loss": 0.3066, + "step": 8539 + }, + { + "epoch": 0.73, + "learning_rate": 3.5353352733592095e-06, + "loss": 0.2835, + "step": 8540 + }, + { + "epoch": 0.73, + "learning_rate": 3.53321741167857e-06, + "loss": 0.2787, + "step": 8541 + }, + { + "epoch": 0.73, + "learning_rate": 3.5311000484255796e-06, + "loss": 0.5907, + "step": 8542 + }, + { + "epoch": 0.73, + "learning_rate": 3.5289831837634334e-06, + "loss": 0.2612, + "step": 8543 + }, + { + "epoch": 0.73, + "learning_rate": 3.5268668178552922e-06, + "loss": 0.3006, + "step": 8544 + }, + { + "epoch": 0.73, + "learning_rate": 3.5247509508642653e-06, + "loss": 0.2737, + "step": 8545 + }, + { + "epoch": 0.73, + "learning_rate": 3.5226355829534475e-06, + "loss": 0.2662, + "step": 8546 + }, + { + "epoch": 0.73, + "learning_rate": 3.520520714285869e-06, + "loss": 0.2731, + "step": 8547 + }, + { + "epoch": 0.73, + "learning_rate": 3.5184063450245386e-06, + "loss": 0.2498, + "step": 8548 + }, + { + "epoch": 0.73, + "learning_rate": 3.5162924753324202e-06, + "loss": 0.2605, + "step": 8549 + }, + { + "epoch": 0.73, + "learning_rate": 3.5141791053724405e-06, + "loss": 0.277, + "step": 8550 + }, + { + "epoch": 0.73, + "learning_rate": 3.512066235307492e-06, + "loss": 0.3588, + "step": 8551 + }, + { + "epoch": 0.73, + "learning_rate": 3.509953865300414e-06, + "loss": 0.2222, + "step": 8552 + }, + { + "epoch": 0.73, + "learning_rate": 3.5078419955140263e-06, + "loss": 0.2911, + "step": 8553 + }, + { + "epoch": 0.73, + "learning_rate": 3.5057306261111024e-06, + "loss": 0.2678, + "step": 8554 + }, + { + "epoch": 0.73, + "learning_rate": 3.5036197572543697e-06, + "loss": 0.3234, + "step": 8555 + }, + { + "epoch": 0.73, + "learning_rate": 3.5015093891065253e-06, + "loss": 0.2738, + "step": 8556 + }, + { + "epoch": 0.73, + "learning_rate": 3.499399521830229e-06, + "loss": 0.2598, + "step": 8557 + }, + { + "epoch": 0.73, + "learning_rate": 3.4972901555880957e-06, + "loss": 0.2654, + "step": 8558 + }, + { + "epoch": 0.73, + "learning_rate": 3.4951812905427073e-06, + "loss": 0.2504, + "step": 8559 + }, + { + "epoch": 0.73, + "learning_rate": 3.4930729268566035e-06, + "loss": 0.3011, + "step": 8560 + }, + { + "epoch": 0.73, + "learning_rate": 3.49096506469229e-06, + "loss": 0.2445, + "step": 8561 + }, + { + "epoch": 0.73, + "learning_rate": 3.488857704212224e-06, + "loss": 0.2291, + "step": 8562 + }, + { + "epoch": 0.73, + "learning_rate": 3.4867508455788336e-06, + "loss": 0.2383, + "step": 8563 + }, + { + "epoch": 0.73, + "learning_rate": 3.484644488954505e-06, + "loss": 0.2345, + "step": 8564 + }, + { + "epoch": 0.73, + "learning_rate": 3.4825386345015865e-06, + "loss": 0.2863, + "step": 8565 + }, + { + "epoch": 0.73, + "learning_rate": 3.4804332823823862e-06, + "loss": 0.2668, + "step": 8566 + }, + { + "epoch": 0.73, + "learning_rate": 3.4783284327591736e-06, + "loss": 0.5621, + "step": 8567 + }, + { + "epoch": 0.73, + "learning_rate": 3.4762240857941843e-06, + "loss": 0.5822, + "step": 8568 + }, + { + "epoch": 0.73, + "learning_rate": 3.474120241649601e-06, + "loss": 0.3455, + "step": 8569 + }, + { + "epoch": 0.73, + "learning_rate": 3.4720169004875914e-06, + "loss": 0.2673, + "step": 8570 + }, + { + "epoch": 0.73, + "learning_rate": 3.46991406247026e-06, + "loss": 0.2792, + "step": 8571 + }, + { + "epoch": 0.73, + "learning_rate": 3.4678117277596856e-06, + "loss": 0.2955, + "step": 8572 + }, + { + "epoch": 0.73, + "learning_rate": 3.465709896517908e-06, + "loss": 0.274, + "step": 8573 + }, + { + "epoch": 0.73, + "learning_rate": 3.4636085689069244e-06, + "loss": 0.3306, + "step": 8574 + }, + { + "epoch": 0.74, + "learning_rate": 3.461507745088698e-06, + "loss": 0.2629, + "step": 8575 + }, + { + "epoch": 0.74, + "learning_rate": 3.4594074252251397e-06, + "loss": 0.27, + "step": 8576 + }, + { + "epoch": 0.74, + "learning_rate": 3.4573076094781466e-06, + "loss": 0.2626, + "step": 8577 + }, + { + "epoch": 0.74, + "learning_rate": 3.4552082980095514e-06, + "loss": 0.3026, + "step": 8578 + }, + { + "epoch": 0.74, + "learning_rate": 3.4531094909811614e-06, + "loss": 0.2465, + "step": 8579 + }, + { + "epoch": 0.74, + "learning_rate": 3.4510111885547426e-06, + "loss": 0.2574, + "step": 8580 + }, + { + "epoch": 0.74, + "learning_rate": 3.4489133908920225e-06, + "loss": 0.2577, + "step": 8581 + }, + { + "epoch": 0.74, + "learning_rate": 3.446816098154692e-06, + "loss": 0.2302, + "step": 8582 + }, + { + "epoch": 0.74, + "learning_rate": 3.4447193105043884e-06, + "loss": 0.2762, + "step": 8583 + }, + { + "epoch": 0.74, + "learning_rate": 3.4426230281027374e-06, + "loss": 0.2634, + "step": 8584 + }, + { + "epoch": 0.74, + "learning_rate": 3.4405272511112986e-06, + "loss": 0.267, + "step": 8585 + }, + { + "epoch": 0.74, + "learning_rate": 3.4384319796916075e-06, + "loss": 0.2787, + "step": 8586 + }, + { + "epoch": 0.74, + "learning_rate": 3.4363372140051586e-06, + "loss": 0.247, + "step": 8587 + }, + { + "epoch": 0.74, + "learning_rate": 3.4342429542134047e-06, + "loss": 0.2725, + "step": 8588 + }, + { + "epoch": 0.74, + "learning_rate": 3.432149200477766e-06, + "loss": 0.2563, + "step": 8589 + }, + { + "epoch": 0.74, + "learning_rate": 3.430055952959607e-06, + "loss": 0.2482, + "step": 8590 + }, + { + "epoch": 0.74, + "learning_rate": 3.4279632118202744e-06, + "loss": 0.3, + "step": 8591 + }, + { + "epoch": 0.74, + "learning_rate": 3.4258709772210686e-06, + "loss": 0.2694, + "step": 8592 + }, + { + "epoch": 0.74, + "learning_rate": 3.4237792493232402e-06, + "loss": 0.2949, + "step": 8593 + }, + { + "epoch": 0.74, + "learning_rate": 3.4216880282880128e-06, + "loss": 0.2317, + "step": 8594 + }, + { + "epoch": 0.74, + "learning_rate": 3.4195973142765694e-06, + "loss": 0.3146, + "step": 8595 + }, + { + "epoch": 0.74, + "learning_rate": 3.417507107450049e-06, + "loss": 0.2536, + "step": 8596 + }, + { + "epoch": 0.74, + "learning_rate": 3.4154174079695555e-06, + "loss": 0.297, + "step": 8597 + }, + { + "epoch": 0.74, + "learning_rate": 3.4133282159961535e-06, + "loss": 0.2405, + "step": 8598 + }, + { + "epoch": 0.74, + "learning_rate": 3.4112395316908697e-06, + "loss": 0.2518, + "step": 8599 + }, + { + "epoch": 0.74, + "learning_rate": 3.4091513552146836e-06, + "loss": 0.2872, + "step": 8600 + }, + { + "epoch": 0.74, + "learning_rate": 3.4070636867285455e-06, + "loss": 0.3068, + "step": 8601 + }, + { + "epoch": 0.74, + "learning_rate": 3.404976526393361e-06, + "loss": 0.3159, + "step": 8602 + }, + { + "epoch": 0.74, + "learning_rate": 3.4028898743699988e-06, + "loss": 0.2369, + "step": 8603 + }, + { + "epoch": 0.74, + "learning_rate": 3.4008037308192874e-06, + "loss": 0.2592, + "step": 8604 + }, + { + "epoch": 0.74, + "learning_rate": 3.3987180959020184e-06, + "loss": 0.3264, + "step": 8605 + }, + { + "epoch": 0.74, + "learning_rate": 3.3966329697789424e-06, + "loss": 0.2519, + "step": 8606 + }, + { + "epoch": 0.74, + "learning_rate": 3.394548352610767e-06, + "loss": 0.2443, + "step": 8607 + }, + { + "epoch": 0.74, + "learning_rate": 3.3924642445581647e-06, + "loss": 0.2999, + "step": 8608 + }, + { + "epoch": 0.74, + "learning_rate": 3.39038064578177e-06, + "loss": 0.2908, + "step": 8609 + }, + { + "epoch": 0.74, + "learning_rate": 3.3882975564421773e-06, + "loss": 0.2714, + "step": 8610 + }, + { + "epoch": 0.74, + "learning_rate": 3.3862149766999385e-06, + "loss": 0.6063, + "step": 8611 + }, + { + "epoch": 0.74, + "learning_rate": 3.3841329067155693e-06, + "loss": 0.2453, + "step": 8612 + }, + { + "epoch": 0.74, + "learning_rate": 3.38205134664955e-06, + "loss": 0.2421, + "step": 8613 + }, + { + "epoch": 0.74, + "learning_rate": 3.379970296662305e-06, + "loss": 0.2287, + "step": 8614 + }, + { + "epoch": 0.74, + "learning_rate": 3.3778897569142454e-06, + "loss": 0.2653, + "step": 8615 + }, + { + "epoch": 0.74, + "learning_rate": 3.3758097275657208e-06, + "loss": 0.2332, + "step": 8616 + }, + { + "epoch": 0.74, + "learning_rate": 3.3737302087770497e-06, + "loss": 0.2803, + "step": 8617 + }, + { + "epoch": 0.74, + "learning_rate": 3.3716512007085133e-06, + "loss": 0.2875, + "step": 8618 + }, + { + "epoch": 0.74, + "learning_rate": 3.369572703520352e-06, + "loss": 0.2805, + "step": 8619 + }, + { + "epoch": 0.74, + "learning_rate": 3.3674947173727667e-06, + "loss": 0.2669, + "step": 8620 + }, + { + "epoch": 0.74, + "learning_rate": 3.3654172424259103e-06, + "loss": 0.3019, + "step": 8621 + }, + { + "epoch": 0.74, + "learning_rate": 3.363340278839916e-06, + "loss": 0.2605, + "step": 8622 + }, + { + "epoch": 0.74, + "learning_rate": 3.3612638267748575e-06, + "loss": 0.2546, + "step": 8623 + }, + { + "epoch": 0.74, + "learning_rate": 3.359187886390781e-06, + "loss": 0.2698, + "step": 8624 + }, + { + "epoch": 0.74, + "learning_rate": 3.3571124578476888e-06, + "loss": 0.2451, + "step": 8625 + }, + { + "epoch": 0.74, + "learning_rate": 3.355037541305545e-06, + "loss": 0.247, + "step": 8626 + }, + { + "epoch": 0.74, + "learning_rate": 3.3529631369242764e-06, + "loss": 0.2975, + "step": 8627 + }, + { + "epoch": 0.74, + "learning_rate": 3.350889244863759e-06, + "loss": 0.2687, + "step": 8628 + }, + { + "epoch": 0.74, + "learning_rate": 3.348815865283848e-06, + "loss": 0.2657, + "step": 8629 + }, + { + "epoch": 0.74, + "learning_rate": 3.3467429983443477e-06, + "loss": 0.2597, + "step": 8630 + }, + { + "epoch": 0.74, + "learning_rate": 3.3446706442050203e-06, + "loss": 0.2668, + "step": 8631 + }, + { + "epoch": 0.74, + "learning_rate": 3.342598803025595e-06, + "loss": 0.2994, + "step": 8632 + }, + { + "epoch": 0.74, + "learning_rate": 3.3405274749657577e-06, + "loss": 0.2716, + "step": 8633 + }, + { + "epoch": 0.74, + "learning_rate": 3.3384566601851574e-06, + "loss": 0.2849, + "step": 8634 + }, + { + "epoch": 0.74, + "learning_rate": 3.336386358843403e-06, + "loss": 0.3148, + "step": 8635 + }, + { + "epoch": 0.74, + "learning_rate": 3.3343165711000613e-06, + "loss": 0.2504, + "step": 8636 + }, + { + "epoch": 0.74, + "learning_rate": 3.332247297114666e-06, + "loss": 0.2939, + "step": 8637 + }, + { + "epoch": 0.74, + "learning_rate": 3.330178537046699e-06, + "loss": 0.2566, + "step": 8638 + }, + { + "epoch": 0.74, + "learning_rate": 3.328110291055614e-06, + "loss": 0.2408, + "step": 8639 + }, + { + "epoch": 0.74, + "learning_rate": 3.3260425593008207e-06, + "loss": 0.2856, + "step": 8640 + }, + { + "epoch": 0.74, + "learning_rate": 3.323975341941691e-06, + "loss": 0.2764, + "step": 8641 + }, + { + "epoch": 0.74, + "learning_rate": 3.321908639137553e-06, + "loss": 0.29, + "step": 8642 + }, + { + "epoch": 0.74, + "learning_rate": 3.3198424510477014e-06, + "loss": 0.2631, + "step": 8643 + }, + { + "epoch": 0.74, + "learning_rate": 3.3177767778313884e-06, + "loss": 0.2785, + "step": 8644 + }, + { + "epoch": 0.74, + "learning_rate": 3.31571161964782e-06, + "loss": 0.307, + "step": 8645 + }, + { + "epoch": 0.74, + "learning_rate": 3.313646976656172e-06, + "loss": 0.2286, + "step": 8646 + }, + { + "epoch": 0.74, + "learning_rate": 3.3115828490155775e-06, + "loss": 0.2911, + "step": 8647 + }, + { + "epoch": 0.74, + "learning_rate": 3.309519236885128e-06, + "loss": 0.3014, + "step": 8648 + }, + { + "epoch": 0.74, + "learning_rate": 3.307456140423877e-06, + "loss": 0.2965, + "step": 8649 + }, + { + "epoch": 0.74, + "learning_rate": 3.305393559790838e-06, + "loss": 0.2422, + "step": 8650 + }, + { + "epoch": 0.74, + "learning_rate": 3.303331495144988e-06, + "loss": 0.2131, + "step": 8651 + }, + { + "epoch": 0.74, + "learning_rate": 3.3012699466452503e-06, + "loss": 0.3192, + "step": 8652 + }, + { + "epoch": 0.74, + "learning_rate": 3.299208914450532e-06, + "loss": 0.2818, + "step": 8653 + }, + { + "epoch": 0.74, + "learning_rate": 3.2971483987196783e-06, + "loss": 0.289, + "step": 8654 + }, + { + "epoch": 0.74, + "learning_rate": 3.295088399611507e-06, + "loss": 0.33, + "step": 8655 + }, + { + "epoch": 0.74, + "learning_rate": 3.2930289172847905e-06, + "loss": 0.2634, + "step": 8656 + }, + { + "epoch": 0.74, + "learning_rate": 3.290969951898265e-06, + "loss": 0.2961, + "step": 8657 + }, + { + "epoch": 0.74, + "learning_rate": 3.288911503610629e-06, + "loss": 0.2738, + "step": 8658 + }, + { + "epoch": 0.74, + "learning_rate": 3.286853572580527e-06, + "loss": 0.3051, + "step": 8659 + }, + { + "epoch": 0.74, + "learning_rate": 3.284796158966589e-06, + "loss": 0.2639, + "step": 8660 + }, + { + "epoch": 0.74, + "learning_rate": 3.282739262927377e-06, + "loss": 0.2726, + "step": 8661 + }, + { + "epoch": 0.74, + "learning_rate": 3.2806828846214324e-06, + "loss": 0.2635, + "step": 8662 + }, + { + "epoch": 0.74, + "learning_rate": 3.2786270242072504e-06, + "loss": 0.2471, + "step": 8663 + }, + { + "epoch": 0.74, + "learning_rate": 3.276571681843286e-06, + "loss": 0.5651, + "step": 8664 + }, + { + "epoch": 0.74, + "learning_rate": 3.274516857687957e-06, + "loss": 0.2816, + "step": 8665 + }, + { + "epoch": 0.74, + "learning_rate": 3.2724625518996322e-06, + "loss": 0.3109, + "step": 8666 + }, + { + "epoch": 0.74, + "learning_rate": 3.2704087646366546e-06, + "loss": 0.2872, + "step": 8667 + }, + { + "epoch": 0.74, + "learning_rate": 3.2683554960573207e-06, + "loss": 0.2745, + "step": 8668 + }, + { + "epoch": 0.74, + "learning_rate": 3.2663027463198794e-06, + "loss": 0.3009, + "step": 8669 + }, + { + "epoch": 0.74, + "learning_rate": 3.264250515582551e-06, + "loss": 0.2886, + "step": 8670 + }, + { + "epoch": 0.74, + "learning_rate": 3.2621988040035103e-06, + "loss": 0.2413, + "step": 8671 + }, + { + "epoch": 0.74, + "learning_rate": 3.2601476117408937e-06, + "loss": 0.2578, + "step": 8672 + }, + { + "epoch": 0.74, + "learning_rate": 3.258096938952796e-06, + "loss": 0.2976, + "step": 8673 + }, + { + "epoch": 0.74, + "learning_rate": 3.2560467857972744e-06, + "loss": 0.5608, + "step": 8674 + }, + { + "epoch": 0.74, + "learning_rate": 3.2539971524323455e-06, + "loss": 0.2701, + "step": 8675 + }, + { + "epoch": 0.74, + "learning_rate": 3.2519480390159806e-06, + "loss": 0.2593, + "step": 8676 + }, + { + "epoch": 0.74, + "learning_rate": 3.2498994457061184e-06, + "loss": 0.2593, + "step": 8677 + }, + { + "epoch": 0.74, + "learning_rate": 3.247851372660653e-06, + "loss": 0.2738, + "step": 8678 + }, + { + "epoch": 0.74, + "learning_rate": 3.2458038200374408e-06, + "loss": 0.2813, + "step": 8679 + }, + { + "epoch": 0.74, + "learning_rate": 3.2437567879942966e-06, + "loss": 0.3186, + "step": 8680 + }, + { + "epoch": 0.74, + "learning_rate": 3.241710276688995e-06, + "loss": 0.2585, + "step": 8681 + }, + { + "epoch": 0.74, + "learning_rate": 3.239664286279276e-06, + "loss": 0.2809, + "step": 8682 + }, + { + "epoch": 0.74, + "learning_rate": 3.237618816922826e-06, + "loss": 0.2548, + "step": 8683 + }, + { + "epoch": 0.74, + "learning_rate": 3.235573868777304e-06, + "loss": 0.2615, + "step": 8684 + }, + { + "epoch": 0.74, + "learning_rate": 3.233529442000324e-06, + "loss": 0.5789, + "step": 8685 + }, + { + "epoch": 0.74, + "learning_rate": 3.23148553674946e-06, + "loss": 0.2833, + "step": 8686 + }, + { + "epoch": 0.74, + "learning_rate": 3.2294421531822475e-06, + "loss": 0.2785, + "step": 8687 + }, + { + "epoch": 0.74, + "learning_rate": 3.227399291456179e-06, + "loss": 0.2759, + "step": 8688 + }, + { + "epoch": 0.74, + "learning_rate": 3.225356951728712e-06, + "loss": 0.2632, + "step": 8689 + }, + { + "epoch": 0.74, + "learning_rate": 3.223315134157253e-06, + "loss": 0.2714, + "step": 8690 + }, + { + "epoch": 0.74, + "learning_rate": 3.2212738388991803e-06, + "loss": 0.2669, + "step": 8691 + }, + { + "epoch": 0.75, + "learning_rate": 3.2192330661118243e-06, + "loss": 0.2535, + "step": 8692 + }, + { + "epoch": 0.75, + "learning_rate": 3.217192815952479e-06, + "loss": 0.2918, + "step": 8693 + }, + { + "epoch": 0.75, + "learning_rate": 3.2151530885783967e-06, + "loss": 0.282, + "step": 8694 + }, + { + "epoch": 0.75, + "learning_rate": 3.213113884146789e-06, + "loss": 0.2822, + "step": 8695 + }, + { + "epoch": 0.75, + "learning_rate": 3.211075202814832e-06, + "loss": 0.2908, + "step": 8696 + }, + { + "epoch": 0.75, + "learning_rate": 3.2090370447396468e-06, + "loss": 0.2706, + "step": 8697 + }, + { + "epoch": 0.75, + "learning_rate": 3.2069994100783376e-06, + "loss": 0.256, + "step": 8698 + }, + { + "epoch": 0.75, + "learning_rate": 3.2049622989879446e-06, + "loss": 0.3157, + "step": 8699 + }, + { + "epoch": 0.75, + "learning_rate": 3.202925711625483e-06, + "loss": 0.2403, + "step": 8700 + }, + { + "epoch": 0.75, + "learning_rate": 3.2008896481479223e-06, + "loss": 0.2863, + "step": 8701 + }, + { + "epoch": 0.75, + "learning_rate": 3.1988541087121916e-06, + "loss": 0.2985, + "step": 8702 + }, + { + "epoch": 0.75, + "learning_rate": 3.196819093475184e-06, + "loss": 0.3075, + "step": 8703 + }, + { + "epoch": 0.75, + "learning_rate": 3.194784602593739e-06, + "loss": 0.2827, + "step": 8704 + }, + { + "epoch": 0.75, + "learning_rate": 3.1927506362246753e-06, + "loss": 0.2224, + "step": 8705 + }, + { + "epoch": 0.75, + "learning_rate": 3.1907171945247595e-06, + "loss": 0.269, + "step": 8706 + }, + { + "epoch": 0.75, + "learning_rate": 3.1886842776507133e-06, + "loss": 0.3062, + "step": 8707 + }, + { + "epoch": 0.75, + "learning_rate": 3.1866518857592267e-06, + "loss": 0.2865, + "step": 8708 + }, + { + "epoch": 0.75, + "learning_rate": 3.1846200190069476e-06, + "loss": 0.3038, + "step": 8709 + }, + { + "epoch": 0.75, + "learning_rate": 3.182588677550482e-06, + "loss": 0.2447, + "step": 8710 + }, + { + "epoch": 0.75, + "learning_rate": 3.180557861546395e-06, + "loss": 0.2328, + "step": 8711 + }, + { + "epoch": 0.75, + "learning_rate": 3.1785275711512122e-06, + "loss": 0.2377, + "step": 8712 + }, + { + "epoch": 0.75, + "learning_rate": 3.1764978065214215e-06, + "loss": 0.2819, + "step": 8713 + }, + { + "epoch": 0.75, + "learning_rate": 3.174468567813461e-06, + "loss": 0.31, + "step": 8714 + }, + { + "epoch": 0.75, + "learning_rate": 3.1724398551837364e-06, + "loss": 0.2526, + "step": 8715 + }, + { + "epoch": 0.75, + "learning_rate": 3.1704116687886132e-06, + "loss": 0.2666, + "step": 8716 + }, + { + "epoch": 0.75, + "learning_rate": 3.168384008784412e-06, + "loss": 0.3362, + "step": 8717 + }, + { + "epoch": 0.75, + "learning_rate": 3.1663568753274153e-06, + "loss": 0.2383, + "step": 8718 + }, + { + "epoch": 0.75, + "learning_rate": 3.164330268573864e-06, + "loss": 0.2584, + "step": 8719 + }, + { + "epoch": 0.75, + "learning_rate": 3.1623041886799643e-06, + "loss": 0.2503, + "step": 8720 + }, + { + "epoch": 0.75, + "learning_rate": 3.1602786358018668e-06, + "loss": 0.2581, + "step": 8721 + }, + { + "epoch": 0.75, + "learning_rate": 3.1582536100956973e-06, + "loss": 0.274, + "step": 8722 + }, + { + "epoch": 0.75, + "learning_rate": 3.1562291117175324e-06, + "loss": 0.2448, + "step": 8723 + }, + { + "epoch": 0.75, + "learning_rate": 3.154205140823412e-06, + "loss": 0.3199, + "step": 8724 + }, + { + "epoch": 0.75, + "learning_rate": 3.152181697569334e-06, + "loss": 0.2867, + "step": 8725 + }, + { + "epoch": 0.75, + "learning_rate": 3.1501587821112532e-06, + "loss": 0.2816, + "step": 8726 + }, + { + "epoch": 0.75, + "learning_rate": 3.1481363946050925e-06, + "loss": 0.2725, + "step": 8727 + }, + { + "epoch": 0.75, + "learning_rate": 3.146114535206718e-06, + "loss": 0.2487, + "step": 8728 + }, + { + "epoch": 0.75, + "learning_rate": 3.1440932040719694e-06, + "loss": 0.2401, + "step": 8729 + }, + { + "epoch": 0.75, + "learning_rate": 3.1420724013566408e-06, + "loss": 0.2852, + "step": 8730 + }, + { + "epoch": 0.75, + "learning_rate": 3.1400521272164854e-06, + "loss": 0.3552, + "step": 8731 + }, + { + "epoch": 0.75, + "learning_rate": 3.1380323818072155e-06, + "loss": 0.2738, + "step": 8732 + }, + { + "epoch": 0.75, + "learning_rate": 3.136013165284504e-06, + "loss": 0.2919, + "step": 8733 + }, + { + "epoch": 0.75, + "learning_rate": 3.1339944778039844e-06, + "loss": 0.5553, + "step": 8734 + }, + { + "epoch": 0.75, + "learning_rate": 3.1319763195212382e-06, + "loss": 0.2923, + "step": 8735 + }, + { + "epoch": 0.75, + "learning_rate": 3.129958690591829e-06, + "loss": 0.3061, + "step": 8736 + }, + { + "epoch": 0.75, + "learning_rate": 3.127941591171254e-06, + "loss": 0.283, + "step": 8737 + }, + { + "epoch": 0.75, + "learning_rate": 3.125925021414985e-06, + "loss": 0.2739, + "step": 8738 + }, + { + "epoch": 0.75, + "learning_rate": 3.1239089814784505e-06, + "loss": 0.2598, + "step": 8739 + }, + { + "epoch": 0.75, + "learning_rate": 3.1218934715170355e-06, + "loss": 0.2942, + "step": 8740 + }, + { + "epoch": 0.75, + "learning_rate": 3.119878491686089e-06, + "loss": 0.3181, + "step": 8741 + }, + { + "epoch": 0.75, + "learning_rate": 3.1178640421409057e-06, + "loss": 0.2711, + "step": 8742 + }, + { + "epoch": 0.75, + "learning_rate": 3.115850123036761e-06, + "loss": 0.2656, + "step": 8743 + }, + { + "epoch": 0.75, + "learning_rate": 3.1138367345288757e-06, + "loss": 0.2692, + "step": 8744 + }, + { + "epoch": 0.75, + "learning_rate": 3.111823876772426e-06, + "loss": 0.389, + "step": 8745 + }, + { + "epoch": 0.75, + "learning_rate": 3.1098115499225567e-06, + "loss": 0.2974, + "step": 8746 + }, + { + "epoch": 0.75, + "learning_rate": 3.1077997541343672e-06, + "loss": 0.2835, + "step": 8747 + }, + { + "epoch": 0.75, + "learning_rate": 3.1057884895629174e-06, + "loss": 0.2287, + "step": 8748 + }, + { + "epoch": 0.75, + "learning_rate": 3.1037777563632264e-06, + "loss": 0.2888, + "step": 8749 + }, + { + "epoch": 0.75, + "learning_rate": 3.1017675546902704e-06, + "loss": 0.2555, + "step": 8750 + }, + { + "epoch": 0.75, + "learning_rate": 3.0997578846989886e-06, + "loss": 0.2608, + "step": 8751 + }, + { + "epoch": 0.75, + "learning_rate": 3.097748746544271e-06, + "loss": 0.3033, + "step": 8752 + }, + { + "epoch": 0.75, + "learning_rate": 3.095740140380975e-06, + "loss": 0.3217, + "step": 8753 + }, + { + "epoch": 0.75, + "learning_rate": 3.0937320663639148e-06, + "loss": 0.3027, + "step": 8754 + }, + { + "epoch": 0.75, + "learning_rate": 3.091724524647861e-06, + "loss": 0.2849, + "step": 8755 + }, + { + "epoch": 0.75, + "learning_rate": 3.0897175153875467e-06, + "loss": 0.2658, + "step": 8756 + }, + { + "epoch": 0.75, + "learning_rate": 3.087711038737662e-06, + "loss": 0.2956, + "step": 8757 + }, + { + "epoch": 0.75, + "learning_rate": 3.0857050948528576e-06, + "loss": 0.2896, + "step": 8758 + }, + { + "epoch": 0.75, + "learning_rate": 3.083699683887739e-06, + "loss": 0.3032, + "step": 8759 + }, + { + "epoch": 0.75, + "learning_rate": 3.081694805996872e-06, + "loss": 0.2695, + "step": 8760 + }, + { + "epoch": 0.75, + "learning_rate": 3.0796904613347855e-06, + "loss": 0.2991, + "step": 8761 + }, + { + "epoch": 0.75, + "learning_rate": 3.0776866500559654e-06, + "loss": 0.3035, + "step": 8762 + }, + { + "epoch": 0.75, + "learning_rate": 3.0756833723148526e-06, + "loss": 0.3132, + "step": 8763 + }, + { + "epoch": 0.75, + "learning_rate": 3.0736806282658514e-06, + "loss": 0.2821, + "step": 8764 + }, + { + "epoch": 0.75, + "learning_rate": 3.0716784180633276e-06, + "loss": 0.2738, + "step": 8765 + }, + { + "epoch": 0.75, + "learning_rate": 3.0696767418615945e-06, + "loss": 0.2892, + "step": 8766 + }, + { + "epoch": 0.75, + "learning_rate": 3.0676755998149333e-06, + "loss": 0.2974, + "step": 8767 + }, + { + "epoch": 0.75, + "learning_rate": 3.065674992077584e-06, + "loss": 0.3019, + "step": 8768 + }, + { + "epoch": 0.75, + "learning_rate": 3.063674918803743e-06, + "loss": 0.2569, + "step": 8769 + }, + { + "epoch": 0.75, + "learning_rate": 3.0616753801475653e-06, + "loss": 0.2845, + "step": 8770 + }, + { + "epoch": 0.75, + "learning_rate": 3.0596763762631655e-06, + "loss": 0.2559, + "step": 8771 + }, + { + "epoch": 0.75, + "learning_rate": 3.0576779073046214e-06, + "loss": 0.2432, + "step": 8772 + }, + { + "epoch": 0.75, + "learning_rate": 3.055679973425958e-06, + "loss": 0.2878, + "step": 8773 + }, + { + "epoch": 0.75, + "learning_rate": 3.0536825747811695e-06, + "loss": 0.289, + "step": 8774 + }, + { + "epoch": 0.75, + "learning_rate": 3.051685711524205e-06, + "loss": 0.2903, + "step": 8775 + }, + { + "epoch": 0.75, + "learning_rate": 3.0496893838089736e-06, + "loss": 0.3218, + "step": 8776 + }, + { + "epoch": 0.75, + "learning_rate": 3.0476935917893413e-06, + "loss": 0.2651, + "step": 8777 + }, + { + "epoch": 0.75, + "learning_rate": 3.045698335619135e-06, + "loss": 0.2812, + "step": 8778 + }, + { + "epoch": 0.75, + "learning_rate": 3.0437036154521426e-06, + "loss": 0.2305, + "step": 8779 + }, + { + "epoch": 0.75, + "learning_rate": 3.0417094314420958e-06, + "loss": 0.3206, + "step": 8780 + }, + { + "epoch": 0.75, + "learning_rate": 3.039715783742708e-06, + "loss": 0.2581, + "step": 8781 + }, + { + "epoch": 0.75, + "learning_rate": 3.0377226725076394e-06, + "loss": 0.2673, + "step": 8782 + }, + { + "epoch": 0.75, + "learning_rate": 3.0357300978905025e-06, + "loss": 0.2631, + "step": 8783 + }, + { + "epoch": 0.75, + "learning_rate": 3.0337380600448774e-06, + "loss": 0.2949, + "step": 8784 + }, + { + "epoch": 0.75, + "learning_rate": 3.031746559124301e-06, + "loss": 0.2809, + "step": 8785 + }, + { + "epoch": 0.75, + "learning_rate": 3.02975559528227e-06, + "loss": 0.248, + "step": 8786 + }, + { + "epoch": 0.75, + "learning_rate": 3.0277651686722353e-06, + "loss": 0.2822, + "step": 8787 + }, + { + "epoch": 0.75, + "learning_rate": 3.0257752794476113e-06, + "loss": 0.2659, + "step": 8788 + }, + { + "epoch": 0.75, + "learning_rate": 3.023785927761772e-06, + "loss": 0.2891, + "step": 8789 + }, + { + "epoch": 0.75, + "learning_rate": 3.021797113768039e-06, + "loss": 0.2844, + "step": 8790 + }, + { + "epoch": 0.75, + "learning_rate": 3.019808837619704e-06, + "loss": 0.2602, + "step": 8791 + }, + { + "epoch": 0.75, + "learning_rate": 3.017821099470014e-06, + "loss": 0.2915, + "step": 8792 + }, + { + "epoch": 0.75, + "learning_rate": 3.0158338994721737e-06, + "loss": 0.2745, + "step": 8793 + }, + { + "epoch": 0.75, + "learning_rate": 3.013847237779346e-06, + "loss": 0.2757, + "step": 8794 + }, + { + "epoch": 0.75, + "learning_rate": 3.011861114544654e-06, + "loss": 0.3434, + "step": 8795 + }, + { + "epoch": 0.75, + "learning_rate": 3.009875529921181e-06, + "loss": 0.2637, + "step": 8796 + }, + { + "epoch": 0.75, + "learning_rate": 3.0078904840619607e-06, + "loss": 0.2512, + "step": 8797 + }, + { + "epoch": 0.75, + "learning_rate": 3.005905977119992e-06, + "loss": 0.264, + "step": 8798 + }, + { + "epoch": 0.75, + "learning_rate": 3.0039220092482313e-06, + "loss": 0.2604, + "step": 8799 + }, + { + "epoch": 0.75, + "learning_rate": 3.0019385805995936e-06, + "loss": 0.2977, + "step": 8800 + }, + { + "epoch": 0.75, + "learning_rate": 2.999955691326952e-06, + "loss": 0.2452, + "step": 8801 + }, + { + "epoch": 0.75, + "learning_rate": 2.997973341583138e-06, + "loss": 0.2404, + "step": 8802 + }, + { + "epoch": 0.75, + "learning_rate": 2.9959915315209444e-06, + "loss": 0.2798, + "step": 8803 + }, + { + "epoch": 0.75, + "learning_rate": 2.994010261293111e-06, + "loss": 0.2356, + "step": 8804 + }, + { + "epoch": 0.75, + "learning_rate": 2.9920295310523496e-06, + "loss": 0.2933, + "step": 8805 + }, + { + "epoch": 0.75, + "learning_rate": 2.9900493409513256e-06, + "loss": 0.291, + "step": 8806 + }, + { + "epoch": 0.75, + "learning_rate": 2.98806969114266e-06, + "loss": 0.2778, + "step": 8807 + }, + { + "epoch": 0.76, + "learning_rate": 2.9860905817789354e-06, + "loss": 0.2945, + "step": 8808 + }, + { + "epoch": 0.76, + "learning_rate": 2.984112013012692e-06, + "loss": 0.2766, + "step": 8809 + }, + { + "epoch": 0.76, + "learning_rate": 2.9821339849964324e-06, + "loss": 0.3086, + "step": 8810 + }, + { + "epoch": 0.76, + "learning_rate": 2.980156497882605e-06, + "loss": 0.2699, + "step": 8811 + }, + { + "epoch": 0.76, + "learning_rate": 2.9781795518236288e-06, + "loss": 0.2803, + "step": 8812 + }, + { + "epoch": 0.76, + "learning_rate": 2.9762031469718777e-06, + "loss": 0.289, + "step": 8813 + }, + { + "epoch": 0.76, + "learning_rate": 2.9742272834796813e-06, + "loss": 0.2631, + "step": 8814 + }, + { + "epoch": 0.76, + "learning_rate": 2.9722519614993306e-06, + "loss": 0.2943, + "step": 8815 + }, + { + "epoch": 0.76, + "learning_rate": 2.970277181183074e-06, + "loss": 0.2924, + "step": 8816 + }, + { + "epoch": 0.76, + "learning_rate": 2.968302942683121e-06, + "loss": 0.2261, + "step": 8817 + }, + { + "epoch": 0.76, + "learning_rate": 2.966329246151626e-06, + "loss": 0.2853, + "step": 8818 + }, + { + "epoch": 0.76, + "learning_rate": 2.964356091740721e-06, + "loss": 0.2446, + "step": 8819 + }, + { + "epoch": 0.76, + "learning_rate": 2.9623834796024874e-06, + "loss": 0.6219, + "step": 8820 + }, + { + "epoch": 0.76, + "learning_rate": 2.9604114098889592e-06, + "loss": 0.3151, + "step": 8821 + }, + { + "epoch": 0.76, + "learning_rate": 2.9584398827521343e-06, + "loss": 0.264, + "step": 8822 + }, + { + "epoch": 0.76, + "learning_rate": 2.9564688983439716e-06, + "loss": 0.2772, + "step": 8823 + }, + { + "epoch": 0.76, + "learning_rate": 2.9544984568163815e-06, + "loss": 0.2816, + "step": 8824 + }, + { + "epoch": 0.76, + "learning_rate": 2.9525285583212382e-06, + "loss": 0.2513, + "step": 8825 + }, + { + "epoch": 0.76, + "learning_rate": 2.950559203010371e-06, + "loss": 0.2559, + "step": 8826 + }, + { + "epoch": 0.76, + "learning_rate": 2.9485903910355716e-06, + "loss": 0.257, + "step": 8827 + }, + { + "epoch": 0.76, + "learning_rate": 2.946622122548579e-06, + "loss": 0.2834, + "step": 8828 + }, + { + "epoch": 0.76, + "learning_rate": 2.9446543977011023e-06, + "loss": 0.2933, + "step": 8829 + }, + { + "epoch": 0.76, + "learning_rate": 2.942687216644803e-06, + "loss": 0.2908, + "step": 8830 + }, + { + "epoch": 0.76, + "learning_rate": 2.940720579531301e-06, + "loss": 0.2822, + "step": 8831 + }, + { + "epoch": 0.76, + "learning_rate": 2.9387544865121763e-06, + "loss": 0.2466, + "step": 8832 + }, + { + "epoch": 0.76, + "learning_rate": 2.936788937738966e-06, + "loss": 0.2953, + "step": 8833 + }, + { + "epoch": 0.76, + "learning_rate": 2.9348239333631655e-06, + "loss": 0.303, + "step": 8834 + }, + { + "epoch": 0.76, + "learning_rate": 2.9328594735362237e-06, + "loss": 0.2895, + "step": 8835 + }, + { + "epoch": 0.76, + "learning_rate": 2.9308955584095544e-06, + "loss": 0.2406, + "step": 8836 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.262, + "step": 8837 + }, + { + "epoch": 0.76, + "learning_rate": 2.926969362862465e-06, + "loss": 0.2534, + "step": 8838 + }, + { + "epoch": 0.76, + "learning_rate": 2.9250070827446563e-06, + "loss": 0.3336, + "step": 8839 + }, + { + "epoch": 0.76, + "learning_rate": 2.923045347932344e-06, + "loss": 0.2393, + "step": 8840 + }, + { + "epoch": 0.76, + "learning_rate": 2.92108415857673e-06, + "loss": 0.309, + "step": 8841 + }, + { + "epoch": 0.76, + "learning_rate": 2.919123514828969e-06, + "loss": 0.2438, + "step": 8842 + }, + { + "epoch": 0.76, + "learning_rate": 2.9171634168401797e-06, + "loss": 0.2555, + "step": 8843 + }, + { + "epoch": 0.76, + "learning_rate": 2.9152038647614357e-06, + "loss": 0.3085, + "step": 8844 + }, + { + "epoch": 0.76, + "learning_rate": 2.9132448587437722e-06, + "loss": 0.244, + "step": 8845 + }, + { + "epoch": 0.76, + "learning_rate": 2.911286398938178e-06, + "loss": 0.2952, + "step": 8846 + }, + { + "epoch": 0.76, + "learning_rate": 2.9093284854956017e-06, + "loss": 0.2805, + "step": 8847 + }, + { + "epoch": 0.76, + "learning_rate": 2.907371118566953e-06, + "loss": 0.2447, + "step": 8848 + }, + { + "epoch": 0.76, + "learning_rate": 2.9054142983030884e-06, + "loss": 0.2794, + "step": 8849 + }, + { + "epoch": 0.76, + "learning_rate": 2.9034580248548363e-06, + "loss": 0.2491, + "step": 8850 + }, + { + "epoch": 0.76, + "learning_rate": 2.9015022983729733e-06, + "loss": 0.2717, + "step": 8851 + }, + { + "epoch": 0.76, + "learning_rate": 2.899547119008239e-06, + "loss": 0.2768, + "step": 8852 + }, + { + "epoch": 0.76, + "learning_rate": 2.897592486911328e-06, + "loss": 0.3257, + "step": 8853 + }, + { + "epoch": 0.76, + "learning_rate": 2.8956384022328943e-06, + "loss": 0.3244, + "step": 8854 + }, + { + "epoch": 0.76, + "learning_rate": 2.8936848651235516e-06, + "loss": 0.3015, + "step": 8855 + }, + { + "epoch": 0.76, + "learning_rate": 2.891731875733863e-06, + "loss": 0.2301, + "step": 8856 + }, + { + "epoch": 0.76, + "learning_rate": 2.889779434214356e-06, + "loss": 0.275, + "step": 8857 + }, + { + "epoch": 0.76, + "learning_rate": 2.8878275407155244e-06, + "loss": 0.2593, + "step": 8858 + }, + { + "epoch": 0.76, + "learning_rate": 2.885876195387799e-06, + "loss": 0.2585, + "step": 8859 + }, + { + "epoch": 0.76, + "learning_rate": 2.883925398381585e-06, + "loss": 0.269, + "step": 8860 + }, + { + "epoch": 0.76, + "learning_rate": 2.8819751498472405e-06, + "loss": 0.2657, + "step": 8861 + }, + { + "epoch": 0.76, + "learning_rate": 2.8800254499350797e-06, + "loss": 0.2767, + "step": 8862 + }, + { + "epoch": 0.76, + "learning_rate": 2.878076298795376e-06, + "loss": 0.332, + "step": 8863 + }, + { + "epoch": 0.76, + "learning_rate": 2.8761276965783613e-06, + "loss": 0.2704, + "step": 8864 + }, + { + "epoch": 0.76, + "learning_rate": 2.874179643434227e-06, + "loss": 0.2747, + "step": 8865 + }, + { + "epoch": 0.76, + "learning_rate": 2.8722321395131127e-06, + "loss": 0.2766, + "step": 8866 + }, + { + "epoch": 0.76, + "learning_rate": 2.8702851849651258e-06, + "loss": 0.2656, + "step": 8867 + }, + { + "epoch": 0.76, + "learning_rate": 2.868338779940327e-06, + "loss": 0.2666, + "step": 8868 + }, + { + "epoch": 0.76, + "learning_rate": 2.866392924588738e-06, + "loss": 0.3022, + "step": 8869 + }, + { + "epoch": 0.76, + "learning_rate": 2.864447619060333e-06, + "loss": 0.2492, + "step": 8870 + }, + { + "epoch": 0.76, + "learning_rate": 2.8625028635050477e-06, + "loss": 0.2811, + "step": 8871 + }, + { + "epoch": 0.76, + "learning_rate": 2.8605586580727783e-06, + "loss": 0.2629, + "step": 8872 + }, + { + "epoch": 0.76, + "learning_rate": 2.8586150029133663e-06, + "loss": 0.2952, + "step": 8873 + }, + { + "epoch": 0.76, + "learning_rate": 2.8566718981766238e-06, + "loss": 0.2839, + "step": 8874 + }, + { + "epoch": 0.76, + "learning_rate": 2.854729344012316e-06, + "loss": 0.3015, + "step": 8875 + }, + { + "epoch": 0.76, + "learning_rate": 2.8527873405701636e-06, + "loss": 0.2704, + "step": 8876 + }, + { + "epoch": 0.76, + "learning_rate": 2.850845887999848e-06, + "loss": 0.2374, + "step": 8877 + }, + { + "epoch": 0.76, + "learning_rate": 2.8489049864510053e-06, + "loss": 0.277, + "step": 8878 + }, + { + "epoch": 0.76, + "learning_rate": 2.8469646360732362e-06, + "loss": 0.2593, + "step": 8879 + }, + { + "epoch": 0.76, + "learning_rate": 2.845024837016085e-06, + "loss": 0.3086, + "step": 8880 + }, + { + "epoch": 0.76, + "learning_rate": 2.8430855894290655e-06, + "loss": 0.2775, + "step": 8881 + }, + { + "epoch": 0.76, + "learning_rate": 2.841146893461646e-06, + "loss": 0.2693, + "step": 8882 + }, + { + "epoch": 0.76, + "learning_rate": 2.8392087492632504e-06, + "loss": 0.2789, + "step": 8883 + }, + { + "epoch": 0.76, + "learning_rate": 2.837271156983261e-06, + "loss": 0.2928, + "step": 8884 + }, + { + "epoch": 0.76, + "learning_rate": 2.83533411677102e-06, + "loss": 0.2761, + "step": 8885 + }, + { + "epoch": 0.76, + "learning_rate": 2.833397628775827e-06, + "loss": 0.264, + "step": 8886 + }, + { + "epoch": 0.76, + "learning_rate": 2.8314616931469295e-06, + "loss": 0.3126, + "step": 8887 + }, + { + "epoch": 0.76, + "learning_rate": 2.8295263100335447e-06, + "loss": 0.2698, + "step": 8888 + }, + { + "epoch": 0.76, + "learning_rate": 2.8275914795848415e-06, + "loss": 0.2929, + "step": 8889 + }, + { + "epoch": 0.76, + "learning_rate": 2.8256572019499474e-06, + "loss": 0.2853, + "step": 8890 + }, + { + "epoch": 0.76, + "learning_rate": 2.8237234772779455e-06, + "loss": 0.3002, + "step": 8891 + }, + { + "epoch": 0.76, + "learning_rate": 2.8217903057178796e-06, + "loss": 0.2512, + "step": 8892 + }, + { + "epoch": 0.76, + "learning_rate": 2.8198576874187513e-06, + "loss": 0.2697, + "step": 8893 + }, + { + "epoch": 0.76, + "learning_rate": 2.8179256225295114e-06, + "loss": 0.2708, + "step": 8894 + }, + { + "epoch": 0.76, + "learning_rate": 2.815994111199074e-06, + "loss": 0.2601, + "step": 8895 + }, + { + "epoch": 0.76, + "learning_rate": 2.8140631535763195e-06, + "loss": 0.3077, + "step": 8896 + }, + { + "epoch": 0.76, + "learning_rate": 2.812132749810067e-06, + "loss": 0.2704, + "step": 8897 + }, + { + "epoch": 0.76, + "learning_rate": 2.810202900049106e-06, + "loss": 0.2992, + "step": 8898 + }, + { + "epoch": 0.76, + "learning_rate": 2.80827360444218e-06, + "loss": 0.2306, + "step": 8899 + }, + { + "epoch": 0.76, + "learning_rate": 2.806344863137989e-06, + "loss": 0.2949, + "step": 8900 + }, + { + "epoch": 0.76, + "learning_rate": 2.8044166762851898e-06, + "loss": 0.2784, + "step": 8901 + }, + { + "epoch": 0.76, + "learning_rate": 2.8024890440324e-06, + "loss": 0.2444, + "step": 8902 + }, + { + "epoch": 0.76, + "learning_rate": 2.8005619665281935e-06, + "loss": 0.2661, + "step": 8903 + }, + { + "epoch": 0.76, + "learning_rate": 2.798635443921094e-06, + "loss": 0.5709, + "step": 8904 + }, + { + "epoch": 0.76, + "learning_rate": 2.7967094763595917e-06, + "loss": 0.3088, + "step": 8905 + }, + { + "epoch": 0.76, + "learning_rate": 2.7947840639921308e-06, + "loss": 0.2856, + "step": 8906 + }, + { + "epoch": 0.76, + "learning_rate": 2.7928592069671113e-06, + "loss": 0.2781, + "step": 8907 + }, + { + "epoch": 0.76, + "learning_rate": 2.7909349054328937e-06, + "loss": 0.2794, + "step": 8908 + }, + { + "epoch": 0.76, + "learning_rate": 2.789011159537792e-06, + "loss": 0.2968, + "step": 8909 + }, + { + "epoch": 0.76, + "learning_rate": 2.7870879694300825e-06, + "loss": 0.2686, + "step": 8910 + }, + { + "epoch": 0.76, + "learning_rate": 2.7851653352579886e-06, + "loss": 0.2484, + "step": 8911 + }, + { + "epoch": 0.76, + "learning_rate": 2.783243257169702e-06, + "loss": 0.2371, + "step": 8912 + }, + { + "epoch": 0.76, + "learning_rate": 2.781321735313366e-06, + "loss": 0.2642, + "step": 8913 + }, + { + "epoch": 0.76, + "learning_rate": 2.779400769837083e-06, + "loss": 0.2697, + "step": 8914 + }, + { + "epoch": 0.76, + "learning_rate": 2.777480360888909e-06, + "loss": 0.2253, + "step": 8915 + }, + { + "epoch": 0.76, + "learning_rate": 2.7755605086168624e-06, + "loss": 0.2537, + "step": 8916 + }, + { + "epoch": 0.76, + "learning_rate": 2.7736412131689173e-06, + "loss": 0.2699, + "step": 8917 + }, + { + "epoch": 0.76, + "learning_rate": 2.7717224746929984e-06, + "loss": 0.2928, + "step": 8918 + }, + { + "epoch": 0.76, + "learning_rate": 2.769804293336994e-06, + "loss": 0.2757, + "step": 8919 + }, + { + "epoch": 0.76, + "learning_rate": 2.7678866692487503e-06, + "loss": 0.3195, + "step": 8920 + }, + { + "epoch": 0.76, + "learning_rate": 2.7659696025760674e-06, + "loss": 0.2523, + "step": 8921 + }, + { + "epoch": 0.76, + "learning_rate": 2.764053093466702e-06, + "loss": 0.2746, + "step": 8922 + }, + { + "epoch": 0.76, + "learning_rate": 2.7621371420683717e-06, + "loss": 0.3416, + "step": 8923 + }, + { + "epoch": 0.76, + "learning_rate": 2.7602217485287497e-06, + "loss": 0.2775, + "step": 8924 + }, + { + "epoch": 0.77, + "learning_rate": 2.7583069129954585e-06, + "loss": 0.2582, + "step": 8925 + }, + { + "epoch": 0.77, + "learning_rate": 2.75639263561609e-06, + "loss": 0.3271, + "step": 8926 + }, + { + "epoch": 0.77, + "learning_rate": 2.7544789165381834e-06, + "loss": 0.2703, + "step": 8927 + }, + { + "epoch": 0.77, + "learning_rate": 2.752565755909242e-06, + "loss": 0.2889, + "step": 8928 + }, + { + "epoch": 0.77, + "learning_rate": 2.750653153876721e-06, + "loss": 0.2397, + "step": 8929 + }, + { + "epoch": 0.77, + "learning_rate": 2.7487411105880356e-06, + "loss": 0.3184, + "step": 8930 + }, + { + "epoch": 0.77, + "learning_rate": 2.746829626190558e-06, + "loss": 0.2506, + "step": 8931 + }, + { + "epoch": 0.77, + "learning_rate": 2.7449187008316113e-06, + "loss": 0.2531, + "step": 8932 + }, + { + "epoch": 0.77, + "learning_rate": 2.7430083346584802e-06, + "loss": 0.267, + "step": 8933 + }, + { + "epoch": 0.77, + "learning_rate": 2.7410985278184144e-06, + "loss": 0.2317, + "step": 8934 + }, + { + "epoch": 0.77, + "learning_rate": 2.739189280458604e-06, + "loss": 0.2836, + "step": 8935 + }, + { + "epoch": 0.77, + "learning_rate": 2.7372805927262057e-06, + "loss": 0.2641, + "step": 8936 + }, + { + "epoch": 0.77, + "learning_rate": 2.7353724647683344e-06, + "loss": 0.301, + "step": 8937 + }, + { + "epoch": 0.77, + "learning_rate": 2.7334648967320587e-06, + "loss": 0.2787, + "step": 8938 + }, + { + "epoch": 0.77, + "learning_rate": 2.7315578887644057e-06, + "loss": 0.2788, + "step": 8939 + }, + { + "epoch": 0.77, + "learning_rate": 2.72965144101235e-06, + "loss": 0.2899, + "step": 8940 + }, + { + "epoch": 0.77, + "learning_rate": 2.7277455536228438e-06, + "loss": 0.2459, + "step": 8941 + }, + { + "epoch": 0.77, + "learning_rate": 2.725840226742774e-06, + "loss": 0.2461, + "step": 8942 + }, + { + "epoch": 0.77, + "learning_rate": 2.723935460518997e-06, + "loss": 0.3032, + "step": 8943 + }, + { + "epoch": 0.77, + "learning_rate": 2.722031255098323e-06, + "loss": 0.2745, + "step": 8944 + }, + { + "epoch": 0.77, + "learning_rate": 2.7201276106275176e-06, + "loss": 0.2956, + "step": 8945 + }, + { + "epoch": 0.77, + "learning_rate": 2.7182245272533046e-06, + "loss": 0.3193, + "step": 8946 + }, + { + "epoch": 0.77, + "learning_rate": 2.716322005122366e-06, + "loss": 0.2828, + "step": 8947 + }, + { + "epoch": 0.77, + "learning_rate": 2.7144200443813394e-06, + "loss": 0.2609, + "step": 8948 + }, + { + "epoch": 0.77, + "learning_rate": 2.712518645176815e-06, + "loss": 0.2618, + "step": 8949 + }, + { + "epoch": 0.77, + "learning_rate": 2.7106178076553446e-06, + "loss": 0.3062, + "step": 8950 + }, + { + "epoch": 0.77, + "learning_rate": 2.708717531963435e-06, + "loss": 0.2448, + "step": 8951 + }, + { + "epoch": 0.77, + "learning_rate": 2.7068178182475514e-06, + "loss": 0.3056, + "step": 8952 + }, + { + "epoch": 0.77, + "learning_rate": 2.7049186666541126e-06, + "loss": 0.2924, + "step": 8953 + }, + { + "epoch": 0.77, + "learning_rate": 2.703020077329498e-06, + "loss": 0.2819, + "step": 8954 + }, + { + "epoch": 0.77, + "learning_rate": 2.7011220504200432e-06, + "loss": 0.3284, + "step": 8955 + }, + { + "epoch": 0.77, + "learning_rate": 2.6992245860720325e-06, + "loss": 0.288, + "step": 8956 + }, + { + "epoch": 0.77, + "learning_rate": 2.6973276844317166e-06, + "loss": 0.275, + "step": 8957 + }, + { + "epoch": 0.77, + "learning_rate": 2.6954313456452995e-06, + "loss": 0.2579, + "step": 8958 + }, + { + "epoch": 0.77, + "learning_rate": 2.6935355698589417e-06, + "loss": 0.2821, + "step": 8959 + }, + { + "epoch": 0.77, + "learning_rate": 2.691640357218759e-06, + "loss": 0.2684, + "step": 8960 + }, + { + "epoch": 0.77, + "learning_rate": 2.6897457078708267e-06, + "loss": 0.3161, + "step": 8961 + }, + { + "epoch": 0.77, + "learning_rate": 2.6878516219611773e-06, + "loss": 0.2882, + "step": 8962 + }, + { + "epoch": 0.77, + "learning_rate": 2.6859580996357905e-06, + "loss": 0.2635, + "step": 8963 + }, + { + "epoch": 0.77, + "learning_rate": 2.684065141040614e-06, + "loss": 0.2587, + "step": 8964 + }, + { + "epoch": 0.77, + "learning_rate": 2.6821727463215476e-06, + "loss": 0.2642, + "step": 8965 + }, + { + "epoch": 0.77, + "learning_rate": 2.680280915624448e-06, + "loss": 0.2742, + "step": 8966 + }, + { + "epoch": 0.77, + "learning_rate": 2.6783896490951266e-06, + "loss": 0.2397, + "step": 8967 + }, + { + "epoch": 0.77, + "learning_rate": 2.6764989468793544e-06, + "loss": 0.2552, + "step": 8968 + }, + { + "epoch": 0.77, + "learning_rate": 2.6746088091228594e-06, + "loss": 0.2483, + "step": 8969 + }, + { + "epoch": 0.77, + "learning_rate": 2.6727192359713196e-06, + "loss": 0.2735, + "step": 8970 + }, + { + "epoch": 0.77, + "learning_rate": 2.6708302275703725e-06, + "loss": 0.2709, + "step": 8971 + }, + { + "epoch": 0.77, + "learning_rate": 2.668941784065623e-06, + "loss": 0.3055, + "step": 8972 + }, + { + "epoch": 0.77, + "learning_rate": 2.6670539056026134e-06, + "loss": 0.2679, + "step": 8973 + }, + { + "epoch": 0.77, + "learning_rate": 2.6651665923268555e-06, + "loss": 0.2426, + "step": 8974 + }, + { + "epoch": 0.77, + "learning_rate": 2.663279844383815e-06, + "loss": 0.2935, + "step": 8975 + }, + { + "epoch": 0.77, + "learning_rate": 2.661393661918912e-06, + "loss": 0.2654, + "step": 8976 + }, + { + "epoch": 0.77, + "learning_rate": 2.6595080450775268e-06, + "loss": 0.2708, + "step": 8977 + }, + { + "epoch": 0.77, + "learning_rate": 2.657622994004986e-06, + "loss": 0.2574, + "step": 8978 + }, + { + "epoch": 0.77, + "learning_rate": 2.6557385088465906e-06, + "loss": 0.2859, + "step": 8979 + }, + { + "epoch": 0.77, + "learning_rate": 2.65385458974758e-06, + "loss": 0.2717, + "step": 8980 + }, + { + "epoch": 0.77, + "learning_rate": 2.651971236853158e-06, + "loss": 0.271, + "step": 8981 + }, + { + "epoch": 0.77, + "learning_rate": 2.6500884503084857e-06, + "loss": 0.2737, + "step": 8982 + }, + { + "epoch": 0.77, + "learning_rate": 2.6482062302586774e-06, + "loss": 0.278, + "step": 8983 + }, + { + "epoch": 0.77, + "learning_rate": 2.646324576848809e-06, + "loss": 0.2837, + "step": 8984 + }, + { + "epoch": 0.77, + "learning_rate": 2.6444434902239047e-06, + "loss": 0.326, + "step": 8985 + }, + { + "epoch": 0.77, + "learning_rate": 2.6425629705289556e-06, + "loss": 0.3069, + "step": 8986 + }, + { + "epoch": 0.77, + "learning_rate": 2.640683017908895e-06, + "loss": 0.2611, + "step": 8987 + }, + { + "epoch": 0.77, + "learning_rate": 2.6388036325086243e-06, + "loss": 0.2891, + "step": 8988 + }, + { + "epoch": 0.77, + "learning_rate": 2.636924814472995e-06, + "loss": 0.2443, + "step": 8989 + }, + { + "epoch": 0.77, + "learning_rate": 2.6350465639468213e-06, + "loss": 0.2504, + "step": 8990 + }, + { + "epoch": 0.77, + "learning_rate": 2.633168881074867e-06, + "loss": 0.3121, + "step": 8991 + }, + { + "epoch": 0.77, + "learning_rate": 2.6312917660018534e-06, + "loss": 0.2984, + "step": 8992 + }, + { + "epoch": 0.77, + "learning_rate": 2.6294152188724644e-06, + "loss": 0.2879, + "step": 8993 + }, + { + "epoch": 0.77, + "learning_rate": 2.627539239831328e-06, + "loss": 0.2751, + "step": 8994 + }, + { + "epoch": 0.77, + "learning_rate": 2.6256638290230385e-06, + "loss": 0.2329, + "step": 8995 + }, + { + "epoch": 0.77, + "learning_rate": 2.623788986592144e-06, + "loss": 0.261, + "step": 8996 + }, + { + "epoch": 0.77, + "learning_rate": 2.6219147126831467e-06, + "loss": 0.28, + "step": 8997 + }, + { + "epoch": 0.77, + "learning_rate": 2.620041007440508e-06, + "loss": 0.2652, + "step": 8998 + }, + { + "epoch": 0.77, + "learning_rate": 2.618167871008642e-06, + "loss": 0.2959, + "step": 8999 + }, + { + "epoch": 0.77, + "learning_rate": 2.616295303531926e-06, + "loss": 0.279, + "step": 9000 + }, + { + "epoch": 0.77, + "learning_rate": 2.6144233051546797e-06, + "loss": 0.2689, + "step": 9001 + }, + { + "epoch": 0.77, + "learning_rate": 2.6125518760211933e-06, + "loss": 0.2753, + "step": 9002 + }, + { + "epoch": 0.77, + "learning_rate": 2.6106810162757046e-06, + "loss": 0.254, + "step": 9003 + }, + { + "epoch": 0.77, + "learning_rate": 2.608810726062412e-06, + "loss": 0.2568, + "step": 9004 + }, + { + "epoch": 0.77, + "learning_rate": 2.6069410055254674e-06, + "loss": 0.2473, + "step": 9005 + }, + { + "epoch": 0.77, + "learning_rate": 2.6050718548089804e-06, + "loss": 0.2919, + "step": 9006 + }, + { + "epoch": 0.77, + "learning_rate": 2.6032032740570177e-06, + "loss": 0.2626, + "step": 9007 + }, + { + "epoch": 0.77, + "learning_rate": 2.6013352634135957e-06, + "loss": 0.3216, + "step": 9008 + }, + { + "epoch": 0.77, + "learning_rate": 2.599467823022691e-06, + "loss": 0.6083, + "step": 9009 + }, + { + "epoch": 0.77, + "learning_rate": 2.5976009530282455e-06, + "loss": 0.2357, + "step": 9010 + }, + { + "epoch": 0.77, + "learning_rate": 2.5957346535741378e-06, + "loss": 0.2939, + "step": 9011 + }, + { + "epoch": 0.77, + "learning_rate": 2.593868924804218e-06, + "loss": 0.3043, + "step": 9012 + }, + { + "epoch": 0.77, + "learning_rate": 2.592003766862288e-06, + "loss": 0.2481, + "step": 9013 + }, + { + "epoch": 0.77, + "learning_rate": 2.5901391798921018e-06, + "loss": 0.2776, + "step": 9014 + }, + { + "epoch": 0.77, + "learning_rate": 2.5882751640373783e-06, + "loss": 0.2607, + "step": 9015 + }, + { + "epoch": 0.77, + "learning_rate": 2.586411719441777e-06, + "loss": 0.2996, + "step": 9016 + }, + { + "epoch": 0.77, + "learning_rate": 2.5845488462489344e-06, + "loss": 0.3062, + "step": 9017 + }, + { + "epoch": 0.77, + "learning_rate": 2.582686544602423e-06, + "loss": 0.2727, + "step": 9018 + }, + { + "epoch": 0.77, + "learning_rate": 2.5808248146457825e-06, + "loss": 0.2786, + "step": 9019 + }, + { + "epoch": 0.77, + "learning_rate": 2.5789636565225063e-06, + "loss": 0.3326, + "step": 9020 + }, + { + "epoch": 0.77, + "learning_rate": 2.5771030703760434e-06, + "loss": 0.3009, + "step": 9021 + }, + { + "epoch": 0.77, + "learning_rate": 2.575243056349801e-06, + "loss": 0.2975, + "step": 9022 + }, + { + "epoch": 0.77, + "learning_rate": 2.5733836145871305e-06, + "loss": 0.2295, + "step": 9023 + }, + { + "epoch": 0.77, + "learning_rate": 2.571524745231361e-06, + "loss": 0.2505, + "step": 9024 + }, + { + "epoch": 0.77, + "learning_rate": 2.569666448425756e-06, + "loss": 0.2752, + "step": 9025 + }, + { + "epoch": 0.77, + "learning_rate": 2.5678087243135476e-06, + "loss": 0.2639, + "step": 9026 + }, + { + "epoch": 0.77, + "learning_rate": 2.565951573037919e-06, + "loss": 0.2641, + "step": 9027 + }, + { + "epoch": 0.77, + "learning_rate": 2.56409499474201e-06, + "loss": 0.2936, + "step": 9028 + }, + { + "epoch": 0.77, + "learning_rate": 2.562238989568917e-06, + "loss": 0.2559, + "step": 9029 + }, + { + "epoch": 0.77, + "learning_rate": 2.560383557661692e-06, + "loss": 0.2814, + "step": 9030 + }, + { + "epoch": 0.77, + "learning_rate": 2.558528699163344e-06, + "loss": 0.2956, + "step": 9031 + }, + { + "epoch": 0.77, + "learning_rate": 2.556674414216833e-06, + "loss": 0.2402, + "step": 9032 + }, + { + "epoch": 0.77, + "learning_rate": 2.5548207029650784e-06, + "loss": 0.2813, + "step": 9033 + }, + { + "epoch": 0.77, + "learning_rate": 2.5529675655509567e-06, + "loss": 0.2469, + "step": 9034 + }, + { + "epoch": 0.77, + "learning_rate": 2.5511150021172993e-06, + "loss": 0.3143, + "step": 9035 + }, + { + "epoch": 0.77, + "learning_rate": 2.5492630128068895e-06, + "loss": 0.2601, + "step": 9036 + }, + { + "epoch": 0.77, + "learning_rate": 2.5474115977624724e-06, + "loss": 0.2759, + "step": 9037 + }, + { + "epoch": 0.77, + "learning_rate": 2.5455607571267484e-06, + "loss": 0.3003, + "step": 9038 + }, + { + "epoch": 0.77, + "learning_rate": 2.5437104910423647e-06, + "loss": 0.2761, + "step": 9039 + }, + { + "epoch": 0.77, + "learning_rate": 2.541860799651934e-06, + "loss": 0.2429, + "step": 9040 + }, + { + "epoch": 0.77, + "learning_rate": 2.5400116830980203e-06, + "loss": 0.2861, + "step": 9041 + }, + { + "epoch": 0.78, + "learning_rate": 2.5381631415231455e-06, + "loss": 0.2895, + "step": 9042 + }, + { + "epoch": 0.78, + "learning_rate": 2.5363151750697856e-06, + "loss": 0.2639, + "step": 9043 + }, + { + "epoch": 0.78, + "learning_rate": 2.534467783880373e-06, + "loss": 0.2704, + "step": 9044 + }, + { + "epoch": 0.78, + "learning_rate": 2.532620968097299e-06, + "loss": 0.2745, + "step": 9045 + }, + { + "epoch": 0.78, + "learning_rate": 2.530774727862899e-06, + "loss": 0.2701, + "step": 9046 + }, + { + "epoch": 0.78, + "learning_rate": 2.528929063319475e-06, + "loss": 0.2693, + "step": 9047 + }, + { + "epoch": 0.78, + "learning_rate": 2.5270839746092878e-06, + "loss": 0.2646, + "step": 9048 + }, + { + "epoch": 0.78, + "learning_rate": 2.5252394618745415e-06, + "loss": 0.2801, + "step": 9049 + }, + { + "epoch": 0.78, + "learning_rate": 2.5233955252574027e-06, + "loss": 0.2787, + "step": 9050 + }, + { + "epoch": 0.78, + "learning_rate": 2.521552164899994e-06, + "loss": 0.2725, + "step": 9051 + }, + { + "epoch": 0.78, + "learning_rate": 2.5197093809443917e-06, + "loss": 0.2574, + "step": 9052 + }, + { + "epoch": 0.78, + "learning_rate": 2.5178671735326333e-06, + "loss": 0.2852, + "step": 9053 + }, + { + "epoch": 0.78, + "learning_rate": 2.516025542806696e-06, + "loss": 0.2609, + "step": 9054 + }, + { + "epoch": 0.78, + "learning_rate": 2.5141844889085365e-06, + "loss": 0.552, + "step": 9055 + }, + { + "epoch": 0.78, + "learning_rate": 2.512344011980045e-06, + "loss": 0.2188, + "step": 9056 + }, + { + "epoch": 0.78, + "learning_rate": 2.51050411216308e-06, + "loss": 0.2703, + "step": 9057 + }, + { + "epoch": 0.78, + "learning_rate": 2.508664789599451e-06, + "loss": 0.2961, + "step": 9058 + }, + { + "epoch": 0.78, + "learning_rate": 2.5068260444309245e-06, + "loss": 0.3074, + "step": 9059 + }, + { + "epoch": 0.78, + "learning_rate": 2.504987876799224e-06, + "loss": 0.2694, + "step": 9060 + }, + { + "epoch": 0.78, + "learning_rate": 2.503150286846019e-06, + "loss": 0.2668, + "step": 9061 + }, + { + "epoch": 0.78, + "learning_rate": 2.501313274712952e-06, + "loss": 0.2484, + "step": 9062 + }, + { + "epoch": 0.78, + "learning_rate": 2.4994768405416024e-06, + "loss": 0.2653, + "step": 9063 + }, + { + "epoch": 0.78, + "learning_rate": 2.497640984473518e-06, + "loss": 0.2391, + "step": 9064 + }, + { + "epoch": 0.78, + "learning_rate": 2.4958057066501952e-06, + "loss": 0.2624, + "step": 9065 + }, + { + "epoch": 0.78, + "learning_rate": 2.4939710072130895e-06, + "loss": 0.2863, + "step": 9066 + }, + { + "epoch": 0.78, + "learning_rate": 2.492136886303611e-06, + "loss": 0.2831, + "step": 9067 + }, + { + "epoch": 0.78, + "learning_rate": 2.490303344063123e-06, + "loss": 0.2546, + "step": 9068 + }, + { + "epoch": 0.78, + "learning_rate": 2.48847038063295e-06, + "loss": 0.2824, + "step": 9069 + }, + { + "epoch": 0.78, + "learning_rate": 2.486637996154362e-06, + "loss": 0.2989, + "step": 9070 + }, + { + "epoch": 0.78, + "learning_rate": 2.4848061907685915e-06, + "loss": 0.2344, + "step": 9071 + }, + { + "epoch": 0.78, + "learning_rate": 2.4829749646168268e-06, + "loss": 0.5498, + "step": 9072 + }, + { + "epoch": 0.78, + "learning_rate": 2.4811443178402097e-06, + "loss": 0.2698, + "step": 9073 + }, + { + "epoch": 0.78, + "learning_rate": 2.4793142505798363e-06, + "loss": 0.2719, + "step": 9074 + }, + { + "epoch": 0.78, + "learning_rate": 2.4774847629767594e-06, + "loss": 0.2679, + "step": 9075 + }, + { + "epoch": 0.78, + "learning_rate": 2.47565585517199e-06, + "loss": 0.2991, + "step": 9076 + }, + { + "epoch": 0.78, + "learning_rate": 2.473827527306486e-06, + "loss": 0.2684, + "step": 9077 + }, + { + "epoch": 0.78, + "learning_rate": 2.4719997795211683e-06, + "loss": 0.2932, + "step": 9078 + }, + { + "epoch": 0.78, + "learning_rate": 2.470172611956909e-06, + "loss": 0.2335, + "step": 9079 + }, + { + "epoch": 0.78, + "learning_rate": 2.468346024754541e-06, + "loss": 0.2741, + "step": 9080 + }, + { + "epoch": 0.78, + "learning_rate": 2.4665200180548454e-06, + "loss": 0.2979, + "step": 9081 + }, + { + "epoch": 0.78, + "learning_rate": 2.464694591998563e-06, + "loss": 0.275, + "step": 9082 + }, + { + "epoch": 0.78, + "learning_rate": 2.4628697467263916e-06, + "loss": 0.3026, + "step": 9083 + }, + { + "epoch": 0.78, + "learning_rate": 2.4610454823789742e-06, + "loss": 0.2519, + "step": 9084 + }, + { + "epoch": 0.78, + "learning_rate": 2.459221799096918e-06, + "loss": 0.2508, + "step": 9085 + }, + { + "epoch": 0.78, + "learning_rate": 2.4573986970207906e-06, + "loss": 0.2575, + "step": 9086 + }, + { + "epoch": 0.78, + "learning_rate": 2.4555761762911e-06, + "loss": 0.287, + "step": 9087 + }, + { + "epoch": 0.78, + "learning_rate": 2.4537542370483203e-06, + "loss": 0.3323, + "step": 9088 + }, + { + "epoch": 0.78, + "learning_rate": 2.4519328794328755e-06, + "loss": 0.2597, + "step": 9089 + }, + { + "epoch": 0.78, + "learning_rate": 2.4501121035851494e-06, + "loss": 0.2785, + "step": 9090 + }, + { + "epoch": 0.78, + "learning_rate": 2.4482919096454803e-06, + "loss": 0.2682, + "step": 9091 + }, + { + "epoch": 0.78, + "learning_rate": 2.44647229775415e-06, + "loss": 0.2406, + "step": 9092 + }, + { + "epoch": 0.78, + "learning_rate": 2.444653268051418e-06, + "loss": 0.5818, + "step": 9093 + }, + { + "epoch": 0.78, + "learning_rate": 2.4428348206774775e-06, + "loss": 0.3411, + "step": 9094 + }, + { + "epoch": 0.78, + "learning_rate": 2.441016955772487e-06, + "loss": 0.2684, + "step": 9095 + }, + { + "epoch": 0.78, + "learning_rate": 2.439199673476561e-06, + "loss": 0.2643, + "step": 9096 + }, + { + "epoch": 0.78, + "learning_rate": 2.437382973929764e-06, + "loss": 0.2584, + "step": 9097 + }, + { + "epoch": 0.78, + "learning_rate": 2.4355668572721224e-06, + "loss": 0.2581, + "step": 9098 + }, + { + "epoch": 0.78, + "learning_rate": 2.433751323643606e-06, + "loss": 0.2538, + "step": 9099 + }, + { + "epoch": 0.78, + "learning_rate": 2.431936373184156e-06, + "loss": 0.2744, + "step": 9100 + }, + { + "epoch": 0.78, + "learning_rate": 2.430122006033653e-06, + "loss": 0.2372, + "step": 9101 + }, + { + "epoch": 0.78, + "learning_rate": 2.428308222331942e-06, + "loss": 0.2873, + "step": 9102 + }, + { + "epoch": 0.78, + "learning_rate": 2.42649502221882e-06, + "loss": 0.2692, + "step": 9103 + }, + { + "epoch": 0.78, + "learning_rate": 2.42468240583404e-06, + "loss": 0.2466, + "step": 9104 + }, + { + "epoch": 0.78, + "learning_rate": 2.422870373317313e-06, + "loss": 0.2924, + "step": 9105 + }, + { + "epoch": 0.78, + "learning_rate": 2.4210589248082914e-06, + "loss": 0.2752, + "step": 9106 + }, + { + "epoch": 0.78, + "learning_rate": 2.4192480604466052e-06, + "loss": 0.2638, + "step": 9107 + }, + { + "epoch": 0.78, + "learning_rate": 2.4174377803718184e-06, + "loss": 0.2562, + "step": 9108 + }, + { + "epoch": 0.78, + "learning_rate": 2.41562808472346e-06, + "loss": 0.3356, + "step": 9109 + }, + { + "epoch": 0.78, + "learning_rate": 2.4138189736410144e-06, + "loss": 0.2842, + "step": 9110 + }, + { + "epoch": 0.78, + "learning_rate": 2.412010447263917e-06, + "loss": 0.2661, + "step": 9111 + }, + { + "epoch": 0.78, + "learning_rate": 2.4102025057315615e-06, + "loss": 0.3003, + "step": 9112 + }, + { + "epoch": 0.78, + "learning_rate": 2.4083951491832947e-06, + "loss": 0.3044, + "step": 9113 + }, + { + "epoch": 0.78, + "learning_rate": 2.406588377758421e-06, + "loss": 0.2915, + "step": 9114 + }, + { + "epoch": 0.78, + "learning_rate": 2.4047821915961923e-06, + "loss": 0.3181, + "step": 9115 + }, + { + "epoch": 0.78, + "learning_rate": 2.402976590835824e-06, + "loss": 0.2797, + "step": 9116 + }, + { + "epoch": 0.78, + "learning_rate": 2.401171575616481e-06, + "loss": 0.2449, + "step": 9117 + }, + { + "epoch": 0.78, + "learning_rate": 2.399367146077286e-06, + "loss": 0.2392, + "step": 9118 + }, + { + "epoch": 0.78, + "learning_rate": 2.3975633023573164e-06, + "loss": 0.2612, + "step": 9119 + }, + { + "epoch": 0.78, + "learning_rate": 2.395760044595602e-06, + "loss": 0.2234, + "step": 9120 + }, + { + "epoch": 0.78, + "learning_rate": 2.3939573729311325e-06, + "loss": 0.2739, + "step": 9121 + }, + { + "epoch": 0.78, + "learning_rate": 2.3921552875028443e-06, + "loss": 0.2234, + "step": 9122 + }, + { + "epoch": 0.78, + "learning_rate": 2.390353788449631e-06, + "loss": 0.2551, + "step": 9123 + }, + { + "epoch": 0.78, + "learning_rate": 2.388552875910354e-06, + "loss": 0.2613, + "step": 9124 + }, + { + "epoch": 0.78, + "learning_rate": 2.3867525500238086e-06, + "loss": 0.3028, + "step": 9125 + }, + { + "epoch": 0.78, + "learning_rate": 2.384952810928759e-06, + "loss": 0.2954, + "step": 9126 + }, + { + "epoch": 0.78, + "learning_rate": 2.3831536587639193e-06, + "loss": 0.278, + "step": 9127 + }, + { + "epoch": 0.78, + "learning_rate": 2.38135509366796e-06, + "loss": 0.2799, + "step": 9128 + }, + { + "epoch": 0.78, + "learning_rate": 2.379557115779507e-06, + "loss": 0.2349, + "step": 9129 + }, + { + "epoch": 0.78, + "learning_rate": 2.377759725237133e-06, + "loss": 0.2949, + "step": 9130 + }, + { + "epoch": 0.78, + "learning_rate": 2.3759629221793823e-06, + "loss": 0.2809, + "step": 9131 + }, + { + "epoch": 0.78, + "learning_rate": 2.374166706744735e-06, + "loss": 0.2585, + "step": 9132 + }, + { + "epoch": 0.78, + "learning_rate": 2.372371079071638e-06, + "loss": 0.2952, + "step": 9133 + }, + { + "epoch": 0.78, + "learning_rate": 2.3705760392984887e-06, + "loss": 0.2566, + "step": 9134 + }, + { + "epoch": 0.78, + "learning_rate": 2.368781587563641e-06, + "loss": 0.2382, + "step": 9135 + }, + { + "epoch": 0.78, + "learning_rate": 2.366987724005404e-06, + "loss": 0.2659, + "step": 9136 + }, + { + "epoch": 0.78, + "learning_rate": 2.3651944487620315e-06, + "loss": 0.272, + "step": 9137 + }, + { + "epoch": 0.78, + "learning_rate": 2.363401761971752e-06, + "loss": 0.2944, + "step": 9138 + }, + { + "epoch": 0.78, + "learning_rate": 2.3616096637727282e-06, + "loss": 0.2318, + "step": 9139 + }, + { + "epoch": 0.78, + "learning_rate": 2.3598181543030885e-06, + "loss": 0.3537, + "step": 9140 + }, + { + "epoch": 0.78, + "learning_rate": 2.358027233700915e-06, + "loss": 0.3098, + "step": 9141 + }, + { + "epoch": 0.78, + "learning_rate": 2.356236902104242e-06, + "loss": 0.3176, + "step": 9142 + }, + { + "epoch": 0.78, + "learning_rate": 2.3544471596510633e-06, + "loss": 0.3326, + "step": 9143 + }, + { + "epoch": 0.78, + "learning_rate": 2.3526580064793125e-06, + "loss": 0.2855, + "step": 9144 + }, + { + "epoch": 0.78, + "learning_rate": 2.350869442726903e-06, + "loss": 0.241, + "step": 9145 + }, + { + "epoch": 0.78, + "learning_rate": 2.3490814685316777e-06, + "loss": 0.2601, + "step": 9146 + }, + { + "epoch": 0.78, + "learning_rate": 2.3472940840314496e-06, + "loss": 0.2604, + "step": 9147 + }, + { + "epoch": 0.78, + "learning_rate": 2.3455072893639784e-06, + "loss": 0.259, + "step": 9148 + }, + { + "epoch": 0.78, + "learning_rate": 2.3437210846669854e-06, + "loss": 0.2193, + "step": 9149 + }, + { + "epoch": 0.78, + "learning_rate": 2.3419354700781393e-06, + "loss": 0.3079, + "step": 9150 + }, + { + "epoch": 0.78, + "learning_rate": 2.3401504457350677e-06, + "loss": 0.2879, + "step": 9151 + }, + { + "epoch": 0.78, + "learning_rate": 2.338366011775355e-06, + "loss": 0.255, + "step": 9152 + }, + { + "epoch": 0.78, + "learning_rate": 2.3365821683365286e-06, + "loss": 0.2527, + "step": 9153 + }, + { + "epoch": 0.78, + "learning_rate": 2.3347989155560835e-06, + "loss": 0.3129, + "step": 9154 + }, + { + "epoch": 0.78, + "learning_rate": 2.333016253571463e-06, + "loss": 0.2556, + "step": 9155 + }, + { + "epoch": 0.78, + "learning_rate": 2.331234182520066e-06, + "loss": 0.254, + "step": 9156 + }, + { + "epoch": 0.78, + "learning_rate": 2.3294527025392454e-06, + "loss": 0.2884, + "step": 9157 + }, + { + "epoch": 0.79, + "learning_rate": 2.32767181376631e-06, + "loss": 0.2582, + "step": 9158 + }, + { + "epoch": 0.79, + "learning_rate": 2.3258915163385233e-06, + "loss": 0.2702, + "step": 9159 + }, + { + "epoch": 0.79, + "learning_rate": 2.324111810393097e-06, + "loss": 0.2562, + "step": 9160 + }, + { + "epoch": 0.79, + "learning_rate": 2.322332696067202e-06, + "loss": 0.2828, + "step": 9161 + }, + { + "epoch": 0.79, + "learning_rate": 2.320554173497972e-06, + "loss": 0.3104, + "step": 9162 + }, + { + "epoch": 0.79, + "learning_rate": 2.318776242822478e-06, + "loss": 0.2874, + "step": 9163 + }, + { + "epoch": 0.79, + "learning_rate": 2.3169989041777565e-06, + "loss": 0.2489, + "step": 9164 + }, + { + "epoch": 0.79, + "learning_rate": 2.315222157700797e-06, + "loss": 0.3329, + "step": 9165 + }, + { + "epoch": 0.79, + "learning_rate": 2.3134460035285433e-06, + "loss": 0.2841, + "step": 9166 + }, + { + "epoch": 0.79, + "learning_rate": 2.311670441797893e-06, + "loss": 0.2308, + "step": 9167 + }, + { + "epoch": 0.79, + "learning_rate": 2.30989547264569e-06, + "loss": 0.2709, + "step": 9168 + }, + { + "epoch": 0.79, + "learning_rate": 2.308121096208752e-06, + "loss": 0.2532, + "step": 9169 + }, + { + "epoch": 0.79, + "learning_rate": 2.30634731262383e-06, + "loss": 0.2775, + "step": 9170 + }, + { + "epoch": 0.79, + "learning_rate": 2.3045741220276428e-06, + "loss": 0.3373, + "step": 9171 + }, + { + "epoch": 0.79, + "learning_rate": 2.302801524556857e-06, + "loss": 0.2536, + "step": 9172 + }, + { + "epoch": 0.79, + "learning_rate": 2.301029520348097e-06, + "loss": 0.2801, + "step": 9173 + }, + { + "epoch": 0.79, + "learning_rate": 2.299258109537943e-06, + "loss": 0.2907, + "step": 9174 + }, + { + "epoch": 0.79, + "learning_rate": 2.297487292262918e-06, + "loss": 0.2947, + "step": 9175 + }, + { + "epoch": 0.79, + "learning_rate": 2.2957170686595176e-06, + "loss": 0.2836, + "step": 9176 + }, + { + "epoch": 0.79, + "learning_rate": 2.2939474388641757e-06, + "loss": 0.2401, + "step": 9177 + }, + { + "epoch": 0.79, + "learning_rate": 2.2921784030132886e-06, + "loss": 0.2247, + "step": 9178 + }, + { + "epoch": 0.79, + "learning_rate": 2.290409961243204e-06, + "loss": 0.2394, + "step": 9179 + }, + { + "epoch": 0.79, + "learning_rate": 2.2886421136902257e-06, + "loss": 0.2419, + "step": 9180 + }, + { + "epoch": 0.79, + "learning_rate": 2.2868748604906145e-06, + "loss": 0.3106, + "step": 9181 + }, + { + "epoch": 0.79, + "learning_rate": 2.2851082017805704e-06, + "loss": 0.3028, + "step": 9182 + }, + { + "epoch": 0.79, + "learning_rate": 2.2833421376962718e-06, + "loss": 0.2422, + "step": 9183 + }, + { + "epoch": 0.79, + "learning_rate": 2.281576668373828e-06, + "loss": 0.2794, + "step": 9184 + }, + { + "epoch": 0.79, + "learning_rate": 2.279811793949318e-06, + "loss": 0.2437, + "step": 9185 + }, + { + "epoch": 0.79, + "learning_rate": 2.278047514558769e-06, + "loss": 0.2793, + "step": 9186 + }, + { + "epoch": 0.79, + "learning_rate": 2.2762838303381607e-06, + "loss": 0.2706, + "step": 9187 + }, + { + "epoch": 0.79, + "learning_rate": 2.274520741423435e-06, + "loss": 0.2377, + "step": 9188 + }, + { + "epoch": 0.79, + "learning_rate": 2.2727582479504704e-06, + "loss": 0.2642, + "step": 9189 + }, + { + "epoch": 0.79, + "learning_rate": 2.270996350055126e-06, + "loss": 0.2374, + "step": 9190 + }, + { + "epoch": 0.79, + "learning_rate": 2.2692350478731893e-06, + "loss": 0.2606, + "step": 9191 + }, + { + "epoch": 0.79, + "learning_rate": 2.2674743415404166e-06, + "loss": 0.2974, + "step": 9192 + }, + { + "epoch": 0.79, + "learning_rate": 2.265714231192514e-06, + "loss": 0.2502, + "step": 9193 + }, + { + "epoch": 0.79, + "learning_rate": 2.2639547169651423e-06, + "loss": 0.2722, + "step": 9194 + }, + { + "epoch": 0.79, + "learning_rate": 2.262195798993916e-06, + "loss": 0.3166, + "step": 9195 + }, + { + "epoch": 0.79, + "learning_rate": 2.260437477414403e-06, + "loss": 0.2901, + "step": 9196 + }, + { + "epoch": 0.79, + "learning_rate": 2.2586797523621306e-06, + "loss": 0.2933, + "step": 9197 + }, + { + "epoch": 0.79, + "learning_rate": 2.2569226239725695e-06, + "loss": 0.2832, + "step": 9198 + }, + { + "epoch": 0.79, + "learning_rate": 2.255166092381149e-06, + "loss": 0.2413, + "step": 9199 + }, + { + "epoch": 0.79, + "learning_rate": 2.2534101577232647e-06, + "loss": 0.251, + "step": 9200 + }, + { + "epoch": 0.79, + "learning_rate": 2.2516548201342445e-06, + "loss": 0.2373, + "step": 9201 + }, + { + "epoch": 0.79, + "learning_rate": 2.249900079749385e-06, + "loss": 0.2757, + "step": 9202 + }, + { + "epoch": 0.79, + "learning_rate": 2.248145936703934e-06, + "loss": 0.3094, + "step": 9203 + }, + { + "epoch": 0.79, + "learning_rate": 2.246392391133091e-06, + "loss": 0.248, + "step": 9204 + }, + { + "epoch": 0.79, + "learning_rate": 2.244639443172013e-06, + "loss": 0.2707, + "step": 9205 + }, + { + "epoch": 0.79, + "learning_rate": 2.2428870929558012e-06, + "loss": 0.2359, + "step": 9206 + }, + { + "epoch": 0.79, + "learning_rate": 2.241135340619528e-06, + "loss": 0.2504, + "step": 9207 + }, + { + "epoch": 0.79, + "learning_rate": 2.2393841862982036e-06, + "loss": 0.2808, + "step": 9208 + }, + { + "epoch": 0.79, + "learning_rate": 2.2376336301267985e-06, + "loss": 0.2327, + "step": 9209 + }, + { + "epoch": 0.79, + "learning_rate": 2.235883672240239e-06, + "loss": 0.3121, + "step": 9210 + }, + { + "epoch": 0.79, + "learning_rate": 2.2341343127734028e-06, + "loss": 0.2801, + "step": 9211 + }, + { + "epoch": 0.79, + "learning_rate": 2.2323855518611227e-06, + "loss": 0.2717, + "step": 9212 + }, + { + "epoch": 0.79, + "learning_rate": 2.2306373896381795e-06, + "loss": 0.2492, + "step": 9213 + }, + { + "epoch": 0.79, + "learning_rate": 2.2288898262393212e-06, + "loss": 0.2594, + "step": 9214 + }, + { + "epoch": 0.79, + "learning_rate": 2.227142861799235e-06, + "loss": 0.2529, + "step": 9215 + }, + { + "epoch": 0.79, + "learning_rate": 2.22539649645257e-06, + "loss": 0.2261, + "step": 9216 + }, + { + "epoch": 0.79, + "learning_rate": 2.2236507303339273e-06, + "loss": 0.2546, + "step": 9217 + }, + { + "epoch": 0.79, + "learning_rate": 2.2219055635778618e-06, + "loss": 0.2586, + "step": 9218 + }, + { + "epoch": 0.79, + "learning_rate": 2.220160996318886e-06, + "loss": 0.2361, + "step": 9219 + }, + { + "epoch": 0.79, + "learning_rate": 2.2184170286914543e-06, + "loss": 0.2383, + "step": 9220 + }, + { + "epoch": 0.79, + "learning_rate": 2.216673660829992e-06, + "loss": 0.2538, + "step": 9221 + }, + { + "epoch": 0.79, + "learning_rate": 2.214930892868864e-06, + "loss": 0.2588, + "step": 9222 + }, + { + "epoch": 0.79, + "learning_rate": 2.2131887249423957e-06, + "loss": 0.3054, + "step": 9223 + }, + { + "epoch": 0.79, + "learning_rate": 2.211447157184864e-06, + "loss": 0.2628, + "step": 9224 + }, + { + "epoch": 0.79, + "learning_rate": 2.2097061897305016e-06, + "loss": 0.2438, + "step": 9225 + }, + { + "epoch": 0.79, + "learning_rate": 2.207965822713496e-06, + "loss": 0.2842, + "step": 9226 + }, + { + "epoch": 0.79, + "learning_rate": 2.2062260562679773e-06, + "loss": 0.2513, + "step": 9227 + }, + { + "epoch": 0.79, + "learning_rate": 2.2044868905280504e-06, + "loss": 0.2938, + "step": 9228 + }, + { + "epoch": 0.79, + "learning_rate": 2.2027483256277517e-06, + "loss": 0.2933, + "step": 9229 + }, + { + "epoch": 0.79, + "learning_rate": 2.2010103617010836e-06, + "loss": 0.2712, + "step": 9230 + }, + { + "epoch": 0.79, + "learning_rate": 2.1992729988820026e-06, + "loss": 0.2474, + "step": 9231 + }, + { + "epoch": 0.79, + "learning_rate": 2.197536237304414e-06, + "loss": 0.2415, + "step": 9232 + }, + { + "epoch": 0.79, + "learning_rate": 2.195800077102178e-06, + "loss": 0.2503, + "step": 9233 + }, + { + "epoch": 0.79, + "learning_rate": 2.1940645184091115e-06, + "loss": 0.3033, + "step": 9234 + }, + { + "epoch": 0.79, + "learning_rate": 2.1923295613589846e-06, + "loss": 0.2966, + "step": 9235 + }, + { + "epoch": 0.79, + "learning_rate": 2.190595206085513e-06, + "loss": 0.2696, + "step": 9236 + }, + { + "epoch": 0.79, + "learning_rate": 2.188861452722373e-06, + "loss": 0.2515, + "step": 9237 + }, + { + "epoch": 0.79, + "learning_rate": 2.1871283014032007e-06, + "loss": 0.2637, + "step": 9238 + }, + { + "epoch": 0.79, + "learning_rate": 2.1853957522615732e-06, + "loss": 0.2508, + "step": 9239 + }, + { + "epoch": 0.79, + "learning_rate": 2.1836638054310265e-06, + "loss": 0.3, + "step": 9240 + }, + { + "epoch": 0.79, + "learning_rate": 2.181932461045053e-06, + "loss": 0.2824, + "step": 9241 + }, + { + "epoch": 0.79, + "learning_rate": 2.1802017192370963e-06, + "loss": 0.575, + "step": 9242 + }, + { + "epoch": 0.79, + "learning_rate": 2.178471580140553e-06, + "loss": 0.313, + "step": 9243 + }, + { + "epoch": 0.79, + "learning_rate": 2.176742043888769e-06, + "loss": 0.2617, + "step": 9244 + }, + { + "epoch": 0.79, + "learning_rate": 2.1750131106150563e-06, + "loss": 0.247, + "step": 9245 + }, + { + "epoch": 0.79, + "learning_rate": 2.173284780452667e-06, + "loss": 0.2808, + "step": 9246 + }, + { + "epoch": 0.79, + "learning_rate": 2.171557053534814e-06, + "loss": 0.6298, + "step": 9247 + }, + { + "epoch": 0.79, + "learning_rate": 2.169829929994661e-06, + "loss": 0.2391, + "step": 9248 + }, + { + "epoch": 0.79, + "learning_rate": 2.1681034099653287e-06, + "loss": 0.2638, + "step": 9249 + }, + { + "epoch": 0.79, + "learning_rate": 2.1663774935798886e-06, + "loss": 0.2831, + "step": 9250 + }, + { + "epoch": 0.79, + "learning_rate": 2.1646521809713583e-06, + "loss": 0.2561, + "step": 9251 + }, + { + "epoch": 0.79, + "learning_rate": 2.162927472272728e-06, + "loss": 0.272, + "step": 9252 + }, + { + "epoch": 0.79, + "learning_rate": 2.161203367616922e-06, + "loss": 0.2667, + "step": 9253 + }, + { + "epoch": 0.79, + "learning_rate": 2.1594798671368265e-06, + "loss": 0.2907, + "step": 9254 + }, + { + "epoch": 0.79, + "learning_rate": 2.1577569709652833e-06, + "loss": 0.2775, + "step": 9255 + }, + { + "epoch": 0.79, + "learning_rate": 2.156034679235083e-06, + "loss": 0.2592, + "step": 9256 + }, + { + "epoch": 0.79, + "learning_rate": 2.154312992078973e-06, + "loss": 0.2587, + "step": 9257 + }, + { + "epoch": 0.79, + "learning_rate": 2.1525919096296455e-06, + "loss": 0.2728, + "step": 9258 + }, + { + "epoch": 0.79, + "learning_rate": 2.1508714320197644e-06, + "loss": 0.2815, + "step": 9259 + }, + { + "epoch": 0.79, + "learning_rate": 2.1491515593819266e-06, + "loss": 0.2713, + "step": 9260 + }, + { + "epoch": 0.79, + "learning_rate": 2.1474322918486956e-06, + "loss": 0.2749, + "step": 9261 + }, + { + "epoch": 0.79, + "learning_rate": 2.1457136295525817e-06, + "loss": 0.3287, + "step": 9262 + }, + { + "epoch": 0.79, + "learning_rate": 2.143995572626052e-06, + "loss": 0.3051, + "step": 9263 + }, + { + "epoch": 0.79, + "learning_rate": 2.1422781212015286e-06, + "loss": 0.2537, + "step": 9264 + }, + { + "epoch": 0.79, + "learning_rate": 2.140561275411377e-06, + "loss": 0.2739, + "step": 9265 + }, + { + "epoch": 0.79, + "learning_rate": 2.138845035387932e-06, + "loss": 0.3284, + "step": 9266 + }, + { + "epoch": 0.79, + "learning_rate": 2.1371294012634667e-06, + "loss": 0.2881, + "step": 9267 + }, + { + "epoch": 0.79, + "learning_rate": 2.135414373170215e-06, + "loss": 0.2631, + "step": 9268 + }, + { + "epoch": 0.79, + "learning_rate": 2.1336999512403633e-06, + "loss": 0.2816, + "step": 9269 + }, + { + "epoch": 0.79, + "learning_rate": 2.131986135606051e-06, + "loss": 0.2986, + "step": 9270 + }, + { + "epoch": 0.79, + "learning_rate": 2.130272926399374e-06, + "loss": 0.2806, + "step": 9271 + }, + { + "epoch": 0.79, + "learning_rate": 2.1285603237523677e-06, + "loss": 0.3137, + "step": 9272 + }, + { + "epoch": 0.79, + "learning_rate": 2.1268483277970442e-06, + "loss": 0.2766, + "step": 9273 + }, + { + "epoch": 0.79, + "learning_rate": 2.1251369386653454e-06, + "loss": 0.2699, + "step": 9274 + }, + { + "epoch": 0.8, + "learning_rate": 2.123426156489178e-06, + "loss": 0.2676, + "step": 9275 + }, + { + "epoch": 0.8, + "learning_rate": 2.1217159814004096e-06, + "loss": 0.2808, + "step": 9276 + }, + { + "epoch": 0.8, + "learning_rate": 2.120006413530842e-06, + "loss": 0.2858, + "step": 9277 + }, + { + "epoch": 0.8, + "learning_rate": 2.1182974530122435e-06, + "loss": 0.2707, + "step": 9278 + }, + { + "epoch": 0.8, + "learning_rate": 2.116589099976334e-06, + "loss": 0.2557, + "step": 9279 + }, + { + "epoch": 0.8, + "learning_rate": 2.114881354554782e-06, + "loss": 0.2247, + "step": 9280 + }, + { + "epoch": 0.8, + "learning_rate": 2.113174216879218e-06, + "loss": 0.3052, + "step": 9281 + }, + { + "epoch": 0.8, + "learning_rate": 2.111467687081209e-06, + "loss": 0.2475, + "step": 9282 + }, + { + "epoch": 0.8, + "learning_rate": 2.1097617652922973e-06, + "loss": 0.243, + "step": 9283 + }, + { + "epoch": 0.8, + "learning_rate": 2.1080564516439605e-06, + "loss": 0.3114, + "step": 9284 + }, + { + "epoch": 0.8, + "learning_rate": 2.1063517462676365e-06, + "loss": 0.2689, + "step": 9285 + }, + { + "epoch": 0.8, + "learning_rate": 2.1046476492947155e-06, + "loss": 0.2492, + "step": 9286 + }, + { + "epoch": 0.8, + "learning_rate": 2.1029441608565425e-06, + "loss": 0.2549, + "step": 9287 + }, + { + "epoch": 0.8, + "learning_rate": 2.101241281084416e-06, + "loss": 0.278, + "step": 9288 + }, + { + "epoch": 0.8, + "learning_rate": 2.099539010109577e-06, + "loss": 0.2597, + "step": 9289 + }, + { + "epoch": 0.8, + "learning_rate": 2.0978373480632386e-06, + "loss": 0.2695, + "step": 9290 + }, + { + "epoch": 0.8, + "learning_rate": 2.0961362950765495e-06, + "loss": 0.2772, + "step": 9291 + }, + { + "epoch": 0.8, + "learning_rate": 2.0944358512806207e-06, + "loss": 0.259, + "step": 9292 + }, + { + "epoch": 0.8, + "learning_rate": 2.0927360168065135e-06, + "loss": 0.2773, + "step": 9293 + }, + { + "epoch": 0.8, + "learning_rate": 2.0910367917852437e-06, + "loss": 0.3198, + "step": 9294 + }, + { + "epoch": 0.8, + "learning_rate": 2.0893381763477816e-06, + "loss": 0.272, + "step": 9295 + }, + { + "epoch": 0.8, + "learning_rate": 2.087640170625039e-06, + "loss": 0.2737, + "step": 9296 + }, + { + "epoch": 0.8, + "learning_rate": 2.085942774747901e-06, + "loss": 0.2946, + "step": 9297 + }, + { + "epoch": 0.8, + "learning_rate": 2.084245988847188e-06, + "loss": 0.2377, + "step": 9298 + }, + { + "epoch": 0.8, + "learning_rate": 2.0825498130536804e-06, + "loss": 0.238, + "step": 9299 + }, + { + "epoch": 0.8, + "learning_rate": 2.080854247498112e-06, + "loss": 0.2417, + "step": 9300 + }, + { + "epoch": 0.8, + "learning_rate": 2.079159292311169e-06, + "loss": 0.2924, + "step": 9301 + }, + { + "epoch": 0.8, + "learning_rate": 2.077464947623492e-06, + "loss": 0.2801, + "step": 9302 + }, + { + "epoch": 0.8, + "learning_rate": 2.075771213565665e-06, + "loss": 0.2786, + "step": 9303 + }, + { + "epoch": 0.8, + "learning_rate": 2.0740780902682444e-06, + "loss": 0.2521, + "step": 9304 + }, + { + "epoch": 0.8, + "learning_rate": 2.0723855778617175e-06, + "loss": 0.2524, + "step": 9305 + }, + { + "epoch": 0.8, + "learning_rate": 2.0706936764765393e-06, + "loss": 0.2787, + "step": 9306 + }, + { + "epoch": 0.8, + "learning_rate": 2.069002386243113e-06, + "loss": 0.6023, + "step": 9307 + }, + { + "epoch": 0.8, + "learning_rate": 2.067311707291794e-06, + "loss": 0.2877, + "step": 9308 + }, + { + "epoch": 0.8, + "learning_rate": 2.065621639752895e-06, + "loss": 0.2395, + "step": 9309 + }, + { + "epoch": 0.8, + "learning_rate": 2.0639321837566696e-06, + "loss": 0.3077, + "step": 9310 + }, + { + "epoch": 0.8, + "learning_rate": 2.0622433394333443e-06, + "loss": 0.2843, + "step": 9311 + }, + { + "epoch": 0.8, + "learning_rate": 2.0605551069130767e-06, + "loss": 0.2504, + "step": 9312 + }, + { + "epoch": 0.8, + "learning_rate": 2.0588674863259907e-06, + "loss": 0.2914, + "step": 9313 + }, + { + "epoch": 0.8, + "learning_rate": 2.057180477802164e-06, + "loss": 0.2488, + "step": 9314 + }, + { + "epoch": 0.8, + "learning_rate": 2.0554940814716174e-06, + "loss": 0.2402, + "step": 9315 + }, + { + "epoch": 0.8, + "learning_rate": 2.0538082974643325e-06, + "loss": 0.2429, + "step": 9316 + }, + { + "epoch": 0.8, + "learning_rate": 2.0521231259102404e-06, + "loss": 0.2727, + "step": 9317 + }, + { + "epoch": 0.8, + "learning_rate": 2.0504385669392268e-06, + "loss": 0.3358, + "step": 9318 + }, + { + "epoch": 0.8, + "learning_rate": 2.0487546206811304e-06, + "loss": 0.2772, + "step": 9319 + }, + { + "epoch": 0.8, + "learning_rate": 2.047071287265735e-06, + "loss": 0.2646, + "step": 9320 + }, + { + "epoch": 0.8, + "learning_rate": 2.0453885668227923e-06, + "loss": 0.261, + "step": 9321 + }, + { + "epoch": 0.8, + "learning_rate": 2.043706459481992e-06, + "loss": 0.2285, + "step": 9322 + }, + { + "epoch": 0.8, + "learning_rate": 2.042024965372985e-06, + "loss": 0.2607, + "step": 9323 + }, + { + "epoch": 0.8, + "learning_rate": 2.040344084625372e-06, + "loss": 0.265, + "step": 9324 + }, + { + "epoch": 0.8, + "learning_rate": 2.0386638173687067e-06, + "loss": 0.2697, + "step": 9325 + }, + { + "epoch": 0.8, + "learning_rate": 2.0369841637324992e-06, + "loss": 0.2578, + "step": 9326 + }, + { + "epoch": 0.8, + "learning_rate": 2.0353051238462006e-06, + "loss": 0.2763, + "step": 9327 + }, + { + "epoch": 0.8, + "learning_rate": 2.033626697839234e-06, + "loss": 0.6049, + "step": 9328 + }, + { + "epoch": 0.8, + "learning_rate": 2.0319488858409552e-06, + "loss": 0.5861, + "step": 9329 + }, + { + "epoch": 0.8, + "learning_rate": 2.030271687980685e-06, + "loss": 0.315, + "step": 9330 + }, + { + "epoch": 0.8, + "learning_rate": 2.0285951043876937e-06, + "loss": 0.2921, + "step": 9331 + }, + { + "epoch": 0.8, + "learning_rate": 2.0269191351912042e-06, + "loss": 0.313, + "step": 9332 + }, + { + "epoch": 0.8, + "learning_rate": 2.025243780520394e-06, + "loss": 0.2839, + "step": 9333 + }, + { + "epoch": 0.8, + "learning_rate": 2.023569040504384e-06, + "loss": 0.2872, + "step": 9334 + }, + { + "epoch": 0.8, + "learning_rate": 2.0218949152722643e-06, + "loss": 0.2852, + "step": 9335 + }, + { + "epoch": 0.8, + "learning_rate": 2.020221404953061e-06, + "loss": 0.2509, + "step": 9336 + }, + { + "epoch": 0.8, + "learning_rate": 2.018548509675763e-06, + "loss": 0.2587, + "step": 9337 + }, + { + "epoch": 0.8, + "learning_rate": 2.016876229569308e-06, + "loss": 0.2901, + "step": 9338 + }, + { + "epoch": 0.8, + "learning_rate": 2.0152045647625874e-06, + "loss": 0.2545, + "step": 9339 + }, + { + "epoch": 0.8, + "learning_rate": 2.013533515384447e-06, + "loss": 0.2796, + "step": 9340 + }, + { + "epoch": 0.8, + "learning_rate": 2.0118630815636763e-06, + "loss": 0.2828, + "step": 9341 + }, + { + "epoch": 0.8, + "learning_rate": 2.0101932634290345e-06, + "loss": 0.3207, + "step": 9342 + }, + { + "epoch": 0.8, + "learning_rate": 2.0085240611092137e-06, + "loss": 0.2821, + "step": 9343 + }, + { + "epoch": 0.8, + "learning_rate": 2.006855474732872e-06, + "loss": 0.2629, + "step": 9344 + }, + { + "epoch": 0.8, + "learning_rate": 2.0051875044286138e-06, + "loss": 0.2532, + "step": 9345 + }, + { + "epoch": 0.8, + "learning_rate": 2.003520150325e-06, + "loss": 0.2896, + "step": 9346 + }, + { + "epoch": 0.8, + "learning_rate": 2.001853412550544e-06, + "loss": 0.2838, + "step": 9347 + }, + { + "epoch": 0.8, + "learning_rate": 2.0001872912337016e-06, + "loss": 0.2639, + "step": 9348 + }, + { + "epoch": 0.8, + "learning_rate": 1.9985217865029005e-06, + "loss": 0.2825, + "step": 9349 + }, + { + "epoch": 0.8, + "learning_rate": 1.9968568984865e-06, + "loss": 0.2417, + "step": 9350 + }, + { + "epoch": 0.8, + "learning_rate": 1.995192627312823e-06, + "loss": 0.2795, + "step": 9351 + }, + { + "epoch": 0.8, + "learning_rate": 1.9935289731101503e-06, + "loss": 0.3243, + "step": 9352 + }, + { + "epoch": 0.8, + "learning_rate": 1.9918659360067005e-06, + "loss": 0.2706, + "step": 9353 + }, + { + "epoch": 0.8, + "learning_rate": 1.9902035161306574e-06, + "loss": 0.2656, + "step": 9354 + }, + { + "epoch": 0.8, + "learning_rate": 1.9885417136101446e-06, + "loss": 0.2835, + "step": 9355 + }, + { + "epoch": 0.8, + "learning_rate": 1.9868805285732538e-06, + "loss": 0.2835, + "step": 9356 + }, + { + "epoch": 0.8, + "learning_rate": 1.9852199611480207e-06, + "loss": 0.2807, + "step": 9357 + }, + { + "epoch": 0.8, + "learning_rate": 1.983560011462425e-06, + "loss": 0.265, + "step": 9358 + }, + { + "epoch": 0.8, + "learning_rate": 1.9819006796444185e-06, + "loss": 0.2463, + "step": 9359 + }, + { + "epoch": 0.8, + "learning_rate": 1.9802419658218873e-06, + "loss": 0.2498, + "step": 9360 + }, + { + "epoch": 0.8, + "learning_rate": 1.978583870122678e-06, + "loss": 0.2531, + "step": 9361 + }, + { + "epoch": 0.8, + "learning_rate": 1.9769263926745886e-06, + "loss": 0.2675, + "step": 9362 + }, + { + "epoch": 0.8, + "learning_rate": 1.9752695336053697e-06, + "loss": 0.26, + "step": 9363 + }, + { + "epoch": 0.8, + "learning_rate": 1.9736132930427263e-06, + "loss": 0.2839, + "step": 9364 + }, + { + "epoch": 0.8, + "learning_rate": 1.971957671114306e-06, + "loss": 0.2706, + "step": 9365 + }, + { + "epoch": 0.8, + "learning_rate": 1.9703026679477253e-06, + "loss": 0.2814, + "step": 9366 + }, + { + "epoch": 0.8, + "learning_rate": 1.968648283670538e-06, + "loss": 0.2225, + "step": 9367 + }, + { + "epoch": 0.8, + "learning_rate": 1.9669945184102555e-06, + "loss": 0.2918, + "step": 9368 + }, + { + "epoch": 0.8, + "learning_rate": 1.9653413722943437e-06, + "loss": 0.2851, + "step": 9369 + }, + { + "epoch": 0.8, + "learning_rate": 1.963688845450218e-06, + "loss": 0.2845, + "step": 9370 + }, + { + "epoch": 0.8, + "learning_rate": 1.9620369380052507e-06, + "loss": 0.2684, + "step": 9371 + }, + { + "epoch": 0.8, + "learning_rate": 1.9603856500867537e-06, + "loss": 0.2231, + "step": 9372 + }, + { + "epoch": 0.8, + "learning_rate": 1.9587349818220113e-06, + "loss": 0.2791, + "step": 9373 + }, + { + "epoch": 0.8, + "learning_rate": 1.957084933338241e-06, + "loss": 0.2742, + "step": 9374 + }, + { + "epoch": 0.8, + "learning_rate": 1.955435504762624e-06, + "loss": 0.265, + "step": 9375 + }, + { + "epoch": 0.8, + "learning_rate": 1.953786696222287e-06, + "loss": 0.2646, + "step": 9376 + }, + { + "epoch": 0.8, + "learning_rate": 1.9521385078443156e-06, + "loss": 0.2541, + "step": 9377 + }, + { + "epoch": 0.8, + "learning_rate": 1.9504909397557436e-06, + "loss": 0.2821, + "step": 9378 + }, + { + "epoch": 0.8, + "learning_rate": 1.948843992083551e-06, + "loss": 0.2827, + "step": 9379 + }, + { + "epoch": 0.8, + "learning_rate": 1.9471976649546876e-06, + "loss": 0.2851, + "step": 9380 + }, + { + "epoch": 0.8, + "learning_rate": 1.945551958496035e-06, + "loss": 0.2683, + "step": 9381 + }, + { + "epoch": 0.8, + "learning_rate": 1.94390687283444e-06, + "loss": 0.316, + "step": 9382 + }, + { + "epoch": 0.8, + "learning_rate": 1.9422624080966956e-06, + "loss": 0.2656, + "step": 9383 + }, + { + "epoch": 0.8, + "learning_rate": 1.940618564409551e-06, + "loss": 0.3015, + "step": 9384 + }, + { + "epoch": 0.8, + "learning_rate": 1.938975341899708e-06, + "loss": 0.2662, + "step": 9385 + }, + { + "epoch": 0.8, + "learning_rate": 1.937332740693809e-06, + "loss": 0.3258, + "step": 9386 + }, + { + "epoch": 0.8, + "learning_rate": 1.9356907609184695e-06, + "loss": 0.3202, + "step": 9387 + }, + { + "epoch": 0.8, + "learning_rate": 1.9340494027002365e-06, + "loss": 0.3433, + "step": 9388 + }, + { + "epoch": 0.8, + "learning_rate": 1.932408666165617e-06, + "loss": 0.2412, + "step": 9389 + }, + { + "epoch": 0.8, + "learning_rate": 1.9307685514410803e-06, + "loss": 0.2311, + "step": 9390 + }, + { + "epoch": 0.8, + "learning_rate": 1.92912905865303e-06, + "loss": 0.237, + "step": 9391 + }, + { + "epoch": 0.81, + "learning_rate": 1.9274901879278342e-06, + "loss": 0.3076, + "step": 9392 + }, + { + "epoch": 0.81, + "learning_rate": 1.925851939391803e-06, + "loss": 0.2809, + "step": 9393 + }, + { + "epoch": 0.81, + "learning_rate": 1.924214313171211e-06, + "loss": 0.2503, + "step": 9394 + }, + { + "epoch": 0.81, + "learning_rate": 1.9225773093922785e-06, + "loss": 0.2345, + "step": 9395 + }, + { + "epoch": 0.81, + "learning_rate": 1.920940928181171e-06, + "loss": 0.2635, + "step": 9396 + }, + { + "epoch": 0.81, + "learning_rate": 1.919305169664021e-06, + "loss": 0.257, + "step": 9397 + }, + { + "epoch": 0.81, + "learning_rate": 1.9176700339668986e-06, + "loss": 0.3221, + "step": 9398 + }, + { + "epoch": 0.81, + "learning_rate": 1.9160355212158345e-06, + "loss": 0.2471, + "step": 9399 + }, + { + "epoch": 0.81, + "learning_rate": 1.9144016315368075e-06, + "loss": 0.2194, + "step": 9400 + }, + { + "epoch": 0.81, + "learning_rate": 1.9127683650557505e-06, + "loss": 0.2868, + "step": 9401 + }, + { + "epoch": 0.81, + "learning_rate": 1.9111357218985504e-06, + "loss": 0.5947, + "step": 9402 + }, + { + "epoch": 0.81, + "learning_rate": 1.9095037021910366e-06, + "loss": 0.2792, + "step": 9403 + }, + { + "epoch": 0.81, + "learning_rate": 1.9078723060590053e-06, + "loss": 0.2946, + "step": 9404 + }, + { + "epoch": 0.81, + "learning_rate": 1.9062415336281904e-06, + "loss": 0.2424, + "step": 9405 + }, + { + "epoch": 0.81, + "learning_rate": 1.9046113850242843e-06, + "loss": 0.5575, + "step": 9406 + }, + { + "epoch": 0.81, + "learning_rate": 1.9029818603729332e-06, + "loss": 0.2314, + "step": 9407 + }, + { + "epoch": 0.81, + "learning_rate": 1.9013529597997315e-06, + "loss": 0.2533, + "step": 9408 + }, + { + "epoch": 0.81, + "learning_rate": 1.8997246834302297e-06, + "loss": 0.2859, + "step": 9409 + }, + { + "epoch": 0.81, + "learning_rate": 1.8980970313899193e-06, + "loss": 0.2224, + "step": 9410 + }, + { + "epoch": 0.81, + "learning_rate": 1.8964700038042628e-06, + "loss": 0.2674, + "step": 9411 + }, + { + "epoch": 0.81, + "learning_rate": 1.894843600798655e-06, + "loss": 0.2587, + "step": 9412 + }, + { + "epoch": 0.81, + "learning_rate": 1.8932178224984533e-06, + "loss": 0.261, + "step": 9413 + }, + { + "epoch": 0.81, + "learning_rate": 1.8915926690289643e-06, + "loss": 0.2709, + "step": 9414 + }, + { + "epoch": 0.81, + "learning_rate": 1.8899681405154491e-06, + "loss": 0.2723, + "step": 9415 + }, + { + "epoch": 0.81, + "learning_rate": 1.8883442370831183e-06, + "loss": 0.2507, + "step": 9416 + }, + { + "epoch": 0.81, + "learning_rate": 1.8867209588571288e-06, + "loss": 0.2488, + "step": 9417 + }, + { + "epoch": 0.81, + "learning_rate": 1.8850983059626026e-06, + "loss": 0.2751, + "step": 9418 + }, + { + "epoch": 0.81, + "learning_rate": 1.8834762785246007e-06, + "loss": 0.2551, + "step": 9419 + }, + { + "epoch": 0.81, + "learning_rate": 1.881854876668142e-06, + "loss": 0.282, + "step": 9420 + }, + { + "epoch": 0.81, + "learning_rate": 1.8802341005181957e-06, + "loss": 0.2819, + "step": 9421 + }, + { + "epoch": 0.81, + "learning_rate": 1.8786139501996847e-06, + "loss": 0.5848, + "step": 9422 + }, + { + "epoch": 0.81, + "learning_rate": 1.876994425837484e-06, + "loss": 0.2703, + "step": 9423 + }, + { + "epoch": 0.81, + "learning_rate": 1.8753755275564112e-06, + "loss": 0.281, + "step": 9424 + }, + { + "epoch": 0.81, + "learning_rate": 1.8737572554812522e-06, + "loss": 0.244, + "step": 9425 + }, + { + "epoch": 0.81, + "learning_rate": 1.8721396097367294e-06, + "loss": 0.2943, + "step": 9426 + }, + { + "epoch": 0.81, + "learning_rate": 1.870522590447521e-06, + "loss": 0.2612, + "step": 9427 + }, + { + "epoch": 0.81, + "learning_rate": 1.8689061977382684e-06, + "loss": 0.2703, + "step": 9428 + }, + { + "epoch": 0.81, + "learning_rate": 1.867290431733546e-06, + "loss": 0.2958, + "step": 9429 + }, + { + "epoch": 0.81, + "learning_rate": 1.8656752925578948e-06, + "loss": 0.2273, + "step": 9430 + }, + { + "epoch": 0.81, + "learning_rate": 1.8640607803357936e-06, + "loss": 0.2496, + "step": 9431 + }, + { + "epoch": 0.81, + "learning_rate": 1.8624468951916896e-06, + "loss": 0.2704, + "step": 9432 + }, + { + "epoch": 0.81, + "learning_rate": 1.8608336372499736e-06, + "loss": 0.2464, + "step": 9433 + }, + { + "epoch": 0.81, + "learning_rate": 1.8592210066349781e-06, + "loss": 0.2734, + "step": 9434 + }, + { + "epoch": 0.81, + "learning_rate": 1.857609003471007e-06, + "loss": 0.279, + "step": 9435 + }, + { + "epoch": 0.81, + "learning_rate": 1.8559976278823e-06, + "loss": 0.2687, + "step": 9436 + }, + { + "epoch": 0.81, + "learning_rate": 1.8543868799930542e-06, + "loss": 0.2455, + "step": 9437 + }, + { + "epoch": 0.81, + "learning_rate": 1.8527767599274193e-06, + "loss": 0.2799, + "step": 9438 + }, + { + "epoch": 0.81, + "learning_rate": 1.8511672678094949e-06, + "loss": 0.2882, + "step": 9439 + }, + { + "epoch": 0.81, + "learning_rate": 1.8495584037633364e-06, + "loss": 0.3014, + "step": 9440 + }, + { + "epoch": 0.81, + "learning_rate": 1.8479501679129375e-06, + "loss": 0.2403, + "step": 9441 + }, + { + "epoch": 0.81, + "learning_rate": 1.846342560382265e-06, + "loss": 0.2645, + "step": 9442 + }, + { + "epoch": 0.81, + "learning_rate": 1.844735581295216e-06, + "loss": 0.263, + "step": 9443 + }, + { + "epoch": 0.81, + "learning_rate": 1.8431292307756532e-06, + "loss": 0.2413, + "step": 9444 + }, + { + "epoch": 0.81, + "learning_rate": 1.8415235089473848e-06, + "loss": 0.2513, + "step": 9445 + }, + { + "epoch": 0.81, + "learning_rate": 1.839918415934171e-06, + "loss": 0.2727, + "step": 9446 + }, + { + "epoch": 0.81, + "learning_rate": 1.8383139518597293e-06, + "loss": 0.347, + "step": 9447 + }, + { + "epoch": 0.81, + "learning_rate": 1.8367101168477152e-06, + "loss": 0.2689, + "step": 9448 + }, + { + "epoch": 0.81, + "learning_rate": 1.8351069110217535e-06, + "loss": 0.2823, + "step": 9449 + }, + { + "epoch": 0.81, + "learning_rate": 1.8335043345054048e-06, + "loss": 0.2725, + "step": 9450 + }, + { + "epoch": 0.81, + "learning_rate": 1.831902387422191e-06, + "loss": 0.278, + "step": 9451 + }, + { + "epoch": 0.81, + "learning_rate": 1.8303010698955803e-06, + "loss": 0.265, + "step": 9452 + }, + { + "epoch": 0.81, + "learning_rate": 1.8287003820489956e-06, + "loss": 0.2759, + "step": 9453 + }, + { + "epoch": 0.81, + "learning_rate": 1.8271003240058127e-06, + "loss": 0.2891, + "step": 9454 + }, + { + "epoch": 0.81, + "learning_rate": 1.8255008958893483e-06, + "loss": 0.2639, + "step": 9455 + }, + { + "epoch": 0.81, + "learning_rate": 1.8239020978228894e-06, + "loss": 0.248, + "step": 9456 + }, + { + "epoch": 0.81, + "learning_rate": 1.822303929929654e-06, + "loss": 0.2601, + "step": 9457 + }, + { + "epoch": 0.81, + "learning_rate": 1.820706392332824e-06, + "loss": 0.6052, + "step": 9458 + }, + { + "epoch": 0.81, + "learning_rate": 1.8191094851555314e-06, + "loss": 0.2992, + "step": 9459 + }, + { + "epoch": 0.81, + "learning_rate": 1.8175132085208558e-06, + "loss": 0.2775, + "step": 9460 + }, + { + "epoch": 0.81, + "learning_rate": 1.8159175625518344e-06, + "loss": 0.2957, + "step": 9461 + }, + { + "epoch": 0.81, + "learning_rate": 1.814322547371443e-06, + "loss": 0.2314, + "step": 9462 + }, + { + "epoch": 0.81, + "learning_rate": 1.8127281631026284e-06, + "loss": 0.2336, + "step": 9463 + }, + { + "epoch": 0.81, + "learning_rate": 1.8111344098682703e-06, + "loss": 0.3176, + "step": 9464 + }, + { + "epoch": 0.81, + "learning_rate": 1.8095412877912056e-06, + "loss": 0.265, + "step": 9465 + }, + { + "epoch": 0.81, + "learning_rate": 1.8079487969942344e-06, + "loss": 0.3464, + "step": 9466 + }, + { + "epoch": 0.81, + "learning_rate": 1.806356937600089e-06, + "loss": 0.2669, + "step": 9467 + }, + { + "epoch": 0.81, + "learning_rate": 1.8047657097314675e-06, + "loss": 0.2624, + "step": 9468 + }, + { + "epoch": 0.81, + "learning_rate": 1.8031751135110065e-06, + "loss": 0.2947, + "step": 9469 + }, + { + "epoch": 0.81, + "learning_rate": 1.8015851490613079e-06, + "loss": 0.275, + "step": 9470 + }, + { + "epoch": 0.81, + "learning_rate": 1.799995816504919e-06, + "loss": 0.2711, + "step": 9471 + }, + { + "epoch": 0.81, + "learning_rate": 1.7984071159643312e-06, + "loss": 0.251, + "step": 9472 + }, + { + "epoch": 0.81, + "learning_rate": 1.7968190475620018e-06, + "loss": 0.2641, + "step": 9473 + }, + { + "epoch": 0.81, + "learning_rate": 1.795231611420325e-06, + "loss": 0.313, + "step": 9474 + }, + { + "epoch": 0.81, + "learning_rate": 1.7936448076616542e-06, + "loss": 0.3232, + "step": 9475 + }, + { + "epoch": 0.81, + "learning_rate": 1.7920586364082926e-06, + "loss": 0.3124, + "step": 9476 + }, + { + "epoch": 0.81, + "learning_rate": 1.7904730977824958e-06, + "loss": 0.2873, + "step": 9477 + }, + { + "epoch": 0.81, + "learning_rate": 1.7888881919064694e-06, + "loss": 0.2629, + "step": 9478 + }, + { + "epoch": 0.81, + "learning_rate": 1.7873039189023644e-06, + "loss": 0.2428, + "step": 9479 + }, + { + "epoch": 0.81, + "learning_rate": 1.7857202788922977e-06, + "loss": 0.2529, + "step": 9480 + }, + { + "epoch": 0.81, + "learning_rate": 1.784137271998323e-06, + "loss": 0.2679, + "step": 9481 + }, + { + "epoch": 0.81, + "learning_rate": 1.78255489834245e-06, + "loss": 0.2695, + "step": 9482 + }, + { + "epoch": 0.81, + "learning_rate": 1.7809731580466427e-06, + "loss": 0.2533, + "step": 9483 + }, + { + "epoch": 0.81, + "learning_rate": 1.7793920512328122e-06, + "loss": 0.2829, + "step": 9484 + }, + { + "epoch": 0.81, + "learning_rate": 1.7778115780228267e-06, + "loss": 0.2685, + "step": 9485 + }, + { + "epoch": 0.81, + "learning_rate": 1.776231738538492e-06, + "loss": 0.3014, + "step": 9486 + }, + { + "epoch": 0.81, + "learning_rate": 1.7746525329015852e-06, + "loss": 0.263, + "step": 9487 + }, + { + "epoch": 0.81, + "learning_rate": 1.7730739612338166e-06, + "loss": 0.2858, + "step": 9488 + }, + { + "epoch": 0.81, + "learning_rate": 1.7714960236568556e-06, + "loss": 0.2525, + "step": 9489 + }, + { + "epoch": 0.81, + "learning_rate": 1.7699187202923241e-06, + "loss": 0.282, + "step": 9490 + }, + { + "epoch": 0.81, + "learning_rate": 1.76834205126179e-06, + "loss": 0.3347, + "step": 9491 + }, + { + "epoch": 0.81, + "learning_rate": 1.7667660166867806e-06, + "loss": 0.2742, + "step": 9492 + }, + { + "epoch": 0.81, + "learning_rate": 1.76519061668876e-06, + "loss": 0.2836, + "step": 9493 + }, + { + "epoch": 0.81, + "learning_rate": 1.763615851389161e-06, + "loss": 0.2911, + "step": 9494 + }, + { + "epoch": 0.81, + "learning_rate": 1.7620417209093544e-06, + "loss": 0.2878, + "step": 9495 + }, + { + "epoch": 0.81, + "learning_rate": 1.7604682253706652e-06, + "loss": 0.2726, + "step": 9496 + }, + { + "epoch": 0.81, + "learning_rate": 1.7588953648943742e-06, + "loss": 0.5618, + "step": 9497 + }, + { + "epoch": 0.81, + "learning_rate": 1.7573231396017064e-06, + "loss": 0.3093, + "step": 9498 + }, + { + "epoch": 0.81, + "learning_rate": 1.7557515496138455e-06, + "loss": 0.2517, + "step": 9499 + }, + { + "epoch": 0.81, + "learning_rate": 1.7541805950519154e-06, + "loss": 0.2678, + "step": 9500 + }, + { + "epoch": 0.81, + "learning_rate": 1.7526102760370056e-06, + "loss": 0.2991, + "step": 9501 + }, + { + "epoch": 0.81, + "learning_rate": 1.7510405926901408e-06, + "loss": 0.2891, + "step": 9502 + }, + { + "epoch": 0.81, + "learning_rate": 1.7494715451323063e-06, + "loss": 0.2471, + "step": 9503 + }, + { + "epoch": 0.81, + "learning_rate": 1.7479031334844421e-06, + "loss": 0.2979, + "step": 9504 + }, + { + "epoch": 0.81, + "learning_rate": 1.746335357867428e-06, + "loss": 0.2501, + "step": 9505 + }, + { + "epoch": 0.81, + "learning_rate": 1.7447682184021042e-06, + "loss": 0.2697, + "step": 9506 + }, + { + "epoch": 0.81, + "learning_rate": 1.7432017152092507e-06, + "loss": 0.2668, + "step": 9507 + }, + { + "epoch": 0.82, + "learning_rate": 1.7416358484096141e-06, + "loss": 0.2687, + "step": 9508 + }, + { + "epoch": 0.82, + "learning_rate": 1.7400706181238824e-06, + "loss": 0.2621, + "step": 9509 + }, + { + "epoch": 0.82, + "learning_rate": 1.7385060244726882e-06, + "loss": 0.2627, + "step": 9510 + }, + { + "epoch": 0.82, + "learning_rate": 1.7369420675766347e-06, + "loss": 0.2925, + "step": 9511 + }, + { + "epoch": 0.82, + "learning_rate": 1.7353787475562544e-06, + "loss": 0.3026, + "step": 9512 + }, + { + "epoch": 0.82, + "learning_rate": 1.7338160645320435e-06, + "loss": 0.2543, + "step": 9513 + }, + { + "epoch": 0.82, + "learning_rate": 1.7322540186244462e-06, + "loss": 0.2983, + "step": 9514 + }, + { + "epoch": 0.82, + "learning_rate": 1.730692609953858e-06, + "loss": 0.2759, + "step": 9515 + }, + { + "epoch": 0.82, + "learning_rate": 1.7291318386406241e-06, + "loss": 0.338, + "step": 9516 + }, + { + "epoch": 0.82, + "learning_rate": 1.7275717048050367e-06, + "loss": 0.2865, + "step": 9517 + }, + { + "epoch": 0.82, + "learning_rate": 1.7260122085673525e-06, + "loss": 0.3047, + "step": 9518 + }, + { + "epoch": 0.82, + "learning_rate": 1.7244533500477612e-06, + "loss": 0.2775, + "step": 9519 + }, + { + "epoch": 0.82, + "learning_rate": 1.7228951293664142e-06, + "loss": 0.285, + "step": 9520 + }, + { + "epoch": 0.82, + "learning_rate": 1.7213375466434134e-06, + "loss": 0.2306, + "step": 9521 + }, + { + "epoch": 0.82, + "learning_rate": 1.7197806019988084e-06, + "loss": 0.2451, + "step": 9522 + }, + { + "epoch": 0.82, + "learning_rate": 1.7182242955526029e-06, + "loss": 0.2715, + "step": 9523 + }, + { + "epoch": 0.82, + "learning_rate": 1.7166686274247424e-06, + "loss": 0.238, + "step": 9524 + }, + { + "epoch": 0.82, + "learning_rate": 1.7151135977351397e-06, + "loss": 0.2604, + "step": 9525 + }, + { + "epoch": 0.82, + "learning_rate": 1.713559206603642e-06, + "loss": 0.3048, + "step": 9526 + }, + { + "epoch": 0.82, + "learning_rate": 1.7120054541500552e-06, + "loss": 0.5493, + "step": 9527 + }, + { + "epoch": 0.82, + "learning_rate": 1.7104523404941365e-06, + "loss": 0.2971, + "step": 9528 + }, + { + "epoch": 0.82, + "learning_rate": 1.7088998657555922e-06, + "loss": 0.2821, + "step": 9529 + }, + { + "epoch": 0.82, + "learning_rate": 1.7073480300540802e-06, + "loss": 0.2471, + "step": 9530 + }, + { + "epoch": 0.82, + "learning_rate": 1.7057968335092024e-06, + "loss": 0.2377, + "step": 9531 + }, + { + "epoch": 0.82, + "learning_rate": 1.7042462762405265e-06, + "loss": 0.2661, + "step": 9532 + }, + { + "epoch": 0.82, + "learning_rate": 1.7026963583675549e-06, + "loss": 0.2929, + "step": 9533 + }, + { + "epoch": 0.82, + "learning_rate": 1.7011470800097496e-06, + "loss": 0.3049, + "step": 9534 + }, + { + "epoch": 0.82, + "learning_rate": 1.6995984412865218e-06, + "loss": 0.2688, + "step": 9535 + }, + { + "epoch": 0.82, + "learning_rate": 1.6980504423172317e-06, + "loss": 0.2785, + "step": 9536 + }, + { + "epoch": 0.82, + "learning_rate": 1.696503083221196e-06, + "loss": 0.2885, + "step": 9537 + }, + { + "epoch": 0.82, + "learning_rate": 1.694956364117668e-06, + "loss": 0.2571, + "step": 9538 + }, + { + "epoch": 0.82, + "learning_rate": 1.6934102851258726e-06, + "loss": 0.285, + "step": 9539 + }, + { + "epoch": 0.82, + "learning_rate": 1.6918648463649668e-06, + "loss": 0.2803, + "step": 9540 + }, + { + "epoch": 0.82, + "learning_rate": 1.6903200479540627e-06, + "loss": 0.2862, + "step": 9541 + }, + { + "epoch": 0.82, + "learning_rate": 1.6887758900122352e-06, + "loss": 0.301, + "step": 9542 + }, + { + "epoch": 0.82, + "learning_rate": 1.6872323726584938e-06, + "loss": 0.2347, + "step": 9543 + }, + { + "epoch": 0.82, + "learning_rate": 1.6856894960118087e-06, + "loss": 0.2665, + "step": 9544 + }, + { + "epoch": 0.82, + "learning_rate": 1.6841472601910892e-06, + "loss": 0.2335, + "step": 9545 + }, + { + "epoch": 0.82, + "learning_rate": 1.6826056653152122e-06, + "loss": 0.5646, + "step": 9546 + }, + { + "epoch": 0.82, + "learning_rate": 1.6810647115029954e-06, + "loss": 0.3072, + "step": 9547 + }, + { + "epoch": 0.82, + "learning_rate": 1.6795243988732e-06, + "loss": 0.3416, + "step": 9548 + }, + { + "epoch": 0.82, + "learning_rate": 1.677984727544557e-06, + "loss": 0.2332, + "step": 9549 + }, + { + "epoch": 0.82, + "learning_rate": 1.6764456976357279e-06, + "loss": 0.3188, + "step": 9550 + }, + { + "epoch": 0.82, + "learning_rate": 1.674907309265338e-06, + "loss": 0.287, + "step": 9551 + }, + { + "epoch": 0.82, + "learning_rate": 1.6733695625519553e-06, + "loss": 0.286, + "step": 9552 + }, + { + "epoch": 0.82, + "learning_rate": 1.6718324576141043e-06, + "loss": 0.3089, + "step": 9553 + }, + { + "epoch": 0.82, + "learning_rate": 1.67029599457026e-06, + "loss": 0.2725, + "step": 9554 + }, + { + "epoch": 0.82, + "learning_rate": 1.6687601735388358e-06, + "loss": 0.2493, + "step": 9555 + }, + { + "epoch": 0.82, + "learning_rate": 1.6672249946382179e-06, + "loss": 0.2916, + "step": 9556 + }, + { + "epoch": 0.82, + "learning_rate": 1.6656904579867205e-06, + "loss": 0.2604, + "step": 9557 + }, + { + "epoch": 0.82, + "learning_rate": 1.6641565637026225e-06, + "loss": 0.2554, + "step": 9558 + }, + { + "epoch": 0.82, + "learning_rate": 1.6626233119041468e-06, + "loss": 0.2863, + "step": 9559 + }, + { + "epoch": 0.82, + "learning_rate": 1.6610907027094714e-06, + "loss": 0.2711, + "step": 9560 + }, + { + "epoch": 0.82, + "learning_rate": 1.6595587362367226e-06, + "loss": 0.2813, + "step": 9561 + }, + { + "epoch": 0.82, + "learning_rate": 1.6580274126039698e-06, + "loss": 0.2722, + "step": 9562 + }, + { + "epoch": 0.82, + "learning_rate": 1.6564967319292502e-06, + "loss": 0.3035, + "step": 9563 + }, + { + "epoch": 0.82, + "learning_rate": 1.6549666943305342e-06, + "loss": 0.2395, + "step": 9564 + }, + { + "epoch": 0.82, + "learning_rate": 1.653437299925751e-06, + "loss": 0.2979, + "step": 9565 + }, + { + "epoch": 0.82, + "learning_rate": 1.651908548832779e-06, + "loss": 0.2621, + "step": 9566 + }, + { + "epoch": 0.82, + "learning_rate": 1.6503804411694468e-06, + "loss": 0.2891, + "step": 9567 + }, + { + "epoch": 0.82, + "learning_rate": 1.6488529770535367e-06, + "loss": 0.2832, + "step": 9568 + }, + { + "epoch": 0.82, + "learning_rate": 1.6473261566027687e-06, + "loss": 0.2579, + "step": 9569 + }, + { + "epoch": 0.82, + "learning_rate": 1.6457999799348345e-06, + "loss": 0.3221, + "step": 9570 + }, + { + "epoch": 0.82, + "learning_rate": 1.6442744471673566e-06, + "loss": 0.2507, + "step": 9571 + }, + { + "epoch": 0.82, + "learning_rate": 1.6427495584179165e-06, + "loss": 0.2961, + "step": 9572 + }, + { + "epoch": 0.82, + "learning_rate": 1.6412253138040467e-06, + "loss": 0.2866, + "step": 9573 + }, + { + "epoch": 0.82, + "learning_rate": 1.6397017134432281e-06, + "loss": 0.2695, + "step": 9574 + }, + { + "epoch": 0.82, + "learning_rate": 1.638178757452894e-06, + "loss": 0.28, + "step": 9575 + }, + { + "epoch": 0.82, + "learning_rate": 1.6366564459504186e-06, + "loss": 0.2577, + "step": 9576 + }, + { + "epoch": 0.82, + "learning_rate": 1.6351347790531457e-06, + "loss": 0.2874, + "step": 9577 + }, + { + "epoch": 0.82, + "learning_rate": 1.6336137568783495e-06, + "loss": 0.6031, + "step": 9578 + }, + { + "epoch": 0.82, + "learning_rate": 1.6320933795432626e-06, + "loss": 0.2919, + "step": 9579 + }, + { + "epoch": 0.82, + "learning_rate": 1.6305736471650756e-06, + "loss": 0.2827, + "step": 9580 + }, + { + "epoch": 0.82, + "learning_rate": 1.6290545598609165e-06, + "loss": 0.3348, + "step": 9581 + }, + { + "epoch": 0.82, + "learning_rate": 1.627536117747871e-06, + "loss": 0.2628, + "step": 9582 + }, + { + "epoch": 0.82, + "learning_rate": 1.626018320942967e-06, + "loss": 0.2819, + "step": 9583 + }, + { + "epoch": 0.82, + "learning_rate": 1.6245011695631962e-06, + "loss": 0.2634, + "step": 9584 + }, + { + "epoch": 0.82, + "learning_rate": 1.6229846637254932e-06, + "loss": 0.2584, + "step": 9585 + }, + { + "epoch": 0.82, + "learning_rate": 1.6214688035467363e-06, + "loss": 0.2779, + "step": 9586 + }, + { + "epoch": 0.82, + "learning_rate": 1.6199535891437678e-06, + "loss": 0.2803, + "step": 9587 + }, + { + "epoch": 0.82, + "learning_rate": 1.6184390206333688e-06, + "loss": 0.3201, + "step": 9588 + }, + { + "epoch": 0.82, + "learning_rate": 1.616925098132275e-06, + "loss": 0.2339, + "step": 9589 + }, + { + "epoch": 0.82, + "learning_rate": 1.6154118217571723e-06, + "loss": 0.2957, + "step": 9590 + }, + { + "epoch": 0.82, + "learning_rate": 1.613899191624697e-06, + "loss": 0.2483, + "step": 9591 + }, + { + "epoch": 0.82, + "learning_rate": 1.612387207851437e-06, + "loss": 0.2757, + "step": 9592 + }, + { + "epoch": 0.82, + "learning_rate": 1.610875870553923e-06, + "loss": 0.2422, + "step": 9593 + }, + { + "epoch": 0.82, + "learning_rate": 1.6093651798486487e-06, + "loss": 0.263, + "step": 9594 + }, + { + "epoch": 0.82, + "learning_rate": 1.6078551358520456e-06, + "loss": 0.2716, + "step": 9595 + }, + { + "epoch": 0.82, + "learning_rate": 1.6063457386805004e-06, + "loss": 0.2653, + "step": 9596 + }, + { + "epoch": 0.82, + "learning_rate": 1.6048369884503524e-06, + "loss": 0.2645, + "step": 9597 + }, + { + "epoch": 0.82, + "learning_rate": 1.6033288852778882e-06, + "loss": 0.2669, + "step": 9598 + }, + { + "epoch": 0.82, + "learning_rate": 1.6018214292793455e-06, + "loss": 0.2472, + "step": 9599 + }, + { + "epoch": 0.82, + "learning_rate": 1.6003146205709064e-06, + "loss": 0.2523, + "step": 9600 + }, + { + "epoch": 0.82, + "learning_rate": 1.5988084592687169e-06, + "loss": 0.2884, + "step": 9601 + }, + { + "epoch": 0.82, + "learning_rate": 1.5973029454888578e-06, + "loss": 0.2417, + "step": 9602 + }, + { + "epoch": 0.82, + "learning_rate": 1.5957980793473682e-06, + "loss": 0.2799, + "step": 9603 + }, + { + "epoch": 0.82, + "learning_rate": 1.5942938609602365e-06, + "loss": 0.28, + "step": 9604 + }, + { + "epoch": 0.82, + "learning_rate": 1.5927902904434e-06, + "loss": 0.2823, + "step": 9605 + }, + { + "epoch": 0.82, + "learning_rate": 1.5912873679127495e-06, + "loss": 0.2827, + "step": 9606 + }, + { + "epoch": 0.82, + "learning_rate": 1.589785093484114e-06, + "loss": 0.5929, + "step": 9607 + }, + { + "epoch": 0.82, + "learning_rate": 1.5882834672732939e-06, + "loss": 0.2723, + "step": 9608 + }, + { + "epoch": 0.82, + "learning_rate": 1.586782489396017e-06, + "loss": 0.2827, + "step": 9609 + }, + { + "epoch": 0.82, + "learning_rate": 1.5852821599679747e-06, + "loss": 0.3037, + "step": 9610 + }, + { + "epoch": 0.82, + "learning_rate": 1.5837824791048062e-06, + "loss": 0.2631, + "step": 9611 + }, + { + "epoch": 0.82, + "learning_rate": 1.5822834469220982e-06, + "loss": 0.2571, + "step": 9612 + }, + { + "epoch": 0.82, + "learning_rate": 1.5807850635353906e-06, + "loss": 0.2388, + "step": 9613 + }, + { + "epoch": 0.82, + "learning_rate": 1.5792873290601662e-06, + "loss": 0.2974, + "step": 9614 + }, + { + "epoch": 0.82, + "learning_rate": 1.5777902436118708e-06, + "loss": 0.3185, + "step": 9615 + }, + { + "epoch": 0.82, + "learning_rate": 1.5762938073058853e-06, + "loss": 0.2598, + "step": 9616 + }, + { + "epoch": 0.82, + "learning_rate": 1.5747980202575475e-06, + "loss": 0.295, + "step": 9617 + }, + { + "epoch": 0.82, + "learning_rate": 1.573302882582154e-06, + "loss": 0.2463, + "step": 9618 + }, + { + "epoch": 0.82, + "learning_rate": 1.5718083943949337e-06, + "loss": 0.2769, + "step": 9619 + }, + { + "epoch": 0.82, + "learning_rate": 1.57031455581108e-06, + "loss": 0.2896, + "step": 9620 + }, + { + "epoch": 0.82, + "learning_rate": 1.5688213669457243e-06, + "loss": 0.3014, + "step": 9621 + }, + { + "epoch": 0.82, + "learning_rate": 1.5673288279139586e-06, + "loss": 0.2529, + "step": 9622 + }, + { + "epoch": 0.82, + "learning_rate": 1.5658369388308238e-06, + "loss": 0.2572, + "step": 9623 + }, + { + "epoch": 0.82, + "learning_rate": 1.5643456998112971e-06, + "loss": 0.2864, + "step": 9624 + }, + { + "epoch": 0.83, + "learning_rate": 1.5628551109703282e-06, + "loss": 0.283, + "step": 9625 + }, + { + "epoch": 0.83, + "learning_rate": 1.561365172422795e-06, + "loss": 0.2617, + "step": 9626 + }, + { + "epoch": 0.83, + "learning_rate": 1.5598758842835382e-06, + "loss": 0.2762, + "step": 9627 + }, + { + "epoch": 0.83, + "learning_rate": 1.5583872466673433e-06, + "loss": 0.2802, + "step": 9628 + }, + { + "epoch": 0.83, + "learning_rate": 1.5568992596889487e-06, + "loss": 0.2702, + "step": 9629 + }, + { + "epoch": 0.83, + "learning_rate": 1.5554119234630438e-06, + "loss": 0.2714, + "step": 9630 + }, + { + "epoch": 0.83, + "learning_rate": 1.553925238104257e-06, + "loss": 0.3066, + "step": 9631 + }, + { + "epoch": 0.83, + "learning_rate": 1.5524392037271828e-06, + "loss": 0.2478, + "step": 9632 + }, + { + "epoch": 0.83, + "learning_rate": 1.5509538204463536e-06, + "loss": 0.2829, + "step": 9633 + }, + { + "epoch": 0.83, + "learning_rate": 1.5494690883762553e-06, + "loss": 0.2643, + "step": 9634 + }, + { + "epoch": 0.83, + "learning_rate": 1.5479850076313241e-06, + "loss": 0.2995, + "step": 9635 + }, + { + "epoch": 0.83, + "learning_rate": 1.5465015783259463e-06, + "loss": 0.2699, + "step": 9636 + }, + { + "epoch": 0.83, + "learning_rate": 1.5450188005744593e-06, + "loss": 0.2903, + "step": 9637 + }, + { + "epoch": 0.83, + "learning_rate": 1.5435366744911406e-06, + "loss": 0.2469, + "step": 9638 + }, + { + "epoch": 0.83, + "learning_rate": 1.5420552001902355e-06, + "loss": 0.2745, + "step": 9639 + }, + { + "epoch": 0.83, + "learning_rate": 1.5405743777859206e-06, + "loss": 0.2694, + "step": 9640 + }, + { + "epoch": 0.83, + "learning_rate": 1.5390942073923343e-06, + "loss": 0.2482, + "step": 9641 + }, + { + "epoch": 0.83, + "learning_rate": 1.53761468912356e-06, + "loss": 0.2903, + "step": 9642 + }, + { + "epoch": 0.83, + "learning_rate": 1.5361358230936308e-06, + "loss": 0.2616, + "step": 9643 + }, + { + "epoch": 0.83, + "learning_rate": 1.5346576094165343e-06, + "loss": 0.2466, + "step": 9644 + }, + { + "epoch": 0.83, + "learning_rate": 1.5331800482061954e-06, + "loss": 0.2435, + "step": 9645 + }, + { + "epoch": 0.83, + "learning_rate": 1.5317031395765081e-06, + "loss": 0.2365, + "step": 9646 + }, + { + "epoch": 0.83, + "learning_rate": 1.530226883641297e-06, + "loss": 0.2845, + "step": 9647 + }, + { + "epoch": 0.83, + "learning_rate": 1.5287512805143467e-06, + "loss": 0.2715, + "step": 9648 + }, + { + "epoch": 0.83, + "learning_rate": 1.5272763303093907e-06, + "loss": 0.2733, + "step": 9649 + }, + { + "epoch": 0.83, + "learning_rate": 1.5258020331401102e-06, + "loss": 0.2758, + "step": 9650 + }, + { + "epoch": 0.83, + "learning_rate": 1.5243283891201388e-06, + "loss": 0.2772, + "step": 9651 + }, + { + "epoch": 0.83, + "learning_rate": 1.522855398363051e-06, + "loss": 0.2529, + "step": 9652 + }, + { + "epoch": 0.83, + "learning_rate": 1.5213830609823877e-06, + "loss": 0.2728, + "step": 9653 + }, + { + "epoch": 0.83, + "learning_rate": 1.5199113770916207e-06, + "loss": 0.3022, + "step": 9654 + }, + { + "epoch": 0.83, + "learning_rate": 1.518440346804182e-06, + "loss": 0.2502, + "step": 9655 + }, + { + "epoch": 0.83, + "learning_rate": 1.5169699702334562e-06, + "loss": 0.2434, + "step": 9656 + }, + { + "epoch": 0.83, + "learning_rate": 1.5155002474927683e-06, + "loss": 0.2771, + "step": 9657 + }, + { + "epoch": 0.83, + "learning_rate": 1.5140311786953986e-06, + "loss": 0.2565, + "step": 9658 + }, + { + "epoch": 0.83, + "learning_rate": 1.5125627639545725e-06, + "loss": 0.2664, + "step": 9659 + }, + { + "epoch": 0.83, + "learning_rate": 1.5110950033834726e-06, + "loss": 0.2642, + "step": 9660 + }, + { + "epoch": 0.83, + "learning_rate": 1.5096278970952272e-06, + "loss": 0.2405, + "step": 9661 + }, + { + "epoch": 0.83, + "learning_rate": 1.508161445202906e-06, + "loss": 0.2526, + "step": 9662 + }, + { + "epoch": 0.83, + "learning_rate": 1.506695647819546e-06, + "loss": 0.2799, + "step": 9663 + }, + { + "epoch": 0.83, + "learning_rate": 1.5052305050581173e-06, + "loss": 0.2782, + "step": 9664 + }, + { + "epoch": 0.83, + "learning_rate": 1.503766017031547e-06, + "loss": 0.2469, + "step": 9665 + }, + { + "epoch": 0.83, + "learning_rate": 1.5023021838527108e-06, + "loss": 0.2711, + "step": 9666 + }, + { + "epoch": 0.83, + "learning_rate": 1.5008390056344347e-06, + "loss": 0.2717, + "step": 9667 + }, + { + "epoch": 0.83, + "learning_rate": 1.499376482489494e-06, + "loss": 0.2559, + "step": 9668 + }, + { + "epoch": 0.83, + "learning_rate": 1.4979146145306068e-06, + "loss": 0.2724, + "step": 9669 + }, + { + "epoch": 0.83, + "learning_rate": 1.4964534018704558e-06, + "loss": 0.2732, + "step": 9670 + }, + { + "epoch": 0.83, + "learning_rate": 1.4949928446216567e-06, + "loss": 0.29, + "step": 9671 + }, + { + "epoch": 0.83, + "learning_rate": 1.493532942896785e-06, + "loss": 0.5996, + "step": 9672 + }, + { + "epoch": 0.83, + "learning_rate": 1.4920736968083616e-06, + "loss": 0.2831, + "step": 9673 + }, + { + "epoch": 0.83, + "learning_rate": 1.4906151064688602e-06, + "loss": 0.2847, + "step": 9674 + }, + { + "epoch": 0.83, + "learning_rate": 1.4891571719907016e-06, + "loss": 0.2447, + "step": 9675 + }, + { + "epoch": 0.83, + "learning_rate": 1.4876998934862497e-06, + "loss": 0.2653, + "step": 9676 + }, + { + "epoch": 0.83, + "learning_rate": 1.4862432710678355e-06, + "loss": 0.2581, + "step": 9677 + }, + { + "epoch": 0.83, + "learning_rate": 1.4847873048477191e-06, + "loss": 0.275, + "step": 9678 + }, + { + "epoch": 0.83, + "learning_rate": 1.4833319949381232e-06, + "loss": 0.2882, + "step": 9679 + }, + { + "epoch": 0.83, + "learning_rate": 1.4818773414512134e-06, + "loss": 0.2963, + "step": 9680 + }, + { + "epoch": 0.83, + "learning_rate": 1.4804233444991102e-06, + "loss": 0.309, + "step": 9681 + }, + { + "epoch": 0.83, + "learning_rate": 1.4789700041938816e-06, + "loss": 0.2344, + "step": 9682 + }, + { + "epoch": 0.83, + "learning_rate": 1.4775173206475357e-06, + "loss": 0.269, + "step": 9683 + }, + { + "epoch": 0.83, + "learning_rate": 1.4760652939720488e-06, + "loss": 0.3092, + "step": 9684 + }, + { + "epoch": 0.83, + "learning_rate": 1.474613924279329e-06, + "loss": 0.2766, + "step": 9685 + }, + { + "epoch": 0.83, + "learning_rate": 1.4731632116812434e-06, + "loss": 0.2602, + "step": 9686 + }, + { + "epoch": 0.83, + "learning_rate": 1.4717131562896047e-06, + "loss": 0.2472, + "step": 9687 + }, + { + "epoch": 0.83, + "learning_rate": 1.4702637582161761e-06, + "loss": 0.2795, + "step": 9688 + }, + { + "epoch": 0.83, + "learning_rate": 1.4688150175726724e-06, + "loss": 0.2886, + "step": 9689 + }, + { + "epoch": 0.83, + "learning_rate": 1.4673669344707498e-06, + "loss": 0.2604, + "step": 9690 + }, + { + "epoch": 0.83, + "learning_rate": 1.4659195090220258e-06, + "loss": 0.2692, + "step": 9691 + }, + { + "epoch": 0.83, + "learning_rate": 1.4644727413380566e-06, + "loss": 0.2769, + "step": 9692 + }, + { + "epoch": 0.83, + "learning_rate": 1.463026631530351e-06, + "loss": 0.2602, + "step": 9693 + }, + { + "epoch": 0.83, + "learning_rate": 1.4615811797103751e-06, + "loss": 0.5746, + "step": 9694 + }, + { + "epoch": 0.83, + "learning_rate": 1.4601363859895301e-06, + "loss": 0.2729, + "step": 9695 + }, + { + "epoch": 0.83, + "learning_rate": 1.4586922504791767e-06, + "loss": 0.2681, + "step": 9696 + }, + { + "epoch": 0.83, + "learning_rate": 1.457248773290617e-06, + "loss": 0.2784, + "step": 9697 + }, + { + "epoch": 0.83, + "learning_rate": 1.4558059545351144e-06, + "loss": 0.5823, + "step": 9698 + }, + { + "epoch": 0.83, + "learning_rate": 1.454363794323872e-06, + "loss": 0.253, + "step": 9699 + }, + { + "epoch": 0.83, + "learning_rate": 1.4529222927680375e-06, + "loss": 0.2785, + "step": 9700 + }, + { + "epoch": 0.83, + "learning_rate": 1.4514814499787266e-06, + "loss": 0.3043, + "step": 9701 + }, + { + "epoch": 0.83, + "learning_rate": 1.4500412660669828e-06, + "loss": 0.3049, + "step": 9702 + }, + { + "epoch": 0.83, + "learning_rate": 1.4486017411438114e-06, + "loss": 0.2595, + "step": 9703 + }, + { + "epoch": 0.83, + "learning_rate": 1.447162875320165e-06, + "loss": 0.2455, + "step": 9704 + }, + { + "epoch": 0.83, + "learning_rate": 1.4457246687069427e-06, + "loss": 0.3316, + "step": 9705 + }, + { + "epoch": 0.83, + "learning_rate": 1.444287121414998e-06, + "loss": 0.2393, + "step": 9706 + }, + { + "epoch": 0.83, + "learning_rate": 1.442850233555122e-06, + "loss": 0.2883, + "step": 9707 + }, + { + "epoch": 0.83, + "learning_rate": 1.4414140052380721e-06, + "loss": 0.3138, + "step": 9708 + }, + { + "epoch": 0.83, + "learning_rate": 1.4399784365745396e-06, + "loss": 0.3044, + "step": 9709 + }, + { + "epoch": 0.83, + "learning_rate": 1.4385435276751724e-06, + "loss": 0.3032, + "step": 9710 + }, + { + "epoch": 0.83, + "learning_rate": 1.437109278650567e-06, + "loss": 0.3209, + "step": 9711 + }, + { + "epoch": 0.83, + "learning_rate": 1.4356756896112678e-06, + "loss": 0.2537, + "step": 9712 + }, + { + "epoch": 0.83, + "learning_rate": 1.4342427606677712e-06, + "loss": 0.2808, + "step": 9713 + }, + { + "epoch": 0.83, + "learning_rate": 1.432810491930514e-06, + "loss": 0.272, + "step": 9714 + }, + { + "epoch": 0.83, + "learning_rate": 1.4313788835098964e-06, + "loss": 0.2477, + "step": 9715 + }, + { + "epoch": 0.83, + "learning_rate": 1.4299479355162526e-06, + "loss": 0.2949, + "step": 9716 + }, + { + "epoch": 0.83, + "learning_rate": 1.4285176480598772e-06, + "loss": 0.2582, + "step": 9717 + }, + { + "epoch": 0.83, + "learning_rate": 1.4270880212510086e-06, + "loss": 0.3104, + "step": 9718 + }, + { + "epoch": 0.83, + "learning_rate": 1.425659055199835e-06, + "loss": 0.2592, + "step": 9719 + }, + { + "epoch": 0.83, + "learning_rate": 1.4242307500164964e-06, + "loss": 0.2753, + "step": 9720 + }, + { + "epoch": 0.83, + "learning_rate": 1.4228031058110725e-06, + "loss": 0.2289, + "step": 9721 + }, + { + "epoch": 0.83, + "learning_rate": 1.4213761226936095e-06, + "loss": 0.2394, + "step": 9722 + }, + { + "epoch": 0.83, + "learning_rate": 1.4199498007740841e-06, + "loss": 0.2959, + "step": 9723 + }, + { + "epoch": 0.83, + "learning_rate": 1.4185241401624327e-06, + "loss": 0.2681, + "step": 9724 + }, + { + "epoch": 0.83, + "learning_rate": 1.4170991409685386e-06, + "loss": 0.2711, + "step": 9725 + }, + { + "epoch": 0.83, + "learning_rate": 1.4156748033022328e-06, + "loss": 0.2444, + "step": 9726 + }, + { + "epoch": 0.83, + "learning_rate": 1.4142511272732994e-06, + "loss": 0.2516, + "step": 9727 + }, + { + "epoch": 0.83, + "learning_rate": 1.4128281129914611e-06, + "loss": 0.3023, + "step": 9728 + }, + { + "epoch": 0.83, + "learning_rate": 1.4114057605664066e-06, + "loss": 0.2648, + "step": 9729 + }, + { + "epoch": 0.83, + "learning_rate": 1.409984070107755e-06, + "loss": 0.2441, + "step": 9730 + }, + { + "epoch": 0.83, + "learning_rate": 1.4085630417250873e-06, + "loss": 0.2982, + "step": 9731 + }, + { + "epoch": 0.83, + "learning_rate": 1.4071426755279293e-06, + "loss": 0.29, + "step": 9732 + }, + { + "epoch": 0.83, + "learning_rate": 1.4057229716257548e-06, + "loss": 0.2644, + "step": 9733 + }, + { + "epoch": 0.83, + "learning_rate": 1.4043039301279904e-06, + "loss": 0.2658, + "step": 9734 + }, + { + "epoch": 0.83, + "learning_rate": 1.402885551144002e-06, + "loss": 0.2925, + "step": 9735 + }, + { + "epoch": 0.83, + "learning_rate": 1.4014678347831178e-06, + "loss": 0.3234, + "step": 9736 + }, + { + "epoch": 0.83, + "learning_rate": 1.4000507811546094e-06, + "loss": 0.2839, + "step": 9737 + }, + { + "epoch": 0.83, + "learning_rate": 1.398634390367688e-06, + "loss": 0.2759, + "step": 9738 + }, + { + "epoch": 0.83, + "learning_rate": 1.397218662531532e-06, + "loss": 0.2896, + "step": 9739 + }, + { + "epoch": 0.83, + "learning_rate": 1.3958035977552509e-06, + "loss": 0.2549, + "step": 9740 + }, + { + "epoch": 0.83, + "learning_rate": 1.394389196147915e-06, + "loss": 0.2703, + "step": 9741 + }, + { + "epoch": 0.84, + "learning_rate": 1.3929754578185373e-06, + "loss": 0.29, + "step": 9742 + }, + { + "epoch": 0.84, + "learning_rate": 1.3915623828760837e-06, + "loss": 0.2773, + "step": 9743 + }, + { + "epoch": 0.84, + "learning_rate": 1.3901499714294675e-06, + "loss": 0.2894, + "step": 9744 + }, + { + "epoch": 0.84, + "learning_rate": 1.3887382235875446e-06, + "loss": 0.2537, + "step": 9745 + }, + { + "epoch": 0.84, + "learning_rate": 1.3873271394591348e-06, + "loss": 0.2374, + "step": 9746 + }, + { + "epoch": 0.84, + "learning_rate": 1.38591671915299e-06, + "loss": 0.2666, + "step": 9747 + }, + { + "epoch": 0.84, + "learning_rate": 1.3845069627778218e-06, + "loss": 0.3292, + "step": 9748 + }, + { + "epoch": 0.84, + "learning_rate": 1.383097870442286e-06, + "loss": 0.2768, + "step": 9749 + }, + { + "epoch": 0.84, + "learning_rate": 1.3816894422549888e-06, + "loss": 0.2569, + "step": 9750 + }, + { + "epoch": 0.84, + "learning_rate": 1.3802816783244877e-06, + "loss": 0.246, + "step": 9751 + }, + { + "epoch": 0.84, + "learning_rate": 1.3788745787592784e-06, + "loss": 0.2312, + "step": 9752 + }, + { + "epoch": 0.84, + "learning_rate": 1.377468143667824e-06, + "loss": 0.2919, + "step": 9753 + }, + { + "epoch": 0.84, + "learning_rate": 1.3760623731585165e-06, + "loss": 0.2836, + "step": 9754 + }, + { + "epoch": 0.84, + "learning_rate": 1.3746572673397096e-06, + "loss": 0.2731, + "step": 9755 + }, + { + "epoch": 0.84, + "learning_rate": 1.373252826319701e-06, + "loss": 0.2968, + "step": 9756 + }, + { + "epoch": 0.84, + "learning_rate": 1.3718490502067393e-06, + "loss": 0.2946, + "step": 9757 + }, + { + "epoch": 0.84, + "learning_rate": 1.370445939109022e-06, + "loss": 0.3139, + "step": 9758 + }, + { + "epoch": 0.84, + "learning_rate": 1.3690434931346874e-06, + "loss": 0.2404, + "step": 9759 + }, + { + "epoch": 0.84, + "learning_rate": 1.3676417123918374e-06, + "loss": 0.3181, + "step": 9760 + }, + { + "epoch": 0.84, + "learning_rate": 1.3662405969885084e-06, + "loss": 0.2812, + "step": 9761 + }, + { + "epoch": 0.84, + "learning_rate": 1.3648401470326932e-06, + "loss": 0.2633, + "step": 9762 + }, + { + "epoch": 0.84, + "learning_rate": 1.3634403626323334e-06, + "loss": 0.2422, + "step": 9763 + }, + { + "epoch": 0.84, + "learning_rate": 1.3620412438953145e-06, + "loss": 0.2888, + "step": 9764 + }, + { + "epoch": 0.84, + "learning_rate": 1.3606427909294784e-06, + "loss": 0.2874, + "step": 9765 + }, + { + "epoch": 0.84, + "learning_rate": 1.359245003842602e-06, + "loss": 0.3331, + "step": 9766 + }, + { + "epoch": 0.84, + "learning_rate": 1.35784788274243e-06, + "loss": 0.3184, + "step": 9767 + }, + { + "epoch": 0.84, + "learning_rate": 1.3564514277366403e-06, + "loss": 0.2883, + "step": 9768 + }, + { + "epoch": 0.84, + "learning_rate": 1.355055638932864e-06, + "loss": 0.2527, + "step": 9769 + }, + { + "epoch": 0.84, + "learning_rate": 1.353660516438684e-06, + "loss": 0.2435, + "step": 9770 + }, + { + "epoch": 0.84, + "learning_rate": 1.352266060361629e-06, + "loss": 0.2354, + "step": 9771 + }, + { + "epoch": 0.84, + "learning_rate": 1.350872270809177e-06, + "loss": 0.2676, + "step": 9772 + }, + { + "epoch": 0.84, + "learning_rate": 1.3494791478887504e-06, + "loss": 0.2786, + "step": 9773 + }, + { + "epoch": 0.84, + "learning_rate": 1.3480866917077294e-06, + "loss": 0.2835, + "step": 9774 + }, + { + "epoch": 0.84, + "learning_rate": 1.3466949023734387e-06, + "loss": 0.298, + "step": 9775 + }, + { + "epoch": 0.84, + "learning_rate": 1.3453037799931435e-06, + "loss": 0.2686, + "step": 9776 + }, + { + "epoch": 0.84, + "learning_rate": 1.343913324674072e-06, + "loss": 0.2469, + "step": 9777 + }, + { + "epoch": 0.84, + "learning_rate": 1.3425235365233892e-06, + "loss": 0.2936, + "step": 9778 + }, + { + "epoch": 0.84, + "learning_rate": 1.3411344156482142e-06, + "loss": 0.3016, + "step": 9779 + }, + { + "epoch": 0.84, + "learning_rate": 1.339745962155613e-06, + "loss": 0.2621, + "step": 9780 + }, + { + "epoch": 0.84, + "learning_rate": 1.3383581761526022e-06, + "loss": 0.2589, + "step": 9781 + }, + { + "epoch": 0.84, + "learning_rate": 1.336971057746147e-06, + "loss": 0.311, + "step": 9782 + }, + { + "epoch": 0.84, + "learning_rate": 1.3355846070431533e-06, + "loss": 0.2666, + "step": 9783 + }, + { + "epoch": 0.84, + "learning_rate": 1.33419882415049e-06, + "loss": 0.3063, + "step": 9784 + }, + { + "epoch": 0.84, + "learning_rate": 1.3328137091749594e-06, + "loss": 0.2723, + "step": 9785 + }, + { + "epoch": 0.84, + "learning_rate": 1.3314292622233227e-06, + "loss": 0.2581, + "step": 9786 + }, + { + "epoch": 0.84, + "learning_rate": 1.3300454834022857e-06, + "loss": 0.3113, + "step": 9787 + }, + { + "epoch": 0.84, + "learning_rate": 1.3286623728185044e-06, + "loss": 0.3043, + "step": 9788 + }, + { + "epoch": 0.84, + "learning_rate": 1.3272799305785822e-06, + "loss": 0.2677, + "step": 9789 + }, + { + "epoch": 0.84, + "learning_rate": 1.325898156789066e-06, + "loss": 0.2315, + "step": 9790 + }, + { + "epoch": 0.84, + "learning_rate": 1.324517051556463e-06, + "loss": 0.601, + "step": 9791 + }, + { + "epoch": 0.84, + "learning_rate": 1.3231366149872183e-06, + "loss": 0.3023, + "step": 9792 + }, + { + "epoch": 0.84, + "learning_rate": 1.3217568471877284e-06, + "loss": 0.2868, + "step": 9793 + }, + { + "epoch": 0.84, + "learning_rate": 1.320377748264341e-06, + "loss": 0.2831, + "step": 9794 + }, + { + "epoch": 0.84, + "learning_rate": 1.3189993183233496e-06, + "loss": 0.2608, + "step": 9795 + }, + { + "epoch": 0.84, + "learning_rate": 1.3176215574709982e-06, + "loss": 0.2668, + "step": 9796 + }, + { + "epoch": 0.84, + "learning_rate": 1.3162444658134731e-06, + "loss": 0.2589, + "step": 9797 + }, + { + "epoch": 0.84, + "learning_rate": 1.3148680434569206e-06, + "loss": 0.2736, + "step": 9798 + }, + { + "epoch": 0.84, + "learning_rate": 1.313492290507422e-06, + "loss": 0.291, + "step": 9799 + }, + { + "epoch": 0.84, + "learning_rate": 1.3121172070710165e-06, + "loss": 0.2738, + "step": 9800 + }, + { + "epoch": 0.84, + "learning_rate": 1.3107427932536886e-06, + "loss": 0.2787, + "step": 9801 + }, + { + "epoch": 0.84, + "learning_rate": 1.309369049161372e-06, + "loss": 0.2728, + "step": 9802 + }, + { + "epoch": 0.84, + "learning_rate": 1.3079959748999494e-06, + "loss": 0.251, + "step": 9803 + }, + { + "epoch": 0.84, + "learning_rate": 1.3066235705752439e-06, + "loss": 0.2325, + "step": 9804 + }, + { + "epoch": 0.84, + "learning_rate": 1.3052518362930433e-06, + "loss": 0.2521, + "step": 9805 + }, + { + "epoch": 0.84, + "learning_rate": 1.3038807721590663e-06, + "loss": 0.2484, + "step": 9806 + }, + { + "epoch": 0.84, + "learning_rate": 1.3025103782789906e-06, + "loss": 0.2568, + "step": 9807 + }, + { + "epoch": 0.84, + "learning_rate": 1.3011406547584392e-06, + "loss": 0.2615, + "step": 9808 + }, + { + "epoch": 0.84, + "learning_rate": 1.2997716017029849e-06, + "loss": 0.2695, + "step": 9809 + }, + { + "epoch": 0.84, + "learning_rate": 1.2984032192181473e-06, + "loss": 0.2481, + "step": 9810 + }, + { + "epoch": 0.84, + "learning_rate": 1.2970355074093898e-06, + "loss": 0.2468, + "step": 9811 + }, + { + "epoch": 0.84, + "learning_rate": 1.2956684663821363e-06, + "loss": 0.2677, + "step": 9812 + }, + { + "epoch": 0.84, + "learning_rate": 1.2943020962417485e-06, + "loss": 0.322, + "step": 9813 + }, + { + "epoch": 0.84, + "learning_rate": 1.2929363970935371e-06, + "loss": 0.2497, + "step": 9814 + }, + { + "epoch": 0.84, + "learning_rate": 1.2915713690427655e-06, + "loss": 0.3096, + "step": 9815 + }, + { + "epoch": 0.84, + "learning_rate": 1.2902070121946441e-06, + "loss": 0.2708, + "step": 9816 + }, + { + "epoch": 0.84, + "learning_rate": 1.2888433266543288e-06, + "loss": 0.3307, + "step": 9817 + }, + { + "epoch": 0.84, + "learning_rate": 1.2874803125269274e-06, + "loss": 0.2576, + "step": 9818 + }, + { + "epoch": 0.84, + "learning_rate": 1.286117969917493e-06, + "loss": 0.2819, + "step": 9819 + }, + { + "epoch": 0.84, + "learning_rate": 1.2847562989310313e-06, + "loss": 0.2889, + "step": 9820 + }, + { + "epoch": 0.84, + "learning_rate": 1.2833952996724864e-06, + "loss": 0.2562, + "step": 9821 + }, + { + "epoch": 0.84, + "learning_rate": 1.2820349722467663e-06, + "loss": 0.294, + "step": 9822 + }, + { + "epoch": 0.84, + "learning_rate": 1.2806753167587117e-06, + "loss": 0.3049, + "step": 9823 + }, + { + "epoch": 0.84, + "learning_rate": 1.2793163333131208e-06, + "loss": 0.289, + "step": 9824 + }, + { + "epoch": 0.84, + "learning_rate": 1.277958022014736e-06, + "loss": 0.2167, + "step": 9825 + }, + { + "epoch": 0.84, + "learning_rate": 1.2766003829682504e-06, + "loss": 0.2513, + "step": 9826 + }, + { + "epoch": 0.84, + "learning_rate": 1.2752434162783056e-06, + "loss": 0.2759, + "step": 9827 + }, + { + "epoch": 0.84, + "learning_rate": 1.273887122049483e-06, + "loss": 0.5486, + "step": 9828 + }, + { + "epoch": 0.84, + "learning_rate": 1.2725315003863292e-06, + "loss": 0.2589, + "step": 9829 + }, + { + "epoch": 0.84, + "learning_rate": 1.2711765513933216e-06, + "loss": 0.2759, + "step": 9830 + }, + { + "epoch": 0.84, + "learning_rate": 1.2698222751748946e-06, + "loss": 0.2731, + "step": 9831 + }, + { + "epoch": 0.84, + "learning_rate": 1.26846867183543e-06, + "loss": 0.2802, + "step": 9832 + }, + { + "epoch": 0.84, + "learning_rate": 1.2671157414792567e-06, + "loss": 0.3047, + "step": 9833 + }, + { + "epoch": 0.84, + "learning_rate": 1.2657634842106526e-06, + "loss": 0.2815, + "step": 9834 + }, + { + "epoch": 0.84, + "learning_rate": 1.2644119001338385e-06, + "loss": 0.2588, + "step": 9835 + }, + { + "epoch": 0.84, + "learning_rate": 1.2630609893529956e-06, + "loss": 0.306, + "step": 9836 + }, + { + "epoch": 0.84, + "learning_rate": 1.2617107519722393e-06, + "loss": 0.3174, + "step": 9837 + }, + { + "epoch": 0.84, + "learning_rate": 1.26036118809564e-06, + "loss": 0.3262, + "step": 9838 + }, + { + "epoch": 0.84, + "learning_rate": 1.2590122978272178e-06, + "loss": 0.2402, + "step": 9839 + }, + { + "epoch": 0.84, + "learning_rate": 1.2576640812709363e-06, + "loss": 0.6742, + "step": 9840 + }, + { + "epoch": 0.84, + "learning_rate": 1.256316538530713e-06, + "loss": 0.3113, + "step": 9841 + }, + { + "epoch": 0.84, + "learning_rate": 1.254969669710402e-06, + "loss": 0.3033, + "step": 9842 + }, + { + "epoch": 0.84, + "learning_rate": 1.2536234749138232e-06, + "loss": 0.248, + "step": 9843 + }, + { + "epoch": 0.84, + "learning_rate": 1.2522779542447272e-06, + "loss": 0.2856, + "step": 9844 + }, + { + "epoch": 0.84, + "learning_rate": 1.2509331078068231e-06, + "loss": 0.2316, + "step": 9845 + }, + { + "epoch": 0.84, + "learning_rate": 1.249588935703765e-06, + "loss": 0.2607, + "step": 9846 + }, + { + "epoch": 0.84, + "learning_rate": 1.2482454380391552e-06, + "loss": 0.2594, + "step": 9847 + }, + { + "epoch": 0.84, + "learning_rate": 1.246902614916544e-06, + "loss": 0.2625, + "step": 9848 + }, + { + "epoch": 0.84, + "learning_rate": 1.245560466439425e-06, + "loss": 0.2535, + "step": 9849 + }, + { + "epoch": 0.84, + "learning_rate": 1.2442189927112514e-06, + "loss": 0.588, + "step": 9850 + }, + { + "epoch": 0.84, + "learning_rate": 1.242878193835415e-06, + "loss": 0.2385, + "step": 9851 + }, + { + "epoch": 0.84, + "learning_rate": 1.2415380699152568e-06, + "loss": 0.2433, + "step": 9852 + }, + { + "epoch": 0.84, + "learning_rate": 1.240198621054066e-06, + "loss": 0.2903, + "step": 9853 + }, + { + "epoch": 0.84, + "learning_rate": 1.2388598473550828e-06, + "loss": 0.3208, + "step": 9854 + }, + { + "epoch": 0.84, + "learning_rate": 1.237521748921492e-06, + "loss": 0.2714, + "step": 9855 + }, + { + "epoch": 0.84, + "learning_rate": 1.2361843258564277e-06, + "loss": 0.2559, + "step": 9856 + }, + { + "epoch": 0.84, + "learning_rate": 1.2348475782629733e-06, + "loss": 0.2958, + "step": 9857 + }, + { + "epoch": 0.85, + "learning_rate": 1.2335115062441593e-06, + "loss": 0.2708, + "step": 9858 + }, + { + "epoch": 0.85, + "learning_rate": 1.2321761099029571e-06, + "loss": 0.2709, + "step": 9859 + }, + { + "epoch": 0.85, + "learning_rate": 1.2308413893423021e-06, + "loss": 0.2973, + "step": 9860 + }, + { + "epoch": 0.85, + "learning_rate": 1.229507344665062e-06, + "loss": 0.603, + "step": 9861 + }, + { + "epoch": 0.85, + "learning_rate": 1.2281739759740575e-06, + "loss": 0.239, + "step": 9862 + }, + { + "epoch": 0.85, + "learning_rate": 1.2268412833720611e-06, + "loss": 0.2968, + "step": 9863 + }, + { + "epoch": 0.85, + "learning_rate": 1.2255092669617897e-06, + "loss": 0.2596, + "step": 9864 + }, + { + "epoch": 0.85, + "learning_rate": 1.2241779268459098e-06, + "loss": 0.2218, + "step": 9865 + }, + { + "epoch": 0.85, + "learning_rate": 1.2228472631270272e-06, + "loss": 0.2832, + "step": 9866 + }, + { + "epoch": 0.85, + "learning_rate": 1.2215172759077143e-06, + "loss": 0.2881, + "step": 9867 + }, + { + "epoch": 0.85, + "learning_rate": 1.2201879652904714e-06, + "loss": 0.2374, + "step": 9868 + }, + { + "epoch": 0.85, + "learning_rate": 1.2188593313777575e-06, + "loss": 0.2803, + "step": 9869 + }, + { + "epoch": 0.85, + "learning_rate": 1.2175313742719775e-06, + "loss": 0.2687, + "step": 9870 + }, + { + "epoch": 0.85, + "learning_rate": 1.2162040940754826e-06, + "loss": 0.2388, + "step": 9871 + }, + { + "epoch": 0.85, + "learning_rate": 1.2148774908905782e-06, + "loss": 0.2557, + "step": 9872 + }, + { + "epoch": 0.85, + "learning_rate": 1.2135515648195029e-06, + "loss": 0.2516, + "step": 9873 + }, + { + "epoch": 0.85, + "learning_rate": 1.212226315964462e-06, + "loss": 0.2314, + "step": 9874 + }, + { + "epoch": 0.85, + "learning_rate": 1.210901744427594e-06, + "loss": 0.2977, + "step": 9875 + }, + { + "epoch": 0.85, + "learning_rate": 1.20957785031099e-06, + "loss": 0.3063, + "step": 9876 + }, + { + "epoch": 0.85, + "learning_rate": 1.208254633716691e-06, + "loss": 0.2479, + "step": 9877 + }, + { + "epoch": 0.85, + "learning_rate": 1.2069320947466845e-06, + "loss": 0.2745, + "step": 9878 + }, + { + "epoch": 0.85, + "learning_rate": 1.2056102335029052e-06, + "loss": 0.2602, + "step": 9879 + }, + { + "epoch": 0.85, + "learning_rate": 1.2042890500872306e-06, + "loss": 0.2932, + "step": 9880 + }, + { + "epoch": 0.85, + "learning_rate": 1.2029685446015005e-06, + "loss": 0.3196, + "step": 9881 + }, + { + "epoch": 0.85, + "learning_rate": 1.2016487171474844e-06, + "loss": 0.2668, + "step": 9882 + }, + { + "epoch": 0.85, + "learning_rate": 1.2003295678269112e-06, + "loss": 0.2781, + "step": 9883 + }, + { + "epoch": 0.85, + "learning_rate": 1.1990110967414548e-06, + "loss": 0.3168, + "step": 9884 + }, + { + "epoch": 0.85, + "learning_rate": 1.1976933039927363e-06, + "loss": 0.5594, + "step": 9885 + }, + { + "epoch": 0.85, + "learning_rate": 1.1963761896823255e-06, + "loss": 0.2618, + "step": 9886 + }, + { + "epoch": 0.85, + "learning_rate": 1.1950597539117348e-06, + "loss": 0.2421, + "step": 9887 + }, + { + "epoch": 0.85, + "learning_rate": 1.1937439967824338e-06, + "loss": 0.2948, + "step": 9888 + }, + { + "epoch": 0.85, + "learning_rate": 1.1924289183958349e-06, + "loss": 0.2701, + "step": 9889 + }, + { + "epoch": 0.85, + "learning_rate": 1.1911145188532936e-06, + "loss": 0.2894, + "step": 9890 + }, + { + "epoch": 0.85, + "learning_rate": 1.1898007982561177e-06, + "loss": 0.2553, + "step": 9891 + }, + { + "epoch": 0.85, + "learning_rate": 1.1884877567055653e-06, + "loss": 0.2543, + "step": 9892 + }, + { + "epoch": 0.85, + "learning_rate": 1.1871753943028375e-06, + "loss": 0.2953, + "step": 9893 + }, + { + "epoch": 0.85, + "learning_rate": 1.1858637111490845e-06, + "loss": 0.3027, + "step": 9894 + }, + { + "epoch": 0.85, + "learning_rate": 1.1845527073454045e-06, + "loss": 0.2673, + "step": 9895 + }, + { + "epoch": 0.85, + "learning_rate": 1.1832423829928452e-06, + "loss": 0.2372, + "step": 9896 + }, + { + "epoch": 0.85, + "learning_rate": 1.1819327381923972e-06, + "loss": 0.2414, + "step": 9897 + }, + { + "epoch": 0.85, + "learning_rate": 1.1806237730450009e-06, + "loss": 0.2679, + "step": 9898 + }, + { + "epoch": 0.85, + "learning_rate": 1.1793154876515477e-06, + "loss": 0.2697, + "step": 9899 + }, + { + "epoch": 0.85, + "learning_rate": 1.1780078821128716e-06, + "loss": 0.2605, + "step": 9900 + }, + { + "epoch": 0.85, + "learning_rate": 1.1767009565297583e-06, + "loss": 0.3094, + "step": 9901 + }, + { + "epoch": 0.85, + "learning_rate": 1.1753947110029373e-06, + "loss": 0.2665, + "step": 9902 + }, + { + "epoch": 0.85, + "learning_rate": 1.1740891456330894e-06, + "loss": 0.2715, + "step": 9903 + }, + { + "epoch": 0.85, + "learning_rate": 1.1727842605208373e-06, + "loss": 0.2929, + "step": 9904 + }, + { + "epoch": 0.85, + "learning_rate": 1.1714800557667616e-06, + "loss": 0.2645, + "step": 9905 + }, + { + "epoch": 0.85, + "learning_rate": 1.1701765314713786e-06, + "loss": 0.2672, + "step": 9906 + }, + { + "epoch": 0.85, + "learning_rate": 1.168873687735158e-06, + "loss": 0.2319, + "step": 9907 + }, + { + "epoch": 0.85, + "learning_rate": 1.1675715246585184e-06, + "loss": 0.249, + "step": 9908 + }, + { + "epoch": 0.85, + "learning_rate": 1.1662700423418239e-06, + "loss": 0.2432, + "step": 9909 + }, + { + "epoch": 0.85, + "learning_rate": 1.1649692408853875e-06, + "loss": 0.2649, + "step": 9910 + }, + { + "epoch": 0.85, + "learning_rate": 1.1636691203894623e-06, + "loss": 0.2471, + "step": 9911 + }, + { + "epoch": 0.85, + "learning_rate": 1.162369680954264e-06, + "loss": 0.3283, + "step": 9912 + }, + { + "epoch": 0.85, + "learning_rate": 1.16107092267994e-06, + "loss": 0.3055, + "step": 9913 + }, + { + "epoch": 0.85, + "learning_rate": 1.1597728456665958e-06, + "loss": 0.2349, + "step": 9914 + }, + { + "epoch": 0.85, + "learning_rate": 1.1584754500142792e-06, + "loss": 0.2845, + "step": 9915 + }, + { + "epoch": 0.85, + "learning_rate": 1.157178735822988e-06, + "loss": 0.2772, + "step": 9916 + }, + { + "epoch": 0.85, + "learning_rate": 1.1558827031926679e-06, + "loss": 0.5519, + "step": 9917 + }, + { + "epoch": 0.85, + "learning_rate": 1.1545873522232055e-06, + "loss": 0.3152, + "step": 9918 + }, + { + "epoch": 0.85, + "learning_rate": 1.1532926830144475e-06, + "loss": 0.2501, + "step": 9919 + }, + { + "epoch": 0.85, + "learning_rate": 1.1519986956661744e-06, + "loss": 0.5618, + "step": 9920 + }, + { + "epoch": 0.85, + "learning_rate": 1.1507053902781217e-06, + "loss": 0.2558, + "step": 9921 + }, + { + "epoch": 0.85, + "learning_rate": 1.1494127669499732e-06, + "loss": 0.2357, + "step": 9922 + }, + { + "epoch": 0.85, + "learning_rate": 1.1481208257813558e-06, + "loss": 0.2667, + "step": 9923 + }, + { + "epoch": 0.85, + "learning_rate": 1.1468295668718498e-06, + "loss": 0.231, + "step": 9924 + }, + { + "epoch": 0.85, + "learning_rate": 1.1455389903209713e-06, + "loss": 0.2867, + "step": 9925 + }, + { + "epoch": 0.85, + "learning_rate": 1.1442490962281983e-06, + "loss": 0.3174, + "step": 9926 + }, + { + "epoch": 0.85, + "learning_rate": 1.142959884692949e-06, + "loss": 0.2521, + "step": 9927 + }, + { + "epoch": 0.85, + "learning_rate": 1.1416713558145854e-06, + "loss": 0.283, + "step": 9928 + }, + { + "epoch": 0.85, + "learning_rate": 1.1403835096924244e-06, + "loss": 0.2653, + "step": 9929 + }, + { + "epoch": 0.85, + "learning_rate": 1.1390963464257254e-06, + "loss": 0.2654, + "step": 9930 + }, + { + "epoch": 0.85, + "learning_rate": 1.1378098661136993e-06, + "loss": 0.2322, + "step": 9931 + }, + { + "epoch": 0.85, + "learning_rate": 1.1365240688554958e-06, + "loss": 0.3051, + "step": 9932 + }, + { + "epoch": 0.85, + "learning_rate": 1.135238954750223e-06, + "loss": 0.2597, + "step": 9933 + }, + { + "epoch": 0.85, + "learning_rate": 1.1339545238969308e-06, + "loss": 0.2883, + "step": 9934 + }, + { + "epoch": 0.85, + "learning_rate": 1.1326707763946143e-06, + "loss": 0.2836, + "step": 9935 + }, + { + "epoch": 0.85, + "learning_rate": 1.131387712342219e-06, + "loss": 0.3225, + "step": 9936 + }, + { + "epoch": 0.85, + "learning_rate": 1.130105331838638e-06, + "loss": 0.3378, + "step": 9937 + }, + { + "epoch": 0.85, + "learning_rate": 1.1288236349827108e-06, + "loss": 0.6455, + "step": 9938 + }, + { + "epoch": 0.85, + "learning_rate": 1.1275426218732233e-06, + "loss": 0.2538, + "step": 9939 + }, + { + "epoch": 0.85, + "learning_rate": 1.1262622926089118e-06, + "loss": 0.2444, + "step": 9940 + }, + { + "epoch": 0.85, + "learning_rate": 1.1249826472884574e-06, + "loss": 0.2537, + "step": 9941 + }, + { + "epoch": 0.85, + "learning_rate": 1.1237036860104833e-06, + "loss": 0.259, + "step": 9942 + }, + { + "epoch": 0.85, + "learning_rate": 1.1224254088735752e-06, + "loss": 0.2936, + "step": 9943 + }, + { + "epoch": 0.85, + "learning_rate": 1.121147815976248e-06, + "loss": 0.2749, + "step": 9944 + }, + { + "epoch": 0.85, + "learning_rate": 1.1198709074169766e-06, + "loss": 0.2701, + "step": 9945 + }, + { + "epoch": 0.85, + "learning_rate": 1.1185946832941774e-06, + "loss": 0.3221, + "step": 9946 + }, + { + "epoch": 0.85, + "learning_rate": 1.1173191437062147e-06, + "loss": 0.2545, + "step": 9947 + }, + { + "epoch": 0.85, + "learning_rate": 1.1160442887514045e-06, + "loss": 0.2734, + "step": 9948 + }, + { + "epoch": 0.85, + "learning_rate": 1.1147701185280002e-06, + "loss": 0.3171, + "step": 9949 + }, + { + "epoch": 0.85, + "learning_rate": 1.1134966331342157e-06, + "loss": 0.2708, + "step": 9950 + }, + { + "epoch": 0.85, + "learning_rate": 1.1122238326681978e-06, + "loss": 0.2604, + "step": 9951 + }, + { + "epoch": 0.85, + "learning_rate": 1.1109517172280525e-06, + "loss": 0.2535, + "step": 9952 + }, + { + "epoch": 0.85, + "learning_rate": 1.109680286911826e-06, + "loss": 0.2397, + "step": 9953 + }, + { + "epoch": 0.85, + "learning_rate": 1.1084095418175156e-06, + "loss": 0.2753, + "step": 9954 + }, + { + "epoch": 0.85, + "learning_rate": 1.1071394820430647e-06, + "loss": 0.2865, + "step": 9955 + }, + { + "epoch": 0.85, + "learning_rate": 1.1058701076863575e-06, + "loss": 0.2385, + "step": 9956 + }, + { + "epoch": 0.85, + "learning_rate": 1.1046014188452392e-06, + "loss": 0.2606, + "step": 9957 + }, + { + "epoch": 0.85, + "learning_rate": 1.103333415617488e-06, + "loss": 0.2532, + "step": 9958 + }, + { + "epoch": 0.85, + "learning_rate": 1.102066098100838e-06, + "loss": 0.2868, + "step": 9959 + }, + { + "epoch": 0.85, + "learning_rate": 1.1007994663929656e-06, + "loss": 0.3349, + "step": 9960 + }, + { + "epoch": 0.85, + "learning_rate": 1.0995335205914993e-06, + "loss": 0.2753, + "step": 9961 + }, + { + "epoch": 0.85, + "learning_rate": 1.0982682607940131e-06, + "loss": 0.3004, + "step": 9962 + }, + { + "epoch": 0.85, + "learning_rate": 1.0970036870980195e-06, + "loss": 0.2977, + "step": 9963 + }, + { + "epoch": 0.85, + "learning_rate": 1.0957397996009934e-06, + "loss": 0.3046, + "step": 9964 + }, + { + "epoch": 0.85, + "learning_rate": 1.094476598400348e-06, + "loss": 0.2841, + "step": 9965 + }, + { + "epoch": 0.85, + "learning_rate": 1.0932140835934414e-06, + "loss": 0.2584, + "step": 9966 + }, + { + "epoch": 0.85, + "learning_rate": 1.0919522552775829e-06, + "loss": 0.3019, + "step": 9967 + }, + { + "epoch": 0.85, + "learning_rate": 1.0906911135500298e-06, + "loss": 0.2882, + "step": 9968 + }, + { + "epoch": 0.85, + "learning_rate": 1.0894306585079838e-06, + "loss": 0.2058, + "step": 9969 + }, + { + "epoch": 0.85, + "learning_rate": 1.088170890248591e-06, + "loss": 0.2933, + "step": 9970 + }, + { + "epoch": 0.85, + "learning_rate": 1.0869118088689535e-06, + "loss": 0.3075, + "step": 9971 + }, + { + "epoch": 0.85, + "learning_rate": 1.0856534144661146e-06, + "loss": 0.2606, + "step": 9972 + }, + { + "epoch": 0.85, + "learning_rate": 1.0843957071370626e-06, + "loss": 0.2866, + "step": 9973 + }, + { + "epoch": 0.85, + "learning_rate": 1.0831386869787353e-06, + "loss": 0.2972, + "step": 9974 + }, + { + "epoch": 0.86, + "learning_rate": 1.0818823540880174e-06, + "loss": 0.277, + "step": 9975 + }, + { + "epoch": 0.86, + "learning_rate": 1.080626708561744e-06, + "loss": 0.2919, + "step": 9976 + }, + { + "epoch": 0.86, + "learning_rate": 1.0793717504966906e-06, + "loss": 0.2162, + "step": 9977 + }, + { + "epoch": 0.86, + "learning_rate": 1.0781174799895844e-06, + "loss": 0.3007, + "step": 9978 + }, + { + "epoch": 0.86, + "learning_rate": 1.0768638971371014e-06, + "loss": 0.2684, + "step": 9979 + }, + { + "epoch": 0.86, + "learning_rate": 1.0756110020358568e-06, + "loss": 0.2422, + "step": 9980 + }, + { + "epoch": 0.86, + "learning_rate": 1.0743587947824186e-06, + "loss": 0.3024, + "step": 9981 + }, + { + "epoch": 0.86, + "learning_rate": 1.0731072754733019e-06, + "loss": 0.2855, + "step": 9982 + }, + { + "epoch": 0.86, + "learning_rate": 1.0718564442049672e-06, + "loss": 0.2949, + "step": 9983 + }, + { + "epoch": 0.86, + "learning_rate": 1.0706063010738232e-06, + "loss": 0.2958, + "step": 9984 + }, + { + "epoch": 0.86, + "learning_rate": 1.0693568461762238e-06, + "loss": 0.2711, + "step": 9985 + }, + { + "epoch": 0.86, + "learning_rate": 1.068108079608473e-06, + "loss": 0.2485, + "step": 9986 + }, + { + "epoch": 0.86, + "learning_rate": 1.066860001466813e-06, + "loss": 0.239, + "step": 9987 + }, + { + "epoch": 0.86, + "learning_rate": 1.0656126118474485e-06, + "loss": 0.2626, + "step": 9988 + }, + { + "epoch": 0.86, + "learning_rate": 1.0643659108465166e-06, + "loss": 0.2715, + "step": 9989 + }, + { + "epoch": 0.86, + "learning_rate": 1.0631198985601077e-06, + "loss": 0.2724, + "step": 9990 + }, + { + "epoch": 0.86, + "learning_rate": 1.0618745750842585e-06, + "loss": 0.2841, + "step": 9991 + }, + { + "epoch": 0.86, + "learning_rate": 1.0606299405149522e-06, + "loss": 0.5852, + "step": 9992 + }, + { + "epoch": 0.86, + "learning_rate": 1.059385994948121e-06, + "loss": 0.301, + "step": 9993 + }, + { + "epoch": 0.86, + "learning_rate": 1.0581427384796372e-06, + "loss": 0.2637, + "step": 9994 + }, + { + "epoch": 0.86, + "learning_rate": 1.0569001712053317e-06, + "loss": 0.2565, + "step": 9995 + }, + { + "epoch": 0.86, + "learning_rate": 1.0556582932209703e-06, + "loss": 0.2871, + "step": 9996 + }, + { + "epoch": 0.86, + "learning_rate": 1.0544171046222717e-06, + "loss": 0.2781, + "step": 9997 + }, + { + "epoch": 0.86, + "learning_rate": 1.053176605504902e-06, + "loss": 0.6038, + "step": 9998 + }, + { + "epoch": 0.86, + "learning_rate": 1.051936795964471e-06, + "loss": 0.2552, + "step": 9999 + }, + { + "epoch": 0.86, + "learning_rate": 1.0506976760965414e-06, + "loss": 0.2501, + "step": 10000 + }, + { + "epoch": 0.86, + "learning_rate": 1.0494592459966102e-06, + "loss": 0.2786, + "step": 10001 + }, + { + "epoch": 0.86, + "learning_rate": 1.0482215057601364e-06, + "loss": 0.2624, + "step": 10002 + }, + { + "epoch": 0.86, + "learning_rate": 1.0469844554825192e-06, + "loss": 0.2753, + "step": 10003 + }, + { + "epoch": 0.86, + "learning_rate": 1.0457480952591005e-06, + "loss": 0.2574, + "step": 10004 + }, + { + "epoch": 0.86, + "learning_rate": 1.0445124251851735e-06, + "loss": 0.2845, + "step": 10005 + }, + { + "epoch": 0.86, + "learning_rate": 1.043277445355978e-06, + "loss": 0.2184, + "step": 10006 + }, + { + "epoch": 0.86, + "learning_rate": 1.0420431558667033e-06, + "loss": 0.2621, + "step": 10007 + }, + { + "epoch": 0.86, + "learning_rate": 1.0408095568124765e-06, + "loss": 0.2249, + "step": 10008 + }, + { + "epoch": 0.86, + "learning_rate": 1.0395766482883806e-06, + "loss": 0.2835, + "step": 10009 + }, + { + "epoch": 0.86, + "learning_rate": 1.0383444303894453e-06, + "loss": 0.2427, + "step": 10010 + }, + { + "epoch": 0.86, + "learning_rate": 1.0371129032106375e-06, + "loss": 0.3246, + "step": 10011 + }, + { + "epoch": 0.86, + "learning_rate": 1.0358820668468805e-06, + "loss": 0.279, + "step": 10012 + }, + { + "epoch": 0.86, + "learning_rate": 1.0346519213930417e-06, + "loss": 0.2651, + "step": 10013 + }, + { + "epoch": 0.86, + "learning_rate": 1.033422466943933e-06, + "loss": 0.3018, + "step": 10014 + }, + { + "epoch": 0.86, + "learning_rate": 1.0321937035943153e-06, + "loss": 0.2598, + "step": 10015 + }, + { + "epoch": 0.86, + "learning_rate": 1.0309656314388949e-06, + "loss": 0.2408, + "step": 10016 + }, + { + "epoch": 0.86, + "learning_rate": 1.0297382505723297e-06, + "loss": 0.5775, + "step": 10017 + }, + { + "epoch": 0.86, + "learning_rate": 1.0285115610892138e-06, + "loss": 0.2997, + "step": 10018 + }, + { + "epoch": 0.86, + "learning_rate": 1.0272855630840982e-06, + "loss": 0.2479, + "step": 10019 + }, + { + "epoch": 0.86, + "learning_rate": 1.0260602566514755e-06, + "loss": 0.2786, + "step": 10020 + }, + { + "epoch": 0.86, + "learning_rate": 1.0248356418857863e-06, + "loss": 0.2689, + "step": 10021 + }, + { + "epoch": 0.86, + "learning_rate": 1.0236117188814187e-06, + "loss": 0.2477, + "step": 10022 + }, + { + "epoch": 0.86, + "learning_rate": 1.0223884877327062e-06, + "loss": 0.2961, + "step": 10023 + }, + { + "epoch": 0.86, + "learning_rate": 1.0211659485339308e-06, + "loss": 0.2662, + "step": 10024 + }, + { + "epoch": 0.86, + "learning_rate": 1.0199441013793155e-06, + "loss": 0.2689, + "step": 10025 + }, + { + "epoch": 0.86, + "learning_rate": 1.01872294636304e-06, + "loss": 0.2758, + "step": 10026 + }, + { + "epoch": 0.86, + "learning_rate": 1.0175024835792202e-06, + "loss": 0.2993, + "step": 10027 + }, + { + "epoch": 0.86, + "learning_rate": 1.0162827131219255e-06, + "loss": 0.2344, + "step": 10028 + }, + { + "epoch": 0.86, + "learning_rate": 1.0150636350851695e-06, + "loss": 0.2725, + "step": 10029 + }, + { + "epoch": 0.86, + "learning_rate": 1.0138452495629125e-06, + "loss": 0.262, + "step": 10030 + }, + { + "epoch": 0.86, + "learning_rate": 1.0126275566490628e-06, + "loss": 0.2758, + "step": 10031 + }, + { + "epoch": 0.86, + "learning_rate": 1.011410556437471e-06, + "loss": 0.2301, + "step": 10032 + }, + { + "epoch": 0.86, + "learning_rate": 1.0101942490219418e-06, + "loss": 0.2752, + "step": 10033 + }, + { + "epoch": 0.86, + "learning_rate": 1.0089786344962194e-06, + "loss": 0.2966, + "step": 10034 + }, + { + "epoch": 0.86, + "learning_rate": 1.0077637129539963e-06, + "loss": 0.2946, + "step": 10035 + }, + { + "epoch": 0.86, + "learning_rate": 1.0065494844889156e-06, + "loss": 0.2461, + "step": 10036 + }, + { + "epoch": 0.86, + "learning_rate": 1.0053359491945624e-06, + "loss": 0.2614, + "step": 10037 + }, + { + "epoch": 0.86, + "learning_rate": 1.004123107164472e-06, + "loss": 0.2887, + "step": 10038 + }, + { + "epoch": 0.86, + "learning_rate": 1.0029109584921193e-06, + "loss": 0.2658, + "step": 10039 + }, + { + "epoch": 0.86, + "learning_rate": 1.0016995032709354e-06, + "loss": 0.2348, + "step": 10040 + }, + { + "epoch": 0.86, + "learning_rate": 1.0004887415942943e-06, + "loss": 0.2816, + "step": 10041 + }, + { + "epoch": 0.86, + "learning_rate": 9.992786735555104e-07, + "loss": 0.3228, + "step": 10042 + }, + { + "epoch": 0.86, + "learning_rate": 9.980692992478524e-07, + "loss": 0.287, + "step": 10043 + }, + { + "epoch": 0.86, + "learning_rate": 9.968606187645336e-07, + "loss": 0.2502, + "step": 10044 + }, + { + "epoch": 0.86, + "learning_rate": 9.956526321987147e-07, + "loss": 0.2922, + "step": 10045 + }, + { + "epoch": 0.86, + "learning_rate": 9.94445339643495e-07, + "loss": 0.2318, + "step": 10046 + }, + { + "epoch": 0.86, + "learning_rate": 9.93238741191932e-07, + "loss": 0.3, + "step": 10047 + }, + { + "epoch": 0.86, + "learning_rate": 9.920328369370258e-07, + "loss": 0.2662, + "step": 10048 + }, + { + "epoch": 0.86, + "learning_rate": 9.908276269717166e-07, + "loss": 0.2729, + "step": 10049 + }, + { + "epoch": 0.86, + "learning_rate": 9.896231113888988e-07, + "loss": 0.2654, + "step": 10050 + }, + { + "epoch": 0.86, + "learning_rate": 9.884192902814094e-07, + "loss": 0.5657, + "step": 10051 + }, + { + "epoch": 0.86, + "learning_rate": 9.87216163742033e-07, + "loss": 0.2981, + "step": 10052 + }, + { + "epoch": 0.86, + "learning_rate": 9.860137318635021e-07, + "loss": 0.3122, + "step": 10053 + }, + { + "epoch": 0.86, + "learning_rate": 9.848119947384937e-07, + "loss": 0.2991, + "step": 10054 + }, + { + "epoch": 0.86, + "learning_rate": 9.836109524596326e-07, + "loss": 0.2426, + "step": 10055 + }, + { + "epoch": 0.86, + "learning_rate": 9.82410605119486e-07, + "loss": 0.314, + "step": 10056 + }, + { + "epoch": 0.86, + "learning_rate": 9.812109528105728e-07, + "loss": 0.265, + "step": 10057 + }, + { + "epoch": 0.86, + "learning_rate": 9.800119956253574e-07, + "loss": 0.5778, + "step": 10058 + }, + { + "epoch": 0.86, + "learning_rate": 9.788137336562464e-07, + "loss": 0.2259, + "step": 10059 + }, + { + "epoch": 0.86, + "learning_rate": 9.776161669955997e-07, + "loss": 0.3255, + "step": 10060 + }, + { + "epoch": 0.86, + "learning_rate": 9.764192957357166e-07, + "loss": 0.2555, + "step": 10061 + }, + { + "epoch": 0.86, + "learning_rate": 9.7522311996885e-07, + "loss": 0.2515, + "step": 10062 + }, + { + "epoch": 0.86, + "learning_rate": 9.740276397871906e-07, + "loss": 0.2311, + "step": 10063 + }, + { + "epoch": 0.86, + "learning_rate": 9.728328552828814e-07, + "loss": 0.2904, + "step": 10064 + }, + { + "epoch": 0.86, + "learning_rate": 9.71638766548012e-07, + "loss": 0.2338, + "step": 10065 + }, + { + "epoch": 0.86, + "learning_rate": 9.704453736746156e-07, + "loss": 0.2412, + "step": 10066 + }, + { + "epoch": 0.86, + "learning_rate": 9.692526767546727e-07, + "loss": 0.305, + "step": 10067 + }, + { + "epoch": 0.86, + "learning_rate": 9.680606758801126e-07, + "loss": 0.2756, + "step": 10068 + }, + { + "epoch": 0.86, + "learning_rate": 9.66869371142809e-07, + "loss": 0.2956, + "step": 10069 + }, + { + "epoch": 0.86, + "learning_rate": 9.656787626345765e-07, + "loss": 0.2383, + "step": 10070 + }, + { + "epoch": 0.86, + "learning_rate": 9.644888504471883e-07, + "loss": 0.2682, + "step": 10071 + }, + { + "epoch": 0.86, + "learning_rate": 9.632996346723522e-07, + "loss": 0.2812, + "step": 10072 + }, + { + "epoch": 0.86, + "learning_rate": 9.62111115401728e-07, + "loss": 0.2468, + "step": 10073 + }, + { + "epoch": 0.86, + "learning_rate": 9.60923292726923e-07, + "loss": 0.2803, + "step": 10074 + }, + { + "epoch": 0.86, + "learning_rate": 9.597361667394867e-07, + "loss": 0.2731, + "step": 10075 + }, + { + "epoch": 0.86, + "learning_rate": 9.585497375309183e-07, + "loss": 0.2899, + "step": 10076 + }, + { + "epoch": 0.86, + "learning_rate": 9.57364005192658e-07, + "loss": 0.2965, + "step": 10077 + }, + { + "epoch": 0.86, + "learning_rate": 9.561789698161007e-07, + "loss": 0.2524, + "step": 10078 + }, + { + "epoch": 0.86, + "learning_rate": 9.54994631492584e-07, + "loss": 0.2448, + "step": 10079 + }, + { + "epoch": 0.86, + "learning_rate": 9.538109903133862e-07, + "loss": 0.2676, + "step": 10080 + }, + { + "epoch": 0.86, + "learning_rate": 9.526280463697391e-07, + "loss": 0.2374, + "step": 10081 + }, + { + "epoch": 0.86, + "learning_rate": 9.514457997528171e-07, + "loss": 0.2627, + "step": 10082 + }, + { + "epoch": 0.86, + "learning_rate": 9.502642505537452e-07, + "loss": 0.2521, + "step": 10083 + }, + { + "epoch": 0.86, + "learning_rate": 9.49083398863585e-07, + "loss": 0.2778, + "step": 10084 + }, + { + "epoch": 0.86, + "learning_rate": 9.479032447733561e-07, + "loss": 0.257, + "step": 10085 + }, + { + "epoch": 0.86, + "learning_rate": 9.467237883740199e-07, + "loss": 0.2679, + "step": 10086 + }, + { + "epoch": 0.86, + "learning_rate": 9.455450297564773e-07, + "loss": 0.2492, + "step": 10087 + }, + { + "epoch": 0.86, + "learning_rate": 9.443669690115853e-07, + "loss": 0.2881, + "step": 10088 + }, + { + "epoch": 0.86, + "learning_rate": 9.431896062301427e-07, + "loss": 0.2505, + "step": 10089 + }, + { + "epoch": 0.86, + "learning_rate": 9.420129415028934e-07, + "loss": 0.2717, + "step": 10090 + }, + { + "epoch": 0.86, + "learning_rate": 9.408369749205303e-07, + "loss": 0.2823, + "step": 10091 + }, + { + "epoch": 0.87, + "learning_rate": 9.396617065736902e-07, + "loss": 0.2504, + "step": 10092 + }, + { + "epoch": 0.87, + "learning_rate": 9.384871365529591e-07, + "loss": 0.2822, + "step": 10093 + }, + { + "epoch": 0.87, + "learning_rate": 9.373132649488636e-07, + "loss": 0.2186, + "step": 10094 + }, + { + "epoch": 0.87, + "learning_rate": 9.361400918518814e-07, + "loss": 0.3019, + "step": 10095 + }, + { + "epoch": 0.87, + "learning_rate": 9.349676173524347e-07, + "loss": 0.2479, + "step": 10096 + }, + { + "epoch": 0.87, + "learning_rate": 9.337958415408932e-07, + "loss": 0.2894, + "step": 10097 + }, + { + "epoch": 0.87, + "learning_rate": 9.326247645075703e-07, + "loss": 0.2569, + "step": 10098 + }, + { + "epoch": 0.87, + "learning_rate": 9.314543863427272e-07, + "loss": 0.2856, + "step": 10099 + }, + { + "epoch": 0.87, + "learning_rate": 9.302847071365739e-07, + "loss": 0.2401, + "step": 10100 + }, + { + "epoch": 0.87, + "learning_rate": 9.291157269792572e-07, + "loss": 0.2794, + "step": 10101 + }, + { + "epoch": 0.87, + "learning_rate": 9.279474459608806e-07, + "loss": 0.2654, + "step": 10102 + }, + { + "epoch": 0.87, + "learning_rate": 9.267798641714887e-07, + "loss": 0.6188, + "step": 10103 + }, + { + "epoch": 0.87, + "learning_rate": 9.256129817010728e-07, + "loss": 0.2567, + "step": 10104 + }, + { + "epoch": 0.87, + "learning_rate": 9.244467986395699e-07, + "loss": 0.2742, + "step": 10105 + }, + { + "epoch": 0.87, + "learning_rate": 9.23281315076865e-07, + "loss": 0.2898, + "step": 10106 + }, + { + "epoch": 0.87, + "learning_rate": 9.221165311027879e-07, + "loss": 0.2709, + "step": 10107 + }, + { + "epoch": 0.87, + "learning_rate": 9.209524468071096e-07, + "loss": 0.3021, + "step": 10108 + }, + { + "epoch": 0.87, + "learning_rate": 9.197890622795603e-07, + "loss": 0.278, + "step": 10109 + }, + { + "epoch": 0.87, + "learning_rate": 9.186263776098014e-07, + "loss": 0.2543, + "step": 10110 + }, + { + "epoch": 0.87, + "learning_rate": 9.174643928874483e-07, + "loss": 0.312, + "step": 10111 + }, + { + "epoch": 0.87, + "learning_rate": 9.163031082020623e-07, + "loss": 0.2413, + "step": 10112 + }, + { + "epoch": 0.87, + "learning_rate": 9.151425236431489e-07, + "loss": 0.2113, + "step": 10113 + }, + { + "epoch": 0.87, + "learning_rate": 9.139826393001617e-07, + "loss": 0.2538, + "step": 10114 + }, + { + "epoch": 0.87, + "learning_rate": 9.128234552624937e-07, + "loss": 0.3054, + "step": 10115 + }, + { + "epoch": 0.87, + "learning_rate": 9.116649716194948e-07, + "loss": 0.241, + "step": 10116 + }, + { + "epoch": 0.87, + "learning_rate": 9.105071884604543e-07, + "loss": 0.2518, + "step": 10117 + }, + { + "epoch": 0.87, + "learning_rate": 9.093501058746057e-07, + "loss": 0.2545, + "step": 10118 + }, + { + "epoch": 0.87, + "learning_rate": 9.081937239511329e-07, + "loss": 0.2581, + "step": 10119 + }, + { + "epoch": 0.87, + "learning_rate": 9.070380427791636e-07, + "loss": 0.3092, + "step": 10120 + }, + { + "epoch": 0.87, + "learning_rate": 9.058830624477754e-07, + "loss": 0.2772, + "step": 10121 + }, + { + "epoch": 0.87, + "learning_rate": 9.047287830459806e-07, + "loss": 0.3005, + "step": 10122 + }, + { + "epoch": 0.87, + "learning_rate": 9.035752046627522e-07, + "loss": 0.2443, + "step": 10123 + }, + { + "epoch": 0.87, + "learning_rate": 9.024223273870014e-07, + "loss": 0.3227, + "step": 10124 + }, + { + "epoch": 0.87, + "learning_rate": 9.012701513075839e-07, + "loss": 0.2647, + "step": 10125 + }, + { + "epoch": 0.87, + "learning_rate": 9.001186765133052e-07, + "loss": 0.2624, + "step": 10126 + }, + { + "epoch": 0.87, + "learning_rate": 8.989679030929155e-07, + "loss": 0.2267, + "step": 10127 + }, + { + "epoch": 0.87, + "learning_rate": 8.978178311351094e-07, + "loss": 0.3022, + "step": 10128 + }, + { + "epoch": 0.87, + "learning_rate": 8.966684607285304e-07, + "loss": 0.2643, + "step": 10129 + }, + { + "epoch": 0.87, + "learning_rate": 8.955197919617653e-07, + "loss": 0.2537, + "step": 10130 + }, + { + "epoch": 0.87, + "learning_rate": 8.943718249233491e-07, + "loss": 0.2786, + "step": 10131 + }, + { + "epoch": 0.87, + "learning_rate": 8.932245597017597e-07, + "loss": 0.2454, + "step": 10132 + }, + { + "epoch": 0.87, + "learning_rate": 8.920779963854231e-07, + "loss": 0.2853, + "step": 10133 + }, + { + "epoch": 0.87, + "learning_rate": 8.909321350627109e-07, + "loss": 0.2805, + "step": 10134 + }, + { + "epoch": 0.87, + "learning_rate": 8.897869758219401e-07, + "loss": 0.286, + "step": 10135 + }, + { + "epoch": 0.87, + "learning_rate": 8.886425187513748e-07, + "loss": 0.2698, + "step": 10136 + }, + { + "epoch": 0.87, + "learning_rate": 8.87498763939223e-07, + "loss": 0.2537, + "step": 10137 + }, + { + "epoch": 0.87, + "learning_rate": 8.863557114736432e-07, + "loss": 0.2933, + "step": 10138 + }, + { + "epoch": 0.87, + "learning_rate": 8.852133614427306e-07, + "loss": 0.2758, + "step": 10139 + }, + { + "epoch": 0.87, + "learning_rate": 8.840717139345345e-07, + "loss": 0.2505, + "step": 10140 + }, + { + "epoch": 0.87, + "learning_rate": 8.829307690370481e-07, + "loss": 0.2435, + "step": 10141 + }, + { + "epoch": 0.87, + "learning_rate": 8.817905268382088e-07, + "loss": 0.2628, + "step": 10142 + }, + { + "epoch": 0.87, + "learning_rate": 8.806509874259017e-07, + "loss": 0.2493, + "step": 10143 + }, + { + "epoch": 0.87, + "learning_rate": 8.795121508879567e-07, + "loss": 0.2852, + "step": 10144 + }, + { + "epoch": 0.87, + "learning_rate": 8.783740173121502e-07, + "loss": 0.2282, + "step": 10145 + }, + { + "epoch": 0.87, + "learning_rate": 8.772365867862021e-07, + "loss": 0.262, + "step": 10146 + }, + { + "epoch": 0.87, + "learning_rate": 8.76099859397781e-07, + "loss": 0.2187, + "step": 10147 + }, + { + "epoch": 0.87, + "learning_rate": 8.749638352345002e-07, + "loss": 0.2646, + "step": 10148 + }, + { + "epoch": 0.87, + "learning_rate": 8.738285143839198e-07, + "loss": 0.256, + "step": 10149 + }, + { + "epoch": 0.87, + "learning_rate": 8.726938969335419e-07, + "loss": 0.287, + "step": 10150 + }, + { + "epoch": 0.87, + "learning_rate": 8.715599829708199e-07, + "loss": 0.2524, + "step": 10151 + }, + { + "epoch": 0.87, + "learning_rate": 8.704267725831517e-07, + "loss": 0.3007, + "step": 10152 + }, + { + "epoch": 0.87, + "learning_rate": 8.69294265857874e-07, + "loss": 0.253, + "step": 10153 + }, + { + "epoch": 0.87, + "learning_rate": 8.681624628822794e-07, + "loss": 0.2729, + "step": 10154 + }, + { + "epoch": 0.87, + "learning_rate": 8.670313637436012e-07, + "loss": 0.2668, + "step": 10155 + }, + { + "epoch": 0.87, + "learning_rate": 8.659009685290177e-07, + "loss": 0.3208, + "step": 10156 + }, + { + "epoch": 0.87, + "learning_rate": 8.647712773256545e-07, + "loss": 0.2519, + "step": 10157 + }, + { + "epoch": 0.87, + "learning_rate": 8.636422902205821e-07, + "loss": 0.2219, + "step": 10158 + }, + { + "epoch": 0.87, + "learning_rate": 8.625140073008209e-07, + "loss": 0.2719, + "step": 10159 + }, + { + "epoch": 0.87, + "learning_rate": 8.613864286533257e-07, + "loss": 0.2439, + "step": 10160 + }, + { + "epoch": 0.87, + "learning_rate": 8.602595543650117e-07, + "loss": 0.28, + "step": 10161 + }, + { + "epoch": 0.87, + "learning_rate": 8.591333845227312e-07, + "loss": 0.2411, + "step": 10162 + }, + { + "epoch": 0.87, + "learning_rate": 8.580079192132829e-07, + "loss": 0.2894, + "step": 10163 + }, + { + "epoch": 0.87, + "learning_rate": 8.568831585234116e-07, + "loss": 0.2717, + "step": 10164 + }, + { + "epoch": 0.87, + "learning_rate": 8.557591025398082e-07, + "loss": 0.2485, + "step": 10165 + }, + { + "epoch": 0.87, + "learning_rate": 8.54635751349111e-07, + "loss": 0.2521, + "step": 10166 + }, + { + "epoch": 0.87, + "learning_rate": 8.535131050379019e-07, + "loss": 0.2629, + "step": 10167 + }, + { + "epoch": 0.87, + "learning_rate": 8.523911636927074e-07, + "loss": 0.2896, + "step": 10168 + }, + { + "epoch": 0.87, + "learning_rate": 8.512699274000047e-07, + "loss": 0.2623, + "step": 10169 + }, + { + "epoch": 0.87, + "learning_rate": 8.501493962462092e-07, + "loss": 0.3315, + "step": 10170 + }, + { + "epoch": 0.87, + "learning_rate": 8.490295703176876e-07, + "loss": 0.2733, + "step": 10171 + }, + { + "epoch": 0.87, + "learning_rate": 8.479104497007496e-07, + "loss": 0.2975, + "step": 10172 + }, + { + "epoch": 0.87, + "learning_rate": 8.467920344816538e-07, + "loss": 0.2535, + "step": 10173 + }, + { + "epoch": 0.87, + "learning_rate": 8.456743247465992e-07, + "loss": 0.2791, + "step": 10174 + }, + { + "epoch": 0.87, + "learning_rate": 8.445573205817359e-07, + "loss": 0.2787, + "step": 10175 + }, + { + "epoch": 0.87, + "learning_rate": 8.434410220731571e-07, + "loss": 0.2979, + "step": 10176 + }, + { + "epoch": 0.87, + "learning_rate": 8.423254293068983e-07, + "loss": 0.2758, + "step": 10177 + }, + { + "epoch": 0.87, + "learning_rate": 8.412105423689465e-07, + "loss": 0.2809, + "step": 10178 + }, + { + "epoch": 0.87, + "learning_rate": 8.400963613452307e-07, + "loss": 0.2643, + "step": 10179 + }, + { + "epoch": 0.87, + "learning_rate": 8.389828863216276e-07, + "loss": 0.2905, + "step": 10180 + }, + { + "epoch": 0.87, + "learning_rate": 8.378701173839577e-07, + "loss": 0.2662, + "step": 10181 + }, + { + "epoch": 0.87, + "learning_rate": 8.367580546179877e-07, + "loss": 0.2819, + "step": 10182 + }, + { + "epoch": 0.87, + "learning_rate": 8.356466981094313e-07, + "loss": 0.2352, + "step": 10183 + }, + { + "epoch": 0.87, + "learning_rate": 8.345360479439435e-07, + "loss": 0.2591, + "step": 10184 + }, + { + "epoch": 0.87, + "learning_rate": 8.33426104207129e-07, + "loss": 0.2668, + "step": 10185 + }, + { + "epoch": 0.87, + "learning_rate": 8.323168669845383e-07, + "loss": 0.2708, + "step": 10186 + }, + { + "epoch": 0.87, + "learning_rate": 8.31208336361663e-07, + "loss": 0.2935, + "step": 10187 + }, + { + "epoch": 0.87, + "learning_rate": 8.301005124239459e-07, + "loss": 0.2203, + "step": 10188 + }, + { + "epoch": 0.87, + "learning_rate": 8.289933952567708e-07, + "loss": 0.2464, + "step": 10189 + }, + { + "epoch": 0.87, + "learning_rate": 8.278869849454718e-07, + "loss": 0.2709, + "step": 10190 + }, + { + "epoch": 0.87, + "learning_rate": 8.267812815753195e-07, + "loss": 0.236, + "step": 10191 + }, + { + "epoch": 0.87, + "learning_rate": 8.256762852315414e-07, + "loss": 0.2982, + "step": 10192 + }, + { + "epoch": 0.87, + "learning_rate": 8.245719959993049e-07, + "loss": 0.3003, + "step": 10193 + }, + { + "epoch": 0.87, + "learning_rate": 8.234684139637205e-07, + "loss": 0.2906, + "step": 10194 + }, + { + "epoch": 0.87, + "learning_rate": 8.223655392098484e-07, + "loss": 0.2953, + "step": 10195 + }, + { + "epoch": 0.87, + "learning_rate": 8.212633718226926e-07, + "loss": 0.2431, + "step": 10196 + }, + { + "epoch": 0.87, + "learning_rate": 8.201619118872039e-07, + "loss": 0.2515, + "step": 10197 + }, + { + "epoch": 0.87, + "learning_rate": 8.190611594882736e-07, + "loss": 0.2704, + "step": 10198 + }, + { + "epoch": 0.87, + "learning_rate": 8.179611147107458e-07, + "loss": 0.2585, + "step": 10199 + }, + { + "epoch": 0.87, + "learning_rate": 8.168617776394072e-07, + "loss": 0.2499, + "step": 10200 + }, + { + "epoch": 0.87, + "learning_rate": 8.157631483589856e-07, + "loss": 0.2742, + "step": 10201 + }, + { + "epoch": 0.87, + "learning_rate": 8.146652269541599e-07, + "loss": 0.283, + "step": 10202 + }, + { + "epoch": 0.87, + "learning_rate": 8.135680135095525e-07, + "loss": 0.2634, + "step": 10203 + }, + { + "epoch": 0.87, + "learning_rate": 8.124715081097323e-07, + "loss": 0.2778, + "step": 10204 + }, + { + "epoch": 0.87, + "learning_rate": 8.113757108392095e-07, + "loss": 0.3146, + "step": 10205 + }, + { + "epoch": 0.87, + "learning_rate": 8.102806217824455e-07, + "loss": 0.2761, + "step": 10206 + }, + { + "epoch": 0.87, + "learning_rate": 8.09186241023846e-07, + "loss": 0.3047, + "step": 10207 + }, + { + "epoch": 0.88, + "learning_rate": 8.080925686477548e-07, + "loss": 0.3096, + "step": 10208 + }, + { + "epoch": 0.88, + "learning_rate": 8.06999604738471e-07, + "loss": 0.597, + "step": 10209 + }, + { + "epoch": 0.88, + "learning_rate": 8.059073493802327e-07, + "loss": 0.2451, + "step": 10210 + }, + { + "epoch": 0.88, + "learning_rate": 8.048158026572272e-07, + "loss": 0.2445, + "step": 10211 + }, + { + "epoch": 0.88, + "learning_rate": 8.037249646535838e-07, + "loss": 0.2629, + "step": 10212 + }, + { + "epoch": 0.88, + "learning_rate": 8.026348354533808e-07, + "loss": 0.3062, + "step": 10213 + }, + { + "epoch": 0.88, + "learning_rate": 8.01545415140641e-07, + "loss": 0.3006, + "step": 10214 + }, + { + "epoch": 0.88, + "learning_rate": 8.004567037993282e-07, + "loss": 0.2589, + "step": 10215 + }, + { + "epoch": 0.88, + "learning_rate": 7.993687015133567e-07, + "loss": 0.2894, + "step": 10216 + }, + { + "epoch": 0.88, + "learning_rate": 7.982814083665825e-07, + "loss": 0.2858, + "step": 10217 + }, + { + "epoch": 0.88, + "learning_rate": 7.971948244428118e-07, + "loss": 0.3172, + "step": 10218 + }, + { + "epoch": 0.88, + "learning_rate": 7.961089498257912e-07, + "loss": 0.2963, + "step": 10219 + }, + { + "epoch": 0.88, + "learning_rate": 7.950237845992148e-07, + "loss": 0.2882, + "step": 10220 + }, + { + "epoch": 0.88, + "learning_rate": 7.939393288467234e-07, + "loss": 0.3141, + "step": 10221 + }, + { + "epoch": 0.88, + "learning_rate": 7.928555826518991e-07, + "loss": 0.2718, + "step": 10222 + }, + { + "epoch": 0.88, + "learning_rate": 7.917725460982717e-07, + "loss": 0.2129, + "step": 10223 + }, + { + "epoch": 0.88, + "learning_rate": 7.906902192693177e-07, + "loss": 0.273, + "step": 10224 + }, + { + "epoch": 0.88, + "learning_rate": 7.896086022484572e-07, + "loss": 0.2366, + "step": 10225 + }, + { + "epoch": 0.88, + "learning_rate": 7.885276951190568e-07, + "loss": 0.2632, + "step": 10226 + }, + { + "epoch": 0.88, + "learning_rate": 7.874474979644254e-07, + "loss": 0.2374, + "step": 10227 + }, + { + "epoch": 0.88, + "learning_rate": 7.863680108678218e-07, + "loss": 0.3135, + "step": 10228 + }, + { + "epoch": 0.88, + "learning_rate": 7.85289233912444e-07, + "loss": 0.2545, + "step": 10229 + }, + { + "epoch": 0.88, + "learning_rate": 7.842111671814401e-07, + "loss": 0.2639, + "step": 10230 + }, + { + "epoch": 0.88, + "learning_rate": 7.831338107579056e-07, + "loss": 0.267, + "step": 10231 + }, + { + "epoch": 0.88, + "learning_rate": 7.820571647248732e-07, + "loss": 0.2482, + "step": 10232 + }, + { + "epoch": 0.88, + "learning_rate": 7.809812291653285e-07, + "loss": 0.262, + "step": 10233 + }, + { + "epoch": 0.88, + "learning_rate": 7.799060041621975e-07, + "loss": 0.2926, + "step": 10234 + }, + { + "epoch": 0.88, + "learning_rate": 7.788314897983551e-07, + "loss": 0.2347, + "step": 10235 + }, + { + "epoch": 0.88, + "learning_rate": 7.777576861566149e-07, + "loss": 0.2864, + "step": 10236 + }, + { + "epoch": 0.88, + "learning_rate": 7.766845933197464e-07, + "loss": 0.2698, + "step": 10237 + }, + { + "epoch": 0.88, + "learning_rate": 7.756122113704567e-07, + "loss": 0.2802, + "step": 10238 + }, + { + "epoch": 0.88, + "learning_rate": 7.745405403913975e-07, + "loss": 0.3302, + "step": 10239 + }, + { + "epoch": 0.88, + "learning_rate": 7.734695804651693e-07, + "loss": 0.2688, + "step": 10240 + }, + { + "epoch": 0.88, + "learning_rate": 7.72399331674315e-07, + "loss": 0.2608, + "step": 10241 + }, + { + "epoch": 0.88, + "learning_rate": 7.713297941013264e-07, + "loss": 0.2874, + "step": 10242 + }, + { + "epoch": 0.88, + "learning_rate": 7.702609678286366e-07, + "loss": 0.2923, + "step": 10243 + }, + { + "epoch": 0.88, + "learning_rate": 7.691928529386261e-07, + "loss": 0.2902, + "step": 10244 + }, + { + "epoch": 0.88, + "learning_rate": 7.681254495136214e-07, + "loss": 0.2681, + "step": 10245 + }, + { + "epoch": 0.88, + "learning_rate": 7.670587576358889e-07, + "loss": 0.3006, + "step": 10246 + }, + { + "epoch": 0.88, + "learning_rate": 7.659927773876474e-07, + "loss": 0.2382, + "step": 10247 + }, + { + "epoch": 0.88, + "learning_rate": 7.649275088510544e-07, + "loss": 0.295, + "step": 10248 + }, + { + "epoch": 0.88, + "learning_rate": 7.638629521082186e-07, + "loss": 0.2173, + "step": 10249 + }, + { + "epoch": 0.88, + "learning_rate": 7.627991072411889e-07, + "loss": 0.3004, + "step": 10250 + }, + { + "epoch": 0.88, + "learning_rate": 7.617359743319608e-07, + "loss": 0.2554, + "step": 10251 + }, + { + "epoch": 0.88, + "learning_rate": 7.606735534624787e-07, + "loss": 0.2717, + "step": 10252 + }, + { + "epoch": 0.88, + "learning_rate": 7.59611844714625e-07, + "loss": 0.3234, + "step": 10253 + }, + { + "epoch": 0.88, + "learning_rate": 7.585508481702308e-07, + "loss": 0.2795, + "step": 10254 + }, + { + "epoch": 0.88, + "learning_rate": 7.574905639110752e-07, + "loss": 0.3218, + "step": 10255 + }, + { + "epoch": 0.88, + "learning_rate": 7.564309920188784e-07, + "loss": 0.2847, + "step": 10256 + }, + { + "epoch": 0.88, + "learning_rate": 7.553721325753061e-07, + "loss": 0.2843, + "step": 10257 + }, + { + "epoch": 0.88, + "learning_rate": 7.543139856619708e-07, + "loss": 0.3107, + "step": 10258 + }, + { + "epoch": 0.88, + "learning_rate": 7.532565513604306e-07, + "loss": 0.27, + "step": 10259 + }, + { + "epoch": 0.88, + "learning_rate": 7.521998297521848e-07, + "loss": 0.2729, + "step": 10260 + }, + { + "epoch": 0.88, + "learning_rate": 7.511438209186817e-07, + "loss": 0.2521, + "step": 10261 + }, + { + "epoch": 0.88, + "learning_rate": 7.500885249413126e-07, + "loss": 0.267, + "step": 10262 + }, + { + "epoch": 0.88, + "learning_rate": 7.490339419014147e-07, + "loss": 0.2605, + "step": 10263 + }, + { + "epoch": 0.88, + "learning_rate": 7.47980071880271e-07, + "loss": 0.2557, + "step": 10264 + }, + { + "epoch": 0.88, + "learning_rate": 7.469269149591074e-07, + "loss": 0.2662, + "step": 10265 + }, + { + "epoch": 0.88, + "learning_rate": 7.45874471219098e-07, + "loss": 0.2609, + "step": 10266 + }, + { + "epoch": 0.88, + "learning_rate": 7.448227407413566e-07, + "loss": 0.2703, + "step": 10267 + }, + { + "epoch": 0.88, + "learning_rate": 7.437717236069453e-07, + "loss": 0.2714, + "step": 10268 + }, + { + "epoch": 0.88, + "learning_rate": 7.427214198968769e-07, + "loss": 0.2731, + "step": 10269 + }, + { + "epoch": 0.88, + "learning_rate": 7.416718296920977e-07, + "loss": 0.233, + "step": 10270 + }, + { + "epoch": 0.88, + "learning_rate": 7.406229530735076e-07, + "loss": 0.3277, + "step": 10271 + }, + { + "epoch": 0.88, + "learning_rate": 7.395747901219474e-07, + "loss": 0.3146, + "step": 10272 + }, + { + "epoch": 0.88, + "learning_rate": 7.385273409182081e-07, + "loss": 0.2561, + "step": 10273 + }, + { + "epoch": 0.88, + "learning_rate": 7.37480605543015e-07, + "loss": 0.3088, + "step": 10274 + }, + { + "epoch": 0.88, + "learning_rate": 7.364345840770515e-07, + "loss": 0.2448, + "step": 10275 + }, + { + "epoch": 0.88, + "learning_rate": 7.353892766009396e-07, + "loss": 0.2888, + "step": 10276 + }, + { + "epoch": 0.88, + "learning_rate": 7.343446831952428e-07, + "loss": 0.6007, + "step": 10277 + }, + { + "epoch": 0.88, + "learning_rate": 7.333008039404743e-07, + "loss": 0.2688, + "step": 10278 + }, + { + "epoch": 0.88, + "learning_rate": 7.322576389170921e-07, + "loss": 0.2797, + "step": 10279 + }, + { + "epoch": 0.88, + "learning_rate": 7.312151882054974e-07, + "loss": 0.3083, + "step": 10280 + }, + { + "epoch": 0.88, + "learning_rate": 7.301734518860381e-07, + "loss": 0.3824, + "step": 10281 + }, + { + "epoch": 0.88, + "learning_rate": 7.291324300390057e-07, + "loss": 0.2395, + "step": 10282 + }, + { + "epoch": 0.88, + "learning_rate": 7.280921227446381e-07, + "loss": 0.3444, + "step": 10283 + }, + { + "epoch": 0.88, + "learning_rate": 7.270525300831133e-07, + "loss": 0.3143, + "step": 10284 + }, + { + "epoch": 0.88, + "learning_rate": 7.260136521345618e-07, + "loss": 0.272, + "step": 10285 + }, + { + "epoch": 0.88, + "learning_rate": 7.249754889790539e-07, + "loss": 0.2831, + "step": 10286 + }, + { + "epoch": 0.88, + "learning_rate": 7.239380406966057e-07, + "loss": 0.5676, + "step": 10287 + }, + { + "epoch": 0.88, + "learning_rate": 7.229013073671787e-07, + "loss": 0.2584, + "step": 10288 + }, + { + "epoch": 0.88, + "learning_rate": 7.218652890706801e-07, + "loss": 0.2538, + "step": 10289 + }, + { + "epoch": 0.88, + "learning_rate": 7.208299858869616e-07, + "loss": 0.2994, + "step": 10290 + }, + { + "epoch": 0.88, + "learning_rate": 7.197953978958161e-07, + "loss": 0.2445, + "step": 10291 + }, + { + "epoch": 0.88, + "learning_rate": 7.187615251769875e-07, + "loss": 0.2707, + "step": 10292 + }, + { + "epoch": 0.88, + "learning_rate": 7.177283678101598e-07, + "loss": 0.3119, + "step": 10293 + }, + { + "epoch": 0.88, + "learning_rate": 7.16695925874964e-07, + "loss": 0.3141, + "step": 10294 + }, + { + "epoch": 0.88, + "learning_rate": 7.156641994509772e-07, + "loss": 0.2675, + "step": 10295 + }, + { + "epoch": 0.88, + "learning_rate": 7.146331886177183e-07, + "loss": 0.291, + "step": 10296 + }, + { + "epoch": 0.88, + "learning_rate": 7.136028934546546e-07, + "loss": 0.2991, + "step": 10297 + }, + { + "epoch": 0.88, + "learning_rate": 7.125733140411928e-07, + "loss": 0.545, + "step": 10298 + }, + { + "epoch": 0.88, + "learning_rate": 7.115444504566882e-07, + "loss": 0.2623, + "step": 10299 + }, + { + "epoch": 0.88, + "learning_rate": 7.105163027804429e-07, + "loss": 0.2701, + "step": 10300 + }, + { + "epoch": 0.88, + "learning_rate": 7.094888710917003e-07, + "loss": 0.3021, + "step": 10301 + }, + { + "epoch": 0.88, + "learning_rate": 7.084621554696502e-07, + "loss": 0.2534, + "step": 10302 + }, + { + "epoch": 0.88, + "learning_rate": 7.074361559934251e-07, + "loss": 0.2833, + "step": 10303 + }, + { + "epoch": 0.88, + "learning_rate": 7.064108727421082e-07, + "loss": 0.2531, + "step": 10304 + }, + { + "epoch": 0.88, + "learning_rate": 7.053863057947175e-07, + "loss": 0.2766, + "step": 10305 + }, + { + "epoch": 0.88, + "learning_rate": 7.043624552302231e-07, + "loss": 0.2597, + "step": 10306 + }, + { + "epoch": 0.88, + "learning_rate": 7.03339321127543e-07, + "loss": 0.6469, + "step": 10307 + }, + { + "epoch": 0.88, + "learning_rate": 7.023169035655297e-07, + "loss": 0.2501, + "step": 10308 + }, + { + "epoch": 0.88, + "learning_rate": 7.012952026229892e-07, + "loss": 0.267, + "step": 10309 + }, + { + "epoch": 0.88, + "learning_rate": 7.002742183786671e-07, + "loss": 0.275, + "step": 10310 + }, + { + "epoch": 0.88, + "learning_rate": 6.992539509112595e-07, + "loss": 0.3052, + "step": 10311 + }, + { + "epoch": 0.88, + "learning_rate": 6.98234400299399e-07, + "loss": 0.269, + "step": 10312 + }, + { + "epoch": 0.88, + "learning_rate": 6.972155666216684e-07, + "loss": 0.2802, + "step": 10313 + }, + { + "epoch": 0.88, + "learning_rate": 6.961974499565982e-07, + "loss": 0.2859, + "step": 10314 + }, + { + "epoch": 0.88, + "learning_rate": 6.951800503826556e-07, + "loss": 0.2609, + "step": 10315 + }, + { + "epoch": 0.88, + "learning_rate": 6.941633679782578e-07, + "loss": 0.2773, + "step": 10316 + }, + { + "epoch": 0.88, + "learning_rate": 6.931474028217666e-07, + "loss": 0.2495, + "step": 10317 + }, + { + "epoch": 0.88, + "learning_rate": 6.921321549914872e-07, + "loss": 0.2581, + "step": 10318 + }, + { + "epoch": 0.88, + "learning_rate": 6.911176245656693e-07, + "loss": 0.3005, + "step": 10319 + }, + { + "epoch": 0.88, + "learning_rate": 6.90103811622509e-07, + "loss": 0.2834, + "step": 10320 + }, + { + "epoch": 0.88, + "learning_rate": 6.890907162401473e-07, + "loss": 0.2974, + "step": 10321 + }, + { + "epoch": 0.88, + "learning_rate": 6.880783384966638e-07, + "loss": 0.2846, + "step": 10322 + }, + { + "epoch": 0.88, + "learning_rate": 6.870666784700908e-07, + "loss": 0.2733, + "step": 10323 + }, + { + "epoch": 0.88, + "learning_rate": 6.860557362384024e-07, + "loss": 0.3167, + "step": 10324 + }, + { + "epoch": 0.89, + "learning_rate": 6.850455118795152e-07, + "loss": 0.2252, + "step": 10325 + }, + { + "epoch": 0.89, + "learning_rate": 6.840360054712946e-07, + "loss": 0.2889, + "step": 10326 + }, + { + "epoch": 0.89, + "learning_rate": 6.830272170915464e-07, + "loss": 0.283, + "step": 10327 + }, + { + "epoch": 0.89, + "learning_rate": 6.82019146818026e-07, + "loss": 0.2692, + "step": 10328 + }, + { + "epoch": 0.89, + "learning_rate": 6.810117947284256e-07, + "loss": 0.306, + "step": 10329 + }, + { + "epoch": 0.89, + "learning_rate": 6.800051609003911e-07, + "loss": 0.2507, + "step": 10330 + }, + { + "epoch": 0.89, + "learning_rate": 6.789992454115058e-07, + "loss": 0.2523, + "step": 10331 + }, + { + "epoch": 0.89, + "learning_rate": 6.779940483393033e-07, + "loss": 0.2793, + "step": 10332 + }, + { + "epoch": 0.89, + "learning_rate": 6.769895697612572e-07, + "loss": 0.2893, + "step": 10333 + }, + { + "epoch": 0.89, + "learning_rate": 6.7598580975479e-07, + "loss": 0.2636, + "step": 10334 + }, + { + "epoch": 0.89, + "learning_rate": 6.749827683972654e-07, + "loss": 0.2209, + "step": 10335 + }, + { + "epoch": 0.89, + "learning_rate": 6.739804457659927e-07, + "loss": 0.2809, + "step": 10336 + }, + { + "epoch": 0.89, + "learning_rate": 6.729788419382255e-07, + "loss": 0.254, + "step": 10337 + }, + { + "epoch": 0.89, + "learning_rate": 6.719779569911622e-07, + "loss": 0.2605, + "step": 10338 + }, + { + "epoch": 0.89, + "learning_rate": 6.709777910019477e-07, + "loss": 0.2521, + "step": 10339 + }, + { + "epoch": 0.89, + "learning_rate": 6.699783440476681e-07, + "loss": 0.2465, + "step": 10340 + }, + { + "epoch": 0.89, + "learning_rate": 6.689796162053575e-07, + "loss": 0.2467, + "step": 10341 + }, + { + "epoch": 0.89, + "learning_rate": 6.67981607551994e-07, + "loss": 0.6031, + "step": 10342 + }, + { + "epoch": 0.89, + "learning_rate": 6.669843181644953e-07, + "loss": 0.265, + "step": 10343 + }, + { + "epoch": 0.89, + "learning_rate": 6.659877481197285e-07, + "loss": 0.2589, + "step": 10344 + }, + { + "epoch": 0.89, + "learning_rate": 6.64991897494508e-07, + "loss": 0.2261, + "step": 10345 + }, + { + "epoch": 0.89, + "learning_rate": 6.639967663655844e-07, + "loss": 0.2673, + "step": 10346 + }, + { + "epoch": 0.89, + "learning_rate": 6.630023548096609e-07, + "loss": 0.2745, + "step": 10347 + }, + { + "epoch": 0.89, + "learning_rate": 6.620086629033795e-07, + "loss": 0.296, + "step": 10348 + }, + { + "epoch": 0.89, + "learning_rate": 6.610156907233312e-07, + "loss": 0.2382, + "step": 10349 + }, + { + "epoch": 0.89, + "learning_rate": 6.600234383460469e-07, + "loss": 0.2868, + "step": 10350 + }, + { + "epoch": 0.89, + "learning_rate": 6.590319058480044e-07, + "loss": 0.3128, + "step": 10351 + }, + { + "epoch": 0.89, + "learning_rate": 6.580410933056314e-07, + "loss": 0.2762, + "step": 10352 + }, + { + "epoch": 0.89, + "learning_rate": 6.57051000795288e-07, + "loss": 0.3168, + "step": 10353 + }, + { + "epoch": 0.89, + "learning_rate": 6.560616283932897e-07, + "loss": 0.2469, + "step": 10354 + }, + { + "epoch": 0.89, + "learning_rate": 6.550729761758901e-07, + "loss": 0.2615, + "step": 10355 + }, + { + "epoch": 0.89, + "learning_rate": 6.540850442192926e-07, + "loss": 0.2931, + "step": 10356 + }, + { + "epoch": 0.89, + "learning_rate": 6.530978325996385e-07, + "loss": 0.2956, + "step": 10357 + }, + { + "epoch": 0.89, + "learning_rate": 6.521113413930202e-07, + "loss": 0.2905, + "step": 10358 + }, + { + "epoch": 0.89, + "learning_rate": 6.511255706754715e-07, + "loss": 0.2384, + "step": 10359 + }, + { + "epoch": 0.89, + "learning_rate": 6.501405205229683e-07, + "loss": 0.2676, + "step": 10360 + }, + { + "epoch": 0.89, + "learning_rate": 6.491561910114352e-07, + "loss": 0.3158, + "step": 10361 + }, + { + "epoch": 0.89, + "learning_rate": 6.481725822167384e-07, + "loss": 0.2765, + "step": 10362 + }, + { + "epoch": 0.89, + "learning_rate": 6.471896942146905e-07, + "loss": 0.2473, + "step": 10363 + }, + { + "epoch": 0.89, + "learning_rate": 6.462075270810475e-07, + "loss": 0.263, + "step": 10364 + }, + { + "epoch": 0.89, + "learning_rate": 6.4522608089151e-07, + "loss": 0.2588, + "step": 10365 + }, + { + "epoch": 0.89, + "learning_rate": 6.442453557217243e-07, + "loss": 0.2292, + "step": 10366 + }, + { + "epoch": 0.89, + "learning_rate": 6.432653516472765e-07, + "loss": 0.3415, + "step": 10367 + }, + { + "epoch": 0.89, + "learning_rate": 6.422860687437027e-07, + "loss": 0.2875, + "step": 10368 + }, + { + "epoch": 0.89, + "learning_rate": 6.413075070864805e-07, + "loss": 0.2634, + "step": 10369 + }, + { + "epoch": 0.89, + "learning_rate": 6.403296667510339e-07, + "loss": 0.3031, + "step": 10370 + }, + { + "epoch": 0.89, + "learning_rate": 6.393525478127282e-07, + "loss": 0.2927, + "step": 10371 + }, + { + "epoch": 0.89, + "learning_rate": 6.383761503468766e-07, + "loss": 0.2823, + "step": 10372 + }, + { + "epoch": 0.89, + "learning_rate": 6.374004744287354e-07, + "loss": 0.2841, + "step": 10373 + }, + { + "epoch": 0.89, + "learning_rate": 6.364255201335013e-07, + "loss": 0.2617, + "step": 10374 + }, + { + "epoch": 0.89, + "learning_rate": 6.354512875363228e-07, + "loss": 0.3224, + "step": 10375 + }, + { + "epoch": 0.89, + "learning_rate": 6.344777767122867e-07, + "loss": 0.2996, + "step": 10376 + }, + { + "epoch": 0.89, + "learning_rate": 6.335049877364264e-07, + "loss": 0.2654, + "step": 10377 + }, + { + "epoch": 0.89, + "learning_rate": 6.325329206837217e-07, + "loss": 0.2483, + "step": 10378 + }, + { + "epoch": 0.89, + "learning_rate": 6.315615756290927e-07, + "loss": 0.2231, + "step": 10379 + }, + { + "epoch": 0.89, + "learning_rate": 6.305909526474085e-07, + "loss": 0.265, + "step": 10380 + }, + { + "epoch": 0.89, + "learning_rate": 6.296210518134771e-07, + "loss": 0.2485, + "step": 10381 + }, + { + "epoch": 0.89, + "learning_rate": 6.286518732020519e-07, + "loss": 0.238, + "step": 10382 + }, + { + "epoch": 0.89, + "learning_rate": 6.276834168878388e-07, + "loss": 0.2448, + "step": 10383 + }, + { + "epoch": 0.89, + "learning_rate": 6.267156829454768e-07, + "loss": 0.3331, + "step": 10384 + }, + { + "epoch": 0.89, + "learning_rate": 6.257486714495542e-07, + "loss": 0.2851, + "step": 10385 + }, + { + "epoch": 0.89, + "learning_rate": 6.247823824746058e-07, + "loss": 0.2515, + "step": 10386 + }, + { + "epoch": 0.89, + "learning_rate": 6.238168160951075e-07, + "loss": 0.3083, + "step": 10387 + }, + { + "epoch": 0.89, + "learning_rate": 6.228519723854787e-07, + "loss": 0.2543, + "step": 10388 + }, + { + "epoch": 0.89, + "learning_rate": 6.218878514200843e-07, + "loss": 0.2869, + "step": 10389 + }, + { + "epoch": 0.89, + "learning_rate": 6.209244532732394e-07, + "loss": 0.259, + "step": 10390 + }, + { + "epoch": 0.89, + "learning_rate": 6.199617780191924e-07, + "loss": 0.2486, + "step": 10391 + }, + { + "epoch": 0.89, + "learning_rate": 6.189998257321428e-07, + "loss": 0.2713, + "step": 10392 + }, + { + "epoch": 0.89, + "learning_rate": 6.180385964862346e-07, + "loss": 0.3004, + "step": 10393 + }, + { + "epoch": 0.89, + "learning_rate": 6.170780903555529e-07, + "loss": 0.237, + "step": 10394 + }, + { + "epoch": 0.89, + "learning_rate": 6.161183074141319e-07, + "loss": 0.2733, + "step": 10395 + }, + { + "epoch": 0.89, + "learning_rate": 6.151592477359413e-07, + "loss": 0.5713, + "step": 10396 + }, + { + "epoch": 0.89, + "learning_rate": 6.142009113949076e-07, + "loss": 0.2364, + "step": 10397 + }, + { + "epoch": 0.89, + "learning_rate": 6.132432984648895e-07, + "loss": 0.2567, + "step": 10398 + }, + { + "epoch": 0.89, + "learning_rate": 6.122864090196956e-07, + "loss": 0.6224, + "step": 10399 + }, + { + "epoch": 0.89, + "learning_rate": 6.113302431330803e-07, + "loss": 0.2811, + "step": 10400 + }, + { + "epoch": 0.89, + "learning_rate": 6.10374800878738e-07, + "loss": 0.3021, + "step": 10401 + }, + { + "epoch": 0.89, + "learning_rate": 6.09420082330312e-07, + "loss": 0.3235, + "step": 10402 + }, + { + "epoch": 0.89, + "learning_rate": 6.084660875613846e-07, + "loss": 0.2883, + "step": 10403 + }, + { + "epoch": 0.89, + "learning_rate": 6.07512816645488e-07, + "loss": 0.2834, + "step": 10404 + }, + { + "epoch": 0.89, + "learning_rate": 6.065602696560924e-07, + "loss": 0.2847, + "step": 10405 + }, + { + "epoch": 0.89, + "learning_rate": 6.056084466666167e-07, + "loss": 0.2787, + "step": 10406 + }, + { + "epoch": 0.89, + "learning_rate": 6.046573477504236e-07, + "loss": 0.2586, + "step": 10407 + }, + { + "epoch": 0.89, + "learning_rate": 6.037069729808188e-07, + "loss": 0.2845, + "step": 10408 + }, + { + "epoch": 0.89, + "learning_rate": 6.027573224310501e-07, + "loss": 0.2635, + "step": 10409 + }, + { + "epoch": 0.89, + "learning_rate": 6.01808396174316e-07, + "loss": 0.2975, + "step": 10410 + }, + { + "epoch": 0.89, + "learning_rate": 6.008601942837533e-07, + "loss": 0.2719, + "step": 10411 + }, + { + "epoch": 0.89, + "learning_rate": 5.999127168324425e-07, + "loss": 0.3004, + "step": 10412 + }, + { + "epoch": 0.89, + "learning_rate": 5.989659638934131e-07, + "loss": 0.2484, + "step": 10413 + }, + { + "epoch": 0.89, + "learning_rate": 5.980199355396343e-07, + "loss": 0.2873, + "step": 10414 + }, + { + "epoch": 0.89, + "learning_rate": 5.970746318440235e-07, + "loss": 0.2653, + "step": 10415 + }, + { + "epoch": 0.89, + "learning_rate": 5.961300528794378e-07, + "loss": 0.267, + "step": 10416 + }, + { + "epoch": 0.89, + "learning_rate": 5.951861987186824e-07, + "loss": 0.2244, + "step": 10417 + }, + { + "epoch": 0.89, + "learning_rate": 5.942430694345058e-07, + "loss": 0.267, + "step": 10418 + }, + { + "epoch": 0.89, + "learning_rate": 5.933006650995965e-07, + "loss": 0.2414, + "step": 10419 + }, + { + "epoch": 0.89, + "learning_rate": 5.923589857865908e-07, + "loss": 0.258, + "step": 10420 + }, + { + "epoch": 0.89, + "learning_rate": 5.914180315680729e-07, + "loss": 0.2464, + "step": 10421 + }, + { + "epoch": 0.89, + "learning_rate": 5.904778025165614e-07, + "loss": 0.2617, + "step": 10422 + }, + { + "epoch": 0.89, + "learning_rate": 5.895382987045284e-07, + "loss": 0.2484, + "step": 10423 + }, + { + "epoch": 0.89, + "learning_rate": 5.885995202043848e-07, + "loss": 0.2515, + "step": 10424 + }, + { + "epoch": 0.89, + "learning_rate": 5.876614670884873e-07, + "loss": 0.2789, + "step": 10425 + }, + { + "epoch": 0.89, + "learning_rate": 5.867241394291356e-07, + "loss": 0.2703, + "step": 10426 + }, + { + "epoch": 0.89, + "learning_rate": 5.857875372985733e-07, + "loss": 0.2812, + "step": 10427 + }, + { + "epoch": 0.89, + "learning_rate": 5.848516607689947e-07, + "loss": 0.2932, + "step": 10428 + }, + { + "epoch": 0.89, + "learning_rate": 5.839165099125254e-07, + "loss": 0.3059, + "step": 10429 + }, + { + "epoch": 0.89, + "learning_rate": 5.829820848012457e-07, + "loss": 0.3168, + "step": 10430 + }, + { + "epoch": 0.89, + "learning_rate": 5.820483855071768e-07, + "loss": 0.2957, + "step": 10431 + }, + { + "epoch": 0.89, + "learning_rate": 5.811154121022821e-07, + "loss": 0.2262, + "step": 10432 + }, + { + "epoch": 0.89, + "learning_rate": 5.801831646584732e-07, + "loss": 0.2581, + "step": 10433 + }, + { + "epoch": 0.89, + "learning_rate": 5.79251643247598e-07, + "loss": 0.2501, + "step": 10434 + }, + { + "epoch": 0.89, + "learning_rate": 5.783208479414603e-07, + "loss": 0.2808, + "step": 10435 + }, + { + "epoch": 0.89, + "learning_rate": 5.77390778811796e-07, + "loss": 0.6025, + "step": 10436 + }, + { + "epoch": 0.89, + "learning_rate": 5.764614359302912e-07, + "loss": 0.2513, + "step": 10437 + }, + { + "epoch": 0.89, + "learning_rate": 5.755328193685772e-07, + "loss": 0.2607, + "step": 10438 + }, + { + "epoch": 0.89, + "learning_rate": 5.746049291982247e-07, + "loss": 0.2401, + "step": 10439 + }, + { + "epoch": 0.89, + "learning_rate": 5.736777654907532e-07, + "loss": 0.278, + "step": 10440 + }, + { + "epoch": 0.89, + "learning_rate": 5.727513283176222e-07, + "loss": 0.2461, + "step": 10441 + }, + { + "epoch": 0.9, + "learning_rate": 5.718256177502379e-07, + "loss": 0.2544, + "step": 10442 + }, + { + "epoch": 0.9, + "learning_rate": 5.709006338599488e-07, + "loss": 0.2806, + "step": 10443 + }, + { + "epoch": 0.9, + "learning_rate": 5.699763767180477e-07, + "loss": 0.2455, + "step": 10444 + }, + { + "epoch": 0.9, + "learning_rate": 5.690528463957734e-07, + "loss": 0.2767, + "step": 10445 + }, + { + "epoch": 0.9, + "learning_rate": 5.681300429643044e-07, + "loss": 0.2718, + "step": 10446 + }, + { + "epoch": 0.9, + "learning_rate": 5.672079664947683e-07, + "loss": 0.6139, + "step": 10447 + }, + { + "epoch": 0.9, + "learning_rate": 5.662866170582338e-07, + "loss": 0.2974, + "step": 10448 + }, + { + "epoch": 0.9, + "learning_rate": 5.653659947257139e-07, + "loss": 0.2699, + "step": 10449 + }, + { + "epoch": 0.9, + "learning_rate": 5.644460995681644e-07, + "loss": 0.2922, + "step": 10450 + }, + { + "epoch": 0.9, + "learning_rate": 5.635269316564873e-07, + "loss": 0.2414, + "step": 10451 + }, + { + "epoch": 0.9, + "learning_rate": 5.62608491061527e-07, + "loss": 0.2637, + "step": 10452 + }, + { + "epoch": 0.9, + "learning_rate": 5.616907778540726e-07, + "loss": 0.2783, + "step": 10453 + }, + { + "epoch": 0.9, + "learning_rate": 5.607737921048573e-07, + "loss": 0.2968, + "step": 10454 + }, + { + "epoch": 0.9, + "learning_rate": 5.598575338845569e-07, + "loss": 0.2773, + "step": 10455 + }, + { + "epoch": 0.9, + "learning_rate": 5.589420032637949e-07, + "loss": 0.2729, + "step": 10456 + }, + { + "epoch": 0.9, + "learning_rate": 5.580272003131315e-07, + "loss": 0.2513, + "step": 10457 + }, + { + "epoch": 0.9, + "learning_rate": 5.57113125103077e-07, + "loss": 0.2886, + "step": 10458 + }, + { + "epoch": 0.9, + "learning_rate": 5.561997777040862e-07, + "loss": 0.2676, + "step": 10459 + }, + { + "epoch": 0.9, + "learning_rate": 5.552871581865515e-07, + "loss": 0.2918, + "step": 10460 + }, + { + "epoch": 0.9, + "learning_rate": 5.543752666208158e-07, + "loss": 0.2942, + "step": 10461 + }, + { + "epoch": 0.9, + "learning_rate": 5.534641030771615e-07, + "loss": 0.2472, + "step": 10462 + }, + { + "epoch": 0.9, + "learning_rate": 5.525536676258203e-07, + "loss": 0.31, + "step": 10463 + }, + { + "epoch": 0.9, + "learning_rate": 5.516439603369583e-07, + "loss": 0.2626, + "step": 10464 + }, + { + "epoch": 0.9, + "learning_rate": 5.507349812806939e-07, + "loss": 0.2842, + "step": 10465 + }, + { + "epoch": 0.9, + "learning_rate": 5.498267305270888e-07, + "loss": 0.302, + "step": 10466 + }, + { + "epoch": 0.9, + "learning_rate": 5.489192081461436e-07, + "loss": 0.3217, + "step": 10467 + }, + { + "epoch": 0.9, + "learning_rate": 5.48012414207807e-07, + "loss": 0.2433, + "step": 10468 + }, + { + "epoch": 0.9, + "learning_rate": 5.471063487819695e-07, + "loss": 0.2498, + "step": 10469 + }, + { + "epoch": 0.9, + "learning_rate": 5.462010119384665e-07, + "loss": 0.2684, + "step": 10470 + }, + { + "epoch": 0.9, + "learning_rate": 5.452964037470776e-07, + "loss": 0.2736, + "step": 10471 + }, + { + "epoch": 0.9, + "learning_rate": 5.443925242775227e-07, + "loss": 0.2201, + "step": 10472 + }, + { + "epoch": 0.9, + "learning_rate": 5.43489373599474e-07, + "loss": 0.2712, + "step": 10473 + }, + { + "epoch": 0.9, + "learning_rate": 5.425869517825366e-07, + "loss": 0.2578, + "step": 10474 + }, + { + "epoch": 0.9, + "learning_rate": 5.416852588962662e-07, + "loss": 0.2773, + "step": 10475 + }, + { + "epoch": 0.9, + "learning_rate": 5.407842950101605e-07, + "loss": 0.3067, + "step": 10476 + }, + { + "epoch": 0.9, + "learning_rate": 5.398840601936628e-07, + "loss": 0.27, + "step": 10477 + }, + { + "epoch": 0.9, + "learning_rate": 5.389845545161598e-07, + "loss": 0.2852, + "step": 10478 + }, + { + "epoch": 0.9, + "learning_rate": 5.380857780469762e-07, + "loss": 0.2975, + "step": 10479 + }, + { + "epoch": 0.9, + "learning_rate": 5.371877308553896e-07, + "loss": 0.2757, + "step": 10480 + }, + { + "epoch": 0.9, + "learning_rate": 5.36290413010615e-07, + "loss": 0.2927, + "step": 10481 + }, + { + "epoch": 0.9, + "learning_rate": 5.353938245818147e-07, + "loss": 0.2905, + "step": 10482 + }, + { + "epoch": 0.9, + "learning_rate": 5.34497965638091e-07, + "loss": 0.3113, + "step": 10483 + }, + { + "epoch": 0.9, + "learning_rate": 5.336028362484947e-07, + "loss": 0.2873, + "step": 10484 + }, + { + "epoch": 0.9, + "learning_rate": 5.327084364820168e-07, + "loss": 0.2693, + "step": 10485 + }, + { + "epoch": 0.9, + "learning_rate": 5.318147664075923e-07, + "loss": 0.2186, + "step": 10486 + }, + { + "epoch": 0.9, + "learning_rate": 5.30921826094104e-07, + "loss": 0.3048, + "step": 10487 + }, + { + "epoch": 0.9, + "learning_rate": 5.300296156103723e-07, + "loss": 0.6042, + "step": 10488 + }, + { + "epoch": 0.9, + "learning_rate": 5.291381350251645e-07, + "loss": 0.2502, + "step": 10489 + }, + { + "epoch": 0.9, + "learning_rate": 5.282473844071933e-07, + "loss": 0.2612, + "step": 10490 + }, + { + "epoch": 0.9, + "learning_rate": 5.273573638251117e-07, + "loss": 0.2348, + "step": 10491 + }, + { + "epoch": 0.9, + "learning_rate": 5.264680733475192e-07, + "loss": 0.2571, + "step": 10492 + }, + { + "epoch": 0.9, + "learning_rate": 5.255795130429575e-07, + "loss": 0.286, + "step": 10493 + }, + { + "epoch": 0.9, + "learning_rate": 5.246916829799132e-07, + "loss": 0.2637, + "step": 10494 + }, + { + "epoch": 0.9, + "learning_rate": 5.238045832268146e-07, + "loss": 0.271, + "step": 10495 + }, + { + "epoch": 0.9, + "learning_rate": 5.229182138520339e-07, + "loss": 0.2603, + "step": 10496 + }, + { + "epoch": 0.9, + "learning_rate": 5.220325749238919e-07, + "loss": 0.3203, + "step": 10497 + }, + { + "epoch": 0.9, + "learning_rate": 5.211476665106463e-07, + "loss": 0.2745, + "step": 10498 + }, + { + "epoch": 0.9, + "learning_rate": 5.202634886805013e-07, + "loss": 0.2832, + "step": 10499 + }, + { + "epoch": 0.9, + "learning_rate": 5.193800415016071e-07, + "loss": 0.3075, + "step": 10500 + }, + { + "epoch": 0.9, + "learning_rate": 5.184973250420544e-07, + "loss": 0.3088, + "step": 10501 + }, + { + "epoch": 0.9, + "learning_rate": 5.17615339369878e-07, + "loss": 0.2941, + "step": 10502 + }, + { + "epoch": 0.9, + "learning_rate": 5.167340845530544e-07, + "loss": 0.2606, + "step": 10503 + }, + { + "epoch": 0.9, + "learning_rate": 5.158535606595138e-07, + "loss": 0.2651, + "step": 10504 + }, + { + "epoch": 0.9, + "learning_rate": 5.149737677571164e-07, + "loss": 0.2852, + "step": 10505 + }, + { + "epoch": 0.9, + "learning_rate": 5.140947059136736e-07, + "loss": 0.2871, + "step": 10506 + }, + { + "epoch": 0.9, + "learning_rate": 5.132163751969399e-07, + "loss": 0.5829, + "step": 10507 + }, + { + "epoch": 0.9, + "learning_rate": 5.123387756746123e-07, + "loss": 0.2967, + "step": 10508 + }, + { + "epoch": 0.9, + "learning_rate": 5.114619074143335e-07, + "loss": 0.2571, + "step": 10509 + }, + { + "epoch": 0.9, + "learning_rate": 5.105857704836836e-07, + "loss": 0.5992, + "step": 10510 + }, + { + "epoch": 0.9, + "learning_rate": 5.097103649501977e-07, + "loss": 0.3119, + "step": 10511 + }, + { + "epoch": 0.9, + "learning_rate": 5.088356908813419e-07, + "loss": 0.2571, + "step": 10512 + }, + { + "epoch": 0.9, + "learning_rate": 5.079617483445332e-07, + "loss": 0.2983, + "step": 10513 + }, + { + "epoch": 0.9, + "learning_rate": 5.070885374071321e-07, + "loss": 0.2593, + "step": 10514 + }, + { + "epoch": 0.9, + "learning_rate": 5.062160581364406e-07, + "loss": 0.2964, + "step": 10515 + }, + { + "epoch": 0.9, + "learning_rate": 5.053443105997069e-07, + "loss": 0.2898, + "step": 10516 + }, + { + "epoch": 0.9, + "learning_rate": 5.044732948641162e-07, + "loss": 0.2579, + "step": 10517 + }, + { + "epoch": 0.9, + "learning_rate": 5.036030109968082e-07, + "loss": 0.2892, + "step": 10518 + }, + { + "epoch": 0.9, + "learning_rate": 5.027334590648547e-07, + "loss": 0.2431, + "step": 10519 + }, + { + "epoch": 0.9, + "learning_rate": 5.018646391352799e-07, + "loss": 0.2501, + "step": 10520 + }, + { + "epoch": 0.9, + "learning_rate": 5.00996551275047e-07, + "loss": 0.2723, + "step": 10521 + }, + { + "epoch": 0.9, + "learning_rate": 5.001291955510634e-07, + "loss": 0.2573, + "step": 10522 + }, + { + "epoch": 0.9, + "learning_rate": 4.992625720301814e-07, + "loss": 0.3287, + "step": 10523 + }, + { + "epoch": 0.9, + "learning_rate": 4.983966807791951e-07, + "loss": 0.2615, + "step": 10524 + }, + { + "epoch": 0.9, + "learning_rate": 4.975315218648457e-07, + "loss": 0.2998, + "step": 10525 + }, + { + "epoch": 0.9, + "learning_rate": 4.966670953538133e-07, + "loss": 0.2562, + "step": 10526 + }, + { + "epoch": 0.9, + "learning_rate": 4.958034013127222e-07, + "loss": 0.3052, + "step": 10527 + }, + { + "epoch": 0.9, + "learning_rate": 4.949404398081448e-07, + "loss": 0.2487, + "step": 10528 + }, + { + "epoch": 0.9, + "learning_rate": 4.940782109065911e-07, + "loss": 0.2537, + "step": 10529 + }, + { + "epoch": 0.9, + "learning_rate": 4.932167146745193e-07, + "loss": 0.2241, + "step": 10530 + }, + { + "epoch": 0.9, + "learning_rate": 4.923559511783282e-07, + "loss": 0.256, + "step": 10531 + }, + { + "epoch": 0.9, + "learning_rate": 4.914959204843639e-07, + "loss": 0.2524, + "step": 10532 + }, + { + "epoch": 0.9, + "learning_rate": 4.906366226589099e-07, + "loss": 0.2492, + "step": 10533 + }, + { + "epoch": 0.9, + "learning_rate": 4.897780577681954e-07, + "loss": 0.2391, + "step": 10534 + }, + { + "epoch": 0.9, + "learning_rate": 4.889202258784009e-07, + "loss": 0.2836, + "step": 10535 + }, + { + "epoch": 0.9, + "learning_rate": 4.88063127055638e-07, + "loss": 0.2707, + "step": 10536 + }, + { + "epoch": 0.9, + "learning_rate": 4.872067613659692e-07, + "loss": 0.5853, + "step": 10537 + }, + { + "epoch": 0.9, + "learning_rate": 4.863511288753986e-07, + "loss": 0.2502, + "step": 10538 + }, + { + "epoch": 0.9, + "learning_rate": 4.854962296498766e-07, + "loss": 0.3179, + "step": 10539 + }, + { + "epoch": 0.9, + "learning_rate": 4.846420637552918e-07, + "loss": 0.2991, + "step": 10540 + }, + { + "epoch": 0.9, + "learning_rate": 4.83788631257478e-07, + "loss": 0.3018, + "step": 10541 + }, + { + "epoch": 0.9, + "learning_rate": 4.829359322222182e-07, + "loss": 0.2626, + "step": 10542 + }, + { + "epoch": 0.9, + "learning_rate": 4.820839667152299e-07, + "loss": 0.3276, + "step": 10543 + }, + { + "epoch": 0.9, + "learning_rate": 4.812327348021805e-07, + "loss": 0.2945, + "step": 10544 + }, + { + "epoch": 0.9, + "learning_rate": 4.803822365486787e-07, + "loss": 0.3055, + "step": 10545 + }, + { + "epoch": 0.9, + "learning_rate": 4.795324720202754e-07, + "loss": 0.2872, + "step": 10546 + }, + { + "epoch": 0.9, + "learning_rate": 4.786834412824692e-07, + "loss": 0.2974, + "step": 10547 + }, + { + "epoch": 0.9, + "learning_rate": 4.778351444006946e-07, + "loss": 0.2582, + "step": 10548 + }, + { + "epoch": 0.9, + "learning_rate": 4.76987581440338e-07, + "loss": 0.2956, + "step": 10549 + }, + { + "epoch": 0.9, + "learning_rate": 4.761407524667239e-07, + "loss": 0.3148, + "step": 10550 + }, + { + "epoch": 0.9, + "learning_rate": 4.7529465754512226e-07, + "loss": 0.3064, + "step": 10551 + }, + { + "epoch": 0.9, + "learning_rate": 4.7444929674074435e-07, + "loss": 0.2509, + "step": 10552 + }, + { + "epoch": 0.9, + "learning_rate": 4.73604670118748e-07, + "loss": 0.2363, + "step": 10553 + }, + { + "epoch": 0.9, + "learning_rate": 4.7276077774423334e-07, + "loss": 0.2545, + "step": 10554 + }, + { + "epoch": 0.9, + "learning_rate": 4.7191761968224057e-07, + "loss": 0.2847, + "step": 10555 + }, + { + "epoch": 0.9, + "learning_rate": 4.7107519599775997e-07, + "loss": 0.2504, + "step": 10556 + }, + { + "epoch": 0.9, + "learning_rate": 4.7023350675571846e-07, + "loss": 0.6125, + "step": 10557 + }, + { + "epoch": 0.91, + "learning_rate": 4.693925520209908e-07, + "loss": 0.2773, + "step": 10558 + }, + { + "epoch": 0.91, + "learning_rate": 4.6855233185839175e-07, + "loss": 0.2301, + "step": 10559 + }, + { + "epoch": 0.91, + "learning_rate": 4.677128463326841e-07, + "loss": 0.3303, + "step": 10560 + }, + { + "epoch": 0.91, + "learning_rate": 4.668740955085704e-07, + "loss": 0.2538, + "step": 10561 + }, + { + "epoch": 0.91, + "learning_rate": 4.660360794506946e-07, + "loss": 0.261, + "step": 10562 + }, + { + "epoch": 0.91, + "learning_rate": 4.651987982236517e-07, + "loss": 0.2473, + "step": 10563 + }, + { + "epoch": 0.91, + "learning_rate": 4.6436225189197103e-07, + "loss": 0.2572, + "step": 10564 + }, + { + "epoch": 0.91, + "learning_rate": 4.635264405201312e-07, + "loss": 0.3335, + "step": 10565 + }, + { + "epoch": 0.91, + "learning_rate": 4.6269136417255167e-07, + "loss": 0.2767, + "step": 10566 + }, + { + "epoch": 0.91, + "learning_rate": 4.618570229135966e-07, + "loss": 0.2702, + "step": 10567 + }, + { + "epoch": 0.91, + "learning_rate": 4.610234168075733e-07, + "loss": 0.3082, + "step": 10568 + }, + { + "epoch": 0.91, + "learning_rate": 4.601905459187317e-07, + "loss": 0.2458, + "step": 10569 + }, + { + "epoch": 0.91, + "learning_rate": 4.5935841031126693e-07, + "loss": 0.2302, + "step": 10570 + }, + { + "epoch": 0.91, + "learning_rate": 4.585270100493122e-07, + "loss": 0.2791, + "step": 10571 + }, + { + "epoch": 0.91, + "learning_rate": 4.576963451969485e-07, + "loss": 0.2523, + "step": 10572 + }, + { + "epoch": 0.91, + "learning_rate": 4.5686641581820345e-07, + "loss": 0.2899, + "step": 10573 + }, + { + "epoch": 0.91, + "learning_rate": 4.5603722197703925e-07, + "loss": 0.2668, + "step": 10574 + }, + { + "epoch": 0.91, + "learning_rate": 4.552087637373681e-07, + "loss": 0.2677, + "step": 10575 + }, + { + "epoch": 0.91, + "learning_rate": 4.543810411630434e-07, + "loss": 0.2477, + "step": 10576 + }, + { + "epoch": 0.91, + "learning_rate": 4.535540543178629e-07, + "loss": 0.2773, + "step": 10577 + }, + { + "epoch": 0.91, + "learning_rate": 4.5272780326556466e-07, + "loss": 0.2397, + "step": 10578 + }, + { + "epoch": 0.91, + "learning_rate": 4.5190228806983205e-07, + "loss": 0.2968, + "step": 10579 + }, + { + "epoch": 0.91, + "learning_rate": 4.5107750879429424e-07, + "loss": 0.2383, + "step": 10580 + }, + { + "epoch": 0.91, + "learning_rate": 4.5025346550251815e-07, + "loss": 0.2869, + "step": 10581 + }, + { + "epoch": 0.91, + "learning_rate": 4.494301582580185e-07, + "loss": 0.2935, + "step": 10582 + }, + { + "epoch": 0.91, + "learning_rate": 4.4860758712425124e-07, + "loss": 0.2528, + "step": 10583 + }, + { + "epoch": 0.91, + "learning_rate": 4.4778575216461673e-07, + "loss": 0.2806, + "step": 10584 + }, + { + "epoch": 0.91, + "learning_rate": 4.4696465344245874e-07, + "loss": 0.2936, + "step": 10585 + }, + { + "epoch": 0.91, + "learning_rate": 4.4614429102105893e-07, + "loss": 0.2203, + "step": 10586 + }, + { + "epoch": 0.91, + "learning_rate": 4.4532466496365334e-07, + "loss": 0.2712, + "step": 10587 + }, + { + "epoch": 0.91, + "learning_rate": 4.4450577533341035e-07, + "loss": 0.2574, + "step": 10588 + }, + { + "epoch": 0.91, + "learning_rate": 4.436876221934472e-07, + "loss": 0.295, + "step": 10589 + }, + { + "epoch": 0.91, + "learning_rate": 4.4287020560682345e-07, + "loss": 0.263, + "step": 10590 + }, + { + "epoch": 0.91, + "learning_rate": 4.42053525636541e-07, + "loss": 0.2476, + "step": 10591 + }, + { + "epoch": 0.91, + "learning_rate": 4.4123758234554615e-07, + "loss": 0.2755, + "step": 10592 + }, + { + "epoch": 0.91, + "learning_rate": 4.404223757967252e-07, + "loss": 0.5533, + "step": 10593 + }, + { + "epoch": 0.91, + "learning_rate": 4.396079060529146e-07, + "loss": 0.2665, + "step": 10594 + }, + { + "epoch": 0.91, + "learning_rate": 4.387941731768852e-07, + "loss": 0.267, + "step": 10595 + }, + { + "epoch": 0.91, + "learning_rate": 4.3798117723135915e-07, + "loss": 0.2541, + "step": 10596 + }, + { + "epoch": 0.91, + "learning_rate": 4.371689182789962e-07, + "loss": 0.2609, + "step": 10597 + }, + { + "epoch": 0.91, + "learning_rate": 4.363573963824008e-07, + "loss": 0.2586, + "step": 10598 + }, + { + "epoch": 0.91, + "learning_rate": 4.3554661160412406e-07, + "loss": 0.2878, + "step": 10599 + }, + { + "epoch": 0.91, + "learning_rate": 4.3473656400665256e-07, + "loss": 0.2692, + "step": 10600 + }, + { + "epoch": 0.91, + "learning_rate": 4.339272536524253e-07, + "loss": 0.2986, + "step": 10601 + }, + { + "epoch": 0.91, + "learning_rate": 4.331186806038179e-07, + "loss": 0.2589, + "step": 10602 + }, + { + "epoch": 0.91, + "learning_rate": 4.323108449231506e-07, + "loss": 0.2644, + "step": 10603 + }, + { + "epoch": 0.91, + "learning_rate": 4.315037466726879e-07, + "loss": 0.269, + "step": 10604 + }, + { + "epoch": 0.91, + "learning_rate": 4.306973859146368e-07, + "loss": 0.2852, + "step": 10605 + }, + { + "epoch": 0.91, + "learning_rate": 4.298917627111476e-07, + "loss": 0.2578, + "step": 10606 + }, + { + "epoch": 0.91, + "learning_rate": 4.290868771243151e-07, + "loss": 0.2289, + "step": 10607 + }, + { + "epoch": 0.91, + "learning_rate": 4.282827292161762e-07, + "loss": 0.241, + "step": 10608 + }, + { + "epoch": 0.91, + "learning_rate": 4.2747931904870697e-07, + "loss": 0.2812, + "step": 10609 + }, + { + "epoch": 0.91, + "learning_rate": 4.266766466838335e-07, + "loss": 0.3018, + "step": 10610 + }, + { + "epoch": 0.91, + "learning_rate": 4.2587471218342057e-07, + "loss": 0.6152, + "step": 10611 + }, + { + "epoch": 0.91, + "learning_rate": 4.250735156092778e-07, + "loss": 0.2538, + "step": 10612 + }, + { + "epoch": 0.91, + "learning_rate": 4.242730570231568e-07, + "loss": 0.2151, + "step": 10613 + }, + { + "epoch": 0.91, + "learning_rate": 4.2347333648675383e-07, + "loss": 0.2544, + "step": 10614 + }, + { + "epoch": 0.91, + "learning_rate": 4.2267435406170845e-07, + "loss": 0.3024, + "step": 10615 + }, + { + "epoch": 0.91, + "learning_rate": 4.2187610980959916e-07, + "loss": 0.3073, + "step": 10616 + }, + { + "epoch": 0.91, + "learning_rate": 4.210786037919523e-07, + "loss": 0.2786, + "step": 10617 + }, + { + "epoch": 0.91, + "learning_rate": 4.2028183607023766e-07, + "loss": 0.3167, + "step": 10618 + }, + { + "epoch": 0.91, + "learning_rate": 4.194858067058627e-07, + "loss": 0.2816, + "step": 10619 + }, + { + "epoch": 0.91, + "learning_rate": 4.186905157601839e-07, + "loss": 0.2636, + "step": 10620 + }, + { + "epoch": 0.91, + "learning_rate": 4.1789596329449676e-07, + "loss": 0.6168, + "step": 10621 + }, + { + "epoch": 0.91, + "learning_rate": 4.1710214937004223e-07, + "loss": 0.2958, + "step": 10622 + }, + { + "epoch": 0.91, + "learning_rate": 4.1630907404800467e-07, + "loss": 0.2322, + "step": 10623 + }, + { + "epoch": 0.91, + "learning_rate": 4.155167373895075e-07, + "loss": 0.3427, + "step": 10624 + }, + { + "epoch": 0.91, + "learning_rate": 4.14725139455624e-07, + "loss": 0.2393, + "step": 10625 + }, + { + "epoch": 0.91, + "learning_rate": 4.139342803073632e-07, + "loss": 0.2492, + "step": 10626 + }, + { + "epoch": 0.91, + "learning_rate": 4.131441600056818e-07, + "loss": 0.295, + "step": 10627 + }, + { + "epoch": 0.91, + "learning_rate": 4.12354778611479e-07, + "loss": 0.2583, + "step": 10628 + }, + { + "epoch": 0.91, + "learning_rate": 4.11566136185596e-07, + "loss": 0.2529, + "step": 10629 + }, + { + "epoch": 0.91, + "learning_rate": 4.1077823278881767e-07, + "loss": 0.2549, + "step": 10630 + }, + { + "epoch": 0.91, + "learning_rate": 4.099910684818698e-07, + "loss": 0.2022, + "step": 10631 + }, + { + "epoch": 0.91, + "learning_rate": 4.092046433254271e-07, + "loss": 0.3221, + "step": 10632 + }, + { + "epoch": 0.91, + "learning_rate": 4.084189573801001e-07, + "loss": 0.2886, + "step": 10633 + }, + { + "epoch": 0.91, + "learning_rate": 4.076340107064458e-07, + "loss": 0.274, + "step": 10634 + }, + { + "epoch": 0.91, + "learning_rate": 4.0684980336496484e-07, + "loss": 0.282, + "step": 10635 + }, + { + "epoch": 0.91, + "learning_rate": 4.0606633541609983e-07, + "loss": 0.2231, + "step": 10636 + }, + { + "epoch": 0.91, + "learning_rate": 4.052836069202382e-07, + "loss": 0.2781, + "step": 10637 + }, + { + "epoch": 0.91, + "learning_rate": 4.045016179377048e-07, + "loss": 0.2819, + "step": 10638 + }, + { + "epoch": 0.91, + "learning_rate": 4.0372036852877607e-07, + "loss": 0.2317, + "step": 10639 + }, + { + "epoch": 0.91, + "learning_rate": 4.029398587536637e-07, + "loss": 0.2639, + "step": 10640 + }, + { + "epoch": 0.91, + "learning_rate": 4.021600886725263e-07, + "loss": 0.2709, + "step": 10641 + }, + { + "epoch": 0.91, + "learning_rate": 4.013810583454647e-07, + "loss": 0.3183, + "step": 10642 + }, + { + "epoch": 0.91, + "learning_rate": 4.006027678325242e-07, + "loss": 0.2614, + "step": 10643 + }, + { + "epoch": 0.91, + "learning_rate": 3.9982521719369003e-07, + "loss": 0.2722, + "step": 10644 + }, + { + "epoch": 0.91, + "learning_rate": 3.990484064888911e-07, + "loss": 0.2424, + "step": 10645 + }, + { + "epoch": 0.91, + "learning_rate": 3.982723357780027e-07, + "loss": 0.2834, + "step": 10646 + }, + { + "epoch": 0.91, + "learning_rate": 3.9749700512083824e-07, + "loss": 0.24, + "step": 10647 + }, + { + "epoch": 0.91, + "learning_rate": 3.9672241457715755e-07, + "loss": 0.2673, + "step": 10648 + }, + { + "epoch": 0.91, + "learning_rate": 3.959485642066618e-07, + "loss": 0.2567, + "step": 10649 + }, + { + "epoch": 0.91, + "learning_rate": 3.951754540689956e-07, + "loss": 0.2541, + "step": 10650 + }, + { + "epoch": 0.91, + "learning_rate": 3.944030842237467e-07, + "loss": 0.2701, + "step": 10651 + }, + { + "epoch": 0.91, + "learning_rate": 3.936314547304454e-07, + "loss": 0.2679, + "step": 10652 + }, + { + "epoch": 0.91, + "learning_rate": 3.928605656485662e-07, + "loss": 0.2813, + "step": 10653 + }, + { + "epoch": 0.91, + "learning_rate": 3.920904170375239e-07, + "loss": 0.2947, + "step": 10654 + }, + { + "epoch": 0.91, + "learning_rate": 3.913210089566766e-07, + "loss": 0.2719, + "step": 10655 + }, + { + "epoch": 0.91, + "learning_rate": 3.905523414653301e-07, + "loss": 0.2612, + "step": 10656 + }, + { + "epoch": 0.91, + "learning_rate": 3.897844146227259e-07, + "loss": 0.2624, + "step": 10657 + }, + { + "epoch": 0.91, + "learning_rate": 3.8901722848805443e-07, + "loss": 0.2831, + "step": 10658 + }, + { + "epoch": 0.91, + "learning_rate": 3.8825078312044515e-07, + "loss": 0.2256, + "step": 10659 + }, + { + "epoch": 0.91, + "learning_rate": 3.8748507857897187e-07, + "loss": 0.2902, + "step": 10660 + }, + { + "epoch": 0.91, + "learning_rate": 3.8672011492265404e-07, + "loss": 0.2784, + "step": 10661 + }, + { + "epoch": 0.91, + "learning_rate": 3.8595589221044674e-07, + "loss": 0.277, + "step": 10662 + }, + { + "epoch": 0.91, + "learning_rate": 3.8519241050125724e-07, + "loss": 0.2922, + "step": 10663 + }, + { + "epoch": 0.91, + "learning_rate": 3.844296698539274e-07, + "loss": 0.2989, + "step": 10664 + }, + { + "epoch": 0.91, + "learning_rate": 3.8366767032724685e-07, + "loss": 0.2772, + "step": 10665 + }, + { + "epoch": 0.91, + "learning_rate": 3.8290641197994526e-07, + "loss": 0.2678, + "step": 10666 + }, + { + "epoch": 0.91, + "learning_rate": 3.821458948706991e-07, + "loss": 0.323, + "step": 10667 + }, + { + "epoch": 0.91, + "learning_rate": 3.8138611905812584e-07, + "loss": 0.253, + "step": 10668 + }, + { + "epoch": 0.91, + "learning_rate": 3.806270846007798e-07, + "loss": 0.308, + "step": 10669 + }, + { + "epoch": 0.91, + "learning_rate": 3.7986879155717084e-07, + "loss": 0.2569, + "step": 10670 + }, + { + "epoch": 0.91, + "learning_rate": 3.7911123998573995e-07, + "loss": 0.3113, + "step": 10671 + }, + { + "epoch": 0.91, + "learning_rate": 3.78354429944876e-07, + "loss": 0.2806, + "step": 10672 + }, + { + "epoch": 0.91, + "learning_rate": 3.775983614929102e-07, + "loss": 0.2773, + "step": 10673 + }, + { + "epoch": 0.91, + "learning_rate": 3.768430346881169e-07, + "loss": 0.2744, + "step": 10674 + }, + { + "epoch": 0.92, + "learning_rate": 3.760884495887152e-07, + "loss": 0.2924, + "step": 10675 + }, + { + "epoch": 0.92, + "learning_rate": 3.7533460625285955e-07, + "loss": 0.2761, + "step": 10676 + }, + { + "epoch": 0.92, + "learning_rate": 3.7458150473865806e-07, + "loss": 0.2402, + "step": 10677 + }, + { + "epoch": 0.92, + "learning_rate": 3.7382914510415316e-07, + "loss": 0.3066, + "step": 10678 + }, + { + "epoch": 0.92, + "learning_rate": 3.7307752740733283e-07, + "loss": 0.2853, + "step": 10679 + }, + { + "epoch": 0.92, + "learning_rate": 3.7232665170612857e-07, + "loss": 0.249, + "step": 10680 + }, + { + "epoch": 0.92, + "learning_rate": 3.715765180584141e-07, + "loss": 0.287, + "step": 10681 + }, + { + "epoch": 0.92, + "learning_rate": 3.708271265220087e-07, + "loss": 0.2442, + "step": 10682 + }, + { + "epoch": 0.92, + "learning_rate": 3.7007847715466506e-07, + "loss": 0.2762, + "step": 10683 + }, + { + "epoch": 0.92, + "learning_rate": 3.6933057001409366e-07, + "loss": 0.2545, + "step": 10684 + }, + { + "epoch": 0.92, + "learning_rate": 3.685834051579329e-07, + "loss": 0.3158, + "step": 10685 + }, + { + "epoch": 0.92, + "learning_rate": 3.678369826437733e-07, + "loss": 0.2856, + "step": 10686 + }, + { + "epoch": 0.92, + "learning_rate": 3.670913025291456e-07, + "loss": 0.2534, + "step": 10687 + }, + { + "epoch": 0.92, + "learning_rate": 3.663463648715226e-07, + "loss": 0.2485, + "step": 10688 + }, + { + "epoch": 0.92, + "learning_rate": 3.656021697283196e-07, + "loss": 0.283, + "step": 10689 + }, + { + "epoch": 0.92, + "learning_rate": 3.6485871715689735e-07, + "loss": 0.2891, + "step": 10690 + }, + { + "epoch": 0.92, + "learning_rate": 3.6411600721455776e-07, + "loss": 0.2845, + "step": 10691 + }, + { + "epoch": 0.92, + "learning_rate": 3.633740399585428e-07, + "loss": 0.2684, + "step": 10692 + }, + { + "epoch": 0.92, + "learning_rate": 3.6263281544603903e-07, + "loss": 0.2914, + "step": 10693 + }, + { + "epoch": 0.92, + "learning_rate": 3.6189233373418064e-07, + "loss": 0.2717, + "step": 10694 + }, + { + "epoch": 0.92, + "learning_rate": 3.611525948800376e-07, + "loss": 0.2533, + "step": 10695 + }, + { + "epoch": 0.92, + "learning_rate": 3.6041359894062544e-07, + "loss": 0.2678, + "step": 10696 + }, + { + "epoch": 0.92, + "learning_rate": 3.596753459729019e-07, + "loss": 0.2552, + "step": 10697 + }, + { + "epoch": 0.92, + "learning_rate": 3.589378360337692e-07, + "loss": 0.2858, + "step": 10698 + }, + { + "epoch": 0.92, + "learning_rate": 3.582010691800708e-07, + "loss": 0.2289, + "step": 10699 + }, + { + "epoch": 0.92, + "learning_rate": 3.574650454685902e-07, + "loss": 0.2962, + "step": 10700 + }, + { + "epoch": 0.92, + "learning_rate": 3.5672976495606084e-07, + "loss": 0.2483, + "step": 10701 + }, + { + "epoch": 0.92, + "learning_rate": 3.5599522769915074e-07, + "loss": 0.24, + "step": 10702 + }, + { + "epoch": 0.92, + "learning_rate": 3.5526143375447684e-07, + "loss": 0.2428, + "step": 10703 + }, + { + "epoch": 0.92, + "learning_rate": 3.5452838317859615e-07, + "loss": 0.3191, + "step": 10704 + }, + { + "epoch": 0.92, + "learning_rate": 3.537960760280068e-07, + "loss": 0.2846, + "step": 10705 + }, + { + "epoch": 0.92, + "learning_rate": 3.5306451235915475e-07, + "loss": 0.2684, + "step": 10706 + }, + { + "epoch": 0.92, + "learning_rate": 3.523336922284204e-07, + "loss": 0.2854, + "step": 10707 + }, + { + "epoch": 0.92, + "learning_rate": 3.5160361569213766e-07, + "loss": 0.3141, + "step": 10708 + }, + { + "epoch": 0.92, + "learning_rate": 3.5087428280657144e-07, + "loss": 0.2583, + "step": 10709 + }, + { + "epoch": 0.92, + "learning_rate": 3.50145693627939e-07, + "loss": 0.2971, + "step": 10710 + }, + { + "epoch": 0.92, + "learning_rate": 3.4941784821239445e-07, + "loss": 0.2782, + "step": 10711 + }, + { + "epoch": 0.92, + "learning_rate": 3.486907466160372e-07, + "loss": 0.2786, + "step": 10712 + }, + { + "epoch": 0.92, + "learning_rate": 3.4796438889491025e-07, + "loss": 0.2624, + "step": 10713 + }, + { + "epoch": 0.92, + "learning_rate": 3.472387751049944e-07, + "loss": 0.2983, + "step": 10714 + }, + { + "epoch": 0.92, + "learning_rate": 3.4651390530221927e-07, + "loss": 0.2867, + "step": 10715 + }, + { + "epoch": 0.92, + "learning_rate": 3.457897795424525e-07, + "loss": 0.303, + "step": 10716 + }, + { + "epoch": 0.92, + "learning_rate": 3.450663978815061e-07, + "loss": 0.3206, + "step": 10717 + }, + { + "epoch": 0.92, + "learning_rate": 3.443437603751354e-07, + "loss": 0.2963, + "step": 10718 + }, + { + "epoch": 0.92, + "learning_rate": 3.436218670790381e-07, + "loss": 0.2662, + "step": 10719 + }, + { + "epoch": 0.92, + "learning_rate": 3.4290071804885526e-07, + "loss": 0.2805, + "step": 10720 + }, + { + "epoch": 0.92, + "learning_rate": 3.4218031334016465e-07, + "loss": 0.2844, + "step": 10721 + }, + { + "epoch": 0.92, + "learning_rate": 3.414606530084974e-07, + "loss": 0.2551, + "step": 10722 + }, + { + "epoch": 0.92, + "learning_rate": 3.4074173710931804e-07, + "loss": 0.2351, + "step": 10723 + }, + { + "epoch": 0.92, + "learning_rate": 3.4002356569803775e-07, + "loss": 0.2902, + "step": 10724 + }, + { + "epoch": 0.92, + "learning_rate": 3.3930613883000897e-07, + "loss": 0.2559, + "step": 10725 + }, + { + "epoch": 0.92, + "learning_rate": 3.3858945656052855e-07, + "loss": 0.238, + "step": 10726 + }, + { + "epoch": 0.92, + "learning_rate": 3.3787351894483566e-07, + "loss": 0.2661, + "step": 10727 + }, + { + "epoch": 0.92, + "learning_rate": 3.3715832603810727e-07, + "loss": 0.2454, + "step": 10728 + }, + { + "epoch": 0.92, + "learning_rate": 3.3644387789547264e-07, + "loss": 0.2573, + "step": 10729 + }, + { + "epoch": 0.92, + "learning_rate": 3.357301745719932e-07, + "loss": 0.2639, + "step": 10730 + }, + { + "epoch": 0.92, + "learning_rate": 3.3501721612267833e-07, + "loss": 0.2828, + "step": 10731 + }, + { + "epoch": 0.92, + "learning_rate": 3.34305002602483e-07, + "loss": 0.3252, + "step": 10732 + }, + { + "epoch": 0.92, + "learning_rate": 3.335935340662966e-07, + "loss": 0.2589, + "step": 10733 + }, + { + "epoch": 0.92, + "learning_rate": 3.3288281056895746e-07, + "loss": 0.2573, + "step": 10734 + }, + { + "epoch": 0.92, + "learning_rate": 3.321728321652451e-07, + "loss": 0.345, + "step": 10735 + }, + { + "epoch": 0.92, + "learning_rate": 3.314635989098802e-07, + "loss": 0.3053, + "step": 10736 + }, + { + "epoch": 0.92, + "learning_rate": 3.307551108575291e-07, + "loss": 0.2269, + "step": 10737 + }, + { + "epoch": 0.92, + "learning_rate": 3.300473680627947e-07, + "loss": 0.2351, + "step": 10738 + }, + { + "epoch": 0.92, + "learning_rate": 3.2934037058023115e-07, + "loss": 0.2972, + "step": 10739 + }, + { + "epoch": 0.92, + "learning_rate": 3.28634118464326e-07, + "loss": 0.2173, + "step": 10740 + }, + { + "epoch": 0.92, + "learning_rate": 3.2792861176951465e-07, + "loss": 0.2764, + "step": 10741 + }, + { + "epoch": 0.92, + "learning_rate": 3.2722385055017567e-07, + "loss": 0.262, + "step": 10742 + }, + { + "epoch": 0.92, + "learning_rate": 3.26519834860628e-07, + "loss": 0.3237, + "step": 10743 + }, + { + "epoch": 0.92, + "learning_rate": 3.258165647551337e-07, + "loss": 0.2888, + "step": 10744 + }, + { + "epoch": 0.92, + "learning_rate": 3.2511404028789604e-07, + "loss": 0.2547, + "step": 10745 + }, + { + "epoch": 0.92, + "learning_rate": 3.2441226151306403e-07, + "loss": 0.2758, + "step": 10746 + }, + { + "epoch": 0.92, + "learning_rate": 3.2371122848472655e-07, + "loss": 0.222, + "step": 10747 + }, + { + "epoch": 0.92, + "learning_rate": 3.230109412569149e-07, + "loss": 0.267, + "step": 10748 + }, + { + "epoch": 0.92, + "learning_rate": 3.223113998836036e-07, + "loss": 0.265, + "step": 10749 + }, + { + "epoch": 0.92, + "learning_rate": 3.216126044187118e-07, + "loss": 0.2225, + "step": 10750 + }, + { + "epoch": 0.92, + "learning_rate": 3.209145549160997e-07, + "loss": 0.2956, + "step": 10751 + }, + { + "epoch": 0.92, + "learning_rate": 3.2021725142956537e-07, + "loss": 0.2444, + "step": 10752 + }, + { + "epoch": 0.92, + "learning_rate": 3.1952069401285814e-07, + "loss": 0.2349, + "step": 10753 + }, + { + "epoch": 0.92, + "learning_rate": 3.188248827196616e-07, + "loss": 0.2723, + "step": 10754 + }, + { + "epoch": 0.92, + "learning_rate": 3.181298176036074e-07, + "loss": 0.5592, + "step": 10755 + }, + { + "epoch": 0.92, + "learning_rate": 3.1743549871826704e-07, + "loss": 0.2473, + "step": 10756 + }, + { + "epoch": 0.92, + "learning_rate": 3.167419261171556e-07, + "loss": 0.2812, + "step": 10757 + }, + { + "epoch": 0.92, + "learning_rate": 3.160490998537313e-07, + "loss": 0.288, + "step": 10758 + }, + { + "epoch": 0.92, + "learning_rate": 3.1535701998139045e-07, + "loss": 0.2789, + "step": 10759 + }, + { + "epoch": 0.92, + "learning_rate": 3.146656865534803e-07, + "loss": 0.2982, + "step": 10760 + }, + { + "epoch": 0.92, + "learning_rate": 3.139750996232804e-07, + "loss": 0.2981, + "step": 10761 + }, + { + "epoch": 0.92, + "learning_rate": 3.132852592440194e-07, + "loss": 0.2327, + "step": 10762 + }, + { + "epoch": 0.92, + "learning_rate": 3.1259616546886804e-07, + "loss": 0.2523, + "step": 10763 + }, + { + "epoch": 0.92, + "learning_rate": 3.119078183509372e-07, + "loss": 0.3148, + "step": 10764 + }, + { + "epoch": 0.92, + "learning_rate": 3.1122021794328214e-07, + "loss": 0.2629, + "step": 10765 + }, + { + "epoch": 0.92, + "learning_rate": 3.1053336429889616e-07, + "loss": 0.5923, + "step": 10766 + }, + { + "epoch": 0.92, + "learning_rate": 3.098472574707245e-07, + "loss": 0.2656, + "step": 10767 + }, + { + "epoch": 0.92, + "learning_rate": 3.0916189751164506e-07, + "loss": 0.3156, + "step": 10768 + }, + { + "epoch": 0.92, + "learning_rate": 3.08477284474481e-07, + "loss": 0.2507, + "step": 10769 + }, + { + "epoch": 0.92, + "learning_rate": 3.077934184120035e-07, + "loss": 0.2425, + "step": 10770 + }, + { + "epoch": 0.92, + "learning_rate": 3.0711029937691704e-07, + "loss": 0.2481, + "step": 10771 + }, + { + "epoch": 0.92, + "learning_rate": 3.06427927421874e-07, + "loss": 0.2932, + "step": 10772 + }, + { + "epoch": 0.92, + "learning_rate": 3.0574630259947e-07, + "loss": 0.2834, + "step": 10773 + }, + { + "epoch": 0.92, + "learning_rate": 3.050654249622398e-07, + "loss": 0.2822, + "step": 10774 + }, + { + "epoch": 0.92, + "learning_rate": 3.0438529456266463e-07, + "loss": 0.2178, + "step": 10775 + }, + { + "epoch": 0.92, + "learning_rate": 3.037059114531604e-07, + "loss": 0.2725, + "step": 10776 + }, + { + "epoch": 0.92, + "learning_rate": 3.0302727568609637e-07, + "loss": 0.3517, + "step": 10777 + }, + { + "epoch": 0.92, + "learning_rate": 3.0234938731377394e-07, + "loss": 0.2649, + "step": 10778 + }, + { + "epoch": 0.92, + "learning_rate": 3.016722463884436e-07, + "loss": 0.2627, + "step": 10779 + }, + { + "epoch": 0.92, + "learning_rate": 3.009958529622958e-07, + "loss": 0.3549, + "step": 10780 + }, + { + "epoch": 0.92, + "learning_rate": 3.0032020708746334e-07, + "loss": 0.308, + "step": 10781 + }, + { + "epoch": 0.92, + "learning_rate": 2.996453088160234e-07, + "loss": 0.2462, + "step": 10782 + }, + { + "epoch": 0.92, + "learning_rate": 2.989711581999899e-07, + "loss": 0.2692, + "step": 10783 + }, + { + "epoch": 0.92, + "learning_rate": 2.982977552913269e-07, + "loss": 0.2842, + "step": 10784 + }, + { + "epoch": 0.92, + "learning_rate": 2.97625100141935e-07, + "loss": 0.2784, + "step": 10785 + }, + { + "epoch": 0.92, + "learning_rate": 2.969531928036595e-07, + "loss": 0.2428, + "step": 10786 + }, + { + "epoch": 0.92, + "learning_rate": 2.9628203332828675e-07, + "loss": 0.33, + "step": 10787 + }, + { + "epoch": 0.92, + "learning_rate": 2.9561162176754863e-07, + "loss": 0.2586, + "step": 10788 + }, + { + "epoch": 0.92, + "learning_rate": 2.949419581731161e-07, + "loss": 0.2617, + "step": 10789 + }, + { + "epoch": 0.92, + "learning_rate": 2.9427304259660117e-07, + "loss": 0.3091, + "step": 10790 + }, + { + "epoch": 0.92, + "learning_rate": 2.9360487508956594e-07, + "loss": 0.275, + "step": 10791 + }, + { + "epoch": 0.93, + "learning_rate": 2.9293745570350365e-07, + "loss": 0.2443, + "step": 10792 + }, + { + "epoch": 0.93, + "learning_rate": 2.922707844898598e-07, + "loss": 0.2662, + "step": 10793 + }, + { + "epoch": 0.93, + "learning_rate": 2.9160486150001556e-07, + "loss": 0.2491, + "step": 10794 + }, + { + "epoch": 0.93, + "learning_rate": 2.909396867852987e-07, + "loss": 0.2719, + "step": 10795 + }, + { + "epoch": 0.93, + "learning_rate": 2.9027526039697717e-07, + "loss": 0.2585, + "step": 10796 + }, + { + "epoch": 0.93, + "learning_rate": 2.8961158238625997e-07, + "loss": 0.2435, + "step": 10797 + }, + { + "epoch": 0.93, + "learning_rate": 2.889486528043028e-07, + "loss": 0.3191, + "step": 10798 + }, + { + "epoch": 0.93, + "learning_rate": 2.8828647170219937e-07, + "loss": 0.2316, + "step": 10799 + }, + { + "epoch": 0.93, + "learning_rate": 2.876250391309876e-07, + "loss": 0.2142, + "step": 10800 + }, + { + "epoch": 0.93, + "learning_rate": 2.869643551416479e-07, + "loss": 0.2639, + "step": 10801 + }, + { + "epoch": 0.93, + "learning_rate": 2.863044197851017e-07, + "loss": 0.2616, + "step": 10802 + }, + { + "epoch": 0.93, + "learning_rate": 2.85645233112215e-07, + "loss": 0.2632, + "step": 10803 + }, + { + "epoch": 0.93, + "learning_rate": 2.8498679517379277e-07, + "loss": 0.2493, + "step": 10804 + }, + { + "epoch": 0.93, + "learning_rate": 2.843291060205855e-07, + "loss": 0.2602, + "step": 10805 + }, + { + "epoch": 0.93, + "learning_rate": 2.836721657032848e-07, + "loss": 0.291, + "step": 10806 + }, + { + "epoch": 0.93, + "learning_rate": 2.8301597427252137e-07, + "loss": 0.2439, + "step": 10807 + }, + { + "epoch": 0.93, + "learning_rate": 2.823605317788769e-07, + "loss": 0.2838, + "step": 10808 + }, + { + "epoch": 0.93, + "learning_rate": 2.8170583827286435e-07, + "loss": 0.2596, + "step": 10809 + }, + { + "epoch": 0.93, + "learning_rate": 2.810518938049478e-07, + "loss": 0.2753, + "step": 10810 + }, + { + "epoch": 0.93, + "learning_rate": 2.8039869842552583e-07, + "loss": 0.2918, + "step": 10811 + }, + { + "epoch": 0.93, + "learning_rate": 2.797462521849481e-07, + "loss": 0.3076, + "step": 10812 + }, + { + "epoch": 0.93, + "learning_rate": 2.790945551335e-07, + "loss": 0.2723, + "step": 10813 + }, + { + "epoch": 0.93, + "learning_rate": 2.784436073214103e-07, + "loss": 0.2469, + "step": 10814 + }, + { + "epoch": 0.93, + "learning_rate": 2.777934087988532e-07, + "loss": 0.3251, + "step": 10815 + }, + { + "epoch": 0.93, + "learning_rate": 2.771439596159409e-07, + "loss": 0.2961, + "step": 10816 + }, + { + "epoch": 0.93, + "learning_rate": 2.7649525982272996e-07, + "loss": 0.5551, + "step": 10817 + }, + { + "epoch": 0.93, + "learning_rate": 2.7584730946921825e-07, + "loss": 0.2514, + "step": 10818 + }, + { + "epoch": 0.93, + "learning_rate": 2.75200108605348e-07, + "loss": 0.3074, + "step": 10819 + }, + { + "epoch": 0.93, + "learning_rate": 2.745536572810026e-07, + "loss": 0.3062, + "step": 10820 + }, + { + "epoch": 0.93, + "learning_rate": 2.739079555460056e-07, + "loss": 0.2678, + "step": 10821 + }, + { + "epoch": 0.93, + "learning_rate": 2.73263003450126e-07, + "loss": 0.286, + "step": 10822 + }, + { + "epoch": 0.93, + "learning_rate": 2.726188010430719e-07, + "loss": 0.3054, + "step": 10823 + }, + { + "epoch": 0.93, + "learning_rate": 2.719753483744969e-07, + "loss": 0.294, + "step": 10824 + }, + { + "epoch": 0.93, + "learning_rate": 2.7133264549399464e-07, + "loss": 0.2742, + "step": 10825 + }, + { + "epoch": 0.93, + "learning_rate": 2.70690692451101e-07, + "loss": 0.2859, + "step": 10826 + }, + { + "epoch": 0.93, + "learning_rate": 2.700494892952954e-07, + "loss": 0.2513, + "step": 10827 + }, + { + "epoch": 0.93, + "learning_rate": 2.69409036075996e-07, + "loss": 0.2698, + "step": 10828 + }, + { + "epoch": 0.93, + "learning_rate": 2.687693328425711e-07, + "loss": 0.2748, + "step": 10829 + }, + { + "epoch": 0.93, + "learning_rate": 2.681303796443202e-07, + "loss": 0.3055, + "step": 10830 + }, + { + "epoch": 0.93, + "learning_rate": 2.6749217653049385e-07, + "loss": 0.2891, + "step": 10831 + }, + { + "epoch": 0.93, + "learning_rate": 2.6685472355028053e-07, + "loss": 0.2393, + "step": 10832 + }, + { + "epoch": 0.93, + "learning_rate": 2.66218020752812e-07, + "loss": 0.249, + "step": 10833 + }, + { + "epoch": 0.93, + "learning_rate": 2.655820681871635e-07, + "loss": 0.2439, + "step": 10834 + }, + { + "epoch": 0.93, + "learning_rate": 2.6494686590234797e-07, + "loss": 0.285, + "step": 10835 + }, + { + "epoch": 0.93, + "learning_rate": 2.6431241394732856e-07, + "loss": 0.2506, + "step": 10836 + }, + { + "epoch": 0.93, + "learning_rate": 2.636787123710016e-07, + "loss": 0.2686, + "step": 10837 + }, + { + "epoch": 0.93, + "learning_rate": 2.6304576122221035e-07, + "loss": 0.2657, + "step": 10838 + }, + { + "epoch": 0.93, + "learning_rate": 2.624135605497402e-07, + "loss": 0.5769, + "step": 10839 + }, + { + "epoch": 0.93, + "learning_rate": 2.617821104023177e-07, + "loss": 0.2706, + "step": 10840 + }, + { + "epoch": 0.93, + "learning_rate": 2.6115141082861396e-07, + "loss": 0.2601, + "step": 10841 + }, + { + "epoch": 0.93, + "learning_rate": 2.605214618772356e-07, + "loss": 0.2571, + "step": 10842 + }, + { + "epoch": 0.93, + "learning_rate": 2.598922635967416e-07, + "loss": 0.2645, + "step": 10843 + }, + { + "epoch": 0.93, + "learning_rate": 2.592638160356231e-07, + "loss": 0.2885, + "step": 10844 + }, + { + "epoch": 0.93, + "learning_rate": 2.586361192423181e-07, + "loss": 0.2585, + "step": 10845 + }, + { + "epoch": 0.93, + "learning_rate": 2.5800917326521013e-07, + "loss": 0.5839, + "step": 10846 + }, + { + "epoch": 0.93, + "learning_rate": 2.573829781526171e-07, + "loss": 0.2693, + "step": 10847 + }, + { + "epoch": 0.93, + "learning_rate": 2.56757533952805e-07, + "loss": 0.2687, + "step": 10848 + }, + { + "epoch": 0.93, + "learning_rate": 2.561328407139785e-07, + "loss": 0.286, + "step": 10849 + }, + { + "epoch": 0.93, + "learning_rate": 2.555088984842868e-07, + "loss": 0.2788, + "step": 10850 + }, + { + "epoch": 0.93, + "learning_rate": 2.548857073118216e-07, + "loss": 0.2451, + "step": 10851 + }, + { + "epoch": 0.93, + "learning_rate": 2.5426326724461215e-07, + "loss": 0.2814, + "step": 10852 + }, + { + "epoch": 0.93, + "learning_rate": 2.5364157833063676e-07, + "loss": 0.279, + "step": 10853 + }, + { + "epoch": 0.93, + "learning_rate": 2.530206406178104e-07, + "loss": 0.2783, + "step": 10854 + }, + { + "epoch": 0.93, + "learning_rate": 2.524004541539904e-07, + "loss": 0.272, + "step": 10855 + }, + { + "epoch": 0.93, + "learning_rate": 2.517810189869796e-07, + "loss": 0.2605, + "step": 10856 + }, + { + "epoch": 0.93, + "learning_rate": 2.5116233516452094e-07, + "loss": 0.2546, + "step": 10857 + }, + { + "epoch": 0.93, + "learning_rate": 2.505444027342996e-07, + "loss": 0.2748, + "step": 10858 + }, + { + "epoch": 0.93, + "learning_rate": 2.4992722174393966e-07, + "loss": 0.2784, + "step": 10859 + }, + { + "epoch": 0.93, + "learning_rate": 2.493107922410165e-07, + "loss": 0.2602, + "step": 10860 + }, + { + "epoch": 0.93, + "learning_rate": 2.486951142730354e-07, + "loss": 0.3102, + "step": 10861 + }, + { + "epoch": 0.93, + "learning_rate": 2.480801878874528e-07, + "loss": 0.2849, + "step": 10862 + }, + { + "epoch": 0.93, + "learning_rate": 2.474660131316642e-07, + "loss": 0.2665, + "step": 10863 + }, + { + "epoch": 0.93, + "learning_rate": 2.468525900530061e-07, + "loss": 0.2656, + "step": 10864 + }, + { + "epoch": 0.93, + "learning_rate": 2.4623991869875965e-07, + "loss": 0.27, + "step": 10865 + }, + { + "epoch": 0.93, + "learning_rate": 2.456279991161437e-07, + "loss": 0.2473, + "step": 10866 + }, + { + "epoch": 0.93, + "learning_rate": 2.450168313523249e-07, + "loss": 0.277, + "step": 10867 + }, + { + "epoch": 0.93, + "learning_rate": 2.444064154544079e-07, + "loss": 0.2339, + "step": 10868 + }, + { + "epoch": 0.93, + "learning_rate": 2.437967514694406e-07, + "loss": 0.2688, + "step": 10869 + }, + { + "epoch": 0.93, + "learning_rate": 2.4318783944441314e-07, + "loss": 0.3087, + "step": 10870 + }, + { + "epoch": 0.93, + "learning_rate": 2.4257967942625694e-07, + "loss": 0.3168, + "step": 10871 + }, + { + "epoch": 0.93, + "learning_rate": 2.4197227146184664e-07, + "loss": 0.2666, + "step": 10872 + }, + { + "epoch": 0.93, + "learning_rate": 2.4136561559799597e-07, + "loss": 0.2789, + "step": 10873 + }, + { + "epoch": 0.93, + "learning_rate": 2.4075971188146754e-07, + "loss": 0.2799, + "step": 10874 + }, + { + "epoch": 0.93, + "learning_rate": 2.401545603589572e-07, + "loss": 0.2864, + "step": 10875 + }, + { + "epoch": 0.93, + "learning_rate": 2.3955016107710896e-07, + "loss": 0.2412, + "step": 10876 + }, + { + "epoch": 0.93, + "learning_rate": 2.3894651408250536e-07, + "loss": 0.2651, + "step": 10877 + }, + { + "epoch": 0.93, + "learning_rate": 2.3834361942167484e-07, + "loss": 0.2861, + "step": 10878 + }, + { + "epoch": 0.93, + "learning_rate": 2.3774147714108463e-07, + "loss": 0.2487, + "step": 10879 + }, + { + "epoch": 0.93, + "learning_rate": 2.371400872871432e-07, + "loss": 0.2917, + "step": 10880 + }, + { + "epoch": 0.93, + "learning_rate": 2.365394499062057e-07, + "loss": 0.2616, + "step": 10881 + }, + { + "epoch": 0.93, + "learning_rate": 2.3593956504456396e-07, + "loss": 0.2626, + "step": 10882 + }, + { + "epoch": 0.93, + "learning_rate": 2.353404327484543e-07, + "loss": 0.2569, + "step": 10883 + }, + { + "epoch": 0.93, + "learning_rate": 2.347420530640565e-07, + "loss": 0.2606, + "step": 10884 + }, + { + "epoch": 0.93, + "learning_rate": 2.3414442603748922e-07, + "loss": 0.3099, + "step": 10885 + }, + { + "epoch": 0.93, + "learning_rate": 2.335475517148167e-07, + "loss": 0.3089, + "step": 10886 + }, + { + "epoch": 0.93, + "learning_rate": 2.329514301420388e-07, + "loss": 0.3102, + "step": 10887 + }, + { + "epoch": 0.93, + "learning_rate": 2.3235606136510545e-07, + "loss": 0.2842, + "step": 10888 + }, + { + "epoch": 0.93, + "learning_rate": 2.3176144542990443e-07, + "loss": 0.2803, + "step": 10889 + }, + { + "epoch": 0.93, + "learning_rate": 2.3116758238226233e-07, + "loss": 0.2562, + "step": 10890 + }, + { + "epoch": 0.93, + "learning_rate": 2.3057447226795705e-07, + "loss": 0.2645, + "step": 10891 + }, + { + "epoch": 0.93, + "learning_rate": 2.2998211513269753e-07, + "loss": 0.2889, + "step": 10892 + }, + { + "epoch": 0.93, + "learning_rate": 2.293905110221406e-07, + "loss": 0.2865, + "step": 10893 + }, + { + "epoch": 0.93, + "learning_rate": 2.2879965998188646e-07, + "loss": 0.259, + "step": 10894 + }, + { + "epoch": 0.93, + "learning_rate": 2.2820956205747312e-07, + "loss": 0.2379, + "step": 10895 + }, + { + "epoch": 0.93, + "learning_rate": 2.2762021729438423e-07, + "loss": 0.2462, + "step": 10896 + }, + { + "epoch": 0.93, + "learning_rate": 2.2703162573804006e-07, + "loss": 0.2617, + "step": 10897 + }, + { + "epoch": 0.93, + "learning_rate": 2.264437874338099e-07, + "loss": 0.2662, + "step": 10898 + }, + { + "epoch": 0.93, + "learning_rate": 2.2585670242699975e-07, + "loss": 0.2913, + "step": 10899 + }, + { + "epoch": 0.93, + "learning_rate": 2.2527037076286008e-07, + "loss": 0.2559, + "step": 10900 + }, + { + "epoch": 0.93, + "learning_rate": 2.2468479248658026e-07, + "loss": 0.3108, + "step": 10901 + }, + { + "epoch": 0.93, + "learning_rate": 2.2409996764329644e-07, + "loss": 0.3046, + "step": 10902 + }, + { + "epoch": 0.93, + "learning_rate": 2.235158962780837e-07, + "loss": 0.2772, + "step": 10903 + }, + { + "epoch": 0.93, + "learning_rate": 2.2293257843595706e-07, + "loss": 0.2725, + "step": 10904 + }, + { + "epoch": 0.93, + "learning_rate": 2.223500141618795e-07, + "loss": 0.2646, + "step": 10905 + }, + { + "epoch": 0.93, + "learning_rate": 2.2176820350074846e-07, + "loss": 0.2773, + "step": 10906 + }, + { + "epoch": 0.93, + "learning_rate": 2.2118714649740912e-07, + "loss": 0.2639, + "step": 10907 + }, + { + "epoch": 0.94, + "learning_rate": 2.206068431966446e-07, + "loss": 0.2736, + "step": 10908 + }, + { + "epoch": 0.94, + "learning_rate": 2.2002729364318464e-07, + "loss": 0.226, + "step": 10909 + }, + { + "epoch": 0.94, + "learning_rate": 2.1944849788169798e-07, + "loss": 0.2361, + "step": 10910 + }, + { + "epoch": 0.94, + "learning_rate": 2.1887045595679112e-07, + "loss": 0.2853, + "step": 10911 + }, + { + "epoch": 0.94, + "learning_rate": 2.182931679130218e-07, + "loss": 0.2546, + "step": 10912 + }, + { + "epoch": 0.94, + "learning_rate": 2.1771663379488106e-07, + "loss": 0.2852, + "step": 10913 + }, + { + "epoch": 0.94, + "learning_rate": 2.1714085364680671e-07, + "loss": 0.2612, + "step": 10914 + }, + { + "epoch": 0.94, + "learning_rate": 2.1656582751317657e-07, + "loss": 0.2672, + "step": 10915 + }, + { + "epoch": 0.94, + "learning_rate": 2.1599155543831074e-07, + "loss": 0.261, + "step": 10916 + }, + { + "epoch": 0.94, + "learning_rate": 2.1541803746647272e-07, + "loss": 0.3041, + "step": 10917 + }, + { + "epoch": 0.94, + "learning_rate": 2.1484527364186492e-07, + "loss": 0.2885, + "step": 10918 + }, + { + "epoch": 0.94, + "learning_rate": 2.1427326400863424e-07, + "loss": 0.2695, + "step": 10919 + }, + { + "epoch": 0.94, + "learning_rate": 2.1370200861086655e-07, + "loss": 0.2617, + "step": 10920 + }, + { + "epoch": 0.94, + "learning_rate": 2.1313150749259216e-07, + "loss": 0.263, + "step": 10921 + }, + { + "epoch": 0.94, + "learning_rate": 2.1256176069778367e-07, + "loss": 0.2766, + "step": 10922 + }, + { + "epoch": 0.94, + "learning_rate": 2.1199276827035374e-07, + "loss": 0.2861, + "step": 10923 + }, + { + "epoch": 0.94, + "learning_rate": 2.1142453025415734e-07, + "loss": 0.2974, + "step": 10924 + }, + { + "epoch": 0.94, + "learning_rate": 2.1085704669299045e-07, + "loss": 0.2552, + "step": 10925 + }, + { + "epoch": 0.94, + "learning_rate": 2.102903176305926e-07, + "loss": 0.2514, + "step": 10926 + }, + { + "epoch": 0.94, + "learning_rate": 2.097243431106466e-07, + "loss": 0.3233, + "step": 10927 + }, + { + "epoch": 0.94, + "learning_rate": 2.091591231767709e-07, + "loss": 0.2919, + "step": 10928 + }, + { + "epoch": 0.94, + "learning_rate": 2.0859465787253396e-07, + "loss": 0.263, + "step": 10929 + }, + { + "epoch": 0.94, + "learning_rate": 2.0803094724143879e-07, + "loss": 0.2747, + "step": 10930 + }, + { + "epoch": 0.94, + "learning_rate": 2.0746799132693506e-07, + "loss": 0.2407, + "step": 10931 + }, + { + "epoch": 0.94, + "learning_rate": 2.069057901724114e-07, + "loss": 0.2614, + "step": 10932 + }, + { + "epoch": 0.94, + "learning_rate": 2.0634434382120205e-07, + "loss": 0.264, + "step": 10933 + }, + { + "epoch": 0.94, + "learning_rate": 2.0578365231657792e-07, + "loss": 0.2674, + "step": 10934 + }, + { + "epoch": 0.94, + "learning_rate": 2.0522371570175447e-07, + "loss": 0.2305, + "step": 10935 + }, + { + "epoch": 0.94, + "learning_rate": 2.046645340198905e-07, + "loss": 0.2805, + "step": 10936 + }, + { + "epoch": 0.94, + "learning_rate": 2.0410610731408377e-07, + "loss": 0.2699, + "step": 10937 + }, + { + "epoch": 0.94, + "learning_rate": 2.0354843562737537e-07, + "loss": 0.217, + "step": 10938 + }, + { + "epoch": 0.94, + "learning_rate": 2.0299151900274873e-07, + "loss": 0.3044, + "step": 10939 + }, + { + "epoch": 0.94, + "learning_rate": 2.0243535748312615e-07, + "loss": 0.2817, + "step": 10940 + }, + { + "epoch": 0.94, + "learning_rate": 2.018799511113767e-07, + "loss": 0.3231, + "step": 10941 + }, + { + "epoch": 0.94, + "learning_rate": 2.0132529993030392e-07, + "loss": 0.2599, + "step": 10942 + }, + { + "epoch": 0.94, + "learning_rate": 2.0077140398266248e-07, + "loss": 0.2949, + "step": 10943 + }, + { + "epoch": 0.94, + "learning_rate": 2.002182633111416e-07, + "loss": 0.6014, + "step": 10944 + }, + { + "epoch": 0.94, + "learning_rate": 1.9966587795837377e-07, + "loss": 0.2759, + "step": 10945 + }, + { + "epoch": 0.94, + "learning_rate": 1.9911424796693611e-07, + "loss": 0.3049, + "step": 10946 + }, + { + "epoch": 0.94, + "learning_rate": 1.985633733793446e-07, + "loss": 0.2601, + "step": 10947 + }, + { + "epoch": 0.94, + "learning_rate": 1.9801325423805862e-07, + "loss": 0.3292, + "step": 10948 + }, + { + "epoch": 0.94, + "learning_rate": 1.9746389058547534e-07, + "loss": 0.2265, + "step": 10949 + }, + { + "epoch": 0.94, + "learning_rate": 1.9691528246394197e-07, + "loss": 0.5916, + "step": 10950 + }, + { + "epoch": 0.94, + "learning_rate": 1.963674299157403e-07, + "loss": 0.2798, + "step": 10951 + }, + { + "epoch": 0.94, + "learning_rate": 1.9582033298309434e-07, + "loss": 0.2923, + "step": 10952 + }, + { + "epoch": 0.94, + "learning_rate": 1.9527399170817473e-07, + "loss": 0.2465, + "step": 10953 + }, + { + "epoch": 0.94, + "learning_rate": 1.9472840613308787e-07, + "loss": 0.2719, + "step": 10954 + }, + { + "epoch": 0.94, + "learning_rate": 1.9418357629988782e-07, + "loss": 0.3484, + "step": 10955 + }, + { + "epoch": 0.94, + "learning_rate": 1.936395022505644e-07, + "loss": 0.2772, + "step": 10956 + }, + { + "epoch": 0.94, + "learning_rate": 1.930961840270551e-07, + "loss": 0.2726, + "step": 10957 + }, + { + "epoch": 0.94, + "learning_rate": 1.9255362167123316e-07, + "loss": 0.2411, + "step": 10958 + }, + { + "epoch": 0.94, + "learning_rate": 1.920118152249173e-07, + "loss": 0.264, + "step": 10959 + }, + { + "epoch": 0.94, + "learning_rate": 1.914707647298697e-07, + "loss": 0.3056, + "step": 10960 + }, + { + "epoch": 0.94, + "learning_rate": 1.909304702277903e-07, + "loss": 0.2653, + "step": 10961 + }, + { + "epoch": 0.94, + "learning_rate": 1.903909317603214e-07, + "loss": 0.2652, + "step": 10962 + }, + { + "epoch": 0.94, + "learning_rate": 1.898521493690486e-07, + "loss": 0.2892, + "step": 10963 + }, + { + "epoch": 0.94, + "learning_rate": 1.8931412309549867e-07, + "loss": 0.2734, + "step": 10964 + }, + { + "epoch": 0.94, + "learning_rate": 1.8877685298114178e-07, + "loss": 0.2504, + "step": 10965 + }, + { + "epoch": 0.94, + "learning_rate": 1.882403390673837e-07, + "loss": 0.262, + "step": 10966 + }, + { + "epoch": 0.94, + "learning_rate": 1.8770458139558134e-07, + "loss": 0.2695, + "step": 10967 + }, + { + "epoch": 0.94, + "learning_rate": 1.87169580007025e-07, + "loss": 0.2392, + "step": 10968 + }, + { + "epoch": 0.94, + "learning_rate": 1.866353349429506e-07, + "loss": 0.2555, + "step": 10969 + }, + { + "epoch": 0.94, + "learning_rate": 1.861018462445352e-07, + "loss": 0.2729, + "step": 10970 + }, + { + "epoch": 0.94, + "learning_rate": 1.8556911395289811e-07, + "loss": 0.2485, + "step": 10971 + }, + { + "epoch": 0.94, + "learning_rate": 1.8503713810909984e-07, + "loss": 0.2606, + "step": 10972 + }, + { + "epoch": 0.94, + "learning_rate": 1.8450591875413981e-07, + "loss": 0.3301, + "step": 10973 + }, + { + "epoch": 0.94, + "learning_rate": 1.8397545592896527e-07, + "loss": 0.2673, + "step": 10974 + }, + { + "epoch": 0.94, + "learning_rate": 1.8344574967446015e-07, + "loss": 0.2759, + "step": 10975 + }, + { + "epoch": 0.94, + "learning_rate": 1.8291680003145074e-07, + "loss": 0.2781, + "step": 10976 + }, + { + "epoch": 0.94, + "learning_rate": 1.823886070407077e-07, + "loss": 0.2952, + "step": 10977 + }, + { + "epoch": 0.94, + "learning_rate": 1.8186117074293964e-07, + "loss": 0.3367, + "step": 10978 + }, + { + "epoch": 0.94, + "learning_rate": 1.8133449117880064e-07, + "loss": 0.2573, + "step": 10979 + }, + { + "epoch": 0.94, + "learning_rate": 1.808085683888827e-07, + "loss": 0.2839, + "step": 10980 + }, + { + "epoch": 0.94, + "learning_rate": 1.8028340241372345e-07, + "loss": 0.5588, + "step": 10981 + }, + { + "epoch": 0.94, + "learning_rate": 1.797589932937982e-07, + "loss": 0.2917, + "step": 10982 + }, + { + "epoch": 0.94, + "learning_rate": 1.792353410695269e-07, + "loss": 0.2467, + "step": 10983 + }, + { + "epoch": 0.94, + "learning_rate": 1.7871244578126835e-07, + "loss": 0.2821, + "step": 10984 + }, + { + "epoch": 0.94, + "learning_rate": 1.7819030746932696e-07, + "loss": 0.3131, + "step": 10985 + }, + { + "epoch": 0.94, + "learning_rate": 1.7766892617394727e-07, + "loss": 0.2578, + "step": 10986 + }, + { + "epoch": 0.94, + "learning_rate": 1.771483019353104e-07, + "loss": 0.2864, + "step": 10987 + }, + { + "epoch": 0.94, + "learning_rate": 1.7662843479354874e-07, + "loss": 0.2723, + "step": 10988 + }, + { + "epoch": 0.94, + "learning_rate": 1.761093247887269e-07, + "loss": 0.2858, + "step": 10989 + }, + { + "epoch": 0.94, + "learning_rate": 1.755909719608573e-07, + "loss": 0.3066, + "step": 10990 + }, + { + "epoch": 0.94, + "learning_rate": 1.750733763498924e-07, + "loss": 0.2475, + "step": 10991 + }, + { + "epoch": 0.94, + "learning_rate": 1.745565379957248e-07, + "loss": 0.2643, + "step": 10992 + }, + { + "epoch": 0.94, + "learning_rate": 1.7404045693819037e-07, + "loss": 0.2493, + "step": 10993 + }, + { + "epoch": 0.94, + "learning_rate": 1.7352513321706621e-07, + "loss": 0.303, + "step": 10994 + }, + { + "epoch": 0.94, + "learning_rate": 1.7301056687207053e-07, + "loss": 0.2623, + "step": 10995 + }, + { + "epoch": 0.94, + "learning_rate": 1.724967579428638e-07, + "loss": 0.2954, + "step": 10996 + }, + { + "epoch": 0.94, + "learning_rate": 1.7198370646904773e-07, + "loss": 0.3277, + "step": 10997 + }, + { + "epoch": 0.94, + "learning_rate": 1.714714124901662e-07, + "loss": 0.2625, + "step": 10998 + }, + { + "epoch": 0.94, + "learning_rate": 1.709598760457043e-07, + "loss": 0.2752, + "step": 10999 + }, + { + "epoch": 0.94, + "learning_rate": 1.7044909717508828e-07, + "loss": 0.3015, + "step": 11000 + }, + { + "epoch": 0.94, + "learning_rate": 1.6993907591768556e-07, + "loss": 0.2481, + "step": 11001 + }, + { + "epoch": 0.94, + "learning_rate": 1.6942981231280798e-07, + "loss": 0.3054, + "step": 11002 + }, + { + "epoch": 0.94, + "learning_rate": 1.6892130639970638e-07, + "loss": 0.2666, + "step": 11003 + }, + { + "epoch": 0.94, + "learning_rate": 1.6841355821757277e-07, + "loss": 0.2927, + "step": 11004 + }, + { + "epoch": 0.94, + "learning_rate": 1.679065678055447e-07, + "loss": 0.2709, + "step": 11005 + }, + { + "epoch": 0.94, + "learning_rate": 1.6740033520269538e-07, + "loss": 0.2939, + "step": 11006 + }, + { + "epoch": 0.94, + "learning_rate": 1.668948604480436e-07, + "loss": 0.2606, + "step": 11007 + }, + { + "epoch": 0.94, + "learning_rate": 1.6639014358054927e-07, + "loss": 0.2576, + "step": 11008 + }, + { + "epoch": 0.94, + "learning_rate": 1.6588618463911356e-07, + "loss": 0.2776, + "step": 11009 + }, + { + "epoch": 0.94, + "learning_rate": 1.6538298366257975e-07, + "loss": 0.2117, + "step": 11010 + }, + { + "epoch": 0.94, + "learning_rate": 1.6488054068972914e-07, + "loss": 0.5712, + "step": 11011 + }, + { + "epoch": 0.94, + "learning_rate": 1.643788557592918e-07, + "loss": 0.3251, + "step": 11012 + }, + { + "epoch": 0.94, + "learning_rate": 1.6387792890993238e-07, + "loss": 0.2996, + "step": 11013 + }, + { + "epoch": 0.94, + "learning_rate": 1.6337776018026108e-07, + "loss": 0.2424, + "step": 11014 + }, + { + "epoch": 0.94, + "learning_rate": 1.628783496088271e-07, + "loss": 0.272, + "step": 11015 + }, + { + "epoch": 0.94, + "learning_rate": 1.6237969723412294e-07, + "loss": 0.265, + "step": 11016 + }, + { + "epoch": 0.94, + "learning_rate": 1.6188180309458345e-07, + "loss": 0.2539, + "step": 11017 + }, + { + "epoch": 0.94, + "learning_rate": 1.6138466722858237e-07, + "loss": 0.2629, + "step": 11018 + }, + { + "epoch": 0.94, + "learning_rate": 1.6088828967443793e-07, + "loss": 0.2788, + "step": 11019 + }, + { + "epoch": 0.94, + "learning_rate": 1.6039267047040728e-07, + "loss": 0.292, + "step": 11020 + }, + { + "epoch": 0.94, + "learning_rate": 1.5989780965468994e-07, + "loss": 0.2114, + "step": 11021 + }, + { + "epoch": 0.94, + "learning_rate": 1.5940370726542864e-07, + "loss": 0.2867, + "step": 11022 + }, + { + "epoch": 0.94, + "learning_rate": 1.589103633407052e-07, + "loss": 0.2458, + "step": 11023 + }, + { + "epoch": 0.94, + "learning_rate": 1.5841777791854584e-07, + "loss": 0.3018, + "step": 11024 + }, + { + "epoch": 0.95, + "learning_rate": 1.5792595103691466e-07, + "loss": 0.307, + "step": 11025 + }, + { + "epoch": 0.95, + "learning_rate": 1.5743488273372133e-07, + "loss": 0.2418, + "step": 11026 + }, + { + "epoch": 0.95, + "learning_rate": 1.5694457304681222e-07, + "loss": 0.3038, + "step": 11027 + }, + { + "epoch": 0.95, + "learning_rate": 1.5645502201397933e-07, + "loss": 0.259, + "step": 11028 + }, + { + "epoch": 0.95, + "learning_rate": 1.5596622967295584e-07, + "loss": 0.2399, + "step": 11029 + }, + { + "epoch": 0.95, + "learning_rate": 1.554781960614138e-07, + "loss": 0.3043, + "step": 11030 + }, + { + "epoch": 0.95, + "learning_rate": 1.549909212169709e-07, + "loss": 0.2683, + "step": 11031 + }, + { + "epoch": 0.95, + "learning_rate": 1.5450440517717934e-07, + "loss": 0.2667, + "step": 11032 + }, + { + "epoch": 0.95, + "learning_rate": 1.5401864797954248e-07, + "loss": 0.2121, + "step": 11033 + }, + { + "epoch": 0.95, + "learning_rate": 1.5353364966149697e-07, + "loss": 0.2802, + "step": 11034 + }, + { + "epoch": 0.95, + "learning_rate": 1.5304941026042408e-07, + "loss": 0.2393, + "step": 11035 + }, + { + "epoch": 0.95, + "learning_rate": 1.5256592981364947e-07, + "loss": 0.265, + "step": 11036 + }, + { + "epoch": 0.95, + "learning_rate": 1.520832083584345e-07, + "loss": 0.2571, + "step": 11037 + }, + { + "epoch": 0.95, + "learning_rate": 1.51601245931986e-07, + "loss": 0.2621, + "step": 11038 + }, + { + "epoch": 0.95, + "learning_rate": 1.5112004257144986e-07, + "loss": 0.3093, + "step": 11039 + }, + { + "epoch": 0.95, + "learning_rate": 1.506395983139175e-07, + "loss": 0.3196, + "step": 11040 + }, + { + "epoch": 0.95, + "learning_rate": 1.501599131964182e-07, + "loss": 0.2657, + "step": 11041 + }, + { + "epoch": 0.95, + "learning_rate": 1.4968098725592127e-07, + "loss": 0.3131, + "step": 11042 + }, + { + "epoch": 0.95, + "learning_rate": 1.4920282052934387e-07, + "loss": 0.2822, + "step": 11043 + }, + { + "epoch": 0.95, + "learning_rate": 1.487254130535376e-07, + "loss": 0.2692, + "step": 11044 + }, + { + "epoch": 0.95, + "learning_rate": 1.482487648653008e-07, + "loss": 0.2626, + "step": 11045 + }, + { + "epoch": 0.95, + "learning_rate": 1.477728760013697e-07, + "loss": 0.2756, + "step": 11046 + }, + { + "epoch": 0.95, + "learning_rate": 1.4729774649842376e-07, + "loss": 0.2456, + "step": 11047 + }, + { + "epoch": 0.95, + "learning_rate": 1.4682337639308486e-07, + "loss": 0.2608, + "step": 11048 + }, + { + "epoch": 0.95, + "learning_rate": 1.4634976572191372e-07, + "loss": 0.2312, + "step": 11049 + }, + { + "epoch": 0.95, + "learning_rate": 1.458769145214145e-07, + "loss": 0.2748, + "step": 11050 + }, + { + "epoch": 0.95, + "learning_rate": 1.4540482282803136e-07, + "loss": 0.2961, + "step": 11051 + }, + { + "epoch": 0.95, + "learning_rate": 1.4493349067815188e-07, + "loss": 0.2699, + "step": 11052 + }, + { + "epoch": 0.95, + "learning_rate": 1.4446291810810365e-07, + "loss": 0.2286, + "step": 11053 + }, + { + "epoch": 0.95, + "learning_rate": 1.4399310515415655e-07, + "loss": 0.2567, + "step": 11054 + }, + { + "epoch": 0.95, + "learning_rate": 1.4352405185252048e-07, + "loss": 0.3259, + "step": 11055 + }, + { + "epoch": 0.95, + "learning_rate": 1.430557582393477e-07, + "loss": 0.2772, + "step": 11056 + }, + { + "epoch": 0.95, + "learning_rate": 1.425882243507337e-07, + "loss": 0.2618, + "step": 11057 + }, + { + "epoch": 0.95, + "learning_rate": 1.4212145022271196e-07, + "loss": 0.2341, + "step": 11058 + }, + { + "epoch": 0.95, + "learning_rate": 1.4165543589125918e-07, + "loss": 0.2669, + "step": 11059 + }, + { + "epoch": 0.95, + "learning_rate": 1.4119018139229333e-07, + "loss": 0.3201, + "step": 11060 + }, + { + "epoch": 0.95, + "learning_rate": 1.4072568676167575e-07, + "loss": 0.2262, + "step": 11061 + }, + { + "epoch": 0.95, + "learning_rate": 1.4026195203520666e-07, + "loss": 0.2884, + "step": 11062 + }, + { + "epoch": 0.95, + "learning_rate": 1.3979897724862523e-07, + "loss": 0.2607, + "step": 11063 + }, + { + "epoch": 0.95, + "learning_rate": 1.3933676243762072e-07, + "loss": 0.2772, + "step": 11064 + }, + { + "epoch": 0.95, + "learning_rate": 1.3887530763781465e-07, + "loss": 0.2661, + "step": 11065 + }, + { + "epoch": 0.95, + "learning_rate": 1.384146128847741e-07, + "loss": 0.3054, + "step": 11066 + }, + { + "epoch": 0.95, + "learning_rate": 1.3795467821400842e-07, + "loss": 0.2769, + "step": 11067 + }, + { + "epoch": 0.95, + "learning_rate": 1.37495503660966e-07, + "loss": 0.2525, + "step": 11068 + }, + { + "epoch": 0.95, + "learning_rate": 1.3703708926103842e-07, + "loss": 0.2617, + "step": 11069 + }, + { + "epoch": 0.95, + "learning_rate": 1.365794350495564e-07, + "loss": 0.309, + "step": 11070 + }, + { + "epoch": 0.95, + "learning_rate": 1.3612254106179723e-07, + "loss": 0.251, + "step": 11071 + }, + { + "epoch": 0.95, + "learning_rate": 1.3566640733297166e-07, + "loss": 0.3071, + "step": 11072 + }, + { + "epoch": 0.95, + "learning_rate": 1.352110338982382e-07, + "loss": 0.2515, + "step": 11073 + }, + { + "epoch": 0.95, + "learning_rate": 1.3475642079269659e-07, + "loss": 0.2426, + "step": 11074 + }, + { + "epoch": 0.95, + "learning_rate": 1.343025680513832e-07, + "loss": 0.3289, + "step": 11075 + }, + { + "epoch": 0.95, + "learning_rate": 1.338494757092812e-07, + "loss": 0.2855, + "step": 11076 + }, + { + "epoch": 0.95, + "learning_rate": 1.333971438013104e-07, + "loss": 0.2477, + "step": 11077 + }, + { + "epoch": 0.95, + "learning_rate": 1.329455723623352e-07, + "loss": 0.2708, + "step": 11078 + }, + { + "epoch": 0.95, + "learning_rate": 1.324947614271621e-07, + "loss": 0.2846, + "step": 11079 + }, + { + "epoch": 0.95, + "learning_rate": 1.320447110305345e-07, + "loss": 0.2825, + "step": 11080 + }, + { + "epoch": 0.95, + "learning_rate": 1.3159542120714352e-07, + "loss": 0.2745, + "step": 11081 + }, + { + "epoch": 0.95, + "learning_rate": 1.3114689199161478e-07, + "loss": 0.2184, + "step": 11082 + }, + { + "epoch": 0.95, + "learning_rate": 1.3069912341852064e-07, + "loss": 0.2872, + "step": 11083 + }, + { + "epoch": 0.95, + "learning_rate": 1.3025211552237127e-07, + "loss": 0.2186, + "step": 11084 + }, + { + "epoch": 0.95, + "learning_rate": 1.2980586833762242e-07, + "loss": 0.3228, + "step": 11085 + }, + { + "epoch": 0.95, + "learning_rate": 1.2936038189866773e-07, + "loss": 0.2629, + "step": 11086 + }, + { + "epoch": 0.95, + "learning_rate": 1.289156562398408e-07, + "loss": 0.2468, + "step": 11087 + }, + { + "epoch": 0.95, + "learning_rate": 1.2847169139542204e-07, + "loss": 0.2712, + "step": 11088 + }, + { + "epoch": 0.95, + "learning_rate": 1.2802848739962737e-07, + "loss": 0.2634, + "step": 11089 + }, + { + "epoch": 0.95, + "learning_rate": 1.2758604428661836e-07, + "loss": 0.2529, + "step": 11090 + }, + { + "epoch": 0.95, + "learning_rate": 1.2714436209049664e-07, + "loss": 0.2556, + "step": 11091 + }, + { + "epoch": 0.95, + "learning_rate": 1.2670344084530384e-07, + "loss": 0.2754, + "step": 11092 + }, + { + "epoch": 0.95, + "learning_rate": 1.2626328058502502e-07, + "loss": 0.271, + "step": 11093 + }, + { + "epoch": 0.95, + "learning_rate": 1.2582388134358414e-07, + "loss": 0.2536, + "step": 11094 + }, + { + "epoch": 0.95, + "learning_rate": 1.2538524315484968e-07, + "loss": 0.2448, + "step": 11095 + }, + { + "epoch": 0.95, + "learning_rate": 1.2494736605262792e-07, + "loss": 0.2592, + "step": 11096 + }, + { + "epoch": 0.95, + "learning_rate": 1.2451025007066963e-07, + "loss": 0.2485, + "step": 11097 + }, + { + "epoch": 0.95, + "learning_rate": 1.2407389524266456e-07, + "loss": 0.2639, + "step": 11098 + }, + { + "epoch": 0.95, + "learning_rate": 1.2363830160224465e-07, + "loss": 0.3061, + "step": 11099 + }, + { + "epoch": 0.95, + "learning_rate": 1.2320346918298644e-07, + "loss": 0.2926, + "step": 11100 + }, + { + "epoch": 0.95, + "learning_rate": 1.2276939801839972e-07, + "loss": 0.3254, + "step": 11101 + }, + { + "epoch": 0.95, + "learning_rate": 1.223360881419433e-07, + "loss": 0.27, + "step": 11102 + }, + { + "epoch": 0.95, + "learning_rate": 1.2190353958701495e-07, + "loss": 0.2877, + "step": 11103 + }, + { + "epoch": 0.95, + "learning_rate": 1.214717523869524e-07, + "loss": 0.2866, + "step": 11104 + }, + { + "epoch": 0.95, + "learning_rate": 1.2104072657503573e-07, + "loss": 0.2492, + "step": 11105 + }, + { + "epoch": 0.95, + "learning_rate": 1.2061046218448724e-07, + "loss": 0.221, + "step": 11106 + }, + { + "epoch": 0.95, + "learning_rate": 1.201809592484682e-07, + "loss": 0.258, + "step": 11107 + }, + { + "epoch": 0.95, + "learning_rate": 1.1975221780008317e-07, + "loss": 0.2326, + "step": 11108 + }, + { + "epoch": 0.95, + "learning_rate": 1.19324237872378e-07, + "loss": 0.2755, + "step": 11109 + }, + { + "epoch": 0.95, + "learning_rate": 1.1889701949833743e-07, + "loss": 0.3179, + "step": 11110 + }, + { + "epoch": 0.95, + "learning_rate": 1.1847056271089174e-07, + "loss": 0.2632, + "step": 11111 + }, + { + "epoch": 0.95, + "learning_rate": 1.1804486754290912e-07, + "loss": 0.2695, + "step": 11112 + }, + { + "epoch": 0.95, + "learning_rate": 1.1761993402719884e-07, + "loss": 0.2848, + "step": 11113 + }, + { + "epoch": 0.95, + "learning_rate": 1.1719576219651585e-07, + "loss": 0.2698, + "step": 11114 + }, + { + "epoch": 0.95, + "learning_rate": 1.1677235208354842e-07, + "loss": 0.2657, + "step": 11115 + }, + { + "epoch": 0.95, + "learning_rate": 1.163497037209349e-07, + "loss": 0.2778, + "step": 11116 + }, + { + "epoch": 0.95, + "learning_rate": 1.1592781714125034e-07, + "loss": 0.2684, + "step": 11117 + }, + { + "epoch": 0.95, + "learning_rate": 1.1550669237700985e-07, + "loss": 0.256, + "step": 11118 + }, + { + "epoch": 0.95, + "learning_rate": 1.150863294606741e-07, + "loss": 0.3174, + "step": 11119 + }, + { + "epoch": 0.95, + "learning_rate": 1.1466672842464055e-07, + "loss": 0.3174, + "step": 11120 + }, + { + "epoch": 0.95, + "learning_rate": 1.1424788930125108e-07, + "loss": 0.598, + "step": 11121 + }, + { + "epoch": 0.95, + "learning_rate": 1.1382981212278655e-07, + "loss": 0.2747, + "step": 11122 + }, + { + "epoch": 0.95, + "learning_rate": 1.1341249692147116e-07, + "loss": 0.3002, + "step": 11123 + }, + { + "epoch": 0.95, + "learning_rate": 1.1299594372947031e-07, + "loss": 0.286, + "step": 11124 + }, + { + "epoch": 0.95, + "learning_rate": 1.1258015257888832e-07, + "loss": 0.2589, + "step": 11125 + }, + { + "epoch": 0.95, + "learning_rate": 1.12165123501774e-07, + "loss": 0.2772, + "step": 11126 + }, + { + "epoch": 0.95, + "learning_rate": 1.1175085653011397e-07, + "loss": 0.2831, + "step": 11127 + }, + { + "epoch": 0.95, + "learning_rate": 1.1133735169583826e-07, + "loss": 0.2903, + "step": 11128 + }, + { + "epoch": 0.95, + "learning_rate": 1.1092460903081803e-07, + "loss": 0.2384, + "step": 11129 + }, + { + "epoch": 0.95, + "learning_rate": 1.1051262856686673e-07, + "loss": 0.3017, + "step": 11130 + }, + { + "epoch": 0.95, + "learning_rate": 1.1010141033573562e-07, + "loss": 0.2823, + "step": 11131 + }, + { + "epoch": 0.95, + "learning_rate": 1.0969095436912047e-07, + "loss": 0.264, + "step": 11132 + }, + { + "epoch": 0.95, + "learning_rate": 1.0928126069865819e-07, + "loss": 0.2909, + "step": 11133 + }, + { + "epoch": 0.95, + "learning_rate": 1.0887232935592351e-07, + "loss": 0.2753, + "step": 11134 + }, + { + "epoch": 0.95, + "learning_rate": 1.0846416037243678e-07, + "loss": 0.2318, + "step": 11135 + }, + { + "epoch": 0.95, + "learning_rate": 1.080567537796573e-07, + "loss": 0.324, + "step": 11136 + }, + { + "epoch": 0.95, + "learning_rate": 1.0765010960898548e-07, + "loss": 0.2839, + "step": 11137 + }, + { + "epoch": 0.95, + "learning_rate": 1.0724422789176404e-07, + "loss": 0.3337, + "step": 11138 + }, + { + "epoch": 0.95, + "learning_rate": 1.068391086592746e-07, + "loss": 0.2611, + "step": 11139 + }, + { + "epoch": 0.95, + "learning_rate": 1.0643475194274444e-07, + "loss": 0.2267, + "step": 11140 + }, + { + "epoch": 0.95, + "learning_rate": 1.060311577733375e-07, + "loss": 0.2205, + "step": 11141 + }, + { + "epoch": 0.96, + "learning_rate": 1.0562832618216223e-07, + "loss": 0.2562, + "step": 11142 + }, + { + "epoch": 0.96, + "learning_rate": 1.0522625720026491e-07, + "loss": 0.2975, + "step": 11143 + }, + { + "epoch": 0.96, + "learning_rate": 1.0482495085863631e-07, + "loss": 0.2576, + "step": 11144 + }, + { + "epoch": 0.96, + "learning_rate": 1.0442440718820834e-07, + "loss": 0.308, + "step": 11145 + }, + { + "epoch": 0.96, + "learning_rate": 1.0402462621984965e-07, + "loss": 0.2617, + "step": 11146 + }, + { + "epoch": 0.96, + "learning_rate": 1.0362560798437671e-07, + "loss": 0.2844, + "step": 11147 + }, + { + "epoch": 0.96, + "learning_rate": 1.0322735251254156e-07, + "loss": 0.5356, + "step": 11148 + }, + { + "epoch": 0.96, + "learning_rate": 1.028298598350408e-07, + "loss": 0.2559, + "step": 11149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0243312998251209e-07, + "loss": 0.2758, + "step": 11150 + }, + { + "epoch": 0.96, + "learning_rate": 1.0203716298553212e-07, + "loss": 0.2247, + "step": 11151 + }, + { + "epoch": 0.96, + "learning_rate": 1.016419588746198e-07, + "loss": 0.2969, + "step": 11152 + }, + { + "epoch": 0.96, + "learning_rate": 1.0124751768023633e-07, + "loss": 0.2437, + "step": 11153 + }, + { + "epoch": 0.96, + "learning_rate": 1.0085383943278293e-07, + "loss": 0.3262, + "step": 11154 + }, + { + "epoch": 0.96, + "learning_rate": 1.0046092416260312e-07, + "loss": 0.2685, + "step": 11155 + }, + { + "epoch": 0.96, + "learning_rate": 1.0006877189997821e-07, + "loss": 0.2482, + "step": 11156 + }, + { + "epoch": 0.96, + "learning_rate": 9.967738267513737e-08, + "loss": 0.2437, + "step": 11157 + }, + { + "epoch": 0.96, + "learning_rate": 9.928675651824427e-08, + "loss": 0.284, + "step": 11158 + }, + { + "epoch": 0.96, + "learning_rate": 9.889689345940812e-08, + "loss": 0.2286, + "step": 11159 + }, + { + "epoch": 0.96, + "learning_rate": 9.85077935286749e-08, + "loss": 0.2334, + "step": 11160 + }, + { + "epoch": 0.96, + "learning_rate": 9.811945675603729e-08, + "loss": 0.3089, + "step": 11161 + }, + { + "epoch": 0.96, + "learning_rate": 9.773188317142579e-08, + "loss": 0.2933, + "step": 11162 + }, + { + "epoch": 0.96, + "learning_rate": 9.734507280471094e-08, + "loss": 0.2717, + "step": 11163 + }, + { + "epoch": 0.96, + "learning_rate": 9.69590256857078e-08, + "loss": 0.2385, + "step": 11164 + }, + { + "epoch": 0.96, + "learning_rate": 9.657374184417146e-08, + "loss": 0.22, + "step": 11165 + }, + { + "epoch": 0.96, + "learning_rate": 9.61892213097959e-08, + "loss": 0.2663, + "step": 11166 + }, + { + "epoch": 0.96, + "learning_rate": 9.580546411221858e-08, + "loss": 0.2419, + "step": 11167 + }, + { + "epoch": 0.96, + "learning_rate": 9.542247028101914e-08, + "loss": 0.2545, + "step": 11168 + }, + { + "epoch": 0.96, + "learning_rate": 9.50402398457162e-08, + "loss": 0.2546, + "step": 11169 + }, + { + "epoch": 0.96, + "learning_rate": 9.46587728357673e-08, + "loss": 0.2878, + "step": 11170 + }, + { + "epoch": 0.96, + "learning_rate": 9.427806928057893e-08, + "loss": 0.3085, + "step": 11171 + }, + { + "epoch": 0.96, + "learning_rate": 9.389812920949093e-08, + "loss": 0.2669, + "step": 11172 + }, + { + "epoch": 0.96, + "learning_rate": 9.351895265178656e-08, + "loss": 0.5581, + "step": 11173 + }, + { + "epoch": 0.96, + "learning_rate": 9.314053963669245e-08, + "loss": 0.2675, + "step": 11174 + }, + { + "epoch": 0.96, + "learning_rate": 9.276289019337415e-08, + "loss": 0.2682, + "step": 11175 + }, + { + "epoch": 0.96, + "learning_rate": 9.238600435094058e-08, + "loss": 0.2626, + "step": 11176 + }, + { + "epoch": 0.96, + "learning_rate": 9.200988213843631e-08, + "loss": 0.2632, + "step": 11177 + }, + { + "epoch": 0.96, + "learning_rate": 9.163452358485591e-08, + "loss": 0.2552, + "step": 11178 + }, + { + "epoch": 0.96, + "learning_rate": 9.125992871912626e-08, + "loss": 0.2536, + "step": 11179 + }, + { + "epoch": 0.96, + "learning_rate": 9.088609757012201e-08, + "loss": 0.2545, + "step": 11180 + }, + { + "epoch": 0.96, + "learning_rate": 9.051303016665347e-08, + "loss": 0.2369, + "step": 11181 + }, + { + "epoch": 0.96, + "learning_rate": 9.014072653747763e-08, + "loss": 0.2917, + "step": 11182 + }, + { + "epoch": 0.96, + "learning_rate": 8.97691867112882e-08, + "loss": 0.2599, + "step": 11183 + }, + { + "epoch": 0.96, + "learning_rate": 8.939841071672117e-08, + "loss": 0.249, + "step": 11184 + }, + { + "epoch": 0.96, + "learning_rate": 8.90283985823559e-08, + "loss": 0.3525, + "step": 11185 + }, + { + "epoch": 0.96, + "learning_rate": 8.865915033671069e-08, + "loss": 0.2525, + "step": 11186 + }, + { + "epoch": 0.96, + "learning_rate": 8.829066600824277e-08, + "loss": 0.3111, + "step": 11187 + }, + { + "epoch": 0.96, + "learning_rate": 8.79229456253572e-08, + "loss": 0.266, + "step": 11188 + }, + { + "epoch": 0.96, + "learning_rate": 8.755598921639241e-08, + "loss": 0.2385, + "step": 11189 + }, + { + "epoch": 0.96, + "learning_rate": 8.718979680963469e-08, + "loss": 0.3097, + "step": 11190 + }, + { + "epoch": 0.96, + "learning_rate": 8.682436843330477e-08, + "loss": 0.3012, + "step": 11191 + }, + { + "epoch": 0.96, + "learning_rate": 8.645970411557125e-08, + "loss": 0.2763, + "step": 11192 + }, + { + "epoch": 0.96, + "learning_rate": 8.609580388454052e-08, + "loss": 0.2753, + "step": 11193 + }, + { + "epoch": 0.96, + "learning_rate": 8.573266776825683e-08, + "loss": 0.2708, + "step": 11194 + }, + { + "epoch": 0.96, + "learning_rate": 8.537029579471334e-08, + "loss": 0.2969, + "step": 11195 + }, + { + "epoch": 0.96, + "learning_rate": 8.50086879918366e-08, + "loss": 0.2747, + "step": 11196 + }, + { + "epoch": 0.96, + "learning_rate": 8.464784438749985e-08, + "loss": 0.3044, + "step": 11197 + }, + { + "epoch": 0.96, + "learning_rate": 8.428776500951308e-08, + "loss": 0.2467, + "step": 11198 + }, + { + "epoch": 0.96, + "learning_rate": 8.392844988563075e-08, + "loss": 0.284, + "step": 11199 + }, + { + "epoch": 0.96, + "learning_rate": 8.35698990435474e-08, + "loss": 0.3075, + "step": 11200 + }, + { + "epoch": 0.96, + "learning_rate": 8.321211251089645e-08, + "loss": 0.2837, + "step": 11201 + }, + { + "epoch": 0.96, + "learning_rate": 8.285509031525696e-08, + "loss": 0.2766, + "step": 11202 + }, + { + "epoch": 0.96, + "learning_rate": 8.249883248414359e-08, + "loss": 0.2557, + "step": 11203 + }, + { + "epoch": 0.96, + "learning_rate": 8.21433390450177e-08, + "loss": 0.2319, + "step": 11204 + }, + { + "epoch": 0.96, + "learning_rate": 8.178861002527628e-08, + "loss": 0.5624, + "step": 11205 + }, + { + "epoch": 0.96, + "learning_rate": 8.143464545226298e-08, + "loss": 0.2836, + "step": 11206 + }, + { + "epoch": 0.96, + "learning_rate": 8.108144535325713e-08, + "loss": 0.2432, + "step": 11207 + }, + { + "epoch": 0.96, + "learning_rate": 8.072900975548248e-08, + "loss": 0.2726, + "step": 11208 + }, + { + "epoch": 0.96, + "learning_rate": 8.037733868610509e-08, + "loss": 0.3044, + "step": 11209 + }, + { + "epoch": 0.96, + "learning_rate": 8.002643217222661e-08, + "loss": 0.2598, + "step": 11210 + }, + { + "epoch": 0.96, + "learning_rate": 7.967629024089429e-08, + "loss": 0.3063, + "step": 11211 + }, + { + "epoch": 0.96, + "learning_rate": 7.932691291909656e-08, + "loss": 0.2604, + "step": 11212 + }, + { + "epoch": 0.96, + "learning_rate": 7.897830023376074e-08, + "loss": 0.2823, + "step": 11213 + }, + { + "epoch": 0.96, + "learning_rate": 7.863045221175647e-08, + "loss": 0.2502, + "step": 11214 + }, + { + "epoch": 0.96, + "learning_rate": 7.82833688798934e-08, + "loss": 0.304, + "step": 11215 + }, + { + "epoch": 0.96, + "learning_rate": 7.793705026492459e-08, + "loss": 0.2799, + "step": 11216 + }, + { + "epoch": 0.96, + "learning_rate": 7.759149639354091e-08, + "loss": 0.3023, + "step": 11217 + }, + { + "epoch": 0.96, + "learning_rate": 7.72467072923766e-08, + "loss": 0.2812, + "step": 11218 + }, + { + "epoch": 0.96, + "learning_rate": 7.690268298800596e-08, + "loss": 0.2649, + "step": 11219 + }, + { + "epoch": 0.96, + "learning_rate": 7.655942350694556e-08, + "loss": 0.2226, + "step": 11220 + }, + { + "epoch": 0.96, + "learning_rate": 7.621692887565202e-08, + "loss": 0.2224, + "step": 11221 + }, + { + "epoch": 0.96, + "learning_rate": 7.587519912052199e-08, + "loss": 0.2657, + "step": 11222 + }, + { + "epoch": 0.96, + "learning_rate": 7.553423426789664e-08, + "loss": 0.2788, + "step": 11223 + }, + { + "epoch": 0.96, + "learning_rate": 7.51940343440527e-08, + "loss": 0.2589, + "step": 11224 + }, + { + "epoch": 0.96, + "learning_rate": 7.485459937521256e-08, + "loss": 0.2335, + "step": 11225 + }, + { + "epoch": 0.96, + "learning_rate": 7.451592938753971e-08, + "loss": 0.2879, + "step": 11226 + }, + { + "epoch": 0.96, + "learning_rate": 7.417802440713439e-08, + "loss": 0.2693, + "step": 11227 + }, + { + "epoch": 0.96, + "learning_rate": 7.384088446004356e-08, + "loss": 0.2811, + "step": 11228 + }, + { + "epoch": 0.96, + "learning_rate": 7.350450957224864e-08, + "loss": 0.2823, + "step": 11229 + }, + { + "epoch": 0.96, + "learning_rate": 7.31688997696789e-08, + "loss": 0.2487, + "step": 11230 + }, + { + "epoch": 0.96, + "learning_rate": 7.283405507820141e-08, + "loss": 0.2932, + "step": 11231 + }, + { + "epoch": 0.96, + "learning_rate": 7.249997552362109e-08, + "loss": 0.2537, + "step": 11232 + }, + { + "epoch": 0.96, + "learning_rate": 7.21666611316918e-08, + "loss": 0.2381, + "step": 11233 + }, + { + "epoch": 0.96, + "learning_rate": 7.183411192810075e-08, + "loss": 0.24, + "step": 11234 + }, + { + "epoch": 0.96, + "learning_rate": 7.150232793847967e-08, + "loss": 0.2476, + "step": 11235 + }, + { + "epoch": 0.96, + "learning_rate": 7.117130918840032e-08, + "loss": 0.303, + "step": 11236 + }, + { + "epoch": 0.96, + "learning_rate": 7.084105570337785e-08, + "loss": 0.2562, + "step": 11237 + }, + { + "epoch": 0.96, + "learning_rate": 7.051156750886523e-08, + "loss": 0.5919, + "step": 11238 + }, + { + "epoch": 0.96, + "learning_rate": 7.01828446302566e-08, + "loss": 0.366, + "step": 11239 + }, + { + "epoch": 0.96, + "learning_rate": 6.985488709289057e-08, + "loss": 0.2456, + "step": 11240 + }, + { + "epoch": 0.96, + "learning_rate": 6.952769492204359e-08, + "loss": 0.2977, + "step": 11241 + }, + { + "epoch": 0.96, + "learning_rate": 6.920126814293438e-08, + "loss": 0.274, + "step": 11242 + }, + { + "epoch": 0.96, + "learning_rate": 6.887560678072169e-08, + "loss": 0.265, + "step": 11243 + }, + { + "epoch": 0.96, + "learning_rate": 6.855071086050547e-08, + "loss": 0.2159, + "step": 11244 + }, + { + "epoch": 0.96, + "learning_rate": 6.822658040732899e-08, + "loss": 0.2888, + "step": 11245 + }, + { + "epoch": 0.96, + "learning_rate": 6.790321544617117e-08, + "loss": 0.2625, + "step": 11246 + }, + { + "epoch": 0.96, + "learning_rate": 6.758061600195986e-08, + "loss": 0.2703, + "step": 11247 + }, + { + "epoch": 0.96, + "learning_rate": 6.725878209955628e-08, + "loss": 0.3077, + "step": 11248 + }, + { + "epoch": 0.96, + "learning_rate": 6.693771376376612e-08, + "loss": 0.3311, + "step": 11249 + }, + { + "epoch": 0.96, + "learning_rate": 6.661741101933628e-08, + "loss": 0.2686, + "step": 11250 + }, + { + "epoch": 0.96, + "learning_rate": 6.629787389095476e-08, + "loss": 0.2845, + "step": 11251 + }, + { + "epoch": 0.96, + "learning_rate": 6.597910240324967e-08, + "loss": 0.3286, + "step": 11252 + }, + { + "epoch": 0.96, + "learning_rate": 6.566109658078912e-08, + "loss": 0.2987, + "step": 11253 + }, + { + "epoch": 0.96, + "learning_rate": 6.534385644808461e-08, + "loss": 0.2812, + "step": 11254 + }, + { + "epoch": 0.96, + "learning_rate": 6.502738202958658e-08, + "loss": 0.3301, + "step": 11255 + }, + { + "epoch": 0.96, + "learning_rate": 6.471167334968887e-08, + "loss": 0.2943, + "step": 11256 + }, + { + "epoch": 0.96, + "learning_rate": 6.439673043272199e-08, + "loss": 0.2758, + "step": 11257 + }, + { + "epoch": 0.97, + "learning_rate": 6.40825533029632e-08, + "loss": 0.2596, + "step": 11258 + }, + { + "epoch": 0.97, + "learning_rate": 6.376914198462648e-08, + "loss": 0.2995, + "step": 11259 + }, + { + "epoch": 0.97, + "learning_rate": 6.345649650186691e-08, + "loss": 0.2594, + "step": 11260 + }, + { + "epoch": 0.97, + "learning_rate": 6.314461687878415e-08, + "loss": 0.2642, + "step": 11261 + }, + { + "epoch": 0.97, + "learning_rate": 6.28335031394134e-08, + "loss": 0.2514, + "step": 11262 + }, + { + "epoch": 0.97, + "learning_rate": 6.252315530773545e-08, + "loss": 0.2921, + "step": 11263 + }, + { + "epoch": 0.97, + "learning_rate": 6.22135734076712e-08, + "loss": 0.2561, + "step": 11264 + }, + { + "epoch": 0.97, + "learning_rate": 6.190475746307933e-08, + "loss": 0.2963, + "step": 11265 + }, + { + "epoch": 0.97, + "learning_rate": 6.159670749776414e-08, + "loss": 0.2861, + "step": 11266 + }, + { + "epoch": 0.97, + "learning_rate": 6.128942353546775e-08, + "loss": 0.263, + "step": 11267 + }, + { + "epoch": 0.97, + "learning_rate": 6.098290559987342e-08, + "loss": 0.282, + "step": 11268 + }, + { + "epoch": 0.97, + "learning_rate": 6.06771537146067e-08, + "loss": 0.2546, + "step": 11269 + }, + { + "epoch": 0.97, + "learning_rate": 6.037216790323319e-08, + "loss": 0.3136, + "step": 11270 + }, + { + "epoch": 0.97, + "learning_rate": 6.006794818926077e-08, + "loss": 0.2807, + "step": 11271 + }, + { + "epoch": 0.97, + "learning_rate": 5.976449459613509e-08, + "loss": 0.2397, + "step": 11272 + }, + { + "epoch": 0.97, + "learning_rate": 5.946180714724636e-08, + "loss": 0.2944, + "step": 11273 + }, + { + "epoch": 0.97, + "learning_rate": 5.915988586592481e-08, + "loss": 0.2356, + "step": 11274 + }, + { + "epoch": 0.97, + "learning_rate": 5.8858730775438465e-08, + "loss": 0.2962, + "step": 11275 + }, + { + "epoch": 0.97, + "learning_rate": 5.855834189900211e-08, + "loss": 0.3015, + "step": 11276 + }, + { + "epoch": 0.97, + "learning_rate": 5.8258719259765006e-08, + "loss": 0.2362, + "step": 11277 + }, + { + "epoch": 0.97, + "learning_rate": 5.795986288082422e-08, + "loss": 0.2847, + "step": 11278 + }, + { + "epoch": 0.97, + "learning_rate": 5.7661772785211345e-08, + "loss": 0.2512, + "step": 11279 + }, + { + "epoch": 0.97, + "learning_rate": 5.7364448995901324e-08, + "loss": 0.2693, + "step": 11280 + }, + { + "epoch": 0.97, + "learning_rate": 5.706789153581249e-08, + "loss": 0.2752, + "step": 11281 + }, + { + "epoch": 0.97, + "learning_rate": 5.677210042780212e-08, + "loss": 0.2554, + "step": 11282 + }, + { + "epoch": 0.97, + "learning_rate": 5.647707569466643e-08, + "loss": 0.2544, + "step": 11283 + }, + { + "epoch": 0.97, + "learning_rate": 5.618281735914499e-08, + "loss": 0.3107, + "step": 11284 + }, + { + "epoch": 0.97, + "learning_rate": 5.5889325443918565e-08, + "loss": 0.2534, + "step": 11285 + }, + { + "epoch": 0.97, + "learning_rate": 5.5596599971606823e-08, + "loss": 0.2894, + "step": 11286 + }, + { + "epoch": 0.97, + "learning_rate": 5.530464096477395e-08, + "loss": 0.2811, + "step": 11287 + }, + { + "epoch": 0.97, + "learning_rate": 5.5013448445919716e-08, + "loss": 0.2862, + "step": 11288 + }, + { + "epoch": 0.97, + "learning_rate": 5.4723022437489506e-08, + "loss": 0.2396, + "step": 11289 + }, + { + "epoch": 0.97, + "learning_rate": 5.443336296186874e-08, + "loss": 0.2404, + "step": 11290 + }, + { + "epoch": 0.97, + "learning_rate": 5.414447004138068e-08, + "loss": 0.2172, + "step": 11291 + }, + { + "epoch": 0.97, + "learning_rate": 5.3856343698294176e-08, + "loss": 0.3086, + "step": 11292 + }, + { + "epoch": 0.97, + "learning_rate": 5.3568983954813694e-08, + "loss": 0.2487, + "step": 11293 + }, + { + "epoch": 0.97, + "learning_rate": 5.3282390833090393e-08, + "loss": 0.2922, + "step": 11294 + }, + { + "epoch": 0.97, + "learning_rate": 5.299656435521217e-08, + "loss": 0.2282, + "step": 11295 + }, + { + "epoch": 0.97, + "learning_rate": 5.271150454320917e-08, + "loss": 0.2324, + "step": 11296 + }, + { + "epoch": 0.97, + "learning_rate": 5.2427211419051605e-08, + "loss": 0.265, + "step": 11297 + }, + { + "epoch": 0.97, + "learning_rate": 5.214368500465305e-08, + "loss": 0.278, + "step": 11298 + }, + { + "epoch": 0.97, + "learning_rate": 5.186092532186493e-08, + "loss": 0.2397, + "step": 11299 + }, + { + "epoch": 0.97, + "learning_rate": 5.157893239248202e-08, + "loss": 0.2766, + "step": 11300 + }, + { + "epoch": 0.97, + "learning_rate": 5.129770623823804e-08, + "loss": 0.2867, + "step": 11301 + }, + { + "epoch": 0.97, + "learning_rate": 5.1017246880809e-08, + "loss": 0.3179, + "step": 11302 + }, + { + "epoch": 0.97, + "learning_rate": 5.073755434181093e-08, + "loss": 0.2878, + "step": 11303 + }, + { + "epoch": 0.97, + "learning_rate": 5.0458628642802156e-08, + "loss": 0.2625, + "step": 11304 + }, + { + "epoch": 0.97, + "learning_rate": 5.01804698052788e-08, + "loss": 0.3158, + "step": 11305 + }, + { + "epoch": 0.97, + "learning_rate": 4.99030778506826e-08, + "loss": 0.2543, + "step": 11306 + }, + { + "epoch": 0.97, + "learning_rate": 4.962645280039202e-08, + "loss": 0.2661, + "step": 11307 + }, + { + "epoch": 0.97, + "learning_rate": 4.935059467572778e-08, + "loss": 0.2993, + "step": 11308 + }, + { + "epoch": 0.97, + "learning_rate": 4.907550349795287e-08, + "loss": 0.2518, + "step": 11309 + }, + { + "epoch": 0.97, + "learning_rate": 4.8801179288268105e-08, + "loss": 0.3093, + "step": 11310 + }, + { + "epoch": 0.97, + "learning_rate": 4.85276220678188e-08, + "loss": 0.2347, + "step": 11311 + }, + { + "epoch": 0.97, + "learning_rate": 4.82548318576892e-08, + "loss": 0.2411, + "step": 11312 + }, + { + "epoch": 0.97, + "learning_rate": 4.798280867890359e-08, + "loss": 0.319, + "step": 11313 + }, + { + "epoch": 0.97, + "learning_rate": 4.771155255242854e-08, + "loss": 0.2524, + "step": 11314 + }, + { + "epoch": 0.97, + "learning_rate": 4.744106349917066e-08, + "loss": 0.2753, + "step": 11315 + }, + { + "epoch": 0.97, + "learning_rate": 4.717134153997993e-08, + "loss": 0.573, + "step": 11316 + }, + { + "epoch": 0.97, + "learning_rate": 4.6902386695644174e-08, + "loss": 0.2864, + "step": 11317 + }, + { + "epoch": 0.97, + "learning_rate": 4.663419898689125e-08, + "loss": 0.3143, + "step": 11318 + }, + { + "epoch": 0.97, + "learning_rate": 4.6366778434393524e-08, + "loss": 0.2847, + "step": 11319 + }, + { + "epoch": 0.97, + "learning_rate": 4.610012505876338e-08, + "loss": 0.2456, + "step": 11320 + }, + { + "epoch": 0.97, + "learning_rate": 4.583423888055105e-08, + "loss": 0.2608, + "step": 11321 + }, + { + "epoch": 0.97, + "learning_rate": 4.556911992025015e-08, + "loss": 0.2645, + "step": 11322 + }, + { + "epoch": 0.97, + "learning_rate": 4.530476819829655e-08, + "loss": 0.2343, + "step": 11323 + }, + { + "epoch": 0.97, + "learning_rate": 4.504118373506283e-08, + "loss": 0.2856, + "step": 11324 + }, + { + "epoch": 0.97, + "learning_rate": 4.477836655086498e-08, + "loss": 0.2693, + "step": 11325 + }, + { + "epoch": 0.97, + "learning_rate": 4.451631666596123e-08, + "loss": 0.2758, + "step": 11326 + }, + { + "epoch": 0.97, + "learning_rate": 4.4255034100548766e-08, + "loss": 0.3412, + "step": 11327 + }, + { + "epoch": 0.97, + "learning_rate": 4.3994518874765914e-08, + "loss": 0.2557, + "step": 11328 + }, + { + "epoch": 0.97, + "learning_rate": 4.3734771008689947e-08, + "loss": 0.3047, + "step": 11329 + }, + { + "epoch": 0.97, + "learning_rate": 4.347579052234374e-08, + "loss": 0.2413, + "step": 11330 + }, + { + "epoch": 0.97, + "learning_rate": 4.3217577435686886e-08, + "loss": 0.2524, + "step": 11331 + }, + { + "epoch": 0.97, + "learning_rate": 4.296013176862013e-08, + "loss": 0.3312, + "step": 11332 + }, + { + "epoch": 0.97, + "learning_rate": 4.2703453540988704e-08, + "loss": 0.3398, + "step": 11333 + }, + { + "epoch": 0.97, + "learning_rate": 4.244754277257346e-08, + "loss": 0.2469, + "step": 11334 + }, + { + "epoch": 0.97, + "learning_rate": 4.219239948310083e-08, + "loss": 0.2914, + "step": 11335 + }, + { + "epoch": 0.97, + "learning_rate": 4.193802369223399e-08, + "loss": 0.2906, + "step": 11336 + }, + { + "epoch": 0.97, + "learning_rate": 4.168441541958168e-08, + "loss": 0.2784, + "step": 11337 + }, + { + "epoch": 0.97, + "learning_rate": 4.143157468468717e-08, + "loss": 0.2576, + "step": 11338 + }, + { + "epoch": 0.97, + "learning_rate": 4.117950150704153e-08, + "loss": 0.2973, + "step": 11339 + }, + { + "epoch": 0.97, + "learning_rate": 4.092819590607144e-08, + "loss": 0.2581, + "step": 11340 + }, + { + "epoch": 0.97, + "learning_rate": 4.067765790114697e-08, + "loss": 0.2953, + "step": 11341 + }, + { + "epoch": 0.97, + "learning_rate": 4.0427887511578224e-08, + "loss": 0.2675, + "step": 11342 + }, + { + "epoch": 0.97, + "learning_rate": 4.017888475661536e-08, + "loss": 0.2936, + "step": 11343 + }, + { + "epoch": 0.97, + "learning_rate": 3.99306496554519e-08, + "loss": 0.2623, + "step": 11344 + }, + { + "epoch": 0.97, + "learning_rate": 3.968318222722034e-08, + "loss": 0.3088, + "step": 11345 + }, + { + "epoch": 0.97, + "learning_rate": 3.943648249099319e-08, + "loss": 0.2405, + "step": 11346 + }, + { + "epoch": 0.97, + "learning_rate": 3.9190550465785236e-08, + "loss": 0.2439, + "step": 11347 + }, + { + "epoch": 0.97, + "learning_rate": 3.894538617055243e-08, + "loss": 0.2798, + "step": 11348 + }, + { + "epoch": 0.97, + "learning_rate": 3.8700989624189666e-08, + "loss": 0.2879, + "step": 11349 + }, + { + "epoch": 0.97, + "learning_rate": 3.845736084553408e-08, + "loss": 0.265, + "step": 11350 + }, + { + "epoch": 0.97, + "learning_rate": 3.8214499853364007e-08, + "loss": 0.2798, + "step": 11351 + }, + { + "epoch": 0.97, + "learning_rate": 3.79724066663989e-08, + "loss": 0.2814, + "step": 11352 + }, + { + "epoch": 0.97, + "learning_rate": 3.773108130329495e-08, + "loss": 0.2737, + "step": 11353 + }, + { + "epoch": 0.97, + "learning_rate": 3.749052378265505e-08, + "loss": 0.2594, + "step": 11354 + }, + { + "epoch": 0.97, + "learning_rate": 3.725073412301994e-08, + "loss": 0.2728, + "step": 11355 + }, + { + "epoch": 0.97, + "learning_rate": 3.7011712342870376e-08, + "loss": 0.2447, + "step": 11356 + }, + { + "epoch": 0.97, + "learning_rate": 3.677345846062941e-08, + "loss": 0.2601, + "step": 11357 + }, + { + "epoch": 0.97, + "learning_rate": 3.653597249466012e-08, + "loss": 0.2836, + "step": 11358 + }, + { + "epoch": 0.97, + "learning_rate": 3.6299254463267877e-08, + "loss": 0.2546, + "step": 11359 + }, + { + "epoch": 0.97, + "learning_rate": 3.606330438469585e-08, + "loss": 0.2912, + "step": 11360 + }, + { + "epoch": 0.97, + "learning_rate": 3.5828122277132836e-08, + "loss": 0.2618, + "step": 11361 + }, + { + "epoch": 0.97, + "learning_rate": 3.559370815870211e-08, + "loss": 0.2906, + "step": 11362 + }, + { + "epoch": 0.97, + "learning_rate": 3.536006204747366e-08, + "loss": 0.2775, + "step": 11363 + }, + { + "epoch": 0.97, + "learning_rate": 3.5127183961454204e-08, + "loss": 0.2874, + "step": 11364 + }, + { + "epoch": 0.97, + "learning_rate": 3.4895073918593814e-08, + "loss": 0.2669, + "step": 11365 + }, + { + "epoch": 0.97, + "learning_rate": 3.466373193678263e-08, + "loss": 0.2559, + "step": 11366 + }, + { + "epoch": 0.97, + "learning_rate": 3.443315803385083e-08, + "loss": 0.2654, + "step": 11367 + }, + { + "epoch": 0.97, + "learning_rate": 3.4203352227569766e-08, + "loss": 0.2661, + "step": 11368 + }, + { + "epoch": 0.97, + "learning_rate": 3.397431453565192e-08, + "loss": 0.2279, + "step": 11369 + }, + { + "epoch": 0.97, + "learning_rate": 3.3746044975749845e-08, + "loss": 0.2541, + "step": 11370 + }, + { + "epoch": 0.97, + "learning_rate": 3.351854356545725e-08, + "loss": 0.2838, + "step": 11371 + }, + { + "epoch": 0.97, + "learning_rate": 3.3291810322311214e-08, + "loss": 0.2454, + "step": 11372 + }, + { + "epoch": 0.97, + "learning_rate": 3.306584526378442e-08, + "loss": 0.2866, + "step": 11373 + }, + { + "epoch": 0.97, + "learning_rate": 3.284064840729406e-08, + "loss": 0.3113, + "step": 11374 + }, + { + "epoch": 0.98, + "learning_rate": 3.261621977019846e-08, + "loss": 0.2747, + "step": 11375 + }, + { + "epoch": 0.98, + "learning_rate": 3.239255936979269e-08, + "loss": 0.278, + "step": 11376 + }, + { + "epoch": 0.98, + "learning_rate": 3.216966722331849e-08, + "loss": 0.2897, + "step": 11377 + }, + { + "epoch": 0.98, + "learning_rate": 3.1947543347953246e-08, + "loss": 0.3158, + "step": 11378 + }, + { + "epoch": 0.98, + "learning_rate": 3.1726187760817704e-08, + "loss": 0.2634, + "step": 11379 + }, + { + "epoch": 0.98, + "learning_rate": 3.1505600478973775e-08, + "loss": 0.3062, + "step": 11380 + }, + { + "epoch": 0.98, + "learning_rate": 3.128578151942119e-08, + "loss": 0.2733, + "step": 11381 + }, + { + "epoch": 0.98, + "learning_rate": 3.106673089910417e-08, + "loss": 0.2527, + "step": 11382 + }, + { + "epoch": 0.98, + "learning_rate": 3.0848448634905884e-08, + "loss": 0.3233, + "step": 11383 + }, + { + "epoch": 0.98, + "learning_rate": 3.063093474364953e-08, + "loss": 0.319, + "step": 11384 + }, + { + "epoch": 0.98, + "learning_rate": 3.04141892421006e-08, + "loss": 0.265, + "step": 11385 + }, + { + "epoch": 0.98, + "learning_rate": 3.019821214696572e-08, + "loss": 0.281, + "step": 11386 + }, + { + "epoch": 0.98, + "learning_rate": 2.998300347488936e-08, + "loss": 0.2572, + "step": 11387 + }, + { + "epoch": 0.98, + "learning_rate": 2.9768563242460468e-08, + "loss": 0.6128, + "step": 11388 + }, + { + "epoch": 0.98, + "learning_rate": 2.9554891466205826e-08, + "loss": 0.2444, + "step": 11389 + }, + { + "epoch": 0.98, + "learning_rate": 2.9341988162595593e-08, + "loss": 0.286, + "step": 11390 + }, + { + "epoch": 0.98, + "learning_rate": 2.912985334803775e-08, + "loss": 0.2689, + "step": 11391 + }, + { + "epoch": 0.98, + "learning_rate": 2.891848703888367e-08, + "loss": 0.2728, + "step": 11392 + }, + { + "epoch": 0.98, + "learning_rate": 2.8707889251423647e-08, + "loss": 0.2765, + "step": 11393 + }, + { + "epoch": 0.98, + "learning_rate": 2.849806000189026e-08, + "loss": 0.2752, + "step": 11394 + }, + { + "epoch": 0.98, + "learning_rate": 2.8288999306456122e-08, + "loss": 0.2593, + "step": 11395 + }, + { + "epoch": 0.98, + "learning_rate": 2.8080707181232792e-08, + "loss": 0.2748, + "step": 11396 + }, + { + "epoch": 0.98, + "learning_rate": 2.7873183642277423e-08, + "loss": 0.2015, + "step": 11397 + }, + { + "epoch": 0.98, + "learning_rate": 2.766642870558278e-08, + "loss": 0.2672, + "step": 11398 + }, + { + "epoch": 0.98, + "learning_rate": 2.7460442387085005e-08, + "loss": 0.2762, + "step": 11399 + }, + { + "epoch": 0.98, + "learning_rate": 2.7255224702660288e-08, + "loss": 0.3265, + "step": 11400 + }, + { + "epoch": 0.98, + "learning_rate": 2.705077566812708e-08, + "loss": 0.3335, + "step": 11401 + }, + { + "epoch": 0.98, + "learning_rate": 2.6847095299241678e-08, + "loss": 0.3121, + "step": 11402 + }, + { + "epoch": 0.98, + "learning_rate": 2.6644183611702623e-08, + "loss": 0.2772, + "step": 11403 + }, + { + "epoch": 0.98, + "learning_rate": 2.6442040621150743e-08, + "loss": 0.2513, + "step": 11404 + }, + { + "epoch": 0.98, + "learning_rate": 2.62406663431658e-08, + "loss": 0.2474, + "step": 11405 + }, + { + "epoch": 0.98, + "learning_rate": 2.6040060793268705e-08, + "loss": 0.2712, + "step": 11406 + }, + { + "epoch": 0.98, + "learning_rate": 2.5840223986920432e-08, + "loss": 0.3283, + "step": 11407 + }, + { + "epoch": 0.98, + "learning_rate": 2.5641155939524208e-08, + "loss": 0.2649, + "step": 11408 + }, + { + "epoch": 0.98, + "learning_rate": 2.544285666642221e-08, + "loss": 0.2557, + "step": 11409 + }, + { + "epoch": 0.98, + "learning_rate": 2.5245326182899987e-08, + "loss": 0.2036, + "step": 11410 + }, + { + "epoch": 0.98, + "learning_rate": 2.5048564504180918e-08, + "loss": 0.2394, + "step": 11411 + }, + { + "epoch": 0.98, + "learning_rate": 2.4852571645430645e-08, + "loss": 0.3246, + "step": 11412 + }, + { + "epoch": 0.98, + "learning_rate": 2.4657347621755977e-08, + "loss": 0.2924, + "step": 11413 + }, + { + "epoch": 0.98, + "learning_rate": 2.4462892448202657e-08, + "loss": 0.275, + "step": 11414 + }, + { + "epoch": 0.98, + "learning_rate": 2.4269206139759804e-08, + "loss": 0.2529, + "step": 11415 + }, + { + "epoch": 0.98, + "learning_rate": 2.407628871135437e-08, + "loss": 0.2819, + "step": 11416 + }, + { + "epoch": 0.98, + "learning_rate": 2.3884140177856675e-08, + "loss": 0.2571, + "step": 11417 + }, + { + "epoch": 0.98, + "learning_rate": 2.369276055407599e-08, + "loss": 0.2749, + "step": 11418 + }, + { + "epoch": 0.98, + "learning_rate": 2.3502149854762734e-08, + "loss": 0.2868, + "step": 11419 + }, + { + "epoch": 0.98, + "learning_rate": 2.3312308094607382e-08, + "loss": 0.2536, + "step": 11420 + }, + { + "epoch": 0.98, + "learning_rate": 2.3123235288244895e-08, + "loss": 0.5137, + "step": 11421 + }, + { + "epoch": 0.98, + "learning_rate": 2.2934931450245833e-08, + "loss": 0.2585, + "step": 11422 + }, + { + "epoch": 0.98, + "learning_rate": 2.2747396595123038e-08, + "loss": 0.2614, + "step": 11423 + }, + { + "epoch": 0.98, + "learning_rate": 2.256063073733272e-08, + "loss": 0.3104, + "step": 11424 + }, + { + "epoch": 0.98, + "learning_rate": 2.2374633891268928e-08, + "loss": 0.2795, + "step": 11425 + }, + { + "epoch": 0.98, + "learning_rate": 2.218940607126685e-08, + "loss": 0.2569, + "step": 11426 + }, + { + "epoch": 0.98, + "learning_rate": 2.2004947291603964e-08, + "loss": 0.2558, + "step": 11427 + }, + { + "epoch": 0.98, + "learning_rate": 2.182125756649778e-08, + "loss": 0.2653, + "step": 11428 + }, + { + "epoch": 0.98, + "learning_rate": 2.163833691010475e-08, + "loss": 0.2581, + "step": 11429 + }, + { + "epoch": 0.98, + "learning_rate": 2.1456185336524714e-08, + "loss": 0.3249, + "step": 11430 + }, + { + "epoch": 0.98, + "learning_rate": 2.1274802859795328e-08, + "loss": 0.2426, + "step": 11431 + }, + { + "epoch": 0.98, + "learning_rate": 2.109418949389874e-08, + "loss": 0.2612, + "step": 11432 + }, + { + "epoch": 0.98, + "learning_rate": 2.091434525275382e-08, + "loss": 0.2803, + "step": 11433 + }, + { + "epoch": 0.98, + "learning_rate": 2.0735270150223917e-08, + "loss": 0.274, + "step": 11434 + }, + { + "epoch": 0.98, + "learning_rate": 2.055696420011022e-08, + "loss": 0.2836, + "step": 11435 + }, + { + "epoch": 0.98, + "learning_rate": 2.037942741615617e-08, + "loss": 0.2904, + "step": 11436 + }, + { + "epoch": 0.98, + "learning_rate": 2.0202659812045278e-08, + "loss": 0.2227, + "step": 11437 + }, + { + "epoch": 0.98, + "learning_rate": 2.002666140140108e-08, + "loss": 0.251, + "step": 11438 + }, + { + "epoch": 0.98, + "learning_rate": 1.98514321977894e-08, + "loss": 0.3287, + "step": 11439 + }, + { + "epoch": 0.98, + "learning_rate": 1.9676972214716095e-08, + "loss": 0.2678, + "step": 11440 + }, + { + "epoch": 0.98, + "learning_rate": 1.9503281465627077e-08, + "loss": 0.2937, + "step": 11441 + }, + { + "epoch": 0.98, + "learning_rate": 1.9330359963910527e-08, + "loss": 0.3317, + "step": 11442 + }, + { + "epoch": 0.98, + "learning_rate": 1.9158207722893564e-08, + "loss": 0.2131, + "step": 11443 + }, + { + "epoch": 0.98, + "learning_rate": 1.8986824755846677e-08, + "loss": 0.6558, + "step": 11444 + }, + { + "epoch": 0.98, + "learning_rate": 1.8816211075975976e-08, + "loss": 0.2593, + "step": 11445 + }, + { + "epoch": 0.98, + "learning_rate": 1.864636669643427e-08, + "loss": 0.2697, + "step": 11446 + }, + { + "epoch": 0.98, + "learning_rate": 1.847729163030998e-08, + "loss": 0.2943, + "step": 11447 + }, + { + "epoch": 0.98, + "learning_rate": 1.8308985890637122e-08, + "loss": 0.2432, + "step": 11448 + }, + { + "epoch": 0.98, + "learning_rate": 1.814144949038643e-08, + "loss": 0.3052, + "step": 11449 + }, + { + "epoch": 0.98, + "learning_rate": 1.7974682442470915e-08, + "loss": 0.2794, + "step": 11450 + }, + { + "epoch": 0.98, + "learning_rate": 1.780868475974362e-08, + "loss": 0.2299, + "step": 11451 + }, + { + "epoch": 0.98, + "learning_rate": 1.7643456454999875e-08, + "loss": 0.2826, + "step": 11452 + }, + { + "epoch": 0.98, + "learning_rate": 1.747899754097504e-08, + "loss": 0.2925, + "step": 11453 + }, + { + "epoch": 0.98, + "learning_rate": 1.7315308030342314e-08, + "loss": 0.5677, + "step": 11454 + }, + { + "epoch": 0.98, + "learning_rate": 1.7152387935721603e-08, + "loss": 0.2896, + "step": 11455 + }, + { + "epoch": 0.98, + "learning_rate": 1.699023726966731e-08, + "loss": 0.3215, + "step": 11456 + }, + { + "epoch": 0.98, + "learning_rate": 1.682885604467721e-08, + "loss": 0.2579, + "step": 11457 + }, + { + "epoch": 0.98, + "learning_rate": 1.666824427319136e-08, + "loss": 0.2515, + "step": 11458 + }, + { + "epoch": 0.98, + "learning_rate": 1.6508401967588738e-08, + "loss": 0.2817, + "step": 11459 + }, + { + "epoch": 0.98, + "learning_rate": 1.6349329140188385e-08, + "loss": 0.2773, + "step": 11460 + }, + { + "epoch": 0.98, + "learning_rate": 1.6191025803250493e-08, + "loss": 0.28, + "step": 11461 + }, + { + "epoch": 0.98, + "learning_rate": 1.6033491968976412e-08, + "loss": 0.2805, + "step": 11462 + }, + { + "epoch": 0.98, + "learning_rate": 1.587672764950976e-08, + "loss": 0.285, + "step": 11463 + }, + { + "epoch": 0.98, + "learning_rate": 1.5720732856930878e-08, + "loss": 0.2961, + "step": 11464 + }, + { + "epoch": 0.98, + "learning_rate": 1.5565507603264585e-08, + "loss": 0.2628, + "step": 11465 + }, + { + "epoch": 0.98, + "learning_rate": 1.541105190047465e-08, + "loss": 0.2618, + "step": 11466 + }, + { + "epoch": 0.98, + "learning_rate": 1.525736576046599e-08, + "loss": 0.2624, + "step": 11467 + }, + { + "epoch": 0.98, + "learning_rate": 1.5104449195082473e-08, + "loss": 0.3469, + "step": 11468 + }, + { + "epoch": 0.98, + "learning_rate": 1.4952302216112437e-08, + "loss": 0.3184, + "step": 11469 + }, + { + "epoch": 0.98, + "learning_rate": 1.480092483527984e-08, + "loss": 0.2355, + "step": 11470 + }, + { + "epoch": 0.98, + "learning_rate": 1.4650317064254238e-08, + "loss": 0.3027, + "step": 11471 + }, + { + "epoch": 0.98, + "learning_rate": 1.4500478914644122e-08, + "loss": 0.2668, + "step": 11472 + }, + { + "epoch": 0.98, + "learning_rate": 1.4351410397995813e-08, + "loss": 0.2556, + "step": 11473 + }, + { + "epoch": 0.98, + "learning_rate": 1.4203111525801228e-08, + "loss": 0.2491, + "step": 11474 + }, + { + "epoch": 0.98, + "learning_rate": 1.4055582309489002e-08, + "loss": 0.2595, + "step": 11475 + }, + { + "epoch": 0.98, + "learning_rate": 1.390882276043115e-08, + "loss": 0.2816, + "step": 11476 + }, + { + "epoch": 0.98, + "learning_rate": 1.3762832889937517e-08, + "loss": 0.3431, + "step": 11477 + }, + { + "epoch": 0.98, + "learning_rate": 1.3617612709262428e-08, + "loss": 0.2835, + "step": 11478 + }, + { + "epoch": 0.98, + "learning_rate": 1.3473162229596937e-08, + "loss": 0.2776, + "step": 11479 + }, + { + "epoch": 0.98, + "learning_rate": 1.3329481462075466e-08, + "loss": 0.2713, + "step": 11480 + }, + { + "epoch": 0.98, + "learning_rate": 1.3186570417771383e-08, + "loss": 0.2608, + "step": 11481 + }, + { + "epoch": 0.98, + "learning_rate": 1.3044429107700319e-08, + "loss": 0.2422, + "step": 11482 + }, + { + "epoch": 0.98, + "learning_rate": 1.2903057542817954e-08, + "loss": 0.2609, + "step": 11483 + }, + { + "epoch": 0.98, + "learning_rate": 1.2762455734020018e-08, + "loss": 0.2527, + "step": 11484 + }, + { + "epoch": 0.98, + "learning_rate": 1.262262369214451e-08, + "loss": 0.2543, + "step": 11485 + }, + { + "epoch": 0.98, + "learning_rate": 1.248356142796725e-08, + "loss": 0.5543, + "step": 11486 + }, + { + "epoch": 0.98, + "learning_rate": 1.2345268952207445e-08, + "loss": 0.3311, + "step": 11487 + }, + { + "epoch": 0.98, + "learning_rate": 1.2207746275523235e-08, + "loss": 0.3029, + "step": 11488 + }, + { + "epoch": 0.98, + "learning_rate": 1.2070993408516141e-08, + "loss": 0.2455, + "step": 11489 + }, + { + "epoch": 0.98, + "learning_rate": 1.1935010361724397e-08, + "loss": 0.2894, + "step": 11490 + }, + { + "epoch": 0.98, + "learning_rate": 1.1799797145628511e-08, + "loss": 0.307, + "step": 11491 + }, + { + "epoch": 0.99, + "learning_rate": 1.1665353770652366e-08, + "loss": 0.2742, + "step": 11492 + }, + { + "epoch": 0.99, + "learning_rate": 1.1531680247156562e-08, + "loss": 0.2823, + "step": 11493 + }, + { + "epoch": 0.99, + "learning_rate": 1.1398776585445082e-08, + "loss": 0.2738, + "step": 11494 + }, + { + "epoch": 0.99, + "learning_rate": 1.126664279575973e-08, + "loss": 0.2755, + "step": 11495 + }, + { + "epoch": 0.99, + "learning_rate": 1.1135278888286805e-08, + "loss": 0.2807, + "step": 11496 + }, + { + "epoch": 0.99, + "learning_rate": 1.1004684873149319e-08, + "loss": 0.2877, + "step": 11497 + }, + { + "epoch": 0.99, + "learning_rate": 1.0874860760413664e-08, + "loss": 0.2685, + "step": 11498 + }, + { + "epoch": 0.99, + "learning_rate": 1.0745806560086281e-08, + "loss": 0.2725, + "step": 11499 + }, + { + "epoch": 0.99, + "learning_rate": 1.0617522282113656e-08, + "loss": 0.3019, + "step": 11500 + }, + { + "epoch": 0.99, + "learning_rate": 1.0490007936383439e-08, + "loss": 0.2753, + "step": 11501 + }, + { + "epoch": 0.99, + "learning_rate": 1.0363263532724433e-08, + "loss": 0.2572, + "step": 11502 + }, + { + "epoch": 0.99, + "learning_rate": 1.0237289080904377e-08, + "loss": 0.2715, + "step": 11503 + }, + { + "epoch": 0.99, + "learning_rate": 1.0112084590633287e-08, + "loss": 0.2794, + "step": 11504 + }, + { + "epoch": 0.99, + "learning_rate": 9.987650071561217e-09, + "loss": 0.2914, + "step": 11505 + }, + { + "epoch": 0.99, + "learning_rate": 9.863985533278275e-09, + "loss": 0.2257, + "step": 11506 + }, + { + "epoch": 0.99, + "learning_rate": 9.741090985316836e-09, + "loss": 0.2817, + "step": 11507 + }, + { + "epoch": 0.99, + "learning_rate": 9.618966437149324e-09, + "loss": 0.2543, + "step": 11508 + }, + { + "epoch": 0.99, + "learning_rate": 9.497611898188209e-09, + "loss": 0.2716, + "step": 11509 + }, + { + "epoch": 0.99, + "learning_rate": 9.377027377786007e-09, + "loss": 0.2936, + "step": 11510 + }, + { + "epoch": 0.99, + "learning_rate": 9.25721288523751e-09, + "loss": 0.276, + "step": 11511 + }, + { + "epoch": 0.99, + "learning_rate": 9.13816842977755e-09, + "loss": 0.25, + "step": 11512 + }, + { + "epoch": 0.99, + "learning_rate": 9.01989402058101e-09, + "loss": 0.2637, + "step": 11513 + }, + { + "epoch": 0.99, + "learning_rate": 8.902389666765044e-09, + "loss": 0.2573, + "step": 11514 + }, + { + "epoch": 0.99, + "learning_rate": 8.785655377384628e-09, + "loss": 0.3189, + "step": 11515 + }, + { + "epoch": 0.99, + "learning_rate": 8.66969116143701e-09, + "loss": 0.2859, + "step": 11516 + }, + { + "epoch": 0.99, + "learning_rate": 8.554497027862818e-09, + "loss": 0.3009, + "step": 11517 + }, + { + "epoch": 0.99, + "learning_rate": 8.440072985537174e-09, + "loss": 0.2725, + "step": 11518 + }, + { + "epoch": 0.99, + "learning_rate": 8.326419043281909e-09, + "loss": 0.3018, + "step": 11519 + }, + { + "epoch": 0.99, + "learning_rate": 8.213535209855571e-09, + "loss": 0.259, + "step": 11520 + }, + { + "epoch": 0.99, + "learning_rate": 8.101421493958984e-09, + "loss": 0.2982, + "step": 11521 + }, + { + "epoch": 0.99, + "learning_rate": 7.990077904234117e-09, + "loss": 0.2548, + "step": 11522 + }, + { + "epoch": 0.99, + "learning_rate": 7.879504449261887e-09, + "loss": 0.2206, + "step": 11523 + }, + { + "epoch": 0.99, + "learning_rate": 7.769701137564368e-09, + "loss": 0.5447, + "step": 11524 + }, + { + "epoch": 0.99, + "learning_rate": 7.660667977605896e-09, + "loss": 0.3079, + "step": 11525 + }, + { + "epoch": 0.99, + "learning_rate": 7.552404977788641e-09, + "loss": 0.2728, + "step": 11526 + }, + { + "epoch": 0.99, + "learning_rate": 7.44491214645815e-09, + "loss": 0.3044, + "step": 11527 + }, + { + "epoch": 0.99, + "learning_rate": 7.338189491900016e-09, + "loss": 0.3257, + "step": 11528 + }, + { + "epoch": 0.99, + "learning_rate": 7.232237022338773e-09, + "loss": 0.2479, + "step": 11529 + }, + { + "epoch": 0.99, + "learning_rate": 7.12705474594011e-09, + "loss": 0.2343, + "step": 11530 + }, + { + "epoch": 0.99, + "learning_rate": 7.0226426708119855e-09, + "loss": 0.2923, + "step": 11531 + }, + { + "epoch": 0.99, + "learning_rate": 6.919000805002407e-09, + "loss": 0.2963, + "step": 11532 + }, + { + "epoch": 0.99, + "learning_rate": 6.816129156499429e-09, + "loss": 0.2526, + "step": 11533 + }, + { + "epoch": 0.99, + "learning_rate": 6.714027733230044e-09, + "loss": 0.2718, + "step": 11534 + }, + { + "epoch": 0.99, + "learning_rate": 6.612696543066843e-09, + "loss": 0.2905, + "step": 11535 + }, + { + "epoch": 0.99, + "learning_rate": 6.512135593816915e-09, + "loss": 0.2753, + "step": 11536 + }, + { + "epoch": 0.99, + "learning_rate": 6.412344893232947e-09, + "loss": 0.2878, + "step": 11537 + }, + { + "epoch": 0.99, + "learning_rate": 6.3133244490043434e-09, + "loss": 0.2676, + "step": 11538 + }, + { + "epoch": 0.99, + "learning_rate": 6.215074268766108e-09, + "loss": 0.2621, + "step": 11539 + }, + { + "epoch": 0.99, + "learning_rate": 6.117594360088852e-09, + "loss": 0.2385, + "step": 11540 + }, + { + "epoch": 0.99, + "learning_rate": 6.020884730485455e-09, + "loss": 0.306, + "step": 11541 + }, + { + "epoch": 0.99, + "learning_rate": 5.924945387411063e-09, + "loss": 0.3141, + "step": 11542 + }, + { + "epoch": 0.99, + "learning_rate": 5.8297763382597625e-09, + "loss": 0.2732, + "step": 11543 + }, + { + "epoch": 0.99, + "learning_rate": 5.735377590366797e-09, + "loss": 0.2704, + "step": 11544 + }, + { + "epoch": 0.99, + "learning_rate": 5.6417491510074584e-09, + "loss": 0.3116, + "step": 11545 + }, + { + "epoch": 0.99, + "learning_rate": 5.548891027398195e-09, + "loss": 0.2813, + "step": 11546 + }, + { + "epoch": 0.99, + "learning_rate": 5.456803226696616e-09, + "loss": 0.3231, + "step": 11547 + }, + { + "epoch": 0.99, + "learning_rate": 5.365485756000377e-09, + "loss": 0.2432, + "step": 11548 + }, + { + "epoch": 0.99, + "learning_rate": 5.274938622348291e-09, + "loss": 0.2744, + "step": 11549 + }, + { + "epoch": 0.99, + "learning_rate": 5.185161832718111e-09, + "loss": 0.2561, + "step": 11550 + }, + { + "epoch": 0.99, + "learning_rate": 5.096155394028746e-09, + "loss": 0.2561, + "step": 11551 + }, + { + "epoch": 0.99, + "learning_rate": 5.007919313142484e-09, + "loss": 0.2926, + "step": 11552 + }, + { + "epoch": 0.99, + "learning_rate": 4.920453596859443e-09, + "loss": 0.2908, + "step": 11553 + }, + { + "epoch": 0.99, + "learning_rate": 4.833758251919785e-09, + "loss": 0.2725, + "step": 11554 + }, + { + "epoch": 0.99, + "learning_rate": 4.747833285007053e-09, + "loss": 0.2717, + "step": 11555 + }, + { + "epoch": 0.99, + "learning_rate": 4.662678702742618e-09, + "loss": 0.2628, + "step": 11556 + }, + { + "epoch": 0.99, + "learning_rate": 4.578294511691228e-09, + "loss": 0.29, + "step": 11557 + }, + { + "epoch": 0.99, + "learning_rate": 4.494680718355459e-09, + "loss": 0.2753, + "step": 11558 + }, + { + "epoch": 0.99, + "learning_rate": 4.411837329181268e-09, + "loss": 0.2796, + "step": 11559 + }, + { + "epoch": 0.99, + "learning_rate": 4.329764350552434e-09, + "loss": 0.3345, + "step": 11560 + }, + { + "epoch": 0.99, + "learning_rate": 4.248461788795011e-09, + "loss": 0.2124, + "step": 11561 + }, + { + "epoch": 0.99, + "learning_rate": 4.167929650176206e-09, + "loss": 0.265, + "step": 11562 + }, + { + "epoch": 0.99, + "learning_rate": 4.088167940902166e-09, + "loss": 0.2834, + "step": 11563 + }, + { + "epoch": 0.99, + "learning_rate": 4.009176667121306e-09, + "loss": 0.5439, + "step": 11564 + }, + { + "epoch": 0.99, + "learning_rate": 3.93095583492209e-09, + "loss": 0.2127, + "step": 11565 + }, + { + "epoch": 0.99, + "learning_rate": 3.853505450331918e-09, + "loss": 0.2473, + "step": 11566 + }, + { + "epoch": 0.99, + "learning_rate": 3.7768255193204595e-09, + "loss": 0.2492, + "step": 11567 + }, + { + "epoch": 0.99, + "learning_rate": 3.700916047799652e-09, + "loss": 0.2236, + "step": 11568 + }, + { + "epoch": 0.99, + "learning_rate": 3.6257770416192606e-09, + "loss": 0.2706, + "step": 11569 + }, + { + "epoch": 0.99, + "learning_rate": 3.5514085065690984e-09, + "loss": 0.2792, + "step": 11570 + }, + { + "epoch": 0.99, + "learning_rate": 3.4778104483834676e-09, + "loss": 0.2814, + "step": 11571 + }, + { + "epoch": 0.99, + "learning_rate": 3.4049828727333866e-09, + "loss": 0.306, + "step": 11572 + }, + { + "epoch": 0.99, + "learning_rate": 3.3329257852332543e-09, + "loss": 0.2882, + "step": 11573 + }, + { + "epoch": 0.99, + "learning_rate": 3.2616391914364056e-09, + "loss": 0.2967, + "step": 11574 + }, + { + "epoch": 0.99, + "learning_rate": 3.1911230968362238e-09, + "loss": 0.2654, + "step": 11575 + }, + { + "epoch": 0.99, + "learning_rate": 3.1213775068683617e-09, + "loss": 0.2779, + "step": 11576 + }, + { + "epoch": 0.99, + "learning_rate": 3.0524024269096286e-09, + "loss": 0.2856, + "step": 11577 + }, + { + "epoch": 0.99, + "learning_rate": 2.9841978622746624e-09, + "loss": 0.3293, + "step": 11578 + }, + { + "epoch": 0.99, + "learning_rate": 2.9167638182214796e-09, + "loss": 0.2673, + "step": 11579 + }, + { + "epoch": 0.99, + "learning_rate": 2.850100299947034e-09, + "loss": 0.2715, + "step": 11580 + }, + { + "epoch": 0.99, + "learning_rate": 2.7842073125894377e-09, + "loss": 0.2838, + "step": 11581 + }, + { + "epoch": 0.99, + "learning_rate": 2.7190848612279606e-09, + "loss": 0.2588, + "step": 11582 + }, + { + "epoch": 0.99, + "learning_rate": 2.654732950880812e-09, + "loss": 0.3109, + "step": 11583 + }, + { + "epoch": 0.99, + "learning_rate": 2.591151586508467e-09, + "loss": 0.3022, + "step": 11584 + }, + { + "epoch": 0.99, + "learning_rate": 2.5283407730114506e-09, + "loss": 0.3181, + "step": 11585 + }, + { + "epoch": 0.99, + "learning_rate": 2.4663005152314455e-09, + "loss": 0.2777, + "step": 11586 + }, + { + "epoch": 0.99, + "learning_rate": 2.405030817949072e-09, + "loss": 0.2429, + "step": 11587 + }, + { + "epoch": 0.99, + "learning_rate": 2.3445316858883295e-09, + "loss": 0.2407, + "step": 11588 + }, + { + "epoch": 0.99, + "learning_rate": 2.2848031237099332e-09, + "loss": 0.2783, + "step": 11589 + }, + { + "epoch": 0.99, + "learning_rate": 2.225845136019089e-09, + "loss": 0.2467, + "step": 11590 + }, + { + "epoch": 0.99, + "learning_rate": 2.1676577273610498e-09, + "loss": 0.2602, + "step": 11591 + }, + { + "epoch": 0.99, + "learning_rate": 2.110240902217786e-09, + "loss": 0.2925, + "step": 11592 + }, + { + "epoch": 0.99, + "learning_rate": 2.053594665016867e-09, + "loss": 0.2975, + "step": 11593 + }, + { + "epoch": 0.99, + "learning_rate": 1.9977190201225793e-09, + "loss": 0.2503, + "step": 11594 + }, + { + "epoch": 0.99, + "learning_rate": 1.9426139718436986e-09, + "loss": 0.2598, + "step": 11595 + }, + { + "epoch": 0.99, + "learning_rate": 1.8882795244257177e-09, + "loss": 0.2744, + "step": 11596 + }, + { + "epoch": 0.99, + "learning_rate": 1.8347156820563983e-09, + "loss": 0.2524, + "step": 11597 + }, + { + "epoch": 0.99, + "learning_rate": 1.7819224488657695e-09, + "loss": 0.31, + "step": 11598 + }, + { + "epoch": 0.99, + "learning_rate": 1.7298998289216884e-09, + "loss": 0.2989, + "step": 11599 + }, + { + "epoch": 0.99, + "learning_rate": 1.678647826234281e-09, + "loss": 0.3439, + "step": 11600 + }, + { + "epoch": 0.99, + "learning_rate": 1.6281664447526103e-09, + "loss": 0.28, + "step": 11601 + }, + { + "epoch": 0.99, + "learning_rate": 1.5784556883691183e-09, + "loss": 0.5886, + "step": 11602 + }, + { + "epoch": 0.99, + "learning_rate": 1.5295155609151845e-09, + "loss": 0.2851, + "step": 11603 + }, + { + "epoch": 0.99, + "learning_rate": 1.4813460661611267e-09, + "loss": 0.2918, + "step": 11604 + }, + { + "epoch": 0.99, + "learning_rate": 1.433947207821751e-09, + "loss": 0.2743, + "step": 11605 + }, + { + "epoch": 0.99, + "learning_rate": 1.3873189895485806e-09, + "loss": 0.2662, + "step": 11606 + }, + { + "epoch": 0.99, + "learning_rate": 1.3414614149365179e-09, + "loss": 0.2792, + "step": 11607 + }, + { + "epoch": 1.0, + "learning_rate": 1.2963744875205132e-09, + "loss": 0.2714, + "step": 11608 + }, + { + "epoch": 1.0, + "learning_rate": 1.2520582107733436e-09, + "loss": 0.2455, + "step": 11609 + }, + { + "epoch": 1.0, + "learning_rate": 1.2085125881133863e-09, + "loss": 0.2542, + "step": 11610 + }, + { + "epoch": 1.0, + "learning_rate": 1.165737622895735e-09, + "loss": 0.2796, + "step": 11611 + }, + { + "epoch": 1.0, + "learning_rate": 1.1237333184177523e-09, + "loss": 0.2532, + "step": 11612 + }, + { + "epoch": 1.0, + "learning_rate": 1.082499677915738e-09, + "loss": 0.2457, + "step": 11613 + }, + { + "epoch": 1.0, + "learning_rate": 1.042036704568261e-09, + "loss": 0.2759, + "step": 11614 + }, + { + "epoch": 1.0, + "learning_rate": 1.002344401495048e-09, + "loss": 0.2508, + "step": 11615 + }, + { + "epoch": 1.0, + "learning_rate": 9.634227717547629e-10, + "loss": 0.5614, + "step": 11616 + }, + { + "epoch": 1.0, + "learning_rate": 9.252718183472287e-10, + "loss": 0.2728, + "step": 11617 + }, + { + "epoch": 1.0, + "learning_rate": 8.878915442123159e-10, + "loss": 0.2224, + "step": 11618 + }, + { + "epoch": 1.0, + "learning_rate": 8.512819522310533e-10, + "loss": 0.5873, + "step": 11619 + }, + { + "epoch": 1.0, + "learning_rate": 8.154430452267381e-10, + "loss": 0.2794, + "step": 11620 + }, + { + "epoch": 1.0, + "learning_rate": 7.803748259604949e-10, + "loss": 0.2524, + "step": 11621 + }, + { + "epoch": 1.0, + "learning_rate": 7.460772971357167e-10, + "loss": 0.2491, + "step": 11622 + }, + { + "epoch": 1.0, + "learning_rate": 7.125504613947343e-10, + "loss": 0.2253, + "step": 11623 + }, + { + "epoch": 1.0, + "learning_rate": 6.797943213232572e-10, + "loss": 0.2627, + "step": 11624 + }, + { + "epoch": 1.0, + "learning_rate": 6.478088794448223e-10, + "loss": 0.3286, + "step": 11625 + }, + { + "epoch": 1.0, + "learning_rate": 6.165941382241248e-10, + "loss": 0.2869, + "step": 11626 + }, + { + "epoch": 1.0, + "learning_rate": 5.861501000692382e-10, + "loss": 0.2546, + "step": 11627 + }, + { + "epoch": 1.0, + "learning_rate": 5.564767673249538e-10, + "loss": 0.3231, + "step": 11628 + }, + { + "epoch": 1.0, + "learning_rate": 5.27574142278331e-10, + "loss": 0.2661, + "step": 11629 + }, + { + "epoch": 1.0, + "learning_rate": 4.994422271575872e-10, + "loss": 0.2284, + "step": 11630 + }, + { + "epoch": 1.0, + "learning_rate": 4.720810241309881e-10, + "loss": 0.2263, + "step": 11631 + }, + { + "epoch": 1.0, + "learning_rate": 4.454905353068473e-10, + "loss": 0.23, + "step": 11632 + }, + { + "epoch": 1.0, + "learning_rate": 4.1967076273574657e-10, + "loss": 0.254, + "step": 11633 + }, + { + "epoch": 1.0, + "learning_rate": 3.946217084072057e-10, + "loss": 0.2627, + "step": 11634 + }, + { + "epoch": 1.0, + "learning_rate": 3.7034337425079225e-10, + "loss": 0.232, + "step": 11635 + }, + { + "epoch": 1.0, + "learning_rate": 3.4683576213945245e-10, + "loss": 0.3064, + "step": 11636 + }, + { + "epoch": 1.0, + "learning_rate": 3.240988738839601e-10, + "loss": 0.24, + "step": 11637 + }, + { + "epoch": 1.0, + "learning_rate": 3.0213271123735735e-10, + "loss": 0.2811, + "step": 11638 + }, + { + "epoch": 1.0, + "learning_rate": 2.80937275891624e-10, + "loss": 0.5745, + "step": 11639 + }, + { + "epoch": 1.0, + "learning_rate": 2.605125694821187e-10, + "loss": 0.2322, + "step": 11640 + }, + { + "epoch": 1.0, + "learning_rate": 2.4085859358202733e-10, + "loss": 0.5648, + "step": 11641 + }, + { + "epoch": 1.0, + "learning_rate": 2.2197534970569424e-10, + "loss": 0.2676, + "step": 11642 + }, + { + "epoch": 1.0, + "learning_rate": 2.0386283930973194e-10, + "loss": 0.2805, + "step": 11643 + }, + { + "epoch": 1.0, + "learning_rate": 1.8652106378969082e-10, + "loss": 0.2921, + "step": 11644 + }, + { + "epoch": 1.0, + "learning_rate": 1.6995002448116916e-10, + "loss": 0.2927, + "step": 11645 + }, + { + "epoch": 1.0, + "learning_rate": 1.5414972266314389e-10, + "loss": 0.2812, + "step": 11646 + }, + { + "epoch": 1.0, + "learning_rate": 1.3912015955241942e-10, + "loss": 0.2792, + "step": 11647 + }, + { + "epoch": 1.0, + "learning_rate": 1.248613363069584e-10, + "loss": 0.2933, + "step": 11648 + }, + { + "epoch": 1.0, + "learning_rate": 1.1137325402699184e-10, + "loss": 0.3054, + "step": 11649 + }, + { + "epoch": 1.0, + "learning_rate": 9.865591375168848e-11, + "loss": 0.2521, + "step": 11650 + }, + { + "epoch": 1.0, + "learning_rate": 8.670931646026504e-11, + "loss": 0.2402, + "step": 11651 + }, + { + "epoch": 1.0, + "learning_rate": 7.553346307420662e-11, + "loss": 0.2681, + "step": 11652 + }, + { + "epoch": 1.0, + "learning_rate": 6.512835445615651e-11, + "loss": 0.2566, + "step": 11653 + }, + { + "epoch": 1.0, + "learning_rate": 5.549399140547529e-11, + "loss": 0.2853, + "step": 11654 + }, + { + "epoch": 1.0, + "learning_rate": 4.663037466712261e-11, + "loss": 0.2484, + "step": 11655 + }, + { + "epoch": 1.0, + "learning_rate": 3.853750492277542e-11, + "loss": 0.2497, + "step": 11656 + }, + { + "epoch": 1.0, + "learning_rate": 3.1215382796379036e-11, + "loss": 0.3162, + "step": 11657 + }, + { + "epoch": 1.0, + "learning_rate": 2.466400885303699e-11, + "loss": 0.2766, + "step": 11658 + }, + { + "epoch": 1.0, + "learning_rate": 1.8883383597900763e-11, + "loss": 0.2817, + "step": 11659 + }, + { + "epoch": 1.0, + "learning_rate": 1.3873507475059556e-11, + "loss": 0.2537, + "step": 11660 + }, + { + "epoch": 1.0, + "learning_rate": 9.634380871981209e-12, + "loss": 0.262, + "step": 11661 + }, + { + "epoch": 1.0, + "learning_rate": 6.166004113961066e-12, + "loss": 0.239, + "step": 11662 + }, + { + "epoch": 1.0, + "learning_rate": 3.468377470783324e-12, + "loss": 0.2936, + "step": 11663 + }, + { + "epoch": 1.0, + "learning_rate": 1.5415011478392416e-12, + "loss": 0.2358, + "step": 11664 + }, + { + "epoch": 1.0, + "learning_rate": 3.853752938987043e-13, + "loss": 0.6515, + "step": 11665 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.3406, + "step": 11666 + }, + { + "epoch": 1.0, + "step": 11666, + "total_flos": 1.6909495139958784e+16, + "train_loss": 0.30232172586201356, + "train_runtime": 93155.4817, + "train_samples_per_second": 16.029, + "train_steps_per_second": 0.125 + } + ], + "max_steps": 11666, + "num_train_epochs": 1, + "total_flos": 1.6909495139958784e+16, + "trial_name": null, + "trial_params": null +}