diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,50143 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 8353, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2e-08, + "loss": 3.7323, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4e-08, + "loss": 3.7137, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 3.6367, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8e-08, + "loss": 3.5486, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000001e-07, + "loss": 3.6136, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-07, + "loss": 3.5602, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.4e-07, + "loss": 3.6057, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 3.6638, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 1.8e-07, + "loss": 3.6412, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000002e-07, + "loss": 3.6552, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.2e-07, + "loss": 3.6624, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 2.4000000000000003e-07, + "loss": 3.4984, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 2.6e-07, + "loss": 3.6175, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 2.8e-07, + "loss": 3.6035, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 3.0000000000000004e-07, + "loss": 3.6981, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.2e-07, + "loss": 3.5488, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 3.4000000000000003e-07, + "loss": 3.5959, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 3.6132, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 3.8e-07, + "loss": 3.7469, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 4.0000000000000003e-07, + "loss": 3.6056, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.6174, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 4.4e-07, + "loss": 3.5833, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 4.6000000000000004e-07, + "loss": 3.7458, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 4.800000000000001e-07, + "loss": 3.51, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 3.6455, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.2e-07, + "loss": 3.59, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 5.4e-07, + "loss": 3.5142, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 5.6e-07, + "loss": 3.6316, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 5.800000000000001e-07, + "loss": 3.5492, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-07, + "loss": 3.5157, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.200000000000001e-07, + "loss": 3.5312, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 6.4e-07, + "loss": 3.6773, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 6.6e-07, + "loss": 3.516, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 6.800000000000001e-07, + "loss": 3.5887, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 7.000000000000001e-07, + "loss": 3.552, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.2e-07, + "loss": 3.6936, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 7.4e-07, + "loss": 3.6376, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 7.6e-07, + "loss": 3.4676, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 7.8e-07, + "loss": 3.5527, + "step": 39 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-07, + "loss": 3.6337, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8.200000000000001e-07, + "loss": 3.6403, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 8.400000000000001e-07, + "loss": 3.5982, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 8.6e-07, + "loss": 3.5006, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 8.8e-07, + "loss": 3.5175, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 9.000000000000001e-07, + "loss": 3.5362, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 9.200000000000001e-07, + "loss": 3.5184, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 9.400000000000001e-07, + "loss": 3.5962, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 9.600000000000001e-07, + "loss": 3.5796, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 9.800000000000001e-07, + "loss": 3.6106, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 1.0000000000000002e-06, + "loss": 3.5675, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.02e-06, + "loss": 3.4991, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 1.04e-06, + "loss": 3.6283, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 1.06e-06, + "loss": 3.4481, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 1.08e-06, + "loss": 3.5471, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 1.1e-06, + "loss": 3.5264, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 1.12e-06, + "loss": 3.5195, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 1.14e-06, + "loss": 3.623, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 1.1600000000000001e-06, + "loss": 3.4824, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 1.1800000000000001e-06, + "loss": 3.6108, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 1.2000000000000002e-06, + "loss": 3.5415, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.2200000000000002e-06, + "loss": 3.5379, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 1.2400000000000002e-06, + "loss": 3.6074, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 1.26e-06, + "loss": 3.6348, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 1.28e-06, + "loss": 3.5117, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.3e-06, + "loss": 3.6086, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 1.32e-06, + "loss": 3.5531, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 1.34e-06, + "loss": 3.5341, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 1.3600000000000001e-06, + "loss": 3.4518, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 1.3800000000000001e-06, + "loss": 3.4945, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 1.4000000000000001e-06, + "loss": 3.5467, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.42e-06, + "loss": 3.5064, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 1.44e-06, + "loss": 3.4558, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 1.46e-06, + "loss": 3.6236, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 1.48e-06, + "loss": 3.4112, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-06, + "loss": 3.4346, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 1.52e-06, + "loss": 3.5229, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 1.54e-06, + "loss": 3.5693, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 1.56e-06, + "loss": 3.5549, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 1.5800000000000001e-06, + "loss": 3.5007, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000001e-06, + "loss": 3.4937, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.6200000000000002e-06, + "loss": 3.6853, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 1.6400000000000002e-06, + "loss": 3.4964, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 1.6600000000000002e-06, + "loss": 3.5038, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 1.6800000000000002e-06, + "loss": 3.5303, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 1.7000000000000002e-06, + "loss": 3.6223, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 1.72e-06, + "loss": 3.5823, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 1.74e-06, + "loss": 3.4536, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 1.76e-06, + "loss": 3.4622, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 1.7800000000000001e-06, + "loss": 3.5351, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 1.8000000000000001e-06, + "loss": 3.5045, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.8200000000000002e-06, + "loss": 3.5873, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 1.8400000000000002e-06, + "loss": 3.5361, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 1.8600000000000002e-06, + "loss": 3.6108, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 1.8800000000000002e-06, + "loss": 3.4927, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 1.9000000000000002e-06, + "loss": 3.5481, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 1.9200000000000003e-06, + "loss": 3.5281, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.94e-06, + "loss": 3.5092, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 1.9600000000000003e-06, + "loss": 3.5297, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 1.98e-06, + "loss": 3.5722, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 3.4638, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 2.02e-06, + "loss": 3.5265, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 2.04e-06, + "loss": 3.5187, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 2.06e-06, + "loss": 3.5453, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 2.08e-06, + "loss": 3.5852, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 2.1000000000000002e-06, + "loss": 3.5127, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.12e-06, + "loss": 3.5489, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 2.1400000000000003e-06, + "loss": 3.5276, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 2.16e-06, + "loss": 3.444, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 2.1800000000000003e-06, + "loss": 3.4195, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 2.2e-06, + "loss": 3.601, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.2200000000000003e-06, + "loss": 3.455, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 3.5275, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 2.2600000000000004e-06, + "loss": 3.4837, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 2.28e-06, + "loss": 3.5999, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 2.3000000000000004e-06, + "loss": 3.5134, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.3200000000000002e-06, + "loss": 3.5527, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 2.3400000000000005e-06, + "loss": 3.5631, + "step": 117 + }, + { + "epoch": 0.01, + "learning_rate": 2.3600000000000003e-06, + "loss": 3.471, + "step": 118 + }, + { + "epoch": 0.01, + "learning_rate": 2.38e-06, + "loss": 3.5592, + "step": 119 + }, + { + "epoch": 0.01, + "learning_rate": 2.4000000000000003e-06, + "loss": 3.4264, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.42e-06, + "loss": 3.5303, + "step": 121 + }, + { + "epoch": 0.01, + "learning_rate": 2.4400000000000004e-06, + "loss": 3.4599, + "step": 122 + }, + { + "epoch": 0.01, + "learning_rate": 2.46e-06, + "loss": 3.55, + "step": 123 + }, + { + "epoch": 0.01, + "learning_rate": 2.4800000000000004e-06, + "loss": 3.3518, + "step": 124 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-06, + "loss": 3.5255, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 2.52e-06, + "loss": 3.5349, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 2.5400000000000002e-06, + "loss": 3.5229, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 2.56e-06, + "loss": 3.4806, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 2.5800000000000003e-06, + "loss": 3.4338, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 2.6e-06, + "loss": 3.5073, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 2.6200000000000003e-06, + "loss": 3.5286, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 2.64e-06, + "loss": 3.4175, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 2.6600000000000004e-06, + "loss": 3.4689, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 2.68e-06, + "loss": 3.5717, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 2.7000000000000004e-06, + "loss": 3.6018, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 2.7200000000000002e-06, + "loss": 3.5552, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 2.7400000000000004e-06, + "loss": 3.5714, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 2.7600000000000003e-06, + "loss": 3.533, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 2.7800000000000005e-06, + "loss": 3.5498, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 2.8000000000000003e-06, + "loss": 3.4394, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 2.82e-06, + "loss": 3.525, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 2.84e-06, + "loss": 3.5113, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 2.86e-06, + "loss": 3.5528, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 2.88e-06, + "loss": 3.5723, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 2.9e-06, + "loss": 3.538, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 2.92e-06, + "loss": 3.4708, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 2.9400000000000002e-06, + "loss": 3.5312, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 2.96e-06, + "loss": 3.546, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 2.9800000000000003e-06, + "loss": 3.5787, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 3.5079, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 3.0200000000000003e-06, + "loss": 3.5157, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 3.04e-06, + "loss": 3.487, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 3.0600000000000003e-06, + "loss": 3.5814, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 3.08e-06, + "loss": 3.5155, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 3.1000000000000004e-06, + "loss": 3.56, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 3.12e-06, + "loss": 3.468, + "step": 156 + }, + { + "epoch": 0.02, + "learning_rate": 3.1400000000000004e-06, + "loss": 3.5285, + "step": 157 + }, + { + "epoch": 0.02, + "learning_rate": 3.1600000000000002e-06, + "loss": 3.5289, + "step": 158 + }, + { + "epoch": 0.02, + "learning_rate": 3.1800000000000005e-06, + "loss": 3.5315, + "step": 159 + }, + { + "epoch": 0.02, + "learning_rate": 3.2000000000000003e-06, + "loss": 3.4919, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 3.2200000000000005e-06, + "loss": 3.4659, + "step": 161 + }, + { + "epoch": 0.02, + "learning_rate": 3.2400000000000003e-06, + "loss": 3.4861, + "step": 162 + }, + { + "epoch": 0.02, + "learning_rate": 3.2600000000000006e-06, + "loss": 3.5563, + "step": 163 + }, + { + "epoch": 0.02, + "learning_rate": 3.2800000000000004e-06, + "loss": 3.4905, + "step": 164 + }, + { + "epoch": 0.02, + "learning_rate": 3.3000000000000006e-06, + "loss": 3.5122, + "step": 165 + }, + { + "epoch": 0.02, + "learning_rate": 3.3200000000000004e-06, + "loss": 3.4332, + "step": 166 + }, + { + "epoch": 0.02, + "learning_rate": 3.3400000000000006e-06, + "loss": 3.4551, + "step": 167 + }, + { + "epoch": 0.02, + "learning_rate": 3.3600000000000004e-06, + "loss": 3.5413, + "step": 168 + }, + { + "epoch": 0.02, + "learning_rate": 3.3800000000000007e-06, + "loss": 3.4896, + "step": 169 + }, + { + "epoch": 0.02, + "learning_rate": 3.4000000000000005e-06, + "loss": 3.495, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 3.4200000000000007e-06, + "loss": 3.4511, + "step": 171 + }, + { + "epoch": 0.02, + "learning_rate": 3.44e-06, + "loss": 3.499, + "step": 172 + }, + { + "epoch": 0.02, + "learning_rate": 3.46e-06, + "loss": 3.5349, + "step": 173 + }, + { + "epoch": 0.02, + "learning_rate": 3.48e-06, + "loss": 3.5299, + "step": 174 + }, + { + "epoch": 0.02, + "learning_rate": 3.5e-06, + "loss": 3.3971, + "step": 175 + }, + { + "epoch": 0.02, + "learning_rate": 3.52e-06, + "loss": 3.5318, + "step": 176 + }, + { + "epoch": 0.02, + "learning_rate": 3.54e-06, + "loss": 3.4478, + "step": 177 + }, + { + "epoch": 0.02, + "learning_rate": 3.5600000000000002e-06, + "loss": 3.4938, + "step": 178 + }, + { + "epoch": 0.02, + "learning_rate": 3.58e-06, + "loss": 3.6172, + "step": 179 + }, + { + "epoch": 0.02, + "learning_rate": 3.6000000000000003e-06, + "loss": 3.3897, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 3.62e-06, + "loss": 3.5722, + "step": 181 + }, + { + "epoch": 0.02, + "learning_rate": 3.6400000000000003e-06, + "loss": 3.5292, + "step": 182 + }, + { + "epoch": 0.02, + "learning_rate": 3.66e-06, + "loss": 3.4581, + "step": 183 + }, + { + "epoch": 0.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 3.3718, + "step": 184 + }, + { + "epoch": 0.02, + "learning_rate": 3.7e-06, + "loss": 3.5642, + "step": 185 + }, + { + "epoch": 0.02, + "learning_rate": 3.7200000000000004e-06, + "loss": 3.451, + "step": 186 + }, + { + "epoch": 0.02, + "learning_rate": 3.74e-06, + "loss": 3.4989, + "step": 187 + }, + { + "epoch": 0.02, + "learning_rate": 3.7600000000000004e-06, + "loss": 3.4142, + "step": 188 + }, + { + "epoch": 0.02, + "learning_rate": 3.7800000000000002e-06, + "loss": 3.3315, + "step": 189 + }, + { + "epoch": 0.02, + "learning_rate": 3.8000000000000005e-06, + "loss": 3.5248, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 3.820000000000001e-06, + "loss": 3.5681, + "step": 191 + }, + { + "epoch": 0.02, + "learning_rate": 3.8400000000000005e-06, + "loss": 3.4855, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 3.86e-06, + "loss": 3.5585, + "step": 193 + }, + { + "epoch": 0.02, + "learning_rate": 3.88e-06, + "loss": 3.5826, + "step": 194 + }, + { + "epoch": 0.02, + "learning_rate": 3.900000000000001e-06, + "loss": 3.5308, + "step": 195 + }, + { + "epoch": 0.02, + "learning_rate": 3.920000000000001e-06, + "loss": 3.5823, + "step": 196 + }, + { + "epoch": 0.02, + "learning_rate": 3.94e-06, + "loss": 3.4154, + "step": 197 + }, + { + "epoch": 0.02, + "learning_rate": 3.96e-06, + "loss": 3.383, + "step": 198 + }, + { + "epoch": 0.02, + "learning_rate": 3.980000000000001e-06, + "loss": 3.5263, + "step": 199 + }, + { + "epoch": 0.02, + "learning_rate": 4.000000000000001e-06, + "loss": 3.4644, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 4.0200000000000005e-06, + "loss": 3.4609, + "step": 201 + }, + { + "epoch": 0.02, + "learning_rate": 4.04e-06, + "loss": 3.4262, + "step": 202 + }, + { + "epoch": 0.02, + "learning_rate": 4.060000000000001e-06, + "loss": 3.397, + "step": 203 + }, + { + "epoch": 0.02, + "learning_rate": 4.08e-06, + "loss": 3.4506, + "step": 204 + }, + { + "epoch": 0.02, + "learning_rate": 4.1e-06, + "loss": 3.4367, + "step": 205 + }, + { + "epoch": 0.02, + "learning_rate": 4.12e-06, + "loss": 3.3999, + "step": 206 + }, + { + "epoch": 0.02, + "learning_rate": 4.14e-06, + "loss": 3.4773, + "step": 207 + }, + { + "epoch": 0.02, + "learning_rate": 4.16e-06, + "loss": 3.518, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 4.18e-06, + "loss": 3.5913, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 4.2000000000000004e-06, + "loss": 3.425, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 4.22e-06, + "loss": 3.4567, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 4.24e-06, + "loss": 3.4119, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 4.26e-06, + "loss": 3.5247, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 4.2800000000000005e-06, + "loss": 3.5013, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 4.3e-06, + "loss": 3.5418, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 4.32e-06, + "loss": 3.4256, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 4.34e-06, + "loss": 3.4211, + "step": 217 + }, + { + "epoch": 0.03, + "learning_rate": 4.360000000000001e-06, + "loss": 3.4789, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 4.38e-06, + "loss": 3.4759, + "step": 219 + }, + { + "epoch": 0.03, + "learning_rate": 4.4e-06, + "loss": 3.4964, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 4.42e-06, + "loss": 3.4907, + "step": 221 + }, + { + "epoch": 0.03, + "learning_rate": 4.440000000000001e-06, + "loss": 3.4452, + "step": 222 + }, + { + "epoch": 0.03, + "learning_rate": 4.4600000000000005e-06, + "loss": 3.4275, + "step": 223 + }, + { + "epoch": 0.03, + "learning_rate": 4.48e-06, + "loss": 3.4328, + "step": 224 + }, + { + "epoch": 0.03, + "learning_rate": 4.5e-06, + "loss": 3.38, + "step": 225 + }, + { + "epoch": 0.03, + "learning_rate": 4.520000000000001e-06, + "loss": 3.5428, + "step": 226 + }, + { + "epoch": 0.03, + "learning_rate": 4.540000000000001e-06, + "loss": 3.4562, + "step": 227 + }, + { + "epoch": 0.03, + "learning_rate": 4.56e-06, + "loss": 3.5182, + "step": 228 + }, + { + "epoch": 0.03, + "learning_rate": 4.58e-06, + "loss": 3.4828, + "step": 229 + }, + { + "epoch": 0.03, + "learning_rate": 4.600000000000001e-06, + "loss": 3.4033, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 4.620000000000001e-06, + "loss": 3.539, + "step": 231 + }, + { + "epoch": 0.03, + "learning_rate": 4.6400000000000005e-06, + "loss": 3.5518, + "step": 232 + }, + { + "epoch": 0.03, + "learning_rate": 4.66e-06, + "loss": 3.4647, + "step": 233 + }, + { + "epoch": 0.03, + "learning_rate": 4.680000000000001e-06, + "loss": 3.358, + "step": 234 + }, + { + "epoch": 0.03, + "learning_rate": 4.7e-06, + "loss": 3.5417, + "step": 235 + }, + { + "epoch": 0.03, + "learning_rate": 4.7200000000000005e-06, + "loss": 3.4533, + "step": 236 + }, + { + "epoch": 0.03, + "learning_rate": 4.74e-06, + "loss": 3.471, + "step": 237 + }, + { + "epoch": 0.03, + "learning_rate": 4.76e-06, + "loss": 3.3832, + "step": 238 + }, + { + "epoch": 0.03, + "learning_rate": 4.78e-06, + "loss": 3.4809, + "step": 239 + }, + { + "epoch": 0.03, + "learning_rate": 4.800000000000001e-06, + "loss": 3.4727, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 4.8200000000000004e-06, + "loss": 3.4913, + "step": 241 + }, + { + "epoch": 0.03, + "learning_rate": 4.84e-06, + "loss": 3.4193, + "step": 242 + }, + { + "epoch": 0.03, + "learning_rate": 4.86e-06, + "loss": 3.4723, + "step": 243 + }, + { + "epoch": 0.03, + "learning_rate": 4.880000000000001e-06, + "loss": 3.4168, + "step": 244 + }, + { + "epoch": 0.03, + "learning_rate": 4.9000000000000005e-06, + "loss": 3.4535, + "step": 245 + }, + { + "epoch": 0.03, + "learning_rate": 4.92e-06, + "loss": 3.5212, + "step": 246 + }, + { + "epoch": 0.03, + "learning_rate": 4.94e-06, + "loss": 3.4314, + "step": 247 + }, + { + "epoch": 0.03, + "learning_rate": 4.960000000000001e-06, + "loss": 3.4973, + "step": 248 + }, + { + "epoch": 0.03, + "learning_rate": 4.980000000000001e-06, + "loss": 3.4406, + "step": 249 + }, + { + "epoch": 0.03, + "learning_rate": 5e-06, + "loss": 3.4547, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 5.02e-06, + "loss": 3.5058, + "step": 251 + }, + { + "epoch": 0.03, + "learning_rate": 5.04e-06, + "loss": 3.5257, + "step": 252 + }, + { + "epoch": 0.03, + "learning_rate": 5.060000000000001e-06, + "loss": 3.5063, + "step": 253 + }, + { + "epoch": 0.03, + "learning_rate": 5.0800000000000005e-06, + "loss": 3.4964, + "step": 254 + }, + { + "epoch": 0.03, + "learning_rate": 5.1e-06, + "loss": 3.4224, + "step": 255 + }, + { + "epoch": 0.03, + "learning_rate": 5.12e-06, + "loss": 3.5307, + "step": 256 + }, + { + "epoch": 0.03, + "learning_rate": 5.140000000000001e-06, + "loss": 3.5405, + "step": 257 + }, + { + "epoch": 0.03, + "learning_rate": 5.1600000000000006e-06, + "loss": 3.5068, + "step": 258 + }, + { + "epoch": 0.03, + "learning_rate": 5.18e-06, + "loss": 3.5086, + "step": 259 + }, + { + "epoch": 0.03, + "learning_rate": 5.2e-06, + "loss": 3.4756, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 5.220000000000001e-06, + "loss": 3.5602, + "step": 261 + }, + { + "epoch": 0.03, + "learning_rate": 5.240000000000001e-06, + "loss": 3.4704, + "step": 262 + }, + { + "epoch": 0.03, + "learning_rate": 5.2600000000000005e-06, + "loss": 3.4812, + "step": 263 + }, + { + "epoch": 0.03, + "learning_rate": 5.28e-06, + "loss": 3.4746, + "step": 264 + }, + { + "epoch": 0.03, + "learning_rate": 5.300000000000001e-06, + "loss": 3.4694, + "step": 265 + }, + { + "epoch": 0.03, + "learning_rate": 5.320000000000001e-06, + "loss": 3.4963, + "step": 266 + }, + { + "epoch": 0.03, + "learning_rate": 5.3400000000000005e-06, + "loss": 3.5148, + "step": 267 + }, + { + "epoch": 0.03, + "learning_rate": 5.36e-06, + "loss": 3.4962, + "step": 268 + }, + { + "epoch": 0.03, + "learning_rate": 5.380000000000001e-06, + "loss": 3.4728, + "step": 269 + }, + { + "epoch": 0.03, + "learning_rate": 5.400000000000001e-06, + "loss": 3.4845, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 5.420000000000001e-06, + "loss": 3.4124, + "step": 271 + }, + { + "epoch": 0.03, + "learning_rate": 5.4400000000000004e-06, + "loss": 3.4928, + "step": 272 + }, + { + "epoch": 0.03, + "learning_rate": 5.460000000000001e-06, + "loss": 3.451, + "step": 273 + }, + { + "epoch": 0.03, + "learning_rate": 5.480000000000001e-06, + "loss": 3.4944, + "step": 274 + }, + { + "epoch": 0.03, + "learning_rate": 5.500000000000001e-06, + "loss": 3.4845, + "step": 275 + }, + { + "epoch": 0.03, + "learning_rate": 5.5200000000000005e-06, + "loss": 3.4862, + "step": 276 + }, + { + "epoch": 0.03, + "learning_rate": 5.540000000000001e-06, + "loss": 3.4852, + "step": 277 + }, + { + "epoch": 0.03, + "learning_rate": 5.560000000000001e-06, + "loss": 3.4437, + "step": 278 + }, + { + "epoch": 0.03, + "learning_rate": 5.580000000000001e-06, + "loss": 3.494, + "step": 279 + }, + { + "epoch": 0.03, + "learning_rate": 5.600000000000001e-06, + "loss": 3.4525, + "step": 280 + }, + { + "epoch": 0.03, + "learning_rate": 5.620000000000001e-06, + "loss": 3.4632, + "step": 281 + }, + { + "epoch": 0.03, + "learning_rate": 5.64e-06, + "loss": 3.519, + "step": 282 + }, + { + "epoch": 0.03, + "learning_rate": 5.66e-06, + "loss": 3.3807, + "step": 283 + }, + { + "epoch": 0.03, + "learning_rate": 5.68e-06, + "loss": 3.4154, + "step": 284 + }, + { + "epoch": 0.03, + "learning_rate": 5.7e-06, + "loss": 3.4645, + "step": 285 + }, + { + "epoch": 0.03, + "learning_rate": 5.72e-06, + "loss": 3.4135, + "step": 286 + }, + { + "epoch": 0.03, + "learning_rate": 5.74e-06, + "loss": 3.5097, + "step": 287 + }, + { + "epoch": 0.03, + "learning_rate": 5.76e-06, + "loss": 3.406, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 5.78e-06, + "loss": 3.4167, + "step": 289 + }, + { + "epoch": 0.03, + "learning_rate": 5.8e-06, + "loss": 3.4954, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 5.82e-06, + "loss": 3.5116, + "step": 291 + }, + { + "epoch": 0.03, + "learning_rate": 5.84e-06, + "loss": 3.4269, + "step": 292 + }, + { + "epoch": 0.04, + "learning_rate": 5.86e-06, + "loss": 3.4835, + "step": 293 + }, + { + "epoch": 0.04, + "learning_rate": 5.8800000000000005e-06, + "loss": 3.4997, + "step": 294 + }, + { + "epoch": 0.04, + "learning_rate": 5.9e-06, + "loss": 3.4408, + "step": 295 + }, + { + "epoch": 0.04, + "learning_rate": 5.92e-06, + "loss": 3.4042, + "step": 296 + }, + { + "epoch": 0.04, + "learning_rate": 5.94e-06, + "loss": 3.4766, + "step": 297 + }, + { + "epoch": 0.04, + "learning_rate": 5.9600000000000005e-06, + "loss": 3.5762, + "step": 298 + }, + { + "epoch": 0.04, + "learning_rate": 5.98e-06, + "loss": 3.4793, + "step": 299 + }, + { + "epoch": 0.04, + "learning_rate": 6e-06, + "loss": 3.5667, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 6.02e-06, + "loss": 3.4949, + "step": 301 + }, + { + "epoch": 0.04, + "learning_rate": 6.040000000000001e-06, + "loss": 3.3489, + "step": 302 + }, + { + "epoch": 0.04, + "learning_rate": 6.0600000000000004e-06, + "loss": 3.4937, + "step": 303 + }, + { + "epoch": 0.04, + "learning_rate": 6.08e-06, + "loss": 3.4728, + "step": 304 + }, + { + "epoch": 0.04, + "learning_rate": 6.1e-06, + "loss": 3.5092, + "step": 305 + }, + { + "epoch": 0.04, + "learning_rate": 6.120000000000001e-06, + "loss": 3.4086, + "step": 306 + }, + { + "epoch": 0.04, + "learning_rate": 6.1400000000000005e-06, + "loss": 3.3792, + "step": 307 + }, + { + "epoch": 0.04, + "learning_rate": 6.16e-06, + "loss": 3.4208, + "step": 308 + }, + { + "epoch": 0.04, + "learning_rate": 6.18e-06, + "loss": 3.4187, + "step": 309 + }, + { + "epoch": 0.04, + "learning_rate": 6.200000000000001e-06, + "loss": 3.4898, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 6.220000000000001e-06, + "loss": 3.3771, + "step": 311 + }, + { + "epoch": 0.04, + "learning_rate": 6.24e-06, + "loss": 3.495, + "step": 312 + }, + { + "epoch": 0.04, + "learning_rate": 6.26e-06, + "loss": 3.4031, + "step": 313 + }, + { + "epoch": 0.04, + "learning_rate": 6.280000000000001e-06, + "loss": 3.4497, + "step": 314 + }, + { + "epoch": 0.04, + "learning_rate": 6.300000000000001e-06, + "loss": 3.4178, + "step": 315 + }, + { + "epoch": 0.04, + "learning_rate": 6.3200000000000005e-06, + "loss": 3.4735, + "step": 316 + }, + { + "epoch": 0.04, + "learning_rate": 6.34e-06, + "loss": 3.4925, + "step": 317 + }, + { + "epoch": 0.04, + "learning_rate": 6.360000000000001e-06, + "loss": 3.433, + "step": 318 + }, + { + "epoch": 0.04, + "learning_rate": 6.380000000000001e-06, + "loss": 3.4027, + "step": 319 + }, + { + "epoch": 0.04, + "learning_rate": 6.4000000000000006e-06, + "loss": 3.5192, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 6.42e-06, + "loss": 3.5038, + "step": 321 + }, + { + "epoch": 0.04, + "learning_rate": 6.440000000000001e-06, + "loss": 3.4815, + "step": 322 + }, + { + "epoch": 0.04, + "learning_rate": 6.460000000000001e-06, + "loss": 3.405, + "step": 323 + }, + { + "epoch": 0.04, + "learning_rate": 6.480000000000001e-06, + "loss": 3.4241, + "step": 324 + }, + { + "epoch": 0.04, + "learning_rate": 6.5000000000000004e-06, + "loss": 3.3476, + "step": 325 + }, + { + "epoch": 0.04, + "learning_rate": 6.520000000000001e-06, + "loss": 3.4546, + "step": 326 + }, + { + "epoch": 0.04, + "learning_rate": 6.540000000000001e-06, + "loss": 3.5562, + "step": 327 + }, + { + "epoch": 0.04, + "learning_rate": 6.560000000000001e-06, + "loss": 3.4639, + "step": 328 + }, + { + "epoch": 0.04, + "learning_rate": 6.5800000000000005e-06, + "loss": 3.4512, + "step": 329 + }, + { + "epoch": 0.04, + "learning_rate": 6.600000000000001e-06, + "loss": 3.5148, + "step": 330 + }, + { + "epoch": 0.04, + "learning_rate": 6.620000000000001e-06, + "loss": 3.4237, + "step": 331 + }, + { + "epoch": 0.04, + "learning_rate": 6.640000000000001e-06, + "loss": 3.4923, + "step": 332 + }, + { + "epoch": 0.04, + "learning_rate": 6.660000000000001e-06, + "loss": 3.4351, + "step": 333 + }, + { + "epoch": 0.04, + "learning_rate": 6.680000000000001e-06, + "loss": 3.5047, + "step": 334 + }, + { + "epoch": 0.04, + "learning_rate": 6.700000000000001e-06, + "loss": 3.4776, + "step": 335 + }, + { + "epoch": 0.04, + "learning_rate": 6.720000000000001e-06, + "loss": 3.4456, + "step": 336 + }, + { + "epoch": 0.04, + "learning_rate": 6.740000000000001e-06, + "loss": 3.4186, + "step": 337 + }, + { + "epoch": 0.04, + "learning_rate": 6.760000000000001e-06, + "loss": 3.5689, + "step": 338 + }, + { + "epoch": 0.04, + "learning_rate": 6.780000000000001e-06, + "loss": 3.5343, + "step": 339 + }, + { + "epoch": 0.04, + "learning_rate": 6.800000000000001e-06, + "loss": 3.4686, + "step": 340 + }, + { + "epoch": 0.04, + "learning_rate": 6.820000000000001e-06, + "loss": 3.362, + "step": 341 + }, + { + "epoch": 0.04, + "learning_rate": 6.8400000000000014e-06, + "loss": 3.4372, + "step": 342 + }, + { + "epoch": 0.04, + "learning_rate": 6.860000000000001e-06, + "loss": 3.506, + "step": 343 + }, + { + "epoch": 0.04, + "learning_rate": 6.88e-06, + "loss": 3.4716, + "step": 344 + }, + { + "epoch": 0.04, + "learning_rate": 6.9e-06, + "loss": 3.5198, + "step": 345 + }, + { + "epoch": 0.04, + "learning_rate": 6.92e-06, + "loss": 3.3977, + "step": 346 + }, + { + "epoch": 0.04, + "learning_rate": 6.9400000000000005e-06, + "loss": 3.4112, + "step": 347 + }, + { + "epoch": 0.04, + "learning_rate": 6.96e-06, + "loss": 3.4709, + "step": 348 + }, + { + "epoch": 0.04, + "learning_rate": 6.98e-06, + "loss": 3.3945, + "step": 349 + }, + { + "epoch": 0.04, + "learning_rate": 7e-06, + "loss": 3.4589, + "step": 350 + }, + { + "epoch": 0.04, + "learning_rate": 7.0200000000000006e-06, + "loss": 3.3037, + "step": 351 + }, + { + "epoch": 0.04, + "learning_rate": 7.04e-06, + "loss": 3.4702, + "step": 352 + }, + { + "epoch": 0.04, + "learning_rate": 7.06e-06, + "loss": 3.4397, + "step": 353 + }, + { + "epoch": 0.04, + "learning_rate": 7.08e-06, + "loss": 3.4755, + "step": 354 + }, + { + "epoch": 0.04, + "learning_rate": 7.100000000000001e-06, + "loss": 3.4415, + "step": 355 + }, + { + "epoch": 0.04, + "learning_rate": 7.1200000000000004e-06, + "loss": 3.4173, + "step": 356 + }, + { + "epoch": 0.04, + "learning_rate": 7.14e-06, + "loss": 3.5845, + "step": 357 + }, + { + "epoch": 0.04, + "learning_rate": 7.16e-06, + "loss": 3.4043, + "step": 358 + }, + { + "epoch": 0.04, + "learning_rate": 7.180000000000001e-06, + "loss": 3.5315, + "step": 359 + }, + { + "epoch": 0.04, + "learning_rate": 7.2000000000000005e-06, + "loss": 3.3935, + "step": 360 + }, + { + "epoch": 0.04, + "learning_rate": 7.22e-06, + "loss": 3.4366, + "step": 361 + }, + { + "epoch": 0.04, + "learning_rate": 7.24e-06, + "loss": 3.4546, + "step": 362 + }, + { + "epoch": 0.04, + "learning_rate": 7.260000000000001e-06, + "loss": 3.4882, + "step": 363 + }, + { + "epoch": 0.04, + "learning_rate": 7.280000000000001e-06, + "loss": 3.4794, + "step": 364 + }, + { + "epoch": 0.04, + "learning_rate": 7.3e-06, + "loss": 3.521, + "step": 365 + }, + { + "epoch": 0.04, + "learning_rate": 7.32e-06, + "loss": 3.5121, + "step": 366 + }, + { + "epoch": 0.04, + "learning_rate": 7.340000000000001e-06, + "loss": 3.4349, + "step": 367 + }, + { + "epoch": 0.04, + "learning_rate": 7.360000000000001e-06, + "loss": 3.5197, + "step": 368 + }, + { + "epoch": 0.04, + "learning_rate": 7.3800000000000005e-06, + "loss": 3.4139, + "step": 369 + }, + { + "epoch": 0.04, + "learning_rate": 7.4e-06, + "loss": 3.3723, + "step": 370 + }, + { + "epoch": 0.04, + "learning_rate": 7.420000000000001e-06, + "loss": 3.3887, + "step": 371 + }, + { + "epoch": 0.04, + "learning_rate": 7.440000000000001e-06, + "loss": 3.5284, + "step": 372 + }, + { + "epoch": 0.04, + "learning_rate": 7.4600000000000006e-06, + "loss": 3.4186, + "step": 373 + }, + { + "epoch": 0.04, + "learning_rate": 7.48e-06, + "loss": 3.4267, + "step": 374 + }, + { + "epoch": 0.04, + "learning_rate": 7.500000000000001e-06, + "loss": 3.4837, + "step": 375 + }, + { + "epoch": 0.05, + "learning_rate": 7.520000000000001e-06, + "loss": 3.4861, + "step": 376 + }, + { + "epoch": 0.05, + "learning_rate": 7.540000000000001e-06, + "loss": 3.4127, + "step": 377 + }, + { + "epoch": 0.05, + "learning_rate": 7.5600000000000005e-06, + "loss": 3.4252, + "step": 378 + }, + { + "epoch": 0.05, + "learning_rate": 7.58e-06, + "loss": 3.4129, + "step": 379 + }, + { + "epoch": 0.05, + "learning_rate": 7.600000000000001e-06, + "loss": 3.3826, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 7.620000000000001e-06, + "loss": 3.4218, + "step": 381 + }, + { + "epoch": 0.05, + "learning_rate": 7.640000000000001e-06, + "loss": 3.4645, + "step": 382 + }, + { + "epoch": 0.05, + "learning_rate": 7.660000000000001e-06, + "loss": 3.3702, + "step": 383 + }, + { + "epoch": 0.05, + "learning_rate": 7.680000000000001e-06, + "loss": 3.4837, + "step": 384 + }, + { + "epoch": 0.05, + "learning_rate": 7.7e-06, + "loss": 3.4199, + "step": 385 + }, + { + "epoch": 0.05, + "learning_rate": 7.72e-06, + "loss": 3.5145, + "step": 386 + }, + { + "epoch": 0.05, + "learning_rate": 7.74e-06, + "loss": 3.4834, + "step": 387 + }, + { + "epoch": 0.05, + "learning_rate": 7.76e-06, + "loss": 3.4762, + "step": 388 + }, + { + "epoch": 0.05, + "learning_rate": 7.78e-06, + "loss": 3.4538, + "step": 389 + }, + { + "epoch": 0.05, + "learning_rate": 7.800000000000002e-06, + "loss": 3.497, + "step": 390 + }, + { + "epoch": 0.05, + "learning_rate": 7.820000000000001e-06, + "loss": 3.4715, + "step": 391 + }, + { + "epoch": 0.05, + "learning_rate": 7.840000000000001e-06, + "loss": 3.4676, + "step": 392 + }, + { + "epoch": 0.05, + "learning_rate": 7.860000000000001e-06, + "loss": 3.5029, + "step": 393 + }, + { + "epoch": 0.05, + "learning_rate": 7.88e-06, + "loss": 3.43, + "step": 394 + }, + { + "epoch": 0.05, + "learning_rate": 7.9e-06, + "loss": 3.4504, + "step": 395 + }, + { + "epoch": 0.05, + "learning_rate": 7.92e-06, + "loss": 3.4617, + "step": 396 + }, + { + "epoch": 0.05, + "learning_rate": 7.94e-06, + "loss": 3.4499, + "step": 397 + }, + { + "epoch": 0.05, + "learning_rate": 7.960000000000002e-06, + "loss": 3.4947, + "step": 398 + }, + { + "epoch": 0.05, + "learning_rate": 7.980000000000002e-06, + "loss": 3.346, + "step": 399 + }, + { + "epoch": 0.05, + "learning_rate": 8.000000000000001e-06, + "loss": 3.457, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 8.020000000000001e-06, + "loss": 3.4013, + "step": 401 + }, + { + "epoch": 0.05, + "learning_rate": 8.040000000000001e-06, + "loss": 3.5218, + "step": 402 + }, + { + "epoch": 0.05, + "learning_rate": 8.06e-06, + "loss": 3.3919, + "step": 403 + }, + { + "epoch": 0.05, + "learning_rate": 8.08e-06, + "loss": 3.4786, + "step": 404 + }, + { + "epoch": 0.05, + "learning_rate": 8.1e-06, + "loss": 3.4732, + "step": 405 + }, + { + "epoch": 0.05, + "learning_rate": 8.120000000000002e-06, + "loss": 3.4729, + "step": 406 + }, + { + "epoch": 0.05, + "learning_rate": 8.14e-06, + "loss": 3.446, + "step": 407 + }, + { + "epoch": 0.05, + "learning_rate": 8.16e-06, + "loss": 3.478, + "step": 408 + }, + { + "epoch": 0.05, + "learning_rate": 8.18e-06, + "loss": 3.4943, + "step": 409 + }, + { + "epoch": 0.05, + "learning_rate": 8.2e-06, + "loss": 3.3914, + "step": 410 + }, + { + "epoch": 0.05, + "learning_rate": 8.220000000000001e-06, + "loss": 3.5698, + "step": 411 + }, + { + "epoch": 0.05, + "learning_rate": 8.24e-06, + "loss": 3.5909, + "step": 412 + }, + { + "epoch": 0.05, + "learning_rate": 8.26e-06, + "loss": 3.5257, + "step": 413 + }, + { + "epoch": 0.05, + "learning_rate": 8.28e-06, + "loss": 3.3866, + "step": 414 + }, + { + "epoch": 0.05, + "learning_rate": 8.3e-06, + "loss": 3.4127, + "step": 415 + }, + { + "epoch": 0.05, + "learning_rate": 8.32e-06, + "loss": 3.4817, + "step": 416 + }, + { + "epoch": 0.05, + "learning_rate": 8.34e-06, + "loss": 3.5687, + "step": 417 + }, + { + "epoch": 0.05, + "learning_rate": 8.36e-06, + "loss": 3.4527, + "step": 418 + }, + { + "epoch": 0.05, + "learning_rate": 8.380000000000001e-06, + "loss": 3.4249, + "step": 419 + }, + { + "epoch": 0.05, + "learning_rate": 8.400000000000001e-06, + "loss": 3.449, + "step": 420 + }, + { + "epoch": 0.05, + "learning_rate": 8.42e-06, + "loss": 3.3908, + "step": 421 + }, + { + "epoch": 0.05, + "learning_rate": 8.44e-06, + "loss": 3.4499, + "step": 422 + }, + { + "epoch": 0.05, + "learning_rate": 8.46e-06, + "loss": 3.5658, + "step": 423 + }, + { + "epoch": 0.05, + "learning_rate": 8.48e-06, + "loss": 3.4081, + "step": 424 + }, + { + "epoch": 0.05, + "learning_rate": 8.5e-06, + "loss": 3.5629, + "step": 425 + }, + { + "epoch": 0.05, + "learning_rate": 8.52e-06, + "loss": 3.4464, + "step": 426 + }, + { + "epoch": 0.05, + "learning_rate": 8.540000000000001e-06, + "loss": 3.502, + "step": 427 + }, + { + "epoch": 0.05, + "learning_rate": 8.560000000000001e-06, + "loss": 3.4991, + "step": 428 + }, + { + "epoch": 0.05, + "learning_rate": 8.580000000000001e-06, + "loss": 3.4729, + "step": 429 + }, + { + "epoch": 0.05, + "learning_rate": 8.6e-06, + "loss": 3.3907, + "step": 430 + }, + { + "epoch": 0.05, + "learning_rate": 8.62e-06, + "loss": 3.4637, + "step": 431 + }, + { + "epoch": 0.05, + "learning_rate": 8.64e-06, + "loss": 3.4289, + "step": 432 + }, + { + "epoch": 0.05, + "learning_rate": 8.66e-06, + "loss": 3.5177, + "step": 433 + }, + { + "epoch": 0.05, + "learning_rate": 8.68e-06, + "loss": 3.4505, + "step": 434 + }, + { + "epoch": 0.05, + "learning_rate": 8.700000000000001e-06, + "loss": 3.4333, + "step": 435 + }, + { + "epoch": 0.05, + "learning_rate": 8.720000000000001e-06, + "loss": 3.6156, + "step": 436 + }, + { + "epoch": 0.05, + "learning_rate": 8.740000000000001e-06, + "loss": 3.476, + "step": 437 + }, + { + "epoch": 0.05, + "learning_rate": 8.76e-06, + "loss": 3.532, + "step": 438 + }, + { + "epoch": 0.05, + "learning_rate": 8.78e-06, + "loss": 3.4683, + "step": 439 + }, + { + "epoch": 0.05, + "learning_rate": 8.8e-06, + "loss": 3.3907, + "step": 440 + }, + { + "epoch": 0.05, + "learning_rate": 8.82e-06, + "loss": 3.3688, + "step": 441 + }, + { + "epoch": 0.05, + "learning_rate": 8.84e-06, + "loss": 3.5018, + "step": 442 + }, + { + "epoch": 0.05, + "learning_rate": 8.860000000000002e-06, + "loss": 3.3819, + "step": 443 + }, + { + "epoch": 0.05, + "learning_rate": 8.880000000000001e-06, + "loss": 3.4631, + "step": 444 + }, + { + "epoch": 0.05, + "learning_rate": 8.900000000000001e-06, + "loss": 3.4287, + "step": 445 + }, + { + "epoch": 0.05, + "learning_rate": 8.920000000000001e-06, + "loss": 3.4252, + "step": 446 + }, + { + "epoch": 0.05, + "learning_rate": 8.94e-06, + "loss": 3.4939, + "step": 447 + }, + { + "epoch": 0.05, + "learning_rate": 8.96e-06, + "loss": 3.4412, + "step": 448 + }, + { + "epoch": 0.05, + "learning_rate": 8.98e-06, + "loss": 3.4418, + "step": 449 + }, + { + "epoch": 0.05, + "learning_rate": 9e-06, + "loss": 3.5411, + "step": 450 + }, + { + "epoch": 0.05, + "learning_rate": 9.020000000000002e-06, + "loss": 3.5125, + "step": 451 + }, + { + "epoch": 0.05, + "learning_rate": 9.040000000000002e-06, + "loss": 3.5112, + "step": 452 + }, + { + "epoch": 0.05, + "learning_rate": 9.060000000000001e-06, + "loss": 3.5031, + "step": 453 + }, + { + "epoch": 0.05, + "learning_rate": 9.080000000000001e-06, + "loss": 3.491, + "step": 454 + }, + { + "epoch": 0.05, + "learning_rate": 9.100000000000001e-06, + "loss": 3.3861, + "step": 455 + }, + { + "epoch": 0.05, + "learning_rate": 9.12e-06, + "loss": 3.5518, + "step": 456 + }, + { + "epoch": 0.05, + "learning_rate": 9.14e-06, + "loss": 3.4181, + "step": 457 + }, + { + "epoch": 0.05, + "learning_rate": 9.16e-06, + "loss": 3.4794, + "step": 458 + }, + { + "epoch": 0.05, + "learning_rate": 9.180000000000002e-06, + "loss": 3.4145, + "step": 459 + }, + { + "epoch": 0.06, + "learning_rate": 9.200000000000002e-06, + "loss": 3.4515, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 9.220000000000002e-06, + "loss": 3.51, + "step": 461 + }, + { + "epoch": 0.06, + "learning_rate": 9.240000000000001e-06, + "loss": 3.4164, + "step": 462 + }, + { + "epoch": 0.06, + "learning_rate": 9.260000000000001e-06, + "loss": 3.4122, + "step": 463 + }, + { + "epoch": 0.06, + "learning_rate": 9.280000000000001e-06, + "loss": 3.488, + "step": 464 + }, + { + "epoch": 0.06, + "learning_rate": 9.3e-06, + "loss": 3.4722, + "step": 465 + }, + { + "epoch": 0.06, + "learning_rate": 9.32e-06, + "loss": 3.5493, + "step": 466 + }, + { + "epoch": 0.06, + "learning_rate": 9.340000000000002e-06, + "loss": 3.3499, + "step": 467 + }, + { + "epoch": 0.06, + "learning_rate": 9.360000000000002e-06, + "loss": 3.5354, + "step": 468 + }, + { + "epoch": 0.06, + "learning_rate": 9.38e-06, + "loss": 3.4701, + "step": 469 + }, + { + "epoch": 0.06, + "learning_rate": 9.4e-06, + "loss": 3.382, + "step": 470 + }, + { + "epoch": 0.06, + "learning_rate": 9.42e-06, + "loss": 3.3352, + "step": 471 + }, + { + "epoch": 0.06, + "learning_rate": 9.440000000000001e-06, + "loss": 3.463, + "step": 472 + }, + { + "epoch": 0.06, + "learning_rate": 9.460000000000001e-06, + "loss": 3.4408, + "step": 473 + }, + { + "epoch": 0.06, + "learning_rate": 9.48e-06, + "loss": 3.543, + "step": 474 + }, + { + "epoch": 0.06, + "learning_rate": 9.5e-06, + "loss": 3.4513, + "step": 475 + }, + { + "epoch": 0.06, + "learning_rate": 9.52e-06, + "loss": 3.5432, + "step": 476 + }, + { + "epoch": 0.06, + "learning_rate": 9.54e-06, + "loss": 3.4744, + "step": 477 + }, + { + "epoch": 0.06, + "learning_rate": 9.56e-06, + "loss": 3.4469, + "step": 478 + }, + { + "epoch": 0.06, + "learning_rate": 9.58e-06, + "loss": 3.4394, + "step": 479 + }, + { + "epoch": 0.06, + "learning_rate": 9.600000000000001e-06, + "loss": 3.4259, + "step": 480 + }, + { + "epoch": 0.06, + "learning_rate": 9.620000000000001e-06, + "loss": 3.4721, + "step": 481 + }, + { + "epoch": 0.06, + "learning_rate": 9.640000000000001e-06, + "loss": 3.4922, + "step": 482 + }, + { + "epoch": 0.06, + "learning_rate": 9.66e-06, + "loss": 3.4895, + "step": 483 + }, + { + "epoch": 0.06, + "learning_rate": 9.68e-06, + "loss": 3.4053, + "step": 484 + }, + { + "epoch": 0.06, + "learning_rate": 9.7e-06, + "loss": 3.4883, + "step": 485 + }, + { + "epoch": 0.06, + "learning_rate": 9.72e-06, + "loss": 3.4834, + "step": 486 + }, + { + "epoch": 0.06, + "learning_rate": 9.74e-06, + "loss": 3.4442, + "step": 487 + }, + { + "epoch": 0.06, + "learning_rate": 9.760000000000001e-06, + "loss": 3.5049, + "step": 488 + }, + { + "epoch": 0.06, + "learning_rate": 9.780000000000001e-06, + "loss": 3.5349, + "step": 489 + }, + { + "epoch": 0.06, + "learning_rate": 9.800000000000001e-06, + "loss": 3.4556, + "step": 490 + }, + { + "epoch": 0.06, + "learning_rate": 9.820000000000001e-06, + "loss": 3.4774, + "step": 491 + }, + { + "epoch": 0.06, + "learning_rate": 9.84e-06, + "loss": 3.4052, + "step": 492 + }, + { + "epoch": 0.06, + "learning_rate": 9.86e-06, + "loss": 3.4035, + "step": 493 + }, + { + "epoch": 0.06, + "learning_rate": 9.88e-06, + "loss": 3.4207, + "step": 494 + }, + { + "epoch": 0.06, + "learning_rate": 9.9e-06, + "loss": 3.4196, + "step": 495 + }, + { + "epoch": 0.06, + "learning_rate": 9.920000000000002e-06, + "loss": 3.5353, + "step": 496 + }, + { + "epoch": 0.06, + "learning_rate": 9.940000000000001e-06, + "loss": 3.4284, + "step": 497 + }, + { + "epoch": 0.06, + "learning_rate": 9.960000000000001e-06, + "loss": 3.4141, + "step": 498 + }, + { + "epoch": 0.06, + "learning_rate": 9.980000000000001e-06, + "loss": 3.3429, + "step": 499 + }, + { + "epoch": 0.06, + "learning_rate": 1e-05, + "loss": 3.5059, + "step": 500 + }, + { + "epoch": 0.06, + "learning_rate": 9.999999599899999e-06, + "loss": 3.4364, + "step": 501 + }, + { + "epoch": 0.06, + "learning_rate": 9.999998399600057e-06, + "loss": 3.5167, + "step": 502 + }, + { + "epoch": 0.06, + "learning_rate": 9.99999639910037e-06, + "loss": 3.4654, + "step": 503 + }, + { + "epoch": 0.06, + "learning_rate": 9.999993598401252e-06, + "loss": 3.4821, + "step": 504 + }, + { + "epoch": 0.06, + "learning_rate": 9.999989997503157e-06, + "loss": 3.4842, + "step": 505 + }, + { + "epoch": 0.06, + "learning_rate": 9.999985596406659e-06, + "loss": 3.4956, + "step": 506 + }, + { + "epoch": 0.06, + "learning_rate": 9.999980395112462e-06, + "loss": 3.5023, + "step": 507 + }, + { + "epoch": 0.06, + "learning_rate": 9.9999743936214e-06, + "loss": 3.4575, + "step": 508 + }, + { + "epoch": 0.06, + "learning_rate": 9.99996759193443e-06, + "loss": 3.4686, + "step": 509 + }, + { + "epoch": 0.06, + "learning_rate": 9.999959990052645e-06, + "loss": 3.5061, + "step": 510 + }, + { + "epoch": 0.06, + "learning_rate": 9.99995158797726e-06, + "loss": 3.5496, + "step": 511 + }, + { + "epoch": 0.06, + "learning_rate": 9.999942385709616e-06, + "loss": 3.4134, + "step": 512 + }, + { + "epoch": 0.06, + "learning_rate": 9.999932383251192e-06, + "loss": 3.4391, + "step": 513 + }, + { + "epoch": 0.06, + "learning_rate": 9.999921580603584e-06, + "loss": 3.4596, + "step": 514 + }, + { + "epoch": 0.06, + "learning_rate": 9.999909977768524e-06, + "loss": 3.4648, + "step": 515 + }, + { + "epoch": 0.06, + "learning_rate": 9.999897574747866e-06, + "loss": 3.3624, + "step": 516 + }, + { + "epoch": 0.06, + "learning_rate": 9.999884371543598e-06, + "loss": 3.4494, + "step": 517 + }, + { + "epoch": 0.06, + "learning_rate": 9.999870368157831e-06, + "loss": 3.3918, + "step": 518 + }, + { + "epoch": 0.06, + "learning_rate": 9.999855564592807e-06, + "loss": 3.451, + "step": 519 + }, + { + "epoch": 0.06, + "learning_rate": 9.999839960850894e-06, + "loss": 3.5059, + "step": 520 + }, + { + "epoch": 0.06, + "learning_rate": 9.99982355693459e-06, + "loss": 3.4561, + "step": 521 + }, + { + "epoch": 0.06, + "learning_rate": 9.99980635284652e-06, + "loss": 3.4667, + "step": 522 + }, + { + "epoch": 0.06, + "learning_rate": 9.99978834858944e-06, + "loss": 3.5818, + "step": 523 + }, + { + "epoch": 0.06, + "learning_rate": 9.999769544166226e-06, + "loss": 3.3243, + "step": 524 + }, + { + "epoch": 0.06, + "learning_rate": 9.999749939579892e-06, + "loss": 3.4099, + "step": 525 + }, + { + "epoch": 0.06, + "learning_rate": 9.999729534833574e-06, + "loss": 3.4348, + "step": 526 + }, + { + "epoch": 0.06, + "learning_rate": 9.999708329930536e-06, + "loss": 3.5206, + "step": 527 + }, + { + "epoch": 0.06, + "learning_rate": 9.999686324874175e-06, + "loss": 3.4191, + "step": 528 + }, + { + "epoch": 0.06, + "learning_rate": 9.99966351966801e-06, + "loss": 3.5305, + "step": 529 + }, + { + "epoch": 0.06, + "learning_rate": 9.999639914315693e-06, + "loss": 3.4442, + "step": 530 + }, + { + "epoch": 0.06, + "learning_rate": 9.999615508820998e-06, + "loss": 3.5081, + "step": 531 + }, + { + "epoch": 0.06, + "learning_rate": 9.999590303187836e-06, + "loss": 3.4355, + "step": 532 + }, + { + "epoch": 0.06, + "learning_rate": 9.999564297420237e-06, + "loss": 3.4076, + "step": 533 + }, + { + "epoch": 0.06, + "learning_rate": 9.999537491522365e-06, + "loss": 3.5814, + "step": 534 + }, + { + "epoch": 0.06, + "learning_rate": 9.999509885498508e-06, + "loss": 3.5086, + "step": 535 + }, + { + "epoch": 0.06, + "learning_rate": 9.999481479353087e-06, + "loss": 3.4397, + "step": 536 + }, + { + "epoch": 0.06, + "learning_rate": 9.999452273090644e-06, + "loss": 3.4276, + "step": 537 + }, + { + "epoch": 0.06, + "learning_rate": 9.999422266715857e-06, + "loss": 3.4327, + "step": 538 + }, + { + "epoch": 0.06, + "learning_rate": 9.999391460233526e-06, + "loss": 3.4161, + "step": 539 + }, + { + "epoch": 0.06, + "learning_rate": 9.999359853648582e-06, + "loss": 3.5419, + "step": 540 + }, + { + "epoch": 0.06, + "learning_rate": 9.999327446966084e-06, + "loss": 3.5112, + "step": 541 + }, + { + "epoch": 0.06, + "learning_rate": 9.999294240191218e-06, + "loss": 3.4719, + "step": 542 + }, + { + "epoch": 0.07, + "learning_rate": 9.999260233329298e-06, + "loss": 3.4196, + "step": 543 + }, + { + "epoch": 0.07, + "learning_rate": 9.999225426385766e-06, + "loss": 3.5287, + "step": 544 + }, + { + "epoch": 0.07, + "learning_rate": 9.999189819366194e-06, + "loss": 3.4181, + "step": 545 + }, + { + "epoch": 0.07, + "learning_rate": 9.99915341227628e-06, + "loss": 3.557, + "step": 546 + }, + { + "epoch": 0.07, + "learning_rate": 9.999116205121848e-06, + "loss": 3.4411, + "step": 547 + }, + { + "epoch": 0.07, + "learning_rate": 9.999078197908855e-06, + "loss": 3.4173, + "step": 548 + }, + { + "epoch": 0.07, + "learning_rate": 9.999039390643386e-06, + "loss": 3.442, + "step": 549 + }, + { + "epoch": 0.07, + "learning_rate": 9.998999783331649e-06, + "loss": 3.471, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 9.998959375979982e-06, + "loss": 3.4069, + "step": 551 + }, + { + "epoch": 0.07, + "learning_rate": 9.998918168594852e-06, + "loss": 3.392, + "step": 552 + }, + { + "epoch": 0.07, + "learning_rate": 9.998876161182856e-06, + "loss": 3.4585, + "step": 553 + }, + { + "epoch": 0.07, + "learning_rate": 9.998833353750714e-06, + "loss": 3.5137, + "step": 554 + }, + { + "epoch": 0.07, + "learning_rate": 9.99878974630528e-06, + "loss": 3.4528, + "step": 555 + }, + { + "epoch": 0.07, + "learning_rate": 9.998745338853531e-06, + "loss": 3.4334, + "step": 556 + }, + { + "epoch": 0.07, + "learning_rate": 9.998700131402574e-06, + "loss": 3.4362, + "step": 557 + }, + { + "epoch": 0.07, + "learning_rate": 9.998654123959645e-06, + "loss": 3.4802, + "step": 558 + }, + { + "epoch": 0.07, + "learning_rate": 9.998607316532105e-06, + "loss": 3.3739, + "step": 559 + }, + { + "epoch": 0.07, + "learning_rate": 9.998559709127448e-06, + "loss": 3.4597, + "step": 560 + }, + { + "epoch": 0.07, + "learning_rate": 9.998511301753291e-06, + "loss": 3.6192, + "step": 561 + }, + { + "epoch": 0.07, + "learning_rate": 9.99846209441738e-06, + "loss": 3.4244, + "step": 562 + }, + { + "epoch": 0.07, + "learning_rate": 9.998412087127593e-06, + "loss": 3.3875, + "step": 563 + }, + { + "epoch": 0.07, + "learning_rate": 9.998361279891933e-06, + "loss": 3.4249, + "step": 564 + }, + { + "epoch": 0.07, + "learning_rate": 9.998309672718529e-06, + "loss": 3.4381, + "step": 565 + }, + { + "epoch": 0.07, + "learning_rate": 9.99825726561564e-06, + "loss": 3.4812, + "step": 566 + }, + { + "epoch": 0.07, + "learning_rate": 9.998204058591657e-06, + "loss": 3.489, + "step": 567 + }, + { + "epoch": 0.07, + "learning_rate": 9.998150051655091e-06, + "loss": 3.4234, + "step": 568 + }, + { + "epoch": 0.07, + "learning_rate": 9.99809524481459e-06, + "loss": 3.4915, + "step": 569 + }, + { + "epoch": 0.07, + "learning_rate": 9.99803963807892e-06, + "loss": 3.3922, + "step": 570 + }, + { + "epoch": 0.07, + "learning_rate": 9.997983231456982e-06, + "loss": 3.4557, + "step": 571 + }, + { + "epoch": 0.07, + "learning_rate": 9.997926024957805e-06, + "loss": 3.5498, + "step": 572 + }, + { + "epoch": 0.07, + "learning_rate": 9.997868018590542e-06, + "loss": 3.4711, + "step": 573 + }, + { + "epoch": 0.07, + "learning_rate": 9.99780921236448e-06, + "loss": 3.4159, + "step": 574 + }, + { + "epoch": 0.07, + "learning_rate": 9.997749606289026e-06, + "loss": 3.491, + "step": 575 + }, + { + "epoch": 0.07, + "learning_rate": 9.997689200373723e-06, + "loss": 3.4376, + "step": 576 + }, + { + "epoch": 0.07, + "learning_rate": 9.997627994628236e-06, + "loss": 3.4385, + "step": 577 + }, + { + "epoch": 0.07, + "learning_rate": 9.997565989062361e-06, + "loss": 3.4295, + "step": 578 + }, + { + "epoch": 0.07, + "learning_rate": 9.997503183686022e-06, + "loss": 3.3342, + "step": 579 + }, + { + "epoch": 0.07, + "learning_rate": 9.997439578509268e-06, + "loss": 3.5081, + "step": 580 + }, + { + "epoch": 0.07, + "learning_rate": 9.997375173542282e-06, + "loss": 3.5026, + "step": 581 + }, + { + "epoch": 0.07, + "learning_rate": 9.99730996879537e-06, + "loss": 3.4188, + "step": 582 + }, + { + "epoch": 0.07, + "learning_rate": 9.997243964278964e-06, + "loss": 3.4835, + "step": 583 + }, + { + "epoch": 0.07, + "learning_rate": 9.997177160003633e-06, + "loss": 3.4757, + "step": 584 + }, + { + "epoch": 0.07, + "learning_rate": 9.997109555980064e-06, + "loss": 3.3467, + "step": 585 + }, + { + "epoch": 0.07, + "learning_rate": 9.99704115221908e-06, + "loss": 3.3822, + "step": 586 + }, + { + "epoch": 0.07, + "learning_rate": 9.996971948731626e-06, + "loss": 3.4902, + "step": 587 + }, + { + "epoch": 0.07, + "learning_rate": 9.996901945528776e-06, + "loss": 3.4689, + "step": 588 + }, + { + "epoch": 0.07, + "learning_rate": 9.996831142621736e-06, + "loss": 3.516, + "step": 589 + }, + { + "epoch": 0.07, + "learning_rate": 9.996759540021837e-06, + "loss": 3.4637, + "step": 590 + }, + { + "epoch": 0.07, + "learning_rate": 9.996687137740537e-06, + "loss": 3.4993, + "step": 591 + }, + { + "epoch": 0.07, + "learning_rate": 9.996613935789422e-06, + "loss": 3.3946, + "step": 592 + }, + { + "epoch": 0.07, + "learning_rate": 9.996539934180211e-06, + "loss": 3.4841, + "step": 593 + }, + { + "epoch": 0.07, + "learning_rate": 9.996465132924746e-06, + "loss": 3.4859, + "step": 594 + }, + { + "epoch": 0.07, + "learning_rate": 9.996389532034995e-06, + "loss": 3.4509, + "step": 595 + }, + { + "epoch": 0.07, + "learning_rate": 9.996313131523061e-06, + "loss": 3.4146, + "step": 596 + }, + { + "epoch": 0.07, + "learning_rate": 9.996235931401171e-06, + "loss": 3.5077, + "step": 597 + }, + { + "epoch": 0.07, + "learning_rate": 9.996157931681678e-06, + "loss": 3.5915, + "step": 598 + }, + { + "epoch": 0.07, + "learning_rate": 9.996079132377065e-06, + "loss": 3.4893, + "step": 599 + }, + { + "epoch": 0.07, + "learning_rate": 9.995999533499946e-06, + "loss": 3.433, + "step": 600 + }, + { + "epoch": 0.07, + "learning_rate": 9.995919135063055e-06, + "loss": 3.5199, + "step": 601 + }, + { + "epoch": 0.07, + "learning_rate": 9.995837937079264e-06, + "loss": 3.43, + "step": 602 + }, + { + "epoch": 0.07, + "learning_rate": 9.995755939561566e-06, + "loss": 3.441, + "step": 603 + }, + { + "epoch": 0.07, + "learning_rate": 9.995673142523083e-06, + "loss": 3.5547, + "step": 604 + }, + { + "epoch": 0.07, + "learning_rate": 9.995589545977066e-06, + "loss": 3.3807, + "step": 605 + }, + { + "epoch": 0.07, + "learning_rate": 9.995505149936897e-06, + "loss": 3.4262, + "step": 606 + }, + { + "epoch": 0.07, + "learning_rate": 9.995419954416077e-06, + "loss": 3.4942, + "step": 607 + }, + { + "epoch": 0.07, + "learning_rate": 9.995333959428245e-06, + "loss": 3.5442, + "step": 608 + }, + { + "epoch": 0.07, + "learning_rate": 9.995247164987164e-06, + "loss": 3.4893, + "step": 609 + }, + { + "epoch": 0.07, + "learning_rate": 9.995159571106722e-06, + "loss": 3.5513, + "step": 610 + }, + { + "epoch": 0.07, + "learning_rate": 9.995071177800938e-06, + "loss": 3.4865, + "step": 611 + }, + { + "epoch": 0.07, + "learning_rate": 9.99498198508396e-06, + "loss": 3.6033, + "step": 612 + }, + { + "epoch": 0.07, + "learning_rate": 9.99489199297006e-06, + "loss": 3.5204, + "step": 613 + }, + { + "epoch": 0.07, + "learning_rate": 9.994801201473643e-06, + "loss": 3.4403, + "step": 614 + }, + { + "epoch": 0.07, + "learning_rate": 9.994709610609239e-06, + "loss": 3.2906, + "step": 615 + }, + { + "epoch": 0.07, + "learning_rate": 9.994617220391502e-06, + "loss": 3.3889, + "step": 616 + }, + { + "epoch": 0.07, + "learning_rate": 9.994524030835224e-06, + "loss": 3.4951, + "step": 617 + }, + { + "epoch": 0.07, + "learning_rate": 9.994430041955316e-06, + "loss": 3.5272, + "step": 618 + }, + { + "epoch": 0.07, + "learning_rate": 9.99433525376682e-06, + "loss": 3.5216, + "step": 619 + }, + { + "epoch": 0.07, + "learning_rate": 9.994239666284908e-06, + "loss": 3.4712, + "step": 620 + }, + { + "epoch": 0.07, + "learning_rate": 9.994143279524875e-06, + "loss": 3.4363, + "step": 621 + }, + { + "epoch": 0.07, + "learning_rate": 9.994046093502148e-06, + "loss": 3.4691, + "step": 622 + }, + { + "epoch": 0.07, + "learning_rate": 9.99394810823228e-06, + "loss": 3.4324, + "step": 623 + }, + { + "epoch": 0.07, + "learning_rate": 9.993849323730952e-06, + "loss": 3.4555, + "step": 624 + }, + { + "epoch": 0.07, + "learning_rate": 9.993749740013978e-06, + "loss": 3.482, + "step": 625 + }, + { + "epoch": 0.07, + "learning_rate": 9.993649357097289e-06, + "loss": 3.535, + "step": 626 + }, + { + "epoch": 0.08, + "learning_rate": 9.993548174996954e-06, + "loss": 3.4017, + "step": 627 + }, + { + "epoch": 0.08, + "learning_rate": 9.993446193729167e-06, + "loss": 3.507, + "step": 628 + }, + { + "epoch": 0.08, + "learning_rate": 9.993343413310246e-06, + "loss": 3.4624, + "step": 629 + }, + { + "epoch": 0.08, + "learning_rate": 9.99323983375664e-06, + "loss": 3.4813, + "step": 630 + }, + { + "epoch": 0.08, + "learning_rate": 9.99313545508493e-06, + "loss": 3.4949, + "step": 631 + }, + { + "epoch": 0.08, + "learning_rate": 9.993030277311817e-06, + "loss": 3.4387, + "step": 632 + }, + { + "epoch": 0.08, + "learning_rate": 9.992924300454134e-06, + "loss": 3.4701, + "step": 633 + }, + { + "epoch": 0.08, + "learning_rate": 9.992817524528844e-06, + "loss": 3.4476, + "step": 634 + }, + { + "epoch": 0.08, + "learning_rate": 9.992709949553033e-06, + "loss": 3.4164, + "step": 635 + }, + { + "epoch": 0.08, + "learning_rate": 9.992601575543916e-06, + "loss": 3.4357, + "step": 636 + }, + { + "epoch": 0.08, + "learning_rate": 9.992492402518843e-06, + "loss": 3.4761, + "step": 637 + }, + { + "epoch": 0.08, + "learning_rate": 9.99238243049528e-06, + "loss": 3.4447, + "step": 638 + }, + { + "epoch": 0.08, + "learning_rate": 9.992271659490828e-06, + "loss": 3.474, + "step": 639 + }, + { + "epoch": 0.08, + "learning_rate": 9.992160089523218e-06, + "loss": 3.3904, + "step": 640 + }, + { + "epoch": 0.08, + "learning_rate": 9.992047720610304e-06, + "loss": 3.3564, + "step": 641 + }, + { + "epoch": 0.08, + "learning_rate": 9.991934552770069e-06, + "loss": 3.4432, + "step": 642 + }, + { + "epoch": 0.08, + "learning_rate": 9.991820586020622e-06, + "loss": 3.4285, + "step": 643 + }, + { + "epoch": 0.08, + "learning_rate": 9.991705820380207e-06, + "loss": 3.4953, + "step": 644 + }, + { + "epoch": 0.08, + "learning_rate": 9.991590255867188e-06, + "loss": 3.3296, + "step": 645 + }, + { + "epoch": 0.08, + "learning_rate": 9.991473892500062e-06, + "loss": 3.4679, + "step": 646 + }, + { + "epoch": 0.08, + "learning_rate": 9.99135673029745e-06, + "loss": 3.4901, + "step": 647 + }, + { + "epoch": 0.08, + "learning_rate": 9.991238769278102e-06, + "loss": 3.491, + "step": 648 + }, + { + "epoch": 0.08, + "learning_rate": 9.9911200094609e-06, + "loss": 3.4347, + "step": 649 + }, + { + "epoch": 0.08, + "learning_rate": 9.991000450864846e-06, + "loss": 3.5869, + "step": 650 + }, + { + "epoch": 0.08, + "learning_rate": 9.990880093509076e-06, + "loss": 3.415, + "step": 651 + }, + { + "epoch": 0.08, + "learning_rate": 9.990758937412855e-06, + "loss": 3.3519, + "step": 652 + }, + { + "epoch": 0.08, + "learning_rate": 9.990636982595567e-06, + "loss": 3.4252, + "step": 653 + }, + { + "epoch": 0.08, + "learning_rate": 9.990514229076734e-06, + "loss": 3.4442, + "step": 654 + }, + { + "epoch": 0.08, + "learning_rate": 9.990390676876001e-06, + "loss": 3.4142, + "step": 655 + }, + { + "epoch": 0.08, + "learning_rate": 9.990266326013139e-06, + "loss": 3.4827, + "step": 656 + }, + { + "epoch": 0.08, + "learning_rate": 9.990141176508051e-06, + "loss": 3.461, + "step": 657 + }, + { + "epoch": 0.08, + "learning_rate": 9.990015228380766e-06, + "loss": 3.4888, + "step": 658 + }, + { + "epoch": 0.08, + "learning_rate": 9.989888481651438e-06, + "loss": 3.3034, + "step": 659 + }, + { + "epoch": 0.08, + "learning_rate": 9.989760936340355e-06, + "loss": 3.5789, + "step": 660 + }, + { + "epoch": 0.08, + "learning_rate": 9.989632592467928e-06, + "loss": 3.5585, + "step": 661 + }, + { + "epoch": 0.08, + "learning_rate": 9.989503450054698e-06, + "loss": 3.427, + "step": 662 + }, + { + "epoch": 0.08, + "learning_rate": 9.989373509121332e-06, + "loss": 3.422, + "step": 663 + }, + { + "epoch": 0.08, + "learning_rate": 9.989242769688625e-06, + "loss": 3.484, + "step": 664 + }, + { + "epoch": 0.08, + "learning_rate": 9.989111231777504e-06, + "loss": 3.4144, + "step": 665 + }, + { + "epoch": 0.08, + "learning_rate": 9.988978895409015e-06, + "loss": 3.4756, + "step": 666 + }, + { + "epoch": 0.08, + "learning_rate": 9.988845760604341e-06, + "loss": 3.42, + "step": 667 + }, + { + "epoch": 0.08, + "learning_rate": 9.988711827384788e-06, + "loss": 3.5353, + "step": 668 + }, + { + "epoch": 0.08, + "learning_rate": 9.98857709577179e-06, + "loss": 3.4882, + "step": 669 + }, + { + "epoch": 0.08, + "learning_rate": 9.98844156578691e-06, + "loss": 3.443, + "step": 670 + }, + { + "epoch": 0.08, + "learning_rate": 9.988305237451839e-06, + "loss": 3.427, + "step": 671 + }, + { + "epoch": 0.08, + "learning_rate": 9.988168110788394e-06, + "loss": 3.3769, + "step": 672 + }, + { + "epoch": 0.08, + "learning_rate": 9.98803018581852e-06, + "loss": 3.3367, + "step": 673 + }, + { + "epoch": 0.08, + "learning_rate": 9.987891462564293e-06, + "loss": 3.4368, + "step": 674 + }, + { + "epoch": 0.08, + "learning_rate": 9.987751941047911e-06, + "loss": 3.4922, + "step": 675 + }, + { + "epoch": 0.08, + "learning_rate": 9.987611621291705e-06, + "loss": 3.4996, + "step": 676 + }, + { + "epoch": 0.08, + "learning_rate": 9.987470503318131e-06, + "loss": 3.4255, + "step": 677 + }, + { + "epoch": 0.08, + "learning_rate": 9.987328587149776e-06, + "loss": 3.404, + "step": 678 + }, + { + "epoch": 0.08, + "learning_rate": 9.987185872809348e-06, + "loss": 3.4403, + "step": 679 + }, + { + "epoch": 0.08, + "learning_rate": 9.98704236031969e-06, + "loss": 3.5333, + "step": 680 + }, + { + "epoch": 0.08, + "learning_rate": 9.98689804970377e-06, + "loss": 3.5433, + "step": 681 + }, + { + "epoch": 0.08, + "learning_rate": 9.986752940984682e-06, + "loss": 3.4658, + "step": 682 + }, + { + "epoch": 0.08, + "learning_rate": 9.98660703418565e-06, + "loss": 3.5124, + "step": 683 + }, + { + "epoch": 0.08, + "learning_rate": 9.986460329330024e-06, + "loss": 3.5686, + "step": 684 + }, + { + "epoch": 0.08, + "learning_rate": 9.986312826441282e-06, + "loss": 3.3707, + "step": 685 + }, + { + "epoch": 0.08, + "learning_rate": 9.986164525543035e-06, + "loss": 3.3715, + "step": 686 + }, + { + "epoch": 0.08, + "learning_rate": 9.98601542665901e-06, + "loss": 3.3967, + "step": 687 + }, + { + "epoch": 0.08, + "learning_rate": 9.985865529813074e-06, + "loss": 3.5003, + "step": 688 + }, + { + "epoch": 0.08, + "learning_rate": 9.985714835029216e-06, + "loss": 3.4562, + "step": 689 + }, + { + "epoch": 0.08, + "learning_rate": 9.98556334233155e-06, + "loss": 3.4592, + "step": 690 + }, + { + "epoch": 0.08, + "learning_rate": 9.985411051744324e-06, + "loss": 3.4199, + "step": 691 + }, + { + "epoch": 0.08, + "learning_rate": 9.98525796329191e-06, + "loss": 3.5115, + "step": 692 + }, + { + "epoch": 0.08, + "learning_rate": 9.985104076998808e-06, + "loss": 3.4228, + "step": 693 + }, + { + "epoch": 0.08, + "learning_rate": 9.984949392889647e-06, + "loss": 3.4145, + "step": 694 + }, + { + "epoch": 0.08, + "learning_rate": 9.98479391098918e-06, + "loss": 3.4315, + "step": 695 + }, + { + "epoch": 0.08, + "learning_rate": 9.984637631322291e-06, + "loss": 3.4704, + "step": 696 + }, + { + "epoch": 0.08, + "learning_rate": 9.984480553913993e-06, + "loss": 3.4898, + "step": 697 + }, + { + "epoch": 0.08, + "learning_rate": 9.984322678789424e-06, + "loss": 3.4852, + "step": 698 + }, + { + "epoch": 0.08, + "learning_rate": 9.984164005973849e-06, + "loss": 3.4049, + "step": 699 + }, + { + "epoch": 0.08, + "learning_rate": 9.984004535492665e-06, + "loss": 3.3228, + "step": 700 + }, + { + "epoch": 0.08, + "learning_rate": 9.983844267371387e-06, + "loss": 3.4605, + "step": 701 + }, + { + "epoch": 0.08, + "learning_rate": 9.983683201635672e-06, + "loss": 3.5113, + "step": 702 + }, + { + "epoch": 0.08, + "learning_rate": 9.983521338311293e-06, + "loss": 3.4863, + "step": 703 + }, + { + "epoch": 0.08, + "learning_rate": 9.983358677424156e-06, + "loss": 3.5074, + "step": 704 + }, + { + "epoch": 0.08, + "learning_rate": 9.983195219000291e-06, + "loss": 3.4363, + "step": 705 + }, + { + "epoch": 0.08, + "learning_rate": 9.983030963065862e-06, + "loss": 3.4471, + "step": 706 + }, + { + "epoch": 0.08, + "learning_rate": 9.982865909647152e-06, + "loss": 3.4031, + "step": 707 + }, + { + "epoch": 0.08, + "learning_rate": 9.98270005877058e-06, + "loss": 3.4407, + "step": 708 + }, + { + "epoch": 0.08, + "learning_rate": 9.982533410462685e-06, + "loss": 3.3964, + "step": 709 + }, + { + "epoch": 0.08, + "learning_rate": 9.982365964750141e-06, + "loss": 3.5565, + "step": 710 + }, + { + "epoch": 0.09, + "learning_rate": 9.982197721659743e-06, + "loss": 3.4656, + "step": 711 + }, + { + "epoch": 0.09, + "learning_rate": 9.982028681218418e-06, + "loss": 3.4357, + "step": 712 + }, + { + "epoch": 0.09, + "learning_rate": 9.98185884345322e-06, + "loss": 3.442, + "step": 713 + }, + { + "epoch": 0.09, + "learning_rate": 9.981688208391328e-06, + "loss": 3.4018, + "step": 714 + }, + { + "epoch": 0.09, + "learning_rate": 9.981516776060053e-06, + "loss": 3.4139, + "step": 715 + }, + { + "epoch": 0.09, + "learning_rate": 9.981344546486829e-06, + "loss": 3.4799, + "step": 716 + }, + { + "epoch": 0.09, + "learning_rate": 9.98117151969922e-06, + "loss": 3.4209, + "step": 717 + }, + { + "epoch": 0.09, + "learning_rate": 9.980997695724916e-06, + "loss": 3.3628, + "step": 718 + }, + { + "epoch": 0.09, + "learning_rate": 9.98082307459174e-06, + "loss": 3.3765, + "step": 719 + }, + { + "epoch": 0.09, + "learning_rate": 9.980647656327634e-06, + "loss": 3.459, + "step": 720 + }, + { + "epoch": 0.09, + "learning_rate": 9.980471440960671e-06, + "loss": 3.399, + "step": 721 + }, + { + "epoch": 0.09, + "learning_rate": 9.980294428519059e-06, + "loss": 3.5074, + "step": 722 + }, + { + "epoch": 0.09, + "learning_rate": 9.98011661903112e-06, + "loss": 3.4064, + "step": 723 + }, + { + "epoch": 0.09, + "learning_rate": 9.979938012525315e-06, + "loss": 3.5303, + "step": 724 + }, + { + "epoch": 0.09, + "learning_rate": 9.979758609030227e-06, + "loss": 3.473, + "step": 725 + }, + { + "epoch": 0.09, + "learning_rate": 9.979578408574567e-06, + "loss": 3.5364, + "step": 726 + }, + { + "epoch": 0.09, + "learning_rate": 9.979397411187175e-06, + "loss": 3.3546, + "step": 727 + }, + { + "epoch": 0.09, + "learning_rate": 9.979215616897018e-06, + "loss": 3.4238, + "step": 728 + }, + { + "epoch": 0.09, + "learning_rate": 9.97903302573319e-06, + "loss": 3.4091, + "step": 729 + }, + { + "epoch": 0.09, + "learning_rate": 9.978849637724912e-06, + "loss": 3.4749, + "step": 730 + }, + { + "epoch": 0.09, + "learning_rate": 9.978665452901536e-06, + "loss": 3.5488, + "step": 731 + }, + { + "epoch": 0.09, + "learning_rate": 9.978480471292537e-06, + "loss": 3.5631, + "step": 732 + }, + { + "epoch": 0.09, + "learning_rate": 9.97829469292752e-06, + "loss": 3.3966, + "step": 733 + }, + { + "epoch": 0.09, + "learning_rate": 9.978108117836214e-06, + "loss": 3.4665, + "step": 734 + }, + { + "epoch": 0.09, + "learning_rate": 9.977920746048484e-06, + "loss": 3.3947, + "step": 735 + }, + { + "epoch": 0.09, + "learning_rate": 9.977732577594313e-06, + "loss": 3.3799, + "step": 736 + }, + { + "epoch": 0.09, + "learning_rate": 9.977543612503817e-06, + "loss": 3.5093, + "step": 737 + }, + { + "epoch": 0.09, + "learning_rate": 9.977353850807236e-06, + "loss": 3.4693, + "step": 738 + }, + { + "epoch": 0.09, + "learning_rate": 9.977163292534943e-06, + "loss": 3.4665, + "step": 739 + }, + { + "epoch": 0.09, + "learning_rate": 9.976971937717432e-06, + "loss": 3.4758, + "step": 740 + }, + { + "epoch": 0.09, + "learning_rate": 9.97677978638533e-06, + "loss": 3.4435, + "step": 741 + }, + { + "epoch": 0.09, + "learning_rate": 9.976586838569386e-06, + "loss": 3.4935, + "step": 742 + }, + { + "epoch": 0.09, + "learning_rate": 9.97639309430048e-06, + "loss": 3.544, + "step": 743 + }, + { + "epoch": 0.09, + "learning_rate": 9.976198553609622e-06, + "loss": 3.4769, + "step": 744 + }, + { + "epoch": 0.09, + "learning_rate": 9.976003216527943e-06, + "loss": 3.4413, + "step": 745 + }, + { + "epoch": 0.09, + "learning_rate": 9.975807083086703e-06, + "loss": 3.43, + "step": 746 + }, + { + "epoch": 0.09, + "learning_rate": 9.975610153317297e-06, + "loss": 3.4612, + "step": 747 + }, + { + "epoch": 0.09, + "learning_rate": 9.975412427251237e-06, + "loss": 3.4125, + "step": 748 + }, + { + "epoch": 0.09, + "learning_rate": 9.97521390492017e-06, + "loss": 3.548, + "step": 749 + }, + { + "epoch": 0.09, + "learning_rate": 9.975014586355863e-06, + "loss": 3.4561, + "step": 750 + }, + { + "epoch": 0.09, + "learning_rate": 9.97481447159022e-06, + "loss": 3.4965, + "step": 751 + }, + { + "epoch": 0.09, + "learning_rate": 9.974613560655265e-06, + "loss": 3.4463, + "step": 752 + }, + { + "epoch": 0.09, + "learning_rate": 9.974411853583152e-06, + "loss": 3.4097, + "step": 753 + }, + { + "epoch": 0.09, + "learning_rate": 9.974209350406163e-06, + "loss": 3.4715, + "step": 754 + }, + { + "epoch": 0.09, + "learning_rate": 9.974006051156705e-06, + "loss": 3.4638, + "step": 755 + }, + { + "epoch": 0.09, + "learning_rate": 9.973801955867315e-06, + "loss": 3.4089, + "step": 756 + }, + { + "epoch": 0.09, + "learning_rate": 9.973597064570657e-06, + "loss": 3.4712, + "step": 757 + }, + { + "epoch": 0.09, + "learning_rate": 9.973391377299522e-06, + "loss": 3.4774, + "step": 758 + }, + { + "epoch": 0.09, + "learning_rate": 9.973184894086824e-06, + "loss": 3.3332, + "step": 759 + }, + { + "epoch": 0.09, + "learning_rate": 9.972977614965616e-06, + "loss": 3.4522, + "step": 760 + }, + { + "epoch": 0.09, + "learning_rate": 9.972769539969064e-06, + "loss": 3.5528, + "step": 761 + }, + { + "epoch": 0.09, + "learning_rate": 9.972560669130472e-06, + "loss": 3.4287, + "step": 762 + }, + { + "epoch": 0.09, + "learning_rate": 9.972351002483267e-06, + "loss": 3.483, + "step": 763 + }, + { + "epoch": 0.09, + "learning_rate": 9.972140540061004e-06, + "loss": 3.5173, + "step": 764 + }, + { + "epoch": 0.09, + "learning_rate": 9.971929281897365e-06, + "loss": 3.4507, + "step": 765 + }, + { + "epoch": 0.09, + "learning_rate": 9.971717228026161e-06, + "loss": 3.4027, + "step": 766 + }, + { + "epoch": 0.09, + "learning_rate": 9.971504378481326e-06, + "loss": 3.4355, + "step": 767 + }, + { + "epoch": 0.09, + "learning_rate": 9.97129073329693e-06, + "loss": 3.46, + "step": 768 + }, + { + "epoch": 0.09, + "learning_rate": 9.97107629250716e-06, + "loss": 3.411, + "step": 769 + }, + { + "epoch": 0.09, + "learning_rate": 9.970861056146334e-06, + "loss": 3.4111, + "step": 770 + }, + { + "epoch": 0.09, + "learning_rate": 9.970645024248904e-06, + "loss": 3.3984, + "step": 771 + }, + { + "epoch": 0.09, + "learning_rate": 9.970428196849439e-06, + "loss": 3.4433, + "step": 772 + }, + { + "epoch": 0.09, + "learning_rate": 9.970210573982645e-06, + "loss": 3.3969, + "step": 773 + }, + { + "epoch": 0.09, + "learning_rate": 9.969992155683344e-06, + "loss": 3.4711, + "step": 774 + }, + { + "epoch": 0.09, + "learning_rate": 9.969772941986493e-06, + "loss": 3.4748, + "step": 775 + }, + { + "epoch": 0.09, + "learning_rate": 9.96955293292718e-06, + "loss": 3.5071, + "step": 776 + }, + { + "epoch": 0.09, + "learning_rate": 9.969332128540611e-06, + "loss": 3.4882, + "step": 777 + }, + { + "epoch": 0.09, + "learning_rate": 9.969110528862125e-06, + "loss": 3.4139, + "step": 778 + }, + { + "epoch": 0.09, + "learning_rate": 9.968888133927186e-06, + "loss": 3.4757, + "step": 779 + }, + { + "epoch": 0.09, + "learning_rate": 9.968664943771386e-06, + "loss": 3.4277, + "step": 780 + }, + { + "epoch": 0.09, + "learning_rate": 9.968440958430445e-06, + "loss": 3.4082, + "step": 781 + }, + { + "epoch": 0.09, + "learning_rate": 9.968216177940209e-06, + "loss": 3.4626, + "step": 782 + }, + { + "epoch": 0.09, + "learning_rate": 9.967990602336653e-06, + "loss": 3.5199, + "step": 783 + }, + { + "epoch": 0.09, + "learning_rate": 9.967764231655877e-06, + "loss": 3.457, + "step": 784 + }, + { + "epoch": 0.09, + "learning_rate": 9.96753706593411e-06, + "loss": 3.5078, + "step": 785 + }, + { + "epoch": 0.09, + "learning_rate": 9.967309105207707e-06, + "loss": 3.5274, + "step": 786 + }, + { + "epoch": 0.09, + "learning_rate": 9.967080349513152e-06, + "loss": 3.487, + "step": 787 + }, + { + "epoch": 0.09, + "learning_rate": 9.966850798887053e-06, + "loss": 3.4411, + "step": 788 + }, + { + "epoch": 0.09, + "learning_rate": 9.966620453366149e-06, + "loss": 3.488, + "step": 789 + }, + { + "epoch": 0.09, + "learning_rate": 9.966389312987303e-06, + "loss": 3.4129, + "step": 790 + }, + { + "epoch": 0.09, + "learning_rate": 9.966157377787508e-06, + "loss": 3.4806, + "step": 791 + }, + { + "epoch": 0.09, + "learning_rate": 9.965924647803884e-06, + "loss": 3.5187, + "step": 792 + }, + { + "epoch": 0.09, + "learning_rate": 9.965691123073676e-06, + "loss": 3.5157, + "step": 793 + }, + { + "epoch": 0.1, + "learning_rate": 9.965456803634256e-06, + "loss": 3.3987, + "step": 794 + }, + { + "epoch": 0.1, + "learning_rate": 9.965221689523125e-06, + "loss": 3.4461, + "step": 795 + }, + { + "epoch": 0.1, + "learning_rate": 9.964985780777912e-06, + "loss": 3.4897, + "step": 796 + }, + { + "epoch": 0.1, + "learning_rate": 9.96474907743637e-06, + "loss": 3.3755, + "step": 797 + }, + { + "epoch": 0.1, + "learning_rate": 9.964511579536384e-06, + "loss": 3.4337, + "step": 798 + }, + { + "epoch": 0.1, + "learning_rate": 9.96427328711596e-06, + "loss": 3.4284, + "step": 799 + }, + { + "epoch": 0.1, + "learning_rate": 9.964034200213235e-06, + "loss": 3.4223, + "step": 800 + }, + { + "epoch": 0.1, + "learning_rate": 9.963794318866474e-06, + "loss": 3.3943, + "step": 801 + }, + { + "epoch": 0.1, + "learning_rate": 9.963553643114065e-06, + "loss": 3.5218, + "step": 802 + }, + { + "epoch": 0.1, + "learning_rate": 9.96331217299453e-06, + "loss": 3.4159, + "step": 803 + }, + { + "epoch": 0.1, + "learning_rate": 9.963069908546509e-06, + "loss": 3.5294, + "step": 804 + }, + { + "epoch": 0.1, + "learning_rate": 9.962826849808778e-06, + "loss": 3.4027, + "step": 805 + }, + { + "epoch": 0.1, + "learning_rate": 9.962582996820232e-06, + "loss": 3.4916, + "step": 806 + }, + { + "epoch": 0.1, + "learning_rate": 9.962338349619901e-06, + "loss": 3.4151, + "step": 807 + }, + { + "epoch": 0.1, + "learning_rate": 9.962092908246936e-06, + "loss": 3.4302, + "step": 808 + }, + { + "epoch": 0.1, + "learning_rate": 9.96184667274062e-06, + "loss": 3.3748, + "step": 809 + }, + { + "epoch": 0.1, + "learning_rate": 9.961599643140357e-06, + "loss": 3.4246, + "step": 810 + }, + { + "epoch": 0.1, + "learning_rate": 9.961351819485684e-06, + "loss": 3.4376, + "step": 811 + }, + { + "epoch": 0.1, + "learning_rate": 9.961103201816264e-06, + "loss": 3.4757, + "step": 812 + }, + { + "epoch": 0.1, + "learning_rate": 9.960853790171881e-06, + "loss": 3.4087, + "step": 813 + }, + { + "epoch": 0.1, + "learning_rate": 9.960603584592456e-06, + "loss": 3.4498, + "step": 814 + }, + { + "epoch": 0.1, + "learning_rate": 9.96035258511803e-06, + "loss": 3.3849, + "step": 815 + }, + { + "epoch": 0.1, + "learning_rate": 9.96010079178877e-06, + "loss": 3.4172, + "step": 816 + }, + { + "epoch": 0.1, + "learning_rate": 9.959848204644978e-06, + "loss": 3.3709, + "step": 817 + }, + { + "epoch": 0.1, + "learning_rate": 9.959594823727075e-06, + "loss": 3.3803, + "step": 818 + }, + { + "epoch": 0.1, + "learning_rate": 9.959340649075612e-06, + "loss": 3.4456, + "step": 819 + }, + { + "epoch": 0.1, + "learning_rate": 9.959085680731269e-06, + "loss": 3.4788, + "step": 820 + }, + { + "epoch": 0.1, + "learning_rate": 9.95882991873485e-06, + "loss": 3.4455, + "step": 821 + }, + { + "epoch": 0.1, + "learning_rate": 9.958573363127286e-06, + "loss": 3.5325, + "step": 822 + }, + { + "epoch": 0.1, + "learning_rate": 9.958316013949638e-06, + "loss": 3.4798, + "step": 823 + }, + { + "epoch": 0.1, + "learning_rate": 9.958057871243091e-06, + "loss": 3.4614, + "step": 824 + }, + { + "epoch": 0.1, + "learning_rate": 9.95779893504896e-06, + "loss": 3.4467, + "step": 825 + }, + { + "epoch": 0.1, + "learning_rate": 9.957539205408684e-06, + "loss": 3.438, + "step": 826 + }, + { + "epoch": 0.1, + "learning_rate": 9.957278682363829e-06, + "loss": 3.3815, + "step": 827 + }, + { + "epoch": 0.1, + "learning_rate": 9.95701736595609e-06, + "loss": 3.5374, + "step": 828 + }, + { + "epoch": 0.1, + "learning_rate": 9.956755256227288e-06, + "loss": 3.429, + "step": 829 + }, + { + "epoch": 0.1, + "learning_rate": 9.956492353219371e-06, + "loss": 3.4506, + "step": 830 + }, + { + "epoch": 0.1, + "learning_rate": 9.956228656974415e-06, + "loss": 3.4486, + "step": 831 + }, + { + "epoch": 0.1, + "learning_rate": 9.95596416753462e-06, + "loss": 3.5174, + "step": 832 + }, + { + "epoch": 0.1, + "learning_rate": 9.955698884942318e-06, + "loss": 3.4195, + "step": 833 + }, + { + "epoch": 0.1, + "learning_rate": 9.95543280923996e-06, + "loss": 3.5185, + "step": 834 + }, + { + "epoch": 0.1, + "learning_rate": 9.955165940470133e-06, + "loss": 3.4406, + "step": 835 + }, + { + "epoch": 0.1, + "learning_rate": 9.954898278675547e-06, + "loss": 3.5067, + "step": 836 + }, + { + "epoch": 0.1, + "learning_rate": 9.954629823899033e-06, + "loss": 3.4502, + "step": 837 + }, + { + "epoch": 0.1, + "learning_rate": 9.954360576183562e-06, + "loss": 3.4141, + "step": 838 + }, + { + "epoch": 0.1, + "learning_rate": 9.954090535572218e-06, + "loss": 3.4661, + "step": 839 + }, + { + "epoch": 0.1, + "learning_rate": 9.953819702108222e-06, + "loss": 3.4849, + "step": 840 + }, + { + "epoch": 0.1, + "learning_rate": 9.95354807583492e-06, + "loss": 3.4226, + "step": 841 + }, + { + "epoch": 0.1, + "learning_rate": 9.953275656795778e-06, + "loss": 3.4517, + "step": 842 + }, + { + "epoch": 0.1, + "learning_rate": 9.953002445034395e-06, + "loss": 3.4427, + "step": 843 + }, + { + "epoch": 0.1, + "learning_rate": 9.9527284405945e-06, + "loss": 3.408, + "step": 844 + }, + { + "epoch": 0.1, + "learning_rate": 9.952453643519939e-06, + "loss": 3.4097, + "step": 845 + }, + { + "epoch": 0.1, + "learning_rate": 9.952178053854694e-06, + "loss": 3.434, + "step": 846 + }, + { + "epoch": 0.1, + "learning_rate": 9.95190167164287e-06, + "loss": 3.5055, + "step": 847 + }, + { + "epoch": 0.1, + "learning_rate": 9.9516244969287e-06, + "loss": 3.4616, + "step": 848 + }, + { + "epoch": 0.1, + "learning_rate": 9.95134652975654e-06, + "loss": 3.5104, + "step": 849 + }, + { + "epoch": 0.1, + "learning_rate": 9.951067770170879e-06, + "loss": 3.4968, + "step": 850 + }, + { + "epoch": 0.1, + "learning_rate": 9.950788218216328e-06, + "loss": 3.3801, + "step": 851 + }, + { + "epoch": 0.1, + "learning_rate": 9.950507873937626e-06, + "loss": 3.4596, + "step": 852 + }, + { + "epoch": 0.1, + "learning_rate": 9.950226737379641e-06, + "loss": 3.4662, + "step": 853 + }, + { + "epoch": 0.1, + "learning_rate": 9.949944808587365e-06, + "loss": 3.4973, + "step": 854 + }, + { + "epoch": 0.1, + "learning_rate": 9.949662087605919e-06, + "loss": 3.4749, + "step": 855 + }, + { + "epoch": 0.1, + "learning_rate": 9.949378574480548e-06, + "loss": 3.4564, + "step": 856 + }, + { + "epoch": 0.1, + "learning_rate": 9.949094269256627e-06, + "loss": 3.4263, + "step": 857 + }, + { + "epoch": 0.1, + "learning_rate": 9.948809171979655e-06, + "loss": 3.4164, + "step": 858 + }, + { + "epoch": 0.1, + "learning_rate": 9.94852328269526e-06, + "loss": 3.3771, + "step": 859 + }, + { + "epoch": 0.1, + "learning_rate": 9.948236601449194e-06, + "loss": 3.5144, + "step": 860 + }, + { + "epoch": 0.1, + "learning_rate": 9.947949128287342e-06, + "loss": 3.4646, + "step": 861 + }, + { + "epoch": 0.1, + "learning_rate": 9.947660863255705e-06, + "loss": 3.5283, + "step": 862 + }, + { + "epoch": 0.1, + "learning_rate": 9.94737180640042e-06, + "loss": 3.4378, + "step": 863 + }, + { + "epoch": 0.1, + "learning_rate": 9.947081957767748e-06, + "loss": 3.4082, + "step": 864 + }, + { + "epoch": 0.1, + "learning_rate": 9.946791317404077e-06, + "loss": 3.479, + "step": 865 + }, + { + "epoch": 0.1, + "learning_rate": 9.946499885355919e-06, + "loss": 3.5879, + "step": 866 + }, + { + "epoch": 0.1, + "learning_rate": 9.946207661669914e-06, + "loss": 3.4253, + "step": 867 + }, + { + "epoch": 0.1, + "learning_rate": 9.945914646392833e-06, + "loss": 3.4285, + "step": 868 + }, + { + "epoch": 0.1, + "learning_rate": 9.945620839571568e-06, + "loss": 3.4648, + "step": 869 + }, + { + "epoch": 0.1, + "learning_rate": 9.945326241253141e-06, + "loss": 3.3655, + "step": 870 + }, + { + "epoch": 0.1, + "learning_rate": 9.945030851484698e-06, + "loss": 3.4475, + "step": 871 + }, + { + "epoch": 0.1, + "learning_rate": 9.944734670313514e-06, + "loss": 3.5574, + "step": 872 + }, + { + "epoch": 0.1, + "learning_rate": 9.944437697786988e-06, + "loss": 3.4024, + "step": 873 + }, + { + "epoch": 0.1, + "learning_rate": 9.944139933952653e-06, + "loss": 3.3932, + "step": 874 + }, + { + "epoch": 0.1, + "learning_rate": 9.943841378858157e-06, + "loss": 3.4593, + "step": 875 + }, + { + "epoch": 0.1, + "learning_rate": 9.943542032551284e-06, + "loss": 3.4374, + "step": 876 + }, + { + "epoch": 0.1, + "learning_rate": 9.943241895079938e-06, + "loss": 3.5098, + "step": 877 + }, + { + "epoch": 0.11, + "learning_rate": 9.942940966492159e-06, + "loss": 3.4328, + "step": 878 + }, + { + "epoch": 0.11, + "learning_rate": 9.942639246836101e-06, + "loss": 3.3775, + "step": 879 + }, + { + "epoch": 0.11, + "learning_rate": 9.942336736160056e-06, + "loss": 3.4238, + "step": 880 + }, + { + "epoch": 0.11, + "learning_rate": 9.942033434512436e-06, + "loss": 3.4318, + "step": 881 + }, + { + "epoch": 0.11, + "learning_rate": 9.94172934194178e-06, + "loss": 3.4538, + "step": 882 + }, + { + "epoch": 0.11, + "learning_rate": 9.941424458496761e-06, + "loss": 3.4501, + "step": 883 + }, + { + "epoch": 0.11, + "learning_rate": 9.941118784226164e-06, + "loss": 3.277, + "step": 884 + }, + { + "epoch": 0.11, + "learning_rate": 9.940812319178914e-06, + "loss": 3.3692, + "step": 885 + }, + { + "epoch": 0.11, + "learning_rate": 9.940505063404056e-06, + "loss": 3.3772, + "step": 886 + }, + { + "epoch": 0.11, + "learning_rate": 9.940197016950766e-06, + "loss": 3.4768, + "step": 887 + }, + { + "epoch": 0.11, + "learning_rate": 9.93988817986834e-06, + "loss": 3.5333, + "step": 888 + }, + { + "epoch": 0.11, + "learning_rate": 9.939578552206207e-06, + "loss": 3.3658, + "step": 889 + }, + { + "epoch": 0.11, + "learning_rate": 9.93926813401392e-06, + "loss": 3.562, + "step": 890 + }, + { + "epoch": 0.11, + "learning_rate": 9.938956925341154e-06, + "loss": 3.4844, + "step": 891 + }, + { + "epoch": 0.11, + "learning_rate": 9.938644926237721e-06, + "loss": 3.5074, + "step": 892 + }, + { + "epoch": 0.11, + "learning_rate": 9.938332136753551e-06, + "loss": 3.4622, + "step": 893 + }, + { + "epoch": 0.11, + "learning_rate": 9.9380185569387e-06, + "loss": 3.4205, + "step": 894 + }, + { + "epoch": 0.11, + "learning_rate": 9.937704186843356e-06, + "loss": 3.4511, + "step": 895 + }, + { + "epoch": 0.11, + "learning_rate": 9.937389026517829e-06, + "loss": 3.5285, + "step": 896 + }, + { + "epoch": 0.11, + "learning_rate": 9.937073076012562e-06, + "loss": 3.4555, + "step": 897 + }, + { + "epoch": 0.11, + "learning_rate": 9.936756335378114e-06, + "loss": 3.4349, + "step": 898 + }, + { + "epoch": 0.11, + "learning_rate": 9.936438804665178e-06, + "loss": 3.5152, + "step": 899 + }, + { + "epoch": 0.11, + "learning_rate": 9.936120483924574e-06, + "loss": 3.454, + "step": 900 + }, + { + "epoch": 0.11, + "learning_rate": 9.935801373207243e-06, + "loss": 3.4421, + "step": 901 + }, + { + "epoch": 0.11, + "learning_rate": 9.935481472564257e-06, + "loss": 3.4753, + "step": 902 + }, + { + "epoch": 0.11, + "learning_rate": 9.935160782046811e-06, + "loss": 3.4171, + "step": 903 + }, + { + "epoch": 0.11, + "learning_rate": 9.934839301706231e-06, + "loss": 3.5205, + "step": 904 + }, + { + "epoch": 0.11, + "learning_rate": 9.934517031593966e-06, + "loss": 3.5813, + "step": 905 + }, + { + "epoch": 0.11, + "learning_rate": 9.934193971761593e-06, + "loss": 3.442, + "step": 906 + }, + { + "epoch": 0.11, + "learning_rate": 9.93387012226081e-06, + "loss": 3.4544, + "step": 907 + }, + { + "epoch": 0.11, + "learning_rate": 9.933545483143452e-06, + "loss": 3.4787, + "step": 908 + }, + { + "epoch": 0.11, + "learning_rate": 9.933220054461469e-06, + "loss": 3.4773, + "step": 909 + }, + { + "epoch": 0.11, + "learning_rate": 9.932893836266945e-06, + "loss": 3.476, + "step": 910 + }, + { + "epoch": 0.11, + "learning_rate": 9.932566828612089e-06, + "loss": 3.4484, + "step": 911 + }, + { + "epoch": 0.11, + "learning_rate": 9.932239031549234e-06, + "loss": 3.4735, + "step": 912 + }, + { + "epoch": 0.11, + "learning_rate": 9.93191044513084e-06, + "loss": 3.6045, + "step": 913 + }, + { + "epoch": 0.11, + "learning_rate": 9.931581069409494e-06, + "loss": 3.5409, + "step": 914 + }, + { + "epoch": 0.11, + "learning_rate": 9.931250904437913e-06, + "loss": 3.4061, + "step": 915 + }, + { + "epoch": 0.11, + "learning_rate": 9.930919950268932e-06, + "loss": 3.4147, + "step": 916 + }, + { + "epoch": 0.11, + "learning_rate": 9.930588206955518e-06, + "loss": 3.5364, + "step": 917 + }, + { + "epoch": 0.11, + "learning_rate": 9.930255674550763e-06, + "loss": 3.3194, + "step": 918 + }, + { + "epoch": 0.11, + "learning_rate": 9.929922353107888e-06, + "loss": 3.3904, + "step": 919 + }, + { + "epoch": 0.11, + "learning_rate": 9.929588242680234e-06, + "loss": 3.4281, + "step": 920 + }, + { + "epoch": 0.11, + "learning_rate": 9.929253343321276e-06, + "loss": 3.4378, + "step": 921 + }, + { + "epoch": 0.11, + "learning_rate": 9.92891765508461e-06, + "loss": 3.5069, + "step": 922 + }, + { + "epoch": 0.11, + "learning_rate": 9.928581178023957e-06, + "loss": 3.3201, + "step": 923 + }, + { + "epoch": 0.11, + "learning_rate": 9.92824391219317e-06, + "loss": 3.4504, + "step": 924 + }, + { + "epoch": 0.11, + "learning_rate": 9.927905857646223e-06, + "loss": 3.4363, + "step": 925 + }, + { + "epoch": 0.11, + "learning_rate": 9.927567014437219e-06, + "loss": 3.4252, + "step": 926 + }, + { + "epoch": 0.11, + "learning_rate": 9.927227382620386e-06, + "loss": 3.42, + "step": 927 + }, + { + "epoch": 0.11, + "learning_rate": 9.92688696225008e-06, + "loss": 3.5463, + "step": 928 + }, + { + "epoch": 0.11, + "learning_rate": 9.92654575338078e-06, + "loss": 3.4582, + "step": 929 + }, + { + "epoch": 0.11, + "learning_rate": 9.926203756067094e-06, + "loss": 3.4687, + "step": 930 + }, + { + "epoch": 0.11, + "learning_rate": 9.925860970363755e-06, + "loss": 3.3861, + "step": 931 + }, + { + "epoch": 0.11, + "learning_rate": 9.925517396325623e-06, + "loss": 3.4457, + "step": 932 + }, + { + "epoch": 0.11, + "learning_rate": 9.925173034007684e-06, + "loss": 3.4436, + "step": 933 + }, + { + "epoch": 0.11, + "learning_rate": 9.92482788346505e-06, + "loss": 3.4245, + "step": 934 + }, + { + "epoch": 0.11, + "learning_rate": 9.924481944752956e-06, + "loss": 3.3796, + "step": 935 + }, + { + "epoch": 0.11, + "learning_rate": 9.924135217926769e-06, + "loss": 3.5144, + "step": 936 + }, + { + "epoch": 0.11, + "learning_rate": 9.923787703041977e-06, + "loss": 3.434, + "step": 937 + }, + { + "epoch": 0.11, + "learning_rate": 9.923439400154198e-06, + "loss": 3.4023, + "step": 938 + }, + { + "epoch": 0.11, + "learning_rate": 9.923090309319174e-06, + "loss": 3.4006, + "step": 939 + }, + { + "epoch": 0.11, + "learning_rate": 9.922740430592775e-06, + "loss": 3.5406, + "step": 940 + }, + { + "epoch": 0.11, + "learning_rate": 9.922389764030991e-06, + "loss": 3.5019, + "step": 941 + }, + { + "epoch": 0.11, + "learning_rate": 9.922038309689946e-06, + "loss": 3.5793, + "step": 942 + }, + { + "epoch": 0.11, + "learning_rate": 9.921686067625888e-06, + "loss": 3.4171, + "step": 943 + }, + { + "epoch": 0.11, + "learning_rate": 9.921333037895186e-06, + "loss": 3.3438, + "step": 944 + }, + { + "epoch": 0.11, + "learning_rate": 9.920979220554343e-06, + "loss": 3.4729, + "step": 945 + }, + { + "epoch": 0.11, + "learning_rate": 9.92062461565998e-06, + "loss": 3.4514, + "step": 946 + }, + { + "epoch": 0.11, + "learning_rate": 9.920269223268853e-06, + "loss": 3.407, + "step": 947 + }, + { + "epoch": 0.11, + "learning_rate": 9.919913043437832e-06, + "loss": 3.3449, + "step": 948 + }, + { + "epoch": 0.11, + "learning_rate": 9.919556076223926e-06, + "loss": 3.3368, + "step": 949 + }, + { + "epoch": 0.11, + "learning_rate": 9.919198321684263e-06, + "loss": 3.5235, + "step": 950 + }, + { + "epoch": 0.11, + "learning_rate": 9.918839779876095e-06, + "loss": 3.4599, + "step": 951 + }, + { + "epoch": 0.11, + "learning_rate": 9.918480450856807e-06, + "loss": 3.4033, + "step": 952 + }, + { + "epoch": 0.11, + "learning_rate": 9.918120334683902e-06, + "loss": 3.4156, + "step": 953 + }, + { + "epoch": 0.11, + "learning_rate": 9.917759431415015e-06, + "loss": 3.4437, + "step": 954 + }, + { + "epoch": 0.11, + "learning_rate": 9.917397741107906e-06, + "loss": 3.4791, + "step": 955 + }, + { + "epoch": 0.11, + "learning_rate": 9.917035263820459e-06, + "loss": 3.3432, + "step": 956 + }, + { + "epoch": 0.11, + "learning_rate": 9.916671999610685e-06, + "loss": 3.49, + "step": 957 + }, + { + "epoch": 0.11, + "learning_rate": 9.916307948536721e-06, + "loss": 3.4118, + "step": 958 + }, + { + "epoch": 0.11, + "learning_rate": 9.915943110656827e-06, + "loss": 3.4488, + "step": 959 + }, + { + "epoch": 0.11, + "learning_rate": 9.915577486029397e-06, + "loss": 3.4815, + "step": 960 + }, + { + "epoch": 0.12, + "learning_rate": 9.91521107471294e-06, + "loss": 3.427, + "step": 961 + }, + { + "epoch": 0.12, + "learning_rate": 9.9148438767661e-06, + "loss": 3.4866, + "step": 962 + }, + { + "epoch": 0.12, + "learning_rate": 9.914475892247642e-06, + "loss": 3.4915, + "step": 963 + }, + { + "epoch": 0.12, + "learning_rate": 9.91410712121646e-06, + "loss": 3.4183, + "step": 964 + }, + { + "epoch": 0.12, + "learning_rate": 9.91373756373157e-06, + "loss": 3.4312, + "step": 965 + }, + { + "epoch": 0.12, + "learning_rate": 9.913367219852116e-06, + "loss": 3.559, + "step": 966 + }, + { + "epoch": 0.12, + "learning_rate": 9.912996089637369e-06, + "loss": 3.3622, + "step": 967 + }, + { + "epoch": 0.12, + "learning_rate": 9.912624173146724e-06, + "loss": 3.4634, + "step": 968 + }, + { + "epoch": 0.12, + "learning_rate": 9.912251470439703e-06, + "loss": 3.3249, + "step": 969 + }, + { + "epoch": 0.12, + "learning_rate": 9.911877981575953e-06, + "loss": 3.337, + "step": 970 + }, + { + "epoch": 0.12, + "learning_rate": 9.911503706615247e-06, + "loss": 3.4099, + "step": 971 + }, + { + "epoch": 0.12, + "learning_rate": 9.911128645617485e-06, + "loss": 3.509, + "step": 972 + }, + { + "epoch": 0.12, + "learning_rate": 9.91075279864269e-06, + "loss": 3.4353, + "step": 973 + }, + { + "epoch": 0.12, + "learning_rate": 9.910376165751014e-06, + "loss": 3.4846, + "step": 974 + }, + { + "epoch": 0.12, + "learning_rate": 9.909998747002733e-06, + "loss": 3.5272, + "step": 975 + }, + { + "epoch": 0.12, + "learning_rate": 9.90962054245825e-06, + "loss": 3.4054, + "step": 976 + }, + { + "epoch": 0.12, + "learning_rate": 9.909241552178089e-06, + "loss": 3.5047, + "step": 977 + }, + { + "epoch": 0.12, + "learning_rate": 9.908861776222908e-06, + "loss": 3.4924, + "step": 978 + }, + { + "epoch": 0.12, + "learning_rate": 9.908481214653485e-06, + "loss": 3.499, + "step": 979 + }, + { + "epoch": 0.12, + "learning_rate": 9.908099867530724e-06, + "loss": 3.4075, + "step": 980 + }, + { + "epoch": 0.12, + "learning_rate": 9.907717734915658e-06, + "loss": 3.381, + "step": 981 + }, + { + "epoch": 0.12, + "learning_rate": 9.90733481686944e-06, + "loss": 3.4361, + "step": 982 + }, + { + "epoch": 0.12, + "learning_rate": 9.906951113453356e-06, + "loss": 3.4889, + "step": 983 + }, + { + "epoch": 0.12, + "learning_rate": 9.906566624728813e-06, + "loss": 3.439, + "step": 984 + }, + { + "epoch": 0.12, + "learning_rate": 9.906181350757343e-06, + "loss": 3.498, + "step": 985 + }, + { + "epoch": 0.12, + "learning_rate": 9.905795291600603e-06, + "loss": 3.4548, + "step": 986 + }, + { + "epoch": 0.12, + "learning_rate": 9.905408447320385e-06, + "loss": 3.5406, + "step": 987 + }, + { + "epoch": 0.12, + "learning_rate": 9.905020817978593e-06, + "loss": 3.4937, + "step": 988 + }, + { + "epoch": 0.12, + "learning_rate": 9.904632403637267e-06, + "loss": 3.3841, + "step": 989 + }, + { + "epoch": 0.12, + "learning_rate": 9.904243204358567e-06, + "loss": 3.3282, + "step": 990 + }, + { + "epoch": 0.12, + "learning_rate": 9.903853220204782e-06, + "loss": 3.5177, + "step": 991 + }, + { + "epoch": 0.12, + "learning_rate": 9.903462451238324e-06, + "loss": 3.4643, + "step": 992 + }, + { + "epoch": 0.12, + "learning_rate": 9.90307089752173e-06, + "loss": 3.5231, + "step": 993 + }, + { + "epoch": 0.12, + "learning_rate": 9.902678559117667e-06, + "loss": 3.4382, + "step": 994 + }, + { + "epoch": 0.12, + "learning_rate": 9.902285436088925e-06, + "loss": 3.5048, + "step": 995 + }, + { + "epoch": 0.12, + "learning_rate": 9.901891528498416e-06, + "loss": 3.4726, + "step": 996 + }, + { + "epoch": 0.12, + "learning_rate": 9.901496836409183e-06, + "loss": 3.4126, + "step": 997 + }, + { + "epoch": 0.12, + "learning_rate": 9.901101359884396e-06, + "loss": 3.4908, + "step": 998 + }, + { + "epoch": 0.12, + "learning_rate": 9.900705098987342e-06, + "loss": 3.5464, + "step": 999 + }, + { + "epoch": 0.12, + "learning_rate": 9.900308053781438e-06, + "loss": 3.3729, + "step": 1000 + }, + { + "epoch": 0.12, + "learning_rate": 9.899910224330232e-06, + "loss": 3.4135, + "step": 1001 + }, + { + "epoch": 0.12, + "learning_rate": 9.89951161069739e-06, + "loss": 3.4387, + "step": 1002 + }, + { + "epoch": 0.12, + "learning_rate": 9.899112212946706e-06, + "loss": 3.4158, + "step": 1003 + }, + { + "epoch": 0.12, + "learning_rate": 9.8987120311421e-06, + "loss": 3.4746, + "step": 1004 + }, + { + "epoch": 0.12, + "learning_rate": 9.898311065347616e-06, + "loss": 3.4884, + "step": 1005 + }, + { + "epoch": 0.12, + "learning_rate": 9.897909315627428e-06, + "loss": 3.3794, + "step": 1006 + }, + { + "epoch": 0.12, + "learning_rate": 9.897506782045828e-06, + "loss": 3.4427, + "step": 1007 + }, + { + "epoch": 0.12, + "learning_rate": 9.897103464667237e-06, + "loss": 3.4142, + "step": 1008 + }, + { + "epoch": 0.12, + "learning_rate": 9.896699363556206e-06, + "loss": 3.4166, + "step": 1009 + }, + { + "epoch": 0.12, + "learning_rate": 9.896294478777407e-06, + "loss": 3.4296, + "step": 1010 + }, + { + "epoch": 0.12, + "learning_rate": 9.895888810395633e-06, + "loss": 3.4977, + "step": 1011 + }, + { + "epoch": 0.12, + "learning_rate": 9.895482358475812e-06, + "loss": 3.3701, + "step": 1012 + }, + { + "epoch": 0.12, + "learning_rate": 9.895075123082989e-06, + "loss": 3.4539, + "step": 1013 + }, + { + "epoch": 0.12, + "learning_rate": 9.894667104282341e-06, + "loss": 3.5801, + "step": 1014 + }, + { + "epoch": 0.12, + "learning_rate": 9.894258302139166e-06, + "loss": 3.4414, + "step": 1015 + }, + { + "epoch": 0.12, + "learning_rate": 9.893848716718888e-06, + "loss": 3.3728, + "step": 1016 + }, + { + "epoch": 0.12, + "learning_rate": 9.89343834808706e-06, + "loss": 3.4917, + "step": 1017 + }, + { + "epoch": 0.12, + "learning_rate": 9.89302719630935e-06, + "loss": 3.4151, + "step": 1018 + }, + { + "epoch": 0.12, + "learning_rate": 9.89261526145157e-06, + "loss": 3.375, + "step": 1019 + }, + { + "epoch": 0.12, + "learning_rate": 9.892202543579637e-06, + "loss": 3.3959, + "step": 1020 + }, + { + "epoch": 0.12, + "learning_rate": 9.891789042759604e-06, + "loss": 3.348, + "step": 1021 + }, + { + "epoch": 0.12, + "learning_rate": 9.891374759057653e-06, + "loss": 3.485, + "step": 1022 + }, + { + "epoch": 0.12, + "learning_rate": 9.890959692540079e-06, + "loss": 3.3232, + "step": 1023 + }, + { + "epoch": 0.12, + "learning_rate": 9.890543843273312e-06, + "loss": 3.3411, + "step": 1024 + }, + { + "epoch": 0.12, + "learning_rate": 9.890127211323908e-06, + "loss": 3.3334, + "step": 1025 + }, + { + "epoch": 0.12, + "learning_rate": 9.889709796758537e-06, + "loss": 3.4661, + "step": 1026 + }, + { + "epoch": 0.12, + "learning_rate": 9.889291599644011e-06, + "loss": 3.3742, + "step": 1027 + }, + { + "epoch": 0.12, + "learning_rate": 9.888872620047251e-06, + "loss": 3.3336, + "step": 1028 + }, + { + "epoch": 0.12, + "learning_rate": 9.888452858035313e-06, + "loss": 3.4934, + "step": 1029 + }, + { + "epoch": 0.12, + "learning_rate": 9.888032313675377e-06, + "loss": 3.4962, + "step": 1030 + }, + { + "epoch": 0.12, + "learning_rate": 9.887610987034746e-06, + "loss": 3.4498, + "step": 1031 + }, + { + "epoch": 0.12, + "learning_rate": 9.88718887818085e-06, + "loss": 3.4989, + "step": 1032 + }, + { + "epoch": 0.12, + "learning_rate": 9.88676598718124e-06, + "loss": 3.4374, + "step": 1033 + }, + { + "epoch": 0.12, + "learning_rate": 9.8863423141036e-06, + "loss": 3.4783, + "step": 1034 + }, + { + "epoch": 0.12, + "learning_rate": 9.885917859015732e-06, + "loss": 3.4811, + "step": 1035 + }, + { + "epoch": 0.12, + "learning_rate": 9.885492621985567e-06, + "loss": 3.4486, + "step": 1036 + }, + { + "epoch": 0.12, + "learning_rate": 9.885066603081156e-06, + "loss": 3.4852, + "step": 1037 + }, + { + "epoch": 0.12, + "learning_rate": 9.884639802370686e-06, + "loss": 3.4248, + "step": 1038 + }, + { + "epoch": 0.12, + "learning_rate": 9.884212219922456e-06, + "loss": 3.5183, + "step": 1039 + }, + { + "epoch": 0.12, + "learning_rate": 9.8837838558049e-06, + "loss": 3.4566, + "step": 1040 + }, + { + "epoch": 0.12, + "learning_rate": 9.883354710086573e-06, + "loss": 3.4332, + "step": 1041 + }, + { + "epoch": 0.12, + "learning_rate": 9.882924782836151e-06, + "loss": 3.4053, + "step": 1042 + }, + { + "epoch": 0.12, + "learning_rate": 9.882494074122448e-06, + "loss": 3.3915, + "step": 1043 + }, + { + "epoch": 0.12, + "learning_rate": 9.882062584014386e-06, + "loss": 3.4641, + "step": 1044 + }, + { + "epoch": 0.13, + "learning_rate": 9.881630312581027e-06, + "loss": 3.3927, + "step": 1045 + }, + { + "epoch": 0.13, + "learning_rate": 9.881197259891548e-06, + "loss": 3.4503, + "step": 1046 + }, + { + "epoch": 0.13, + "learning_rate": 9.880763426015255e-06, + "loss": 3.4828, + "step": 1047 + }, + { + "epoch": 0.13, + "learning_rate": 9.880328811021581e-06, + "loss": 3.3531, + "step": 1048 + }, + { + "epoch": 0.13, + "learning_rate": 9.87989341498008e-06, + "loss": 3.5039, + "step": 1049 + }, + { + "epoch": 0.13, + "learning_rate": 9.879457237960435e-06, + "loss": 3.4963, + "step": 1050 + }, + { + "epoch": 0.13, + "learning_rate": 9.879020280032448e-06, + "loss": 3.4796, + "step": 1051 + }, + { + "epoch": 0.13, + "learning_rate": 9.878582541266054e-06, + "loss": 3.3917, + "step": 1052 + }, + { + "epoch": 0.13, + "learning_rate": 9.878144021731304e-06, + "loss": 3.471, + "step": 1053 + }, + { + "epoch": 0.13, + "learning_rate": 9.877704721498383e-06, + "loss": 3.4503, + "step": 1054 + }, + { + "epoch": 0.13, + "learning_rate": 9.877264640637592e-06, + "loss": 3.3974, + "step": 1055 + }, + { + "epoch": 0.13, + "learning_rate": 9.876823779219367e-06, + "loss": 3.4711, + "step": 1056 + }, + { + "epoch": 0.13, + "learning_rate": 9.87638213731426e-06, + "loss": 3.3885, + "step": 1057 + }, + { + "epoch": 0.13, + "learning_rate": 9.875939714992952e-06, + "loss": 3.4398, + "step": 1058 + }, + { + "epoch": 0.13, + "learning_rate": 9.87549651232625e-06, + "loss": 3.4797, + "step": 1059 + }, + { + "epoch": 0.13, + "learning_rate": 9.87505252938508e-06, + "loss": 3.4345, + "step": 1060 + }, + { + "epoch": 0.13, + "learning_rate": 9.874607766240501e-06, + "loss": 3.3822, + "step": 1061 + }, + { + "epoch": 0.13, + "learning_rate": 9.874162222963692e-06, + "loss": 3.5349, + "step": 1062 + }, + { + "epoch": 0.13, + "learning_rate": 9.873715899625958e-06, + "loss": 3.4794, + "step": 1063 + }, + { + "epoch": 0.13, + "learning_rate": 9.873268796298725e-06, + "loss": 3.4708, + "step": 1064 + }, + { + "epoch": 0.13, + "learning_rate": 9.872820913053552e-06, + "loss": 3.3982, + "step": 1065 + }, + { + "epoch": 0.13, + "learning_rate": 9.872372249962117e-06, + "loss": 3.5024, + "step": 1066 + }, + { + "epoch": 0.13, + "learning_rate": 9.871922807096224e-06, + "loss": 3.42, + "step": 1067 + }, + { + "epoch": 0.13, + "learning_rate": 9.8714725845278e-06, + "loss": 3.4886, + "step": 1068 + }, + { + "epoch": 0.13, + "learning_rate": 9.871021582328898e-06, + "loss": 3.3889, + "step": 1069 + }, + { + "epoch": 0.13, + "learning_rate": 9.870569800571703e-06, + "loss": 3.4452, + "step": 1070 + }, + { + "epoch": 0.13, + "learning_rate": 9.87011723932851e-06, + "loss": 3.4709, + "step": 1071 + }, + { + "epoch": 0.13, + "learning_rate": 9.869663898671754e-06, + "loss": 3.4966, + "step": 1072 + }, + { + "epoch": 0.13, + "learning_rate": 9.86920977867398e-06, + "loss": 3.3646, + "step": 1073 + }, + { + "epoch": 0.13, + "learning_rate": 9.868754879407873e-06, + "loss": 3.3671, + "step": 1074 + }, + { + "epoch": 0.13, + "learning_rate": 9.86829920094623e-06, + "loss": 3.4836, + "step": 1075 + }, + { + "epoch": 0.13, + "learning_rate": 9.867842743361978e-06, + "loss": 3.4284, + "step": 1076 + }, + { + "epoch": 0.13, + "learning_rate": 9.86738550672817e-06, + "loss": 3.4586, + "step": 1077 + }, + { + "epoch": 0.13, + "learning_rate": 9.866927491117984e-06, + "loss": 3.4851, + "step": 1078 + }, + { + "epoch": 0.13, + "learning_rate": 9.866468696604719e-06, + "loss": 3.4371, + "step": 1079 + }, + { + "epoch": 0.13, + "learning_rate": 9.866009123261798e-06, + "loss": 3.4586, + "step": 1080 + }, + { + "epoch": 0.13, + "learning_rate": 9.865548771162773e-06, + "loss": 3.4824, + "step": 1081 + }, + { + "epoch": 0.13, + "learning_rate": 9.86508764038132e-06, + "loss": 3.4262, + "step": 1082 + }, + { + "epoch": 0.13, + "learning_rate": 9.86462573099124e-06, + "loss": 3.5027, + "step": 1083 + }, + { + "epoch": 0.13, + "learning_rate": 9.864163043066453e-06, + "loss": 3.4617, + "step": 1084 + }, + { + "epoch": 0.13, + "learning_rate": 9.863699576681007e-06, + "loss": 3.4346, + "step": 1085 + }, + { + "epoch": 0.13, + "learning_rate": 9.86323533190908e-06, + "loss": 3.4038, + "step": 1086 + }, + { + "epoch": 0.13, + "learning_rate": 9.862770308824964e-06, + "loss": 3.455, + "step": 1087 + }, + { + "epoch": 0.13, + "learning_rate": 9.862304507503087e-06, + "loss": 3.4099, + "step": 1088 + }, + { + "epoch": 0.13, + "learning_rate": 9.861837928017992e-06, + "loss": 3.417, + "step": 1089 + }, + { + "epoch": 0.13, + "learning_rate": 9.86137057044435e-06, + "loss": 3.411, + "step": 1090 + }, + { + "epoch": 0.13, + "learning_rate": 9.86090243485696e-06, + "loss": 3.3934, + "step": 1091 + }, + { + "epoch": 0.13, + "learning_rate": 9.86043352133074e-06, + "loss": 3.5157, + "step": 1092 + }, + { + "epoch": 0.13, + "learning_rate": 9.859963829940736e-06, + "loss": 3.5535, + "step": 1093 + }, + { + "epoch": 0.13, + "learning_rate": 9.859493360762119e-06, + "loss": 3.4523, + "step": 1094 + }, + { + "epoch": 0.13, + "learning_rate": 9.859022113870178e-06, + "loss": 3.4419, + "step": 1095 + }, + { + "epoch": 0.13, + "learning_rate": 9.858550089340335e-06, + "loss": 3.498, + "step": 1096 + }, + { + "epoch": 0.13, + "learning_rate": 9.858077287248134e-06, + "loss": 3.428, + "step": 1097 + }, + { + "epoch": 0.13, + "learning_rate": 9.857603707669238e-06, + "loss": 3.5312, + "step": 1098 + }, + { + "epoch": 0.13, + "learning_rate": 9.857129350679442e-06, + "loss": 3.3741, + "step": 1099 + }, + { + "epoch": 0.13, + "learning_rate": 9.856654216354662e-06, + "loss": 3.4554, + "step": 1100 + }, + { + "epoch": 0.13, + "learning_rate": 9.856178304770935e-06, + "loss": 3.5287, + "step": 1101 + }, + { + "epoch": 0.13, + "learning_rate": 9.855701616004432e-06, + "loss": 3.5844, + "step": 1102 + }, + { + "epoch": 0.13, + "learning_rate": 9.855224150131437e-06, + "loss": 3.4962, + "step": 1103 + }, + { + "epoch": 0.13, + "learning_rate": 9.854745907228366e-06, + "loss": 3.3812, + "step": 1104 + }, + { + "epoch": 0.13, + "learning_rate": 9.854266887371755e-06, + "loss": 3.4187, + "step": 1105 + }, + { + "epoch": 0.13, + "learning_rate": 9.85378709063827e-06, + "loss": 3.4578, + "step": 1106 + }, + { + "epoch": 0.13, + "learning_rate": 9.853306517104694e-06, + "loss": 3.5438, + "step": 1107 + }, + { + "epoch": 0.13, + "learning_rate": 9.85282516684794e-06, + "loss": 3.4504, + "step": 1108 + }, + { + "epoch": 0.13, + "learning_rate": 9.852343039945042e-06, + "loss": 3.4058, + "step": 1109 + }, + { + "epoch": 0.13, + "learning_rate": 9.851860136473161e-06, + "loss": 3.4865, + "step": 1110 + }, + { + "epoch": 0.13, + "learning_rate": 9.85137645650958e-06, + "loss": 3.4567, + "step": 1111 + }, + { + "epoch": 0.13, + "learning_rate": 9.85089200013171e-06, + "loss": 3.5273, + "step": 1112 + }, + { + "epoch": 0.13, + "learning_rate": 9.850406767417078e-06, + "loss": 3.4436, + "step": 1113 + }, + { + "epoch": 0.13, + "learning_rate": 9.849920758443343e-06, + "loss": 3.4824, + "step": 1114 + }, + { + "epoch": 0.13, + "learning_rate": 9.849433973288286e-06, + "loss": 3.4265, + "step": 1115 + }, + { + "epoch": 0.13, + "learning_rate": 9.848946412029815e-06, + "loss": 3.5138, + "step": 1116 + }, + { + "epoch": 0.13, + "learning_rate": 9.848458074745954e-06, + "loss": 3.4375, + "step": 1117 + }, + { + "epoch": 0.13, + "learning_rate": 9.84796896151486e-06, + "loss": 3.3763, + "step": 1118 + }, + { + "epoch": 0.13, + "learning_rate": 9.847479072414811e-06, + "loss": 3.5061, + "step": 1119 + }, + { + "epoch": 0.13, + "learning_rate": 9.846988407524207e-06, + "loss": 3.4248, + "step": 1120 + }, + { + "epoch": 0.13, + "learning_rate": 9.846496966921574e-06, + "loss": 3.4441, + "step": 1121 + }, + { + "epoch": 0.13, + "learning_rate": 9.846004750685563e-06, + "loss": 3.4428, + "step": 1122 + }, + { + "epoch": 0.13, + "learning_rate": 9.845511758894948e-06, + "loss": 3.4055, + "step": 1123 + }, + { + "epoch": 0.13, + "learning_rate": 9.84501799162863e-06, + "loss": 3.4201, + "step": 1124 + }, + { + "epoch": 0.13, + "learning_rate": 9.844523448965626e-06, + "loss": 3.3958, + "step": 1125 + }, + { + "epoch": 0.13, + "learning_rate": 9.844028130985086e-06, + "loss": 3.5022, + "step": 1126 + }, + { + "epoch": 0.13, + "learning_rate": 9.843532037766281e-06, + "loss": 3.5112, + "step": 1127 + }, + { + "epoch": 0.14, + "learning_rate": 9.843035169388606e-06, + "loss": 3.4726, + "step": 1128 + }, + { + "epoch": 0.14, + "learning_rate": 9.842537525931579e-06, + "loss": 3.3546, + "step": 1129 + }, + { + "epoch": 0.14, + "learning_rate": 9.84203910747484e-06, + "loss": 3.435, + "step": 1130 + }, + { + "epoch": 0.14, + "learning_rate": 9.841539914098162e-06, + "loss": 3.4961, + "step": 1131 + }, + { + "epoch": 0.14, + "learning_rate": 9.841039945881432e-06, + "loss": 3.4936, + "step": 1132 + }, + { + "epoch": 0.14, + "learning_rate": 9.840539202904664e-06, + "loss": 3.4724, + "step": 1133 + }, + { + "epoch": 0.14, + "learning_rate": 9.840037685248001e-06, + "loss": 3.3769, + "step": 1134 + }, + { + "epoch": 0.14, + "learning_rate": 9.839535392991702e-06, + "loss": 3.4111, + "step": 1135 + }, + { + "epoch": 0.14, + "learning_rate": 9.839032326216153e-06, + "loss": 3.397, + "step": 1136 + }, + { + "epoch": 0.14, + "learning_rate": 9.838528485001871e-06, + "loss": 3.4133, + "step": 1137 + }, + { + "epoch": 0.14, + "learning_rate": 9.838023869429484e-06, + "loss": 3.4506, + "step": 1138 + }, + { + "epoch": 0.14, + "learning_rate": 9.837518479579753e-06, + "loss": 3.4172, + "step": 1139 + }, + { + "epoch": 0.14, + "learning_rate": 9.837012315533561e-06, + "loss": 3.5324, + "step": 1140 + }, + { + "epoch": 0.14, + "learning_rate": 9.836505377371916e-06, + "loss": 3.3667, + "step": 1141 + }, + { + "epoch": 0.14, + "learning_rate": 9.835997665175946e-06, + "loss": 3.3949, + "step": 1142 + }, + { + "epoch": 0.14, + "learning_rate": 9.835489179026908e-06, + "loss": 3.44, + "step": 1143 + }, + { + "epoch": 0.14, + "learning_rate": 9.834979919006176e-06, + "loss": 3.5089, + "step": 1144 + }, + { + "epoch": 0.14, + "learning_rate": 9.834469885195255e-06, + "loss": 3.5538, + "step": 1145 + }, + { + "epoch": 0.14, + "learning_rate": 9.83395907767577e-06, + "loss": 3.4733, + "step": 1146 + }, + { + "epoch": 0.14, + "learning_rate": 9.83344749652947e-06, + "loss": 3.4981, + "step": 1147 + }, + { + "epoch": 0.14, + "learning_rate": 9.83293514183823e-06, + "loss": 3.476, + "step": 1148 + }, + { + "epoch": 0.14, + "learning_rate": 9.832422013684047e-06, + "loss": 3.476, + "step": 1149 + }, + { + "epoch": 0.14, + "learning_rate": 9.83190811214904e-06, + "loss": 3.4814, + "step": 1150 + }, + { + "epoch": 0.14, + "learning_rate": 9.831393437315455e-06, + "loss": 3.4533, + "step": 1151 + }, + { + "epoch": 0.14, + "learning_rate": 9.83087798926566e-06, + "loss": 3.5272, + "step": 1152 + }, + { + "epoch": 0.14, + "learning_rate": 9.830361768082151e-06, + "loss": 3.3497, + "step": 1153 + }, + { + "epoch": 0.14, + "learning_rate": 9.82984477384754e-06, + "loss": 3.4944, + "step": 1154 + }, + { + "epoch": 0.14, + "learning_rate": 9.829327006644567e-06, + "loss": 3.4417, + "step": 1155 + }, + { + "epoch": 0.14, + "learning_rate": 9.828808466556096e-06, + "loss": 3.4958, + "step": 1156 + }, + { + "epoch": 0.14, + "learning_rate": 9.828289153665116e-06, + "loss": 3.4052, + "step": 1157 + }, + { + "epoch": 0.14, + "learning_rate": 9.827769068054736e-06, + "loss": 3.4835, + "step": 1158 + }, + { + "epoch": 0.14, + "learning_rate": 9.82724820980819e-06, + "loss": 3.5239, + "step": 1159 + }, + { + "epoch": 0.14, + "learning_rate": 9.826726579008837e-06, + "loss": 3.402, + "step": 1160 + }, + { + "epoch": 0.14, + "learning_rate": 9.82620417574016e-06, + "loss": 3.4595, + "step": 1161 + }, + { + "epoch": 0.14, + "learning_rate": 9.825681000085762e-06, + "loss": 3.6014, + "step": 1162 + }, + { + "epoch": 0.14, + "learning_rate": 9.825157052129373e-06, + "loss": 3.4453, + "step": 1163 + }, + { + "epoch": 0.14, + "learning_rate": 9.824632331954846e-06, + "loss": 3.401, + "step": 1164 + }, + { + "epoch": 0.14, + "learning_rate": 9.824106839646159e-06, + "loss": 3.5249, + "step": 1165 + }, + { + "epoch": 0.14, + "learning_rate": 9.823580575287407e-06, + "loss": 3.4981, + "step": 1166 + }, + { + "epoch": 0.14, + "learning_rate": 9.823053538962818e-06, + "loss": 3.4027, + "step": 1167 + }, + { + "epoch": 0.14, + "learning_rate": 9.822525730756736e-06, + "loss": 3.3974, + "step": 1168 + }, + { + "epoch": 0.14, + "learning_rate": 9.821997150753633e-06, + "loss": 3.4588, + "step": 1169 + }, + { + "epoch": 0.14, + "learning_rate": 9.821467799038104e-06, + "loss": 3.5418, + "step": 1170 + }, + { + "epoch": 0.14, + "learning_rate": 9.820937675694862e-06, + "loss": 3.5825, + "step": 1171 + }, + { + "epoch": 0.14, + "learning_rate": 9.820406780808753e-06, + "loss": 3.5065, + "step": 1172 + }, + { + "epoch": 0.14, + "learning_rate": 9.819875114464738e-06, + "loss": 3.4312, + "step": 1173 + }, + { + "epoch": 0.14, + "learning_rate": 9.819342676747908e-06, + "loss": 3.3577, + "step": 1174 + }, + { + "epoch": 0.14, + "learning_rate": 9.81880946774347e-06, + "loss": 3.5159, + "step": 1175 + }, + { + "epoch": 0.14, + "learning_rate": 9.818275487536764e-06, + "loss": 3.3959, + "step": 1176 + }, + { + "epoch": 0.14, + "learning_rate": 9.817740736213244e-06, + "loss": 3.4912, + "step": 1177 + }, + { + "epoch": 0.14, + "learning_rate": 9.817205213858492e-06, + "loss": 3.4739, + "step": 1178 + }, + { + "epoch": 0.14, + "learning_rate": 9.816668920558215e-06, + "loss": 3.5148, + "step": 1179 + }, + { + "epoch": 0.14, + "learning_rate": 9.81613185639824e-06, + "loss": 3.4953, + "step": 1180 + }, + { + "epoch": 0.14, + "learning_rate": 9.815594021464521e-06, + "loss": 3.5575, + "step": 1181 + }, + { + "epoch": 0.14, + "learning_rate": 9.81505541584313e-06, + "loss": 3.547, + "step": 1182 + }, + { + "epoch": 0.14, + "learning_rate": 9.814516039620267e-06, + "loss": 3.4553, + "step": 1183 + }, + { + "epoch": 0.14, + "learning_rate": 9.813975892882252e-06, + "loss": 3.4238, + "step": 1184 + }, + { + "epoch": 0.14, + "learning_rate": 9.813434975715534e-06, + "loss": 3.3869, + "step": 1185 + }, + { + "epoch": 0.14, + "learning_rate": 9.812893288206677e-06, + "loss": 3.4514, + "step": 1186 + }, + { + "epoch": 0.14, + "learning_rate": 9.812350830442377e-06, + "loss": 3.4368, + "step": 1187 + }, + { + "epoch": 0.14, + "learning_rate": 9.811807602509444e-06, + "loss": 3.3842, + "step": 1188 + }, + { + "epoch": 0.14, + "learning_rate": 9.81126360449482e-06, + "loss": 3.3989, + "step": 1189 + }, + { + "epoch": 0.14, + "learning_rate": 9.810718836485564e-06, + "loss": 3.4474, + "step": 1190 + }, + { + "epoch": 0.14, + "learning_rate": 9.810173298568864e-06, + "loss": 3.4798, + "step": 1191 + }, + { + "epoch": 0.14, + "learning_rate": 9.809626990832024e-06, + "loss": 3.5064, + "step": 1192 + }, + { + "epoch": 0.14, + "learning_rate": 9.809079913362478e-06, + "loss": 3.3892, + "step": 1193 + }, + { + "epoch": 0.14, + "learning_rate": 9.808532066247779e-06, + "loss": 3.4213, + "step": 1194 + }, + { + "epoch": 0.14, + "learning_rate": 9.807983449575603e-06, + "loss": 3.4372, + "step": 1195 + }, + { + "epoch": 0.14, + "learning_rate": 9.807434063433754e-06, + "loss": 3.543, + "step": 1196 + }, + { + "epoch": 0.14, + "learning_rate": 9.806883907910155e-06, + "loss": 3.4459, + "step": 1197 + }, + { + "epoch": 0.14, + "learning_rate": 9.80633298309285e-06, + "loss": 3.3945, + "step": 1198 + }, + { + "epoch": 0.14, + "learning_rate": 9.80578128907001e-06, + "loss": 3.4252, + "step": 1199 + }, + { + "epoch": 0.14, + "learning_rate": 9.80522882592993e-06, + "loss": 3.4469, + "step": 1200 + }, + { + "epoch": 0.14, + "learning_rate": 9.804675593761025e-06, + "loss": 3.3879, + "step": 1201 + }, + { + "epoch": 0.14, + "learning_rate": 9.804121592651835e-06, + "loss": 3.3693, + "step": 1202 + }, + { + "epoch": 0.14, + "learning_rate": 9.803566822691022e-06, + "loss": 3.4368, + "step": 1203 + }, + { + "epoch": 0.14, + "learning_rate": 9.803011283967371e-06, + "loss": 3.5264, + "step": 1204 + }, + { + "epoch": 0.14, + "learning_rate": 9.802454976569791e-06, + "loss": 3.4843, + "step": 1205 + }, + { + "epoch": 0.14, + "learning_rate": 9.801897900587312e-06, + "loss": 3.407, + "step": 1206 + }, + { + "epoch": 0.14, + "learning_rate": 9.80134005610909e-06, + "loss": 3.4185, + "step": 1207 + }, + { + "epoch": 0.14, + "learning_rate": 9.8007814432244e-06, + "loss": 3.4347, + "step": 1208 + }, + { + "epoch": 0.14, + "learning_rate": 9.800222062022648e-06, + "loss": 3.4312, + "step": 1209 + }, + { + "epoch": 0.14, + "learning_rate": 9.799661912593352e-06, + "loss": 3.479, + "step": 1210 + }, + { + "epoch": 0.14, + "learning_rate": 9.799100995026158e-06, + "loss": 3.4404, + "step": 1211 + }, + { + "epoch": 0.15, + "learning_rate": 9.79853930941084e-06, + "loss": 3.4743, + "step": 1212 + }, + { + "epoch": 0.15, + "learning_rate": 9.797976855837285e-06, + "loss": 3.4562, + "step": 1213 + }, + { + "epoch": 0.15, + "learning_rate": 9.797413634395513e-06, + "loss": 3.4056, + "step": 1214 + }, + { + "epoch": 0.15, + "learning_rate": 9.79684964517566e-06, + "loss": 3.4108, + "step": 1215 + }, + { + "epoch": 0.15, + "learning_rate": 9.796284888267984e-06, + "loss": 3.4373, + "step": 1216 + }, + { + "epoch": 0.15, + "learning_rate": 9.795719363762872e-06, + "loss": 3.4819, + "step": 1217 + }, + { + "epoch": 0.15, + "learning_rate": 9.795153071750828e-06, + "loss": 3.4884, + "step": 1218 + }, + { + "epoch": 0.15, + "learning_rate": 9.794586012322484e-06, + "loss": 3.5065, + "step": 1219 + }, + { + "epoch": 0.15, + "learning_rate": 9.794018185568592e-06, + "loss": 3.3809, + "step": 1220 + }, + { + "epoch": 0.15, + "learning_rate": 9.793449591580024e-06, + "loss": 3.459, + "step": 1221 + }, + { + "epoch": 0.15, + "learning_rate": 9.792880230447781e-06, + "loss": 3.4667, + "step": 1222 + }, + { + "epoch": 0.15, + "learning_rate": 9.792310102262982e-06, + "loss": 3.4113, + "step": 1223 + }, + { + "epoch": 0.15, + "learning_rate": 9.791739207116872e-06, + "loss": 3.3776, + "step": 1224 + }, + { + "epoch": 0.15, + "learning_rate": 9.791167545100815e-06, + "loss": 3.4509, + "step": 1225 + }, + { + "epoch": 0.15, + "learning_rate": 9.790595116306301e-06, + "loss": 3.398, + "step": 1226 + }, + { + "epoch": 0.15, + "learning_rate": 9.790021920824942e-06, + "loss": 3.4288, + "step": 1227 + }, + { + "epoch": 0.15, + "learning_rate": 9.789447958748468e-06, + "loss": 3.4163, + "step": 1228 + }, + { + "epoch": 0.15, + "learning_rate": 9.788873230168742e-06, + "loss": 3.4475, + "step": 1229 + }, + { + "epoch": 0.15, + "learning_rate": 9.78829773517774e-06, + "loss": 3.5106, + "step": 1230 + }, + { + "epoch": 0.15, + "learning_rate": 9.787721473867565e-06, + "loss": 3.5021, + "step": 1231 + }, + { + "epoch": 0.15, + "learning_rate": 9.78714444633044e-06, + "loss": 3.4387, + "step": 1232 + }, + { + "epoch": 0.15, + "learning_rate": 9.786566652658717e-06, + "loss": 3.4539, + "step": 1233 + }, + { + "epoch": 0.15, + "learning_rate": 9.785988092944862e-06, + "loss": 3.4017, + "step": 1234 + }, + { + "epoch": 0.15, + "learning_rate": 9.78540876728147e-06, + "loss": 3.5145, + "step": 1235 + }, + { + "epoch": 0.15, + "learning_rate": 9.784828675761255e-06, + "loss": 3.422, + "step": 1236 + }, + { + "epoch": 0.15, + "learning_rate": 9.784247818477055e-06, + "loss": 3.3631, + "step": 1237 + }, + { + "epoch": 0.15, + "learning_rate": 9.783666195521831e-06, + "loss": 3.4832, + "step": 1238 + }, + { + "epoch": 0.15, + "learning_rate": 9.783083806988665e-06, + "loss": 3.4826, + "step": 1239 + }, + { + "epoch": 0.15, + "learning_rate": 9.782500652970763e-06, + "loss": 3.4674, + "step": 1240 + }, + { + "epoch": 0.15, + "learning_rate": 9.781916733561453e-06, + "loss": 3.4814, + "step": 1241 + }, + { + "epoch": 0.15, + "learning_rate": 9.781332048854187e-06, + "loss": 3.3681, + "step": 1242 + }, + { + "epoch": 0.15, + "learning_rate": 9.780746598942535e-06, + "loss": 3.4219, + "step": 1243 + }, + { + "epoch": 0.15, + "learning_rate": 9.780160383920195e-06, + "loss": 3.4729, + "step": 1244 + }, + { + "epoch": 0.15, + "learning_rate": 9.779573403880981e-06, + "loss": 3.4894, + "step": 1245 + }, + { + "epoch": 0.15, + "learning_rate": 9.778985658918839e-06, + "loss": 3.4641, + "step": 1246 + }, + { + "epoch": 0.15, + "learning_rate": 9.778397149127826e-06, + "loss": 3.5063, + "step": 1247 + }, + { + "epoch": 0.15, + "learning_rate": 9.77780787460213e-06, + "loss": 3.423, + "step": 1248 + }, + { + "epoch": 0.15, + "learning_rate": 9.77721783543606e-06, + "loss": 3.5069, + "step": 1249 + }, + { + "epoch": 0.15, + "learning_rate": 9.776627031724045e-06, + "loss": 3.4093, + "step": 1250 + }, + { + "epoch": 0.15, + "learning_rate": 9.776035463560634e-06, + "loss": 3.3707, + "step": 1251 + }, + { + "epoch": 0.15, + "learning_rate": 9.775443131040504e-06, + "loss": 3.5712, + "step": 1252 + }, + { + "epoch": 0.15, + "learning_rate": 9.774850034258453e-06, + "loss": 3.4364, + "step": 1253 + }, + { + "epoch": 0.15, + "learning_rate": 9.774256173309397e-06, + "loss": 3.4633, + "step": 1254 + }, + { + "epoch": 0.15, + "learning_rate": 9.773661548288382e-06, + "loss": 3.5011, + "step": 1255 + }, + { + "epoch": 0.15, + "learning_rate": 9.773066159290568e-06, + "loss": 3.5419, + "step": 1256 + }, + { + "epoch": 0.15, + "learning_rate": 9.772470006411243e-06, + "loss": 3.4116, + "step": 1257 + }, + { + "epoch": 0.15, + "learning_rate": 9.771873089745816e-06, + "loss": 3.4218, + "step": 1258 + }, + { + "epoch": 0.15, + "learning_rate": 9.771275409389813e-06, + "loss": 3.4673, + "step": 1259 + }, + { + "epoch": 0.15, + "learning_rate": 9.770676965438892e-06, + "loss": 3.3998, + "step": 1260 + }, + { + "epoch": 0.15, + "learning_rate": 9.770077757988826e-06, + "loss": 3.4746, + "step": 1261 + }, + { + "epoch": 0.15, + "learning_rate": 9.769477787135511e-06, + "loss": 3.3867, + "step": 1262 + }, + { + "epoch": 0.15, + "learning_rate": 9.768877052974968e-06, + "loss": 3.4318, + "step": 1263 + }, + { + "epoch": 0.15, + "learning_rate": 9.76827555560334e-06, + "loss": 3.4043, + "step": 1264 + }, + { + "epoch": 0.15, + "learning_rate": 9.767673295116885e-06, + "loss": 3.4302, + "step": 1265 + }, + { + "epoch": 0.15, + "learning_rate": 9.767070271611995e-06, + "loss": 3.5324, + "step": 1266 + }, + { + "epoch": 0.15, + "learning_rate": 9.766466485185174e-06, + "loss": 3.4538, + "step": 1267 + }, + { + "epoch": 0.15, + "learning_rate": 9.765861935933054e-06, + "loss": 3.4469, + "step": 1268 + }, + { + "epoch": 0.15, + "learning_rate": 9.765256623952385e-06, + "loss": 3.4713, + "step": 1269 + }, + { + "epoch": 0.15, + "learning_rate": 9.764650549340043e-06, + "loss": 3.4924, + "step": 1270 + }, + { + "epoch": 0.15, + "learning_rate": 9.764043712193025e-06, + "loss": 3.5396, + "step": 1271 + }, + { + "epoch": 0.15, + "learning_rate": 9.763436112608447e-06, + "loss": 3.3874, + "step": 1272 + }, + { + "epoch": 0.15, + "learning_rate": 9.762827750683549e-06, + "loss": 3.5168, + "step": 1273 + }, + { + "epoch": 0.15, + "learning_rate": 9.762218626515696e-06, + "loss": 3.4562, + "step": 1274 + }, + { + "epoch": 0.15, + "learning_rate": 9.761608740202369e-06, + "loss": 3.4624, + "step": 1275 + }, + { + "epoch": 0.15, + "learning_rate": 9.760998091841177e-06, + "loss": 3.4055, + "step": 1276 + }, + { + "epoch": 0.15, + "learning_rate": 9.760386681529845e-06, + "loss": 3.4444, + "step": 1277 + }, + { + "epoch": 0.15, + "learning_rate": 9.759774509366227e-06, + "loss": 3.4108, + "step": 1278 + }, + { + "epoch": 0.15, + "learning_rate": 9.75916157544829e-06, + "loss": 3.4805, + "step": 1279 + }, + { + "epoch": 0.15, + "learning_rate": 9.758547879874134e-06, + "loss": 3.3971, + "step": 1280 + }, + { + "epoch": 0.15, + "learning_rate": 9.757933422741973e-06, + "loss": 3.4098, + "step": 1281 + }, + { + "epoch": 0.15, + "learning_rate": 9.757318204150141e-06, + "loss": 3.3888, + "step": 1282 + }, + { + "epoch": 0.15, + "learning_rate": 9.7567022241971e-06, + "loss": 3.4124, + "step": 1283 + }, + { + "epoch": 0.15, + "learning_rate": 9.756085482981433e-06, + "loss": 3.4859, + "step": 1284 + }, + { + "epoch": 0.15, + "learning_rate": 9.75546798060184e-06, + "loss": 3.3863, + "step": 1285 + }, + { + "epoch": 0.15, + "learning_rate": 9.75484971715715e-06, + "loss": 3.5092, + "step": 1286 + }, + { + "epoch": 0.15, + "learning_rate": 9.754230692746305e-06, + "loss": 3.4252, + "step": 1287 + }, + { + "epoch": 0.15, + "learning_rate": 9.753610907468379e-06, + "loss": 3.505, + "step": 1288 + }, + { + "epoch": 0.15, + "learning_rate": 9.752990361422559e-06, + "loss": 3.4501, + "step": 1289 + }, + { + "epoch": 0.15, + "learning_rate": 9.752369054708159e-06, + "loss": 3.3542, + "step": 1290 + }, + { + "epoch": 0.15, + "learning_rate": 9.75174698742461e-06, + "loss": 3.4462, + "step": 1291 + }, + { + "epoch": 0.15, + "learning_rate": 9.751124159671472e-06, + "loss": 3.4958, + "step": 1292 + }, + { + "epoch": 0.15, + "learning_rate": 9.75050057154842e-06, + "loss": 3.4726, + "step": 1293 + }, + { + "epoch": 0.15, + "learning_rate": 9.74987622315525e-06, + "loss": 3.3973, + "step": 1294 + }, + { + "epoch": 0.16, + "learning_rate": 9.749251114591888e-06, + "loss": 3.4591, + "step": 1295 + }, + { + "epoch": 0.16, + "learning_rate": 9.748625245958375e-06, + "loss": 3.4772, + "step": 1296 + }, + { + "epoch": 0.16, + "learning_rate": 9.747998617354873e-06, + "loss": 3.3969, + "step": 1297 + }, + { + "epoch": 0.16, + "learning_rate": 9.747371228881668e-06, + "loss": 3.4959, + "step": 1298 + }, + { + "epoch": 0.16, + "learning_rate": 9.74674308063917e-06, + "loss": 3.4723, + "step": 1299 + }, + { + "epoch": 0.16, + "learning_rate": 9.746114172727904e-06, + "loss": 3.422, + "step": 1300 + }, + { + "epoch": 0.16, + "learning_rate": 9.745484505248524e-06, + "loss": 3.3604, + "step": 1301 + }, + { + "epoch": 0.16, + "learning_rate": 9.744854078301799e-06, + "loss": 3.4577, + "step": 1302 + }, + { + "epoch": 0.16, + "learning_rate": 9.744222891988624e-06, + "loss": 3.4515, + "step": 1303 + }, + { + "epoch": 0.16, + "learning_rate": 9.743590946410014e-06, + "loss": 3.4406, + "step": 1304 + }, + { + "epoch": 0.16, + "learning_rate": 9.742958241667107e-06, + "loss": 3.3814, + "step": 1305 + }, + { + "epoch": 0.16, + "learning_rate": 9.742324777861156e-06, + "loss": 3.4431, + "step": 1306 + }, + { + "epoch": 0.16, + "learning_rate": 9.741690555093548e-06, + "loss": 3.539, + "step": 1307 + }, + { + "epoch": 0.16, + "learning_rate": 9.741055573465779e-06, + "loss": 3.4675, + "step": 1308 + }, + { + "epoch": 0.16, + "learning_rate": 9.740419833079471e-06, + "loss": 3.522, + "step": 1309 + }, + { + "epoch": 0.16, + "learning_rate": 9.73978333403637e-06, + "loss": 3.4273, + "step": 1310 + }, + { + "epoch": 0.16, + "learning_rate": 9.739146076438342e-06, + "loss": 3.444, + "step": 1311 + }, + { + "epoch": 0.16, + "learning_rate": 9.738508060387371e-06, + "loss": 3.5294, + "step": 1312 + }, + { + "epoch": 0.16, + "learning_rate": 9.737869285985568e-06, + "loss": 3.4892, + "step": 1313 + }, + { + "epoch": 0.16, + "learning_rate": 9.737229753335159e-06, + "loss": 3.4962, + "step": 1314 + }, + { + "epoch": 0.16, + "learning_rate": 9.736589462538498e-06, + "loss": 3.4799, + "step": 1315 + }, + { + "epoch": 0.16, + "learning_rate": 9.735948413698055e-06, + "loss": 3.5173, + "step": 1316 + }, + { + "epoch": 0.16, + "learning_rate": 9.735306606916426e-06, + "loss": 3.3864, + "step": 1317 + }, + { + "epoch": 0.16, + "learning_rate": 9.734664042296324e-06, + "loss": 3.3832, + "step": 1318 + }, + { + "epoch": 0.16, + "learning_rate": 9.734020719940585e-06, + "loss": 3.4055, + "step": 1319 + }, + { + "epoch": 0.16, + "learning_rate": 9.733376639952165e-06, + "loss": 3.5478, + "step": 1320 + }, + { + "epoch": 0.16, + "learning_rate": 9.732731802434146e-06, + "loss": 3.4571, + "step": 1321 + }, + { + "epoch": 0.16, + "learning_rate": 9.732086207489726e-06, + "loss": 3.4482, + "step": 1322 + }, + { + "epoch": 0.16, + "learning_rate": 9.731439855222224e-06, + "loss": 3.408, + "step": 1323 + }, + { + "epoch": 0.16, + "learning_rate": 9.730792745735084e-06, + "loss": 3.4433, + "step": 1324 + }, + { + "epoch": 0.16, + "learning_rate": 9.730144879131871e-06, + "loss": 3.4144, + "step": 1325 + }, + { + "epoch": 0.16, + "learning_rate": 9.729496255516269e-06, + "loss": 3.5032, + "step": 1326 + }, + { + "epoch": 0.16, + "learning_rate": 9.728846874992082e-06, + "loss": 3.4343, + "step": 1327 + }, + { + "epoch": 0.16, + "learning_rate": 9.728196737663237e-06, + "loss": 3.5621, + "step": 1328 + }, + { + "epoch": 0.16, + "learning_rate": 9.727545843633783e-06, + "loss": 3.4534, + "step": 1329 + }, + { + "epoch": 0.16, + "learning_rate": 9.726894193007887e-06, + "loss": 3.4108, + "step": 1330 + }, + { + "epoch": 0.16, + "learning_rate": 9.726241785889844e-06, + "loss": 3.4596, + "step": 1331 + }, + { + "epoch": 0.16, + "learning_rate": 9.72558862238406e-06, + "loss": 3.548, + "step": 1332 + }, + { + "epoch": 0.16, + "learning_rate": 9.72493470259507e-06, + "loss": 3.4469, + "step": 1333 + }, + { + "epoch": 0.16, + "learning_rate": 9.724280026627527e-06, + "loss": 3.3582, + "step": 1334 + }, + { + "epoch": 0.16, + "learning_rate": 9.723624594586206e-06, + "loss": 3.4568, + "step": 1335 + }, + { + "epoch": 0.16, + "learning_rate": 9.722968406575998e-06, + "loss": 3.2977, + "step": 1336 + }, + { + "epoch": 0.16, + "learning_rate": 9.722311462701927e-06, + "loss": 3.45, + "step": 1337 + }, + { + "epoch": 0.16, + "learning_rate": 9.721653763069123e-06, + "loss": 3.4011, + "step": 1338 + }, + { + "epoch": 0.16, + "learning_rate": 9.720995307782849e-06, + "loss": 3.4524, + "step": 1339 + }, + { + "epoch": 0.16, + "learning_rate": 9.720336096948482e-06, + "loss": 3.4182, + "step": 1340 + }, + { + "epoch": 0.16, + "learning_rate": 9.719676130671523e-06, + "loss": 3.4401, + "step": 1341 + }, + { + "epoch": 0.16, + "learning_rate": 9.719015409057591e-06, + "loss": 3.454, + "step": 1342 + }, + { + "epoch": 0.16, + "learning_rate": 9.718353932212432e-06, + "loss": 3.3309, + "step": 1343 + }, + { + "epoch": 0.16, + "learning_rate": 9.717691700241904e-06, + "loss": 3.4321, + "step": 1344 + }, + { + "epoch": 0.16, + "learning_rate": 9.717028713251993e-06, + "loss": 3.4964, + "step": 1345 + }, + { + "epoch": 0.16, + "learning_rate": 9.716364971348804e-06, + "loss": 3.4112, + "step": 1346 + }, + { + "epoch": 0.16, + "learning_rate": 9.715700474638562e-06, + "loss": 3.3768, + "step": 1347 + }, + { + "epoch": 0.16, + "learning_rate": 9.715035223227612e-06, + "loss": 3.5292, + "step": 1348 + }, + { + "epoch": 0.16, + "learning_rate": 9.71436921722242e-06, + "loss": 3.4992, + "step": 1349 + }, + { + "epoch": 0.16, + "learning_rate": 9.71370245672958e-06, + "loss": 3.4212, + "step": 1350 + }, + { + "epoch": 0.16, + "learning_rate": 9.71303494185579e-06, + "loss": 3.411, + "step": 1351 + }, + { + "epoch": 0.16, + "learning_rate": 9.712366672707887e-06, + "loss": 3.5213, + "step": 1352 + }, + { + "epoch": 0.16, + "learning_rate": 9.711697649392817e-06, + "loss": 3.5411, + "step": 1353 + }, + { + "epoch": 0.16, + "learning_rate": 9.711027872017652e-06, + "loss": 3.4248, + "step": 1354 + }, + { + "epoch": 0.16, + "learning_rate": 9.710357340689584e-06, + "loss": 3.4408, + "step": 1355 + }, + { + "epoch": 0.16, + "learning_rate": 9.709686055515923e-06, + "loss": 3.349, + "step": 1356 + }, + { + "epoch": 0.16, + "learning_rate": 9.709014016604102e-06, + "loss": 3.4897, + "step": 1357 + }, + { + "epoch": 0.16, + "learning_rate": 9.708341224061675e-06, + "loss": 3.3594, + "step": 1358 + }, + { + "epoch": 0.16, + "learning_rate": 9.707667677996315e-06, + "loss": 3.5759, + "step": 1359 + }, + { + "epoch": 0.16, + "learning_rate": 9.706993378515815e-06, + "loss": 3.4144, + "step": 1360 + }, + { + "epoch": 0.16, + "learning_rate": 9.706318325728092e-06, + "loss": 3.4472, + "step": 1361 + }, + { + "epoch": 0.16, + "learning_rate": 9.705642519741182e-06, + "loss": 3.5168, + "step": 1362 + }, + { + "epoch": 0.16, + "learning_rate": 9.704965960663239e-06, + "loss": 3.3999, + "step": 1363 + }, + { + "epoch": 0.16, + "learning_rate": 9.70428864860254e-06, + "loss": 3.4043, + "step": 1364 + }, + { + "epoch": 0.16, + "learning_rate": 9.703610583667482e-06, + "loss": 3.4451, + "step": 1365 + }, + { + "epoch": 0.16, + "learning_rate": 9.702931765966583e-06, + "loss": 3.4659, + "step": 1366 + }, + { + "epoch": 0.16, + "learning_rate": 9.702252195608479e-06, + "loss": 3.4341, + "step": 1367 + }, + { + "epoch": 0.16, + "learning_rate": 9.701571872701933e-06, + "loss": 3.5117, + "step": 1368 + }, + { + "epoch": 0.16, + "learning_rate": 9.70089079735582e-06, + "loss": 3.4087, + "step": 1369 + }, + { + "epoch": 0.16, + "learning_rate": 9.70020896967914e-06, + "loss": 3.4372, + "step": 1370 + }, + { + "epoch": 0.16, + "learning_rate": 9.699526389781013e-06, + "loss": 3.3833, + "step": 1371 + }, + { + "epoch": 0.16, + "learning_rate": 9.69884305777068e-06, + "loss": 3.5057, + "step": 1372 + }, + { + "epoch": 0.16, + "learning_rate": 9.698158973757499e-06, + "loss": 3.4334, + "step": 1373 + }, + { + "epoch": 0.16, + "learning_rate": 9.697474137850953e-06, + "loss": 3.4676, + "step": 1374 + }, + { + "epoch": 0.16, + "learning_rate": 9.696788550160644e-06, + "loss": 3.4606, + "step": 1375 + }, + { + "epoch": 0.16, + "learning_rate": 9.696102210796291e-06, + "loss": 3.4541, + "step": 1376 + }, + { + "epoch": 0.16, + "learning_rate": 9.695415119867737e-06, + "loss": 3.3958, + "step": 1377 + }, + { + "epoch": 0.16, + "learning_rate": 9.694727277484944e-06, + "loss": 3.4482, + "step": 1378 + }, + { + "epoch": 0.17, + "learning_rate": 9.694038683757993e-06, + "loss": 3.4384, + "step": 1379 + }, + { + "epoch": 0.17, + "learning_rate": 9.693349338797089e-06, + "loss": 3.4137, + "step": 1380 + }, + { + "epoch": 0.17, + "learning_rate": 9.692659242712552e-06, + "loss": 3.4738, + "step": 1381 + }, + { + "epoch": 0.17, + "learning_rate": 9.691968395614826e-06, + "loss": 3.4842, + "step": 1382 + }, + { + "epoch": 0.17, + "learning_rate": 9.691276797614476e-06, + "loss": 3.3738, + "step": 1383 + }, + { + "epoch": 0.17, + "learning_rate": 9.690584448822184e-06, + "loss": 3.4396, + "step": 1384 + }, + { + "epoch": 0.17, + "learning_rate": 9.689891349348753e-06, + "loss": 3.4539, + "step": 1385 + }, + { + "epoch": 0.17, + "learning_rate": 9.689197499305104e-06, + "loss": 3.4431, + "step": 1386 + }, + { + "epoch": 0.17, + "learning_rate": 9.688502898802287e-06, + "loss": 3.4487, + "step": 1387 + }, + { + "epoch": 0.17, + "learning_rate": 9.687807547951463e-06, + "loss": 3.5356, + "step": 1388 + }, + { + "epoch": 0.17, + "learning_rate": 9.687111446863915e-06, + "loss": 3.4102, + "step": 1389 + }, + { + "epoch": 0.17, + "learning_rate": 9.686414595651049e-06, + "loss": 3.4849, + "step": 1390 + }, + { + "epoch": 0.17, + "learning_rate": 9.685716994424386e-06, + "loss": 3.5349, + "step": 1391 + }, + { + "epoch": 0.17, + "learning_rate": 9.685018643295571e-06, + "loss": 3.5032, + "step": 1392 + }, + { + "epoch": 0.17, + "learning_rate": 9.68431954237637e-06, + "loss": 3.4711, + "step": 1393 + }, + { + "epoch": 0.17, + "learning_rate": 9.683619691778666e-06, + "loss": 3.5252, + "step": 1394 + }, + { + "epoch": 0.17, + "learning_rate": 9.682919091614462e-06, + "loss": 3.3281, + "step": 1395 + }, + { + "epoch": 0.17, + "learning_rate": 9.682217741995885e-06, + "loss": 3.4831, + "step": 1396 + }, + { + "epoch": 0.17, + "learning_rate": 9.681515643035175e-06, + "loss": 3.3791, + "step": 1397 + }, + { + "epoch": 0.17, + "learning_rate": 9.6808127948447e-06, + "loss": 3.4366, + "step": 1398 + }, + { + "epoch": 0.17, + "learning_rate": 9.68010919753694e-06, + "loss": 3.5015, + "step": 1399 + }, + { + "epoch": 0.17, + "learning_rate": 9.679404851224503e-06, + "loss": 3.471, + "step": 1400 + }, + { + "epoch": 0.17, + "learning_rate": 9.678699756020109e-06, + "loss": 3.4715, + "step": 1401 + }, + { + "epoch": 0.17, + "learning_rate": 9.677993912036602e-06, + "loss": 3.4389, + "step": 1402 + }, + { + "epoch": 0.17, + "learning_rate": 9.677287319386947e-06, + "loss": 3.568, + "step": 1403 + }, + { + "epoch": 0.17, + "learning_rate": 9.676579978184224e-06, + "loss": 3.3938, + "step": 1404 + }, + { + "epoch": 0.17, + "learning_rate": 9.67587188854164e-06, + "loss": 3.3897, + "step": 1405 + }, + { + "epoch": 0.17, + "learning_rate": 9.675163050572516e-06, + "loss": 3.4228, + "step": 1406 + }, + { + "epoch": 0.17, + "learning_rate": 9.674453464390292e-06, + "loss": 3.4214, + "step": 1407 + }, + { + "epoch": 0.17, + "learning_rate": 9.673743130108533e-06, + "loss": 3.4224, + "step": 1408 + }, + { + "epoch": 0.17, + "learning_rate": 9.67303204784092e-06, + "loss": 3.4993, + "step": 1409 + }, + { + "epoch": 0.17, + "learning_rate": 9.672320217701254e-06, + "loss": 3.6335, + "step": 1410 + }, + { + "epoch": 0.17, + "learning_rate": 9.671607639803458e-06, + "loss": 3.3976, + "step": 1411 + }, + { + "epoch": 0.17, + "learning_rate": 9.670894314261573e-06, + "loss": 3.3776, + "step": 1412 + }, + { + "epoch": 0.17, + "learning_rate": 9.670180241189755e-06, + "loss": 3.4756, + "step": 1413 + }, + { + "epoch": 0.17, + "learning_rate": 9.66946542070229e-06, + "loss": 3.4847, + "step": 1414 + }, + { + "epoch": 0.17, + "learning_rate": 9.668749852913575e-06, + "loss": 3.5321, + "step": 1415 + }, + { + "epoch": 0.17, + "learning_rate": 9.668033537938131e-06, + "loss": 3.4626, + "step": 1416 + }, + { + "epoch": 0.17, + "learning_rate": 9.667316475890596e-06, + "loss": 3.4051, + "step": 1417 + }, + { + "epoch": 0.17, + "learning_rate": 9.666598666885729e-06, + "loss": 3.4032, + "step": 1418 + }, + { + "epoch": 0.17, + "learning_rate": 9.665880111038407e-06, + "loss": 3.4516, + "step": 1419 + }, + { + "epoch": 0.17, + "learning_rate": 9.66516080846363e-06, + "loss": 3.3528, + "step": 1420 + }, + { + "epoch": 0.17, + "learning_rate": 9.664440759276512e-06, + "loss": 3.3697, + "step": 1421 + }, + { + "epoch": 0.17, + "learning_rate": 9.663719963592293e-06, + "loss": 3.4892, + "step": 1422 + }, + { + "epoch": 0.17, + "learning_rate": 9.662998421526326e-06, + "loss": 3.4028, + "step": 1423 + }, + { + "epoch": 0.17, + "learning_rate": 9.66227613319409e-06, + "loss": 3.5056, + "step": 1424 + }, + { + "epoch": 0.17, + "learning_rate": 9.661553098711177e-06, + "loss": 3.4418, + "step": 1425 + }, + { + "epoch": 0.17, + "learning_rate": 9.660829318193304e-06, + "loss": 3.5077, + "step": 1426 + }, + { + "epoch": 0.17, + "learning_rate": 9.660104791756302e-06, + "loss": 3.5394, + "step": 1427 + }, + { + "epoch": 0.17, + "learning_rate": 9.659379519516125e-06, + "loss": 3.4947, + "step": 1428 + }, + { + "epoch": 0.17, + "learning_rate": 9.658653501588848e-06, + "loss": 3.4952, + "step": 1429 + }, + { + "epoch": 0.17, + "learning_rate": 9.657926738090662e-06, + "loss": 3.4654, + "step": 1430 + }, + { + "epoch": 0.17, + "learning_rate": 9.657199229137876e-06, + "loss": 3.5055, + "step": 1431 + }, + { + "epoch": 0.17, + "learning_rate": 9.656470974846922e-06, + "loss": 3.5246, + "step": 1432 + }, + { + "epoch": 0.17, + "learning_rate": 9.65574197533435e-06, + "loss": 3.4758, + "step": 1433 + }, + { + "epoch": 0.17, + "learning_rate": 9.65501223071683e-06, + "loss": 3.454, + "step": 1434 + }, + { + "epoch": 0.17, + "learning_rate": 9.654281741111147e-06, + "loss": 3.4386, + "step": 1435 + }, + { + "epoch": 0.17, + "learning_rate": 9.653550506634211e-06, + "loss": 3.4301, + "step": 1436 + }, + { + "epoch": 0.17, + "learning_rate": 9.65281852740305e-06, + "loss": 3.4843, + "step": 1437 + }, + { + "epoch": 0.17, + "learning_rate": 9.652085803534808e-06, + "loss": 3.403, + "step": 1438 + }, + { + "epoch": 0.17, + "learning_rate": 9.65135233514675e-06, + "loss": 3.3784, + "step": 1439 + }, + { + "epoch": 0.17, + "learning_rate": 9.650618122356262e-06, + "loss": 3.4996, + "step": 1440 + }, + { + "epoch": 0.17, + "learning_rate": 9.649883165280845e-06, + "loss": 3.4821, + "step": 1441 + }, + { + "epoch": 0.17, + "learning_rate": 9.649147464038124e-06, + "loss": 3.3953, + "step": 1442 + }, + { + "epoch": 0.17, + "learning_rate": 9.648411018745838e-06, + "loss": 3.4086, + "step": 1443 + }, + { + "epoch": 0.17, + "learning_rate": 9.647673829521851e-06, + "loss": 3.422, + "step": 1444 + }, + { + "epoch": 0.17, + "learning_rate": 9.64693589648414e-06, + "loss": 3.3787, + "step": 1445 + }, + { + "epoch": 0.17, + "learning_rate": 9.646197219750803e-06, + "loss": 3.4592, + "step": 1446 + }, + { + "epoch": 0.17, + "learning_rate": 9.645457799440062e-06, + "loss": 3.4165, + "step": 1447 + }, + { + "epoch": 0.17, + "learning_rate": 9.64471763567025e-06, + "loss": 3.4887, + "step": 1448 + }, + { + "epoch": 0.17, + "learning_rate": 9.643976728559825e-06, + "loss": 3.4709, + "step": 1449 + }, + { + "epoch": 0.17, + "learning_rate": 9.64323507822736e-06, + "loss": 3.4649, + "step": 1450 + }, + { + "epoch": 0.17, + "learning_rate": 9.642492684791551e-06, + "loss": 3.503, + "step": 1451 + }, + { + "epoch": 0.17, + "learning_rate": 9.641749548371209e-06, + "loss": 3.3386, + "step": 1452 + }, + { + "epoch": 0.17, + "learning_rate": 9.641005669085266e-06, + "loss": 3.3897, + "step": 1453 + }, + { + "epoch": 0.17, + "learning_rate": 9.640261047052773e-06, + "loss": 3.5379, + "step": 1454 + }, + { + "epoch": 0.17, + "learning_rate": 9.639515682392898e-06, + "loss": 3.4149, + "step": 1455 + }, + { + "epoch": 0.17, + "learning_rate": 9.638769575224928e-06, + "loss": 3.4057, + "step": 1456 + }, + { + "epoch": 0.17, + "learning_rate": 9.638022725668273e-06, + "loss": 3.4281, + "step": 1457 + }, + { + "epoch": 0.17, + "learning_rate": 9.63727513384246e-06, + "loss": 3.5258, + "step": 1458 + }, + { + "epoch": 0.17, + "learning_rate": 9.636526799867128e-06, + "loss": 3.5075, + "step": 1459 + }, + { + "epoch": 0.17, + "learning_rate": 9.635777723862043e-06, + "loss": 3.4512, + "step": 1460 + }, + { + "epoch": 0.17, + "learning_rate": 9.63502790594709e-06, + "loss": 3.4674, + "step": 1461 + }, + { + "epoch": 0.18, + "learning_rate": 9.634277346242267e-06, + "loss": 3.4037, + "step": 1462 + }, + { + "epoch": 0.18, + "learning_rate": 9.633526044867694e-06, + "loss": 3.4241, + "step": 1463 + }, + { + "epoch": 0.18, + "learning_rate": 9.632774001943608e-06, + "loss": 3.4179, + "step": 1464 + }, + { + "epoch": 0.18, + "learning_rate": 9.63202121759037e-06, + "loss": 3.467, + "step": 1465 + }, + { + "epoch": 0.18, + "learning_rate": 9.631267691928452e-06, + "loss": 3.4911, + "step": 1466 + }, + { + "epoch": 0.18, + "learning_rate": 9.630513425078447e-06, + "loss": 3.4147, + "step": 1467 + }, + { + "epoch": 0.18, + "learning_rate": 9.629758417161073e-06, + "loss": 3.4814, + "step": 1468 + }, + { + "epoch": 0.18, + "learning_rate": 9.629002668297157e-06, + "loss": 3.4511, + "step": 1469 + }, + { + "epoch": 0.18, + "learning_rate": 9.62824617860765e-06, + "loss": 3.4969, + "step": 1470 + }, + { + "epoch": 0.18, + "learning_rate": 9.627488948213622e-06, + "loss": 3.45, + "step": 1471 + }, + { + "epoch": 0.18, + "learning_rate": 9.626730977236261e-06, + "loss": 3.4622, + "step": 1472 + }, + { + "epoch": 0.18, + "learning_rate": 9.625972265796868e-06, + "loss": 3.5547, + "step": 1473 + }, + { + "epoch": 0.18, + "learning_rate": 9.625212814016872e-06, + "loss": 3.4872, + "step": 1474 + }, + { + "epoch": 0.18, + "learning_rate": 9.624452622017814e-06, + "loss": 3.479, + "step": 1475 + }, + { + "epoch": 0.18, + "learning_rate": 9.623691689921356e-06, + "loss": 3.4273, + "step": 1476 + }, + { + "epoch": 0.18, + "learning_rate": 9.622930017849274e-06, + "loss": 3.3889, + "step": 1477 + }, + { + "epoch": 0.18, + "learning_rate": 9.62216760592347e-06, + "loss": 3.5028, + "step": 1478 + }, + { + "epoch": 0.18, + "learning_rate": 9.621404454265958e-06, + "loss": 3.3969, + "step": 1479 + }, + { + "epoch": 0.18, + "learning_rate": 9.620640562998875e-06, + "loss": 3.3672, + "step": 1480 + }, + { + "epoch": 0.18, + "learning_rate": 9.619875932244471e-06, + "loss": 3.5079, + "step": 1481 + }, + { + "epoch": 0.18, + "learning_rate": 9.619110562125121e-06, + "loss": 3.4405, + "step": 1482 + }, + { + "epoch": 0.18, + "learning_rate": 9.618344452763313e-06, + "loss": 3.367, + "step": 1483 + }, + { + "epoch": 0.18, + "learning_rate": 9.617577604281656e-06, + "loss": 3.3849, + "step": 1484 + }, + { + "epoch": 0.18, + "learning_rate": 9.616810016802874e-06, + "loss": 3.4091, + "step": 1485 + }, + { + "epoch": 0.18, + "learning_rate": 9.616041690449816e-06, + "loss": 3.4619, + "step": 1486 + }, + { + "epoch": 0.18, + "learning_rate": 9.61527262534544e-06, + "loss": 3.4432, + "step": 1487 + }, + { + "epoch": 0.18, + "learning_rate": 9.614502821612833e-06, + "loss": 3.4367, + "step": 1488 + }, + { + "epoch": 0.18, + "learning_rate": 9.613732279375188e-06, + "loss": 3.4243, + "step": 1489 + }, + { + "epoch": 0.18, + "learning_rate": 9.612960998755826e-06, + "loss": 3.4623, + "step": 1490 + }, + { + "epoch": 0.18, + "learning_rate": 9.612188979878183e-06, + "loss": 3.3748, + "step": 1491 + }, + { + "epoch": 0.18, + "learning_rate": 9.611416222865813e-06, + "loss": 3.3892, + "step": 1492 + }, + { + "epoch": 0.18, + "learning_rate": 9.610642727842385e-06, + "loss": 3.4544, + "step": 1493 + }, + { + "epoch": 0.18, + "learning_rate": 9.609868494931693e-06, + "loss": 3.4602, + "step": 1494 + }, + { + "epoch": 0.18, + "learning_rate": 9.609093524257644e-06, + "loss": 3.4346, + "step": 1495 + }, + { + "epoch": 0.18, + "learning_rate": 9.608317815944265e-06, + "loss": 3.4639, + "step": 1496 + }, + { + "epoch": 0.18, + "learning_rate": 9.607541370115698e-06, + "loss": 3.3786, + "step": 1497 + }, + { + "epoch": 0.18, + "learning_rate": 9.606764186896206e-06, + "loss": 3.3997, + "step": 1498 + }, + { + "epoch": 0.18, + "learning_rate": 9.605986266410172e-06, + "loss": 3.4518, + "step": 1499 + }, + { + "epoch": 0.18, + "learning_rate": 9.605207608782091e-06, + "loss": 3.4677, + "step": 1500 + }, + { + "epoch": 0.18, + "learning_rate": 9.604428214136582e-06, + "loss": 3.5482, + "step": 1501 + }, + { + "epoch": 0.18, + "learning_rate": 9.603648082598378e-06, + "loss": 3.3928, + "step": 1502 + }, + { + "epoch": 0.18, + "learning_rate": 9.602867214292331e-06, + "loss": 3.4455, + "step": 1503 + }, + { + "epoch": 0.18, + "learning_rate": 9.602085609343411e-06, + "loss": 3.404, + "step": 1504 + }, + { + "epoch": 0.18, + "learning_rate": 9.601303267876708e-06, + "loss": 3.4401, + "step": 1505 + }, + { + "epoch": 0.18, + "learning_rate": 9.600520190017426e-06, + "loss": 3.3899, + "step": 1506 + }, + { + "epoch": 0.18, + "learning_rate": 9.59973637589089e-06, + "loss": 3.4275, + "step": 1507 + }, + { + "epoch": 0.18, + "learning_rate": 9.598951825622541e-06, + "loss": 3.4668, + "step": 1508 + }, + { + "epoch": 0.18, + "learning_rate": 9.598166539337939e-06, + "loss": 3.4571, + "step": 1509 + }, + { + "epoch": 0.18, + "learning_rate": 9.59738051716276e-06, + "loss": 3.4183, + "step": 1510 + }, + { + "epoch": 0.18, + "learning_rate": 9.596593759222798e-06, + "loss": 3.3425, + "step": 1511 + }, + { + "epoch": 0.18, + "learning_rate": 9.59580626564397e-06, + "loss": 3.5323, + "step": 1512 + }, + { + "epoch": 0.18, + "learning_rate": 9.595018036552302e-06, + "loss": 3.5066, + "step": 1513 + }, + { + "epoch": 0.18, + "learning_rate": 9.594229072073945e-06, + "loss": 3.5006, + "step": 1514 + }, + { + "epoch": 0.18, + "learning_rate": 9.593439372335165e-06, + "loss": 3.4826, + "step": 1515 + }, + { + "epoch": 0.18, + "learning_rate": 9.592648937462344e-06, + "loss": 3.5072, + "step": 1516 + }, + { + "epoch": 0.18, + "learning_rate": 9.591857767581981e-06, + "loss": 3.4418, + "step": 1517 + }, + { + "epoch": 0.18, + "learning_rate": 9.5910658628207e-06, + "loss": 3.375, + "step": 1518 + }, + { + "epoch": 0.18, + "learning_rate": 9.590273223305235e-06, + "loss": 3.5464, + "step": 1519 + }, + { + "epoch": 0.18, + "learning_rate": 9.58947984916244e-06, + "loss": 3.4553, + "step": 1520 + }, + { + "epoch": 0.18, + "learning_rate": 9.588685740519285e-06, + "loss": 3.4317, + "step": 1521 + }, + { + "epoch": 0.18, + "learning_rate": 9.587890897502863e-06, + "loss": 3.3942, + "step": 1522 + }, + { + "epoch": 0.18, + "learning_rate": 9.587095320240375e-06, + "loss": 3.3888, + "step": 1523 + }, + { + "epoch": 0.18, + "learning_rate": 9.586299008859152e-06, + "loss": 3.4263, + "step": 1524 + }, + { + "epoch": 0.18, + "learning_rate": 9.585501963486633e-06, + "loss": 3.4178, + "step": 1525 + }, + { + "epoch": 0.18, + "learning_rate": 9.584704184250373e-06, + "loss": 3.5389, + "step": 1526 + }, + { + "epoch": 0.18, + "learning_rate": 9.583905671278053e-06, + "loss": 3.4746, + "step": 1527 + }, + { + "epoch": 0.18, + "learning_rate": 9.583106424697467e-06, + "loss": 3.442, + "step": 1528 + }, + { + "epoch": 0.18, + "learning_rate": 9.582306444636525e-06, + "loss": 3.452, + "step": 1529 + }, + { + "epoch": 0.18, + "learning_rate": 9.581505731223255e-06, + "loss": 3.4175, + "step": 1530 + }, + { + "epoch": 0.18, + "learning_rate": 9.580704284585807e-06, + "loss": 3.4718, + "step": 1531 + }, + { + "epoch": 0.18, + "learning_rate": 9.579902104852442e-06, + "loss": 3.4836, + "step": 1532 + }, + { + "epoch": 0.18, + "learning_rate": 9.579099192151537e-06, + "loss": 3.4608, + "step": 1533 + }, + { + "epoch": 0.18, + "learning_rate": 9.578295546611597e-06, + "loss": 3.4161, + "step": 1534 + }, + { + "epoch": 0.18, + "learning_rate": 9.577491168361232e-06, + "loss": 3.326, + "step": 1535 + }, + { + "epoch": 0.18, + "learning_rate": 9.576686057529181e-06, + "loss": 3.5082, + "step": 1536 + }, + { + "epoch": 0.18, + "learning_rate": 9.575880214244287e-06, + "loss": 3.5094, + "step": 1537 + }, + { + "epoch": 0.18, + "learning_rate": 9.57507363863552e-06, + "loss": 3.4573, + "step": 1538 + }, + { + "epoch": 0.18, + "learning_rate": 9.574266330831968e-06, + "loss": 3.3548, + "step": 1539 + }, + { + "epoch": 0.18, + "learning_rate": 9.573458290962825e-06, + "loss": 3.4416, + "step": 1540 + }, + { + "epoch": 0.18, + "learning_rate": 9.572649519157416e-06, + "loss": 3.4077, + "step": 1541 + }, + { + "epoch": 0.18, + "learning_rate": 9.571840015545174e-06, + "loss": 3.4209, + "step": 1542 + }, + { + "epoch": 0.18, + "learning_rate": 9.571029780255651e-06, + "loss": 3.4435, + "step": 1543 + }, + { + "epoch": 0.18, + "learning_rate": 9.57021881341852e-06, + "loss": 3.4153, + "step": 1544 + }, + { + "epoch": 0.18, + "learning_rate": 9.569407115163566e-06, + "loss": 3.4762, + "step": 1545 + }, + { + "epoch": 0.19, + "learning_rate": 9.568594685620696e-06, + "loss": 3.4059, + "step": 1546 + }, + { + "epoch": 0.19, + "learning_rate": 9.567781524919927e-06, + "loss": 3.3931, + "step": 1547 + }, + { + "epoch": 0.19, + "learning_rate": 9.566967633191402e-06, + "loss": 3.439, + "step": 1548 + }, + { + "epoch": 0.19, + "learning_rate": 9.56615301056537e-06, + "loss": 3.5619, + "step": 1549 + }, + { + "epoch": 0.19, + "learning_rate": 9.56533765717221e-06, + "loss": 3.3994, + "step": 1550 + }, + { + "epoch": 0.19, + "learning_rate": 9.564521573142406e-06, + "loss": 3.4214, + "step": 1551 + }, + { + "epoch": 0.19, + "learning_rate": 9.563704758606569e-06, + "loss": 3.4342, + "step": 1552 + }, + { + "epoch": 0.19, + "learning_rate": 9.562887213695417e-06, + "loss": 3.5368, + "step": 1553 + }, + { + "epoch": 0.19, + "learning_rate": 9.562068938539792e-06, + "loss": 3.4806, + "step": 1554 + }, + { + "epoch": 0.19, + "learning_rate": 9.56124993327065e-06, + "loss": 3.4746, + "step": 1555 + }, + { + "epoch": 0.19, + "learning_rate": 9.560430198019065e-06, + "loss": 3.5128, + "step": 1556 + }, + { + "epoch": 0.19, + "learning_rate": 9.55960973291623e-06, + "loss": 3.4296, + "step": 1557 + }, + { + "epoch": 0.19, + "learning_rate": 9.55878853809345e-06, + "loss": 3.3987, + "step": 1558 + }, + { + "epoch": 0.19, + "learning_rate": 9.557966613682148e-06, + "loss": 3.5154, + "step": 1559 + }, + { + "epoch": 0.19, + "learning_rate": 9.557143959813864e-06, + "loss": 3.4476, + "step": 1560 + }, + { + "epoch": 0.19, + "learning_rate": 9.556320576620259e-06, + "loss": 3.4539, + "step": 1561 + }, + { + "epoch": 0.19, + "learning_rate": 9.555496464233108e-06, + "loss": 3.4608, + "step": 1562 + }, + { + "epoch": 0.19, + "learning_rate": 9.554671622784296e-06, + "loss": 3.3701, + "step": 1563 + }, + { + "epoch": 0.19, + "learning_rate": 9.553846052405836e-06, + "loss": 3.4383, + "step": 1564 + }, + { + "epoch": 0.19, + "learning_rate": 9.553019753229849e-06, + "loss": 3.4197, + "step": 1565 + }, + { + "epoch": 0.19, + "learning_rate": 9.552192725388578e-06, + "loss": 3.5768, + "step": 1566 + }, + { + "epoch": 0.19, + "learning_rate": 9.55136496901438e-06, + "loss": 3.45, + "step": 1567 + }, + { + "epoch": 0.19, + "learning_rate": 9.55053648423973e-06, + "loss": 3.52, + "step": 1568 + }, + { + "epoch": 0.19, + "learning_rate": 9.549707271197216e-06, + "loss": 3.4685, + "step": 1569 + }, + { + "epoch": 0.19, + "learning_rate": 9.548877330019549e-06, + "loss": 3.4477, + "step": 1570 + }, + { + "epoch": 0.19, + "learning_rate": 9.548046660839549e-06, + "loss": 3.3781, + "step": 1571 + }, + { + "epoch": 0.19, + "learning_rate": 9.547215263790158e-06, + "loss": 3.3809, + "step": 1572 + }, + { + "epoch": 0.19, + "learning_rate": 9.546383139004433e-06, + "loss": 3.5361, + "step": 1573 + }, + { + "epoch": 0.19, + "learning_rate": 9.545550286615548e-06, + "loss": 3.3637, + "step": 1574 + }, + { + "epoch": 0.19, + "learning_rate": 9.54471670675679e-06, + "loss": 3.5042, + "step": 1575 + }, + { + "epoch": 0.19, + "learning_rate": 9.54388239956157e-06, + "loss": 3.4738, + "step": 1576 + }, + { + "epoch": 0.19, + "learning_rate": 9.543047365163407e-06, + "loss": 3.4982, + "step": 1577 + }, + { + "epoch": 0.19, + "learning_rate": 9.542211603695938e-06, + "loss": 3.4514, + "step": 1578 + }, + { + "epoch": 0.19, + "learning_rate": 9.541375115292922e-06, + "loss": 3.4945, + "step": 1579 + }, + { + "epoch": 0.19, + "learning_rate": 9.540537900088229e-06, + "loss": 3.5765, + "step": 1580 + }, + { + "epoch": 0.19, + "learning_rate": 9.539699958215847e-06, + "loss": 3.4971, + "step": 1581 + }, + { + "epoch": 0.19, + "learning_rate": 9.538861289809881e-06, + "loss": 3.3979, + "step": 1582 + }, + { + "epoch": 0.19, + "learning_rate": 9.53802189500455e-06, + "loss": 3.4681, + "step": 1583 + }, + { + "epoch": 0.19, + "learning_rate": 9.537181773934192e-06, + "loss": 3.4144, + "step": 1584 + }, + { + "epoch": 0.19, + "learning_rate": 9.53634092673326e-06, + "loss": 3.4642, + "step": 1585 + }, + { + "epoch": 0.19, + "learning_rate": 9.535499353536322e-06, + "loss": 3.5072, + "step": 1586 + }, + { + "epoch": 0.19, + "learning_rate": 9.534657054478065e-06, + "loss": 3.4698, + "step": 1587 + }, + { + "epoch": 0.19, + "learning_rate": 9.533814029693289e-06, + "loss": 3.5874, + "step": 1588 + }, + { + "epoch": 0.19, + "learning_rate": 9.532970279316913e-06, + "loss": 3.4076, + "step": 1589 + }, + { + "epoch": 0.19, + "learning_rate": 9.53212580348397e-06, + "loss": 3.4145, + "step": 1590 + }, + { + "epoch": 0.19, + "learning_rate": 9.53128060232961e-06, + "loss": 3.4387, + "step": 1591 + }, + { + "epoch": 0.19, + "learning_rate": 9.530434675989098e-06, + "loss": 3.389, + "step": 1592 + }, + { + "epoch": 0.19, + "learning_rate": 9.529588024597818e-06, + "loss": 3.4981, + "step": 1593 + }, + { + "epoch": 0.19, + "learning_rate": 9.528740648291268e-06, + "loss": 3.5114, + "step": 1594 + }, + { + "epoch": 0.19, + "learning_rate": 9.52789254720506e-06, + "loss": 3.3644, + "step": 1595 + }, + { + "epoch": 0.19, + "learning_rate": 9.527043721474927e-06, + "loss": 3.3932, + "step": 1596 + }, + { + "epoch": 0.19, + "learning_rate": 9.526194171236711e-06, + "loss": 3.4383, + "step": 1597 + }, + { + "epoch": 0.19, + "learning_rate": 9.525343896626378e-06, + "loss": 3.4384, + "step": 1598 + }, + { + "epoch": 0.19, + "learning_rate": 9.524492897780006e-06, + "loss": 3.4788, + "step": 1599 + }, + { + "epoch": 0.19, + "learning_rate": 9.523641174833786e-06, + "loss": 3.4747, + "step": 1600 + }, + { + "epoch": 0.19, + "learning_rate": 9.522788727924028e-06, + "loss": 3.3895, + "step": 1601 + }, + { + "epoch": 0.19, + "learning_rate": 9.52193555718716e-06, + "loss": 3.4462, + "step": 1602 + }, + { + "epoch": 0.19, + "learning_rate": 9.52108166275972e-06, + "loss": 3.4555, + "step": 1603 + }, + { + "epoch": 0.19, + "learning_rate": 9.520227044778371e-06, + "loss": 3.5151, + "step": 1604 + }, + { + "epoch": 0.19, + "learning_rate": 9.519371703379881e-06, + "loss": 3.4492, + "step": 1605 + }, + { + "epoch": 0.19, + "learning_rate": 9.51851563870114e-06, + "loss": 3.4663, + "step": 1606 + }, + { + "epoch": 0.19, + "learning_rate": 9.517658850879152e-06, + "loss": 3.4714, + "step": 1607 + }, + { + "epoch": 0.19, + "learning_rate": 9.516801340051039e-06, + "loss": 3.5025, + "step": 1608 + }, + { + "epoch": 0.19, + "learning_rate": 9.515943106354037e-06, + "loss": 3.4057, + "step": 1609 + }, + { + "epoch": 0.19, + "learning_rate": 9.515084149925495e-06, + "loss": 3.49, + "step": 1610 + }, + { + "epoch": 0.19, + "learning_rate": 9.514224470902885e-06, + "loss": 3.374, + "step": 1611 + }, + { + "epoch": 0.19, + "learning_rate": 9.513364069423785e-06, + "loss": 3.4688, + "step": 1612 + }, + { + "epoch": 0.19, + "learning_rate": 9.512502945625896e-06, + "loss": 3.5057, + "step": 1613 + }, + { + "epoch": 0.19, + "learning_rate": 9.511641099647034e-06, + "loss": 3.4038, + "step": 1614 + }, + { + "epoch": 0.19, + "learning_rate": 9.510778531625127e-06, + "loss": 3.5229, + "step": 1615 + }, + { + "epoch": 0.19, + "learning_rate": 9.50991524169822e-06, + "loss": 3.349, + "step": 1616 + }, + { + "epoch": 0.19, + "learning_rate": 9.509051230004473e-06, + "loss": 3.3924, + "step": 1617 + }, + { + "epoch": 0.19, + "learning_rate": 9.508186496682166e-06, + "loss": 3.5668, + "step": 1618 + }, + { + "epoch": 0.19, + "learning_rate": 9.507321041869688e-06, + "loss": 3.53, + "step": 1619 + }, + { + "epoch": 0.19, + "learning_rate": 9.506454865705547e-06, + "loss": 3.5512, + "step": 1620 + }, + { + "epoch": 0.19, + "learning_rate": 9.505587968328366e-06, + "loss": 3.4585, + "step": 1621 + }, + { + "epoch": 0.19, + "learning_rate": 9.504720349876883e-06, + "loss": 3.4284, + "step": 1622 + }, + { + "epoch": 0.19, + "learning_rate": 9.503852010489952e-06, + "loss": 3.4213, + "step": 1623 + }, + { + "epoch": 0.19, + "learning_rate": 9.502982950306543e-06, + "loss": 3.3304, + "step": 1624 + }, + { + "epoch": 0.19, + "learning_rate": 9.502113169465738e-06, + "loss": 3.4963, + "step": 1625 + }, + { + "epoch": 0.19, + "learning_rate": 9.501242668106739e-06, + "loss": 3.4745, + "step": 1626 + }, + { + "epoch": 0.19, + "learning_rate": 9.500371446368859e-06, + "loss": 3.4467, + "step": 1627 + }, + { + "epoch": 0.19, + "learning_rate": 9.49949950439153e-06, + "loss": 3.4021, + "step": 1628 + }, + { + "epoch": 0.2, + "learning_rate": 9.498626842314296e-06, + "loss": 3.4413, + "step": 1629 + }, + { + "epoch": 0.2, + "learning_rate": 9.497753460276821e-06, + "loss": 3.3838, + "step": 1630 + }, + { + "epoch": 0.2, + "learning_rate": 9.496879358418878e-06, + "loss": 3.5439, + "step": 1631 + }, + { + "epoch": 0.2, + "learning_rate": 9.49600453688036e-06, + "loss": 3.372, + "step": 1632 + }, + { + "epoch": 0.2, + "learning_rate": 9.49512899580127e-06, + "loss": 3.2784, + "step": 1633 + }, + { + "epoch": 0.2, + "learning_rate": 9.494252735321735e-06, + "loss": 3.4251, + "step": 1634 + }, + { + "epoch": 0.2, + "learning_rate": 9.493375755581988e-06, + "loss": 3.543, + "step": 1635 + }, + { + "epoch": 0.2, + "learning_rate": 9.492498056722382e-06, + "loss": 3.4366, + "step": 1636 + }, + { + "epoch": 0.2, + "learning_rate": 9.491619638883382e-06, + "loss": 3.4537, + "step": 1637 + }, + { + "epoch": 0.2, + "learning_rate": 9.490740502205573e-06, + "loss": 3.4077, + "step": 1638 + }, + { + "epoch": 0.2, + "learning_rate": 9.489860646829652e-06, + "loss": 3.5194, + "step": 1639 + }, + { + "epoch": 0.2, + "learning_rate": 9.488980072896428e-06, + "loss": 3.4065, + "step": 1640 + }, + { + "epoch": 0.2, + "learning_rate": 9.48809878054683e-06, + "loss": 3.4233, + "step": 1641 + }, + { + "epoch": 0.2, + "learning_rate": 9.4872167699219e-06, + "loss": 3.3805, + "step": 1642 + }, + { + "epoch": 0.2, + "learning_rate": 9.486334041162797e-06, + "loss": 3.396, + "step": 1643 + }, + { + "epoch": 0.2, + "learning_rate": 9.485450594410787e-06, + "loss": 3.5485, + "step": 1644 + }, + { + "epoch": 0.2, + "learning_rate": 9.484566429807263e-06, + "loss": 3.3498, + "step": 1645 + }, + { + "epoch": 0.2, + "learning_rate": 9.483681547493723e-06, + "loss": 3.4103, + "step": 1646 + }, + { + "epoch": 0.2, + "learning_rate": 9.482795947611787e-06, + "loss": 3.5373, + "step": 1647 + }, + { + "epoch": 0.2, + "learning_rate": 9.481909630303184e-06, + "loss": 3.4236, + "step": 1648 + }, + { + "epoch": 0.2, + "learning_rate": 9.481022595709759e-06, + "loss": 3.4292, + "step": 1649 + }, + { + "epoch": 0.2, + "learning_rate": 9.480134843973473e-06, + "loss": 3.4684, + "step": 1650 + }, + { + "epoch": 0.2, + "learning_rate": 9.479246375236407e-06, + "loss": 3.5541, + "step": 1651 + }, + { + "epoch": 0.2, + "learning_rate": 9.478357189640746e-06, + "loss": 3.4179, + "step": 1652 + }, + { + "epoch": 0.2, + "learning_rate": 9.477467287328797e-06, + "loss": 3.4855, + "step": 1653 + }, + { + "epoch": 0.2, + "learning_rate": 9.47657666844298e-06, + "loss": 3.5753, + "step": 1654 + }, + { + "epoch": 0.2, + "learning_rate": 9.47568533312583e-06, + "loss": 3.4184, + "step": 1655 + }, + { + "epoch": 0.2, + "learning_rate": 9.474793281519996e-06, + "loss": 3.5113, + "step": 1656 + }, + { + "epoch": 0.2, + "learning_rate": 9.473900513768243e-06, + "loss": 3.4222, + "step": 1657 + }, + { + "epoch": 0.2, + "learning_rate": 9.473007030013447e-06, + "loss": 3.4083, + "step": 1658 + }, + { + "epoch": 0.2, + "learning_rate": 9.472112830398602e-06, + "loss": 3.4007, + "step": 1659 + }, + { + "epoch": 0.2, + "learning_rate": 9.47121791506682e-06, + "loss": 3.4593, + "step": 1660 + }, + { + "epoch": 0.2, + "learning_rate": 9.470322284161315e-06, + "loss": 3.422, + "step": 1661 + }, + { + "epoch": 0.2, + "learning_rate": 9.469425937825431e-06, + "loss": 3.4337, + "step": 1662 + }, + { + "epoch": 0.2, + "learning_rate": 9.468528876202616e-06, + "loss": 3.4681, + "step": 1663 + }, + { + "epoch": 0.2, + "learning_rate": 9.467631099436435e-06, + "loss": 3.4991, + "step": 1664 + }, + { + "epoch": 0.2, + "learning_rate": 9.466732607670572e-06, + "loss": 3.4328, + "step": 1665 + }, + { + "epoch": 0.2, + "learning_rate": 9.465833401048818e-06, + "loss": 3.4956, + "step": 1666 + }, + { + "epoch": 0.2, + "learning_rate": 9.464933479715084e-06, + "loss": 3.3379, + "step": 1667 + }, + { + "epoch": 0.2, + "learning_rate": 9.464032843813394e-06, + "loss": 3.4235, + "step": 1668 + }, + { + "epoch": 0.2, + "learning_rate": 9.46313149348788e-06, + "loss": 3.4687, + "step": 1669 + }, + { + "epoch": 0.2, + "learning_rate": 9.462229428882802e-06, + "loss": 3.489, + "step": 1670 + }, + { + "epoch": 0.2, + "learning_rate": 9.461326650142522e-06, + "loss": 3.4994, + "step": 1671 + }, + { + "epoch": 0.2, + "learning_rate": 9.460423157411522e-06, + "loss": 3.4423, + "step": 1672 + }, + { + "epoch": 0.2, + "learning_rate": 9.459518950834396e-06, + "loss": 3.3912, + "step": 1673 + }, + { + "epoch": 0.2, + "learning_rate": 9.458614030555854e-06, + "loss": 3.4498, + "step": 1674 + }, + { + "epoch": 0.2, + "learning_rate": 9.45770839672072e-06, + "loss": 3.4755, + "step": 1675 + }, + { + "epoch": 0.2, + "learning_rate": 9.45680204947393e-06, + "loss": 3.5378, + "step": 1676 + }, + { + "epoch": 0.2, + "learning_rate": 9.455894988960537e-06, + "loss": 3.5047, + "step": 1677 + }, + { + "epoch": 0.2, + "learning_rate": 9.454987215325705e-06, + "loss": 3.3773, + "step": 1678 + }, + { + "epoch": 0.2, + "learning_rate": 9.45407872871472e-06, + "loss": 3.3495, + "step": 1679 + }, + { + "epoch": 0.2, + "learning_rate": 9.45316952927297e-06, + "loss": 3.5069, + "step": 1680 + }, + { + "epoch": 0.2, + "learning_rate": 9.452259617145965e-06, + "loss": 3.263, + "step": 1681 + }, + { + "epoch": 0.2, + "learning_rate": 9.451348992479326e-06, + "loss": 3.4, + "step": 1682 + }, + { + "epoch": 0.2, + "learning_rate": 9.450437655418792e-06, + "loss": 3.4671, + "step": 1683 + }, + { + "epoch": 0.2, + "learning_rate": 9.449525606110214e-06, + "loss": 3.4337, + "step": 1684 + }, + { + "epoch": 0.2, + "learning_rate": 9.448612844699554e-06, + "loss": 3.4405, + "step": 1685 + }, + { + "epoch": 0.2, + "learning_rate": 9.44769937133289e-06, + "loss": 3.4643, + "step": 1686 + }, + { + "epoch": 0.2, + "learning_rate": 9.446785186156415e-06, + "loss": 3.4444, + "step": 1687 + }, + { + "epoch": 0.2, + "learning_rate": 9.445870289316436e-06, + "loss": 3.4015, + "step": 1688 + }, + { + "epoch": 0.2, + "learning_rate": 9.444954680959376e-06, + "loss": 3.3983, + "step": 1689 + }, + { + "epoch": 0.2, + "learning_rate": 9.444038361231763e-06, + "loss": 3.4704, + "step": 1690 + }, + { + "epoch": 0.2, + "learning_rate": 9.443121330280245e-06, + "loss": 3.4409, + "step": 1691 + }, + { + "epoch": 0.2, + "learning_rate": 9.442203588251589e-06, + "loss": 3.4104, + "step": 1692 + }, + { + "epoch": 0.2, + "learning_rate": 9.441285135292666e-06, + "loss": 3.375, + "step": 1693 + }, + { + "epoch": 0.2, + "learning_rate": 9.440365971550468e-06, + "loss": 3.4832, + "step": 1694 + }, + { + "epoch": 0.2, + "learning_rate": 9.439446097172096e-06, + "loss": 3.4562, + "step": 1695 + }, + { + "epoch": 0.2, + "learning_rate": 9.438525512304767e-06, + "loss": 3.4294, + "step": 1696 + }, + { + "epoch": 0.2, + "learning_rate": 9.437604217095813e-06, + "loss": 3.4053, + "step": 1697 + }, + { + "epoch": 0.2, + "learning_rate": 9.436682211692676e-06, + "loss": 3.4432, + "step": 1698 + }, + { + "epoch": 0.2, + "learning_rate": 9.435759496242915e-06, + "loss": 3.4249, + "step": 1699 + }, + { + "epoch": 0.2, + "learning_rate": 9.4348360708942e-06, + "loss": 3.4444, + "step": 1700 + }, + { + "epoch": 0.2, + "learning_rate": 9.433911935794317e-06, + "loss": 3.4205, + "step": 1701 + }, + { + "epoch": 0.2, + "learning_rate": 9.432987091091165e-06, + "loss": 3.5518, + "step": 1702 + }, + { + "epoch": 0.2, + "learning_rate": 9.432061536932758e-06, + "loss": 3.4548, + "step": 1703 + }, + { + "epoch": 0.2, + "learning_rate": 9.431135273467217e-06, + "loss": 3.3886, + "step": 1704 + }, + { + "epoch": 0.2, + "learning_rate": 9.430208300842784e-06, + "loss": 3.4138, + "step": 1705 + }, + { + "epoch": 0.2, + "learning_rate": 9.42928061920781e-06, + "loss": 3.4839, + "step": 1706 + }, + { + "epoch": 0.2, + "learning_rate": 9.428352228710764e-06, + "loss": 3.3968, + "step": 1707 + }, + { + "epoch": 0.2, + "learning_rate": 9.427423129500225e-06, + "loss": 3.482, + "step": 1708 + }, + { + "epoch": 0.2, + "learning_rate": 9.426493321724883e-06, + "loss": 3.5258, + "step": 1709 + }, + { + "epoch": 0.2, + "learning_rate": 9.425562805533548e-06, + "loss": 3.4394, + "step": 1710 + }, + { + "epoch": 0.2, + "learning_rate": 9.424631581075137e-06, + "loss": 3.4073, + "step": 1711 + }, + { + "epoch": 0.2, + "learning_rate": 9.423699648498685e-06, + "loss": 3.5521, + "step": 1712 + }, + { + "epoch": 0.21, + "learning_rate": 9.42276700795334e-06, + "loss": 3.4594, + "step": 1713 + }, + { + "epoch": 0.21, + "learning_rate": 9.421833659588358e-06, + "loss": 3.4543, + "step": 1714 + }, + { + "epoch": 0.21, + "learning_rate": 9.420899603553114e-06, + "loss": 3.4025, + "step": 1715 + }, + { + "epoch": 0.21, + "learning_rate": 9.419964839997093e-06, + "loss": 3.4903, + "step": 1716 + }, + { + "epoch": 0.21, + "learning_rate": 9.419029369069897e-06, + "loss": 3.4365, + "step": 1717 + }, + { + "epoch": 0.21, + "learning_rate": 9.418093190921238e-06, + "loss": 3.3804, + "step": 1718 + }, + { + "epoch": 0.21, + "learning_rate": 9.417156305700941e-06, + "loss": 3.4116, + "step": 1719 + }, + { + "epoch": 0.21, + "learning_rate": 9.416218713558946e-06, + "loss": 3.4794, + "step": 1720 + }, + { + "epoch": 0.21, + "learning_rate": 9.415280414645304e-06, + "loss": 3.4243, + "step": 1721 + }, + { + "epoch": 0.21, + "learning_rate": 9.41434140911018e-06, + "loss": 3.3996, + "step": 1722 + }, + { + "epoch": 0.21, + "learning_rate": 9.413401697103856e-06, + "loss": 3.361, + "step": 1723 + }, + { + "epoch": 0.21, + "learning_rate": 9.41246127877672e-06, + "loss": 3.5103, + "step": 1724 + }, + { + "epoch": 0.21, + "learning_rate": 9.41152015427928e-06, + "loss": 3.5108, + "step": 1725 + }, + { + "epoch": 0.21, + "learning_rate": 9.41057832376215e-06, + "loss": 3.388, + "step": 1726 + }, + { + "epoch": 0.21, + "learning_rate": 9.40963578737606e-06, + "loss": 3.4303, + "step": 1727 + }, + { + "epoch": 0.21, + "learning_rate": 9.408692545271858e-06, + "loss": 3.4657, + "step": 1728 + }, + { + "epoch": 0.21, + "learning_rate": 9.407748597600497e-06, + "loss": 3.4598, + "step": 1729 + }, + { + "epoch": 0.21, + "learning_rate": 9.406803944513046e-06, + "loss": 3.4879, + "step": 1730 + }, + { + "epoch": 0.21, + "learning_rate": 9.40585858616069e-06, + "loss": 3.4073, + "step": 1731 + }, + { + "epoch": 0.21, + "learning_rate": 9.404912522694721e-06, + "loss": 3.506, + "step": 1732 + }, + { + "epoch": 0.21, + "learning_rate": 9.40396575426655e-06, + "loss": 3.4518, + "step": 1733 + }, + { + "epoch": 0.21, + "learning_rate": 9.403018281027696e-06, + "loss": 3.4482, + "step": 1734 + }, + { + "epoch": 0.21, + "learning_rate": 9.402070103129792e-06, + "loss": 3.4896, + "step": 1735 + }, + { + "epoch": 0.21, + "learning_rate": 9.401121220724587e-06, + "loss": 3.4018, + "step": 1736 + }, + { + "epoch": 0.21, + "learning_rate": 9.40017163396394e-06, + "loss": 3.4156, + "step": 1737 + }, + { + "epoch": 0.21, + "learning_rate": 9.399221342999818e-06, + "loss": 3.4644, + "step": 1738 + }, + { + "epoch": 0.21, + "learning_rate": 9.39827034798431e-06, + "loss": 3.3491, + "step": 1739 + }, + { + "epoch": 0.21, + "learning_rate": 9.397318649069614e-06, + "loss": 3.4553, + "step": 1740 + }, + { + "epoch": 0.21, + "learning_rate": 9.396366246408038e-06, + "loss": 3.547, + "step": 1741 + }, + { + "epoch": 0.21, + "learning_rate": 9.395413140152003e-06, + "loss": 3.3889, + "step": 1742 + }, + { + "epoch": 0.21, + "learning_rate": 9.394459330454047e-06, + "loss": 3.4466, + "step": 1743 + }, + { + "epoch": 0.21, + "learning_rate": 9.393504817466817e-06, + "loss": 3.4886, + "step": 1744 + }, + { + "epoch": 0.21, + "learning_rate": 9.392549601343071e-06, + "loss": 3.4099, + "step": 1745 + }, + { + "epoch": 0.21, + "learning_rate": 9.391593682235685e-06, + "loss": 3.3917, + "step": 1746 + }, + { + "epoch": 0.21, + "learning_rate": 9.390637060297642e-06, + "loss": 3.4266, + "step": 1747 + }, + { + "epoch": 0.21, + "learning_rate": 9.389679735682042e-06, + "loss": 3.3341, + "step": 1748 + }, + { + "epoch": 0.21, + "learning_rate": 9.388721708542093e-06, + "loss": 3.3559, + "step": 1749 + }, + { + "epoch": 0.21, + "learning_rate": 9.387762979031119e-06, + "loss": 3.4511, + "step": 1750 + }, + { + "epoch": 0.21, + "learning_rate": 9.386803547302554e-06, + "loss": 3.3536, + "step": 1751 + }, + { + "epoch": 0.21, + "learning_rate": 9.385843413509947e-06, + "loss": 3.3773, + "step": 1752 + }, + { + "epoch": 0.21, + "learning_rate": 9.384882577806957e-06, + "loss": 3.5101, + "step": 1753 + }, + { + "epoch": 0.21, + "learning_rate": 9.383921040347355e-06, + "loss": 3.5355, + "step": 1754 + }, + { + "epoch": 0.21, + "learning_rate": 9.382958801285026e-06, + "loss": 3.3682, + "step": 1755 + }, + { + "epoch": 0.21, + "learning_rate": 9.381995860773969e-06, + "loss": 3.4817, + "step": 1756 + }, + { + "epoch": 0.21, + "learning_rate": 9.38103221896829e-06, + "loss": 3.522, + "step": 1757 + }, + { + "epoch": 0.21, + "learning_rate": 9.380067876022213e-06, + "loss": 3.4949, + "step": 1758 + }, + { + "epoch": 0.21, + "learning_rate": 9.379102832090067e-06, + "loss": 3.4869, + "step": 1759 + }, + { + "epoch": 0.21, + "learning_rate": 9.378137087326302e-06, + "loss": 3.4965, + "step": 1760 + }, + { + "epoch": 0.21, + "learning_rate": 9.377170641885474e-06, + "loss": 3.3685, + "step": 1761 + }, + { + "epoch": 0.21, + "learning_rate": 9.376203495922254e-06, + "loss": 3.43, + "step": 1762 + }, + { + "epoch": 0.21, + "learning_rate": 9.375235649591423e-06, + "loss": 3.4855, + "step": 1763 + }, + { + "epoch": 0.21, + "learning_rate": 9.374267103047876e-06, + "loss": 3.4518, + "step": 1764 + }, + { + "epoch": 0.21, + "learning_rate": 9.37329785644662e-06, + "loss": 3.4053, + "step": 1765 + }, + { + "epoch": 0.21, + "learning_rate": 9.37232790994277e-06, + "loss": 3.4544, + "step": 1766 + }, + { + "epoch": 0.21, + "learning_rate": 9.371357263691557e-06, + "loss": 3.4062, + "step": 1767 + }, + { + "epoch": 0.21, + "learning_rate": 9.370385917848325e-06, + "loss": 3.5059, + "step": 1768 + }, + { + "epoch": 0.21, + "learning_rate": 9.369413872568528e-06, + "loss": 3.4349, + "step": 1769 + }, + { + "epoch": 0.21, + "learning_rate": 9.368441128007733e-06, + "loss": 3.489, + "step": 1770 + }, + { + "epoch": 0.21, + "learning_rate": 9.367467684321614e-06, + "loss": 3.489, + "step": 1771 + }, + { + "epoch": 0.21, + "learning_rate": 9.366493541665967e-06, + "loss": 3.4259, + "step": 1772 + }, + { + "epoch": 0.21, + "learning_rate": 9.365518700196687e-06, + "loss": 3.4479, + "step": 1773 + }, + { + "epoch": 0.21, + "learning_rate": 9.364543160069792e-06, + "loss": 3.5363, + "step": 1774 + }, + { + "epoch": 0.21, + "learning_rate": 9.363566921441407e-06, + "loss": 3.4674, + "step": 1775 + }, + { + "epoch": 0.21, + "learning_rate": 9.362589984467769e-06, + "loss": 3.4105, + "step": 1776 + }, + { + "epoch": 0.21, + "learning_rate": 9.361612349305227e-06, + "loss": 3.4638, + "step": 1777 + }, + { + "epoch": 0.21, + "learning_rate": 9.36063401611024e-06, + "loss": 3.3841, + "step": 1778 + }, + { + "epoch": 0.21, + "learning_rate": 9.359654985039382e-06, + "loss": 3.473, + "step": 1779 + }, + { + "epoch": 0.21, + "learning_rate": 9.358675256249338e-06, + "loss": 3.4798, + "step": 1780 + }, + { + "epoch": 0.21, + "learning_rate": 9.357694829896902e-06, + "loss": 3.4406, + "step": 1781 + }, + { + "epoch": 0.21, + "learning_rate": 9.356713706138983e-06, + "loss": 3.437, + "step": 1782 + }, + { + "epoch": 0.21, + "learning_rate": 9.355731885132598e-06, + "loss": 3.393, + "step": 1783 + }, + { + "epoch": 0.21, + "learning_rate": 9.354749367034881e-06, + "loss": 3.3939, + "step": 1784 + }, + { + "epoch": 0.21, + "learning_rate": 9.35376615200307e-06, + "loss": 3.5137, + "step": 1785 + }, + { + "epoch": 0.21, + "learning_rate": 9.352782240194522e-06, + "loss": 3.4382, + "step": 1786 + }, + { + "epoch": 0.21, + "learning_rate": 9.3517976317667e-06, + "loss": 3.4022, + "step": 1787 + }, + { + "epoch": 0.21, + "learning_rate": 9.350812326877183e-06, + "loss": 3.3839, + "step": 1788 + }, + { + "epoch": 0.21, + "learning_rate": 9.349826325683658e-06, + "loss": 3.4302, + "step": 1789 + }, + { + "epoch": 0.21, + "learning_rate": 9.348839628343924e-06, + "loss": 3.4641, + "step": 1790 + }, + { + "epoch": 0.21, + "learning_rate": 9.347852235015892e-06, + "loss": 3.4728, + "step": 1791 + }, + { + "epoch": 0.21, + "learning_rate": 9.346864145857588e-06, + "loss": 3.4139, + "step": 1792 + }, + { + "epoch": 0.21, + "learning_rate": 9.345875361027141e-06, + "loss": 3.373, + "step": 1793 + }, + { + "epoch": 0.21, + "learning_rate": 9.344885880682798e-06, + "loss": 3.4178, + "step": 1794 + }, + { + "epoch": 0.21, + "learning_rate": 9.343895704982917e-06, + "loss": 3.3386, + "step": 1795 + }, + { + "epoch": 0.22, + "learning_rate": 9.342904834085961e-06, + "loss": 3.3733, + "step": 1796 + }, + { + "epoch": 0.22, + "learning_rate": 9.341913268150516e-06, + "loss": 3.4104, + "step": 1797 + }, + { + "epoch": 0.22, + "learning_rate": 9.340921007335264e-06, + "loss": 3.4607, + "step": 1798 + }, + { + "epoch": 0.22, + "learning_rate": 9.339928051799014e-06, + "loss": 3.4646, + "step": 1799 + }, + { + "epoch": 0.22, + "learning_rate": 9.338934401700675e-06, + "loss": 3.3956, + "step": 1800 + }, + { + "epoch": 0.22, + "learning_rate": 9.337940057199271e-06, + "loss": 3.3255, + "step": 1801 + }, + { + "epoch": 0.22, + "learning_rate": 9.336945018453936e-06, + "loss": 3.5551, + "step": 1802 + }, + { + "epoch": 0.22, + "learning_rate": 9.335949285623918e-06, + "loss": 3.3944, + "step": 1803 + }, + { + "epoch": 0.22, + "learning_rate": 9.334952858868574e-06, + "loss": 3.3146, + "step": 1804 + }, + { + "epoch": 0.22, + "learning_rate": 9.33395573834737e-06, + "loss": 3.4626, + "step": 1805 + }, + { + "epoch": 0.22, + "learning_rate": 9.332957924219888e-06, + "loss": 3.463, + "step": 1806 + }, + { + "epoch": 0.22, + "learning_rate": 9.331959416645814e-06, + "loss": 3.3906, + "step": 1807 + }, + { + "epoch": 0.22, + "learning_rate": 9.330960215784955e-06, + "loss": 3.4151, + "step": 1808 + }, + { + "epoch": 0.22, + "learning_rate": 9.329960321797216e-06, + "loss": 3.3609, + "step": 1809 + }, + { + "epoch": 0.22, + "learning_rate": 9.328959734842626e-06, + "loss": 3.4024, + "step": 1810 + }, + { + "epoch": 0.22, + "learning_rate": 9.327958455081319e-06, + "loss": 3.381, + "step": 1811 + }, + { + "epoch": 0.22, + "learning_rate": 9.326956482673534e-06, + "loss": 3.3776, + "step": 1812 + }, + { + "epoch": 0.22, + "learning_rate": 9.325953817779633e-06, + "loss": 3.4419, + "step": 1813 + }, + { + "epoch": 0.22, + "learning_rate": 9.324950460560078e-06, + "loss": 3.4332, + "step": 1814 + }, + { + "epoch": 0.22, + "learning_rate": 9.323946411175449e-06, + "loss": 3.4388, + "step": 1815 + }, + { + "epoch": 0.22, + "learning_rate": 9.322941669786433e-06, + "loss": 3.5133, + "step": 1816 + }, + { + "epoch": 0.22, + "learning_rate": 9.32193623655383e-06, + "loss": 3.5004, + "step": 1817 + }, + { + "epoch": 0.22, + "learning_rate": 9.320930111638547e-06, + "loss": 3.4252, + "step": 1818 + }, + { + "epoch": 0.22, + "learning_rate": 9.319923295201607e-06, + "loss": 3.4357, + "step": 1819 + }, + { + "epoch": 0.22, + "learning_rate": 9.31891578740414e-06, + "loss": 3.4924, + "step": 1820 + }, + { + "epoch": 0.22, + "learning_rate": 9.317907588407385e-06, + "loss": 3.3848, + "step": 1821 + }, + { + "epoch": 0.22, + "learning_rate": 9.316898698372699e-06, + "loss": 3.3988, + "step": 1822 + }, + { + "epoch": 0.22, + "learning_rate": 9.31588911746154e-06, + "loss": 3.2586, + "step": 1823 + }, + { + "epoch": 0.22, + "learning_rate": 9.314878845835484e-06, + "loss": 3.4339, + "step": 1824 + }, + { + "epoch": 0.22, + "learning_rate": 9.313867883656216e-06, + "loss": 3.4339, + "step": 1825 + }, + { + "epoch": 0.22, + "learning_rate": 9.312856231085528e-06, + "loss": 3.4757, + "step": 1826 + }, + { + "epoch": 0.22, + "learning_rate": 9.311843888285324e-06, + "loss": 3.4607, + "step": 1827 + }, + { + "epoch": 0.22, + "learning_rate": 9.310830855417622e-06, + "loss": 3.5463, + "step": 1828 + }, + { + "epoch": 0.22, + "learning_rate": 9.309817132644548e-06, + "loss": 3.3454, + "step": 1829 + }, + { + "epoch": 0.22, + "learning_rate": 9.308802720128337e-06, + "loss": 3.5152, + "step": 1830 + }, + { + "epoch": 0.22, + "learning_rate": 9.307787618031332e-06, + "loss": 3.4292, + "step": 1831 + }, + { + "epoch": 0.22, + "learning_rate": 9.306771826515996e-06, + "loss": 3.47, + "step": 1832 + }, + { + "epoch": 0.22, + "learning_rate": 9.305755345744894e-06, + "loss": 3.4421, + "step": 1833 + }, + { + "epoch": 0.22, + "learning_rate": 9.304738175880703e-06, + "loss": 3.3485, + "step": 1834 + }, + { + "epoch": 0.22, + "learning_rate": 9.303720317086211e-06, + "loss": 3.365, + "step": 1835 + }, + { + "epoch": 0.22, + "learning_rate": 9.302701769524316e-06, + "loss": 3.447, + "step": 1836 + }, + { + "epoch": 0.22, + "learning_rate": 9.301682533358028e-06, + "loss": 3.4869, + "step": 1837 + }, + { + "epoch": 0.22, + "learning_rate": 9.300662608750463e-06, + "loss": 3.3654, + "step": 1838 + }, + { + "epoch": 0.22, + "learning_rate": 9.299641995864851e-06, + "loss": 3.4111, + "step": 1839 + }, + { + "epoch": 0.22, + "learning_rate": 9.298620694864531e-06, + "loss": 3.4128, + "step": 1840 + }, + { + "epoch": 0.22, + "learning_rate": 9.297598705912952e-06, + "loss": 3.3602, + "step": 1841 + }, + { + "epoch": 0.22, + "learning_rate": 9.296576029173672e-06, + "loss": 3.3426, + "step": 1842 + }, + { + "epoch": 0.22, + "learning_rate": 9.295552664810363e-06, + "loss": 3.3388, + "step": 1843 + }, + { + "epoch": 0.22, + "learning_rate": 9.2945286129868e-06, + "loss": 3.4297, + "step": 1844 + }, + { + "epoch": 0.22, + "learning_rate": 9.293503873866876e-06, + "loss": 3.3904, + "step": 1845 + }, + { + "epoch": 0.22, + "learning_rate": 9.292478447614589e-06, + "loss": 3.4092, + "step": 1846 + }, + { + "epoch": 0.22, + "learning_rate": 9.291452334394048e-06, + "loss": 3.3305, + "step": 1847 + }, + { + "epoch": 0.22, + "learning_rate": 9.290425534369473e-06, + "loss": 3.5816, + "step": 1848 + }, + { + "epoch": 0.22, + "learning_rate": 9.289398047705191e-06, + "loss": 3.4465, + "step": 1849 + }, + { + "epoch": 0.22, + "learning_rate": 9.288369874565644e-06, + "loss": 3.3478, + "step": 1850 + }, + { + "epoch": 0.22, + "learning_rate": 9.287341015115376e-06, + "loss": 3.4283, + "step": 1851 + }, + { + "epoch": 0.22, + "learning_rate": 9.286311469519051e-06, + "loss": 3.4578, + "step": 1852 + }, + { + "epoch": 0.22, + "learning_rate": 9.285281237941435e-06, + "loss": 3.3536, + "step": 1853 + }, + { + "epoch": 0.22, + "learning_rate": 9.284250320547408e-06, + "loss": 3.4696, + "step": 1854 + }, + { + "epoch": 0.22, + "learning_rate": 9.283218717501955e-06, + "loss": 3.3647, + "step": 1855 + }, + { + "epoch": 0.22, + "learning_rate": 9.282186428970176e-06, + "loss": 3.4651, + "step": 1856 + }, + { + "epoch": 0.22, + "learning_rate": 9.281153455117277e-06, + "loss": 3.4686, + "step": 1857 + }, + { + "epoch": 0.22, + "learning_rate": 9.280119796108576e-06, + "loss": 3.4039, + "step": 1858 + }, + { + "epoch": 0.22, + "learning_rate": 9.2790854521095e-06, + "loss": 3.4466, + "step": 1859 + }, + { + "epoch": 0.22, + "learning_rate": 9.278050423285584e-06, + "loss": 3.4367, + "step": 1860 + }, + { + "epoch": 0.22, + "learning_rate": 9.277014709802477e-06, + "loss": 3.4498, + "step": 1861 + }, + { + "epoch": 0.22, + "learning_rate": 9.275978311825932e-06, + "loss": 3.4468, + "step": 1862 + }, + { + "epoch": 0.22, + "learning_rate": 9.274941229521814e-06, + "loss": 3.4567, + "step": 1863 + }, + { + "epoch": 0.22, + "learning_rate": 9.2739034630561e-06, + "loss": 3.5132, + "step": 1864 + }, + { + "epoch": 0.22, + "learning_rate": 9.272865012594872e-06, + "loss": 3.3075, + "step": 1865 + }, + { + "epoch": 0.22, + "learning_rate": 9.271825878304323e-06, + "loss": 3.4585, + "step": 1866 + }, + { + "epoch": 0.22, + "learning_rate": 9.27078606035076e-06, + "loss": 3.5386, + "step": 1867 + }, + { + "epoch": 0.22, + "learning_rate": 9.26974555890059e-06, + "loss": 3.44, + "step": 1868 + }, + { + "epoch": 0.22, + "learning_rate": 9.26870437412034e-06, + "loss": 3.4767, + "step": 1869 + }, + { + "epoch": 0.22, + "learning_rate": 9.267662506176637e-06, + "loss": 3.5272, + "step": 1870 + }, + { + "epoch": 0.22, + "learning_rate": 9.266619955236225e-06, + "loss": 3.4838, + "step": 1871 + }, + { + "epoch": 0.22, + "learning_rate": 9.265576721465952e-06, + "loss": 3.4731, + "step": 1872 + }, + { + "epoch": 0.22, + "learning_rate": 9.264532805032776e-06, + "loss": 3.4392, + "step": 1873 + }, + { + "epoch": 0.22, + "learning_rate": 9.263488206103768e-06, + "loss": 3.5319, + "step": 1874 + }, + { + "epoch": 0.22, + "learning_rate": 9.262442924846103e-06, + "loss": 3.314, + "step": 1875 + }, + { + "epoch": 0.22, + "learning_rate": 9.261396961427071e-06, + "loss": 3.3777, + "step": 1876 + }, + { + "epoch": 0.22, + "learning_rate": 9.260350316014064e-06, + "loss": 3.4001, + "step": 1877 + }, + { + "epoch": 0.22, + "learning_rate": 9.25930298877459e-06, + "loss": 3.4976, + "step": 1878 + }, + { + "epoch": 0.22, + "learning_rate": 9.258254979876265e-06, + "loss": 3.4114, + "step": 1879 + }, + { + "epoch": 0.23, + "learning_rate": 9.257206289486806e-06, + "loss": 3.4747, + "step": 1880 + }, + { + "epoch": 0.23, + "learning_rate": 9.256156917774053e-06, + "loss": 3.4383, + "step": 1881 + }, + { + "epoch": 0.23, + "learning_rate": 9.255106864905942e-06, + "loss": 3.3796, + "step": 1882 + }, + { + "epoch": 0.23, + "learning_rate": 9.254056131050525e-06, + "loss": 3.4304, + "step": 1883 + }, + { + "epoch": 0.23, + "learning_rate": 9.253004716375962e-06, + "loss": 3.3864, + "step": 1884 + }, + { + "epoch": 0.23, + "learning_rate": 9.251952621050521e-06, + "loss": 3.4629, + "step": 1885 + }, + { + "epoch": 0.23, + "learning_rate": 9.25089984524258e-06, + "loss": 3.4475, + "step": 1886 + }, + { + "epoch": 0.23, + "learning_rate": 9.249846389120622e-06, + "loss": 3.5317, + "step": 1887 + }, + { + "epoch": 0.23, + "learning_rate": 9.24879225285325e-06, + "loss": 3.4265, + "step": 1888 + }, + { + "epoch": 0.23, + "learning_rate": 9.247737436609158e-06, + "loss": 3.4188, + "step": 1889 + }, + { + "epoch": 0.23, + "learning_rate": 9.246681940557166e-06, + "loss": 3.4428, + "step": 1890 + }, + { + "epoch": 0.23, + "learning_rate": 9.245625764866191e-06, + "loss": 3.474, + "step": 1891 + }, + { + "epoch": 0.23, + "learning_rate": 9.244568909705268e-06, + "loss": 3.3579, + "step": 1892 + }, + { + "epoch": 0.23, + "learning_rate": 9.243511375243533e-06, + "loss": 3.478, + "step": 1893 + }, + { + "epoch": 0.23, + "learning_rate": 9.242453161650234e-06, + "loss": 3.3808, + "step": 1894 + }, + { + "epoch": 0.23, + "learning_rate": 9.241394269094725e-06, + "loss": 3.4412, + "step": 1895 + }, + { + "epoch": 0.23, + "learning_rate": 9.240334697746479e-06, + "loss": 3.4127, + "step": 1896 + }, + { + "epoch": 0.23, + "learning_rate": 9.239274447775062e-06, + "loss": 3.4123, + "step": 1897 + }, + { + "epoch": 0.23, + "learning_rate": 9.23821351935016e-06, + "loss": 3.3358, + "step": 1898 + }, + { + "epoch": 0.23, + "learning_rate": 9.237151912641563e-06, + "loss": 3.4854, + "step": 1899 + }, + { + "epoch": 0.23, + "learning_rate": 9.236089627819172e-06, + "loss": 3.376, + "step": 1900 + }, + { + "epoch": 0.23, + "learning_rate": 9.235026665052992e-06, + "loss": 3.3874, + "step": 1901 + }, + { + "epoch": 0.23, + "learning_rate": 9.233963024513142e-06, + "loss": 3.5177, + "step": 1902 + }, + { + "epoch": 0.23, + "learning_rate": 9.232898706369847e-06, + "loss": 3.4077, + "step": 1903 + }, + { + "epoch": 0.23, + "learning_rate": 9.23183371079344e-06, + "loss": 3.4352, + "step": 1904 + }, + { + "epoch": 0.23, + "learning_rate": 9.230768037954363e-06, + "loss": 3.4884, + "step": 1905 + }, + { + "epoch": 0.23, + "learning_rate": 9.229701688023166e-06, + "loss": 3.3862, + "step": 1906 + }, + { + "epoch": 0.23, + "learning_rate": 9.228634661170506e-06, + "loss": 3.3058, + "step": 1907 + }, + { + "epoch": 0.23, + "learning_rate": 9.227566957567154e-06, + "loss": 3.4257, + "step": 1908 + }, + { + "epoch": 0.23, + "learning_rate": 9.226498577383983e-06, + "loss": 3.4242, + "step": 1909 + }, + { + "epoch": 0.23, + "learning_rate": 9.225429520791974e-06, + "loss": 3.512, + "step": 1910 + }, + { + "epoch": 0.23, + "learning_rate": 9.224359787962222e-06, + "loss": 3.3582, + "step": 1911 + }, + { + "epoch": 0.23, + "learning_rate": 9.22328937906593e-06, + "loss": 3.4845, + "step": 1912 + }, + { + "epoch": 0.23, + "learning_rate": 9.222218294274398e-06, + "loss": 3.4133, + "step": 1913 + }, + { + "epoch": 0.23, + "learning_rate": 9.221146533759051e-06, + "loss": 3.459, + "step": 1914 + }, + { + "epoch": 0.23, + "learning_rate": 9.220074097691407e-06, + "loss": 3.4088, + "step": 1915 + }, + { + "epoch": 0.23, + "learning_rate": 9.219000986243103e-06, + "loss": 3.4072, + "step": 1916 + }, + { + "epoch": 0.23, + "learning_rate": 9.217927199585876e-06, + "loss": 3.3751, + "step": 1917 + }, + { + "epoch": 0.23, + "learning_rate": 9.21685273789158e-06, + "loss": 3.477, + "step": 1918 + }, + { + "epoch": 0.23, + "learning_rate": 9.215777601332167e-06, + "loss": 3.3967, + "step": 1919 + }, + { + "epoch": 0.23, + "learning_rate": 9.214701790079703e-06, + "loss": 3.3995, + "step": 1920 + }, + { + "epoch": 0.23, + "learning_rate": 9.213625304306362e-06, + "loss": 3.4961, + "step": 1921 + }, + { + "epoch": 0.23, + "learning_rate": 9.212548144184425e-06, + "loss": 3.4121, + "step": 1922 + }, + { + "epoch": 0.23, + "learning_rate": 9.21147030988628e-06, + "loss": 3.4712, + "step": 1923 + }, + { + "epoch": 0.23, + "learning_rate": 9.210391801584422e-06, + "loss": 3.4452, + "step": 1924 + }, + { + "epoch": 0.23, + "learning_rate": 9.209312619451459e-06, + "loss": 3.3748, + "step": 1925 + }, + { + "epoch": 0.23, + "learning_rate": 9.2082327636601e-06, + "loss": 3.3744, + "step": 1926 + }, + { + "epoch": 0.23, + "learning_rate": 9.207152234383165e-06, + "loss": 3.3897, + "step": 1927 + }, + { + "epoch": 0.23, + "learning_rate": 9.206071031793584e-06, + "loss": 3.4294, + "step": 1928 + }, + { + "epoch": 0.23, + "learning_rate": 9.204989156064395e-06, + "loss": 3.4489, + "step": 1929 + }, + { + "epoch": 0.23, + "learning_rate": 9.203906607368735e-06, + "loss": 3.3879, + "step": 1930 + }, + { + "epoch": 0.23, + "learning_rate": 9.20282338587986e-06, + "loss": 3.5638, + "step": 1931 + }, + { + "epoch": 0.23, + "learning_rate": 9.201739491771127e-06, + "loss": 3.4185, + "step": 1932 + }, + { + "epoch": 0.23, + "learning_rate": 9.200654925216002e-06, + "loss": 3.3473, + "step": 1933 + }, + { + "epoch": 0.23, + "learning_rate": 9.199569686388061e-06, + "loss": 3.4936, + "step": 1934 + }, + { + "epoch": 0.23, + "learning_rate": 9.198483775460983e-06, + "loss": 3.4251, + "step": 1935 + }, + { + "epoch": 0.23, + "learning_rate": 9.19739719260856e-06, + "loss": 3.4143, + "step": 1936 + }, + { + "epoch": 0.23, + "learning_rate": 9.196309938004686e-06, + "loss": 3.5226, + "step": 1937 + }, + { + "epoch": 0.23, + "learning_rate": 9.195222011823365e-06, + "loss": 3.3659, + "step": 1938 + }, + { + "epoch": 0.23, + "learning_rate": 9.194133414238713e-06, + "loss": 3.4724, + "step": 1939 + }, + { + "epoch": 0.23, + "learning_rate": 9.193044145424946e-06, + "loss": 3.475, + "step": 1940 + }, + { + "epoch": 0.23, + "learning_rate": 9.19195420555639e-06, + "loss": 3.426, + "step": 1941 + }, + { + "epoch": 0.23, + "learning_rate": 9.19086359480748e-06, + "loss": 3.431, + "step": 1942 + }, + { + "epoch": 0.23, + "learning_rate": 9.189772313352757e-06, + "loss": 3.3937, + "step": 1943 + }, + { + "epoch": 0.23, + "learning_rate": 9.18868036136687e-06, + "loss": 3.5288, + "step": 1944 + }, + { + "epoch": 0.23, + "learning_rate": 9.187587739024575e-06, + "loss": 3.4484, + "step": 1945 + }, + { + "epoch": 0.23, + "learning_rate": 9.186494446500736e-06, + "loss": 3.531, + "step": 1946 + }, + { + "epoch": 0.23, + "learning_rate": 9.185400483970321e-06, + "loss": 3.3458, + "step": 1947 + }, + { + "epoch": 0.23, + "learning_rate": 9.18430585160841e-06, + "loss": 3.3513, + "step": 1948 + }, + { + "epoch": 0.23, + "learning_rate": 9.183210549590188e-06, + "loss": 3.4238, + "step": 1949 + }, + { + "epoch": 0.23, + "learning_rate": 9.182114578090947e-06, + "loss": 3.5227, + "step": 1950 + }, + { + "epoch": 0.23, + "learning_rate": 9.181017937286085e-06, + "loss": 3.4718, + "step": 1951 + }, + { + "epoch": 0.23, + "learning_rate": 9.17992062735111e-06, + "loss": 3.4554, + "step": 1952 + }, + { + "epoch": 0.23, + "learning_rate": 9.178822648461634e-06, + "loss": 3.5118, + "step": 1953 + }, + { + "epoch": 0.23, + "learning_rate": 9.17772400079338e-06, + "loss": 3.5415, + "step": 1954 + }, + { + "epoch": 0.23, + "learning_rate": 9.176624684522172e-06, + "loss": 3.4038, + "step": 1955 + }, + { + "epoch": 0.23, + "learning_rate": 9.175524699823947e-06, + "loss": 3.4934, + "step": 1956 + }, + { + "epoch": 0.23, + "learning_rate": 9.174424046874746e-06, + "loss": 3.5123, + "step": 1957 + }, + { + "epoch": 0.23, + "learning_rate": 9.173322725850718e-06, + "loss": 3.4139, + "step": 1958 + }, + { + "epoch": 0.23, + "learning_rate": 9.172220736928117e-06, + "loss": 3.4965, + "step": 1959 + }, + { + "epoch": 0.23, + "learning_rate": 9.171118080283306e-06, + "loss": 3.5287, + "step": 1960 + }, + { + "epoch": 0.23, + "learning_rate": 9.170014756092756e-06, + "loss": 3.3579, + "step": 1961 + }, + { + "epoch": 0.23, + "learning_rate": 9.16891076453304e-06, + "loss": 3.4905, + "step": 1962 + }, + { + "epoch": 0.24, + "learning_rate": 9.167806105780841e-06, + "loss": 3.3528, + "step": 1963 + }, + { + "epoch": 0.24, + "learning_rate": 9.16670078001295e-06, + "loss": 3.3669, + "step": 1964 + }, + { + "epoch": 0.24, + "learning_rate": 9.165594787406267e-06, + "loss": 3.3776, + "step": 1965 + }, + { + "epoch": 0.24, + "learning_rate": 9.164488128137788e-06, + "loss": 3.4592, + "step": 1966 + }, + { + "epoch": 0.24, + "learning_rate": 9.163380802384627e-06, + "loss": 3.4525, + "step": 1967 + }, + { + "epoch": 0.24, + "learning_rate": 9.162272810323998e-06, + "loss": 3.5357, + "step": 1968 + }, + { + "epoch": 0.24, + "learning_rate": 9.161164152133226e-06, + "loss": 3.3807, + "step": 1969 + }, + { + "epoch": 0.24, + "learning_rate": 9.16005482798974e-06, + "loss": 3.4045, + "step": 1970 + }, + { + "epoch": 0.24, + "learning_rate": 9.158944838071078e-06, + "loss": 3.4981, + "step": 1971 + }, + { + "epoch": 0.24, + "learning_rate": 9.157834182554879e-06, + "loss": 3.4654, + "step": 1972 + }, + { + "epoch": 0.24, + "learning_rate": 9.156722861618894e-06, + "loss": 3.5281, + "step": 1973 + }, + { + "epoch": 0.24, + "learning_rate": 9.15561087544098e-06, + "loss": 3.4191, + "step": 1974 + }, + { + "epoch": 0.24, + "learning_rate": 9.154498224199099e-06, + "loss": 3.5252, + "step": 1975 + }, + { + "epoch": 0.24, + "learning_rate": 9.15338490807132e-06, + "loss": 3.3801, + "step": 1976 + }, + { + "epoch": 0.24, + "learning_rate": 9.152270927235815e-06, + "loss": 3.497, + "step": 1977 + }, + { + "epoch": 0.24, + "learning_rate": 9.15115628187087e-06, + "loss": 3.429, + "step": 1978 + }, + { + "epoch": 0.24, + "learning_rate": 9.150040972154869e-06, + "loss": 3.4537, + "step": 1979 + }, + { + "epoch": 0.24, + "learning_rate": 9.148924998266308e-06, + "loss": 3.5076, + "step": 1980 + }, + { + "epoch": 0.24, + "learning_rate": 9.147808360383787e-06, + "loss": 3.4715, + "step": 1981 + }, + { + "epoch": 0.24, + "learning_rate": 9.146691058686014e-06, + "loss": 3.469, + "step": 1982 + }, + { + "epoch": 0.24, + "learning_rate": 9.1455730933518e-06, + "loss": 3.4613, + "step": 1983 + }, + { + "epoch": 0.24, + "learning_rate": 9.144454464560066e-06, + "loss": 3.5032, + "step": 1984 + }, + { + "epoch": 0.24, + "learning_rate": 9.143335172489835e-06, + "loss": 3.5048, + "step": 1985 + }, + { + "epoch": 0.24, + "learning_rate": 9.142215217320241e-06, + "loss": 3.4591, + "step": 1986 + }, + { + "epoch": 0.24, + "learning_rate": 9.141094599230523e-06, + "loss": 3.2999, + "step": 1987 + }, + { + "epoch": 0.24, + "learning_rate": 9.139973318400019e-06, + "loss": 3.4062, + "step": 1988 + }, + { + "epoch": 0.24, + "learning_rate": 9.138851375008182e-06, + "loss": 3.393, + "step": 1989 + }, + { + "epoch": 0.24, + "learning_rate": 9.137728769234569e-06, + "loss": 3.3343, + "step": 1990 + }, + { + "epoch": 0.24, + "learning_rate": 9.13660550125884e-06, + "loss": 3.4826, + "step": 1991 + }, + { + "epoch": 0.24, + "learning_rate": 9.135481571260765e-06, + "loss": 3.4647, + "step": 1992 + }, + { + "epoch": 0.24, + "learning_rate": 9.134356979420216e-06, + "loss": 3.4296, + "step": 1993 + }, + { + "epoch": 0.24, + "learning_rate": 9.13323172591717e-06, + "loss": 3.4725, + "step": 1994 + }, + { + "epoch": 0.24, + "learning_rate": 9.13210581093172e-06, + "loss": 3.4902, + "step": 1995 + }, + { + "epoch": 0.24, + "learning_rate": 9.130979234644051e-06, + "loss": 3.4108, + "step": 1996 + }, + { + "epoch": 0.24, + "learning_rate": 9.129851997234462e-06, + "loss": 3.4993, + "step": 1997 + }, + { + "epoch": 0.24, + "learning_rate": 9.128724098883357e-06, + "loss": 3.4248, + "step": 1998 + }, + { + "epoch": 0.24, + "learning_rate": 9.127595539771243e-06, + "loss": 3.4091, + "step": 1999 + }, + { + "epoch": 0.24, + "learning_rate": 9.126466320078738e-06, + "loss": 3.4833, + "step": 2000 + }, + { + "epoch": 0.24, + "learning_rate": 9.125336439986559e-06, + "loss": 3.5011, + "step": 2001 + }, + { + "epoch": 0.24, + "learning_rate": 9.124205899675532e-06, + "loss": 3.4656, + "step": 2002 + }, + { + "epoch": 0.24, + "learning_rate": 9.123074699326591e-06, + "loss": 3.4339, + "step": 2003 + }, + { + "epoch": 0.24, + "learning_rate": 9.121942839120772e-06, + "loss": 3.3622, + "step": 2004 + }, + { + "epoch": 0.24, + "learning_rate": 9.120810319239218e-06, + "loss": 3.4238, + "step": 2005 + }, + { + "epoch": 0.24, + "learning_rate": 9.119677139863177e-06, + "loss": 3.4281, + "step": 2006 + }, + { + "epoch": 0.24, + "learning_rate": 9.118543301174003e-06, + "loss": 3.447, + "step": 2007 + }, + { + "epoch": 0.24, + "learning_rate": 9.117408803353158e-06, + "loss": 3.4512, + "step": 2008 + }, + { + "epoch": 0.24, + "learning_rate": 9.116273646582203e-06, + "loss": 3.3261, + "step": 2009 + }, + { + "epoch": 0.24, + "learning_rate": 9.115137831042811e-06, + "loss": 3.4455, + "step": 2010 + }, + { + "epoch": 0.24, + "learning_rate": 9.114001356916759e-06, + "loss": 3.4667, + "step": 2011 + }, + { + "epoch": 0.24, + "learning_rate": 9.112864224385925e-06, + "loss": 3.5228, + "step": 2012 + }, + { + "epoch": 0.24, + "learning_rate": 9.111726433632299e-06, + "loss": 3.4033, + "step": 2013 + }, + { + "epoch": 0.24, + "learning_rate": 9.11058798483797e-06, + "loss": 3.5399, + "step": 2014 + }, + { + "epoch": 0.24, + "learning_rate": 9.10944887818514e-06, + "loss": 3.4542, + "step": 2015 + }, + { + "epoch": 0.24, + "learning_rate": 9.108309113856104e-06, + "loss": 3.4083, + "step": 2016 + }, + { + "epoch": 0.24, + "learning_rate": 9.107168692033278e-06, + "loss": 3.3697, + "step": 2017 + }, + { + "epoch": 0.24, + "learning_rate": 9.106027612899169e-06, + "loss": 3.4164, + "step": 2018 + }, + { + "epoch": 0.24, + "learning_rate": 9.1048858766364e-06, + "loss": 3.3832, + "step": 2019 + }, + { + "epoch": 0.24, + "learning_rate": 9.103743483427692e-06, + "loss": 3.4312, + "step": 2020 + }, + { + "epoch": 0.24, + "learning_rate": 9.102600433455873e-06, + "loss": 3.3131, + "step": 2021 + }, + { + "epoch": 0.24, + "learning_rate": 9.101456726903877e-06, + "loss": 3.4527, + "step": 2022 + }, + { + "epoch": 0.24, + "learning_rate": 9.100312363954746e-06, + "loss": 3.4307, + "step": 2023 + }, + { + "epoch": 0.24, + "learning_rate": 9.09916734479162e-06, + "loss": 3.4337, + "step": 2024 + }, + { + "epoch": 0.24, + "learning_rate": 9.098021669597748e-06, + "loss": 3.3943, + "step": 2025 + }, + { + "epoch": 0.24, + "learning_rate": 9.096875338556486e-06, + "loss": 3.4369, + "step": 2026 + }, + { + "epoch": 0.24, + "learning_rate": 9.095728351851291e-06, + "loss": 3.4742, + "step": 2027 + }, + { + "epoch": 0.24, + "learning_rate": 9.094580709665728e-06, + "loss": 3.358, + "step": 2028 + }, + { + "epoch": 0.24, + "learning_rate": 9.093432412183467e-06, + "loss": 3.4086, + "step": 2029 + }, + { + "epoch": 0.24, + "learning_rate": 9.092283459588278e-06, + "loss": 3.321, + "step": 2030 + }, + { + "epoch": 0.24, + "learning_rate": 9.09113385206404e-06, + "loss": 3.4279, + "step": 2031 + }, + { + "epoch": 0.24, + "learning_rate": 9.08998358979474e-06, + "loss": 3.4617, + "step": 2032 + }, + { + "epoch": 0.24, + "learning_rate": 9.088832672964462e-06, + "loss": 3.4417, + "step": 2033 + }, + { + "epoch": 0.24, + "learning_rate": 9.0876811017574e-06, + "loss": 3.3324, + "step": 2034 + }, + { + "epoch": 0.24, + "learning_rate": 9.086528876357851e-06, + "loss": 3.4941, + "step": 2035 + }, + { + "epoch": 0.24, + "learning_rate": 9.08537599695022e-06, + "loss": 3.4323, + "step": 2036 + }, + { + "epoch": 0.24, + "learning_rate": 9.084222463719007e-06, + "loss": 3.4117, + "step": 2037 + }, + { + "epoch": 0.24, + "learning_rate": 9.083068276848831e-06, + "loss": 3.5003, + "step": 2038 + }, + { + "epoch": 0.24, + "learning_rate": 9.081913436524405e-06, + "loss": 3.3735, + "step": 2039 + }, + { + "epoch": 0.24, + "learning_rate": 9.08075794293055e-06, + "loss": 3.5008, + "step": 2040 + }, + { + "epoch": 0.24, + "learning_rate": 9.079601796252188e-06, + "loss": 3.3901, + "step": 2041 + }, + { + "epoch": 0.24, + "learning_rate": 9.078444996674353e-06, + "loss": 3.468, + "step": 2042 + }, + { + "epoch": 0.24, + "learning_rate": 9.07728754438218e-06, + "loss": 3.4287, + "step": 2043 + }, + { + "epoch": 0.24, + "learning_rate": 9.076129439560903e-06, + "loss": 3.4949, + "step": 2044 + }, + { + "epoch": 0.24, + "learning_rate": 9.074970682395868e-06, + "loss": 3.5339, + "step": 2045 + }, + { + "epoch": 0.24, + "learning_rate": 9.073811273072522e-06, + "loss": 3.4638, + "step": 2046 + }, + { + "epoch": 0.25, + "learning_rate": 9.072651211776417e-06, + "loss": 3.4053, + "step": 2047 + }, + { + "epoch": 0.25, + "learning_rate": 9.07149049869321e-06, + "loss": 3.4015, + "step": 2048 + }, + { + "epoch": 0.25, + "learning_rate": 9.07032913400866e-06, + "loss": 3.4316, + "step": 2049 + }, + { + "epoch": 0.25, + "learning_rate": 9.069167117908632e-06, + "loss": 3.3369, + "step": 2050 + }, + { + "epoch": 0.25, + "learning_rate": 9.068004450579096e-06, + "loss": 3.577, + "step": 2051 + }, + { + "epoch": 0.25, + "learning_rate": 9.066841132206124e-06, + "loss": 3.3476, + "step": 2052 + }, + { + "epoch": 0.25, + "learning_rate": 9.065677162975894e-06, + "loss": 3.4297, + "step": 2053 + }, + { + "epoch": 0.25, + "learning_rate": 9.064512543074688e-06, + "loss": 3.3906, + "step": 2054 + }, + { + "epoch": 0.25, + "learning_rate": 9.063347272688893e-06, + "loss": 3.4032, + "step": 2055 + }, + { + "epoch": 0.25, + "learning_rate": 9.062181352004998e-06, + "loss": 3.5255, + "step": 2056 + }, + { + "epoch": 0.25, + "learning_rate": 9.061014781209594e-06, + "loss": 3.4658, + "step": 2057 + }, + { + "epoch": 0.25, + "learning_rate": 9.059847560489382e-06, + "loss": 3.4077, + "step": 2058 + }, + { + "epoch": 0.25, + "learning_rate": 9.058679690031165e-06, + "loss": 3.4675, + "step": 2059 + }, + { + "epoch": 0.25, + "learning_rate": 9.057511170021846e-06, + "loss": 3.3845, + "step": 2060 + }, + { + "epoch": 0.25, + "learning_rate": 9.056342000648438e-06, + "loss": 3.4815, + "step": 2061 + }, + { + "epoch": 0.25, + "learning_rate": 9.055172182098051e-06, + "loss": 3.4507, + "step": 2062 + }, + { + "epoch": 0.25, + "learning_rate": 9.054001714557905e-06, + "loss": 3.3784, + "step": 2063 + }, + { + "epoch": 0.25, + "learning_rate": 9.052830598215323e-06, + "loss": 3.4613, + "step": 2064 + }, + { + "epoch": 0.25, + "learning_rate": 9.051658833257729e-06, + "loss": 3.5217, + "step": 2065 + }, + { + "epoch": 0.25, + "learning_rate": 9.050486419872652e-06, + "loss": 3.4542, + "step": 2066 + }, + { + "epoch": 0.25, + "learning_rate": 9.049313358247727e-06, + "loss": 3.4479, + "step": 2067 + }, + { + "epoch": 0.25, + "learning_rate": 9.048139648570686e-06, + "loss": 3.4354, + "step": 2068 + }, + { + "epoch": 0.25, + "learning_rate": 9.046965291029375e-06, + "loss": 3.4462, + "step": 2069 + }, + { + "epoch": 0.25, + "learning_rate": 9.045790285811735e-06, + "loss": 3.3337, + "step": 2070 + }, + { + "epoch": 0.25, + "learning_rate": 9.044614633105816e-06, + "loss": 3.4392, + "step": 2071 + }, + { + "epoch": 0.25, + "learning_rate": 9.043438333099768e-06, + "loss": 3.4294, + "step": 2072 + }, + { + "epoch": 0.25, + "learning_rate": 9.042261385981845e-06, + "loss": 3.468, + "step": 2073 + }, + { + "epoch": 0.25, + "learning_rate": 9.041083791940406e-06, + "loss": 3.4274, + "step": 2074 + }, + { + "epoch": 0.25, + "learning_rate": 9.039905551163917e-06, + "loss": 3.4588, + "step": 2075 + }, + { + "epoch": 0.25, + "learning_rate": 9.038726663840937e-06, + "loss": 3.4466, + "step": 2076 + }, + { + "epoch": 0.25, + "learning_rate": 9.03754713016014e-06, + "loss": 3.5208, + "step": 2077 + }, + { + "epoch": 0.25, + "learning_rate": 9.0363669503103e-06, + "loss": 3.3996, + "step": 2078 + }, + { + "epoch": 0.25, + "learning_rate": 9.035186124480287e-06, + "loss": 3.382, + "step": 2079 + }, + { + "epoch": 0.25, + "learning_rate": 9.034004652859085e-06, + "loss": 3.5843, + "step": 2080 + }, + { + "epoch": 0.25, + "learning_rate": 9.032822535635776e-06, + "loss": 3.4648, + "step": 2081 + }, + { + "epoch": 0.25, + "learning_rate": 9.031639772999544e-06, + "loss": 3.3698, + "step": 2082 + }, + { + "epoch": 0.25, + "learning_rate": 9.030456365139682e-06, + "loss": 3.4501, + "step": 2083 + }, + { + "epoch": 0.25, + "learning_rate": 9.029272312245579e-06, + "loss": 3.4877, + "step": 2084 + }, + { + "epoch": 0.25, + "learning_rate": 9.028087614506732e-06, + "loss": 3.4345, + "step": 2085 + }, + { + "epoch": 0.25, + "learning_rate": 9.026902272112738e-06, + "loss": 3.5083, + "step": 2086 + }, + { + "epoch": 0.25, + "learning_rate": 9.025716285253305e-06, + "loss": 3.4471, + "step": 2087 + }, + { + "epoch": 0.25, + "learning_rate": 9.024529654118233e-06, + "loss": 3.4417, + "step": 2088 + }, + { + "epoch": 0.25, + "learning_rate": 9.023342378897435e-06, + "loss": 3.4097, + "step": 2089 + }, + { + "epoch": 0.25, + "learning_rate": 9.022154459780916e-06, + "loss": 3.3709, + "step": 2090 + }, + { + "epoch": 0.25, + "learning_rate": 9.020965896958795e-06, + "loss": 3.4064, + "step": 2091 + }, + { + "epoch": 0.25, + "learning_rate": 9.01977669062129e-06, + "loss": 3.4199, + "step": 2092 + }, + { + "epoch": 0.25, + "learning_rate": 9.01858684095872e-06, + "loss": 3.3949, + "step": 2093 + }, + { + "epoch": 0.25, + "learning_rate": 9.01739634816151e-06, + "loss": 3.3819, + "step": 2094 + }, + { + "epoch": 0.25, + "learning_rate": 9.016205212420187e-06, + "loss": 3.4607, + "step": 2095 + }, + { + "epoch": 0.25, + "learning_rate": 9.015013433925375e-06, + "loss": 3.3768, + "step": 2096 + }, + { + "epoch": 0.25, + "learning_rate": 9.013821012867812e-06, + "loss": 3.5312, + "step": 2097 + }, + { + "epoch": 0.25, + "learning_rate": 9.01262794943833e-06, + "loss": 3.5273, + "step": 2098 + }, + { + "epoch": 0.25, + "learning_rate": 9.01143424382787e-06, + "loss": 3.4574, + "step": 2099 + }, + { + "epoch": 0.25, + "learning_rate": 9.010239896227468e-06, + "loss": 3.4664, + "step": 2100 + }, + { + "epoch": 0.25, + "learning_rate": 9.009044906828273e-06, + "loss": 3.3885, + "step": 2101 + }, + { + "epoch": 0.25, + "learning_rate": 9.007849275821527e-06, + "loss": 3.4821, + "step": 2102 + }, + { + "epoch": 0.25, + "learning_rate": 9.00665300339858e-06, + "loss": 3.4242, + "step": 2103 + }, + { + "epoch": 0.25, + "learning_rate": 9.005456089750883e-06, + "loss": 3.452, + "step": 2104 + }, + { + "epoch": 0.25, + "learning_rate": 9.004258535069989e-06, + "loss": 3.4388, + "step": 2105 + }, + { + "epoch": 0.25, + "learning_rate": 9.003060339547559e-06, + "loss": 3.5157, + "step": 2106 + }, + { + "epoch": 0.25, + "learning_rate": 9.001861503375349e-06, + "loss": 3.5952, + "step": 2107 + }, + { + "epoch": 0.25, + "learning_rate": 9.000662026745219e-06, + "loss": 3.4434, + "step": 2108 + }, + { + "epoch": 0.25, + "learning_rate": 8.999461909849137e-06, + "loss": 3.5574, + "step": 2109 + }, + { + "epoch": 0.25, + "learning_rate": 8.998261152879167e-06, + "loss": 3.4454, + "step": 2110 + }, + { + "epoch": 0.25, + "learning_rate": 8.99705975602748e-06, + "loss": 3.4159, + "step": 2111 + }, + { + "epoch": 0.25, + "learning_rate": 8.995857719486347e-06, + "loss": 3.516, + "step": 2112 + }, + { + "epoch": 0.25, + "learning_rate": 8.994655043448142e-06, + "loss": 3.4126, + "step": 2113 + }, + { + "epoch": 0.25, + "learning_rate": 8.99345172810534e-06, + "loss": 3.3998, + "step": 2114 + }, + { + "epoch": 0.25, + "learning_rate": 8.99224777365052e-06, + "loss": 3.3988, + "step": 2115 + }, + { + "epoch": 0.25, + "learning_rate": 8.991043180276363e-06, + "loss": 3.4777, + "step": 2116 + }, + { + "epoch": 0.25, + "learning_rate": 8.989837948175655e-06, + "loss": 3.3906, + "step": 2117 + }, + { + "epoch": 0.25, + "learning_rate": 8.988632077541277e-06, + "loss": 3.569, + "step": 2118 + }, + { + "epoch": 0.25, + "learning_rate": 8.987425568566219e-06, + "loss": 3.4713, + "step": 2119 + }, + { + "epoch": 0.25, + "learning_rate": 8.986218421443569e-06, + "loss": 3.495, + "step": 2120 + }, + { + "epoch": 0.25, + "learning_rate": 8.985010636366521e-06, + "loss": 3.519, + "step": 2121 + }, + { + "epoch": 0.25, + "learning_rate": 8.983802213528367e-06, + "loss": 3.3406, + "step": 2122 + }, + { + "epoch": 0.25, + "learning_rate": 8.982593153122505e-06, + "loss": 3.512, + "step": 2123 + }, + { + "epoch": 0.25, + "learning_rate": 8.981383455342432e-06, + "loss": 3.408, + "step": 2124 + }, + { + "epoch": 0.25, + "learning_rate": 8.980173120381746e-06, + "loss": 3.3892, + "step": 2125 + }, + { + "epoch": 0.25, + "learning_rate": 8.978962148434151e-06, + "loss": 3.3864, + "step": 2126 + }, + { + "epoch": 0.25, + "learning_rate": 8.977750539693453e-06, + "loss": 3.4832, + "step": 2127 + }, + { + "epoch": 0.25, + "learning_rate": 8.976538294353555e-06, + "loss": 3.4567, + "step": 2128 + }, + { + "epoch": 0.25, + "learning_rate": 8.975325412608465e-06, + "loss": 3.4861, + "step": 2129 + }, + { + "epoch": 0.25, + "learning_rate": 8.974111894652294e-06, + "loss": 3.4151, + "step": 2130 + }, + { + "epoch": 0.26, + "learning_rate": 8.97289774067925e-06, + "loss": 3.4271, + "step": 2131 + }, + { + "epoch": 0.26, + "learning_rate": 8.97168295088365e-06, + "loss": 3.3285, + "step": 2132 + }, + { + "epoch": 0.26, + "learning_rate": 8.970467525459909e-06, + "loss": 3.4473, + "step": 2133 + }, + { + "epoch": 0.26, + "learning_rate": 8.969251464602542e-06, + "loss": 3.5435, + "step": 2134 + }, + { + "epoch": 0.26, + "learning_rate": 8.968034768506167e-06, + "loss": 3.5085, + "step": 2135 + }, + { + "epoch": 0.26, + "learning_rate": 8.966817437365503e-06, + "loss": 3.4745, + "step": 2136 + }, + { + "epoch": 0.26, + "learning_rate": 8.965599471375375e-06, + "loss": 3.3813, + "step": 2137 + }, + { + "epoch": 0.26, + "learning_rate": 8.964380870730705e-06, + "loss": 3.4798, + "step": 2138 + }, + { + "epoch": 0.26, + "learning_rate": 8.963161635626518e-06, + "loss": 3.4364, + "step": 2139 + }, + { + "epoch": 0.26, + "learning_rate": 8.96194176625794e-06, + "loss": 3.4707, + "step": 2140 + }, + { + "epoch": 0.26, + "learning_rate": 8.960721262820198e-06, + "loss": 3.4072, + "step": 2141 + }, + { + "epoch": 0.26, + "learning_rate": 8.959500125508622e-06, + "loss": 3.4118, + "step": 2142 + }, + { + "epoch": 0.26, + "learning_rate": 8.958278354518644e-06, + "loss": 3.4604, + "step": 2143 + }, + { + "epoch": 0.26, + "learning_rate": 8.957055950045794e-06, + "loss": 3.3993, + "step": 2144 + }, + { + "epoch": 0.26, + "learning_rate": 8.955832912285707e-06, + "loss": 3.4959, + "step": 2145 + }, + { + "epoch": 0.26, + "learning_rate": 8.954609241434118e-06, + "loss": 3.5042, + "step": 2146 + }, + { + "epoch": 0.26, + "learning_rate": 8.953384937686865e-06, + "loss": 3.386, + "step": 2147 + }, + { + "epoch": 0.26, + "learning_rate": 8.952160001239881e-06, + "loss": 3.5236, + "step": 2148 + }, + { + "epoch": 0.26, + "learning_rate": 8.950934432289208e-06, + "loss": 3.4203, + "step": 2149 + }, + { + "epoch": 0.26, + "learning_rate": 8.949708231030986e-06, + "loss": 3.4091, + "step": 2150 + }, + { + "epoch": 0.26, + "learning_rate": 8.948481397661457e-06, + "loss": 3.3883, + "step": 2151 + }, + { + "epoch": 0.26, + "learning_rate": 8.947253932376961e-06, + "loss": 3.515, + "step": 2152 + }, + { + "epoch": 0.26, + "learning_rate": 8.946025835373942e-06, + "loss": 3.3662, + "step": 2153 + }, + { + "epoch": 0.26, + "learning_rate": 8.944797106848949e-06, + "loss": 3.4341, + "step": 2154 + }, + { + "epoch": 0.26, + "learning_rate": 8.94356774699862e-06, + "loss": 3.4891, + "step": 2155 + }, + { + "epoch": 0.26, + "learning_rate": 8.942337756019708e-06, + "loss": 3.5199, + "step": 2156 + }, + { + "epoch": 0.26, + "learning_rate": 8.941107134109057e-06, + "loss": 3.4633, + "step": 2157 + }, + { + "epoch": 0.26, + "learning_rate": 8.939875881463619e-06, + "loss": 3.4252, + "step": 2158 + }, + { + "epoch": 0.26, + "learning_rate": 8.93864399828044e-06, + "loss": 3.4018, + "step": 2159 + }, + { + "epoch": 0.26, + "learning_rate": 8.937411484756675e-06, + "loss": 3.607, + "step": 2160 + }, + { + "epoch": 0.26, + "learning_rate": 8.936178341089572e-06, + "loss": 3.3762, + "step": 2161 + }, + { + "epoch": 0.26, + "learning_rate": 8.934944567476484e-06, + "loss": 3.4753, + "step": 2162 + }, + { + "epoch": 0.26, + "learning_rate": 8.933710164114866e-06, + "loss": 3.3827, + "step": 2163 + }, + { + "epoch": 0.26, + "learning_rate": 8.932475131202269e-06, + "loss": 3.4649, + "step": 2164 + }, + { + "epoch": 0.26, + "learning_rate": 8.93123946893635e-06, + "loss": 3.4315, + "step": 2165 + }, + { + "epoch": 0.26, + "learning_rate": 8.930003177514864e-06, + "loss": 3.3904, + "step": 2166 + }, + { + "epoch": 0.26, + "learning_rate": 8.928766257135666e-06, + "loss": 3.4247, + "step": 2167 + }, + { + "epoch": 0.26, + "learning_rate": 8.927528707996713e-06, + "loss": 3.3834, + "step": 2168 + }, + { + "epoch": 0.26, + "learning_rate": 8.926290530296061e-06, + "loss": 3.4613, + "step": 2169 + }, + { + "epoch": 0.26, + "learning_rate": 8.925051724231875e-06, + "loss": 3.5339, + "step": 2170 + }, + { + "epoch": 0.26, + "learning_rate": 8.923812290002403e-06, + "loss": 3.4249, + "step": 2171 + }, + { + "epoch": 0.26, + "learning_rate": 8.922572227806013e-06, + "loss": 3.4312, + "step": 2172 + }, + { + "epoch": 0.26, + "learning_rate": 8.921331537841158e-06, + "loss": 3.4492, + "step": 2173 + }, + { + "epoch": 0.26, + "learning_rate": 8.9200902203064e-06, + "loss": 3.3744, + "step": 2174 + }, + { + "epoch": 0.26, + "learning_rate": 8.918848275400403e-06, + "loss": 3.4706, + "step": 2175 + }, + { + "epoch": 0.26, + "learning_rate": 8.917605703321923e-06, + "loss": 3.4019, + "step": 2176 + }, + { + "epoch": 0.26, + "learning_rate": 8.916362504269826e-06, + "loss": 3.3875, + "step": 2177 + }, + { + "epoch": 0.26, + "learning_rate": 8.915118678443068e-06, + "loss": 3.4424, + "step": 2178 + }, + { + "epoch": 0.26, + "learning_rate": 8.913874226040715e-06, + "loss": 3.5368, + "step": 2179 + }, + { + "epoch": 0.26, + "learning_rate": 8.912629147261929e-06, + "loss": 3.5816, + "step": 2180 + }, + { + "epoch": 0.26, + "learning_rate": 8.911383442305971e-06, + "loss": 3.4139, + "step": 2181 + }, + { + "epoch": 0.26, + "learning_rate": 8.910137111372204e-06, + "loss": 3.4064, + "step": 2182 + }, + { + "epoch": 0.26, + "learning_rate": 8.908890154660091e-06, + "loss": 3.4259, + "step": 2183 + }, + { + "epoch": 0.26, + "learning_rate": 8.907642572369193e-06, + "loss": 3.3302, + "step": 2184 + }, + { + "epoch": 0.26, + "learning_rate": 8.906394364699175e-06, + "loss": 3.4774, + "step": 2185 + }, + { + "epoch": 0.26, + "learning_rate": 8.9051455318498e-06, + "loss": 3.4576, + "step": 2186 + }, + { + "epoch": 0.26, + "learning_rate": 8.903896074020934e-06, + "loss": 3.4541, + "step": 2187 + }, + { + "epoch": 0.26, + "learning_rate": 8.902645991412537e-06, + "loss": 3.507, + "step": 2188 + }, + { + "epoch": 0.26, + "learning_rate": 8.90139528422467e-06, + "loss": 3.5329, + "step": 2189 + }, + { + "epoch": 0.26, + "learning_rate": 8.9001439526575e-06, + "loss": 3.4749, + "step": 2190 + }, + { + "epoch": 0.26, + "learning_rate": 8.898891996911291e-06, + "loss": 3.5004, + "step": 2191 + }, + { + "epoch": 0.26, + "learning_rate": 8.897639417186401e-06, + "loss": 3.4779, + "step": 2192 + }, + { + "epoch": 0.26, + "learning_rate": 8.896386213683297e-06, + "loss": 3.3413, + "step": 2193 + }, + { + "epoch": 0.26, + "learning_rate": 8.895132386602542e-06, + "loss": 3.3945, + "step": 2194 + }, + { + "epoch": 0.26, + "learning_rate": 8.893877936144794e-06, + "loss": 3.4588, + "step": 2195 + }, + { + "epoch": 0.26, + "learning_rate": 8.892622862510821e-06, + "loss": 3.5149, + "step": 2196 + }, + { + "epoch": 0.26, + "learning_rate": 8.891367165901481e-06, + "loss": 3.4157, + "step": 2197 + }, + { + "epoch": 0.26, + "learning_rate": 8.890110846517737e-06, + "loss": 3.3799, + "step": 2198 + }, + { + "epoch": 0.26, + "learning_rate": 8.88885390456065e-06, + "loss": 3.3539, + "step": 2199 + }, + { + "epoch": 0.26, + "learning_rate": 8.887596340231383e-06, + "loss": 3.474, + "step": 2200 + }, + { + "epoch": 0.26, + "learning_rate": 8.886338153731193e-06, + "loss": 3.353, + "step": 2201 + }, + { + "epoch": 0.26, + "learning_rate": 8.885079345261442e-06, + "loss": 3.4775, + "step": 2202 + }, + { + "epoch": 0.26, + "learning_rate": 8.88381991502359e-06, + "loss": 3.4513, + "step": 2203 + }, + { + "epoch": 0.26, + "learning_rate": 8.882559863219196e-06, + "loss": 3.4803, + "step": 2204 + }, + { + "epoch": 0.26, + "learning_rate": 8.881299190049919e-06, + "loss": 3.4469, + "step": 2205 + }, + { + "epoch": 0.26, + "learning_rate": 8.880037895717517e-06, + "loss": 3.4432, + "step": 2206 + }, + { + "epoch": 0.26, + "learning_rate": 8.878775980423847e-06, + "loss": 3.5574, + "step": 2207 + }, + { + "epoch": 0.26, + "learning_rate": 8.877513444370867e-06, + "loss": 3.5739, + "step": 2208 + }, + { + "epoch": 0.26, + "learning_rate": 8.876250287760631e-06, + "loss": 3.532, + "step": 2209 + }, + { + "epoch": 0.26, + "learning_rate": 8.874986510795296e-06, + "loss": 3.5181, + "step": 2210 + }, + { + "epoch": 0.26, + "learning_rate": 8.873722113677118e-06, + "loss": 3.4943, + "step": 2211 + }, + { + "epoch": 0.26, + "learning_rate": 8.87245709660845e-06, + "loss": 3.4208, + "step": 2212 + }, + { + "epoch": 0.26, + "learning_rate": 8.871191459791747e-06, + "loss": 3.5023, + "step": 2213 + }, + { + "epoch": 0.27, + "learning_rate": 8.869925203429556e-06, + "loss": 3.3763, + "step": 2214 + }, + { + "epoch": 0.27, + "learning_rate": 8.868658327724537e-06, + "loss": 3.4841, + "step": 2215 + }, + { + "epoch": 0.27, + "learning_rate": 8.867390832879433e-06, + "loss": 3.3225, + "step": 2216 + }, + { + "epoch": 0.27, + "learning_rate": 8.8661227190971e-06, + "loss": 3.4267, + "step": 2217 + }, + { + "epoch": 0.27, + "learning_rate": 8.864853986580482e-06, + "loss": 3.4871, + "step": 2218 + }, + { + "epoch": 0.27, + "learning_rate": 8.863584635532629e-06, + "loss": 3.407, + "step": 2219 + }, + { + "epoch": 0.27, + "learning_rate": 8.862314666156691e-06, + "loss": 3.3615, + "step": 2220 + }, + { + "epoch": 0.27, + "learning_rate": 8.861044078655909e-06, + "loss": 3.47, + "step": 2221 + }, + { + "epoch": 0.27, + "learning_rate": 8.859772873233629e-06, + "loss": 3.3649, + "step": 2222 + }, + { + "epoch": 0.27, + "learning_rate": 8.858501050093297e-06, + "loss": 3.4641, + "step": 2223 + }, + { + "epoch": 0.27, + "learning_rate": 8.857228609438453e-06, + "loss": 3.4122, + "step": 2224 + }, + { + "epoch": 0.27, + "learning_rate": 8.85595555147274e-06, + "loss": 3.2953, + "step": 2225 + }, + { + "epoch": 0.27, + "learning_rate": 8.854681876399898e-06, + "loss": 3.4344, + "step": 2226 + }, + { + "epoch": 0.27, + "learning_rate": 8.853407584423765e-06, + "loss": 3.5014, + "step": 2227 + }, + { + "epoch": 0.27, + "learning_rate": 8.85213267574828e-06, + "loss": 3.4482, + "step": 2228 + }, + { + "epoch": 0.27, + "learning_rate": 8.850857150577478e-06, + "loss": 3.4117, + "step": 2229 + }, + { + "epoch": 0.27, + "learning_rate": 8.849581009115495e-06, + "loss": 3.4511, + "step": 2230 + }, + { + "epoch": 0.27, + "learning_rate": 8.848304251566567e-06, + "loss": 3.5017, + "step": 2231 + }, + { + "epoch": 0.27, + "learning_rate": 8.847026878135021e-06, + "loss": 3.4873, + "step": 2232 + }, + { + "epoch": 0.27, + "learning_rate": 8.84574888902529e-06, + "loss": 3.4487, + "step": 2233 + }, + { + "epoch": 0.27, + "learning_rate": 8.844470284441906e-06, + "loss": 3.5821, + "step": 2234 + }, + { + "epoch": 0.27, + "learning_rate": 8.843191064589494e-06, + "loss": 3.3459, + "step": 2235 + }, + { + "epoch": 0.27, + "learning_rate": 8.841911229672781e-06, + "loss": 3.4507, + "step": 2236 + }, + { + "epoch": 0.27, + "learning_rate": 8.840630779896592e-06, + "loss": 3.4001, + "step": 2237 + }, + { + "epoch": 0.27, + "learning_rate": 8.839349715465851e-06, + "loss": 3.4935, + "step": 2238 + }, + { + "epoch": 0.27, + "learning_rate": 8.838068036585579e-06, + "loss": 3.3951, + "step": 2239 + }, + { + "epoch": 0.27, + "learning_rate": 8.836785743460895e-06, + "loss": 3.4178, + "step": 2240 + }, + { + "epoch": 0.27, + "learning_rate": 8.835502836297017e-06, + "loss": 3.4002, + "step": 2241 + }, + { + "epoch": 0.27, + "learning_rate": 8.834219315299264e-06, + "loss": 3.4151, + "step": 2242 + }, + { + "epoch": 0.27, + "learning_rate": 8.832935180673048e-06, + "loss": 3.4488, + "step": 2243 + }, + { + "epoch": 0.27, + "learning_rate": 8.831650432623882e-06, + "loss": 3.5063, + "step": 2244 + }, + { + "epoch": 0.27, + "learning_rate": 8.830365071357378e-06, + "loss": 3.4277, + "step": 2245 + }, + { + "epoch": 0.27, + "learning_rate": 8.829079097079246e-06, + "loss": 3.4658, + "step": 2246 + }, + { + "epoch": 0.27, + "learning_rate": 8.827792509995294e-06, + "loss": 3.4672, + "step": 2247 + }, + { + "epoch": 0.27, + "learning_rate": 8.826505310311424e-06, + "loss": 3.4904, + "step": 2248 + }, + { + "epoch": 0.27, + "learning_rate": 8.825217498233641e-06, + "loss": 3.466, + "step": 2249 + }, + { + "epoch": 0.27, + "learning_rate": 8.823929073968048e-06, + "loss": 3.4002, + "step": 2250 + }, + { + "epoch": 0.27, + "learning_rate": 8.822640037720842e-06, + "loss": 3.3105, + "step": 2251 + }, + { + "epoch": 0.27, + "learning_rate": 8.821350389698322e-06, + "loss": 3.5171, + "step": 2252 + }, + { + "epoch": 0.27, + "learning_rate": 8.820060130106884e-06, + "loss": 3.4262, + "step": 2253 + }, + { + "epoch": 0.27, + "learning_rate": 8.81876925915302e-06, + "loss": 3.4068, + "step": 2254 + }, + { + "epoch": 0.27, + "learning_rate": 8.817477777043318e-06, + "loss": 3.3921, + "step": 2255 + }, + { + "epoch": 0.27, + "learning_rate": 8.816185683984473e-06, + "loss": 3.4113, + "step": 2256 + }, + { + "epoch": 0.27, + "learning_rate": 8.814892980183268e-06, + "loss": 3.4662, + "step": 2257 + }, + { + "epoch": 0.27, + "learning_rate": 8.813599665846586e-06, + "loss": 3.5469, + "step": 2258 + }, + { + "epoch": 0.27, + "learning_rate": 8.81230574118141e-06, + "loss": 3.4186, + "step": 2259 + }, + { + "epoch": 0.27, + "learning_rate": 8.811011206394823e-06, + "loss": 3.3804, + "step": 2260 + }, + { + "epoch": 0.27, + "learning_rate": 8.809716061693999e-06, + "loss": 3.4155, + "step": 2261 + }, + { + "epoch": 0.27, + "learning_rate": 8.808420307286213e-06, + "loss": 3.4085, + "step": 2262 + }, + { + "epoch": 0.27, + "learning_rate": 8.807123943378839e-06, + "loss": 3.4817, + "step": 2263 + }, + { + "epoch": 0.27, + "learning_rate": 8.805826970179345e-06, + "loss": 3.4582, + "step": 2264 + }, + { + "epoch": 0.27, + "learning_rate": 8.804529387895302e-06, + "loss": 3.4043, + "step": 2265 + }, + { + "epoch": 0.27, + "learning_rate": 8.803231196734372e-06, + "loss": 3.5151, + "step": 2266 + }, + { + "epoch": 0.27, + "learning_rate": 8.801932396904318e-06, + "loss": 3.4623, + "step": 2267 + }, + { + "epoch": 0.27, + "learning_rate": 8.800632988613001e-06, + "loss": 3.432, + "step": 2268 + }, + { + "epoch": 0.27, + "learning_rate": 8.799332972068376e-06, + "loss": 3.414, + "step": 2269 + }, + { + "epoch": 0.27, + "learning_rate": 8.7980323474785e-06, + "loss": 3.502, + "step": 2270 + }, + { + "epoch": 0.27, + "learning_rate": 8.796731115051526e-06, + "loss": 3.4629, + "step": 2271 + }, + { + "epoch": 0.27, + "learning_rate": 8.795429274995702e-06, + "loss": 3.3266, + "step": 2272 + }, + { + "epoch": 0.27, + "learning_rate": 8.794126827519372e-06, + "loss": 3.4308, + "step": 2273 + }, + { + "epoch": 0.27, + "learning_rate": 8.792823772830984e-06, + "loss": 3.5029, + "step": 2274 + }, + { + "epoch": 0.27, + "learning_rate": 8.791520111139073e-06, + "loss": 3.4827, + "step": 2275 + }, + { + "epoch": 0.27, + "learning_rate": 8.790215842652283e-06, + "loss": 3.4342, + "step": 2276 + }, + { + "epoch": 0.27, + "learning_rate": 8.788910967579346e-06, + "loss": 3.427, + "step": 2277 + }, + { + "epoch": 0.27, + "learning_rate": 8.787605486129096e-06, + "loss": 3.4863, + "step": 2278 + }, + { + "epoch": 0.27, + "learning_rate": 8.78629939851046e-06, + "loss": 3.5004, + "step": 2279 + }, + { + "epoch": 0.27, + "learning_rate": 8.784992704932467e-06, + "loss": 3.3429, + "step": 2280 + }, + { + "epoch": 0.27, + "learning_rate": 8.783685405604238e-06, + "loss": 3.418, + "step": 2281 + }, + { + "epoch": 0.27, + "learning_rate": 8.782377500734993e-06, + "loss": 3.4365, + "step": 2282 + }, + { + "epoch": 0.27, + "learning_rate": 8.781068990534053e-06, + "loss": 3.369, + "step": 2283 + }, + { + "epoch": 0.27, + "learning_rate": 8.779759875210826e-06, + "loss": 3.4865, + "step": 2284 + }, + { + "epoch": 0.27, + "learning_rate": 8.778450154974827e-06, + "loss": 3.4431, + "step": 2285 + }, + { + "epoch": 0.27, + "learning_rate": 8.777139830035662e-06, + "loss": 3.4684, + "step": 2286 + }, + { + "epoch": 0.27, + "learning_rate": 8.775828900603036e-06, + "loss": 3.4323, + "step": 2287 + }, + { + "epoch": 0.27, + "learning_rate": 8.77451736688675e-06, + "loss": 3.4124, + "step": 2288 + }, + { + "epoch": 0.27, + "learning_rate": 8.773205229096701e-06, + "loss": 3.3944, + "step": 2289 + }, + { + "epoch": 0.27, + "learning_rate": 8.771892487442885e-06, + "loss": 3.4633, + "step": 2290 + }, + { + "epoch": 0.27, + "learning_rate": 8.770579142135394e-06, + "loss": 3.4378, + "step": 2291 + }, + { + "epoch": 0.27, + "learning_rate": 8.769265193384412e-06, + "loss": 3.5087, + "step": 2292 + }, + { + "epoch": 0.27, + "learning_rate": 8.767950641400227e-06, + "loss": 3.3176, + "step": 2293 + }, + { + "epoch": 0.27, + "learning_rate": 8.766635486393218e-06, + "loss": 3.4231, + "step": 2294 + }, + { + "epoch": 0.27, + "learning_rate": 8.765319728573864e-06, + "loss": 3.31, + "step": 2295 + }, + { + "epoch": 0.27, + "learning_rate": 8.764003368152736e-06, + "loss": 3.4, + "step": 2296 + }, + { + "epoch": 0.27, + "learning_rate": 8.762686405340509e-06, + "loss": 3.4102, + "step": 2297 + }, + { + "epoch": 0.28, + "learning_rate": 8.761368840347944e-06, + "loss": 3.3808, + "step": 2298 + }, + { + "epoch": 0.28, + "learning_rate": 8.760050673385909e-06, + "loss": 3.4969, + "step": 2299 + }, + { + "epoch": 0.28, + "learning_rate": 8.758731904665362e-06, + "loss": 3.4254, + "step": 2300 + }, + { + "epoch": 0.28, + "learning_rate": 8.757412534397355e-06, + "loss": 3.4241, + "step": 2301 + }, + { + "epoch": 0.28, + "learning_rate": 8.756092562793046e-06, + "loss": 3.3945, + "step": 2302 + }, + { + "epoch": 0.28, + "learning_rate": 8.75477199006368e-06, + "loss": 3.4653, + "step": 2303 + }, + { + "epoch": 0.28, + "learning_rate": 8.753450816420602e-06, + "loss": 3.4899, + "step": 2304 + }, + { + "epoch": 0.28, + "learning_rate": 8.752129042075252e-06, + "loss": 3.4929, + "step": 2305 + }, + { + "epoch": 0.28, + "learning_rate": 8.75080666723917e-06, + "loss": 3.4529, + "step": 2306 + }, + { + "epoch": 0.28, + "learning_rate": 8.749483692123982e-06, + "loss": 3.3336, + "step": 2307 + }, + { + "epoch": 0.28, + "learning_rate": 8.748160116941424e-06, + "loss": 3.3334, + "step": 2308 + }, + { + "epoch": 0.28, + "learning_rate": 8.746835941903317e-06, + "loss": 3.3377, + "step": 2309 + }, + { + "epoch": 0.28, + "learning_rate": 8.745511167221583e-06, + "loss": 3.3832, + "step": 2310 + }, + { + "epoch": 0.28, + "learning_rate": 8.74418579310824e-06, + "loss": 3.4898, + "step": 2311 + }, + { + "epoch": 0.28, + "learning_rate": 8.7428598197754e-06, + "loss": 3.4564, + "step": 2312 + }, + { + "epoch": 0.28, + "learning_rate": 8.741533247435269e-06, + "loss": 3.5192, + "step": 2313 + }, + { + "epoch": 0.28, + "learning_rate": 8.740206076300156e-06, + "loss": 3.4964, + "step": 2314 + }, + { + "epoch": 0.28, + "learning_rate": 8.738878306582462e-06, + "loss": 3.4156, + "step": 2315 + }, + { + "epoch": 0.28, + "learning_rate": 8.737549938494679e-06, + "loss": 3.435, + "step": 2316 + }, + { + "epoch": 0.28, + "learning_rate": 8.736220972249402e-06, + "loss": 3.4045, + "step": 2317 + }, + { + "epoch": 0.28, + "learning_rate": 8.734891408059319e-06, + "loss": 3.3937, + "step": 2318 + }, + { + "epoch": 0.28, + "learning_rate": 8.733561246137209e-06, + "loss": 3.4245, + "step": 2319 + }, + { + "epoch": 0.28, + "learning_rate": 8.732230486695956e-06, + "loss": 3.4312, + "step": 2320 + }, + { + "epoch": 0.28, + "learning_rate": 8.730899129948534e-06, + "loss": 3.4696, + "step": 2321 + }, + { + "epoch": 0.28, + "learning_rate": 8.729567176108011e-06, + "loss": 3.5284, + "step": 2322 + }, + { + "epoch": 0.28, + "learning_rate": 8.728234625387556e-06, + "loss": 3.4558, + "step": 2323 + }, + { + "epoch": 0.28, + "learning_rate": 8.726901478000428e-06, + "loss": 3.3692, + "step": 2324 + }, + { + "epoch": 0.28, + "learning_rate": 8.725567734159986e-06, + "loss": 3.4604, + "step": 2325 + }, + { + "epoch": 0.28, + "learning_rate": 8.724233394079679e-06, + "loss": 3.4494, + "step": 2326 + }, + { + "epoch": 0.28, + "learning_rate": 8.722898457973058e-06, + "loss": 3.4463, + "step": 2327 + }, + { + "epoch": 0.28, + "learning_rate": 8.721562926053763e-06, + "loss": 3.3796, + "step": 2328 + }, + { + "epoch": 0.28, + "learning_rate": 8.720226798535538e-06, + "loss": 3.4125, + "step": 2329 + }, + { + "epoch": 0.28, + "learning_rate": 8.718890075632212e-06, + "loss": 3.4696, + "step": 2330 + }, + { + "epoch": 0.28, + "learning_rate": 8.717552757557714e-06, + "loss": 3.5178, + "step": 2331 + }, + { + "epoch": 0.28, + "learning_rate": 8.716214844526072e-06, + "loss": 3.4181, + "step": 2332 + }, + { + "epoch": 0.28, + "learning_rate": 8.714876336751404e-06, + "loss": 3.4299, + "step": 2333 + }, + { + "epoch": 0.28, + "learning_rate": 8.713537234447924e-06, + "loss": 3.4937, + "step": 2334 + }, + { + "epoch": 0.28, + "learning_rate": 8.71219753782994e-06, + "loss": 3.3706, + "step": 2335 + }, + { + "epoch": 0.28, + "learning_rate": 8.710857247111861e-06, + "loss": 3.5165, + "step": 2336 + }, + { + "epoch": 0.28, + "learning_rate": 8.709516362508186e-06, + "loss": 3.3761, + "step": 2337 + }, + { + "epoch": 0.28, + "learning_rate": 8.708174884233509e-06, + "loss": 3.4121, + "step": 2338 + }, + { + "epoch": 0.28, + "learning_rate": 8.706832812502521e-06, + "loss": 3.4521, + "step": 2339 + }, + { + "epoch": 0.28, + "learning_rate": 8.705490147530006e-06, + "loss": 3.4669, + "step": 2340 + }, + { + "epoch": 0.28, + "learning_rate": 8.704146889530846e-06, + "loss": 3.5158, + "step": 2341 + }, + { + "epoch": 0.28, + "learning_rate": 8.702803038720016e-06, + "loss": 3.4264, + "step": 2342 + }, + { + "epoch": 0.28, + "learning_rate": 8.701458595312583e-06, + "loss": 3.4818, + "step": 2343 + }, + { + "epoch": 0.28, + "learning_rate": 8.700113559523716e-06, + "loss": 3.464, + "step": 2344 + }, + { + "epoch": 0.28, + "learning_rate": 8.69876793156867e-06, + "loss": 3.3066, + "step": 2345 + }, + { + "epoch": 0.28, + "learning_rate": 8.697421711662803e-06, + "loss": 3.511, + "step": 2346 + }, + { + "epoch": 0.28, + "learning_rate": 8.696074900021562e-06, + "loss": 3.5361, + "step": 2347 + }, + { + "epoch": 0.28, + "learning_rate": 8.694727496860491e-06, + "loss": 3.5149, + "step": 2348 + }, + { + "epoch": 0.28, + "learning_rate": 8.693379502395229e-06, + "loss": 3.4756, + "step": 2349 + }, + { + "epoch": 0.28, + "learning_rate": 8.692030916841508e-06, + "loss": 3.4175, + "step": 2350 + }, + { + "epoch": 0.28, + "learning_rate": 8.690681740415158e-06, + "loss": 3.3995, + "step": 2351 + }, + { + "epoch": 0.28, + "learning_rate": 8.689331973332099e-06, + "loss": 3.4163, + "step": 2352 + }, + { + "epoch": 0.28, + "learning_rate": 8.687981615808347e-06, + "loss": 3.4125, + "step": 2353 + }, + { + "epoch": 0.28, + "learning_rate": 8.686630668060015e-06, + "loss": 3.4755, + "step": 2354 + }, + { + "epoch": 0.28, + "learning_rate": 8.68527913030331e-06, + "loss": 3.4528, + "step": 2355 + }, + { + "epoch": 0.28, + "learning_rate": 8.683927002754528e-06, + "loss": 3.5118, + "step": 2356 + }, + { + "epoch": 0.28, + "learning_rate": 8.682574285630068e-06, + "loss": 3.4529, + "step": 2357 + }, + { + "epoch": 0.28, + "learning_rate": 8.681220979146415e-06, + "loss": 3.6052, + "step": 2358 + }, + { + "epoch": 0.28, + "learning_rate": 8.679867083520153e-06, + "loss": 3.3955, + "step": 2359 + }, + { + "epoch": 0.28, + "learning_rate": 8.67851259896796e-06, + "loss": 3.4225, + "step": 2360 + }, + { + "epoch": 0.28, + "learning_rate": 8.677157525706612e-06, + "loss": 3.3022, + "step": 2361 + }, + { + "epoch": 0.28, + "learning_rate": 8.675801863952968e-06, + "loss": 3.4822, + "step": 2362 + }, + { + "epoch": 0.28, + "learning_rate": 8.67444561392399e-06, + "loss": 3.447, + "step": 2363 + }, + { + "epoch": 0.28, + "learning_rate": 8.673088775836735e-06, + "loss": 3.2917, + "step": 2364 + }, + { + "epoch": 0.28, + "learning_rate": 8.67173134990835e-06, + "loss": 3.4397, + "step": 2365 + }, + { + "epoch": 0.28, + "learning_rate": 8.670373336356076e-06, + "loss": 3.4618, + "step": 2366 + }, + { + "epoch": 0.28, + "learning_rate": 8.66901473539725e-06, + "loss": 3.4522, + "step": 2367 + }, + { + "epoch": 0.28, + "learning_rate": 8.667655547249305e-06, + "loss": 3.5648, + "step": 2368 + }, + { + "epoch": 0.28, + "learning_rate": 8.666295772129763e-06, + "loss": 3.4062, + "step": 2369 + }, + { + "epoch": 0.28, + "learning_rate": 8.66493541025624e-06, + "loss": 3.4848, + "step": 2370 + }, + { + "epoch": 0.28, + "learning_rate": 8.663574461846456e-06, + "loss": 3.4686, + "step": 2371 + }, + { + "epoch": 0.28, + "learning_rate": 8.66221292711821e-06, + "loss": 3.4727, + "step": 2372 + }, + { + "epoch": 0.28, + "learning_rate": 8.660850806289404e-06, + "loss": 3.4555, + "step": 2373 + }, + { + "epoch": 0.28, + "learning_rate": 8.659488099578033e-06, + "loss": 3.3528, + "step": 2374 + }, + { + "epoch": 0.28, + "learning_rate": 8.658124807202183e-06, + "loss": 3.4704, + "step": 2375 + }, + { + "epoch": 0.28, + "learning_rate": 8.656760929380037e-06, + "loss": 3.3921, + "step": 2376 + }, + { + "epoch": 0.28, + "learning_rate": 8.65539646632987e-06, + "loss": 3.5519, + "step": 2377 + }, + { + "epoch": 0.28, + "learning_rate": 8.654031418270048e-06, + "loss": 3.5269, + "step": 2378 + }, + { + "epoch": 0.28, + "learning_rate": 8.652665785419036e-06, + "loss": 3.4453, + "step": 2379 + }, + { + "epoch": 0.28, + "learning_rate": 8.651299567995389e-06, + "loss": 3.4348, + "step": 2380 + }, + { + "epoch": 0.29, + "learning_rate": 8.649932766217757e-06, + "loss": 3.547, + "step": 2381 + }, + { + "epoch": 0.29, + "learning_rate": 8.648565380304881e-06, + "loss": 3.4593, + "step": 2382 + }, + { + "epoch": 0.29, + "learning_rate": 8.647197410475601e-06, + "loss": 3.471, + "step": 2383 + }, + { + "epoch": 0.29, + "learning_rate": 8.645828856948843e-06, + "loss": 3.5368, + "step": 2384 + }, + { + "epoch": 0.29, + "learning_rate": 8.644459719943634e-06, + "loss": 3.4761, + "step": 2385 + }, + { + "epoch": 0.29, + "learning_rate": 8.643089999679086e-06, + "loss": 3.4353, + "step": 2386 + }, + { + "epoch": 0.29, + "learning_rate": 8.641719696374413e-06, + "loss": 3.3858, + "step": 2387 + }, + { + "epoch": 0.29, + "learning_rate": 8.640348810248917e-06, + "loss": 3.3611, + "step": 2388 + }, + { + "epoch": 0.29, + "learning_rate": 8.638977341521997e-06, + "loss": 3.466, + "step": 2389 + }, + { + "epoch": 0.29, + "learning_rate": 8.637605290413137e-06, + "loss": 3.3908, + "step": 2390 + }, + { + "epoch": 0.29, + "learning_rate": 8.636232657141926e-06, + "loss": 3.413, + "step": 2391 + }, + { + "epoch": 0.29, + "learning_rate": 8.634859441928038e-06, + "loss": 3.4535, + "step": 2392 + }, + { + "epoch": 0.29, + "learning_rate": 8.63348564499124e-06, + "loss": 3.3957, + "step": 2393 + }, + { + "epoch": 0.29, + "learning_rate": 8.632111266551399e-06, + "loss": 3.3752, + "step": 2394 + }, + { + "epoch": 0.29, + "learning_rate": 8.630736306828468e-06, + "loss": 3.3547, + "step": 2395 + }, + { + "epoch": 0.29, + "learning_rate": 8.629360766042496e-06, + "loss": 3.3953, + "step": 2396 + }, + { + "epoch": 0.29, + "learning_rate": 8.627984644413622e-06, + "loss": 3.411, + "step": 2397 + }, + { + "epoch": 0.29, + "learning_rate": 8.626607942162085e-06, + "loss": 3.4887, + "step": 2398 + }, + { + "epoch": 0.29, + "learning_rate": 8.62523065950821e-06, + "loss": 3.5024, + "step": 2399 + }, + { + "epoch": 0.29, + "learning_rate": 8.623852796672418e-06, + "loss": 3.4837, + "step": 2400 + }, + { + "epoch": 0.29, + "learning_rate": 8.62247435387522e-06, + "loss": 3.4775, + "step": 2401 + }, + { + "epoch": 0.29, + "learning_rate": 8.621095331337226e-06, + "loss": 3.5324, + "step": 2402 + }, + { + "epoch": 0.29, + "learning_rate": 8.61971572927913e-06, + "loss": 3.4679, + "step": 2403 + }, + { + "epoch": 0.29, + "learning_rate": 8.618335547921728e-06, + "loss": 3.3615, + "step": 2404 + }, + { + "epoch": 0.29, + "learning_rate": 8.616954787485902e-06, + "loss": 3.4603, + "step": 2405 + }, + { + "epoch": 0.29, + "learning_rate": 8.61557344819263e-06, + "loss": 3.3329, + "step": 2406 + }, + { + "epoch": 0.29, + "learning_rate": 8.61419153026298e-06, + "loss": 3.497, + "step": 2407 + }, + { + "epoch": 0.29, + "learning_rate": 8.612809033918113e-06, + "loss": 3.4666, + "step": 2408 + }, + { + "epoch": 0.29, + "learning_rate": 8.611425959379288e-06, + "loss": 3.4919, + "step": 2409 + }, + { + "epoch": 0.29, + "learning_rate": 8.610042306867847e-06, + "loss": 3.4437, + "step": 2410 + }, + { + "epoch": 0.29, + "learning_rate": 8.608658076605234e-06, + "loss": 3.4304, + "step": 2411 + }, + { + "epoch": 0.29, + "learning_rate": 8.607273268812981e-06, + "loss": 3.4609, + "step": 2412 + }, + { + "epoch": 0.29, + "learning_rate": 8.60588788371271e-06, + "loss": 3.4666, + "step": 2413 + }, + { + "epoch": 0.29, + "learning_rate": 8.60450192152614e-06, + "loss": 3.5747, + "step": 2414 + }, + { + "epoch": 0.29, + "learning_rate": 8.60311538247508e-06, + "loss": 3.5285, + "step": 2415 + }, + { + "epoch": 0.29, + "learning_rate": 8.601728266781434e-06, + "loss": 3.4233, + "step": 2416 + }, + { + "epoch": 0.29, + "learning_rate": 8.60034057466719e-06, + "loss": 3.4326, + "step": 2417 + }, + { + "epoch": 0.29, + "learning_rate": 8.598952306354438e-06, + "loss": 3.3904, + "step": 2418 + }, + { + "epoch": 0.29, + "learning_rate": 8.597563462065357e-06, + "loss": 3.495, + "step": 2419 + }, + { + "epoch": 0.29, + "learning_rate": 8.596174042022216e-06, + "loss": 3.3919, + "step": 2420 + }, + { + "epoch": 0.29, + "learning_rate": 8.594784046447381e-06, + "loss": 3.405, + "step": 2421 + }, + { + "epoch": 0.29, + "learning_rate": 8.593393475563303e-06, + "loss": 3.4851, + "step": 2422 + }, + { + "epoch": 0.29, + "learning_rate": 8.59200232959253e-06, + "loss": 3.434, + "step": 2423 + }, + { + "epoch": 0.29, + "learning_rate": 8.590610608757703e-06, + "loss": 3.3634, + "step": 2424 + }, + { + "epoch": 0.29, + "learning_rate": 8.589218313281551e-06, + "loss": 3.3964, + "step": 2425 + }, + { + "epoch": 0.29, + "learning_rate": 8.587825443386897e-06, + "loss": 3.4421, + "step": 2426 + }, + { + "epoch": 0.29, + "learning_rate": 8.586431999296657e-06, + "loss": 3.4398, + "step": 2427 + }, + { + "epoch": 0.29, + "learning_rate": 8.585037981233837e-06, + "loss": 3.4866, + "step": 2428 + }, + { + "epoch": 0.29, + "learning_rate": 8.583643389421535e-06, + "loss": 3.4607, + "step": 2429 + }, + { + "epoch": 0.29, + "learning_rate": 8.582248224082944e-06, + "loss": 3.3825, + "step": 2430 + }, + { + "epoch": 0.29, + "learning_rate": 8.580852485441345e-06, + "loss": 3.4097, + "step": 2431 + }, + { + "epoch": 0.29, + "learning_rate": 8.57945617372011e-06, + "loss": 3.3831, + "step": 2432 + }, + { + "epoch": 0.29, + "learning_rate": 8.578059289142706e-06, + "loss": 3.337, + "step": 2433 + }, + { + "epoch": 0.29, + "learning_rate": 8.576661831932691e-06, + "loss": 3.4534, + "step": 2434 + }, + { + "epoch": 0.29, + "learning_rate": 8.575263802313715e-06, + "loss": 3.3669, + "step": 2435 + }, + { + "epoch": 0.29, + "learning_rate": 8.573865200509518e-06, + "loss": 3.4417, + "step": 2436 + }, + { + "epoch": 0.29, + "learning_rate": 8.572466026743929e-06, + "loss": 3.4614, + "step": 2437 + }, + { + "epoch": 0.29, + "learning_rate": 8.571066281240877e-06, + "loss": 3.4904, + "step": 2438 + }, + { + "epoch": 0.29, + "learning_rate": 8.569665964224371e-06, + "loss": 3.3851, + "step": 2439 + }, + { + "epoch": 0.29, + "learning_rate": 8.568265075918526e-06, + "loss": 3.4127, + "step": 2440 + }, + { + "epoch": 0.29, + "learning_rate": 8.566863616547533e-06, + "loss": 3.4618, + "step": 2441 + }, + { + "epoch": 0.29, + "learning_rate": 8.565461586335686e-06, + "loss": 3.4532, + "step": 2442 + }, + { + "epoch": 0.29, + "learning_rate": 8.564058985507362e-06, + "loss": 3.4564, + "step": 2443 + }, + { + "epoch": 0.29, + "learning_rate": 8.562655814287037e-06, + "loss": 3.4456, + "step": 2444 + }, + { + "epoch": 0.29, + "learning_rate": 8.561252072899271e-06, + "loss": 3.4768, + "step": 2445 + }, + { + "epoch": 0.29, + "learning_rate": 8.559847761568723e-06, + "loss": 3.4522, + "step": 2446 + }, + { + "epoch": 0.29, + "learning_rate": 8.558442880520136e-06, + "loss": 3.4257, + "step": 2447 + }, + { + "epoch": 0.29, + "learning_rate": 8.557037429978348e-06, + "loss": 3.4005, + "step": 2448 + }, + { + "epoch": 0.29, + "learning_rate": 8.555631410168286e-06, + "loss": 3.3375, + "step": 2449 + }, + { + "epoch": 0.29, + "learning_rate": 8.554224821314971e-06, + "loss": 3.4137, + "step": 2450 + }, + { + "epoch": 0.29, + "learning_rate": 8.552817663643513e-06, + "loss": 3.377, + "step": 2451 + }, + { + "epoch": 0.29, + "learning_rate": 8.551409937379113e-06, + "loss": 3.4783, + "step": 2452 + }, + { + "epoch": 0.29, + "learning_rate": 8.550001642747066e-06, + "loss": 3.3926, + "step": 2453 + }, + { + "epoch": 0.29, + "learning_rate": 8.54859277997275e-06, + "loss": 3.4164, + "step": 2454 + }, + { + "epoch": 0.29, + "learning_rate": 8.547183349281647e-06, + "loss": 3.3432, + "step": 2455 + }, + { + "epoch": 0.29, + "learning_rate": 8.545773350899316e-06, + "loss": 3.4211, + "step": 2456 + }, + { + "epoch": 0.29, + "learning_rate": 8.544362785051415e-06, + "loss": 3.369, + "step": 2457 + }, + { + "epoch": 0.29, + "learning_rate": 8.54295165196369e-06, + "loss": 3.4549, + "step": 2458 + }, + { + "epoch": 0.29, + "learning_rate": 8.541539951861983e-06, + "loss": 3.4268, + "step": 2459 + }, + { + "epoch": 0.29, + "learning_rate": 8.54012768497222e-06, + "loss": 3.4489, + "step": 2460 + }, + { + "epoch": 0.29, + "learning_rate": 8.538714851520418e-06, + "loss": 3.5027, + "step": 2461 + }, + { + "epoch": 0.29, + "learning_rate": 8.537301451732688e-06, + "loss": 3.3972, + "step": 2462 + }, + { + "epoch": 0.29, + "learning_rate": 8.53588748583523e-06, + "loss": 3.3971, + "step": 2463 + }, + { + "epoch": 0.29, + "learning_rate": 8.534472954054338e-06, + "loss": 3.4016, + "step": 2464 + }, + { + "epoch": 0.3, + "learning_rate": 8.533057856616392e-06, + "loss": 3.4099, + "step": 2465 + }, + { + "epoch": 0.3, + "learning_rate": 8.531642193747865e-06, + "loss": 3.3862, + "step": 2466 + }, + { + "epoch": 0.3, + "learning_rate": 8.530225965675316e-06, + "loss": 3.3649, + "step": 2467 + }, + { + "epoch": 0.3, + "learning_rate": 8.528809172625403e-06, + "loss": 3.4512, + "step": 2468 + }, + { + "epoch": 0.3, + "learning_rate": 8.527391814824866e-06, + "loss": 3.4191, + "step": 2469 + }, + { + "epoch": 0.3, + "learning_rate": 8.525973892500541e-06, + "loss": 3.5298, + "step": 2470 + }, + { + "epoch": 0.3, + "learning_rate": 8.524555405879354e-06, + "loss": 3.4905, + "step": 2471 + }, + { + "epoch": 0.3, + "learning_rate": 8.523136355188315e-06, + "loss": 3.4487, + "step": 2472 + }, + { + "epoch": 0.3, + "learning_rate": 8.52171674065453e-06, + "loss": 3.3239, + "step": 2473 + }, + { + "epoch": 0.3, + "learning_rate": 8.520296562505196e-06, + "loss": 3.4273, + "step": 2474 + }, + { + "epoch": 0.3, + "learning_rate": 8.518875820967598e-06, + "loss": 3.3697, + "step": 2475 + }, + { + "epoch": 0.3, + "learning_rate": 8.517454516269112e-06, + "loss": 3.4873, + "step": 2476 + }, + { + "epoch": 0.3, + "learning_rate": 8.516032648637201e-06, + "loss": 3.5316, + "step": 2477 + }, + { + "epoch": 0.3, + "learning_rate": 8.514610218299424e-06, + "loss": 3.4843, + "step": 2478 + }, + { + "epoch": 0.3, + "learning_rate": 8.513187225483424e-06, + "loss": 3.4356, + "step": 2479 + }, + { + "epoch": 0.3, + "learning_rate": 8.511763670416938e-06, + "loss": 3.3947, + "step": 2480 + }, + { + "epoch": 0.3, + "learning_rate": 8.510339553327792e-06, + "loss": 3.4947, + "step": 2481 + }, + { + "epoch": 0.3, + "learning_rate": 8.5089148744439e-06, + "loss": 3.4416, + "step": 2482 + }, + { + "epoch": 0.3, + "learning_rate": 8.50748963399327e-06, + "loss": 3.4554, + "step": 2483 + }, + { + "epoch": 0.3, + "learning_rate": 8.506063832203998e-06, + "loss": 3.4823, + "step": 2484 + }, + { + "epoch": 0.3, + "learning_rate": 8.504637469304264e-06, + "loss": 3.4594, + "step": 2485 + }, + { + "epoch": 0.3, + "learning_rate": 8.50321054552235e-06, + "loss": 3.4655, + "step": 2486 + }, + { + "epoch": 0.3, + "learning_rate": 8.501783061086614e-06, + "loss": 3.3955, + "step": 2487 + }, + { + "epoch": 0.3, + "learning_rate": 8.500355016225519e-06, + "loss": 3.4247, + "step": 2488 + }, + { + "epoch": 0.3, + "learning_rate": 8.498926411167601e-06, + "loss": 3.4843, + "step": 2489 + }, + { + "epoch": 0.3, + "learning_rate": 8.4974972461415e-06, + "loss": 3.5323, + "step": 2490 + }, + { + "epoch": 0.3, + "learning_rate": 8.496067521375936e-06, + "loss": 3.4294, + "step": 2491 + }, + { + "epoch": 0.3, + "learning_rate": 8.494637237099726e-06, + "loss": 3.519, + "step": 2492 + }, + { + "epoch": 0.3, + "learning_rate": 8.493206393541767e-06, + "loss": 3.3981, + "step": 2493 + }, + { + "epoch": 0.3, + "learning_rate": 8.491774990931054e-06, + "loss": 3.4354, + "step": 2494 + }, + { + "epoch": 0.3, + "learning_rate": 8.49034302949667e-06, + "loss": 3.5093, + "step": 2495 + }, + { + "epoch": 0.3, + "learning_rate": 8.488910509467785e-06, + "loss": 3.325, + "step": 2496 + }, + { + "epoch": 0.3, + "learning_rate": 8.48747743107366e-06, + "loss": 3.4688, + "step": 2497 + }, + { + "epoch": 0.3, + "learning_rate": 8.486043794543643e-06, + "loss": 3.449, + "step": 2498 + }, + { + "epoch": 0.3, + "learning_rate": 8.484609600107174e-06, + "loss": 3.4429, + "step": 2499 + }, + { + "epoch": 0.3, + "learning_rate": 8.483174847993785e-06, + "loss": 3.3771, + "step": 2500 + }, + { + "epoch": 0.3, + "learning_rate": 8.481739538433088e-06, + "loss": 3.5072, + "step": 2501 + }, + { + "epoch": 0.3, + "learning_rate": 8.480303671654795e-06, + "loss": 3.504, + "step": 2502 + }, + { + "epoch": 0.3, + "learning_rate": 8.478867247888697e-06, + "loss": 3.3545, + "step": 2503 + }, + { + "epoch": 0.3, + "learning_rate": 8.477430267364684e-06, + "loss": 3.4426, + "step": 2504 + }, + { + "epoch": 0.3, + "learning_rate": 8.475992730312728e-06, + "loss": 3.5132, + "step": 2505 + }, + { + "epoch": 0.3, + "learning_rate": 8.474554636962894e-06, + "loss": 3.5122, + "step": 2506 + }, + { + "epoch": 0.3, + "learning_rate": 8.473115987545333e-06, + "loss": 3.4863, + "step": 2507 + }, + { + "epoch": 0.3, + "learning_rate": 8.471676782290285e-06, + "loss": 3.2704, + "step": 2508 + }, + { + "epoch": 0.3, + "learning_rate": 8.470237021428086e-06, + "loss": 3.4634, + "step": 2509 + }, + { + "epoch": 0.3, + "learning_rate": 8.468796705189147e-06, + "loss": 3.457, + "step": 2510 + }, + { + "epoch": 0.3, + "learning_rate": 8.467355833803983e-06, + "loss": 3.4015, + "step": 2511 + }, + { + "epoch": 0.3, + "learning_rate": 8.465914407503188e-06, + "loss": 3.4521, + "step": 2512 + }, + { + "epoch": 0.3, + "learning_rate": 8.464472426517452e-06, + "loss": 3.332, + "step": 2513 + }, + { + "epoch": 0.3, + "learning_rate": 8.463029891077542e-06, + "loss": 3.5039, + "step": 2514 + }, + { + "epoch": 0.3, + "learning_rate": 8.461586801414328e-06, + "loss": 3.4102, + "step": 2515 + }, + { + "epoch": 0.3, + "learning_rate": 8.46014315775876e-06, + "loss": 3.4972, + "step": 2516 + }, + { + "epoch": 0.3, + "learning_rate": 8.458698960341877e-06, + "loss": 3.47, + "step": 2517 + }, + { + "epoch": 0.3, + "learning_rate": 8.45725420939481e-06, + "loss": 3.4207, + "step": 2518 + }, + { + "epoch": 0.3, + "learning_rate": 8.455808905148777e-06, + "loss": 3.4014, + "step": 2519 + }, + { + "epoch": 0.3, + "learning_rate": 8.454363047835087e-06, + "loss": 3.4165, + "step": 2520 + }, + { + "epoch": 0.3, + "learning_rate": 8.45291663768513e-06, + "loss": 3.3933, + "step": 2521 + }, + { + "epoch": 0.3, + "learning_rate": 8.45146967493039e-06, + "loss": 3.435, + "step": 2522 + }, + { + "epoch": 0.3, + "learning_rate": 8.450022159802443e-06, + "loss": 3.4158, + "step": 2523 + }, + { + "epoch": 0.3, + "learning_rate": 8.448574092532947e-06, + "loss": 3.4281, + "step": 2524 + }, + { + "epoch": 0.3, + "learning_rate": 8.447125473353648e-06, + "loss": 3.4031, + "step": 2525 + }, + { + "epoch": 0.3, + "learning_rate": 8.445676302496388e-06, + "loss": 3.4279, + "step": 2526 + }, + { + "epoch": 0.3, + "learning_rate": 8.444226580193092e-06, + "loss": 3.4738, + "step": 2527 + }, + { + "epoch": 0.3, + "learning_rate": 8.442776306675769e-06, + "loss": 3.4371, + "step": 2528 + }, + { + "epoch": 0.3, + "learning_rate": 8.441325482176524e-06, + "loss": 3.4168, + "step": 2529 + }, + { + "epoch": 0.3, + "learning_rate": 8.439874106927544e-06, + "loss": 3.3968, + "step": 2530 + }, + { + "epoch": 0.3, + "learning_rate": 8.438422181161112e-06, + "loss": 3.4046, + "step": 2531 + }, + { + "epoch": 0.3, + "learning_rate": 8.436969705109593e-06, + "loss": 3.3263, + "step": 2532 + }, + { + "epoch": 0.3, + "learning_rate": 8.435516679005437e-06, + "loss": 3.3665, + "step": 2533 + }, + { + "epoch": 0.3, + "learning_rate": 8.434063103081189e-06, + "loss": 3.4316, + "step": 2534 + }, + { + "epoch": 0.3, + "learning_rate": 8.43260897756948e-06, + "loss": 3.4289, + "step": 2535 + }, + { + "epoch": 0.3, + "learning_rate": 8.43115430270303e-06, + "loss": 3.5127, + "step": 2536 + }, + { + "epoch": 0.3, + "learning_rate": 8.429699078714642e-06, + "loss": 3.4663, + "step": 2537 + }, + { + "epoch": 0.3, + "learning_rate": 8.42824330583721e-06, + "loss": 3.4903, + "step": 2538 + }, + { + "epoch": 0.3, + "learning_rate": 8.426786984303717e-06, + "loss": 3.3975, + "step": 2539 + }, + { + "epoch": 0.3, + "learning_rate": 8.425330114347233e-06, + "loss": 3.4041, + "step": 2540 + }, + { + "epoch": 0.3, + "learning_rate": 8.423872696200914e-06, + "loss": 3.3546, + "step": 2541 + }, + { + "epoch": 0.3, + "learning_rate": 8.422414730098007e-06, + "loss": 3.3791, + "step": 2542 + }, + { + "epoch": 0.3, + "learning_rate": 8.420956216271842e-06, + "loss": 3.461, + "step": 2543 + }, + { + "epoch": 0.3, + "learning_rate": 8.419497154955845e-06, + "loss": 3.4097, + "step": 2544 + }, + { + "epoch": 0.3, + "learning_rate": 8.41803754638352e-06, + "loss": 3.4262, + "step": 2545 + }, + { + "epoch": 0.3, + "learning_rate": 8.416577390788461e-06, + "loss": 3.4608, + "step": 2546 + }, + { + "epoch": 0.3, + "learning_rate": 8.415116688404356e-06, + "loss": 3.4284, + "step": 2547 + }, + { + "epoch": 0.31, + "learning_rate": 8.413655439464973e-06, + "loss": 3.4209, + "step": 2548 + }, + { + "epoch": 0.31, + "learning_rate": 8.412193644204172e-06, + "loss": 3.4181, + "step": 2549 + }, + { + "epoch": 0.31, + "learning_rate": 8.410731302855897e-06, + "loss": 3.4134, + "step": 2550 + }, + { + "epoch": 0.31, + "learning_rate": 8.409268415654183e-06, + "loss": 3.4589, + "step": 2551 + }, + { + "epoch": 0.31, + "learning_rate": 8.407804982833147e-06, + "loss": 3.4555, + "step": 2552 + }, + { + "epoch": 0.31, + "learning_rate": 8.406341004627e-06, + "loss": 3.5406, + "step": 2553 + }, + { + "epoch": 0.31, + "learning_rate": 8.404876481270037e-06, + "loss": 3.4741, + "step": 2554 + }, + { + "epoch": 0.31, + "learning_rate": 8.40341141299664e-06, + "loss": 3.4461, + "step": 2555 + }, + { + "epoch": 0.31, + "learning_rate": 8.401945800041275e-06, + "loss": 3.4755, + "step": 2556 + }, + { + "epoch": 0.31, + "learning_rate": 8.400479642638504e-06, + "loss": 3.4722, + "step": 2557 + }, + { + "epoch": 0.31, + "learning_rate": 8.399012941022969e-06, + "loss": 3.297, + "step": 2558 + }, + { + "epoch": 0.31, + "learning_rate": 8.397545695429398e-06, + "loss": 3.3603, + "step": 2559 + }, + { + "epoch": 0.31, + "learning_rate": 8.396077906092614e-06, + "loss": 3.4172, + "step": 2560 + }, + { + "epoch": 0.31, + "learning_rate": 8.394609573247518e-06, + "loss": 3.4828, + "step": 2561 + }, + { + "epoch": 0.31, + "learning_rate": 8.393140697129104e-06, + "loss": 3.3931, + "step": 2562 + }, + { + "epoch": 0.31, + "learning_rate": 8.391671277972451e-06, + "loss": 3.4452, + "step": 2563 + }, + { + "epoch": 0.31, + "learning_rate": 8.390201316012723e-06, + "loss": 3.4101, + "step": 2564 + }, + { + "epoch": 0.31, + "learning_rate": 8.388730811485174e-06, + "loss": 3.4024, + "step": 2565 + }, + { + "epoch": 0.31, + "learning_rate": 8.387259764625144e-06, + "loss": 3.5652, + "step": 2566 + }, + { + "epoch": 0.31, + "learning_rate": 8.385788175668058e-06, + "loss": 3.4493, + "step": 2567 + }, + { + "epoch": 0.31, + "learning_rate": 8.38431604484943e-06, + "loss": 3.363, + "step": 2568 + }, + { + "epoch": 0.31, + "learning_rate": 8.38284337240486e-06, + "loss": 3.386, + "step": 2569 + }, + { + "epoch": 0.31, + "learning_rate": 8.381370158570033e-06, + "loss": 3.4869, + "step": 2570 + }, + { + "epoch": 0.31, + "learning_rate": 8.379896403580725e-06, + "loss": 3.2959, + "step": 2571 + }, + { + "epoch": 0.31, + "learning_rate": 8.378422107672795e-06, + "loss": 3.3993, + "step": 2572 + }, + { + "epoch": 0.31, + "learning_rate": 8.376947271082186e-06, + "loss": 3.3794, + "step": 2573 + }, + { + "epoch": 0.31, + "learning_rate": 8.375471894044935e-06, + "loss": 3.53, + "step": 2574 + }, + { + "epoch": 0.31, + "learning_rate": 8.373995976797158e-06, + "loss": 3.4547, + "step": 2575 + }, + { + "epoch": 0.31, + "learning_rate": 8.372519519575063e-06, + "loss": 3.4139, + "step": 2576 + }, + { + "epoch": 0.31, + "learning_rate": 8.371042522614942e-06, + "loss": 3.4206, + "step": 2577 + }, + { + "epoch": 0.31, + "learning_rate": 8.369564986153174e-06, + "loss": 3.3342, + "step": 2578 + }, + { + "epoch": 0.31, + "learning_rate": 8.368086910426222e-06, + "loss": 3.3832, + "step": 2579 + }, + { + "epoch": 0.31, + "learning_rate": 8.366608295670639e-06, + "loss": 3.3498, + "step": 2580 + }, + { + "epoch": 0.31, + "learning_rate": 8.365129142123062e-06, + "loss": 3.4435, + "step": 2581 + }, + { + "epoch": 0.31, + "learning_rate": 8.363649450020216e-06, + "loss": 3.5072, + "step": 2582 + }, + { + "epoch": 0.31, + "learning_rate": 8.362169219598905e-06, + "loss": 3.3807, + "step": 2583 + }, + { + "epoch": 0.31, + "learning_rate": 8.360688451096032e-06, + "loss": 3.4346, + "step": 2584 + }, + { + "epoch": 0.31, + "learning_rate": 8.359207144748578e-06, + "loss": 3.3293, + "step": 2585 + }, + { + "epoch": 0.31, + "learning_rate": 8.35772530079361e-06, + "loss": 3.3629, + "step": 2586 + }, + { + "epoch": 0.31, + "learning_rate": 8.35624291946828e-06, + "loss": 3.4565, + "step": 2587 + }, + { + "epoch": 0.31, + "learning_rate": 8.354760001009833e-06, + "loss": 3.3354, + "step": 2588 + }, + { + "epoch": 0.31, + "learning_rate": 8.353276545655593e-06, + "loss": 3.4341, + "step": 2589 + }, + { + "epoch": 0.31, + "learning_rate": 8.351792553642972e-06, + "loss": 3.3975, + "step": 2590 + }, + { + "epoch": 0.31, + "learning_rate": 8.350308025209468e-06, + "loss": 3.4339, + "step": 2591 + }, + { + "epoch": 0.31, + "learning_rate": 8.348822960592665e-06, + "loss": 3.5572, + "step": 2592 + }, + { + "epoch": 0.31, + "learning_rate": 8.347337360030233e-06, + "loss": 3.4212, + "step": 2593 + }, + { + "epoch": 0.31, + "learning_rate": 8.345851223759928e-06, + "loss": 3.4273, + "step": 2594 + }, + { + "epoch": 0.31, + "learning_rate": 8.344364552019589e-06, + "loss": 3.5332, + "step": 2595 + }, + { + "epoch": 0.31, + "learning_rate": 8.342877345047146e-06, + "loss": 3.317, + "step": 2596 + }, + { + "epoch": 0.31, + "learning_rate": 8.34138960308061e-06, + "loss": 3.4307, + "step": 2597 + }, + { + "epoch": 0.31, + "learning_rate": 8.33990132635808e-06, + "loss": 3.3612, + "step": 2598 + }, + { + "epoch": 0.31, + "learning_rate": 8.338412515117738e-06, + "loss": 3.4417, + "step": 2599 + }, + { + "epoch": 0.31, + "learning_rate": 8.336923169597857e-06, + "loss": 3.3826, + "step": 2600 + }, + { + "epoch": 0.31, + "learning_rate": 8.335433290036787e-06, + "loss": 3.4767, + "step": 2601 + }, + { + "epoch": 0.31, + "learning_rate": 8.333942876672973e-06, + "loss": 3.4867, + "step": 2602 + }, + { + "epoch": 0.31, + "learning_rate": 8.332451929744937e-06, + "loss": 3.4264, + "step": 2603 + }, + { + "epoch": 0.31, + "learning_rate": 8.330960449491292e-06, + "loss": 3.4841, + "step": 2604 + }, + { + "epoch": 0.31, + "learning_rate": 8.329468436150734e-06, + "loss": 3.4015, + "step": 2605 + }, + { + "epoch": 0.31, + "learning_rate": 8.327975889962046e-06, + "loss": 3.3915, + "step": 2606 + }, + { + "epoch": 0.31, + "learning_rate": 8.326482811164093e-06, + "loss": 3.4806, + "step": 2607 + }, + { + "epoch": 0.31, + "learning_rate": 8.32498919999583e-06, + "loss": 3.3861, + "step": 2608 + }, + { + "epoch": 0.31, + "learning_rate": 8.323495056696291e-06, + "loss": 3.5215, + "step": 2609 + }, + { + "epoch": 0.31, + "learning_rate": 8.322000381504603e-06, + "loss": 3.4804, + "step": 2610 + }, + { + "epoch": 0.31, + "learning_rate": 8.320505174659969e-06, + "loss": 3.4255, + "step": 2611 + }, + { + "epoch": 0.31, + "learning_rate": 8.319009436401685e-06, + "loss": 3.4499, + "step": 2612 + }, + { + "epoch": 0.31, + "learning_rate": 8.317513166969129e-06, + "loss": 3.4572, + "step": 2613 + }, + { + "epoch": 0.31, + "learning_rate": 8.316016366601765e-06, + "loss": 3.4677, + "step": 2614 + }, + { + "epoch": 0.31, + "learning_rate": 8.314519035539138e-06, + "loss": 3.3661, + "step": 2615 + }, + { + "epoch": 0.31, + "learning_rate": 8.31302117402088e-06, + "loss": 3.4021, + "step": 2616 + }, + { + "epoch": 0.31, + "learning_rate": 8.311522782286713e-06, + "loss": 3.3969, + "step": 2617 + }, + { + "epoch": 0.31, + "learning_rate": 8.310023860576438e-06, + "loss": 3.4175, + "step": 2618 + }, + { + "epoch": 0.31, + "learning_rate": 8.30852440912994e-06, + "loss": 3.4809, + "step": 2619 + }, + { + "epoch": 0.31, + "learning_rate": 8.307024428187194e-06, + "loss": 3.4567, + "step": 2620 + }, + { + "epoch": 0.31, + "learning_rate": 8.305523917988257e-06, + "loss": 3.3783, + "step": 2621 + }, + { + "epoch": 0.31, + "learning_rate": 8.30402287877327e-06, + "loss": 3.5218, + "step": 2622 + }, + { + "epoch": 0.31, + "learning_rate": 8.302521310782456e-06, + "loss": 3.485, + "step": 2623 + }, + { + "epoch": 0.31, + "learning_rate": 8.301019214256131e-06, + "loss": 3.43, + "step": 2624 + }, + { + "epoch": 0.31, + "learning_rate": 8.29951658943469e-06, + "loss": 3.4157, + "step": 2625 + }, + { + "epoch": 0.31, + "learning_rate": 8.298013436558609e-06, + "loss": 3.4151, + "step": 2626 + }, + { + "epoch": 0.31, + "learning_rate": 8.296509755868457e-06, + "loss": 3.4753, + "step": 2627 + }, + { + "epoch": 0.31, + "learning_rate": 8.29500554760488e-06, + "loss": 3.4286, + "step": 2628 + }, + { + "epoch": 0.31, + "learning_rate": 8.293500812008615e-06, + "loss": 3.4584, + "step": 2629 + }, + { + "epoch": 0.31, + "learning_rate": 8.291995549320476e-06, + "loss": 3.5257, + "step": 2630 + }, + { + "epoch": 0.31, + "learning_rate": 8.290489759781365e-06, + "loss": 3.4881, + "step": 2631 + }, + { + "epoch": 0.32, + "learning_rate": 8.288983443632274e-06, + "loss": 3.5049, + "step": 2632 + }, + { + "epoch": 0.32, + "learning_rate": 8.287476601114269e-06, + "loss": 3.3847, + "step": 2633 + }, + { + "epoch": 0.32, + "learning_rate": 8.285969232468506e-06, + "loss": 3.3573, + "step": 2634 + }, + { + "epoch": 0.32, + "learning_rate": 8.284461337936225e-06, + "loss": 3.4686, + "step": 2635 + }, + { + "epoch": 0.32, + "learning_rate": 8.282952917758749e-06, + "loss": 3.4308, + "step": 2636 + }, + { + "epoch": 0.32, + "learning_rate": 8.281443972177485e-06, + "loss": 3.3719, + "step": 2637 + }, + { + "epoch": 0.32, + "learning_rate": 8.279934501433926e-06, + "loss": 3.3761, + "step": 2638 + }, + { + "epoch": 0.32, + "learning_rate": 8.278424505769647e-06, + "loss": 3.4916, + "step": 2639 + }, + { + "epoch": 0.32, + "learning_rate": 8.276913985426307e-06, + "loss": 3.4601, + "step": 2640 + }, + { + "epoch": 0.32, + "learning_rate": 8.27540294064565e-06, + "loss": 3.3578, + "step": 2641 + }, + { + "epoch": 0.32, + "learning_rate": 8.273891371669505e-06, + "loss": 3.4077, + "step": 2642 + }, + { + "epoch": 0.32, + "learning_rate": 8.272379278739783e-06, + "loss": 3.4093, + "step": 2643 + }, + { + "epoch": 0.32, + "learning_rate": 8.270866662098476e-06, + "loss": 3.4466, + "step": 2644 + }, + { + "epoch": 0.32, + "learning_rate": 8.269353521987668e-06, + "loss": 3.386, + "step": 2645 + }, + { + "epoch": 0.32, + "learning_rate": 8.267839858649522e-06, + "loss": 3.5648, + "step": 2646 + }, + { + "epoch": 0.32, + "learning_rate": 8.26632567232628e-06, + "loss": 3.4124, + "step": 2647 + }, + { + "epoch": 0.32, + "learning_rate": 8.264810963260276e-06, + "loss": 3.3992, + "step": 2648 + }, + { + "epoch": 0.32, + "learning_rate": 8.263295731693923e-06, + "loss": 3.4205, + "step": 2649 + }, + { + "epoch": 0.32, + "learning_rate": 8.261779977869719e-06, + "loss": 3.4483, + "step": 2650 + }, + { + "epoch": 0.32, + "learning_rate": 8.260263702030244e-06, + "loss": 3.3985, + "step": 2651 + }, + { + "epoch": 0.32, + "learning_rate": 8.258746904418164e-06, + "loss": 3.3331, + "step": 2652 + }, + { + "epoch": 0.32, + "learning_rate": 8.257229585276228e-06, + "loss": 3.4053, + "step": 2653 + }, + { + "epoch": 0.32, + "learning_rate": 8.255711744847265e-06, + "loss": 3.4192, + "step": 2654 + }, + { + "epoch": 0.32, + "learning_rate": 8.254193383374192e-06, + "loss": 3.4386, + "step": 2655 + }, + { + "epoch": 0.32, + "learning_rate": 8.252674501100008e-06, + "loss": 3.268, + "step": 2656 + }, + { + "epoch": 0.32, + "learning_rate": 8.251155098267796e-06, + "loss": 3.4518, + "step": 2657 + }, + { + "epoch": 0.32, + "learning_rate": 8.249635175120718e-06, + "loss": 3.4802, + "step": 2658 + }, + { + "epoch": 0.32, + "learning_rate": 8.248114731902027e-06, + "loss": 3.5496, + "step": 2659 + }, + { + "epoch": 0.32, + "learning_rate": 8.246593768855048e-06, + "loss": 3.3528, + "step": 2660 + }, + { + "epoch": 0.32, + "learning_rate": 8.2450722862232e-06, + "loss": 3.3613, + "step": 2661 + }, + { + "epoch": 0.32, + "learning_rate": 8.243550284249983e-06, + "loss": 3.4039, + "step": 2662 + }, + { + "epoch": 0.32, + "learning_rate": 8.242027763178976e-06, + "loss": 3.4005, + "step": 2663 + }, + { + "epoch": 0.32, + "learning_rate": 8.240504723253843e-06, + "loss": 3.4744, + "step": 2664 + }, + { + "epoch": 0.32, + "learning_rate": 8.238981164718333e-06, + "loss": 3.4456, + "step": 2665 + }, + { + "epoch": 0.32, + "learning_rate": 8.237457087816274e-06, + "loss": 3.5134, + "step": 2666 + }, + { + "epoch": 0.32, + "learning_rate": 8.235932492791581e-06, + "loss": 3.4454, + "step": 2667 + }, + { + "epoch": 0.32, + "learning_rate": 8.234407379888247e-06, + "loss": 3.4481, + "step": 2668 + }, + { + "epoch": 0.32, + "learning_rate": 8.232881749350355e-06, + "loss": 3.4088, + "step": 2669 + }, + { + "epoch": 0.32, + "learning_rate": 8.231355601422067e-06, + "loss": 3.4961, + "step": 2670 + }, + { + "epoch": 0.32, + "learning_rate": 8.229828936347625e-06, + "loss": 3.4201, + "step": 2671 + }, + { + "epoch": 0.32, + "learning_rate": 8.228301754371358e-06, + "loss": 3.4296, + "step": 2672 + }, + { + "epoch": 0.32, + "learning_rate": 8.226774055737674e-06, + "loss": 3.2846, + "step": 2673 + }, + { + "epoch": 0.32, + "learning_rate": 8.22524584069107e-06, + "loss": 3.5564, + "step": 2674 + }, + { + "epoch": 0.32, + "learning_rate": 8.223717109476117e-06, + "loss": 3.4448, + "step": 2675 + }, + { + "epoch": 0.32, + "learning_rate": 8.222187862337477e-06, + "loss": 3.4601, + "step": 2676 + }, + { + "epoch": 0.32, + "learning_rate": 8.220658099519889e-06, + "loss": 3.3984, + "step": 2677 + }, + { + "epoch": 0.32, + "learning_rate": 8.219127821268176e-06, + "loss": 3.402, + "step": 2678 + }, + { + "epoch": 0.32, + "learning_rate": 8.217597027827247e-06, + "loss": 3.4603, + "step": 2679 + }, + { + "epoch": 0.32, + "learning_rate": 8.216065719442084e-06, + "loss": 3.4782, + "step": 2680 + }, + { + "epoch": 0.32, + "learning_rate": 8.214533896357762e-06, + "loss": 3.4625, + "step": 2681 + }, + { + "epoch": 0.32, + "learning_rate": 8.213001558819433e-06, + "loss": 3.338, + "step": 2682 + }, + { + "epoch": 0.32, + "learning_rate": 8.211468707072332e-06, + "loss": 3.3934, + "step": 2683 + }, + { + "epoch": 0.32, + "learning_rate": 8.209935341361774e-06, + "loss": 3.3726, + "step": 2684 + }, + { + "epoch": 0.32, + "learning_rate": 8.208401461933165e-06, + "loss": 3.4378, + "step": 2685 + }, + { + "epoch": 0.32, + "learning_rate": 8.206867069031983e-06, + "loss": 3.4054, + "step": 2686 + }, + { + "epoch": 0.32, + "learning_rate": 8.205332162903794e-06, + "loss": 3.4704, + "step": 2687 + }, + { + "epoch": 0.32, + "learning_rate": 8.20379674379424e-06, + "loss": 3.3473, + "step": 2688 + }, + { + "epoch": 0.32, + "learning_rate": 8.202260811949056e-06, + "loss": 3.4938, + "step": 2689 + }, + { + "epoch": 0.32, + "learning_rate": 8.200724367614049e-06, + "loss": 3.5132, + "step": 2690 + }, + { + "epoch": 0.32, + "learning_rate": 8.199187411035112e-06, + "loss": 3.3869, + "step": 2691 + }, + { + "epoch": 0.32, + "learning_rate": 8.197649942458217e-06, + "loss": 3.4029, + "step": 2692 + }, + { + "epoch": 0.32, + "learning_rate": 8.196111962129425e-06, + "loss": 3.5126, + "step": 2693 + }, + { + "epoch": 0.32, + "learning_rate": 8.194573470294872e-06, + "loss": 3.4396, + "step": 2694 + }, + { + "epoch": 0.32, + "learning_rate": 8.193034467200777e-06, + "loss": 3.5005, + "step": 2695 + }, + { + "epoch": 0.32, + "learning_rate": 8.191494953093446e-06, + "loss": 3.4132, + "step": 2696 + }, + { + "epoch": 0.32, + "learning_rate": 8.189954928219258e-06, + "loss": 3.3377, + "step": 2697 + }, + { + "epoch": 0.32, + "learning_rate": 8.188414392824681e-06, + "loss": 3.421, + "step": 2698 + }, + { + "epoch": 0.32, + "learning_rate": 8.186873347156264e-06, + "loss": 3.4493, + "step": 2699 + }, + { + "epoch": 0.32, + "learning_rate": 8.185331791460633e-06, + "loss": 3.4146, + "step": 2700 + }, + { + "epoch": 0.32, + "learning_rate": 8.183789725984502e-06, + "loss": 3.3761, + "step": 2701 + }, + { + "epoch": 0.32, + "learning_rate": 8.182247150974657e-06, + "loss": 3.3983, + "step": 2702 + }, + { + "epoch": 0.32, + "learning_rate": 8.180704066677978e-06, + "loss": 3.4082, + "step": 2703 + }, + { + "epoch": 0.32, + "learning_rate": 8.179160473341418e-06, + "loss": 3.4621, + "step": 2704 + }, + { + "epoch": 0.32, + "learning_rate": 8.177616371212015e-06, + "loss": 3.3962, + "step": 2705 + }, + { + "epoch": 0.32, + "learning_rate": 8.176071760536882e-06, + "loss": 3.4691, + "step": 2706 + }, + { + "epoch": 0.32, + "learning_rate": 8.174526641563224e-06, + "loss": 3.4159, + "step": 2707 + }, + { + "epoch": 0.32, + "learning_rate": 8.17298101453832e-06, + "loss": 3.4676, + "step": 2708 + }, + { + "epoch": 0.32, + "learning_rate": 8.171434879709533e-06, + "loss": 3.3719, + "step": 2709 + }, + { + "epoch": 0.32, + "learning_rate": 8.169888237324303e-06, + "loss": 3.4643, + "step": 2710 + }, + { + "epoch": 0.32, + "learning_rate": 8.168341087630158e-06, + "loss": 3.5045, + "step": 2711 + }, + { + "epoch": 0.32, + "learning_rate": 8.166793430874705e-06, + "loss": 3.4, + "step": 2712 + }, + { + "epoch": 0.32, + "learning_rate": 8.165245267305627e-06, + "loss": 3.3939, + "step": 2713 + }, + { + "epoch": 0.32, + "learning_rate": 8.163696597170695e-06, + "loss": 3.3914, + "step": 2714 + }, + { + "epoch": 0.33, + "learning_rate": 8.162147420717756e-06, + "loss": 3.4239, + "step": 2715 + }, + { + "epoch": 0.33, + "learning_rate": 8.160597738194744e-06, + "loss": 3.4913, + "step": 2716 + }, + { + "epoch": 0.33, + "learning_rate": 8.159047549849665e-06, + "loss": 3.3813, + "step": 2717 + }, + { + "epoch": 0.33, + "learning_rate": 8.157496855930615e-06, + "loss": 3.3436, + "step": 2718 + }, + { + "epoch": 0.33, + "learning_rate": 8.155945656685766e-06, + "loss": 3.4255, + "step": 2719 + }, + { + "epoch": 0.33, + "learning_rate": 8.154393952363372e-06, + "loss": 3.3924, + "step": 2720 + }, + { + "epoch": 0.33, + "learning_rate": 8.152841743211766e-06, + "loss": 3.4829, + "step": 2721 + }, + { + "epoch": 0.33, + "learning_rate": 8.151289029479368e-06, + "loss": 3.5066, + "step": 2722 + }, + { + "epoch": 0.33, + "learning_rate": 8.14973581141467e-06, + "loss": 3.5151, + "step": 2723 + }, + { + "epoch": 0.33, + "learning_rate": 8.14818208926625e-06, + "loss": 3.3941, + "step": 2724 + }, + { + "epoch": 0.33, + "learning_rate": 8.146627863282766e-06, + "loss": 3.4991, + "step": 2725 + }, + { + "epoch": 0.33, + "learning_rate": 8.145073133712957e-06, + "loss": 3.4408, + "step": 2726 + }, + { + "epoch": 0.33, + "learning_rate": 8.14351790080564e-06, + "loss": 3.4126, + "step": 2727 + }, + { + "epoch": 0.33, + "learning_rate": 8.141962164809718e-06, + "loss": 3.5014, + "step": 2728 + }, + { + "epoch": 0.33, + "learning_rate": 8.140405925974166e-06, + "loss": 3.4127, + "step": 2729 + }, + { + "epoch": 0.33, + "learning_rate": 8.13884918454805e-06, + "loss": 3.451, + "step": 2730 + }, + { + "epoch": 0.33, + "learning_rate": 8.137291940780507e-06, + "loss": 3.4972, + "step": 2731 + }, + { + "epoch": 0.33, + "learning_rate": 8.135734194920762e-06, + "loss": 3.5569, + "step": 2732 + }, + { + "epoch": 0.33, + "learning_rate": 8.134175947218111e-06, + "loss": 3.5399, + "step": 2733 + }, + { + "epoch": 0.33, + "learning_rate": 8.132617197921939e-06, + "loss": 3.3606, + "step": 2734 + }, + { + "epoch": 0.33, + "learning_rate": 8.13105794728171e-06, + "loss": 3.4979, + "step": 2735 + }, + { + "epoch": 0.33, + "learning_rate": 8.129498195546965e-06, + "loss": 3.3861, + "step": 2736 + }, + { + "epoch": 0.33, + "learning_rate": 8.127937942967326e-06, + "loss": 3.4826, + "step": 2737 + }, + { + "epoch": 0.33, + "learning_rate": 8.126377189792497e-06, + "loss": 3.5137, + "step": 2738 + }, + { + "epoch": 0.33, + "learning_rate": 8.12481593627226e-06, + "loss": 3.4216, + "step": 2739 + }, + { + "epoch": 0.33, + "learning_rate": 8.123254182656475e-06, + "loss": 3.4407, + "step": 2740 + }, + { + "epoch": 0.33, + "learning_rate": 8.121691929195092e-06, + "loss": 3.3881, + "step": 2741 + }, + { + "epoch": 0.33, + "learning_rate": 8.12012917613813e-06, + "loss": 3.3425, + "step": 2742 + }, + { + "epoch": 0.33, + "learning_rate": 8.11856592373569e-06, + "loss": 3.3346, + "step": 2743 + }, + { + "epoch": 0.33, + "learning_rate": 8.11700217223796e-06, + "loss": 3.4457, + "step": 2744 + }, + { + "epoch": 0.33, + "learning_rate": 8.115437921895198e-06, + "loss": 3.485, + "step": 2745 + }, + { + "epoch": 0.33, + "learning_rate": 8.11387317295775e-06, + "loss": 3.3761, + "step": 2746 + }, + { + "epoch": 0.33, + "learning_rate": 8.112307925676037e-06, + "loss": 3.5002, + "step": 2747 + }, + { + "epoch": 0.33, + "learning_rate": 8.110742180300561e-06, + "loss": 3.3176, + "step": 2748 + }, + { + "epoch": 0.33, + "learning_rate": 8.109175937081904e-06, + "loss": 3.5018, + "step": 2749 + }, + { + "epoch": 0.33, + "learning_rate": 8.107609196270729e-06, + "loss": 3.583, + "step": 2750 + }, + { + "epoch": 0.33, + "learning_rate": 8.106041958117775e-06, + "loss": 3.3851, + "step": 2751 + }, + { + "epoch": 0.33, + "learning_rate": 8.104474222873864e-06, + "loss": 3.4999, + "step": 2752 + }, + { + "epoch": 0.33, + "learning_rate": 8.102905990789895e-06, + "loss": 3.4734, + "step": 2753 + }, + { + "epoch": 0.33, + "learning_rate": 8.10133726211685e-06, + "loss": 3.4348, + "step": 2754 + }, + { + "epoch": 0.33, + "learning_rate": 8.099768037105788e-06, + "loss": 3.4404, + "step": 2755 + }, + { + "epoch": 0.33, + "learning_rate": 8.098198316007848e-06, + "loss": 3.4186, + "step": 2756 + }, + { + "epoch": 0.33, + "learning_rate": 8.096628099074243e-06, + "loss": 3.4293, + "step": 2757 + }, + { + "epoch": 0.33, + "learning_rate": 8.095057386556278e-06, + "loss": 3.4254, + "step": 2758 + }, + { + "epoch": 0.33, + "learning_rate": 8.093486178705327e-06, + "loss": 3.5392, + "step": 2759 + }, + { + "epoch": 0.33, + "learning_rate": 8.091914475772844e-06, + "loss": 3.4429, + "step": 2760 + }, + { + "epoch": 0.33, + "learning_rate": 8.090342278010367e-06, + "loss": 3.4326, + "step": 2761 + }, + { + "epoch": 0.33, + "learning_rate": 8.08876958566951e-06, + "loss": 3.3731, + "step": 2762 + }, + { + "epoch": 0.33, + "learning_rate": 8.087196399001965e-06, + "loss": 3.3754, + "step": 2763 + }, + { + "epoch": 0.33, + "learning_rate": 8.085622718259505e-06, + "loss": 3.4731, + "step": 2764 + }, + { + "epoch": 0.33, + "learning_rate": 8.084048543693985e-06, + "loss": 3.4582, + "step": 2765 + }, + { + "epoch": 0.33, + "learning_rate": 8.082473875557333e-06, + "loss": 3.3382, + "step": 2766 + }, + { + "epoch": 0.33, + "learning_rate": 8.080898714101559e-06, + "loss": 3.4795, + "step": 2767 + }, + { + "epoch": 0.33, + "learning_rate": 8.079323059578755e-06, + "loss": 3.3737, + "step": 2768 + }, + { + "epoch": 0.33, + "learning_rate": 8.077746912241084e-06, + "loss": 3.2744, + "step": 2769 + }, + { + "epoch": 0.33, + "learning_rate": 8.076170272340795e-06, + "loss": 3.4084, + "step": 2770 + }, + { + "epoch": 0.33, + "learning_rate": 8.074593140130215e-06, + "loss": 3.4552, + "step": 2771 + }, + { + "epoch": 0.33, + "learning_rate": 8.073015515861747e-06, + "loss": 3.397, + "step": 2772 + }, + { + "epoch": 0.33, + "learning_rate": 8.071437399787872e-06, + "loss": 3.3677, + "step": 2773 + }, + { + "epoch": 0.33, + "learning_rate": 8.069858792161152e-06, + "loss": 3.4791, + "step": 2774 + }, + { + "epoch": 0.33, + "learning_rate": 8.06827969323423e-06, + "loss": 3.4163, + "step": 2775 + }, + { + "epoch": 0.33, + "learning_rate": 8.066700103259825e-06, + "loss": 3.487, + "step": 2776 + }, + { + "epoch": 0.33, + "learning_rate": 8.06512002249073e-06, + "loss": 3.3864, + "step": 2777 + }, + { + "epoch": 0.33, + "learning_rate": 8.063539451179828e-06, + "loss": 3.4849, + "step": 2778 + }, + { + "epoch": 0.33, + "learning_rate": 8.061958389580068e-06, + "loss": 3.4202, + "step": 2779 + }, + { + "epoch": 0.33, + "learning_rate": 8.060376837944486e-06, + "loss": 3.5036, + "step": 2780 + }, + { + "epoch": 0.33, + "learning_rate": 8.058794796526192e-06, + "loss": 3.4478, + "step": 2781 + }, + { + "epoch": 0.33, + "learning_rate": 8.057212265578376e-06, + "loss": 3.4214, + "step": 2782 + }, + { + "epoch": 0.33, + "learning_rate": 8.055629245354308e-06, + "loss": 3.4755, + "step": 2783 + }, + { + "epoch": 0.33, + "learning_rate": 8.054045736107333e-06, + "loss": 3.4845, + "step": 2784 + }, + { + "epoch": 0.33, + "learning_rate": 8.052461738090876e-06, + "loss": 3.4198, + "step": 2785 + }, + { + "epoch": 0.33, + "learning_rate": 8.05087725155844e-06, + "loss": 3.3101, + "step": 2786 + }, + { + "epoch": 0.33, + "learning_rate": 8.049292276763607e-06, + "loss": 3.4625, + "step": 2787 + }, + { + "epoch": 0.33, + "learning_rate": 8.047706813960035e-06, + "loss": 3.3709, + "step": 2788 + }, + { + "epoch": 0.33, + "learning_rate": 8.046120863401461e-06, + "loss": 3.4067, + "step": 2789 + }, + { + "epoch": 0.33, + "learning_rate": 8.044534425341703e-06, + "loss": 3.3915, + "step": 2790 + }, + { + "epoch": 0.33, + "learning_rate": 8.042947500034654e-06, + "loss": 3.4239, + "step": 2791 + }, + { + "epoch": 0.33, + "learning_rate": 8.041360087734286e-06, + "loss": 3.4658, + "step": 2792 + }, + { + "epoch": 0.33, + "learning_rate": 8.039772188694645e-06, + "loss": 3.4744, + "step": 2793 + }, + { + "epoch": 0.33, + "learning_rate": 8.038183803169862e-06, + "loss": 3.3731, + "step": 2794 + }, + { + "epoch": 0.33, + "learning_rate": 8.036594931414141e-06, + "loss": 3.3404, + "step": 2795 + }, + { + "epoch": 0.33, + "learning_rate": 8.035005573681765e-06, + "loss": 3.4759, + "step": 2796 + }, + { + "epoch": 0.33, + "learning_rate": 8.033415730227095e-06, + "loss": 3.5032, + "step": 2797 + }, + { + "epoch": 0.33, + "learning_rate": 8.031825401304568e-06, + "loss": 3.504, + "step": 2798 + }, + { + "epoch": 0.34, + "learning_rate": 8.030234587168702e-06, + "loss": 3.4602, + "step": 2799 + }, + { + "epoch": 0.34, + "learning_rate": 8.02864328807409e-06, + "loss": 3.4534, + "step": 2800 + }, + { + "epoch": 0.34, + "learning_rate": 8.027051504275404e-06, + "loss": 3.4167, + "step": 2801 + }, + { + "epoch": 0.34, + "learning_rate": 8.025459236027393e-06, + "loss": 3.3105, + "step": 2802 + }, + { + "epoch": 0.34, + "learning_rate": 8.023866483584886e-06, + "loss": 3.4217, + "step": 2803 + }, + { + "epoch": 0.34, + "learning_rate": 8.022273247202783e-06, + "loss": 3.4291, + "step": 2804 + }, + { + "epoch": 0.34, + "learning_rate": 8.020679527136067e-06, + "loss": 3.4491, + "step": 2805 + }, + { + "epoch": 0.34, + "learning_rate": 8.019085323639796e-06, + "loss": 3.3336, + "step": 2806 + }, + { + "epoch": 0.34, + "learning_rate": 8.01749063696911e-06, + "loss": 3.4034, + "step": 2807 + }, + { + "epoch": 0.34, + "learning_rate": 8.01589546737922e-06, + "loss": 3.4211, + "step": 2808 + }, + { + "epoch": 0.34, + "learning_rate": 8.014299815125417e-06, + "loss": 3.4255, + "step": 2809 + }, + { + "epoch": 0.34, + "learning_rate": 8.01270368046307e-06, + "loss": 3.5165, + "step": 2810 + }, + { + "epoch": 0.34, + "learning_rate": 8.011107063647622e-06, + "loss": 3.3843, + "step": 2811 + }, + { + "epoch": 0.34, + "learning_rate": 8.009509964934599e-06, + "loss": 3.4523, + "step": 2812 + }, + { + "epoch": 0.34, + "learning_rate": 8.007912384579597e-06, + "loss": 3.4155, + "step": 2813 + }, + { + "epoch": 0.34, + "learning_rate": 8.006314322838295e-06, + "loss": 3.4694, + "step": 2814 + }, + { + "epoch": 0.34, + "learning_rate": 8.004715779966448e-06, + "loss": 3.4158, + "step": 2815 + }, + { + "epoch": 0.34, + "learning_rate": 8.003116756219887e-06, + "loss": 3.462, + "step": 2816 + }, + { + "epoch": 0.34, + "learning_rate": 8.001517251854514e-06, + "loss": 3.5356, + "step": 2817 + }, + { + "epoch": 0.34, + "learning_rate": 7.99991726712632e-06, + "loss": 3.5669, + "step": 2818 + }, + { + "epoch": 0.34, + "learning_rate": 7.998316802291364e-06, + "loss": 3.441, + "step": 2819 + }, + { + "epoch": 0.34, + "learning_rate": 7.996715857605786e-06, + "loss": 3.3479, + "step": 2820 + }, + { + "epoch": 0.34, + "learning_rate": 7.995114433325799e-06, + "loss": 3.3704, + "step": 2821 + }, + { + "epoch": 0.34, + "learning_rate": 7.993512529707695e-06, + "loss": 3.4375, + "step": 2822 + }, + { + "epoch": 0.34, + "learning_rate": 7.991910147007845e-06, + "loss": 3.421, + "step": 2823 + }, + { + "epoch": 0.34, + "learning_rate": 7.990307285482693e-06, + "loss": 3.5201, + "step": 2824 + }, + { + "epoch": 0.34, + "learning_rate": 7.98870394538876e-06, + "loss": 3.4008, + "step": 2825 + }, + { + "epoch": 0.34, + "learning_rate": 7.987100126982645e-06, + "loss": 3.3828, + "step": 2826 + }, + { + "epoch": 0.34, + "learning_rate": 7.985495830521024e-06, + "loss": 3.3642, + "step": 2827 + }, + { + "epoch": 0.34, + "learning_rate": 7.983891056260647e-06, + "loss": 3.5334, + "step": 2828 + }, + { + "epoch": 0.34, + "learning_rate": 7.982285804458344e-06, + "loss": 3.4611, + "step": 2829 + }, + { + "epoch": 0.34, + "learning_rate": 7.98068007537102e-06, + "loss": 3.4919, + "step": 2830 + }, + { + "epoch": 0.34, + "learning_rate": 7.979073869255654e-06, + "loss": 3.4245, + "step": 2831 + }, + { + "epoch": 0.34, + "learning_rate": 7.977467186369303e-06, + "loss": 3.538, + "step": 2832 + }, + { + "epoch": 0.34, + "learning_rate": 7.975860026969102e-06, + "loss": 3.4488, + "step": 2833 + }, + { + "epoch": 0.34, + "learning_rate": 7.974252391312259e-06, + "loss": 3.4145, + "step": 2834 + }, + { + "epoch": 0.34, + "learning_rate": 7.972644279656062e-06, + "loss": 3.4405, + "step": 2835 + }, + { + "epoch": 0.34, + "learning_rate": 7.97103569225787e-06, + "loss": 3.3874, + "step": 2836 + }, + { + "epoch": 0.34, + "learning_rate": 7.969426629375127e-06, + "loss": 3.4569, + "step": 2837 + }, + { + "epoch": 0.34, + "learning_rate": 7.967817091265344e-06, + "loss": 3.4902, + "step": 2838 + }, + { + "epoch": 0.34, + "learning_rate": 7.96620707818611e-06, + "loss": 3.4005, + "step": 2839 + }, + { + "epoch": 0.34, + "learning_rate": 7.964596590395092e-06, + "loss": 3.4409, + "step": 2840 + }, + { + "epoch": 0.34, + "learning_rate": 7.962985628150037e-06, + "loss": 3.4853, + "step": 2841 + }, + { + "epoch": 0.34, + "learning_rate": 7.961374191708757e-06, + "loss": 3.3631, + "step": 2842 + }, + { + "epoch": 0.34, + "learning_rate": 7.959762281329151e-06, + "loss": 3.4731, + "step": 2843 + }, + { + "epoch": 0.34, + "learning_rate": 7.958149897269186e-06, + "loss": 3.4892, + "step": 2844 + }, + { + "epoch": 0.34, + "learning_rate": 7.95653703978691e-06, + "loss": 3.3405, + "step": 2845 + }, + { + "epoch": 0.34, + "learning_rate": 7.954923709140442e-06, + "loss": 3.4658, + "step": 2846 + }, + { + "epoch": 0.34, + "learning_rate": 7.953309905587984e-06, + "loss": 3.3954, + "step": 2847 + }, + { + "epoch": 0.34, + "learning_rate": 7.951695629387806e-06, + "loss": 3.338, + "step": 2848 + }, + { + "epoch": 0.34, + "learning_rate": 7.950080880798256e-06, + "loss": 3.4273, + "step": 2849 + }, + { + "epoch": 0.34, + "learning_rate": 7.948465660077762e-06, + "loss": 3.4621, + "step": 2850 + }, + { + "epoch": 0.34, + "learning_rate": 7.94684996748482e-06, + "loss": 3.4042, + "step": 2851 + }, + { + "epoch": 0.34, + "learning_rate": 7.945233803278007e-06, + "loss": 3.3057, + "step": 2852 + }, + { + "epoch": 0.34, + "learning_rate": 7.943617167715974e-06, + "loss": 3.4463, + "step": 2853 + }, + { + "epoch": 0.34, + "learning_rate": 7.942000061057448e-06, + "loss": 3.4727, + "step": 2854 + }, + { + "epoch": 0.34, + "learning_rate": 7.940382483561228e-06, + "loss": 3.3984, + "step": 2855 + }, + { + "epoch": 0.34, + "learning_rate": 7.938764435486195e-06, + "loss": 3.3789, + "step": 2856 + }, + { + "epoch": 0.34, + "learning_rate": 7.9371459170913e-06, + "loss": 3.3825, + "step": 2857 + }, + { + "epoch": 0.34, + "learning_rate": 7.935526928635571e-06, + "loss": 3.4652, + "step": 2858 + }, + { + "epoch": 0.34, + "learning_rate": 7.933907470378108e-06, + "loss": 3.4679, + "step": 2859 + }, + { + "epoch": 0.34, + "learning_rate": 7.932287542578093e-06, + "loss": 3.5223, + "step": 2860 + }, + { + "epoch": 0.34, + "learning_rate": 7.930667145494776e-06, + "loss": 3.4159, + "step": 2861 + }, + { + "epoch": 0.34, + "learning_rate": 7.929046279387488e-06, + "loss": 3.5637, + "step": 2862 + }, + { + "epoch": 0.34, + "learning_rate": 7.92742494451563e-06, + "loss": 3.4644, + "step": 2863 + }, + { + "epoch": 0.34, + "learning_rate": 7.925803141138684e-06, + "loss": 3.4849, + "step": 2864 + }, + { + "epoch": 0.34, + "learning_rate": 7.9241808695162e-06, + "loss": 3.5148, + "step": 2865 + }, + { + "epoch": 0.34, + "learning_rate": 7.922558129907806e-06, + "loss": 3.4584, + "step": 2866 + }, + { + "epoch": 0.34, + "learning_rate": 7.920934922573208e-06, + "loss": 3.3564, + "step": 2867 + }, + { + "epoch": 0.34, + "learning_rate": 7.919311247772183e-06, + "loss": 3.426, + "step": 2868 + }, + { + "epoch": 0.34, + "learning_rate": 7.917687105764584e-06, + "loss": 3.4761, + "step": 2869 + }, + { + "epoch": 0.34, + "learning_rate": 7.916062496810338e-06, + "loss": 3.4918, + "step": 2870 + }, + { + "epoch": 0.34, + "learning_rate": 7.914437421169448e-06, + "loss": 3.4616, + "step": 2871 + }, + { + "epoch": 0.34, + "learning_rate": 7.91281187910199e-06, + "loss": 3.4741, + "step": 2872 + }, + { + "epoch": 0.34, + "learning_rate": 7.91118587086812e-06, + "loss": 3.388, + "step": 2873 + }, + { + "epoch": 0.34, + "learning_rate": 7.909559396728056e-06, + "loss": 3.5234, + "step": 2874 + }, + { + "epoch": 0.34, + "learning_rate": 7.907932456942108e-06, + "loss": 3.4502, + "step": 2875 + }, + { + "epoch": 0.34, + "learning_rate": 7.906305051770646e-06, + "loss": 3.4483, + "step": 2876 + }, + { + "epoch": 0.34, + "learning_rate": 7.904677181474123e-06, + "loss": 3.3346, + "step": 2877 + }, + { + "epoch": 0.34, + "learning_rate": 7.903048846313059e-06, + "loss": 3.4214, + "step": 2878 + }, + { + "epoch": 0.34, + "learning_rate": 7.901420046548059e-06, + "loss": 3.5707, + "step": 2879 + }, + { + "epoch": 0.34, + "learning_rate": 7.89979078243979e-06, + "loss": 3.5093, + "step": 2880 + }, + { + "epoch": 0.34, + "learning_rate": 7.898161054249003e-06, + "loss": 3.3716, + "step": 2881 + }, + { + "epoch": 0.35, + "learning_rate": 7.896530862236518e-06, + "loss": 3.4912, + "step": 2882 + }, + { + "epoch": 0.35, + "learning_rate": 7.894900206663232e-06, + "loss": 3.4511, + "step": 2883 + }, + { + "epoch": 0.35, + "learning_rate": 7.893269087790115e-06, + "loss": 3.5483, + "step": 2884 + }, + { + "epoch": 0.35, + "learning_rate": 7.89163750587821e-06, + "loss": 3.4212, + "step": 2885 + }, + { + "epoch": 0.35, + "learning_rate": 7.89000546118864e-06, + "loss": 3.4227, + "step": 2886 + }, + { + "epoch": 0.35, + "learning_rate": 7.888372953982589e-06, + "loss": 3.4362, + "step": 2887 + }, + { + "epoch": 0.35, + "learning_rate": 7.886739984521331e-06, + "loss": 3.3503, + "step": 2888 + }, + { + "epoch": 0.35, + "learning_rate": 7.885106553066204e-06, + "loss": 3.4685, + "step": 2889 + }, + { + "epoch": 0.35, + "learning_rate": 7.883472659878622e-06, + "loss": 3.4981, + "step": 2890 + }, + { + "epoch": 0.35, + "learning_rate": 7.881838305220073e-06, + "loss": 3.4383, + "step": 2891 + }, + { + "epoch": 0.35, + "learning_rate": 7.880203489352119e-06, + "loss": 3.495, + "step": 2892 + }, + { + "epoch": 0.35, + "learning_rate": 7.878568212536398e-06, + "loss": 3.4063, + "step": 2893 + }, + { + "epoch": 0.35, + "learning_rate": 7.876932475034615e-06, + "loss": 3.5101, + "step": 2894 + }, + { + "epoch": 0.35, + "learning_rate": 7.87529627710856e-06, + "loss": 3.5329, + "step": 2895 + }, + { + "epoch": 0.35, + "learning_rate": 7.873659619020084e-06, + "loss": 3.4103, + "step": 2896 + }, + { + "epoch": 0.35, + "learning_rate": 7.872022501031122e-06, + "loss": 3.3618, + "step": 2897 + }, + { + "epoch": 0.35, + "learning_rate": 7.870384923403677e-06, + "loss": 3.4117, + "step": 2898 + }, + { + "epoch": 0.35, + "learning_rate": 7.868746886399826e-06, + "loss": 3.3996, + "step": 2899 + }, + { + "epoch": 0.35, + "learning_rate": 7.867108390281723e-06, + "loss": 3.3569, + "step": 2900 + }, + { + "epoch": 0.35, + "learning_rate": 7.86546943531159e-06, + "loss": 3.4787, + "step": 2901 + }, + { + "epoch": 0.35, + "learning_rate": 7.863830021751726e-06, + "loss": 3.3157, + "step": 2902 + }, + { + "epoch": 0.35, + "learning_rate": 7.862190149864504e-06, + "loss": 3.5046, + "step": 2903 + }, + { + "epoch": 0.35, + "learning_rate": 7.860549819912366e-06, + "loss": 3.3636, + "step": 2904 + }, + { + "epoch": 0.35, + "learning_rate": 7.858909032157836e-06, + "loss": 3.437, + "step": 2905 + }, + { + "epoch": 0.35, + "learning_rate": 7.857267786863501e-06, + "loss": 3.4436, + "step": 2906 + }, + { + "epoch": 0.35, + "learning_rate": 7.855626084292027e-06, + "loss": 3.4637, + "step": 2907 + }, + { + "epoch": 0.35, + "learning_rate": 7.853983924706153e-06, + "loss": 3.3973, + "step": 2908 + }, + { + "epoch": 0.35, + "learning_rate": 7.85234130836869e-06, + "loss": 3.4266, + "step": 2909 + }, + { + "epoch": 0.35, + "learning_rate": 7.85069823554252e-06, + "loss": 3.5159, + "step": 2910 + }, + { + "epoch": 0.35, + "learning_rate": 7.849054706490603e-06, + "loss": 3.3726, + "step": 2911 + }, + { + "epoch": 0.35, + "learning_rate": 7.84741072147597e-06, + "loss": 3.3886, + "step": 2912 + }, + { + "epoch": 0.35, + "learning_rate": 7.845766280761722e-06, + "loss": 3.4748, + "step": 2913 + }, + { + "epoch": 0.35, + "learning_rate": 7.844121384611037e-06, + "loss": 3.4201, + "step": 2914 + }, + { + "epoch": 0.35, + "learning_rate": 7.842476033287162e-06, + "loss": 3.4173, + "step": 2915 + }, + { + "epoch": 0.35, + "learning_rate": 7.840830227053422e-06, + "loss": 3.4143, + "step": 2916 + }, + { + "epoch": 0.35, + "learning_rate": 7.83918396617321e-06, + "loss": 3.4709, + "step": 2917 + }, + { + "epoch": 0.35, + "learning_rate": 7.837537250909994e-06, + "loss": 3.3472, + "step": 2918 + }, + { + "epoch": 0.35, + "learning_rate": 7.835890081527314e-06, + "loss": 3.5215, + "step": 2919 + }, + { + "epoch": 0.35, + "learning_rate": 7.834242458288784e-06, + "loss": 3.3857, + "step": 2920 + }, + { + "epoch": 0.35, + "learning_rate": 7.832594381458088e-06, + "loss": 3.4023, + "step": 2921 + }, + { + "epoch": 0.35, + "learning_rate": 7.830945851298986e-06, + "loss": 3.44, + "step": 2922 + }, + { + "epoch": 0.35, + "learning_rate": 7.829296868075306e-06, + "loss": 3.4484, + "step": 2923 + }, + { + "epoch": 0.35, + "learning_rate": 7.827647432050955e-06, + "loss": 3.4691, + "step": 2924 + }, + { + "epoch": 0.35, + "learning_rate": 7.825997543489908e-06, + "loss": 3.3739, + "step": 2925 + }, + { + "epoch": 0.35, + "learning_rate": 7.824347202656211e-06, + "loss": 3.434, + "step": 2926 + }, + { + "epoch": 0.35, + "learning_rate": 7.822696409813984e-06, + "loss": 3.46, + "step": 2927 + }, + { + "epoch": 0.35, + "learning_rate": 7.821045165227424e-06, + "loss": 3.4462, + "step": 2928 + }, + { + "epoch": 0.35, + "learning_rate": 7.819393469160791e-06, + "loss": 3.379, + "step": 2929 + }, + { + "epoch": 0.35, + "learning_rate": 7.817741321878427e-06, + "loss": 3.3795, + "step": 2930 + }, + { + "epoch": 0.35, + "learning_rate": 7.81608872364474e-06, + "loss": 3.374, + "step": 2931 + }, + { + "epoch": 0.35, + "learning_rate": 7.81443567472421e-06, + "loss": 3.4128, + "step": 2932 + }, + { + "epoch": 0.35, + "learning_rate": 7.812782175381394e-06, + "loss": 3.3344, + "step": 2933 + }, + { + "epoch": 0.35, + "learning_rate": 7.811128225880916e-06, + "loss": 3.4944, + "step": 2934 + }, + { + "epoch": 0.35, + "learning_rate": 7.809473826487475e-06, + "loss": 3.4087, + "step": 2935 + }, + { + "epoch": 0.35, + "learning_rate": 7.807818977465839e-06, + "loss": 3.3047, + "step": 2936 + }, + { + "epoch": 0.35, + "learning_rate": 7.806163679080854e-06, + "loss": 3.3491, + "step": 2937 + }, + { + "epoch": 0.35, + "learning_rate": 7.804507931597432e-06, + "loss": 3.4556, + "step": 2938 + }, + { + "epoch": 0.35, + "learning_rate": 7.802851735280557e-06, + "loss": 3.4929, + "step": 2939 + }, + { + "epoch": 0.35, + "learning_rate": 7.801195090395289e-06, + "loss": 3.3547, + "step": 2940 + }, + { + "epoch": 0.35, + "learning_rate": 7.799537997206756e-06, + "loss": 3.4794, + "step": 2941 + }, + { + "epoch": 0.35, + "learning_rate": 7.79788045598016e-06, + "loss": 3.3899, + "step": 2942 + }, + { + "epoch": 0.35, + "learning_rate": 7.796222466980777e-06, + "loss": 3.4428, + "step": 2943 + }, + { + "epoch": 0.35, + "learning_rate": 7.794564030473943e-06, + "loss": 3.3445, + "step": 2944 + }, + { + "epoch": 0.35, + "learning_rate": 7.792905146725083e-06, + "loss": 3.3758, + "step": 2945 + }, + { + "epoch": 0.35, + "learning_rate": 7.79124581599968e-06, + "loss": 3.3267, + "step": 2946 + }, + { + "epoch": 0.35, + "learning_rate": 7.789586038563297e-06, + "loss": 3.4362, + "step": 2947 + }, + { + "epoch": 0.35, + "learning_rate": 7.787925814681558e-06, + "loss": 3.3516, + "step": 2948 + }, + { + "epoch": 0.35, + "learning_rate": 7.786265144620174e-06, + "loss": 3.3347, + "step": 2949 + }, + { + "epoch": 0.35, + "learning_rate": 7.78460402864491e-06, + "loss": 3.5641, + "step": 2950 + }, + { + "epoch": 0.35, + "learning_rate": 7.782942467021618e-06, + "loss": 3.5732, + "step": 2951 + }, + { + "epoch": 0.35, + "learning_rate": 7.78128046001621e-06, + "loss": 3.4034, + "step": 2952 + }, + { + "epoch": 0.35, + "learning_rate": 7.779618007894678e-06, + "loss": 3.4938, + "step": 2953 + }, + { + "epoch": 0.35, + "learning_rate": 7.777955110923075e-06, + "loss": 3.4205, + "step": 2954 + }, + { + "epoch": 0.35, + "learning_rate": 7.776291769367535e-06, + "loss": 3.4511, + "step": 2955 + }, + { + "epoch": 0.35, + "learning_rate": 7.774627983494259e-06, + "loss": 3.4391, + "step": 2956 + }, + { + "epoch": 0.35, + "learning_rate": 7.772963753569516e-06, + "loss": 3.375, + "step": 2957 + }, + { + "epoch": 0.35, + "learning_rate": 7.771299079859654e-06, + "loss": 3.4011, + "step": 2958 + }, + { + "epoch": 0.35, + "learning_rate": 7.769633962631084e-06, + "loss": 3.425, + "step": 2959 + }, + { + "epoch": 0.35, + "learning_rate": 7.767968402150294e-06, + "loss": 3.3886, + "step": 2960 + }, + { + "epoch": 0.35, + "learning_rate": 7.766302398683839e-06, + "loss": 3.4317, + "step": 2961 + }, + { + "epoch": 0.35, + "learning_rate": 7.764635952498345e-06, + "loss": 3.437, + "step": 2962 + }, + { + "epoch": 0.35, + "learning_rate": 7.762969063860511e-06, + "loss": 3.4595, + "step": 2963 + }, + { + "epoch": 0.35, + "learning_rate": 7.761301733037106e-06, + "loss": 3.478, + "step": 2964 + }, + { + "epoch": 0.35, + "learning_rate": 7.75963396029497e-06, + "loss": 3.4498, + "step": 2965 + }, + { + "epoch": 0.36, + "learning_rate": 7.757965745901015e-06, + "loss": 3.4683, + "step": 2966 + }, + { + "epoch": 0.36, + "learning_rate": 7.756297090122218e-06, + "loss": 3.4266, + "step": 2967 + }, + { + "epoch": 0.36, + "learning_rate": 7.754627993225632e-06, + "loss": 3.4364, + "step": 2968 + }, + { + "epoch": 0.36, + "learning_rate": 7.752958455478381e-06, + "loss": 3.3349, + "step": 2969 + }, + { + "epoch": 0.36, + "learning_rate": 7.751288477147656e-06, + "loss": 3.5207, + "step": 2970 + }, + { + "epoch": 0.36, + "learning_rate": 7.749618058500722e-06, + "loss": 3.3805, + "step": 2971 + }, + { + "epoch": 0.36, + "learning_rate": 7.747947199804912e-06, + "loss": 3.4797, + "step": 2972 + }, + { + "epoch": 0.36, + "learning_rate": 7.74627590132763e-06, + "loss": 3.3429, + "step": 2973 + }, + { + "epoch": 0.36, + "learning_rate": 7.74460416333635e-06, + "loss": 3.4175, + "step": 2974 + }, + { + "epoch": 0.36, + "learning_rate": 7.742931986098618e-06, + "loss": 3.4886, + "step": 2975 + }, + { + "epoch": 0.36, + "learning_rate": 7.741259369882049e-06, + "loss": 3.3873, + "step": 2976 + }, + { + "epoch": 0.36, + "learning_rate": 7.739586314954329e-06, + "loss": 3.5561, + "step": 2977 + }, + { + "epoch": 0.36, + "learning_rate": 7.737912821583211e-06, + "loss": 3.4286, + "step": 2978 + }, + { + "epoch": 0.36, + "learning_rate": 7.736238890036525e-06, + "loss": 3.3969, + "step": 2979 + }, + { + "epoch": 0.36, + "learning_rate": 7.734564520582165e-06, + "loss": 3.4655, + "step": 2980 + }, + { + "epoch": 0.36, + "learning_rate": 7.732889713488096e-06, + "loss": 3.3905, + "step": 2981 + }, + { + "epoch": 0.36, + "learning_rate": 7.731214469022356e-06, + "loss": 3.5287, + "step": 2982 + }, + { + "epoch": 0.36, + "learning_rate": 7.729538787453051e-06, + "loss": 3.3337, + "step": 2983 + }, + { + "epoch": 0.36, + "learning_rate": 7.727862669048356e-06, + "loss": 3.4413, + "step": 2984 + }, + { + "epoch": 0.36, + "learning_rate": 7.726186114076518e-06, + "loss": 3.4025, + "step": 2985 + }, + { + "epoch": 0.36, + "learning_rate": 7.72450912280585e-06, + "loss": 3.5163, + "step": 2986 + }, + { + "epoch": 0.36, + "learning_rate": 7.722831695504743e-06, + "loss": 3.4305, + "step": 2987 + }, + { + "epoch": 0.36, + "learning_rate": 7.721153832441647e-06, + "loss": 3.4036, + "step": 2988 + }, + { + "epoch": 0.36, + "learning_rate": 7.719475533885091e-06, + "loss": 3.4471, + "step": 2989 + }, + { + "epoch": 0.36, + "learning_rate": 7.71779680010367e-06, + "loss": 3.4409, + "step": 2990 + }, + { + "epoch": 0.36, + "learning_rate": 7.716117631366045e-06, + "loss": 3.4416, + "step": 2991 + }, + { + "epoch": 0.36, + "learning_rate": 7.714438027940953e-06, + "loss": 3.4912, + "step": 2992 + }, + { + "epoch": 0.36, + "learning_rate": 7.712757990097196e-06, + "loss": 3.5001, + "step": 2993 + }, + { + "epoch": 0.36, + "learning_rate": 7.711077518103649e-06, + "loss": 3.3776, + "step": 2994 + }, + { + "epoch": 0.36, + "learning_rate": 7.709396612229255e-06, + "loss": 3.4623, + "step": 2995 + }, + { + "epoch": 0.36, + "learning_rate": 7.707715272743023e-06, + "loss": 3.4862, + "step": 2996 + }, + { + "epoch": 0.36, + "learning_rate": 7.706033499914038e-06, + "loss": 3.4038, + "step": 2997 + }, + { + "epoch": 0.36, + "learning_rate": 7.704351294011451e-06, + "loss": 3.3793, + "step": 2998 + }, + { + "epoch": 0.36, + "learning_rate": 7.702668655304479e-06, + "loss": 3.3701, + "step": 2999 + }, + { + "epoch": 0.36, + "learning_rate": 7.700985584062414e-06, + "loss": 3.4827, + "step": 3000 + }, + { + "epoch": 0.36, + "learning_rate": 7.699302080554614e-06, + "loss": 3.3715, + "step": 3001 + }, + { + "epoch": 0.36, + "learning_rate": 7.697618145050508e-06, + "loss": 3.4823, + "step": 3002 + }, + { + "epoch": 0.36, + "learning_rate": 7.69593377781959e-06, + "loss": 3.411, + "step": 3003 + }, + { + "epoch": 0.36, + "learning_rate": 7.694248979131431e-06, + "loss": 3.4323, + "step": 3004 + }, + { + "epoch": 0.36, + "learning_rate": 7.692563749255662e-06, + "loss": 3.4199, + "step": 3005 + }, + { + "epoch": 0.36, + "learning_rate": 7.690878088461987e-06, + "loss": 3.4539, + "step": 3006 + }, + { + "epoch": 0.36, + "learning_rate": 7.689191997020184e-06, + "loss": 3.4565, + "step": 3007 + }, + { + "epoch": 0.36, + "learning_rate": 7.68750547520009e-06, + "loss": 3.4565, + "step": 3008 + }, + { + "epoch": 0.36, + "learning_rate": 7.685818523271618e-06, + "loss": 3.4229, + "step": 3009 + }, + { + "epoch": 0.36, + "learning_rate": 7.684131141504748e-06, + "loss": 3.3842, + "step": 3010 + }, + { + "epoch": 0.36, + "learning_rate": 7.682443330169527e-06, + "loss": 3.4164, + "step": 3011 + }, + { + "epoch": 0.36, + "learning_rate": 7.680755089536073e-06, + "loss": 3.3742, + "step": 3012 + }, + { + "epoch": 0.36, + "learning_rate": 7.679066419874575e-06, + "loss": 3.4972, + "step": 3013 + }, + { + "epoch": 0.36, + "learning_rate": 7.677377321455284e-06, + "loss": 3.3539, + "step": 3014 + }, + { + "epoch": 0.36, + "learning_rate": 7.675687794548524e-06, + "loss": 3.4556, + "step": 3015 + }, + { + "epoch": 0.36, + "learning_rate": 7.673997839424689e-06, + "loss": 3.4792, + "step": 3016 + }, + { + "epoch": 0.36, + "learning_rate": 7.672307456354235e-06, + "loss": 3.4583, + "step": 3017 + }, + { + "epoch": 0.36, + "learning_rate": 7.670616645607695e-06, + "loss": 3.5271, + "step": 3018 + }, + { + "epoch": 0.36, + "learning_rate": 7.668925407455667e-06, + "loss": 3.389, + "step": 3019 + }, + { + "epoch": 0.36, + "learning_rate": 7.667233742168814e-06, + "loss": 3.3724, + "step": 3020 + }, + { + "epoch": 0.36, + "learning_rate": 7.66554165001787e-06, + "loss": 3.4928, + "step": 3021 + }, + { + "epoch": 0.36, + "learning_rate": 7.663849131273637e-06, + "loss": 3.4636, + "step": 3022 + }, + { + "epoch": 0.36, + "learning_rate": 7.66215618620699e-06, + "loss": 3.3171, + "step": 3023 + }, + { + "epoch": 0.36, + "learning_rate": 7.660462815088863e-06, + "loss": 3.4543, + "step": 3024 + }, + { + "epoch": 0.36, + "learning_rate": 7.658769018190267e-06, + "loss": 3.4223, + "step": 3025 + }, + { + "epoch": 0.36, + "learning_rate": 7.657074795782274e-06, + "loss": 3.4462, + "step": 3026 + }, + { + "epoch": 0.36, + "learning_rate": 7.655380148136028e-06, + "loss": 3.5136, + "step": 3027 + }, + { + "epoch": 0.36, + "learning_rate": 7.653685075522743e-06, + "loss": 3.5615, + "step": 3028 + }, + { + "epoch": 0.36, + "learning_rate": 7.651989578213696e-06, + "loss": 3.5469, + "step": 3029 + }, + { + "epoch": 0.36, + "learning_rate": 7.650293656480234e-06, + "loss": 3.3539, + "step": 3030 + }, + { + "epoch": 0.36, + "learning_rate": 7.648597310593775e-06, + "loss": 3.5554, + "step": 3031 + }, + { + "epoch": 0.36, + "learning_rate": 7.6469005408258e-06, + "loss": 3.3195, + "step": 3032 + }, + { + "epoch": 0.36, + "learning_rate": 7.645203347447859e-06, + "loss": 3.4832, + "step": 3033 + }, + { + "epoch": 0.36, + "learning_rate": 7.643505730731572e-06, + "loss": 3.3928, + "step": 3034 + }, + { + "epoch": 0.36, + "learning_rate": 7.641807690948629e-06, + "loss": 3.4174, + "step": 3035 + }, + { + "epoch": 0.36, + "learning_rate": 7.640109228370778e-06, + "loss": 3.3716, + "step": 3036 + }, + { + "epoch": 0.36, + "learning_rate": 7.638410343269846e-06, + "loss": 3.3729, + "step": 3037 + }, + { + "epoch": 0.36, + "learning_rate": 7.636711035917722e-06, + "loss": 3.402, + "step": 3038 + }, + { + "epoch": 0.36, + "learning_rate": 7.63501130658636e-06, + "loss": 3.3955, + "step": 3039 + }, + { + "epoch": 0.36, + "learning_rate": 7.633311155547786e-06, + "loss": 3.4885, + "step": 3040 + }, + { + "epoch": 0.36, + "learning_rate": 7.631610583074093e-06, + "loss": 3.3688, + "step": 3041 + }, + { + "epoch": 0.36, + "learning_rate": 7.62990958943744e-06, + "loss": 3.4607, + "step": 3042 + }, + { + "epoch": 0.36, + "learning_rate": 7.628208174910057e-06, + "loss": 3.5156, + "step": 3043 + }, + { + "epoch": 0.36, + "learning_rate": 7.626506339764234e-06, + "loss": 3.3952, + "step": 3044 + }, + { + "epoch": 0.36, + "learning_rate": 7.624804084272335e-06, + "loss": 3.5149, + "step": 3045 + }, + { + "epoch": 0.36, + "learning_rate": 7.623101408706787e-06, + "loss": 3.4692, + "step": 3046 + }, + { + "epoch": 0.36, + "learning_rate": 7.6213983133400885e-06, + "loss": 3.511, + "step": 3047 + }, + { + "epoch": 0.36, + "learning_rate": 7.6196947984448025e-06, + "loss": 3.4212, + "step": 3048 + }, + { + "epoch": 0.37, + "learning_rate": 7.617990864293558e-06, + "loss": 3.4733, + "step": 3049 + }, + { + "epoch": 0.37, + "learning_rate": 7.616286511159054e-06, + "loss": 3.3742, + "step": 3050 + }, + { + "epoch": 0.37, + "learning_rate": 7.614581739314055e-06, + "loss": 3.4391, + "step": 3051 + }, + { + "epoch": 0.37, + "learning_rate": 7.612876549031392e-06, + "loss": 3.3786, + "step": 3052 + }, + { + "epoch": 0.37, + "learning_rate": 7.611170940583965e-06, + "loss": 3.5368, + "step": 3053 + }, + { + "epoch": 0.37, + "learning_rate": 7.609464914244739e-06, + "loss": 3.4164, + "step": 3054 + }, + { + "epoch": 0.37, + "learning_rate": 7.607758470286746e-06, + "loss": 3.4533, + "step": 3055 + }, + { + "epoch": 0.37, + "learning_rate": 7.606051608983085e-06, + "loss": 3.4067, + "step": 3056 + }, + { + "epoch": 0.37, + "learning_rate": 7.6043443306069235e-06, + "loss": 3.4585, + "step": 3057 + }, + { + "epoch": 0.37, + "learning_rate": 7.602636635431492e-06, + "loss": 3.4362, + "step": 3058 + }, + { + "epoch": 0.37, + "learning_rate": 7.600928523730092e-06, + "loss": 3.3763, + "step": 3059 + }, + { + "epoch": 0.37, + "learning_rate": 7.59921999577609e-06, + "loss": 3.5347, + "step": 3060 + }, + { + "epoch": 0.37, + "learning_rate": 7.597511051842917e-06, + "loss": 3.4386, + "step": 3061 + }, + { + "epoch": 0.37, + "learning_rate": 7.595801692204075e-06, + "loss": 3.4547, + "step": 3062 + }, + { + "epoch": 0.37, + "learning_rate": 7.5940919171331265e-06, + "loss": 3.4121, + "step": 3063 + }, + { + "epoch": 0.37, + "learning_rate": 7.592381726903706e-06, + "loss": 3.3915, + "step": 3064 + }, + { + "epoch": 0.37, + "learning_rate": 7.590671121789512e-06, + "loss": 3.4279, + "step": 3065 + }, + { + "epoch": 0.37, + "learning_rate": 7.58896010206431e-06, + "loss": 3.4037, + "step": 3066 + }, + { + "epoch": 0.37, + "learning_rate": 7.587248668001932e-06, + "loss": 3.4661, + "step": 3067 + }, + { + "epoch": 0.37, + "learning_rate": 7.5855368198762745e-06, + "loss": 3.4218, + "step": 3068 + }, + { + "epoch": 0.37, + "learning_rate": 7.583824557961304e-06, + "loss": 3.3719, + "step": 3069 + }, + { + "epoch": 0.37, + "learning_rate": 7.582111882531047e-06, + "loss": 3.3714, + "step": 3070 + }, + { + "epoch": 0.37, + "learning_rate": 7.5803987938596025e-06, + "loss": 3.3475, + "step": 3071 + }, + { + "epoch": 0.37, + "learning_rate": 7.578685292221135e-06, + "loss": 3.4603, + "step": 3072 + }, + { + "epoch": 0.37, + "learning_rate": 7.57697137788987e-06, + "loss": 3.4681, + "step": 3073 + }, + { + "epoch": 0.37, + "learning_rate": 7.575257051140104e-06, + "loss": 3.4449, + "step": 3074 + }, + { + "epoch": 0.37, + "learning_rate": 7.573542312246198e-06, + "loss": 3.3932, + "step": 3075 + }, + { + "epoch": 0.37, + "learning_rate": 7.571827161482579e-06, + "loss": 3.4466, + "step": 3076 + }, + { + "epoch": 0.37, + "learning_rate": 7.57011159912374e-06, + "loss": 3.4111, + "step": 3077 + }, + { + "epoch": 0.37, + "learning_rate": 7.568395625444236e-06, + "loss": 3.3533, + "step": 3078 + }, + { + "epoch": 0.37, + "learning_rate": 7.566679240718695e-06, + "loss": 3.4073, + "step": 3079 + }, + { + "epoch": 0.37, + "learning_rate": 7.564962445221809e-06, + "loss": 3.3879, + "step": 3080 + }, + { + "epoch": 0.37, + "learning_rate": 7.56324523922833e-06, + "loss": 3.6099, + "step": 3081 + }, + { + "epoch": 0.37, + "learning_rate": 7.5615276230130785e-06, + "loss": 3.3478, + "step": 3082 + }, + { + "epoch": 0.37, + "learning_rate": 7.559809596850947e-06, + "loss": 3.4677, + "step": 3083 + }, + { + "epoch": 0.37, + "learning_rate": 7.558091161016885e-06, + "loss": 3.4595, + "step": 3084 + }, + { + "epoch": 0.37, + "learning_rate": 7.556372315785912e-06, + "loss": 3.4123, + "step": 3085 + }, + { + "epoch": 0.37, + "learning_rate": 7.5546530614331105e-06, + "loss": 3.4128, + "step": 3086 + }, + { + "epoch": 0.37, + "learning_rate": 7.552933398233632e-06, + "loss": 3.4864, + "step": 3087 + }, + { + "epoch": 0.37, + "learning_rate": 7.551213326462692e-06, + "loss": 3.4285, + "step": 3088 + }, + { + "epoch": 0.37, + "learning_rate": 7.549492846395569e-06, + "loss": 3.3628, + "step": 3089 + }, + { + "epoch": 0.37, + "learning_rate": 7.547771958307607e-06, + "loss": 3.4567, + "step": 3090 + }, + { + "epoch": 0.37, + "learning_rate": 7.54605066247422e-06, + "loss": 3.4834, + "step": 3091 + }, + { + "epoch": 0.37, + "learning_rate": 7.5443289591708834e-06, + "loss": 3.4601, + "step": 3092 + }, + { + "epoch": 0.37, + "learning_rate": 7.5426068486731395e-06, + "loss": 3.4229, + "step": 3093 + }, + { + "epoch": 0.37, + "learning_rate": 7.54088433125659e-06, + "loss": 3.396, + "step": 3094 + }, + { + "epoch": 0.37, + "learning_rate": 7.539161407196913e-06, + "loss": 3.49, + "step": 3095 + }, + { + "epoch": 0.37, + "learning_rate": 7.537438076769843e-06, + "loss": 3.4498, + "step": 3096 + }, + { + "epoch": 0.37, + "learning_rate": 7.5357143402511805e-06, + "loss": 3.4517, + "step": 3097 + }, + { + "epoch": 0.37, + "learning_rate": 7.533990197916793e-06, + "loss": 3.4424, + "step": 3098 + }, + { + "epoch": 0.37, + "learning_rate": 7.532265650042612e-06, + "loss": 3.5042, + "step": 3099 + }, + { + "epoch": 0.37, + "learning_rate": 7.530540696904635e-06, + "loss": 3.4328, + "step": 3100 + }, + { + "epoch": 0.37, + "learning_rate": 7.528815338778923e-06, + "loss": 3.3898, + "step": 3101 + }, + { + "epoch": 0.37, + "learning_rate": 7.527089575941602e-06, + "loss": 3.4027, + "step": 3102 + }, + { + "epoch": 0.37, + "learning_rate": 7.525363408668864e-06, + "loss": 3.4447, + "step": 3103 + }, + { + "epoch": 0.37, + "learning_rate": 7.523636837236964e-06, + "loss": 3.5117, + "step": 3104 + }, + { + "epoch": 0.37, + "learning_rate": 7.521909861922223e-06, + "loss": 3.4117, + "step": 3105 + }, + { + "epoch": 0.37, + "learning_rate": 7.520182483001026e-06, + "loss": 3.4297, + "step": 3106 + }, + { + "epoch": 0.37, + "learning_rate": 7.518454700749821e-06, + "loss": 3.503, + "step": 3107 + }, + { + "epoch": 0.37, + "learning_rate": 7.516726515445125e-06, + "loss": 3.3849, + "step": 3108 + }, + { + "epoch": 0.37, + "learning_rate": 7.514997927363517e-06, + "loss": 3.4009, + "step": 3109 + }, + { + "epoch": 0.37, + "learning_rate": 7.513268936781637e-06, + "loss": 3.5151, + "step": 3110 + }, + { + "epoch": 0.37, + "learning_rate": 7.511539543976194e-06, + "loss": 3.4356, + "step": 3111 + }, + { + "epoch": 0.37, + "learning_rate": 7.509809749223962e-06, + "loss": 3.3867, + "step": 3112 + }, + { + "epoch": 0.37, + "learning_rate": 7.508079552801776e-06, + "loss": 3.5071, + "step": 3113 + }, + { + "epoch": 0.37, + "learning_rate": 7.506348954986535e-06, + "loss": 3.4559, + "step": 3114 + }, + { + "epoch": 0.37, + "learning_rate": 7.504617956055207e-06, + "loss": 3.4182, + "step": 3115 + }, + { + "epoch": 0.37, + "learning_rate": 7.502886556284819e-06, + "loss": 3.3829, + "step": 3116 + }, + { + "epoch": 0.37, + "learning_rate": 7.5011547559524646e-06, + "loss": 3.4618, + "step": 3117 + }, + { + "epoch": 0.37, + "learning_rate": 7.499422555335301e-06, + "loss": 3.4954, + "step": 3118 + }, + { + "epoch": 0.37, + "learning_rate": 7.49768995471055e-06, + "loss": 3.387, + "step": 3119 + }, + { + "epoch": 0.37, + "learning_rate": 7.4959569543554966e-06, + "loss": 3.4645, + "step": 3120 + }, + { + "epoch": 0.37, + "learning_rate": 7.494223554547491e-06, + "loss": 3.286, + "step": 3121 + }, + { + "epoch": 0.37, + "learning_rate": 7.492489755563945e-06, + "loss": 3.4432, + "step": 3122 + }, + { + "epoch": 0.37, + "learning_rate": 7.4907555576823375e-06, + "loss": 3.465, + "step": 3123 + }, + { + "epoch": 0.37, + "learning_rate": 7.489020961180209e-06, + "loss": 3.3448, + "step": 3124 + }, + { + "epoch": 0.37, + "learning_rate": 7.487285966335163e-06, + "loss": 3.4239, + "step": 3125 + }, + { + "epoch": 0.37, + "learning_rate": 7.48555057342487e-06, + "loss": 3.4163, + "step": 3126 + }, + { + "epoch": 0.37, + "learning_rate": 7.48381478272706e-06, + "loss": 3.3715, + "step": 3127 + }, + { + "epoch": 0.37, + "learning_rate": 7.482078594519532e-06, + "loss": 3.4643, + "step": 3128 + }, + { + "epoch": 0.37, + "learning_rate": 7.480342009080144e-06, + "loss": 3.428, + "step": 3129 + }, + { + "epoch": 0.37, + "learning_rate": 7.4786050266868184e-06, + "loss": 3.3119, + "step": 3130 + }, + { + "epoch": 0.37, + "learning_rate": 7.476867647617542e-06, + "loss": 3.3804, + "step": 3131 + }, + { + "epoch": 0.37, + "learning_rate": 7.475129872150366e-06, + "loss": 3.4964, + "step": 3132 + }, + { + "epoch": 0.38, + "learning_rate": 7.473391700563403e-06, + "loss": 3.4749, + "step": 3133 + }, + { + "epoch": 0.38, + "learning_rate": 7.471653133134832e-06, + "loss": 3.4197, + "step": 3134 + }, + { + "epoch": 0.38, + "learning_rate": 7.469914170142889e-06, + "loss": 3.402, + "step": 3135 + }, + { + "epoch": 0.38, + "learning_rate": 7.468174811865883e-06, + "loss": 3.3758, + "step": 3136 + }, + { + "epoch": 0.38, + "learning_rate": 7.466435058582178e-06, + "loss": 3.4238, + "step": 3137 + }, + { + "epoch": 0.38, + "learning_rate": 7.4646949105702025e-06, + "loss": 3.3662, + "step": 3138 + }, + { + "epoch": 0.38, + "learning_rate": 7.462954368108452e-06, + "loss": 3.4611, + "step": 3139 + }, + { + "epoch": 0.38, + "learning_rate": 7.4612134314754844e-06, + "loss": 3.5038, + "step": 3140 + }, + { + "epoch": 0.38, + "learning_rate": 7.4594721009499164e-06, + "loss": 3.4102, + "step": 3141 + }, + { + "epoch": 0.38, + "learning_rate": 7.4577303768104325e-06, + "loss": 3.4593, + "step": 3142 + }, + { + "epoch": 0.38, + "learning_rate": 7.455988259335776e-06, + "loss": 3.4057, + "step": 3143 + }, + { + "epoch": 0.38, + "learning_rate": 7.454245748804757e-06, + "loss": 3.4991, + "step": 3144 + }, + { + "epoch": 0.38, + "learning_rate": 7.452502845496246e-06, + "loss": 3.4909, + "step": 3145 + }, + { + "epoch": 0.38, + "learning_rate": 7.4507595496891795e-06, + "loss": 3.3797, + "step": 3146 + }, + { + "epoch": 0.38, + "learning_rate": 7.449015861662552e-06, + "loss": 3.5184, + "step": 3147 + }, + { + "epoch": 0.38, + "learning_rate": 7.447271781695424e-06, + "loss": 3.4756, + "step": 3148 + }, + { + "epoch": 0.38, + "learning_rate": 7.445527310066919e-06, + "loss": 3.2832, + "step": 3149 + }, + { + "epoch": 0.38, + "learning_rate": 7.443782447056221e-06, + "loss": 3.5145, + "step": 3150 + }, + { + "epoch": 0.38, + "learning_rate": 7.442037192942577e-06, + "loss": 3.4411, + "step": 3151 + }, + { + "epoch": 0.38, + "learning_rate": 7.440291548005301e-06, + "loss": 3.4298, + "step": 3152 + }, + { + "epoch": 0.38, + "learning_rate": 7.438545512523764e-06, + "loss": 3.4947, + "step": 3153 + }, + { + "epoch": 0.38, + "learning_rate": 7.4367990867774e-06, + "loss": 3.4462, + "step": 3154 + }, + { + "epoch": 0.38, + "learning_rate": 7.435052271045709e-06, + "loss": 3.4618, + "step": 3155 + }, + { + "epoch": 0.38, + "learning_rate": 7.433305065608251e-06, + "loss": 3.5147, + "step": 3156 + }, + { + "epoch": 0.38, + "learning_rate": 7.431557470744648e-06, + "loss": 3.4575, + "step": 3157 + }, + { + "epoch": 0.38, + "learning_rate": 7.429809486734586e-06, + "loss": 3.48, + "step": 3158 + }, + { + "epoch": 0.38, + "learning_rate": 7.4280611138578105e-06, + "loss": 3.4821, + "step": 3159 + }, + { + "epoch": 0.38, + "learning_rate": 7.426312352394134e-06, + "loss": 3.4512, + "step": 3160 + }, + { + "epoch": 0.38, + "learning_rate": 7.4245632026234274e-06, + "loss": 3.3727, + "step": 3161 + }, + { + "epoch": 0.38, + "learning_rate": 7.422813664825623e-06, + "loss": 3.3185, + "step": 3162 + }, + { + "epoch": 0.38, + "learning_rate": 7.421063739280718e-06, + "loss": 3.4701, + "step": 3163 + }, + { + "epoch": 0.38, + "learning_rate": 7.419313426268771e-06, + "loss": 3.482, + "step": 3164 + }, + { + "epoch": 0.38, + "learning_rate": 7.417562726069901e-06, + "loss": 3.4269, + "step": 3165 + }, + { + "epoch": 0.38, + "learning_rate": 7.4158116389642906e-06, + "loss": 3.3753, + "step": 3166 + }, + { + "epoch": 0.38, + "learning_rate": 7.4140601652321845e-06, + "loss": 3.4183, + "step": 3167 + }, + { + "epoch": 0.38, + "learning_rate": 7.412308305153886e-06, + "loss": 3.4277, + "step": 3168 + }, + { + "epoch": 0.38, + "learning_rate": 7.410556059009766e-06, + "loss": 3.502, + "step": 3169 + }, + { + "epoch": 0.38, + "learning_rate": 7.408803427080252e-06, + "loss": 3.536, + "step": 3170 + }, + { + "epoch": 0.38, + "learning_rate": 7.407050409645837e-06, + "loss": 3.3385, + "step": 3171 + }, + { + "epoch": 0.38, + "learning_rate": 7.405297006987071e-06, + "loss": 3.4144, + "step": 3172 + }, + { + "epoch": 0.38, + "learning_rate": 7.4035432193845704e-06, + "loss": 3.3936, + "step": 3173 + }, + { + "epoch": 0.38, + "learning_rate": 7.401789047119012e-06, + "loss": 3.4075, + "step": 3174 + }, + { + "epoch": 0.38, + "learning_rate": 7.400034490471133e-06, + "loss": 3.4836, + "step": 3175 + }, + { + "epoch": 0.38, + "learning_rate": 7.3982795497217316e-06, + "loss": 3.3922, + "step": 3176 + }, + { + "epoch": 0.38, + "learning_rate": 7.396524225151669e-06, + "loss": 3.4174, + "step": 3177 + }, + { + "epoch": 0.38, + "learning_rate": 7.39476851704187e-06, + "loss": 3.3394, + "step": 3178 + }, + { + "epoch": 0.38, + "learning_rate": 7.393012425673311e-06, + "loss": 3.4078, + "step": 3179 + }, + { + "epoch": 0.38, + "learning_rate": 7.391255951327045e-06, + "loss": 3.4358, + "step": 3180 + }, + { + "epoch": 0.38, + "learning_rate": 7.389499094284176e-06, + "loss": 3.3895, + "step": 3181 + }, + { + "epoch": 0.38, + "learning_rate": 7.387741854825867e-06, + "loss": 3.4302, + "step": 3182 + }, + { + "epoch": 0.38, + "learning_rate": 7.385984233233352e-06, + "loss": 3.3865, + "step": 3183 + }, + { + "epoch": 0.38, + "learning_rate": 7.384226229787918e-06, + "loss": 3.5098, + "step": 3184 + }, + { + "epoch": 0.38, + "learning_rate": 7.382467844770917e-06, + "loss": 3.4515, + "step": 3185 + }, + { + "epoch": 0.38, + "learning_rate": 7.3807090784637594e-06, + "loss": 3.3859, + "step": 3186 + }, + { + "epoch": 0.38, + "learning_rate": 7.37894993114792e-06, + "loss": 3.3801, + "step": 3187 + }, + { + "epoch": 0.38, + "learning_rate": 7.37719040310493e-06, + "loss": 3.3302, + "step": 3188 + }, + { + "epoch": 0.38, + "learning_rate": 7.375430494616389e-06, + "loss": 3.5257, + "step": 3189 + }, + { + "epoch": 0.38, + "learning_rate": 7.373670205963948e-06, + "loss": 3.4421, + "step": 3190 + }, + { + "epoch": 0.38, + "learning_rate": 7.371909537429327e-06, + "loss": 3.4246, + "step": 3191 + }, + { + "epoch": 0.38, + "learning_rate": 7.3701484892943e-06, + "loss": 3.4381, + "step": 3192 + }, + { + "epoch": 0.38, + "learning_rate": 7.368387061840707e-06, + "loss": 3.4843, + "step": 3193 + }, + { + "epoch": 0.38, + "learning_rate": 7.3666252553504494e-06, + "loss": 3.3767, + "step": 3194 + }, + { + "epoch": 0.38, + "learning_rate": 7.364863070105482e-06, + "loss": 3.3826, + "step": 3195 + }, + { + "epoch": 0.38, + "learning_rate": 7.363100506387828e-06, + "loss": 3.4949, + "step": 3196 + }, + { + "epoch": 0.38, + "learning_rate": 7.361337564479567e-06, + "loss": 3.5067, + "step": 3197 + }, + { + "epoch": 0.38, + "learning_rate": 7.3595742446628405e-06, + "loss": 3.3907, + "step": 3198 + }, + { + "epoch": 0.38, + "learning_rate": 7.35781054721985e-06, + "loss": 3.4976, + "step": 3199 + }, + { + "epoch": 0.38, + "learning_rate": 7.356046472432857e-06, + "loss": 3.4452, + "step": 3200 + }, + { + "epoch": 0.38, + "learning_rate": 7.354282020584187e-06, + "loss": 3.4134, + "step": 3201 + }, + { + "epoch": 0.38, + "learning_rate": 7.3525171919562186e-06, + "loss": 3.3993, + "step": 3202 + }, + { + "epoch": 0.38, + "learning_rate": 7.350751986831399e-06, + "loss": 3.4196, + "step": 3203 + }, + { + "epoch": 0.38, + "learning_rate": 7.348986405492228e-06, + "loss": 3.3603, + "step": 3204 + }, + { + "epoch": 0.38, + "learning_rate": 7.3472204482212715e-06, + "loss": 3.3814, + "step": 3205 + }, + { + "epoch": 0.38, + "learning_rate": 7.345454115301152e-06, + "loss": 3.4169, + "step": 3206 + }, + { + "epoch": 0.38, + "learning_rate": 7.343687407014556e-06, + "loss": 3.477, + "step": 3207 + }, + { + "epoch": 0.38, + "learning_rate": 7.341920323644224e-06, + "loss": 3.4007, + "step": 3208 + }, + { + "epoch": 0.38, + "learning_rate": 7.340152865472963e-06, + "loss": 3.3636, + "step": 3209 + }, + { + "epoch": 0.38, + "learning_rate": 7.338385032783635e-06, + "loss": 3.3962, + "step": 3210 + }, + { + "epoch": 0.38, + "learning_rate": 7.3366168258591656e-06, + "loss": 3.401, + "step": 3211 + }, + { + "epoch": 0.38, + "learning_rate": 7.334848244982535e-06, + "loss": 3.4206, + "step": 3212 + }, + { + "epoch": 0.38, + "learning_rate": 7.333079290436792e-06, + "loss": 3.4498, + "step": 3213 + }, + { + "epoch": 0.38, + "learning_rate": 7.331309962505037e-06, + "loss": 3.4787, + "step": 3214 + }, + { + "epoch": 0.38, + "learning_rate": 7.329540261470435e-06, + "loss": 3.4878, + "step": 3215 + }, + { + "epoch": 0.39, + "learning_rate": 7.327770187616205e-06, + "loss": 3.4329, + "step": 3216 + }, + { + "epoch": 0.39, + "learning_rate": 7.325999741225636e-06, + "loss": 3.3415, + "step": 3217 + }, + { + "epoch": 0.39, + "learning_rate": 7.324228922582064e-06, + "loss": 3.4987, + "step": 3218 + }, + { + "epoch": 0.39, + "learning_rate": 7.322457731968895e-06, + "loss": 3.4012, + "step": 3219 + }, + { + "epoch": 0.39, + "learning_rate": 7.320686169669587e-06, + "loss": 3.5068, + "step": 3220 + }, + { + "epoch": 0.39, + "learning_rate": 7.318914235967664e-06, + "loss": 3.3608, + "step": 3221 + }, + { + "epoch": 0.39, + "learning_rate": 7.3171419311467065e-06, + "loss": 3.474, + "step": 3222 + }, + { + "epoch": 0.39, + "learning_rate": 7.31536925549035e-06, + "loss": 3.3464, + "step": 3223 + }, + { + "epoch": 0.39, + "learning_rate": 7.313596209282297e-06, + "loss": 3.4026, + "step": 3224 + }, + { + "epoch": 0.39, + "learning_rate": 7.311822792806306e-06, + "loss": 3.3996, + "step": 3225 + }, + { + "epoch": 0.39, + "learning_rate": 7.310049006346193e-06, + "loss": 3.4255, + "step": 3226 + }, + { + "epoch": 0.39, + "learning_rate": 7.308274850185835e-06, + "loss": 3.3399, + "step": 3227 + }, + { + "epoch": 0.39, + "learning_rate": 7.3065003246091685e-06, + "loss": 3.545, + "step": 3228 + }, + { + "epoch": 0.39, + "learning_rate": 7.304725429900188e-06, + "loss": 3.3468, + "step": 3229 + }, + { + "epoch": 0.39, + "learning_rate": 7.3029501663429485e-06, + "loss": 3.3627, + "step": 3230 + }, + { + "epoch": 0.39, + "learning_rate": 7.3011745342215625e-06, + "loss": 3.3565, + "step": 3231 + }, + { + "epoch": 0.39, + "learning_rate": 7.299398533820202e-06, + "loss": 3.3605, + "step": 3232 + }, + { + "epoch": 0.39, + "learning_rate": 7.297622165423098e-06, + "loss": 3.433, + "step": 3233 + }, + { + "epoch": 0.39, + "learning_rate": 7.295845429314543e-06, + "loss": 3.4423, + "step": 3234 + }, + { + "epoch": 0.39, + "learning_rate": 7.294068325778883e-06, + "loss": 3.4252, + "step": 3235 + }, + { + "epoch": 0.39, + "learning_rate": 7.292290855100526e-06, + "loss": 3.3528, + "step": 3236 + }, + { + "epoch": 0.39, + "learning_rate": 7.2905130175639385e-06, + "loss": 3.4225, + "step": 3237 + }, + { + "epoch": 0.39, + "learning_rate": 7.2887348134536464e-06, + "loss": 3.4944, + "step": 3238 + }, + { + "epoch": 0.39, + "learning_rate": 7.286956243054233e-06, + "loss": 3.4554, + "step": 3239 + }, + { + "epoch": 0.39, + "learning_rate": 7.2851773066503415e-06, + "loss": 3.4393, + "step": 3240 + }, + { + "epoch": 0.39, + "learning_rate": 7.283398004526672e-06, + "loss": 3.4009, + "step": 3241 + }, + { + "epoch": 0.39, + "learning_rate": 7.281618336967984e-06, + "loss": 3.4646, + "step": 3242 + }, + { + "epoch": 0.39, + "learning_rate": 7.279838304259095e-06, + "loss": 3.4843, + "step": 3243 + }, + { + "epoch": 0.39, + "learning_rate": 7.278057906684883e-06, + "loss": 3.4371, + "step": 3244 + }, + { + "epoch": 0.39, + "learning_rate": 7.27627714453028e-06, + "loss": 3.5733, + "step": 3245 + }, + { + "epoch": 0.39, + "learning_rate": 7.274496018080284e-06, + "loss": 3.3895, + "step": 3246 + }, + { + "epoch": 0.39, + "learning_rate": 7.2727145276199414e-06, + "loss": 3.5215, + "step": 3247 + }, + { + "epoch": 0.39, + "learning_rate": 7.270932673434366e-06, + "loss": 3.4328, + "step": 3248 + }, + { + "epoch": 0.39, + "learning_rate": 7.269150455808723e-06, + "loss": 3.4257, + "step": 3249 + }, + { + "epoch": 0.39, + "learning_rate": 7.267367875028238e-06, + "loss": 3.4516, + "step": 3250 + }, + { + "epoch": 0.39, + "learning_rate": 7.265584931378199e-06, + "loss": 3.4513, + "step": 3251 + }, + { + "epoch": 0.39, + "learning_rate": 7.263801625143945e-06, + "loss": 3.4576, + "step": 3252 + }, + { + "epoch": 0.39, + "learning_rate": 7.262017956610876e-06, + "loss": 3.3264, + "step": 3253 + }, + { + "epoch": 0.39, + "learning_rate": 7.2602339260644524e-06, + "loss": 3.5462, + "step": 3254 + }, + { + "epoch": 0.39, + "learning_rate": 7.258449533790191e-06, + "loss": 3.4276, + "step": 3255 + }, + { + "epoch": 0.39, + "learning_rate": 7.256664780073663e-06, + "loss": 3.44, + "step": 3256 + }, + { + "epoch": 0.39, + "learning_rate": 7.254879665200503e-06, + "loss": 3.3855, + "step": 3257 + }, + { + "epoch": 0.39, + "learning_rate": 7.253094189456399e-06, + "loss": 3.3194, + "step": 3258 + }, + { + "epoch": 0.39, + "learning_rate": 7.2513083531271e-06, + "loss": 3.3781, + "step": 3259 + }, + { + "epoch": 0.39, + "learning_rate": 7.2495221564984096e-06, + "loss": 3.4013, + "step": 3260 + }, + { + "epoch": 0.39, + "learning_rate": 7.247735599856191e-06, + "loss": 3.4239, + "step": 3261 + }, + { + "epoch": 0.39, + "learning_rate": 7.245948683486367e-06, + "loss": 3.4142, + "step": 3262 + }, + { + "epoch": 0.39, + "learning_rate": 7.244161407674914e-06, + "loss": 3.5828, + "step": 3263 + }, + { + "epoch": 0.39, + "learning_rate": 7.242373772707867e-06, + "loss": 3.37, + "step": 3264 + }, + { + "epoch": 0.39, + "learning_rate": 7.240585778871319e-06, + "loss": 3.4095, + "step": 3265 + }, + { + "epoch": 0.39, + "learning_rate": 7.238797426451423e-06, + "loss": 3.4531, + "step": 3266 + }, + { + "epoch": 0.39, + "learning_rate": 7.237008715734384e-06, + "loss": 3.4693, + "step": 3267 + }, + { + "epoch": 0.39, + "learning_rate": 7.235219647006468e-06, + "loss": 3.3098, + "step": 3268 + }, + { + "epoch": 0.39, + "learning_rate": 7.2334302205539985e-06, + "loss": 3.3258, + "step": 3269 + }, + { + "epoch": 0.39, + "learning_rate": 7.231640436663356e-06, + "loss": 3.4398, + "step": 3270 + }, + { + "epoch": 0.39, + "learning_rate": 7.229850295620975e-06, + "loss": 3.423, + "step": 3271 + }, + { + "epoch": 0.39, + "learning_rate": 7.228059797713352e-06, + "loss": 3.4765, + "step": 3272 + }, + { + "epoch": 0.39, + "learning_rate": 7.2262689432270364e-06, + "loss": 3.4756, + "step": 3273 + }, + { + "epoch": 0.39, + "learning_rate": 7.224477732448638e-06, + "loss": 3.4544, + "step": 3274 + }, + { + "epoch": 0.39, + "learning_rate": 7.222686165664823e-06, + "loss": 3.464, + "step": 3275 + }, + { + "epoch": 0.39, + "learning_rate": 7.220894243162311e-06, + "loss": 3.4212, + "step": 3276 + }, + { + "epoch": 0.39, + "learning_rate": 7.219101965227882e-06, + "loss": 3.5172, + "step": 3277 + }, + { + "epoch": 0.39, + "learning_rate": 7.217309332148375e-06, + "loss": 3.4636, + "step": 3278 + }, + { + "epoch": 0.39, + "learning_rate": 7.215516344210679e-06, + "loss": 3.442, + "step": 3279 + }, + { + "epoch": 0.39, + "learning_rate": 7.2137230017017464e-06, + "loss": 3.3365, + "step": 3280 + }, + { + "epoch": 0.39, + "learning_rate": 7.2119293049085825e-06, + "loss": 3.4288, + "step": 3281 + }, + { + "epoch": 0.39, + "learning_rate": 7.210135254118251e-06, + "loss": 3.4164, + "step": 3282 + }, + { + "epoch": 0.39, + "learning_rate": 7.208340849617873e-06, + "loss": 3.4206, + "step": 3283 + }, + { + "epoch": 0.39, + "learning_rate": 7.206546091694624e-06, + "loss": 3.426, + "step": 3284 + }, + { + "epoch": 0.39, + "learning_rate": 7.204750980635735e-06, + "loss": 3.3777, + "step": 3285 + }, + { + "epoch": 0.39, + "learning_rate": 7.202955516728498e-06, + "loss": 3.5137, + "step": 3286 + }, + { + "epoch": 0.39, + "learning_rate": 7.20115970026026e-06, + "loss": 3.3668, + "step": 3287 + }, + { + "epoch": 0.39, + "learning_rate": 7.199363531518421e-06, + "loss": 3.4782, + "step": 3288 + }, + { + "epoch": 0.39, + "learning_rate": 7.197567010790439e-06, + "loss": 3.4232, + "step": 3289 + }, + { + "epoch": 0.39, + "learning_rate": 7.195770138363833e-06, + "loss": 3.5008, + "step": 3290 + }, + { + "epoch": 0.39, + "learning_rate": 7.1939729145261725e-06, + "loss": 3.354, + "step": 3291 + }, + { + "epoch": 0.39, + "learning_rate": 7.192175339565085e-06, + "loss": 3.472, + "step": 3292 + }, + { + "epoch": 0.39, + "learning_rate": 7.1903774137682546e-06, + "loss": 3.4173, + "step": 3293 + }, + { + "epoch": 0.39, + "learning_rate": 7.18857913742342e-06, + "loss": 3.5249, + "step": 3294 + }, + { + "epoch": 0.39, + "learning_rate": 7.1867805108183806e-06, + "loss": 3.38, + "step": 3295 + }, + { + "epoch": 0.39, + "learning_rate": 7.184981534240986e-06, + "loss": 3.4525, + "step": 3296 + }, + { + "epoch": 0.39, + "learning_rate": 7.183182207979144e-06, + "loss": 3.3996, + "step": 3297 + }, + { + "epoch": 0.39, + "learning_rate": 7.181382532320821e-06, + "loss": 3.4574, + "step": 3298 + }, + { + "epoch": 0.39, + "learning_rate": 7.179582507554035e-06, + "loss": 3.3671, + "step": 3299 + }, + { + "epoch": 0.4, + "learning_rate": 7.177782133966864e-06, + "loss": 3.3963, + "step": 3300 + }, + { + "epoch": 0.4, + "learning_rate": 7.175981411847438e-06, + "loss": 3.4505, + "step": 3301 + }, + { + "epoch": 0.4, + "learning_rate": 7.174180341483945e-06, + "loss": 3.4829, + "step": 3302 + }, + { + "epoch": 0.4, + "learning_rate": 7.172378923164629e-06, + "loss": 3.509, + "step": 3303 + }, + { + "epoch": 0.4, + "learning_rate": 7.170577157177789e-06, + "loss": 3.4549, + "step": 3304 + }, + { + "epoch": 0.4, + "learning_rate": 7.168775043811779e-06, + "loss": 3.3731, + "step": 3305 + }, + { + "epoch": 0.4, + "learning_rate": 7.166972583355007e-06, + "loss": 3.3633, + "step": 3306 + }, + { + "epoch": 0.4, + "learning_rate": 7.1651697760959435e-06, + "loss": 3.4201, + "step": 3307 + }, + { + "epoch": 0.4, + "learning_rate": 7.163366622323108e-06, + "loss": 3.4041, + "step": 3308 + }, + { + "epoch": 0.4, + "learning_rate": 7.161563122325076e-06, + "loss": 3.3463, + "step": 3309 + }, + { + "epoch": 0.4, + "learning_rate": 7.15975927639048e-06, + "loss": 3.3413, + "step": 3310 + }, + { + "epoch": 0.4, + "learning_rate": 7.1579550848080085e-06, + "loss": 3.484, + "step": 3311 + }, + { + "epoch": 0.4, + "learning_rate": 7.156150547866402e-06, + "loss": 3.4906, + "step": 3312 + }, + { + "epoch": 0.4, + "learning_rate": 7.154345665854462e-06, + "loss": 3.4171, + "step": 3313 + }, + { + "epoch": 0.4, + "learning_rate": 7.1525404390610384e-06, + "loss": 3.4565, + "step": 3314 + }, + { + "epoch": 0.4, + "learning_rate": 7.150734867775042e-06, + "loss": 3.3992, + "step": 3315 + }, + { + "epoch": 0.4, + "learning_rate": 7.148928952285437e-06, + "loss": 3.4059, + "step": 3316 + }, + { + "epoch": 0.4, + "learning_rate": 7.147122692881241e-06, + "loss": 3.4418, + "step": 3317 + }, + { + "epoch": 0.4, + "learning_rate": 7.145316089851526e-06, + "loss": 3.4588, + "step": 3318 + }, + { + "epoch": 0.4, + "learning_rate": 7.143509143485423e-06, + "loss": 3.4483, + "step": 3319 + }, + { + "epoch": 0.4, + "learning_rate": 7.141701854072116e-06, + "loss": 3.4928, + "step": 3320 + }, + { + "epoch": 0.4, + "learning_rate": 7.139894221900842e-06, + "loss": 3.4825, + "step": 3321 + }, + { + "epoch": 0.4, + "learning_rate": 7.138086247260894e-06, + "loss": 3.4745, + "step": 3322 + }, + { + "epoch": 0.4, + "learning_rate": 7.136277930441623e-06, + "loss": 3.4117, + "step": 3323 + }, + { + "epoch": 0.4, + "learning_rate": 7.13446927173243e-06, + "loss": 3.5122, + "step": 3324 + }, + { + "epoch": 0.4, + "learning_rate": 7.1326602714227735e-06, + "loss": 3.5388, + "step": 3325 + }, + { + "epoch": 0.4, + "learning_rate": 7.130850929802165e-06, + "loss": 3.4977, + "step": 3326 + }, + { + "epoch": 0.4, + "learning_rate": 7.129041247160172e-06, + "loss": 3.4729, + "step": 3327 + }, + { + "epoch": 0.4, + "learning_rate": 7.127231223786417e-06, + "loss": 3.3615, + "step": 3328 + }, + { + "epoch": 0.4, + "learning_rate": 7.125420859970575e-06, + "loss": 3.4197, + "step": 3329 + }, + { + "epoch": 0.4, + "learning_rate": 7.1236101560023775e-06, + "loss": 3.436, + "step": 3330 + }, + { + "epoch": 0.4, + "learning_rate": 7.121799112171609e-06, + "loss": 3.4249, + "step": 3331 + }, + { + "epoch": 0.4, + "learning_rate": 7.119987728768109e-06, + "loss": 3.3965, + "step": 3332 + }, + { + "epoch": 0.4, + "learning_rate": 7.118176006081772e-06, + "loss": 3.4841, + "step": 3333 + }, + { + "epoch": 0.4, + "learning_rate": 7.116363944402542e-06, + "loss": 3.5233, + "step": 3334 + }, + { + "epoch": 0.4, + "learning_rate": 7.114551544020428e-06, + "loss": 3.4144, + "step": 3335 + }, + { + "epoch": 0.4, + "learning_rate": 7.112738805225483e-06, + "loss": 3.4335, + "step": 3336 + }, + { + "epoch": 0.4, + "learning_rate": 7.110925728307818e-06, + "loss": 3.435, + "step": 3337 + }, + { + "epoch": 0.4, + "learning_rate": 7.109112313557597e-06, + "loss": 3.4145, + "step": 3338 + }, + { + "epoch": 0.4, + "learning_rate": 7.1072985612650416e-06, + "loss": 3.4396, + "step": 3339 + }, + { + "epoch": 0.4, + "learning_rate": 7.105484471720421e-06, + "loss": 3.4695, + "step": 3340 + }, + { + "epoch": 0.4, + "learning_rate": 7.103670045214065e-06, + "loss": 3.4039, + "step": 3341 + }, + { + "epoch": 0.4, + "learning_rate": 7.101855282036353e-06, + "loss": 3.4931, + "step": 3342 + }, + { + "epoch": 0.4, + "learning_rate": 7.10004018247772e-06, + "loss": 3.4326, + "step": 3343 + }, + { + "epoch": 0.4, + "learning_rate": 7.098224746828655e-06, + "loss": 3.3676, + "step": 3344 + }, + { + "epoch": 0.4, + "learning_rate": 7.096408975379701e-06, + "loss": 3.561, + "step": 3345 + }, + { + "epoch": 0.4, + "learning_rate": 7.09459286842145e-06, + "loss": 3.4538, + "step": 3346 + }, + { + "epoch": 0.4, + "learning_rate": 7.092776426244558e-06, + "loss": 3.3749, + "step": 3347 + }, + { + "epoch": 0.4, + "learning_rate": 7.090959649139724e-06, + "loss": 3.4276, + "step": 3348 + }, + { + "epoch": 0.4, + "learning_rate": 7.089142537397707e-06, + "loss": 3.3144, + "step": 3349 + }, + { + "epoch": 0.4, + "learning_rate": 7.087325091309315e-06, + "loss": 3.3531, + "step": 3350 + }, + { + "epoch": 0.4, + "learning_rate": 7.085507311165415e-06, + "loss": 3.3655, + "step": 3351 + }, + { + "epoch": 0.4, + "learning_rate": 7.083689197256923e-06, + "loss": 3.3594, + "step": 3352 + }, + { + "epoch": 0.4, + "learning_rate": 7.081870749874811e-06, + "loss": 3.4102, + "step": 3353 + }, + { + "epoch": 0.4, + "learning_rate": 7.080051969310102e-06, + "loss": 3.4495, + "step": 3354 + }, + { + "epoch": 0.4, + "learning_rate": 7.078232855853875e-06, + "loss": 3.3798, + "step": 3355 + }, + { + "epoch": 0.4, + "learning_rate": 7.076413409797258e-06, + "loss": 3.4188, + "step": 3356 + }, + { + "epoch": 0.4, + "learning_rate": 7.07459363143144e-06, + "loss": 3.4449, + "step": 3357 + }, + { + "epoch": 0.4, + "learning_rate": 7.072773521047655e-06, + "loss": 3.3753, + "step": 3358 + }, + { + "epoch": 0.4, + "learning_rate": 7.070953078937193e-06, + "loss": 3.3321, + "step": 3359 + }, + { + "epoch": 0.4, + "learning_rate": 7.069132305391401e-06, + "loss": 3.3525, + "step": 3360 + }, + { + "epoch": 0.4, + "learning_rate": 7.067311200701672e-06, + "loss": 3.4123, + "step": 3361 + }, + { + "epoch": 0.4, + "learning_rate": 7.065489765159457e-06, + "loss": 3.3214, + "step": 3362 + }, + { + "epoch": 0.4, + "learning_rate": 7.0636679990562585e-06, + "loss": 3.3874, + "step": 3363 + }, + { + "epoch": 0.4, + "learning_rate": 7.0618459026836316e-06, + "loss": 3.5224, + "step": 3364 + }, + { + "epoch": 0.4, + "learning_rate": 7.060023476333186e-06, + "loss": 3.4661, + "step": 3365 + }, + { + "epoch": 0.4, + "learning_rate": 7.058200720296581e-06, + "loss": 3.3768, + "step": 3366 + }, + { + "epoch": 0.4, + "learning_rate": 7.05637763486553e-06, + "loss": 3.4102, + "step": 3367 + }, + { + "epoch": 0.4, + "learning_rate": 7.054554220331803e-06, + "loss": 3.3845, + "step": 3368 + }, + { + "epoch": 0.4, + "learning_rate": 7.052730476987216e-06, + "loss": 3.4552, + "step": 3369 + }, + { + "epoch": 0.4, + "learning_rate": 7.050906405123643e-06, + "loss": 3.4074, + "step": 3370 + }, + { + "epoch": 0.4, + "learning_rate": 7.049082005033005e-06, + "loss": 3.3954, + "step": 3371 + }, + { + "epoch": 0.4, + "learning_rate": 7.047257277007283e-06, + "loss": 3.314, + "step": 3372 + }, + { + "epoch": 0.4, + "learning_rate": 7.045432221338505e-06, + "loss": 3.4029, + "step": 3373 + }, + { + "epoch": 0.4, + "learning_rate": 7.043606838318753e-06, + "loss": 3.5424, + "step": 3374 + }, + { + "epoch": 0.4, + "learning_rate": 7.04178112824016e-06, + "loss": 3.341, + "step": 3375 + }, + { + "epoch": 0.4, + "learning_rate": 7.039955091394914e-06, + "loss": 3.4589, + "step": 3376 + }, + { + "epoch": 0.4, + "learning_rate": 7.038128728075255e-06, + "loss": 3.4828, + "step": 3377 + }, + { + "epoch": 0.4, + "learning_rate": 7.0363020385734725e-06, + "loss": 3.3031, + "step": 3378 + }, + { + "epoch": 0.4, + "learning_rate": 7.0344750231819095e-06, + "loss": 3.5283, + "step": 3379 + }, + { + "epoch": 0.4, + "learning_rate": 7.032647682192963e-06, + "loss": 3.4237, + "step": 3380 + }, + { + "epoch": 0.4, + "learning_rate": 7.030820015899079e-06, + "loss": 3.4379, + "step": 3381 + }, + { + "epoch": 0.4, + "learning_rate": 7.0289920245927586e-06, + "loss": 3.4798, + "step": 3382 + }, + { + "epoch": 0.41, + "learning_rate": 7.0271637085665535e-06, + "loss": 3.4771, + "step": 3383 + }, + { + "epoch": 0.41, + "learning_rate": 7.025335068113066e-06, + "loss": 3.4492, + "step": 3384 + }, + { + "epoch": 0.41, + "learning_rate": 7.023506103524954e-06, + "loss": 3.3557, + "step": 3385 + }, + { + "epoch": 0.41, + "learning_rate": 7.021676815094923e-06, + "loss": 3.4351, + "step": 3386 + }, + { + "epoch": 0.41, + "learning_rate": 7.019847203115733e-06, + "loss": 3.4747, + "step": 3387 + }, + { + "epoch": 0.41, + "learning_rate": 7.018017267880195e-06, + "loss": 3.4333, + "step": 3388 + }, + { + "epoch": 0.41, + "learning_rate": 7.016187009681173e-06, + "loss": 3.4586, + "step": 3389 + }, + { + "epoch": 0.41, + "learning_rate": 7.01435642881158e-06, + "loss": 3.4566, + "step": 3390 + }, + { + "epoch": 0.41, + "learning_rate": 7.012525525564382e-06, + "loss": 3.4549, + "step": 3391 + }, + { + "epoch": 0.41, + "learning_rate": 7.0106943002325975e-06, + "loss": 3.4157, + "step": 3392 + }, + { + "epoch": 0.41, + "learning_rate": 7.008862753109297e-06, + "loss": 3.4722, + "step": 3393 + }, + { + "epoch": 0.41, + "learning_rate": 7.007030884487598e-06, + "loss": 3.419, + "step": 3394 + }, + { + "epoch": 0.41, + "learning_rate": 7.005198694660676e-06, + "loss": 3.4332, + "step": 3395 + }, + { + "epoch": 0.41, + "learning_rate": 7.0033661839217535e-06, + "loss": 3.5075, + "step": 3396 + }, + { + "epoch": 0.41, + "learning_rate": 7.001533352564105e-06, + "loss": 3.4173, + "step": 3397 + }, + { + "epoch": 0.41, + "learning_rate": 6.999700200881057e-06, + "loss": 3.3737, + "step": 3398 + }, + { + "epoch": 0.41, + "learning_rate": 6.997866729165988e-06, + "loss": 3.3901, + "step": 3399 + }, + { + "epoch": 0.41, + "learning_rate": 6.996032937712326e-06, + "loss": 3.3541, + "step": 3400 + }, + { + "epoch": 0.41, + "learning_rate": 6.994198826813552e-06, + "loss": 3.378, + "step": 3401 + }, + { + "epoch": 0.41, + "learning_rate": 6.992364396763196e-06, + "loss": 3.401, + "step": 3402 + }, + { + "epoch": 0.41, + "learning_rate": 6.990529647854839e-06, + "loss": 3.4206, + "step": 3403 + }, + { + "epoch": 0.41, + "learning_rate": 6.988694580382117e-06, + "loss": 3.3258, + "step": 3404 + }, + { + "epoch": 0.41, + "learning_rate": 6.986859194638713e-06, + "loss": 3.4498, + "step": 3405 + }, + { + "epoch": 0.41, + "learning_rate": 6.985023490918362e-06, + "loss": 3.4614, + "step": 3406 + }, + { + "epoch": 0.41, + "learning_rate": 6.983187469514849e-06, + "loss": 3.4287, + "step": 3407 + }, + { + "epoch": 0.41, + "learning_rate": 6.981351130722014e-06, + "loss": 3.4946, + "step": 3408 + }, + { + "epoch": 0.41, + "learning_rate": 6.979514474833741e-06, + "loss": 3.4596, + "step": 3409 + }, + { + "epoch": 0.41, + "learning_rate": 6.97767750214397e-06, + "loss": 3.3772, + "step": 3410 + }, + { + "epoch": 0.41, + "learning_rate": 6.97584021294669e-06, + "loss": 3.4447, + "step": 3411 + }, + { + "epoch": 0.41, + "learning_rate": 6.974002607535941e-06, + "loss": 3.5639, + "step": 3412 + }, + { + "epoch": 0.41, + "learning_rate": 6.9721646862058145e-06, + "loss": 3.3569, + "step": 3413 + }, + { + "epoch": 0.41, + "learning_rate": 6.9703264492504495e-06, + "loss": 3.4005, + "step": 3414 + }, + { + "epoch": 0.41, + "learning_rate": 6.9684878969640366e-06, + "loss": 3.4776, + "step": 3415 + }, + { + "epoch": 0.41, + "learning_rate": 6.966649029640822e-06, + "loss": 3.3432, + "step": 3416 + }, + { + "epoch": 0.41, + "learning_rate": 6.9648098475750935e-06, + "loss": 3.3756, + "step": 3417 + }, + { + "epoch": 0.41, + "learning_rate": 6.962970351061195e-06, + "loss": 3.4359, + "step": 3418 + }, + { + "epoch": 0.41, + "learning_rate": 6.961130540393522e-06, + "loss": 3.3277, + "step": 3419 + }, + { + "epoch": 0.41, + "learning_rate": 6.959290415866514e-06, + "loss": 3.3564, + "step": 3420 + }, + { + "epoch": 0.41, + "learning_rate": 6.9574499777746675e-06, + "loss": 3.4318, + "step": 3421 + }, + { + "epoch": 0.41, + "learning_rate": 6.9556092264125255e-06, + "loss": 3.4736, + "step": 3422 + }, + { + "epoch": 0.41, + "learning_rate": 6.953768162074681e-06, + "loss": 3.4444, + "step": 3423 + }, + { + "epoch": 0.41, + "learning_rate": 6.951926785055777e-06, + "loss": 3.5207, + "step": 3424 + }, + { + "epoch": 0.41, + "learning_rate": 6.950085095650511e-06, + "loss": 3.4279, + "step": 3425 + }, + { + "epoch": 0.41, + "learning_rate": 6.9482430941536235e-06, + "loss": 3.4428, + "step": 3426 + }, + { + "epoch": 0.41, + "learning_rate": 6.946400780859911e-06, + "loss": 3.5103, + "step": 3427 + }, + { + "epoch": 0.41, + "learning_rate": 6.944558156064214e-06, + "loss": 3.439, + "step": 3428 + }, + { + "epoch": 0.41, + "learning_rate": 6.94271522006143e-06, + "loss": 3.411, + "step": 3429 + }, + { + "epoch": 0.41, + "learning_rate": 6.940871973146501e-06, + "loss": 3.4665, + "step": 3430 + }, + { + "epoch": 0.41, + "learning_rate": 6.939028415614419e-06, + "loss": 3.4099, + "step": 3431 + }, + { + "epoch": 0.41, + "learning_rate": 6.937184547760228e-06, + "loss": 3.4244, + "step": 3432 + }, + { + "epoch": 0.41, + "learning_rate": 6.935340369879021e-06, + "loss": 3.3754, + "step": 3433 + }, + { + "epoch": 0.41, + "learning_rate": 6.93349588226594e-06, + "loss": 3.3637, + "step": 3434 + }, + { + "epoch": 0.41, + "learning_rate": 6.931651085216177e-06, + "loss": 3.5317, + "step": 3435 + }, + { + "epoch": 0.41, + "learning_rate": 6.929805979024971e-06, + "loss": 3.4088, + "step": 3436 + }, + { + "epoch": 0.41, + "learning_rate": 6.9279605639876165e-06, + "loss": 3.3877, + "step": 3437 + }, + { + "epoch": 0.41, + "learning_rate": 6.926114840399451e-06, + "loss": 3.3683, + "step": 3438 + }, + { + "epoch": 0.41, + "learning_rate": 6.924268808555865e-06, + "loss": 3.3956, + "step": 3439 + }, + { + "epoch": 0.41, + "learning_rate": 6.922422468752298e-06, + "loss": 3.5099, + "step": 3440 + }, + { + "epoch": 0.41, + "learning_rate": 6.920575821284237e-06, + "loss": 3.488, + "step": 3441 + }, + { + "epoch": 0.41, + "learning_rate": 6.9187288664472205e-06, + "loss": 3.4102, + "step": 3442 + }, + { + "epoch": 0.41, + "learning_rate": 6.9168816045368355e-06, + "loss": 3.386, + "step": 3443 + }, + { + "epoch": 0.41, + "learning_rate": 6.915034035848716e-06, + "loss": 3.3702, + "step": 3444 + }, + { + "epoch": 0.41, + "learning_rate": 6.913186160678549e-06, + "loss": 3.3417, + "step": 3445 + }, + { + "epoch": 0.41, + "learning_rate": 6.911337979322067e-06, + "loss": 3.3184, + "step": 3446 + }, + { + "epoch": 0.41, + "learning_rate": 6.909489492075054e-06, + "loss": 3.4198, + "step": 3447 + }, + { + "epoch": 0.41, + "learning_rate": 6.90764069923334e-06, + "loss": 3.4048, + "step": 3448 + }, + { + "epoch": 0.41, + "learning_rate": 6.905791601092808e-06, + "loss": 3.4258, + "step": 3449 + }, + { + "epoch": 0.41, + "learning_rate": 6.903942197949387e-06, + "loss": 3.4899, + "step": 3450 + }, + { + "epoch": 0.41, + "learning_rate": 6.902092490099055e-06, + "loss": 3.3949, + "step": 3451 + }, + { + "epoch": 0.41, + "learning_rate": 6.900242477837839e-06, + "loss": 3.4209, + "step": 3452 + }, + { + "epoch": 0.41, + "learning_rate": 6.898392161461816e-06, + "loss": 3.3501, + "step": 3453 + }, + { + "epoch": 0.41, + "learning_rate": 6.89654154126711e-06, + "loss": 3.4085, + "step": 3454 + }, + { + "epoch": 0.41, + "learning_rate": 6.894690617549894e-06, + "loss": 3.4567, + "step": 3455 + }, + { + "epoch": 0.41, + "learning_rate": 6.8928393906063895e-06, + "loss": 3.3873, + "step": 3456 + }, + { + "epoch": 0.41, + "learning_rate": 6.890987860732869e-06, + "loss": 3.3952, + "step": 3457 + }, + { + "epoch": 0.41, + "learning_rate": 6.889136028225649e-06, + "loss": 3.4077, + "step": 3458 + }, + { + "epoch": 0.41, + "learning_rate": 6.8872838933811e-06, + "loss": 3.5871, + "step": 3459 + }, + { + "epoch": 0.41, + "learning_rate": 6.885431456495631e-06, + "loss": 3.3973, + "step": 3460 + }, + { + "epoch": 0.41, + "learning_rate": 6.883578717865713e-06, + "loss": 3.4729, + "step": 3461 + }, + { + "epoch": 0.41, + "learning_rate": 6.881725677787856e-06, + "loss": 3.4734, + "step": 3462 + }, + { + "epoch": 0.41, + "learning_rate": 6.8798723365586195e-06, + "loss": 3.3989, + "step": 3463 + }, + { + "epoch": 0.41, + "learning_rate": 6.8780186944746115e-06, + "loss": 3.5012, + "step": 3464 + }, + { + "epoch": 0.41, + "learning_rate": 6.876164751832493e-06, + "loss": 3.3925, + "step": 3465 + }, + { + "epoch": 0.41, + "learning_rate": 6.874310508928966e-06, + "loss": 3.4431, + "step": 3466 + }, + { + "epoch": 0.42, + "learning_rate": 6.872455966060782e-06, + "loss": 3.4261, + "step": 3467 + }, + { + "epoch": 0.42, + "learning_rate": 6.870601123524743e-06, + "loss": 3.4349, + "step": 3468 + }, + { + "epoch": 0.42, + "learning_rate": 6.8687459816177e-06, + "loss": 3.4723, + "step": 3469 + }, + { + "epoch": 0.42, + "learning_rate": 6.86689054063655e-06, + "loss": 3.3697, + "step": 3470 + }, + { + "epoch": 0.42, + "learning_rate": 6.8650348008782355e-06, + "loss": 3.5503, + "step": 3471 + }, + { + "epoch": 0.42, + "learning_rate": 6.863178762639749e-06, + "loss": 3.4408, + "step": 3472 + }, + { + "epoch": 0.42, + "learning_rate": 6.8613224262181335e-06, + "loss": 3.4587, + "step": 3473 + }, + { + "epoch": 0.42, + "learning_rate": 6.859465791910476e-06, + "loss": 3.3906, + "step": 3474 + }, + { + "epoch": 0.42, + "learning_rate": 6.857608860013911e-06, + "loss": 3.4457, + "step": 3475 + }, + { + "epoch": 0.42, + "learning_rate": 6.855751630825621e-06, + "loss": 3.4631, + "step": 3476 + }, + { + "epoch": 0.42, + "learning_rate": 6.853894104642841e-06, + "loss": 3.5097, + "step": 3477 + }, + { + "epoch": 0.42, + "learning_rate": 6.852036281762846e-06, + "loss": 3.4216, + "step": 3478 + }, + { + "epoch": 0.42, + "learning_rate": 6.850178162482964e-06, + "loss": 3.5584, + "step": 3479 + }, + { + "epoch": 0.42, + "learning_rate": 6.848319747100567e-06, + "loss": 3.4312, + "step": 3480 + }, + { + "epoch": 0.42, + "learning_rate": 6.846461035913077e-06, + "loss": 3.3835, + "step": 3481 + }, + { + "epoch": 0.42, + "learning_rate": 6.84460202921796e-06, + "loss": 3.4718, + "step": 3482 + }, + { + "epoch": 0.42, + "learning_rate": 6.842742727312733e-06, + "loss": 3.4514, + "step": 3483 + }, + { + "epoch": 0.42, + "learning_rate": 6.84088313049496e-06, + "loss": 3.4987, + "step": 3484 + }, + { + "epoch": 0.42, + "learning_rate": 6.839023239062248e-06, + "loss": 3.4372, + "step": 3485 + }, + { + "epoch": 0.42, + "learning_rate": 6.837163053312255e-06, + "loss": 3.4266, + "step": 3486 + }, + { + "epoch": 0.42, + "learning_rate": 6.8353025735426884e-06, + "loss": 3.3483, + "step": 3487 + }, + { + "epoch": 0.42, + "learning_rate": 6.833441800051295e-06, + "loss": 3.4116, + "step": 3488 + }, + { + "epoch": 0.42, + "learning_rate": 6.8315807331358745e-06, + "loss": 3.4123, + "step": 3489 + }, + { + "epoch": 0.42, + "learning_rate": 6.829719373094271e-06, + "loss": 3.3386, + "step": 3490 + }, + { + "epoch": 0.42, + "learning_rate": 6.82785772022438e-06, + "loss": 3.424, + "step": 3491 + }, + { + "epoch": 0.42, + "learning_rate": 6.8259957748241365e-06, + "loss": 3.3502, + "step": 3492 + }, + { + "epoch": 0.42, + "learning_rate": 6.824133537191528e-06, + "loss": 3.4749, + "step": 3493 + }, + { + "epoch": 0.42, + "learning_rate": 6.822271007624588e-06, + "loss": 3.3969, + "step": 3494 + }, + { + "epoch": 0.42, + "learning_rate": 6.820408186421394e-06, + "loss": 3.4034, + "step": 3495 + }, + { + "epoch": 0.42, + "learning_rate": 6.818545073880072e-06, + "loss": 3.4309, + "step": 3496 + }, + { + "epoch": 0.42, + "learning_rate": 6.816681670298795e-06, + "loss": 3.4898, + "step": 3497 + }, + { + "epoch": 0.42, + "learning_rate": 6.814817975975783e-06, + "loss": 3.4666, + "step": 3498 + }, + { + "epoch": 0.42, + "learning_rate": 6.8129539912093005e-06, + "loss": 3.3203, + "step": 3499 + }, + { + "epoch": 0.42, + "learning_rate": 6.8110897162976595e-06, + "loss": 3.4077, + "step": 3500 + }, + { + "epoch": 0.42, + "learning_rate": 6.809225151539218e-06, + "loss": 3.4467, + "step": 3501 + }, + { + "epoch": 0.42, + "learning_rate": 6.807360297232384e-06, + "loss": 3.4322, + "step": 3502 + }, + { + "epoch": 0.42, + "learning_rate": 6.805495153675606e-06, + "loss": 3.4696, + "step": 3503 + }, + { + "epoch": 0.42, + "learning_rate": 6.803629721167383e-06, + "loss": 3.4701, + "step": 3504 + }, + { + "epoch": 0.42, + "learning_rate": 6.801764000006257e-06, + "loss": 3.4912, + "step": 3505 + }, + { + "epoch": 0.42, + "learning_rate": 6.799897990490819e-06, + "loss": 3.5376, + "step": 3506 + }, + { + "epoch": 0.42, + "learning_rate": 6.798031692919707e-06, + "loss": 3.4634, + "step": 3507 + }, + { + "epoch": 0.42, + "learning_rate": 6.796165107591599e-06, + "loss": 3.4652, + "step": 3508 + }, + { + "epoch": 0.42, + "learning_rate": 6.794298234805226e-06, + "loss": 3.5394, + "step": 3509 + }, + { + "epoch": 0.42, + "learning_rate": 6.792431074859364e-06, + "loss": 3.4313, + "step": 3510 + }, + { + "epoch": 0.42, + "learning_rate": 6.790563628052831e-06, + "loss": 3.5081, + "step": 3511 + }, + { + "epoch": 0.42, + "learning_rate": 6.788695894684494e-06, + "loss": 3.4216, + "step": 3512 + }, + { + "epoch": 0.42, + "learning_rate": 6.7868278750532635e-06, + "loss": 3.4357, + "step": 3513 + }, + { + "epoch": 0.42, + "learning_rate": 6.7849595694581e-06, + "loss": 3.4524, + "step": 3514 + }, + { + "epoch": 0.42, + "learning_rate": 6.7830909781980045e-06, + "loss": 3.435, + "step": 3515 + }, + { + "epoch": 0.42, + "learning_rate": 6.7812221015720284e-06, + "loss": 3.3715, + "step": 3516 + }, + { + "epoch": 0.42, + "learning_rate": 6.779352939879265e-06, + "loss": 3.4627, + "step": 3517 + }, + { + "epoch": 0.42, + "learning_rate": 6.777483493418856e-06, + "loss": 3.541, + "step": 3518 + }, + { + "epoch": 0.42, + "learning_rate": 6.775613762489988e-06, + "loss": 3.3846, + "step": 3519 + }, + { + "epoch": 0.42, + "learning_rate": 6.773743747391892e-06, + "loss": 3.3766, + "step": 3520 + }, + { + "epoch": 0.42, + "learning_rate": 6.771873448423845e-06, + "loss": 3.466, + "step": 3521 + }, + { + "epoch": 0.42, + "learning_rate": 6.77000286588517e-06, + "loss": 3.5241, + "step": 3522 + }, + { + "epoch": 0.42, + "learning_rate": 6.768132000075235e-06, + "loss": 3.3862, + "step": 3523 + }, + { + "epoch": 0.42, + "learning_rate": 6.766260851293452e-06, + "loss": 3.3812, + "step": 3524 + }, + { + "epoch": 0.42, + "learning_rate": 6.764389419839283e-06, + "loss": 3.4381, + "step": 3525 + }, + { + "epoch": 0.42, + "learning_rate": 6.762517706012231e-06, + "loss": 3.3648, + "step": 3526 + }, + { + "epoch": 0.42, + "learning_rate": 6.7606457101118415e-06, + "loss": 3.4127, + "step": 3527 + }, + { + "epoch": 0.42, + "learning_rate": 6.758773432437713e-06, + "loss": 3.4232, + "step": 3528 + }, + { + "epoch": 0.42, + "learning_rate": 6.7569008732894826e-06, + "loss": 3.3858, + "step": 3529 + }, + { + "epoch": 0.42, + "learning_rate": 6.755028032966835e-06, + "loss": 3.4513, + "step": 3530 + }, + { + "epoch": 0.42, + "learning_rate": 6.753154911769499e-06, + "loss": 3.506, + "step": 3531 + }, + { + "epoch": 0.42, + "learning_rate": 6.7512815099972515e-06, + "loss": 3.5052, + "step": 3532 + }, + { + "epoch": 0.42, + "learning_rate": 6.7494078279499075e-06, + "loss": 3.4991, + "step": 3533 + }, + { + "epoch": 0.42, + "learning_rate": 6.747533865927335e-06, + "loss": 3.4445, + "step": 3534 + }, + { + "epoch": 0.42, + "learning_rate": 6.7456596242294415e-06, + "loss": 3.4198, + "step": 3535 + }, + { + "epoch": 0.42, + "learning_rate": 6.74378510315618e-06, + "loss": 3.5149, + "step": 3536 + }, + { + "epoch": 0.42, + "learning_rate": 6.741910303007548e-06, + "loss": 3.418, + "step": 3537 + }, + { + "epoch": 0.42, + "learning_rate": 6.740035224083592e-06, + "loss": 3.4378, + "step": 3538 + }, + { + "epoch": 0.42, + "learning_rate": 6.7381598666843976e-06, + "loss": 3.4985, + "step": 3539 + }, + { + "epoch": 0.42, + "learning_rate": 6.736284231110095e-06, + "loss": 3.4424, + "step": 3540 + }, + { + "epoch": 0.42, + "learning_rate": 6.734408317660863e-06, + "loss": 3.4278, + "step": 3541 + }, + { + "epoch": 0.42, + "learning_rate": 6.732532126636924e-06, + "loss": 3.4204, + "step": 3542 + }, + { + "epoch": 0.42, + "learning_rate": 6.730655658338542e-06, + "loss": 3.3537, + "step": 3543 + }, + { + "epoch": 0.42, + "learning_rate": 6.728778913066026e-06, + "loss": 3.3436, + "step": 3544 + }, + { + "epoch": 0.42, + "learning_rate": 6.7269018911197324e-06, + "loss": 3.3981, + "step": 3545 + }, + { + "epoch": 0.42, + "learning_rate": 6.725024592800058e-06, + "loss": 3.3475, + "step": 3546 + }, + { + "epoch": 0.42, + "learning_rate": 6.7231470184074476e-06, + "loss": 3.44, + "step": 3547 + }, + { + "epoch": 0.42, + "learning_rate": 6.721269168242387e-06, + "loss": 3.4574, + "step": 3548 + }, + { + "epoch": 0.42, + "learning_rate": 6.7193910426054065e-06, + "loss": 3.4023, + "step": 3549 + }, + { + "epoch": 0.42, + "learning_rate": 6.717512641797083e-06, + "loss": 3.4493, + "step": 3550 + }, + { + "epoch": 0.43, + "learning_rate": 6.715633966118034e-06, + "loss": 3.4519, + "step": 3551 + }, + { + "epoch": 0.43, + "learning_rate": 6.713755015868925e-06, + "loss": 3.3147, + "step": 3552 + }, + { + "epoch": 0.43, + "learning_rate": 6.7118757913504624e-06, + "loss": 3.4032, + "step": 3553 + }, + { + "epoch": 0.43, + "learning_rate": 6.709996292863397e-06, + "loss": 3.4728, + "step": 3554 + }, + { + "epoch": 0.43, + "learning_rate": 6.708116520708522e-06, + "loss": 3.3931, + "step": 3555 + }, + { + "epoch": 0.43, + "learning_rate": 6.706236475186679e-06, + "loss": 3.4456, + "step": 3556 + }, + { + "epoch": 0.43, + "learning_rate": 6.704356156598749e-06, + "loss": 3.5012, + "step": 3557 + }, + { + "epoch": 0.43, + "learning_rate": 6.702475565245657e-06, + "loss": 3.4438, + "step": 3558 + }, + { + "epoch": 0.43, + "learning_rate": 6.700594701428377e-06, + "loss": 3.3773, + "step": 3559 + }, + { + "epoch": 0.43, + "learning_rate": 6.6987135654479185e-06, + "loss": 3.3907, + "step": 3560 + }, + { + "epoch": 0.43, + "learning_rate": 6.696832157605339e-06, + "loss": 3.4241, + "step": 3561 + }, + { + "epoch": 0.43, + "learning_rate": 6.694950478201741e-06, + "loss": 3.4155, + "step": 3562 + }, + { + "epoch": 0.43, + "learning_rate": 6.693068527538267e-06, + "loss": 3.4156, + "step": 3563 + }, + { + "epoch": 0.43, + "learning_rate": 6.6911863059161044e-06, + "loss": 3.5436, + "step": 3564 + }, + { + "epoch": 0.43, + "learning_rate": 6.689303813636483e-06, + "loss": 3.3497, + "step": 3565 + }, + { + "epoch": 0.43, + "learning_rate": 6.687421051000678e-06, + "loss": 3.3798, + "step": 3566 + }, + { + "epoch": 0.43, + "learning_rate": 6.685538018310009e-06, + "loss": 3.4954, + "step": 3567 + }, + { + "epoch": 0.43, + "learning_rate": 6.683654715865833e-06, + "loss": 3.3949, + "step": 3568 + }, + { + "epoch": 0.43, + "learning_rate": 6.681771143969554e-06, + "loss": 3.4584, + "step": 3569 + }, + { + "epoch": 0.43, + "learning_rate": 6.679887302922622e-06, + "loss": 3.4635, + "step": 3570 + }, + { + "epoch": 0.43, + "learning_rate": 6.6780031930265235e-06, + "loss": 3.4318, + "step": 3571 + }, + { + "epoch": 0.43, + "learning_rate": 6.676118814582793e-06, + "loss": 3.3602, + "step": 3572 + }, + { + "epoch": 0.43, + "learning_rate": 6.674234167893007e-06, + "loss": 3.519, + "step": 3573 + }, + { + "epoch": 0.43, + "learning_rate": 6.672349253258781e-06, + "loss": 3.4571, + "step": 3574 + }, + { + "epoch": 0.43, + "learning_rate": 6.670464070981782e-06, + "loss": 3.4773, + "step": 3575 + }, + { + "epoch": 0.43, + "learning_rate": 6.66857862136371e-06, + "loss": 3.4081, + "step": 3576 + }, + { + "epoch": 0.43, + "learning_rate": 6.666692904706317e-06, + "loss": 3.2787, + "step": 3577 + }, + { + "epoch": 0.43, + "learning_rate": 6.664806921311387e-06, + "loss": 3.4155, + "step": 3578 + }, + { + "epoch": 0.43, + "learning_rate": 6.6629206714807584e-06, + "loss": 3.4302, + "step": 3579 + }, + { + "epoch": 0.43, + "learning_rate": 6.661034155516304e-06, + "loss": 3.4717, + "step": 3580 + }, + { + "epoch": 0.43, + "learning_rate": 6.659147373719941e-06, + "loss": 3.3861, + "step": 3581 + }, + { + "epoch": 0.43, + "learning_rate": 6.657260326393631e-06, + "loss": 3.3478, + "step": 3582 + }, + { + "epoch": 0.43, + "learning_rate": 6.655373013839379e-06, + "loss": 3.4891, + "step": 3583 + }, + { + "epoch": 0.43, + "learning_rate": 6.6534854363592275e-06, + "loss": 3.4096, + "step": 3584 + }, + { + "epoch": 0.43, + "learning_rate": 6.651597594255266e-06, + "loss": 3.3619, + "step": 3585 + }, + { + "epoch": 0.43, + "learning_rate": 6.649709487829623e-06, + "loss": 3.5106, + "step": 3586 + }, + { + "epoch": 0.43, + "learning_rate": 6.647821117384473e-06, + "loss": 3.4174, + "step": 3587 + }, + { + "epoch": 0.43, + "learning_rate": 6.645932483222031e-06, + "loss": 3.38, + "step": 3588 + }, + { + "epoch": 0.43, + "learning_rate": 6.6440435856445525e-06, + "loss": 3.336, + "step": 3589 + }, + { + "epoch": 0.43, + "learning_rate": 6.642154424954337e-06, + "loss": 3.5287, + "step": 3590 + }, + { + "epoch": 0.43, + "learning_rate": 6.640265001453727e-06, + "loss": 3.4898, + "step": 3591 + }, + { + "epoch": 0.43, + "learning_rate": 6.638375315445105e-06, + "loss": 3.4472, + "step": 3592 + }, + { + "epoch": 0.43, + "learning_rate": 6.636485367230896e-06, + "loss": 3.4527, + "step": 3593 + }, + { + "epoch": 0.43, + "learning_rate": 6.634595157113567e-06, + "loss": 3.4576, + "step": 3594 + }, + { + "epoch": 0.43, + "learning_rate": 6.632704685395628e-06, + "loss": 3.4065, + "step": 3595 + }, + { + "epoch": 0.43, + "learning_rate": 6.630813952379631e-06, + "loss": 3.3887, + "step": 3596 + }, + { + "epoch": 0.43, + "learning_rate": 6.628922958368167e-06, + "loss": 3.4052, + "step": 3597 + }, + { + "epoch": 0.43, + "learning_rate": 6.627031703663871e-06, + "loss": 3.4918, + "step": 3598 + }, + { + "epoch": 0.43, + "learning_rate": 6.625140188569421e-06, + "loss": 3.4735, + "step": 3599 + }, + { + "epoch": 0.43, + "learning_rate": 6.623248413387535e-06, + "loss": 3.49, + "step": 3600 + }, + { + "epoch": 0.43, + "learning_rate": 6.62135637842097e-06, + "loss": 3.3188, + "step": 3601 + }, + { + "epoch": 0.43, + "learning_rate": 6.619464083972529e-06, + "loss": 3.3849, + "step": 3602 + }, + { + "epoch": 0.43, + "learning_rate": 6.617571530345055e-06, + "loss": 3.4486, + "step": 3603 + }, + { + "epoch": 0.43, + "learning_rate": 6.615678717841434e-06, + "loss": 3.544, + "step": 3604 + }, + { + "epoch": 0.43, + "learning_rate": 6.613785646764588e-06, + "loss": 3.3877, + "step": 3605 + }, + { + "epoch": 0.43, + "learning_rate": 6.611892317417486e-06, + "loss": 3.4231, + "step": 3606 + }, + { + "epoch": 0.43, + "learning_rate": 6.609998730103135e-06, + "loss": 3.5452, + "step": 3607 + }, + { + "epoch": 0.43, + "learning_rate": 6.6081048851245885e-06, + "loss": 3.3984, + "step": 3608 + }, + { + "epoch": 0.43, + "learning_rate": 6.6062107827849335e-06, + "loss": 3.4817, + "step": 3609 + }, + { + "epoch": 0.43, + "learning_rate": 6.6043164233873045e-06, + "loss": 3.4609, + "step": 3610 + }, + { + "epoch": 0.43, + "learning_rate": 6.6024218072348734e-06, + "loss": 3.5109, + "step": 3611 + }, + { + "epoch": 0.43, + "learning_rate": 6.600526934630855e-06, + "loss": 3.437, + "step": 3612 + }, + { + "epoch": 0.43, + "learning_rate": 6.598631805878506e-06, + "loss": 3.4346, + "step": 3613 + }, + { + "epoch": 0.43, + "learning_rate": 6.59673642128112e-06, + "loss": 3.5725, + "step": 3614 + }, + { + "epoch": 0.43, + "learning_rate": 6.594840781142037e-06, + "loss": 3.4052, + "step": 3615 + }, + { + "epoch": 0.43, + "learning_rate": 6.592944885764634e-06, + "loss": 3.4409, + "step": 3616 + }, + { + "epoch": 0.43, + "learning_rate": 6.59104873545233e-06, + "loss": 3.3447, + "step": 3617 + }, + { + "epoch": 0.43, + "learning_rate": 6.589152330508586e-06, + "loss": 3.3521, + "step": 3618 + }, + { + "epoch": 0.43, + "learning_rate": 6.587255671236901e-06, + "loss": 3.4029, + "step": 3619 + }, + { + "epoch": 0.43, + "learning_rate": 6.585358757940817e-06, + "loss": 3.4171, + "step": 3620 + }, + { + "epoch": 0.43, + "learning_rate": 6.583461590923918e-06, + "loss": 3.3859, + "step": 3621 + }, + { + "epoch": 0.43, + "learning_rate": 6.581564170489822e-06, + "loss": 3.5505, + "step": 3622 + }, + { + "epoch": 0.43, + "learning_rate": 6.5796664969421966e-06, + "loss": 3.4129, + "step": 3623 + }, + { + "epoch": 0.43, + "learning_rate": 6.577768570584744e-06, + "loss": 3.4186, + "step": 3624 + }, + { + "epoch": 0.43, + "learning_rate": 6.575870391721209e-06, + "loss": 3.493, + "step": 3625 + }, + { + "epoch": 0.43, + "learning_rate": 6.573971960655374e-06, + "loss": 3.3779, + "step": 3626 + }, + { + "epoch": 0.43, + "learning_rate": 6.572073277691066e-06, + "loss": 3.3912, + "step": 3627 + }, + { + "epoch": 0.43, + "learning_rate": 6.570174343132148e-06, + "loss": 3.4203, + "step": 3628 + }, + { + "epoch": 0.43, + "learning_rate": 6.5682751572825285e-06, + "loss": 3.3282, + "step": 3629 + }, + { + "epoch": 0.43, + "learning_rate": 6.566375720446152e-06, + "loss": 3.4307, + "step": 3630 + }, + { + "epoch": 0.43, + "learning_rate": 6.564476032927003e-06, + "loss": 3.5083, + "step": 3631 + }, + { + "epoch": 0.43, + "learning_rate": 6.56257609502911e-06, + "loss": 3.3382, + "step": 3632 + }, + { + "epoch": 0.43, + "learning_rate": 6.5606759070565366e-06, + "loss": 3.4286, + "step": 3633 + }, + { + "epoch": 0.44, + "learning_rate": 6.55877546931339e-06, + "loss": 3.373, + "step": 3634 + }, + { + "epoch": 0.44, + "learning_rate": 6.556874782103815e-06, + "loss": 3.4622, + "step": 3635 + }, + { + "epoch": 0.44, + "learning_rate": 6.554973845732e-06, + "loss": 3.3969, + "step": 3636 + }, + { + "epoch": 0.44, + "learning_rate": 6.553072660502169e-06, + "loss": 3.3734, + "step": 3637 + }, + { + "epoch": 0.44, + "learning_rate": 6.551171226718589e-06, + "loss": 3.4016, + "step": 3638 + }, + { + "epoch": 0.44, + "learning_rate": 6.549269544685564e-06, + "loss": 3.4196, + "step": 3639 + }, + { + "epoch": 0.44, + "learning_rate": 6.547367614707441e-06, + "loss": 3.3368, + "step": 3640 + }, + { + "epoch": 0.44, + "learning_rate": 6.545465437088603e-06, + "loss": 3.2841, + "step": 3641 + }, + { + "epoch": 0.44, + "learning_rate": 6.543563012133476e-06, + "loss": 3.472, + "step": 3642 + }, + { + "epoch": 0.44, + "learning_rate": 6.5416603401465216e-06, + "loss": 3.4138, + "step": 3643 + }, + { + "epoch": 0.44, + "learning_rate": 6.539757421432247e-06, + "loss": 3.3816, + "step": 3644 + }, + { + "epoch": 0.44, + "learning_rate": 6.537854256295193e-06, + "loss": 3.3544, + "step": 3645 + }, + { + "epoch": 0.44, + "learning_rate": 6.535950845039944e-06, + "loss": 3.3733, + "step": 3646 + }, + { + "epoch": 0.44, + "learning_rate": 6.534047187971118e-06, + "loss": 3.4551, + "step": 3647 + }, + { + "epoch": 0.44, + "learning_rate": 6.53214328539338e-06, + "loss": 3.4363, + "step": 3648 + }, + { + "epoch": 0.44, + "learning_rate": 6.5302391376114305e-06, + "loss": 3.3932, + "step": 3649 + }, + { + "epoch": 0.44, + "learning_rate": 6.528334744930007e-06, + "loss": 3.4107, + "step": 3650 + }, + { + "epoch": 0.44, + "learning_rate": 6.526430107653889e-06, + "loss": 3.5553, + "step": 3651 + }, + { + "epoch": 0.44, + "learning_rate": 6.524525226087898e-06, + "loss": 3.3892, + "step": 3652 + }, + { + "epoch": 0.44, + "learning_rate": 6.522620100536887e-06, + "loss": 3.5228, + "step": 3653 + }, + { + "epoch": 0.44, + "learning_rate": 6.5207147313057555e-06, + "loss": 3.4157, + "step": 3654 + }, + { + "epoch": 0.44, + "learning_rate": 6.518809118699436e-06, + "loss": 3.4938, + "step": 3655 + }, + { + "epoch": 0.44, + "learning_rate": 6.516903263022904e-06, + "loss": 3.4382, + "step": 3656 + }, + { + "epoch": 0.44, + "learning_rate": 6.514997164581173e-06, + "loss": 3.5883, + "step": 3657 + }, + { + "epoch": 0.44, + "learning_rate": 6.513090823679295e-06, + "loss": 3.5432, + "step": 3658 + }, + { + "epoch": 0.44, + "learning_rate": 6.511184240622359e-06, + "loss": 3.315, + "step": 3659 + }, + { + "epoch": 0.44, + "learning_rate": 6.509277415715497e-06, + "loss": 3.4146, + "step": 3660 + }, + { + "epoch": 0.44, + "learning_rate": 6.5073703492638775e-06, + "loss": 3.406, + "step": 3661 + }, + { + "epoch": 0.44, + "learning_rate": 6.505463041572705e-06, + "loss": 3.4027, + "step": 3662 + }, + { + "epoch": 0.44, + "learning_rate": 6.503555492947226e-06, + "loss": 3.4459, + "step": 3663 + }, + { + "epoch": 0.44, + "learning_rate": 6.501647703692725e-06, + "loss": 3.3941, + "step": 3664 + }, + { + "epoch": 0.44, + "learning_rate": 6.499739674114524e-06, + "loss": 3.3657, + "step": 3665 + }, + { + "epoch": 0.44, + "learning_rate": 6.497831404517986e-06, + "loss": 3.4839, + "step": 3666 + }, + { + "epoch": 0.44, + "learning_rate": 6.495922895208507e-06, + "loss": 3.289, + "step": 3667 + }, + { + "epoch": 0.44, + "learning_rate": 6.494014146491528e-06, + "loss": 3.3863, + "step": 3668 + }, + { + "epoch": 0.44, + "learning_rate": 6.492105158672523e-06, + "loss": 3.4407, + "step": 3669 + }, + { + "epoch": 0.44, + "learning_rate": 6.49019593205701e-06, + "loss": 3.5117, + "step": 3670 + }, + { + "epoch": 0.44, + "learning_rate": 6.488286466950535e-06, + "loss": 3.352, + "step": 3671 + }, + { + "epoch": 0.44, + "learning_rate": 6.486376763658695e-06, + "loss": 3.439, + "step": 3672 + }, + { + "epoch": 0.44, + "learning_rate": 6.484466822487116e-06, + "loss": 3.4961, + "step": 3673 + }, + { + "epoch": 0.44, + "learning_rate": 6.482556643741466e-06, + "loss": 3.3816, + "step": 3674 + }, + { + "epoch": 0.44, + "learning_rate": 6.480646227727449e-06, + "loss": 3.395, + "step": 3675 + }, + { + "epoch": 0.44, + "learning_rate": 6.47873557475081e-06, + "loss": 3.5016, + "step": 3676 + }, + { + "epoch": 0.44, + "learning_rate": 6.4768246851173285e-06, + "loss": 3.4262, + "step": 3677 + }, + { + "epoch": 0.44, + "learning_rate": 6.474913559132823e-06, + "loss": 3.3769, + "step": 3678 + }, + { + "epoch": 0.44, + "learning_rate": 6.473002197103149e-06, + "loss": 3.4373, + "step": 3679 + }, + { + "epoch": 0.44, + "learning_rate": 6.471090599334203e-06, + "loss": 3.4366, + "step": 3680 + }, + { + "epoch": 0.44, + "learning_rate": 6.469178766131917e-06, + "loss": 3.4083, + "step": 3681 + }, + { + "epoch": 0.44, + "learning_rate": 6.46726669780226e-06, + "loss": 3.4771, + "step": 3682 + }, + { + "epoch": 0.44, + "learning_rate": 6.46535439465124e-06, + "loss": 3.4966, + "step": 3683 + }, + { + "epoch": 0.44, + "learning_rate": 6.4634418569849e-06, + "loss": 3.4227, + "step": 3684 + }, + { + "epoch": 0.44, + "learning_rate": 6.461529085109326e-06, + "loss": 3.3582, + "step": 3685 + }, + { + "epoch": 0.44, + "learning_rate": 6.459616079330636e-06, + "loss": 3.4255, + "step": 3686 + }, + { + "epoch": 0.44, + "learning_rate": 6.457702839954987e-06, + "loss": 3.5158, + "step": 3687 + }, + { + "epoch": 0.44, + "learning_rate": 6.455789367288574e-06, + "loss": 3.3217, + "step": 3688 + }, + { + "epoch": 0.44, + "learning_rate": 6.453875661637629e-06, + "loss": 3.4828, + "step": 3689 + }, + { + "epoch": 0.44, + "learning_rate": 6.451961723308425e-06, + "loss": 3.4136, + "step": 3690 + }, + { + "epoch": 0.44, + "learning_rate": 6.450047552607264e-06, + "loss": 3.4827, + "step": 3691 + }, + { + "epoch": 0.44, + "learning_rate": 6.44813314984049e-06, + "loss": 3.4749, + "step": 3692 + }, + { + "epoch": 0.44, + "learning_rate": 6.446218515314488e-06, + "loss": 3.4339, + "step": 3693 + }, + { + "epoch": 0.44, + "learning_rate": 6.444303649335673e-06, + "loss": 3.5004, + "step": 3694 + }, + { + "epoch": 0.44, + "learning_rate": 6.442388552210499e-06, + "loss": 3.459, + "step": 3695 + }, + { + "epoch": 0.44, + "learning_rate": 6.440473224245462e-06, + "loss": 3.4297, + "step": 3696 + }, + { + "epoch": 0.44, + "learning_rate": 6.4385576657470895e-06, + "loss": 3.3419, + "step": 3697 + }, + { + "epoch": 0.44, + "learning_rate": 6.4366418770219455e-06, + "loss": 3.4584, + "step": 3698 + }, + { + "epoch": 0.44, + "learning_rate": 6.434725858376636e-06, + "loss": 3.3513, + "step": 3699 + }, + { + "epoch": 0.44, + "learning_rate": 6.432809610117798e-06, + "loss": 3.4153, + "step": 3700 + }, + { + "epoch": 0.44, + "learning_rate": 6.430893132552109e-06, + "loss": 3.3959, + "step": 3701 + }, + { + "epoch": 0.44, + "learning_rate": 6.428976425986283e-06, + "loss": 3.432, + "step": 3702 + }, + { + "epoch": 0.44, + "learning_rate": 6.427059490727069e-06, + "loss": 3.3837, + "step": 3703 + }, + { + "epoch": 0.44, + "learning_rate": 6.42514232708125e-06, + "loss": 3.4292, + "step": 3704 + }, + { + "epoch": 0.44, + "learning_rate": 6.4232249353556565e-06, + "loss": 3.4432, + "step": 3705 + }, + { + "epoch": 0.44, + "learning_rate": 6.421307315857141e-06, + "loss": 3.3464, + "step": 3706 + }, + { + "epoch": 0.44, + "learning_rate": 6.419389468892602e-06, + "loss": 3.4136, + "step": 3707 + }, + { + "epoch": 0.44, + "learning_rate": 6.4174713947689706e-06, + "loss": 3.4822, + "step": 3708 + }, + { + "epoch": 0.44, + "learning_rate": 6.415553093793217e-06, + "loss": 3.4664, + "step": 3709 + }, + { + "epoch": 0.44, + "learning_rate": 6.413634566272344e-06, + "loss": 3.5688, + "step": 3710 + }, + { + "epoch": 0.44, + "learning_rate": 6.411715812513395e-06, + "loss": 3.4928, + "step": 3711 + }, + { + "epoch": 0.44, + "learning_rate": 6.409796832823445e-06, + "loss": 3.4165, + "step": 3712 + }, + { + "epoch": 0.44, + "learning_rate": 6.407877627509611e-06, + "loss": 3.4613, + "step": 3713 + }, + { + "epoch": 0.44, + "learning_rate": 6.4059581968790395e-06, + "loss": 3.4518, + "step": 3714 + }, + { + "epoch": 0.44, + "learning_rate": 6.4040385412389175e-06, + "loss": 3.417, + "step": 3715 + }, + { + "epoch": 0.44, + "learning_rate": 6.4021186608964645e-06, + "loss": 3.4583, + "step": 3716 + }, + { + "epoch": 0.44, + "learning_rate": 6.400198556158942e-06, + "loss": 3.44, + "step": 3717 + }, + { + "epoch": 0.45, + "learning_rate": 6.3982782273336405e-06, + "loss": 3.3964, + "step": 3718 + }, + { + "epoch": 0.45, + "learning_rate": 6.3963576747278925e-06, + "loss": 3.3945, + "step": 3719 + }, + { + "epoch": 0.45, + "learning_rate": 6.394436898649059e-06, + "loss": 3.3871, + "step": 3720 + }, + { + "epoch": 0.45, + "learning_rate": 6.392515899404543e-06, + "loss": 3.4228, + "step": 3721 + }, + { + "epoch": 0.45, + "learning_rate": 6.390594677301784e-06, + "loss": 3.4698, + "step": 3722 + }, + { + "epoch": 0.45, + "learning_rate": 6.388673232648249e-06, + "loss": 3.4186, + "step": 3723 + }, + { + "epoch": 0.45, + "learning_rate": 6.38675156575145e-06, + "loss": 3.4477, + "step": 3724 + }, + { + "epoch": 0.45, + "learning_rate": 6.384829676918929e-06, + "loss": 3.4409, + "step": 3725 + }, + { + "epoch": 0.45, + "learning_rate": 6.382907566458266e-06, + "loss": 3.3907, + "step": 3726 + }, + { + "epoch": 0.45, + "learning_rate": 6.380985234677075e-06, + "loss": 3.3807, + "step": 3727 + }, + { + "epoch": 0.45, + "learning_rate": 6.379062681883006e-06, + "loss": 3.3268, + "step": 3728 + }, + { + "epoch": 0.45, + "learning_rate": 6.377139908383745e-06, + "loss": 3.4227, + "step": 3729 + }, + { + "epoch": 0.45, + "learning_rate": 6.3752169144870115e-06, + "loss": 3.4715, + "step": 3730 + }, + { + "epoch": 0.45, + "learning_rate": 6.373293700500563e-06, + "loss": 3.3889, + "step": 3731 + }, + { + "epoch": 0.45, + "learning_rate": 6.371370266732186e-06, + "loss": 3.3465, + "step": 3732 + }, + { + "epoch": 0.45, + "learning_rate": 6.369446613489714e-06, + "loss": 3.4752, + "step": 3733 + }, + { + "epoch": 0.45, + "learning_rate": 6.367522741081005e-06, + "loss": 3.3996, + "step": 3734 + }, + { + "epoch": 0.45, + "learning_rate": 6.365598649813955e-06, + "loss": 3.4873, + "step": 3735 + }, + { + "epoch": 0.45, + "learning_rate": 6.363674339996494e-06, + "loss": 3.4332, + "step": 3736 + }, + { + "epoch": 0.45, + "learning_rate": 6.361749811936593e-06, + "loss": 3.3752, + "step": 3737 + }, + { + "epoch": 0.45, + "learning_rate": 6.35982506594225e-06, + "loss": 3.5011, + "step": 3738 + }, + { + "epoch": 0.45, + "learning_rate": 6.357900102321502e-06, + "loss": 3.321, + "step": 3739 + }, + { + "epoch": 0.45, + "learning_rate": 6.355974921382422e-06, + "loss": 3.3859, + "step": 3740 + }, + { + "epoch": 0.45, + "learning_rate": 6.354049523433115e-06, + "loss": 3.4358, + "step": 3741 + }, + { + "epoch": 0.45, + "learning_rate": 6.352123908781719e-06, + "loss": 3.3629, + "step": 3742 + }, + { + "epoch": 0.45, + "learning_rate": 6.3501980777364125e-06, + "loss": 3.3811, + "step": 3743 + }, + { + "epoch": 0.45, + "learning_rate": 6.348272030605406e-06, + "loss": 3.3846, + "step": 3744 + }, + { + "epoch": 0.45, + "learning_rate": 6.346345767696941e-06, + "loss": 3.4505, + "step": 3745 + }, + { + "epoch": 0.45, + "learning_rate": 6.344419289319299e-06, + "loss": 3.3842, + "step": 3746 + }, + { + "epoch": 0.45, + "learning_rate": 6.3424925957807935e-06, + "loss": 3.3744, + "step": 3747 + }, + { + "epoch": 0.45, + "learning_rate": 6.340565687389773e-06, + "loss": 3.4362, + "step": 3748 + }, + { + "epoch": 0.45, + "learning_rate": 6.338638564454616e-06, + "loss": 3.4799, + "step": 3749 + }, + { + "epoch": 0.45, + "learning_rate": 6.336711227283744e-06, + "loss": 3.5681, + "step": 3750 + }, + { + "epoch": 0.45, + "learning_rate": 6.3347836761856065e-06, + "loss": 3.4372, + "step": 3751 + }, + { + "epoch": 0.45, + "learning_rate": 6.332855911468687e-06, + "loss": 3.4348, + "step": 3752 + }, + { + "epoch": 0.45, + "learning_rate": 6.330927933441508e-06, + "loss": 3.4477, + "step": 3753 + }, + { + "epoch": 0.45, + "learning_rate": 6.32899974241262e-06, + "loss": 3.4581, + "step": 3754 + }, + { + "epoch": 0.45, + "learning_rate": 6.327071338690614e-06, + "loss": 3.3266, + "step": 3755 + }, + { + "epoch": 0.45, + "learning_rate": 6.32514272258411e-06, + "loss": 3.4064, + "step": 3756 + }, + { + "epoch": 0.45, + "learning_rate": 6.323213894401762e-06, + "loss": 3.4336, + "step": 3757 + }, + { + "epoch": 0.45, + "learning_rate": 6.321284854452262e-06, + "loss": 3.4259, + "step": 3758 + }, + { + "epoch": 0.45, + "learning_rate": 6.319355603044334e-06, + "loss": 3.433, + "step": 3759 + }, + { + "epoch": 0.45, + "learning_rate": 6.317426140486733e-06, + "loss": 3.591, + "step": 3760 + }, + { + "epoch": 0.45, + "learning_rate": 6.3154964670882514e-06, + "loss": 3.3627, + "step": 3761 + }, + { + "epoch": 0.45, + "learning_rate": 6.3135665831577154e-06, + "loss": 3.3967, + "step": 3762 + }, + { + "epoch": 0.45, + "learning_rate": 6.311636489003983e-06, + "loss": 3.3594, + "step": 3763 + }, + { + "epoch": 0.45, + "learning_rate": 6.309706184935945e-06, + "loss": 3.4912, + "step": 3764 + }, + { + "epoch": 0.45, + "learning_rate": 6.307775671262526e-06, + "loss": 3.5225, + "step": 3765 + }, + { + "epoch": 0.45, + "learning_rate": 6.305844948292691e-06, + "loss": 3.4638, + "step": 3766 + }, + { + "epoch": 0.45, + "learning_rate": 6.303914016335428e-06, + "loss": 3.4119, + "step": 3767 + }, + { + "epoch": 0.45, + "learning_rate": 6.301982875699766e-06, + "loss": 3.4609, + "step": 3768 + }, + { + "epoch": 0.45, + "learning_rate": 6.300051526694762e-06, + "loss": 3.3592, + "step": 3769 + }, + { + "epoch": 0.45, + "learning_rate": 6.298119969629511e-06, + "loss": 3.3897, + "step": 3770 + }, + { + "epoch": 0.45, + "learning_rate": 6.29618820481314e-06, + "loss": 3.4289, + "step": 3771 + }, + { + "epoch": 0.45, + "learning_rate": 6.294256232554807e-06, + "loss": 3.3782, + "step": 3772 + }, + { + "epoch": 0.45, + "learning_rate": 6.292324053163705e-06, + "loss": 3.3444, + "step": 3773 + }, + { + "epoch": 0.45, + "learning_rate": 6.290391666949063e-06, + "loss": 3.4483, + "step": 3774 + }, + { + "epoch": 0.45, + "learning_rate": 6.288459074220136e-06, + "loss": 3.4673, + "step": 3775 + }, + { + "epoch": 0.45, + "learning_rate": 6.286526275286218e-06, + "loss": 3.3599, + "step": 3776 + }, + { + "epoch": 0.45, + "learning_rate": 6.284593270456634e-06, + "loss": 3.4833, + "step": 3777 + }, + { + "epoch": 0.45, + "learning_rate": 6.282660060040742e-06, + "loss": 3.3844, + "step": 3778 + }, + { + "epoch": 0.45, + "learning_rate": 6.280726644347932e-06, + "loss": 3.379, + "step": 3779 + }, + { + "epoch": 0.45, + "learning_rate": 6.27879302368763e-06, + "loss": 3.4295, + "step": 3780 + }, + { + "epoch": 0.45, + "learning_rate": 6.2768591983692915e-06, + "loss": 3.3852, + "step": 3781 + }, + { + "epoch": 0.45, + "learning_rate": 6.274925168702406e-06, + "loss": 3.4242, + "step": 3782 + }, + { + "epoch": 0.45, + "learning_rate": 6.272990934996495e-06, + "loss": 3.4769, + "step": 3783 + }, + { + "epoch": 0.45, + "learning_rate": 6.2710564975611145e-06, + "loss": 3.4333, + "step": 3784 + }, + { + "epoch": 0.45, + "learning_rate": 6.269121856705851e-06, + "loss": 3.4367, + "step": 3785 + }, + { + "epoch": 0.45, + "learning_rate": 6.267187012740324e-06, + "loss": 3.4284, + "step": 3786 + }, + { + "epoch": 0.45, + "learning_rate": 6.265251965974189e-06, + "loss": 3.4261, + "step": 3787 + }, + { + "epoch": 0.45, + "learning_rate": 6.263316716717126e-06, + "loss": 3.3768, + "step": 3788 + }, + { + "epoch": 0.45, + "learning_rate": 6.261381265278854e-06, + "loss": 3.3941, + "step": 3789 + }, + { + "epoch": 0.45, + "learning_rate": 6.259445611969126e-06, + "loss": 3.4934, + "step": 3790 + }, + { + "epoch": 0.45, + "learning_rate": 6.257509757097719e-06, + "loss": 3.4138, + "step": 3791 + }, + { + "epoch": 0.45, + "learning_rate": 6.255573700974452e-06, + "loss": 3.3556, + "step": 3792 + }, + { + "epoch": 0.45, + "learning_rate": 6.253637443909166e-06, + "loss": 3.4467, + "step": 3793 + }, + { + "epoch": 0.45, + "learning_rate": 6.251700986211745e-06, + "loss": 3.3974, + "step": 3794 + }, + { + "epoch": 0.45, + "learning_rate": 6.249764328192097e-06, + "loss": 3.4614, + "step": 3795 + }, + { + "epoch": 0.45, + "learning_rate": 6.247827470160165e-06, + "loss": 3.3647, + "step": 3796 + }, + { + "epoch": 0.45, + "learning_rate": 6.245890412425922e-06, + "loss": 3.3779, + "step": 3797 + }, + { + "epoch": 0.45, + "learning_rate": 6.243953155299378e-06, + "loss": 3.4217, + "step": 3798 + }, + { + "epoch": 0.45, + "learning_rate": 6.24201569909057e-06, + "loss": 3.3289, + "step": 3799 + }, + { + "epoch": 0.45, + "learning_rate": 6.240078044109569e-06, + "loss": 3.3669, + "step": 3800 + }, + { + "epoch": 0.46, + "learning_rate": 6.238140190666475e-06, + "loss": 3.3955, + "step": 3801 + }, + { + "epoch": 0.46, + "learning_rate": 6.236202139071427e-06, + "loss": 3.4813, + "step": 3802 + }, + { + "epoch": 0.46, + "learning_rate": 6.234263889634586e-06, + "loss": 3.4106, + "step": 3803 + }, + { + "epoch": 0.46, + "learning_rate": 6.232325442666152e-06, + "loss": 3.4254, + "step": 3804 + }, + { + "epoch": 0.46, + "learning_rate": 6.230386798476351e-06, + "loss": 3.464, + "step": 3805 + }, + { + "epoch": 0.46, + "learning_rate": 6.228447957375447e-06, + "loss": 3.4076, + "step": 3806 + }, + { + "epoch": 0.46, + "learning_rate": 6.226508919673733e-06, + "loss": 3.4069, + "step": 3807 + }, + { + "epoch": 0.46, + "learning_rate": 6.224569685681528e-06, + "loss": 3.4252, + "step": 3808 + }, + { + "epoch": 0.46, + "learning_rate": 6.22263025570919e-06, + "loss": 3.3838, + "step": 3809 + }, + { + "epoch": 0.46, + "learning_rate": 6.220690630067107e-06, + "loss": 3.4642, + "step": 3810 + }, + { + "epoch": 0.46, + "learning_rate": 6.218750809065694e-06, + "loss": 3.398, + "step": 3811 + }, + { + "epoch": 0.46, + "learning_rate": 6.216810793015399e-06, + "loss": 3.4509, + "step": 3812 + }, + { + "epoch": 0.46, + "learning_rate": 6.214870582226706e-06, + "loss": 3.424, + "step": 3813 + }, + { + "epoch": 0.46, + "learning_rate": 6.212930177010121e-06, + "loss": 3.3722, + "step": 3814 + }, + { + "epoch": 0.46, + "learning_rate": 6.210989577676192e-06, + "loss": 3.4338, + "step": 3815 + }, + { + "epoch": 0.46, + "learning_rate": 6.209048784535489e-06, + "loss": 3.3473, + "step": 3816 + }, + { + "epoch": 0.46, + "learning_rate": 6.2071077978986194e-06, + "loss": 3.5154, + "step": 3817 + }, + { + "epoch": 0.46, + "learning_rate": 6.205166618076215e-06, + "loss": 3.4005, + "step": 3818 + }, + { + "epoch": 0.46, + "learning_rate": 6.203225245378944e-06, + "loss": 3.4169, + "step": 3819 + }, + { + "epoch": 0.46, + "learning_rate": 6.201283680117506e-06, + "loss": 3.5047, + "step": 3820 + }, + { + "epoch": 0.46, + "learning_rate": 6.199341922602626e-06, + "loss": 3.389, + "step": 3821 + }, + { + "epoch": 0.46, + "learning_rate": 6.197399973145063e-06, + "loss": 3.3939, + "step": 3822 + }, + { + "epoch": 0.46, + "learning_rate": 6.195457832055608e-06, + "loss": 3.2948, + "step": 3823 + }, + { + "epoch": 0.46, + "learning_rate": 6.19351549964508e-06, + "loss": 3.3652, + "step": 3824 + }, + { + "epoch": 0.46, + "learning_rate": 6.191572976224331e-06, + "loss": 3.3125, + "step": 3825 + }, + { + "epoch": 0.46, + "learning_rate": 6.1896302621042425e-06, + "loss": 3.3738, + "step": 3826 + }, + { + "epoch": 0.46, + "learning_rate": 6.187687357595725e-06, + "loss": 3.5769, + "step": 3827 + }, + { + "epoch": 0.46, + "learning_rate": 6.185744263009723e-06, + "loss": 3.3578, + "step": 3828 + }, + { + "epoch": 0.46, + "learning_rate": 6.1838009786572086e-06, + "loss": 3.4268, + "step": 3829 + }, + { + "epoch": 0.46, + "learning_rate": 6.181857504849183e-06, + "loss": 3.4071, + "step": 3830 + }, + { + "epoch": 0.46, + "learning_rate": 6.179913841896682e-06, + "loss": 3.3812, + "step": 3831 + }, + { + "epoch": 0.46, + "learning_rate": 6.17796999011077e-06, + "loss": 3.3693, + "step": 3832 + }, + { + "epoch": 0.46, + "learning_rate": 6.176025949802539e-06, + "loss": 3.3876, + "step": 3833 + }, + { + "epoch": 0.46, + "learning_rate": 6.174081721283114e-06, + "loss": 3.4976, + "step": 3834 + }, + { + "epoch": 0.46, + "learning_rate": 6.17213730486365e-06, + "loss": 3.439, + "step": 3835 + }, + { + "epoch": 0.46, + "learning_rate": 6.1701927008553305e-06, + "loss": 3.4336, + "step": 3836 + }, + { + "epoch": 0.46, + "learning_rate": 6.168247909569372e-06, + "loss": 3.5033, + "step": 3837 + }, + { + "epoch": 0.46, + "learning_rate": 6.166302931317014e-06, + "loss": 3.4642, + "step": 3838 + }, + { + "epoch": 0.46, + "learning_rate": 6.164357766409536e-06, + "loss": 3.4041, + "step": 3839 + }, + { + "epoch": 0.46, + "learning_rate": 6.162412415158241e-06, + "loss": 3.358, + "step": 3840 + }, + { + "epoch": 0.46, + "learning_rate": 6.1604668778744605e-06, + "loss": 3.4543, + "step": 3841 + }, + { + "epoch": 0.46, + "learning_rate": 6.15852115486956e-06, + "loss": 3.4018, + "step": 3842 + }, + { + "epoch": 0.46, + "learning_rate": 6.1565752464549345e-06, + "loss": 3.5084, + "step": 3843 + }, + { + "epoch": 0.46, + "learning_rate": 6.154629152942004e-06, + "loss": 3.5086, + "step": 3844 + }, + { + "epoch": 0.46, + "learning_rate": 6.152682874642225e-06, + "loss": 3.4042, + "step": 3845 + }, + { + "epoch": 0.46, + "learning_rate": 6.150736411867077e-06, + "loss": 3.5616, + "step": 3846 + }, + { + "epoch": 0.46, + "learning_rate": 6.148789764928073e-06, + "loss": 3.4128, + "step": 3847 + }, + { + "epoch": 0.46, + "learning_rate": 6.146842934136753e-06, + "loss": 3.5049, + "step": 3848 + }, + { + "epoch": 0.46, + "learning_rate": 6.14489591980469e-06, + "loss": 3.3708, + "step": 3849 + }, + { + "epoch": 0.46, + "learning_rate": 6.142948722243483e-06, + "loss": 3.4544, + "step": 3850 + }, + { + "epoch": 0.46, + "learning_rate": 6.141001341764762e-06, + "loss": 3.445, + "step": 3851 + }, + { + "epoch": 0.46, + "learning_rate": 6.139053778680185e-06, + "loss": 3.4776, + "step": 3852 + }, + { + "epoch": 0.46, + "learning_rate": 6.137106033301441e-06, + "loss": 3.5389, + "step": 3853 + }, + { + "epoch": 0.46, + "learning_rate": 6.135158105940244e-06, + "loss": 3.3736, + "step": 3854 + }, + { + "epoch": 0.46, + "learning_rate": 6.133209996908346e-06, + "loss": 3.5305, + "step": 3855 + }, + { + "epoch": 0.46, + "learning_rate": 6.131261706517518e-06, + "loss": 3.4484, + "step": 3856 + }, + { + "epoch": 0.46, + "learning_rate": 6.129313235079567e-06, + "loss": 3.4216, + "step": 3857 + }, + { + "epoch": 0.46, + "learning_rate": 6.127364582906323e-06, + "loss": 3.4917, + "step": 3858 + }, + { + "epoch": 0.46, + "learning_rate": 6.125415750309651e-06, + "loss": 3.4444, + "step": 3859 + }, + { + "epoch": 0.46, + "learning_rate": 6.123466737601442e-06, + "loss": 3.3368, + "step": 3860 + }, + { + "epoch": 0.46, + "learning_rate": 6.121517545093616e-06, + "loss": 3.5524, + "step": 3861 + }, + { + "epoch": 0.46, + "learning_rate": 6.11956817309812e-06, + "loss": 3.4349, + "step": 3862 + }, + { + "epoch": 0.46, + "learning_rate": 6.117618621926933e-06, + "loss": 3.3351, + "step": 3863 + }, + { + "epoch": 0.46, + "learning_rate": 6.115668891892062e-06, + "loss": 3.4128, + "step": 3864 + }, + { + "epoch": 0.46, + "learning_rate": 6.1137189833055396e-06, + "loss": 3.4519, + "step": 3865 + }, + { + "epoch": 0.46, + "learning_rate": 6.111768896479431e-06, + "loss": 3.4185, + "step": 3866 + }, + { + "epoch": 0.46, + "learning_rate": 6.109818631725827e-06, + "loss": 3.4761, + "step": 3867 + }, + { + "epoch": 0.46, + "learning_rate": 6.107868189356848e-06, + "loss": 3.5089, + "step": 3868 + }, + { + "epoch": 0.46, + "learning_rate": 6.105917569684643e-06, + "loss": 3.4602, + "step": 3869 + }, + { + "epoch": 0.46, + "learning_rate": 6.10396677302139e-06, + "loss": 3.4033, + "step": 3870 + }, + { + "epoch": 0.46, + "learning_rate": 6.102015799679293e-06, + "loss": 3.5089, + "step": 3871 + }, + { + "epoch": 0.46, + "learning_rate": 6.100064649970587e-06, + "loss": 3.405, + "step": 3872 + }, + { + "epoch": 0.46, + "learning_rate": 6.0981133242075355e-06, + "loss": 3.3718, + "step": 3873 + }, + { + "epoch": 0.46, + "learning_rate": 6.096161822702424e-06, + "loss": 3.4137, + "step": 3874 + }, + { + "epoch": 0.46, + "learning_rate": 6.094210145767575e-06, + "loss": 3.393, + "step": 3875 + }, + { + "epoch": 0.46, + "learning_rate": 6.092258293715334e-06, + "loss": 3.4663, + "step": 3876 + }, + { + "epoch": 0.46, + "learning_rate": 6.090306266858074e-06, + "loss": 3.4271, + "step": 3877 + }, + { + "epoch": 0.46, + "learning_rate": 6.088354065508198e-06, + "loss": 3.416, + "step": 3878 + }, + { + "epoch": 0.46, + "learning_rate": 6.086401689978136e-06, + "loss": 3.4742, + "step": 3879 + }, + { + "epoch": 0.46, + "learning_rate": 6.0844491405803485e-06, + "loss": 3.4326, + "step": 3880 + }, + { + "epoch": 0.46, + "learning_rate": 6.082496417627319e-06, + "loss": 3.3925, + "step": 3881 + }, + { + "epoch": 0.46, + "learning_rate": 6.0805435214315636e-06, + "loss": 3.4383, + "step": 3882 + }, + { + "epoch": 0.46, + "learning_rate": 6.078590452305619e-06, + "loss": 3.5154, + "step": 3883 + }, + { + "epoch": 0.46, + "learning_rate": 6.076637210562059e-06, + "loss": 3.3634, + "step": 3884 + }, + { + "epoch": 0.47, + "learning_rate": 6.074683796513481e-06, + "loss": 3.4804, + "step": 3885 + }, + { + "epoch": 0.47, + "learning_rate": 6.072730210472506e-06, + "loss": 3.4329, + "step": 3886 + }, + { + "epoch": 0.47, + "learning_rate": 6.070776452751785e-06, + "loss": 3.4623, + "step": 3887 + }, + { + "epoch": 0.47, + "learning_rate": 6.068822523664003e-06, + "loss": 3.4628, + "step": 3888 + }, + { + "epoch": 0.47, + "learning_rate": 6.066868423521862e-06, + "loss": 3.3918, + "step": 3889 + }, + { + "epoch": 0.47, + "learning_rate": 6.064914152638099e-06, + "loss": 3.3967, + "step": 3890 + }, + { + "epoch": 0.47, + "learning_rate": 6.062959711325473e-06, + "loss": 3.36, + "step": 3891 + }, + { + "epoch": 0.47, + "learning_rate": 6.061005099896774e-06, + "loss": 3.4411, + "step": 3892 + }, + { + "epoch": 0.47, + "learning_rate": 6.059050318664819e-06, + "loss": 3.455, + "step": 3893 + }, + { + "epoch": 0.47, + "learning_rate": 6.057095367942449e-06, + "loss": 3.3894, + "step": 3894 + }, + { + "epoch": 0.47, + "learning_rate": 6.055140248042536e-06, + "loss": 3.3919, + "step": 3895 + }, + { + "epoch": 0.47, + "learning_rate": 6.053184959277977e-06, + "loss": 3.4165, + "step": 3896 + }, + { + "epoch": 0.47, + "learning_rate": 6.051229501961697e-06, + "loss": 3.4414, + "step": 3897 + }, + { + "epoch": 0.47, + "learning_rate": 6.0492738764066465e-06, + "loss": 3.5039, + "step": 3898 + }, + { + "epoch": 0.47, + "learning_rate": 6.047318082925803e-06, + "loss": 3.4724, + "step": 3899 + }, + { + "epoch": 0.47, + "learning_rate": 6.045362121832173e-06, + "loss": 3.3273, + "step": 3900 + }, + { + "epoch": 0.47, + "learning_rate": 6.043405993438789e-06, + "loss": 3.3779, + "step": 3901 + }, + { + "epoch": 0.47, + "learning_rate": 6.041449698058709e-06, + "loss": 3.469, + "step": 3902 + }, + { + "epoch": 0.47, + "learning_rate": 6.039493236005017e-06, + "loss": 3.4562, + "step": 3903 + }, + { + "epoch": 0.47, + "learning_rate": 6.037536607590829e-06, + "loss": 3.522, + "step": 3904 + }, + { + "epoch": 0.47, + "learning_rate": 6.035579813129281e-06, + "loss": 3.4272, + "step": 3905 + }, + { + "epoch": 0.47, + "learning_rate": 6.033622852933538e-06, + "loss": 3.4398, + "step": 3906 + }, + { + "epoch": 0.47, + "learning_rate": 6.031665727316793e-06, + "loss": 3.4172, + "step": 3907 + }, + { + "epoch": 0.47, + "learning_rate": 6.0297084365922655e-06, + "loss": 3.3979, + "step": 3908 + }, + { + "epoch": 0.47, + "learning_rate": 6.027750981073198e-06, + "loss": 3.4332, + "step": 3909 + }, + { + "epoch": 0.47, + "learning_rate": 6.0257933610728625e-06, + "loss": 3.4887, + "step": 3910 + }, + { + "epoch": 0.47, + "learning_rate": 6.023835576904558e-06, + "loss": 3.4086, + "step": 3911 + }, + { + "epoch": 0.47, + "learning_rate": 6.021877628881606e-06, + "loss": 3.4187, + "step": 3912 + }, + { + "epoch": 0.47, + "learning_rate": 6.019919517317356e-06, + "loss": 3.4389, + "step": 3913 + }, + { + "epoch": 0.47, + "learning_rate": 6.017961242525189e-06, + "loss": 3.4038, + "step": 3914 + }, + { + "epoch": 0.47, + "learning_rate": 6.016002804818502e-06, + "loss": 3.4595, + "step": 3915 + }, + { + "epoch": 0.47, + "learning_rate": 6.014044204510725e-06, + "loss": 3.395, + "step": 3916 + }, + { + "epoch": 0.47, + "learning_rate": 6.012085441915314e-06, + "loss": 3.4295, + "step": 3917 + }, + { + "epoch": 0.47, + "learning_rate": 6.010126517345748e-06, + "loss": 3.3921, + "step": 3918 + }, + { + "epoch": 0.47, + "learning_rate": 6.008167431115532e-06, + "loss": 3.4195, + "step": 3919 + }, + { + "epoch": 0.47, + "learning_rate": 6.006208183538201e-06, + "loss": 3.4115, + "step": 3920 + }, + { + "epoch": 0.47, + "learning_rate": 6.0042487749273115e-06, + "loss": 3.4219, + "step": 3921 + }, + { + "epoch": 0.47, + "learning_rate": 6.002289205596447e-06, + "loss": 3.4381, + "step": 3922 + }, + { + "epoch": 0.47, + "learning_rate": 6.000329475859217e-06, + "loss": 3.4107, + "step": 3923 + }, + { + "epoch": 0.47, + "learning_rate": 5.998369586029258e-06, + "loss": 3.3846, + "step": 3924 + }, + { + "epoch": 0.47, + "learning_rate": 5.996409536420229e-06, + "loss": 3.3787, + "step": 3925 + }, + { + "epoch": 0.47, + "learning_rate": 5.994449327345819e-06, + "loss": 3.4121, + "step": 3926 + }, + { + "epoch": 0.47, + "learning_rate": 5.992488959119734e-06, + "loss": 3.5011, + "step": 3927 + }, + { + "epoch": 0.47, + "learning_rate": 5.990528432055718e-06, + "loss": 3.4404, + "step": 3928 + }, + { + "epoch": 0.47, + "learning_rate": 5.988567746467531e-06, + "loss": 3.4873, + "step": 3929 + }, + { + "epoch": 0.47, + "learning_rate": 5.98660690266896e-06, + "loss": 3.4121, + "step": 3930 + }, + { + "epoch": 0.47, + "learning_rate": 5.98464590097382e-06, + "loss": 3.438, + "step": 3931 + }, + { + "epoch": 0.47, + "learning_rate": 5.9826847416959485e-06, + "loss": 3.4787, + "step": 3932 + }, + { + "epoch": 0.47, + "learning_rate": 5.9807234251492116e-06, + "loss": 3.4583, + "step": 3933 + }, + { + "epoch": 0.47, + "learning_rate": 5.978761951647497e-06, + "loss": 3.3876, + "step": 3934 + }, + { + "epoch": 0.47, + "learning_rate": 5.976800321504718e-06, + "loss": 3.479, + "step": 3935 + }, + { + "epoch": 0.47, + "learning_rate": 5.974838535034814e-06, + "loss": 3.4793, + "step": 3936 + }, + { + "epoch": 0.47, + "learning_rate": 5.972876592551753e-06, + "loss": 3.3569, + "step": 3937 + }, + { + "epoch": 0.47, + "learning_rate": 5.97091449436952e-06, + "loss": 3.42, + "step": 3938 + }, + { + "epoch": 0.47, + "learning_rate": 5.9689522408021295e-06, + "loss": 3.4168, + "step": 3939 + }, + { + "epoch": 0.47, + "learning_rate": 5.966989832163623e-06, + "loss": 3.468, + "step": 3940 + }, + { + "epoch": 0.47, + "learning_rate": 5.965027268768063e-06, + "loss": 3.3956, + "step": 3941 + }, + { + "epoch": 0.47, + "learning_rate": 5.963064550929538e-06, + "loss": 3.3627, + "step": 3942 + }, + { + "epoch": 0.47, + "learning_rate": 5.9611016789621624e-06, + "loss": 3.4046, + "step": 3943 + }, + { + "epoch": 0.47, + "learning_rate": 5.959138653180071e-06, + "loss": 3.4122, + "step": 3944 + }, + { + "epoch": 0.47, + "learning_rate": 5.957175473897431e-06, + "loss": 3.3511, + "step": 3945 + }, + { + "epoch": 0.47, + "learning_rate": 5.955212141428427e-06, + "loss": 3.3973, + "step": 3946 + }, + { + "epoch": 0.47, + "learning_rate": 5.95324865608727e-06, + "loss": 3.442, + "step": 3947 + }, + { + "epoch": 0.47, + "learning_rate": 5.951285018188199e-06, + "loss": 3.4128, + "step": 3948 + }, + { + "epoch": 0.47, + "learning_rate": 5.9493212280454716e-06, + "loss": 3.4466, + "step": 3949 + }, + { + "epoch": 0.47, + "learning_rate": 5.9473572859733755e-06, + "loss": 3.4957, + "step": 3950 + }, + { + "epoch": 0.47, + "learning_rate": 5.945393192286217e-06, + "loss": 3.4992, + "step": 3951 + }, + { + "epoch": 0.47, + "learning_rate": 5.943428947298333e-06, + "loss": 3.3663, + "step": 3952 + }, + { + "epoch": 0.47, + "learning_rate": 5.941464551324079e-06, + "loss": 3.3997, + "step": 3953 + }, + { + "epoch": 0.47, + "learning_rate": 5.9395000046778365e-06, + "loss": 3.4712, + "step": 3954 + }, + { + "epoch": 0.47, + "learning_rate": 5.9375353076740135e-06, + "loss": 3.533, + "step": 3955 + }, + { + "epoch": 0.47, + "learning_rate": 5.93557046062704e-06, + "loss": 3.388, + "step": 3956 + }, + { + "epoch": 0.47, + "learning_rate": 5.9336054638513675e-06, + "loss": 3.3673, + "step": 3957 + }, + { + "epoch": 0.47, + "learning_rate": 5.9316403176614765e-06, + "loss": 3.5185, + "step": 3958 + }, + { + "epoch": 0.47, + "learning_rate": 5.92967502237187e-06, + "loss": 3.428, + "step": 3959 + }, + { + "epoch": 0.47, + "learning_rate": 5.9277095782970695e-06, + "loss": 3.5703, + "step": 3960 + }, + { + "epoch": 0.47, + "learning_rate": 5.92574398575163e-06, + "loss": 3.4868, + "step": 3961 + }, + { + "epoch": 0.47, + "learning_rate": 5.923778245050121e-06, + "loss": 3.3981, + "step": 3962 + }, + { + "epoch": 0.47, + "learning_rate": 5.921812356507142e-06, + "loss": 3.3174, + "step": 3963 + }, + { + "epoch": 0.47, + "learning_rate": 5.91984632043731e-06, + "loss": 3.5034, + "step": 3964 + }, + { + "epoch": 0.47, + "learning_rate": 5.9178801371552755e-06, + "loss": 3.3855, + "step": 3965 + }, + { + "epoch": 0.47, + "learning_rate": 5.915913806975701e-06, + "loss": 3.4218, + "step": 3966 + }, + { + "epoch": 0.47, + "learning_rate": 5.913947330213282e-06, + "loss": 3.4757, + "step": 3967 + }, + { + "epoch": 0.48, + "learning_rate": 5.9119807071827295e-06, + "loss": 3.4492, + "step": 3968 + }, + { + "epoch": 0.48, + "learning_rate": 5.9100139381987865e-06, + "loss": 3.4474, + "step": 3969 + }, + { + "epoch": 0.48, + "learning_rate": 5.90804702357621e-06, + "loss": 3.4254, + "step": 3970 + }, + { + "epoch": 0.48, + "learning_rate": 5.906079963629788e-06, + "loss": 3.4144, + "step": 3971 + }, + { + "epoch": 0.48, + "learning_rate": 5.904112758674327e-06, + "loss": 3.4942, + "step": 3972 + }, + { + "epoch": 0.48, + "learning_rate": 5.902145409024661e-06, + "loss": 3.5044, + "step": 3973 + }, + { + "epoch": 0.48, + "learning_rate": 5.900177914995643e-06, + "loss": 3.3731, + "step": 3974 + }, + { + "epoch": 0.48, + "learning_rate": 5.898210276902151e-06, + "loss": 3.3507, + "step": 3975 + }, + { + "epoch": 0.48, + "learning_rate": 5.896242495059083e-06, + "loss": 3.4727, + "step": 3976 + }, + { + "epoch": 0.48, + "learning_rate": 5.8942745697813684e-06, + "loss": 3.432, + "step": 3977 + }, + { + "epoch": 0.48, + "learning_rate": 5.89230650138395e-06, + "loss": 3.4183, + "step": 3978 + }, + { + "epoch": 0.48, + "learning_rate": 5.890338290181799e-06, + "loss": 3.4172, + "step": 3979 + }, + { + "epoch": 0.48, + "learning_rate": 5.888369936489906e-06, + "loss": 3.4028, + "step": 3980 + }, + { + "epoch": 0.48, + "learning_rate": 5.8864014406232886e-06, + "loss": 3.4033, + "step": 3981 + }, + { + "epoch": 0.48, + "learning_rate": 5.884432802896984e-06, + "loss": 3.3973, + "step": 3982 + }, + { + "epoch": 0.48, + "learning_rate": 5.8824640236260535e-06, + "loss": 3.4512, + "step": 3983 + }, + { + "epoch": 0.48, + "learning_rate": 5.880495103125579e-06, + "loss": 3.3719, + "step": 3984 + }, + { + "epoch": 0.48, + "learning_rate": 5.878526041710667e-06, + "loss": 3.3552, + "step": 3985 + }, + { + "epoch": 0.48, + "learning_rate": 5.876556839696448e-06, + "loss": 3.4162, + "step": 3986 + }, + { + "epoch": 0.48, + "learning_rate": 5.874587497398072e-06, + "loss": 3.4965, + "step": 3987 + }, + { + "epoch": 0.48, + "learning_rate": 5.872618015130711e-06, + "loss": 3.3927, + "step": 3988 + }, + { + "epoch": 0.48, + "learning_rate": 5.870648393209563e-06, + "loss": 3.4568, + "step": 3989 + }, + { + "epoch": 0.48, + "learning_rate": 5.868678631949845e-06, + "loss": 3.3661, + "step": 3990 + }, + { + "epoch": 0.48, + "learning_rate": 5.866708731666799e-06, + "loss": 3.3325, + "step": 3991 + }, + { + "epoch": 0.48, + "learning_rate": 5.864738692675686e-06, + "loss": 3.3287, + "step": 3992 + }, + { + "epoch": 0.48, + "learning_rate": 5.862768515291792e-06, + "loss": 3.3718, + "step": 3993 + }, + { + "epoch": 0.48, + "learning_rate": 5.860798199830425e-06, + "loss": 3.4484, + "step": 3994 + }, + { + "epoch": 0.48, + "learning_rate": 5.858827746606912e-06, + "loss": 3.4475, + "step": 3995 + }, + { + "epoch": 0.48, + "learning_rate": 5.856857155936607e-06, + "loss": 3.4556, + "step": 3996 + }, + { + "epoch": 0.48, + "learning_rate": 5.854886428134882e-06, + "loss": 3.4558, + "step": 3997 + }, + { + "epoch": 0.48, + "learning_rate": 5.852915563517132e-06, + "loss": 3.4705, + "step": 3998 + }, + { + "epoch": 0.48, + "learning_rate": 5.850944562398776e-06, + "loss": 3.3541, + "step": 3999 + }, + { + "epoch": 0.48, + "learning_rate": 5.84897342509525e-06, + "loss": 3.4248, + "step": 4000 + }, + { + "epoch": 0.48, + "learning_rate": 5.8470021519220165e-06, + "loss": 3.443, + "step": 4001 + }, + { + "epoch": 0.48, + "learning_rate": 5.8450307431945586e-06, + "loss": 3.3494, + "step": 4002 + }, + { + "epoch": 0.48, + "learning_rate": 5.84305919922838e-06, + "loss": 3.4133, + "step": 4003 + }, + { + "epoch": 0.48, + "learning_rate": 5.841087520339006e-06, + "loss": 3.4129, + "step": 4004 + }, + { + "epoch": 0.48, + "learning_rate": 5.839115706841984e-06, + "loss": 3.4384, + "step": 4005 + }, + { + "epoch": 0.48, + "learning_rate": 5.837143759052886e-06, + "loss": 3.4828, + "step": 4006 + }, + { + "epoch": 0.48, + "learning_rate": 5.835171677287299e-06, + "loss": 3.3182, + "step": 4007 + }, + { + "epoch": 0.48, + "learning_rate": 5.833199461860837e-06, + "loss": 3.4036, + "step": 4008 + }, + { + "epoch": 0.48, + "learning_rate": 5.8312271130891305e-06, + "loss": 3.4015, + "step": 4009 + }, + { + "epoch": 0.48, + "learning_rate": 5.8292546312878376e-06, + "loss": 3.3901, + "step": 4010 + }, + { + "epoch": 0.48, + "learning_rate": 5.827282016772633e-06, + "loss": 3.4128, + "step": 4011 + }, + { + "epoch": 0.48, + "learning_rate": 5.825309269859213e-06, + "loss": 3.4396, + "step": 4012 + }, + { + "epoch": 0.48, + "learning_rate": 5.823336390863297e-06, + "loss": 3.3372, + "step": 4013 + }, + { + "epoch": 0.48, + "learning_rate": 5.8213633801006246e-06, + "loss": 3.4363, + "step": 4014 + }, + { + "epoch": 0.48, + "learning_rate": 5.819390237886956e-06, + "loss": 3.3901, + "step": 4015 + }, + { + "epoch": 0.48, + "learning_rate": 5.817416964538074e-06, + "loss": 3.4396, + "step": 4016 + }, + { + "epoch": 0.48, + "learning_rate": 5.815443560369779e-06, + "loss": 3.4187, + "step": 4017 + }, + { + "epoch": 0.48, + "learning_rate": 5.813470025697897e-06, + "loss": 3.3123, + "step": 4018 + }, + { + "epoch": 0.48, + "learning_rate": 5.811496360838271e-06, + "loss": 3.4692, + "step": 4019 + }, + { + "epoch": 0.48, + "learning_rate": 5.809522566106768e-06, + "loss": 3.4154, + "step": 4020 + }, + { + "epoch": 0.48, + "learning_rate": 5.807548641819272e-06, + "loss": 3.3352, + "step": 4021 + }, + { + "epoch": 0.48, + "learning_rate": 5.805574588291691e-06, + "loss": 3.4533, + "step": 4022 + }, + { + "epoch": 0.48, + "learning_rate": 5.803600405839952e-06, + "loss": 3.3929, + "step": 4023 + }, + { + "epoch": 0.48, + "learning_rate": 5.801626094780004e-06, + "loss": 3.3271, + "step": 4024 + }, + { + "epoch": 0.48, + "learning_rate": 5.799651655427814e-06, + "loss": 3.3899, + "step": 4025 + }, + { + "epoch": 0.48, + "learning_rate": 5.7976770880993725e-06, + "loss": 3.4093, + "step": 4026 + }, + { + "epoch": 0.48, + "learning_rate": 5.795702393110692e-06, + "loss": 3.3682, + "step": 4027 + }, + { + "epoch": 0.48, + "learning_rate": 5.793727570777797e-06, + "loss": 3.4544, + "step": 4028 + }, + { + "epoch": 0.48, + "learning_rate": 5.79175262141674e-06, + "loss": 3.442, + "step": 4029 + }, + { + "epoch": 0.48, + "learning_rate": 5.789777545343597e-06, + "loss": 3.4654, + "step": 4030 + }, + { + "epoch": 0.48, + "learning_rate": 5.787802342874452e-06, + "loss": 3.482, + "step": 4031 + }, + { + "epoch": 0.48, + "learning_rate": 5.785827014325421e-06, + "loss": 3.5099, + "step": 4032 + }, + { + "epoch": 0.48, + "learning_rate": 5.783851560012632e-06, + "loss": 3.3807, + "step": 4033 + }, + { + "epoch": 0.48, + "learning_rate": 5.781875980252241e-06, + "loss": 3.4382, + "step": 4034 + }, + { + "epoch": 0.48, + "learning_rate": 5.779900275360417e-06, + "loss": 3.4452, + "step": 4035 + }, + { + "epoch": 0.48, + "learning_rate": 5.777924445653352e-06, + "loss": 3.4336, + "step": 4036 + }, + { + "epoch": 0.48, + "learning_rate": 5.775948491447258e-06, + "loss": 3.3752, + "step": 4037 + }, + { + "epoch": 0.48, + "learning_rate": 5.773972413058367e-06, + "loss": 3.43, + "step": 4038 + }, + { + "epoch": 0.48, + "learning_rate": 5.77199621080293e-06, + "loss": 3.4485, + "step": 4039 + }, + { + "epoch": 0.48, + "learning_rate": 5.77001988499722e-06, + "loss": 3.3479, + "step": 4040 + }, + { + "epoch": 0.48, + "learning_rate": 5.7680434359575265e-06, + "loss": 3.3536, + "step": 4041 + }, + { + "epoch": 0.48, + "learning_rate": 5.766066864000159e-06, + "loss": 3.399, + "step": 4042 + }, + { + "epoch": 0.48, + "learning_rate": 5.764090169441453e-06, + "loss": 3.4044, + "step": 4043 + }, + { + "epoch": 0.48, + "learning_rate": 5.762113352597755e-06, + "loss": 3.3696, + "step": 4044 + }, + { + "epoch": 0.48, + "learning_rate": 5.760136413785434e-06, + "loss": 3.5103, + "step": 4045 + }, + { + "epoch": 0.48, + "learning_rate": 5.758159353320883e-06, + "loss": 3.4132, + "step": 4046 + }, + { + "epoch": 0.48, + "learning_rate": 5.756182171520507e-06, + "loss": 3.3984, + "step": 4047 + }, + { + "epoch": 0.48, + "learning_rate": 5.754204868700737e-06, + "loss": 3.4039, + "step": 4048 + }, + { + "epoch": 0.48, + "learning_rate": 5.752227445178017e-06, + "loss": 3.4668, + "step": 4049 + }, + { + "epoch": 0.48, + "learning_rate": 5.750249901268819e-06, + "loss": 3.3876, + "step": 4050 + }, + { + "epoch": 0.48, + "learning_rate": 5.748272237289624e-06, + "loss": 3.3875, + "step": 4051 + }, + { + "epoch": 0.49, + "learning_rate": 5.746294453556941e-06, + "loss": 3.4852, + "step": 4052 + }, + { + "epoch": 0.49, + "learning_rate": 5.744316550387291e-06, + "loss": 3.3741, + "step": 4053 + }, + { + "epoch": 0.49, + "learning_rate": 5.7423385280972225e-06, + "loss": 3.3706, + "step": 4054 + }, + { + "epoch": 0.49, + "learning_rate": 5.740360387003295e-06, + "loss": 3.5075, + "step": 4055 + }, + { + "epoch": 0.49, + "learning_rate": 5.7383821274220884e-06, + "loss": 3.3567, + "step": 4056 + }, + { + "epoch": 0.49, + "learning_rate": 5.736403749670206e-06, + "loss": 3.4466, + "step": 4057 + }, + { + "epoch": 0.49, + "learning_rate": 5.734425254064269e-06, + "loss": 3.3834, + "step": 4058 + }, + { + "epoch": 0.49, + "learning_rate": 5.732446640920912e-06, + "loss": 3.425, + "step": 4059 + }, + { + "epoch": 0.49, + "learning_rate": 5.7304679105567966e-06, + "loss": 3.4189, + "step": 4060 + }, + { + "epoch": 0.49, + "learning_rate": 5.728489063288593e-06, + "loss": 3.4687, + "step": 4061 + }, + { + "epoch": 0.49, + "learning_rate": 5.726510099433e-06, + "loss": 3.4691, + "step": 4062 + }, + { + "epoch": 0.49, + "learning_rate": 5.724531019306732e-06, + "loss": 3.4856, + "step": 4063 + }, + { + "epoch": 0.49, + "learning_rate": 5.722551823226517e-06, + "loss": 3.5017, + "step": 4064 + }, + { + "epoch": 0.49, + "learning_rate": 5.720572511509108e-06, + "loss": 3.3319, + "step": 4065 + }, + { + "epoch": 0.49, + "learning_rate": 5.718593084471275e-06, + "loss": 3.5295, + "step": 4066 + }, + { + "epoch": 0.49, + "learning_rate": 5.716613542429804e-06, + "loss": 3.4804, + "step": 4067 + }, + { + "epoch": 0.49, + "learning_rate": 5.714633885701502e-06, + "loss": 3.5218, + "step": 4068 + }, + { + "epoch": 0.49, + "learning_rate": 5.712654114603189e-06, + "loss": 3.3331, + "step": 4069 + }, + { + "epoch": 0.49, + "learning_rate": 5.710674229451714e-06, + "loss": 3.3984, + "step": 4070 + }, + { + "epoch": 0.49, + "learning_rate": 5.708694230563934e-06, + "loss": 3.51, + "step": 4071 + }, + { + "epoch": 0.49, + "learning_rate": 5.706714118256729e-06, + "loss": 3.4712, + "step": 4072 + }, + { + "epoch": 0.49, + "learning_rate": 5.704733892846996e-06, + "loss": 3.3637, + "step": 4073 + }, + { + "epoch": 0.49, + "learning_rate": 5.702753554651648e-06, + "loss": 3.4069, + "step": 4074 + }, + { + "epoch": 0.49, + "learning_rate": 5.7007731039876235e-06, + "loss": 3.4129, + "step": 4075 + }, + { + "epoch": 0.49, + "learning_rate": 5.69879254117187e-06, + "loss": 3.3847, + "step": 4076 + }, + { + "epoch": 0.49, + "learning_rate": 5.696811866521359e-06, + "loss": 3.4639, + "step": 4077 + }, + { + "epoch": 0.49, + "learning_rate": 5.694831080353074e-06, + "loss": 3.3644, + "step": 4078 + }, + { + "epoch": 0.49, + "learning_rate": 5.692850182984024e-06, + "loss": 3.3602, + "step": 4079 + }, + { + "epoch": 0.49, + "learning_rate": 5.690869174731229e-06, + "loss": 3.4112, + "step": 4080 + }, + { + "epoch": 0.49, + "learning_rate": 5.688888055911731e-06, + "loss": 3.3463, + "step": 4081 + }, + { + "epoch": 0.49, + "learning_rate": 5.686906826842588e-06, + "loss": 3.3796, + "step": 4082 + }, + { + "epoch": 0.49, + "learning_rate": 5.684925487840878e-06, + "loss": 3.4801, + "step": 4083 + }, + { + "epoch": 0.49, + "learning_rate": 5.68294403922369e-06, + "loss": 3.4329, + "step": 4084 + }, + { + "epoch": 0.49, + "learning_rate": 5.680962481308139e-06, + "loss": 3.487, + "step": 4085 + }, + { + "epoch": 0.49, + "learning_rate": 5.678980814411351e-06, + "loss": 3.4562, + "step": 4086 + }, + { + "epoch": 0.49, + "learning_rate": 5.676999038850473e-06, + "loss": 3.3927, + "step": 4087 + }, + { + "epoch": 0.49, + "learning_rate": 5.675017154942668e-06, + "loss": 3.4301, + "step": 4088 + }, + { + "epoch": 0.49, + "learning_rate": 5.673035163005118e-06, + "loss": 3.3622, + "step": 4089 + }, + { + "epoch": 0.49, + "learning_rate": 5.671053063355019e-06, + "loss": 3.5363, + "step": 4090 + }, + { + "epoch": 0.49, + "learning_rate": 5.669070856309588e-06, + "loss": 3.3371, + "step": 4091 + }, + { + "epoch": 0.49, + "learning_rate": 5.667088542186056e-06, + "loss": 3.428, + "step": 4092 + }, + { + "epoch": 0.49, + "learning_rate": 5.665106121301675e-06, + "loss": 3.3956, + "step": 4093 + }, + { + "epoch": 0.49, + "learning_rate": 5.663123593973707e-06, + "loss": 3.4173, + "step": 4094 + }, + { + "epoch": 0.49, + "learning_rate": 5.6611409605194415e-06, + "loss": 3.4139, + "step": 4095 + }, + { + "epoch": 0.49, + "learning_rate": 5.659158221256175e-06, + "loss": 3.4078, + "step": 4096 + }, + { + "epoch": 0.49, + "learning_rate": 5.657175376501226e-06, + "loss": 3.3304, + "step": 4097 + }, + { + "epoch": 0.49, + "learning_rate": 5.655192426571929e-06, + "loss": 3.4746, + "step": 4098 + }, + { + "epoch": 0.49, + "learning_rate": 5.653209371785636e-06, + "loss": 3.4625, + "step": 4099 + }, + { + "epoch": 0.49, + "learning_rate": 5.651226212459715e-06, + "loss": 3.4687, + "step": 4100 + }, + { + "epoch": 0.49, + "learning_rate": 5.649242948911551e-06, + "loss": 3.4471, + "step": 4101 + }, + { + "epoch": 0.49, + "learning_rate": 5.647259581458543e-06, + "loss": 3.4491, + "step": 4102 + }, + { + "epoch": 0.49, + "learning_rate": 5.645276110418112e-06, + "loss": 3.4368, + "step": 4103 + }, + { + "epoch": 0.49, + "learning_rate": 5.643292536107693e-06, + "loss": 3.3563, + "step": 4104 + }, + { + "epoch": 0.49, + "learning_rate": 5.641308858844735e-06, + "loss": 3.4395, + "step": 4105 + }, + { + "epoch": 0.49, + "learning_rate": 5.639325078946706e-06, + "loss": 3.444, + "step": 4106 + }, + { + "epoch": 0.49, + "learning_rate": 5.637341196731092e-06, + "loss": 3.4207, + "step": 4107 + }, + { + "epoch": 0.49, + "learning_rate": 5.635357212515392e-06, + "loss": 3.3747, + "step": 4108 + }, + { + "epoch": 0.49, + "learning_rate": 5.633373126617124e-06, + "loss": 3.3388, + "step": 4109 + }, + { + "epoch": 0.49, + "learning_rate": 5.631388939353819e-06, + "loss": 3.4271, + "step": 4110 + }, + { + "epoch": 0.49, + "learning_rate": 5.629404651043029e-06, + "loss": 3.4804, + "step": 4111 + }, + { + "epoch": 0.49, + "learning_rate": 5.627420262002318e-06, + "loss": 3.4308, + "step": 4112 + }, + { + "epoch": 0.49, + "learning_rate": 5.625435772549267e-06, + "loss": 3.2969, + "step": 4113 + }, + { + "epoch": 0.49, + "learning_rate": 5.623451183001475e-06, + "loss": 3.3339, + "step": 4114 + }, + { + "epoch": 0.49, + "learning_rate": 5.621466493676555e-06, + "loss": 3.3815, + "step": 4115 + }, + { + "epoch": 0.49, + "learning_rate": 5.6194817048921365e-06, + "loss": 3.408, + "step": 4116 + }, + { + "epoch": 0.49, + "learning_rate": 5.617496816965867e-06, + "loss": 3.4134, + "step": 4117 + }, + { + "epoch": 0.49, + "learning_rate": 5.6155118302154045e-06, + "loss": 3.4009, + "step": 4118 + }, + { + "epoch": 0.49, + "learning_rate": 5.61352674495843e-06, + "loss": 3.5508, + "step": 4119 + }, + { + "epoch": 0.49, + "learning_rate": 5.611541561512634e-06, + "loss": 3.3375, + "step": 4120 + }, + { + "epoch": 0.49, + "learning_rate": 5.609556280195726e-06, + "loss": 3.4422, + "step": 4121 + }, + { + "epoch": 0.49, + "learning_rate": 5.60757090132543e-06, + "loss": 3.4251, + "step": 4122 + }, + { + "epoch": 0.49, + "learning_rate": 5.6055854252194875e-06, + "loss": 3.4289, + "step": 4123 + }, + { + "epoch": 0.49, + "learning_rate": 5.603599852195653e-06, + "loss": 3.3734, + "step": 4124 + }, + { + "epoch": 0.49, + "learning_rate": 5.601614182571696e-06, + "loss": 3.4157, + "step": 4125 + }, + { + "epoch": 0.49, + "learning_rate": 5.5996284166654065e-06, + "loss": 3.4551, + "step": 4126 + }, + { + "epoch": 0.49, + "learning_rate": 5.597642554794586e-06, + "loss": 3.4508, + "step": 4127 + }, + { + "epoch": 0.49, + "learning_rate": 5.595656597277047e-06, + "loss": 3.4796, + "step": 4128 + }, + { + "epoch": 0.49, + "learning_rate": 5.593670544430628e-06, + "loss": 3.3617, + "step": 4129 + }, + { + "epoch": 0.49, + "learning_rate": 5.591684396573175e-06, + "loss": 3.5174, + "step": 4130 + }, + { + "epoch": 0.49, + "learning_rate": 5.58969815402255e-06, + "loss": 3.4303, + "step": 4131 + }, + { + "epoch": 0.49, + "learning_rate": 5.587711817096632e-06, + "loss": 3.3643, + "step": 4132 + }, + { + "epoch": 0.49, + "learning_rate": 5.5857253861133145e-06, + "loss": 3.3779, + "step": 4133 + }, + { + "epoch": 0.49, + "learning_rate": 5.583738861390505e-06, + "loss": 3.528, + "step": 4134 + }, + { + "epoch": 0.5, + "learning_rate": 5.581752243246129e-06, + "loss": 3.4238, + "step": 4135 + }, + { + "epoch": 0.5, + "learning_rate": 5.579765531998122e-06, + "loss": 3.4219, + "step": 4136 + }, + { + "epoch": 0.5, + "learning_rate": 5.577778727964442e-06, + "loss": 3.3627, + "step": 4137 + }, + { + "epoch": 0.5, + "learning_rate": 5.575791831463051e-06, + "loss": 3.4899, + "step": 4138 + }, + { + "epoch": 0.5, + "learning_rate": 5.573804842811935e-06, + "loss": 3.5616, + "step": 4139 + }, + { + "epoch": 0.5, + "learning_rate": 5.571817762329092e-06, + "loss": 3.3396, + "step": 4140 + }, + { + "epoch": 0.5, + "learning_rate": 5.569830590332535e-06, + "loss": 3.5094, + "step": 4141 + }, + { + "epoch": 0.5, + "learning_rate": 5.56784332714029e-06, + "loss": 3.395, + "step": 4142 + }, + { + "epoch": 0.5, + "learning_rate": 5.565855973070397e-06, + "loss": 3.4002, + "step": 4143 + }, + { + "epoch": 0.5, + "learning_rate": 5.563868528440913e-06, + "loss": 3.3735, + "step": 4144 + }, + { + "epoch": 0.5, + "learning_rate": 5.561880993569912e-06, + "loss": 3.4769, + "step": 4145 + }, + { + "epoch": 0.5, + "learning_rate": 5.559893368775474e-06, + "loss": 3.3813, + "step": 4146 + }, + { + "epoch": 0.5, + "learning_rate": 5.557905654375702e-06, + "loss": 3.4432, + "step": 4147 + }, + { + "epoch": 0.5, + "learning_rate": 5.555917850688708e-06, + "loss": 3.2576, + "step": 4148 + }, + { + "epoch": 0.5, + "learning_rate": 5.553929958032621e-06, + "loss": 3.3274, + "step": 4149 + }, + { + "epoch": 0.5, + "learning_rate": 5.5519419767255845e-06, + "loss": 3.4414, + "step": 4150 + }, + { + "epoch": 0.5, + "learning_rate": 5.549953907085751e-06, + "loss": 3.4259, + "step": 4151 + }, + { + "epoch": 0.5, + "learning_rate": 5.547965749431296e-06, + "loss": 3.4737, + "step": 4152 + }, + { + "epoch": 0.5, + "learning_rate": 5.545977504080401e-06, + "loss": 3.3512, + "step": 4153 + }, + { + "epoch": 0.5, + "learning_rate": 5.543989171351266e-06, + "loss": 3.4284, + "step": 4154 + }, + { + "epoch": 0.5, + "learning_rate": 5.542000751562103e-06, + "loss": 3.4034, + "step": 4155 + }, + { + "epoch": 0.5, + "learning_rate": 5.540012245031141e-06, + "loss": 3.4441, + "step": 4156 + }, + { + "epoch": 0.5, + "learning_rate": 5.538023652076618e-06, + "loss": 3.426, + "step": 4157 + }, + { + "epoch": 0.5, + "learning_rate": 5.53603497301679e-06, + "loss": 3.3781, + "step": 4158 + }, + { + "epoch": 0.5, + "learning_rate": 5.534046208169923e-06, + "loss": 3.4466, + "step": 4159 + }, + { + "epoch": 0.5, + "learning_rate": 5.5320573578543015e-06, + "loss": 3.4284, + "step": 4160 + }, + { + "epoch": 0.5, + "learning_rate": 5.5300684223882196e-06, + "loss": 3.4182, + "step": 4161 + }, + { + "epoch": 0.5, + "learning_rate": 5.528079402089987e-06, + "loss": 3.4115, + "step": 4162 + }, + { + "epoch": 0.5, + "learning_rate": 5.526090297277926e-06, + "loss": 3.4163, + "step": 4163 + }, + { + "epoch": 0.5, + "learning_rate": 5.524101108270376e-06, + "loss": 3.348, + "step": 4164 + }, + { + "epoch": 0.5, + "learning_rate": 5.522111835385682e-06, + "loss": 3.4031, + "step": 4165 + }, + { + "epoch": 0.5, + "learning_rate": 5.520122478942209e-06, + "loss": 3.4323, + "step": 4166 + }, + { + "epoch": 0.5, + "learning_rate": 5.5181330392583356e-06, + "loss": 3.4167, + "step": 4167 + }, + { + "epoch": 0.5, + "learning_rate": 5.5161435166524495e-06, + "loss": 3.5148, + "step": 4168 + }, + { + "epoch": 0.5, + "learning_rate": 5.514153911442954e-06, + "loss": 3.4285, + "step": 4169 + }, + { + "epoch": 0.5, + "learning_rate": 5.512164223948268e-06, + "loss": 3.3716, + "step": 4170 + }, + { + "epoch": 0.5, + "learning_rate": 5.510174454486817e-06, + "loss": 3.3378, + "step": 4171 + }, + { + "epoch": 0.5, + "learning_rate": 5.508184603377047e-06, + "loss": 3.4996, + "step": 4172 + }, + { + "epoch": 0.5, + "learning_rate": 5.5061946709374125e-06, + "loss": 3.4004, + "step": 4173 + }, + { + "epoch": 0.5, + "learning_rate": 5.5042046574863824e-06, + "loss": 3.4306, + "step": 4174 + }, + { + "epoch": 0.5, + "learning_rate": 5.5022145633424385e-06, + "loss": 3.5328, + "step": 4175 + }, + { + "epoch": 0.5, + "learning_rate": 5.500224388824077e-06, + "loss": 3.3634, + "step": 4176 + }, + { + "epoch": 0.5, + "learning_rate": 5.498234134249802e-06, + "loss": 3.4473, + "step": 4177 + }, + { + "epoch": 0.5, + "learning_rate": 5.496243799938138e-06, + "loss": 3.4377, + "step": 4178 + }, + { + "epoch": 0.5, + "learning_rate": 5.494253386207613e-06, + "loss": 3.4883, + "step": 4179 + }, + { + "epoch": 0.5, + "learning_rate": 5.492262893376777e-06, + "loss": 3.3856, + "step": 4180 + }, + { + "epoch": 0.5, + "learning_rate": 5.4902723217641885e-06, + "loss": 3.379, + "step": 4181 + }, + { + "epoch": 0.5, + "learning_rate": 5.4882816716884164e-06, + "loss": 3.4219, + "step": 4182 + }, + { + "epoch": 0.5, + "learning_rate": 5.486290943468044e-06, + "loss": 3.4029, + "step": 4183 + }, + { + "epoch": 0.5, + "learning_rate": 5.48430013742167e-06, + "loss": 3.4334, + "step": 4184 + }, + { + "epoch": 0.5, + "learning_rate": 5.482309253867901e-06, + "loss": 3.4078, + "step": 4185 + }, + { + "epoch": 0.5, + "learning_rate": 5.480318293125359e-06, + "loss": 3.484, + "step": 4186 + }, + { + "epoch": 0.5, + "learning_rate": 5.478327255512677e-06, + "loss": 3.4085, + "step": 4187 + }, + { + "epoch": 0.5, + "learning_rate": 5.476336141348501e-06, + "loss": 3.4279, + "step": 4188 + }, + { + "epoch": 0.5, + "learning_rate": 5.474344950951489e-06, + "loss": 3.4414, + "step": 4189 + }, + { + "epoch": 0.5, + "learning_rate": 5.47235368464031e-06, + "loss": 3.4081, + "step": 4190 + }, + { + "epoch": 0.5, + "learning_rate": 5.470362342733646e-06, + "loss": 3.4154, + "step": 4191 + }, + { + "epoch": 0.5, + "learning_rate": 5.468370925550194e-06, + "loss": 3.4114, + "step": 4192 + }, + { + "epoch": 0.5, + "learning_rate": 5.466379433408657e-06, + "loss": 3.3595, + "step": 4193 + }, + { + "epoch": 0.5, + "learning_rate": 5.464387866627757e-06, + "loss": 3.4375, + "step": 4194 + }, + { + "epoch": 0.5, + "learning_rate": 5.462396225526221e-06, + "loss": 3.4954, + "step": 4195 + }, + { + "epoch": 0.5, + "learning_rate": 5.460404510422793e-06, + "loss": 3.4612, + "step": 4196 + }, + { + "epoch": 0.5, + "learning_rate": 5.4584127216362285e-06, + "loss": 3.4234, + "step": 4197 + }, + { + "epoch": 0.5, + "learning_rate": 5.456420859485289e-06, + "loss": 3.5496, + "step": 4198 + }, + { + "epoch": 0.5, + "learning_rate": 5.454428924288756e-06, + "loss": 3.4663, + "step": 4199 + }, + { + "epoch": 0.5, + "learning_rate": 5.452436916365418e-06, + "loss": 3.4596, + "step": 4200 + }, + { + "epoch": 0.5, + "learning_rate": 5.450444836034075e-06, + "loss": 3.3397, + "step": 4201 + }, + { + "epoch": 0.5, + "learning_rate": 5.448452683613541e-06, + "loss": 3.2104, + "step": 4202 + }, + { + "epoch": 0.5, + "learning_rate": 5.44646045942264e-06, + "loss": 3.4762, + "step": 4203 + }, + { + "epoch": 0.5, + "learning_rate": 5.4444681637802045e-06, + "loss": 3.4586, + "step": 4204 + }, + { + "epoch": 0.5, + "learning_rate": 5.442475797005085e-06, + "loss": 3.4252, + "step": 4205 + }, + { + "epoch": 0.5, + "learning_rate": 5.440483359416138e-06, + "loss": 3.391, + "step": 4206 + }, + { + "epoch": 0.5, + "learning_rate": 5.438490851332234e-06, + "loss": 3.5088, + "step": 4207 + }, + { + "epoch": 0.5, + "learning_rate": 5.436498273072254e-06, + "loss": 3.4267, + "step": 4208 + }, + { + "epoch": 0.5, + "learning_rate": 5.43450562495509e-06, + "loss": 3.4952, + "step": 4209 + }, + { + "epoch": 0.5, + "learning_rate": 5.432512907299645e-06, + "loss": 3.5055, + "step": 4210 + }, + { + "epoch": 0.5, + "learning_rate": 5.430520120424834e-06, + "loss": 3.3838, + "step": 4211 + }, + { + "epoch": 0.5, + "learning_rate": 5.428527264649583e-06, + "loss": 3.3113, + "step": 4212 + }, + { + "epoch": 0.5, + "learning_rate": 5.426534340292829e-06, + "loss": 3.381, + "step": 4213 + }, + { + "epoch": 0.5, + "learning_rate": 5.424541347673518e-06, + "loss": 3.4303, + "step": 4214 + }, + { + "epoch": 0.5, + "learning_rate": 5.4225482871106095e-06, + "loss": 3.4044, + "step": 4215 + }, + { + "epoch": 0.5, + "learning_rate": 5.420555158923072e-06, + "loss": 3.3903, + "step": 4216 + }, + { + "epoch": 0.5, + "learning_rate": 5.418561963429887e-06, + "loss": 3.3225, + "step": 4217 + }, + { + "epoch": 0.5, + "learning_rate": 5.416568700950047e-06, + "loss": 3.5081, + "step": 4218 + }, + { + "epoch": 0.51, + "learning_rate": 5.41457537180255e-06, + "loss": 3.4193, + "step": 4219 + }, + { + "epoch": 0.51, + "learning_rate": 5.412581976306409e-06, + "loss": 3.4011, + "step": 4220 + }, + { + "epoch": 0.51, + "learning_rate": 5.41058851478065e-06, + "loss": 3.4283, + "step": 4221 + }, + { + "epoch": 0.51, + "learning_rate": 5.408594987544306e-06, + "loss": 3.5257, + "step": 4222 + }, + { + "epoch": 0.51, + "learning_rate": 5.4066013949164175e-06, + "loss": 3.4139, + "step": 4223 + }, + { + "epoch": 0.51, + "learning_rate": 5.404607737216042e-06, + "loss": 3.4787, + "step": 4224 + }, + { + "epoch": 0.51, + "learning_rate": 5.402614014762244e-06, + "loss": 3.3985, + "step": 4225 + }, + { + "epoch": 0.51, + "learning_rate": 5.4006202278741e-06, + "loss": 3.4993, + "step": 4226 + }, + { + "epoch": 0.51, + "learning_rate": 5.398626376870693e-06, + "loss": 3.423, + "step": 4227 + }, + { + "epoch": 0.51, + "learning_rate": 5.396632462071121e-06, + "loss": 3.3843, + "step": 4228 + }, + { + "epoch": 0.51, + "learning_rate": 5.39463848379449e-06, + "loss": 3.4219, + "step": 4229 + }, + { + "epoch": 0.51, + "learning_rate": 5.392644442359914e-06, + "loss": 3.353, + "step": 4230 + }, + { + "epoch": 0.51, + "learning_rate": 5.3906503380865235e-06, + "loss": 3.4584, + "step": 4231 + }, + { + "epoch": 0.51, + "learning_rate": 5.3886561712934504e-06, + "loss": 3.4851, + "step": 4232 + }, + { + "epoch": 0.51, + "learning_rate": 5.386661942299844e-06, + "loss": 3.3766, + "step": 4233 + }, + { + "epoch": 0.51, + "learning_rate": 5.3846676514248615e-06, + "loss": 3.416, + "step": 4234 + }, + { + "epoch": 0.51, + "learning_rate": 5.382673298987667e-06, + "loss": 3.4799, + "step": 4235 + }, + { + "epoch": 0.51, + "learning_rate": 5.380678885307437e-06, + "loss": 3.3776, + "step": 4236 + }, + { + "epoch": 0.51, + "learning_rate": 5.378684410703357e-06, + "loss": 3.4713, + "step": 4237 + }, + { + "epoch": 0.51, + "learning_rate": 5.376689875494626e-06, + "loss": 3.4841, + "step": 4238 + }, + { + "epoch": 0.51, + "learning_rate": 5.374695280000446e-06, + "loss": 3.5035, + "step": 4239 + }, + { + "epoch": 0.51, + "learning_rate": 5.372700624540032e-06, + "loss": 3.3795, + "step": 4240 + }, + { + "epoch": 0.51, + "learning_rate": 5.3707059094326105e-06, + "loss": 3.382, + "step": 4241 + }, + { + "epoch": 0.51, + "learning_rate": 5.368711134997415e-06, + "loss": 3.483, + "step": 4242 + }, + { + "epoch": 0.51, + "learning_rate": 5.3667163015536906e-06, + "loss": 3.4152, + "step": 4243 + }, + { + "epoch": 0.51, + "learning_rate": 5.364721409420687e-06, + "loss": 3.4189, + "step": 4244 + }, + { + "epoch": 0.51, + "learning_rate": 5.36272645891767e-06, + "loss": 3.5604, + "step": 4245 + }, + { + "epoch": 0.51, + "learning_rate": 5.36073145036391e-06, + "loss": 3.4478, + "step": 4246 + }, + { + "epoch": 0.51, + "learning_rate": 5.3587363840786876e-06, + "loss": 3.3998, + "step": 4247 + }, + { + "epoch": 0.51, + "learning_rate": 5.356741260381294e-06, + "loss": 3.3849, + "step": 4248 + }, + { + "epoch": 0.51, + "learning_rate": 5.35474607959103e-06, + "loss": 3.4557, + "step": 4249 + }, + { + "epoch": 0.51, + "learning_rate": 5.3527508420272025e-06, + "loss": 3.4253, + "step": 4250 + }, + { + "epoch": 0.51, + "learning_rate": 5.350755548009131e-06, + "loss": 3.4285, + "step": 4251 + }, + { + "epoch": 0.51, + "learning_rate": 5.34876019785614e-06, + "loss": 3.3399, + "step": 4252 + }, + { + "epoch": 0.51, + "learning_rate": 5.346764791887566e-06, + "loss": 3.4793, + "step": 4253 + }, + { + "epoch": 0.51, + "learning_rate": 5.344769330422756e-06, + "loss": 3.425, + "step": 4254 + }, + { + "epoch": 0.51, + "learning_rate": 5.3427738137810615e-06, + "loss": 3.3125, + "step": 4255 + }, + { + "epoch": 0.51, + "learning_rate": 5.340778242281845e-06, + "loss": 3.4606, + "step": 4256 + }, + { + "epoch": 0.51, + "learning_rate": 5.338782616244479e-06, + "loss": 3.5485, + "step": 4257 + }, + { + "epoch": 0.51, + "learning_rate": 5.336786935988344e-06, + "loss": 3.3753, + "step": 4258 + }, + { + "epoch": 0.51, + "learning_rate": 5.334791201832826e-06, + "loss": 3.4149, + "step": 4259 + }, + { + "epoch": 0.51, + "learning_rate": 5.332795414097324e-06, + "loss": 3.3293, + "step": 4260 + }, + { + "epoch": 0.51, + "learning_rate": 5.330799573101244e-06, + "loss": 3.4426, + "step": 4261 + }, + { + "epoch": 0.51, + "learning_rate": 5.328803679164001e-06, + "loss": 3.4419, + "step": 4262 + }, + { + "epoch": 0.51, + "learning_rate": 5.326807732605016e-06, + "loss": 3.4283, + "step": 4263 + }, + { + "epoch": 0.51, + "learning_rate": 5.32481173374372e-06, + "loss": 3.4661, + "step": 4264 + }, + { + "epoch": 0.51, + "learning_rate": 5.322815682899556e-06, + "loss": 3.4133, + "step": 4265 + }, + { + "epoch": 0.51, + "learning_rate": 5.320819580391969e-06, + "loss": 3.3746, + "step": 4266 + }, + { + "epoch": 0.51, + "learning_rate": 5.318823426540416e-06, + "loss": 3.3464, + "step": 4267 + }, + { + "epoch": 0.51, + "learning_rate": 5.316827221664363e-06, + "loss": 3.349, + "step": 4268 + }, + { + "epoch": 0.51, + "learning_rate": 5.314830966083279e-06, + "loss": 3.4925, + "step": 4269 + }, + { + "epoch": 0.51, + "learning_rate": 5.3128346601166484e-06, + "loss": 3.3513, + "step": 4270 + }, + { + "epoch": 0.51, + "learning_rate": 5.310838304083957e-06, + "loss": 3.6425, + "step": 4271 + }, + { + "epoch": 0.51, + "learning_rate": 5.308841898304705e-06, + "loss": 3.4744, + "step": 4272 + }, + { + "epoch": 0.51, + "learning_rate": 5.306845443098394e-06, + "loss": 3.4411, + "step": 4273 + }, + { + "epoch": 0.51, + "learning_rate": 5.3048489387845396e-06, + "loss": 3.3732, + "step": 4274 + }, + { + "epoch": 0.51, + "learning_rate": 5.3028523856826595e-06, + "loss": 3.3358, + "step": 4275 + }, + { + "epoch": 0.51, + "learning_rate": 5.3008557841122844e-06, + "loss": 3.3759, + "step": 4276 + }, + { + "epoch": 0.51, + "learning_rate": 5.2988591343929475e-06, + "loss": 3.3392, + "step": 4277 + }, + { + "epoch": 0.51, + "learning_rate": 5.296862436844196e-06, + "loss": 3.358, + "step": 4278 + }, + { + "epoch": 0.51, + "learning_rate": 5.29486569178558e-06, + "loss": 3.458, + "step": 4279 + }, + { + "epoch": 0.51, + "learning_rate": 5.292868899536659e-06, + "loss": 3.3481, + "step": 4280 + }, + { + "epoch": 0.51, + "learning_rate": 5.290872060416997e-06, + "loss": 3.4366, + "step": 4281 + }, + { + "epoch": 0.51, + "learning_rate": 5.288875174746172e-06, + "loss": 3.4579, + "step": 4282 + }, + { + "epoch": 0.51, + "learning_rate": 5.286878242843762e-06, + "loss": 3.3876, + "step": 4283 + }, + { + "epoch": 0.51, + "learning_rate": 5.28488126502936e-06, + "loss": 3.4696, + "step": 4284 + }, + { + "epoch": 0.51, + "learning_rate": 5.282884241622559e-06, + "loss": 3.4706, + "step": 4285 + }, + { + "epoch": 0.51, + "learning_rate": 5.280887172942965e-06, + "loss": 3.3331, + "step": 4286 + }, + { + "epoch": 0.51, + "learning_rate": 5.278890059310187e-06, + "loss": 3.3742, + "step": 4287 + }, + { + "epoch": 0.51, + "learning_rate": 5.276892901043843e-06, + "loss": 3.3688, + "step": 4288 + }, + { + "epoch": 0.51, + "learning_rate": 5.274895698463558e-06, + "loss": 3.4019, + "step": 4289 + }, + { + "epoch": 0.51, + "learning_rate": 5.272898451888967e-06, + "loss": 3.4296, + "step": 4290 + }, + { + "epoch": 0.51, + "learning_rate": 5.270901161639707e-06, + "loss": 3.3809, + "step": 4291 + }, + { + "epoch": 0.51, + "learning_rate": 5.268903828035425e-06, + "loss": 3.5389, + "step": 4292 + }, + { + "epoch": 0.51, + "learning_rate": 5.266906451395774e-06, + "loss": 3.5241, + "step": 4293 + }, + { + "epoch": 0.51, + "learning_rate": 5.2649090320404136e-06, + "loss": 3.4572, + "step": 4294 + }, + { + "epoch": 0.51, + "learning_rate": 5.262911570289013e-06, + "loss": 3.4367, + "step": 4295 + }, + { + "epoch": 0.51, + "learning_rate": 5.260914066461243e-06, + "loss": 3.4036, + "step": 4296 + }, + { + "epoch": 0.51, + "learning_rate": 5.258916520876787e-06, + "loss": 3.4523, + "step": 4297 + }, + { + "epoch": 0.51, + "learning_rate": 5.256918933855331e-06, + "loss": 3.4072, + "step": 4298 + }, + { + "epoch": 0.51, + "learning_rate": 5.254921305716567e-06, + "loss": 3.4646, + "step": 4299 + }, + { + "epoch": 0.51, + "learning_rate": 5.2529236367801986e-06, + "loss": 3.4321, + "step": 4300 + }, + { + "epoch": 0.51, + "learning_rate": 5.25092592736593e-06, + "loss": 3.4426, + "step": 4301 + }, + { + "epoch": 0.52, + "learning_rate": 5.248928177793475e-06, + "loss": 3.4254, + "step": 4302 + }, + { + "epoch": 0.52, + "learning_rate": 5.246930388382556e-06, + "loss": 3.3221, + "step": 4303 + }, + { + "epoch": 0.52, + "learning_rate": 5.244932559452898e-06, + "loss": 3.3996, + "step": 4304 + }, + { + "epoch": 0.52, + "learning_rate": 5.242934691324231e-06, + "loss": 3.3138, + "step": 4305 + }, + { + "epoch": 0.52, + "learning_rate": 5.240936784316297e-06, + "loss": 3.4286, + "step": 4306 + }, + { + "epoch": 0.52, + "learning_rate": 5.23893883874884e-06, + "loss": 3.4362, + "step": 4307 + }, + { + "epoch": 0.52, + "learning_rate": 5.236940854941611e-06, + "loss": 3.4937, + "step": 4308 + }, + { + "epoch": 0.52, + "learning_rate": 5.234942833214367e-06, + "loss": 3.3647, + "step": 4309 + }, + { + "epoch": 0.52, + "learning_rate": 5.232944773886872e-06, + "loss": 3.4366, + "step": 4310 + }, + { + "epoch": 0.52, + "learning_rate": 5.230946677278895e-06, + "loss": 3.4257, + "step": 4311 + }, + { + "epoch": 0.52, + "learning_rate": 5.228948543710213e-06, + "loss": 3.429, + "step": 4312 + }, + { + "epoch": 0.52, + "learning_rate": 5.226950373500604e-06, + "loss": 3.3725, + "step": 4313 + }, + { + "epoch": 0.52, + "learning_rate": 5.224952166969858e-06, + "loss": 3.3928, + "step": 4314 + }, + { + "epoch": 0.52, + "learning_rate": 5.222953924437767e-06, + "loss": 3.4903, + "step": 4315 + }, + { + "epoch": 0.52, + "learning_rate": 5.22095564622413e-06, + "loss": 3.4295, + "step": 4316 + }, + { + "epoch": 0.52, + "learning_rate": 5.2189573326487495e-06, + "loss": 3.4808, + "step": 4317 + }, + { + "epoch": 0.52, + "learning_rate": 5.216958984031438e-06, + "loss": 3.4018, + "step": 4318 + }, + { + "epoch": 0.52, + "learning_rate": 5.214960600692012e-06, + "loss": 3.4643, + "step": 4319 + }, + { + "epoch": 0.52, + "learning_rate": 5.212962182950291e-06, + "loss": 3.3674, + "step": 4320 + }, + { + "epoch": 0.52, + "learning_rate": 5.210963731126101e-06, + "loss": 3.3211, + "step": 4321 + }, + { + "epoch": 0.52, + "learning_rate": 5.208965245539276e-06, + "loss": 3.4241, + "step": 4322 + }, + { + "epoch": 0.52, + "learning_rate": 5.206966726509652e-06, + "loss": 3.4673, + "step": 4323 + }, + { + "epoch": 0.52, + "learning_rate": 5.2049681743570745e-06, + "loss": 3.3625, + "step": 4324 + }, + { + "epoch": 0.52, + "learning_rate": 5.202969589401389e-06, + "loss": 3.3658, + "step": 4325 + }, + { + "epoch": 0.52, + "learning_rate": 5.200970971962451e-06, + "loss": 3.4031, + "step": 4326 + }, + { + "epoch": 0.52, + "learning_rate": 5.1989723223601185e-06, + "loss": 3.368, + "step": 4327 + }, + { + "epoch": 0.52, + "learning_rate": 5.196973640914256e-06, + "loss": 3.384, + "step": 4328 + }, + { + "epoch": 0.52, + "learning_rate": 5.194974927944729e-06, + "loss": 3.4309, + "step": 4329 + }, + { + "epoch": 0.52, + "learning_rate": 5.192976183771417e-06, + "loss": 3.3492, + "step": 4330 + }, + { + "epoch": 0.52, + "learning_rate": 5.190977408714196e-06, + "loss": 3.4056, + "step": 4331 + }, + { + "epoch": 0.52, + "learning_rate": 5.188978603092951e-06, + "loss": 3.4473, + "step": 4332 + }, + { + "epoch": 0.52, + "learning_rate": 5.186979767227569e-06, + "loss": 3.3854, + "step": 4333 + }, + { + "epoch": 0.52, + "learning_rate": 5.184980901437945e-06, + "loss": 3.4888, + "step": 4334 + }, + { + "epoch": 0.52, + "learning_rate": 5.1829820060439774e-06, + "loss": 3.3712, + "step": 4335 + }, + { + "epoch": 0.52, + "learning_rate": 5.18098308136557e-06, + "loss": 3.5064, + "step": 4336 + }, + { + "epoch": 0.52, + "learning_rate": 5.17898412772263e-06, + "loss": 3.4361, + "step": 4337 + }, + { + "epoch": 0.52, + "learning_rate": 5.1769851454350695e-06, + "loss": 3.3815, + "step": 4338 + }, + { + "epoch": 0.52, + "learning_rate": 5.174986134822807e-06, + "loss": 3.385, + "step": 4339 + }, + { + "epoch": 0.52, + "learning_rate": 5.172987096205761e-06, + "loss": 3.4746, + "step": 4340 + }, + { + "epoch": 0.52, + "learning_rate": 5.170988029903863e-06, + "loss": 3.3808, + "step": 4341 + }, + { + "epoch": 0.52, + "learning_rate": 5.16898893623704e-06, + "loss": 3.3832, + "step": 4342 + }, + { + "epoch": 0.52, + "learning_rate": 5.166989815525226e-06, + "loss": 3.3555, + "step": 4343 + }, + { + "epoch": 0.52, + "learning_rate": 5.1649906680883635e-06, + "loss": 3.3999, + "step": 4344 + }, + { + "epoch": 0.52, + "learning_rate": 5.1629914942463945e-06, + "loss": 3.4007, + "step": 4345 + }, + { + "epoch": 0.52, + "learning_rate": 5.160992294319265e-06, + "loss": 3.4818, + "step": 4346 + }, + { + "epoch": 0.52, + "learning_rate": 5.15899306862693e-06, + "loss": 3.5023, + "step": 4347 + }, + { + "epoch": 0.52, + "learning_rate": 5.156993817489343e-06, + "loss": 3.3462, + "step": 4348 + }, + { + "epoch": 0.52, + "learning_rate": 5.154994541226467e-06, + "loss": 3.4132, + "step": 4349 + }, + { + "epoch": 0.52, + "learning_rate": 5.1529952401582615e-06, + "loss": 3.4492, + "step": 4350 + }, + { + "epoch": 0.52, + "learning_rate": 5.150995914604701e-06, + "loss": 3.4117, + "step": 4351 + }, + { + "epoch": 0.52, + "learning_rate": 5.148996564885752e-06, + "loss": 3.4406, + "step": 4352 + }, + { + "epoch": 0.52, + "learning_rate": 5.1469971913213945e-06, + "loss": 3.4067, + "step": 4353 + }, + { + "epoch": 0.52, + "learning_rate": 5.144997794231604e-06, + "loss": 3.4137, + "step": 4354 + }, + { + "epoch": 0.52, + "learning_rate": 5.142998373936368e-06, + "loss": 3.3938, + "step": 4355 + }, + { + "epoch": 0.52, + "learning_rate": 5.1409989307556725e-06, + "loss": 3.3911, + "step": 4356 + }, + { + "epoch": 0.52, + "learning_rate": 5.1389994650095074e-06, + "loss": 3.4097, + "step": 4357 + }, + { + "epoch": 0.52, + "learning_rate": 5.136999977017867e-06, + "loss": 3.5114, + "step": 4358 + }, + { + "epoch": 0.52, + "learning_rate": 5.135000467100751e-06, + "loss": 3.3299, + "step": 4359 + }, + { + "epoch": 0.52, + "learning_rate": 5.1330009355781605e-06, + "loss": 3.451, + "step": 4360 + }, + { + "epoch": 0.52, + "learning_rate": 5.131001382770101e-06, + "loss": 3.4271, + "step": 4361 + }, + { + "epoch": 0.52, + "learning_rate": 5.129001808996578e-06, + "loss": 3.3311, + "step": 4362 + }, + { + "epoch": 0.52, + "learning_rate": 5.127002214577607e-06, + "loss": 3.4237, + "step": 4363 + }, + { + "epoch": 0.52, + "learning_rate": 5.125002599833201e-06, + "loss": 3.4267, + "step": 4364 + }, + { + "epoch": 0.52, + "learning_rate": 5.123002965083379e-06, + "loss": 3.343, + "step": 4365 + }, + { + "epoch": 0.52, + "learning_rate": 5.121003310648161e-06, + "loss": 3.3081, + "step": 4366 + }, + { + "epoch": 0.52, + "learning_rate": 5.119003636847574e-06, + "loss": 3.3754, + "step": 4367 + }, + { + "epoch": 0.52, + "learning_rate": 5.117003944001645e-06, + "loss": 3.42, + "step": 4368 + }, + { + "epoch": 0.52, + "learning_rate": 5.115004232430405e-06, + "loss": 3.4173, + "step": 4369 + }, + { + "epoch": 0.52, + "learning_rate": 5.1130045024538864e-06, + "loss": 3.3142, + "step": 4370 + }, + { + "epoch": 0.52, + "learning_rate": 5.111004754392128e-06, + "loss": 3.3909, + "step": 4371 + }, + { + "epoch": 0.52, + "learning_rate": 5.10900498856517e-06, + "loss": 3.4572, + "step": 4372 + }, + { + "epoch": 0.52, + "learning_rate": 5.107005205293052e-06, + "loss": 3.524, + "step": 4373 + }, + { + "epoch": 0.52, + "learning_rate": 5.1050054048958195e-06, + "loss": 3.419, + "step": 4374 + }, + { + "epoch": 0.52, + "learning_rate": 5.1030055876935245e-06, + "loss": 3.4817, + "step": 4375 + }, + { + "epoch": 0.52, + "learning_rate": 5.101005754006214e-06, + "loss": 3.3521, + "step": 4376 + }, + { + "epoch": 0.52, + "learning_rate": 5.099005904153943e-06, + "loss": 3.4781, + "step": 4377 + }, + { + "epoch": 0.52, + "learning_rate": 5.0970060384567665e-06, + "loss": 3.3541, + "step": 4378 + }, + { + "epoch": 0.52, + "learning_rate": 5.095006157234744e-06, + "loss": 3.3795, + "step": 4379 + }, + { + "epoch": 0.52, + "learning_rate": 5.093006260807935e-06, + "loss": 3.3203, + "step": 4380 + }, + { + "epoch": 0.52, + "learning_rate": 5.091006349496405e-06, + "loss": 3.3773, + "step": 4381 + }, + { + "epoch": 0.52, + "learning_rate": 5.089006423620219e-06, + "loss": 3.4426, + "step": 4382 + }, + { + "epoch": 0.52, + "learning_rate": 5.087006483499445e-06, + "loss": 3.3835, + "step": 4383 + }, + { + "epoch": 0.52, + "learning_rate": 5.085006529454151e-06, + "loss": 3.4057, + "step": 4384 + }, + { + "epoch": 0.52, + "learning_rate": 5.083006561804414e-06, + "loss": 3.4424, + "step": 4385 + }, + { + "epoch": 0.53, + "learning_rate": 5.081006580870307e-06, + "loss": 3.3836, + "step": 4386 + }, + { + "epoch": 0.53, + "learning_rate": 5.079006586971905e-06, + "loss": 3.3435, + "step": 4387 + }, + { + "epoch": 0.53, + "learning_rate": 5.07700658042929e-06, + "loss": 3.421, + "step": 4388 + }, + { + "epoch": 0.53, + "learning_rate": 5.075006561562542e-06, + "loss": 3.4132, + "step": 4389 + }, + { + "epoch": 0.53, + "learning_rate": 5.073006530691743e-06, + "loss": 3.4192, + "step": 4390 + }, + { + "epoch": 0.53, + "learning_rate": 5.0710064881369805e-06, + "loss": 3.3171, + "step": 4391 + }, + { + "epoch": 0.53, + "learning_rate": 5.069006434218338e-06, + "loss": 3.3982, + "step": 4392 + }, + { + "epoch": 0.53, + "learning_rate": 5.067006369255907e-06, + "loss": 3.4142, + "step": 4393 + }, + { + "epoch": 0.53, + "learning_rate": 5.065006293569775e-06, + "loss": 3.4757, + "step": 4394 + }, + { + "epoch": 0.53, + "learning_rate": 5.063006207480037e-06, + "loss": 3.3846, + "step": 4395 + }, + { + "epoch": 0.53, + "learning_rate": 5.0610061113067845e-06, + "loss": 3.3433, + "step": 4396 + }, + { + "epoch": 0.53, + "learning_rate": 5.059006005370115e-06, + "loss": 3.4148, + "step": 4397 + }, + { + "epoch": 0.53, + "learning_rate": 5.0570058899901245e-06, + "loss": 3.394, + "step": 4398 + }, + { + "epoch": 0.53, + "learning_rate": 5.055005765486909e-06, + "loss": 3.4092, + "step": 4399 + }, + { + "epoch": 0.53, + "learning_rate": 5.053005632180573e-06, + "loss": 3.4003, + "step": 4400 + }, + { + "epoch": 0.53, + "learning_rate": 5.0510054903912155e-06, + "loss": 3.4682, + "step": 4401 + }, + { + "epoch": 0.53, + "learning_rate": 5.04900534043894e-06, + "loss": 3.4086, + "step": 4402 + }, + { + "epoch": 0.53, + "learning_rate": 5.047005182643847e-06, + "loss": 3.432, + "step": 4403 + }, + { + "epoch": 0.53, + "learning_rate": 5.045005017326046e-06, + "loss": 3.3877, + "step": 4404 + }, + { + "epoch": 0.53, + "learning_rate": 5.043004844805643e-06, + "loss": 3.4019, + "step": 4405 + }, + { + "epoch": 0.53, + "learning_rate": 5.041004665402743e-06, + "loss": 3.3931, + "step": 4406 + }, + { + "epoch": 0.53, + "learning_rate": 5.039004479437456e-06, + "loss": 3.3783, + "step": 4407 + }, + { + "epoch": 0.53, + "learning_rate": 5.037004287229894e-06, + "loss": 3.4175, + "step": 4408 + }, + { + "epoch": 0.53, + "learning_rate": 5.035004089100165e-06, + "loss": 3.4639, + "step": 4409 + }, + { + "epoch": 0.53, + "learning_rate": 5.033003885368382e-06, + "loss": 3.4631, + "step": 4410 + }, + { + "epoch": 0.53, + "learning_rate": 5.031003676354655e-06, + "loss": 3.3588, + "step": 4411 + }, + { + "epoch": 0.53, + "learning_rate": 5.029003462379102e-06, + "loss": 3.4593, + "step": 4412 + }, + { + "epoch": 0.53, + "learning_rate": 5.0270032437618345e-06, + "loss": 3.354, + "step": 4413 + }, + { + "epoch": 0.53, + "learning_rate": 5.025003020822968e-06, + "loss": 3.3963, + "step": 4414 + }, + { + "epoch": 0.53, + "learning_rate": 5.023002793882616e-06, + "loss": 3.4215, + "step": 4415 + }, + { + "epoch": 0.53, + "learning_rate": 5.021002563260898e-06, + "loss": 3.4619, + "step": 4416 + }, + { + "epoch": 0.53, + "learning_rate": 5.019002329277931e-06, + "loss": 3.4292, + "step": 4417 + }, + { + "epoch": 0.53, + "learning_rate": 5.017002092253831e-06, + "loss": 3.3329, + "step": 4418 + }, + { + "epoch": 0.53, + "learning_rate": 5.015001852508714e-06, + "loss": 3.4275, + "step": 4419 + }, + { + "epoch": 0.53, + "learning_rate": 5.013001610362702e-06, + "loss": 3.3404, + "step": 4420 + }, + { + "epoch": 0.53, + "learning_rate": 5.011001366135913e-06, + "loss": 3.3824, + "step": 4421 + }, + { + "epoch": 0.53, + "learning_rate": 5.009001120148464e-06, + "loss": 3.4653, + "step": 4422 + }, + { + "epoch": 0.53, + "learning_rate": 5.007000872720476e-06, + "loss": 3.4554, + "step": 4423 + }, + { + "epoch": 0.53, + "learning_rate": 5.00500062417207e-06, + "loss": 3.3098, + "step": 4424 + }, + { + "epoch": 0.53, + "learning_rate": 5.003000374823363e-06, + "loss": 3.3879, + "step": 4425 + }, + { + "epoch": 0.53, + "learning_rate": 5.001000124994474e-06, + "loss": 3.3933, + "step": 4426 + }, + { + "epoch": 0.53, + "learning_rate": 4.998999875005527e-06, + "loss": 3.4975, + "step": 4427 + }, + { + "epoch": 0.53, + "learning_rate": 4.99699962517664e-06, + "loss": 3.4334, + "step": 4428 + }, + { + "epoch": 0.53, + "learning_rate": 4.994999375827932e-06, + "loss": 3.4581, + "step": 4429 + }, + { + "epoch": 0.53, + "learning_rate": 4.992999127279525e-06, + "loss": 3.3868, + "step": 4430 + }, + { + "epoch": 0.53, + "learning_rate": 4.9909988798515375e-06, + "loss": 3.3883, + "step": 4431 + }, + { + "epoch": 0.53, + "learning_rate": 4.988998633864088e-06, + "loss": 3.3526, + "step": 4432 + }, + { + "epoch": 0.53, + "learning_rate": 4.986998389637299e-06, + "loss": 3.45, + "step": 4433 + }, + { + "epoch": 0.53, + "learning_rate": 4.9849981474912874e-06, + "loss": 3.3615, + "step": 4434 + }, + { + "epoch": 0.53, + "learning_rate": 4.9829979077461705e-06, + "loss": 3.3836, + "step": 4435 + }, + { + "epoch": 0.53, + "learning_rate": 4.9809976707220715e-06, + "loss": 3.4451, + "step": 4436 + }, + { + "epoch": 0.53, + "learning_rate": 4.978997436739103e-06, + "loss": 3.4168, + "step": 4437 + }, + { + "epoch": 0.53, + "learning_rate": 4.9769972061173846e-06, + "loss": 3.4662, + "step": 4438 + }, + { + "epoch": 0.53, + "learning_rate": 4.974996979177035e-06, + "loss": 3.4841, + "step": 4439 + }, + { + "epoch": 0.53, + "learning_rate": 4.972996756238167e-06, + "loss": 3.4267, + "step": 4440 + }, + { + "epoch": 0.53, + "learning_rate": 4.970996537620899e-06, + "loss": 3.3591, + "step": 4441 + }, + { + "epoch": 0.53, + "learning_rate": 4.968996323645346e-06, + "loss": 3.4624, + "step": 4442 + }, + { + "epoch": 0.53, + "learning_rate": 4.96699611463162e-06, + "loss": 3.5196, + "step": 4443 + }, + { + "epoch": 0.53, + "learning_rate": 4.9649959108998375e-06, + "loss": 3.4565, + "step": 4444 + }, + { + "epoch": 0.53, + "learning_rate": 4.962995712770108e-06, + "loss": 3.4224, + "step": 4445 + }, + { + "epoch": 0.53, + "learning_rate": 4.960995520562544e-06, + "loss": 3.4087, + "step": 4446 + }, + { + "epoch": 0.53, + "learning_rate": 4.9589953345972595e-06, + "loss": 3.5087, + "step": 4447 + }, + { + "epoch": 0.53, + "learning_rate": 4.956995155194359e-06, + "loss": 3.3122, + "step": 4448 + }, + { + "epoch": 0.53, + "learning_rate": 4.954994982673954e-06, + "loss": 3.3651, + "step": 4449 + }, + { + "epoch": 0.53, + "learning_rate": 4.9529948173561545e-06, + "loss": 3.4808, + "step": 4450 + }, + { + "epoch": 0.53, + "learning_rate": 4.950994659561063e-06, + "loss": 3.3934, + "step": 4451 + }, + { + "epoch": 0.53, + "learning_rate": 4.948994509608787e-06, + "loss": 3.5274, + "step": 4452 + }, + { + "epoch": 0.53, + "learning_rate": 4.946994367819428e-06, + "loss": 3.3914, + "step": 4453 + }, + { + "epoch": 0.53, + "learning_rate": 4.944994234513091e-06, + "loss": 3.508, + "step": 4454 + }, + { + "epoch": 0.53, + "learning_rate": 4.942994110009878e-06, + "loss": 3.4752, + "step": 4455 + }, + { + "epoch": 0.53, + "learning_rate": 4.9409939946298865e-06, + "loss": 3.4169, + "step": 4456 + }, + { + "epoch": 0.53, + "learning_rate": 4.9389938886932155e-06, + "loss": 3.3807, + "step": 4457 + }, + { + "epoch": 0.53, + "learning_rate": 4.936993792519965e-06, + "loss": 3.4046, + "step": 4458 + }, + { + "epoch": 0.53, + "learning_rate": 4.934993706430226e-06, + "loss": 3.4123, + "step": 4459 + }, + { + "epoch": 0.53, + "learning_rate": 4.932993630744096e-06, + "loss": 3.4026, + "step": 4460 + }, + { + "epoch": 0.53, + "learning_rate": 4.930993565781663e-06, + "loss": 3.3675, + "step": 4461 + }, + { + "epoch": 0.53, + "learning_rate": 4.92899351186302e-06, + "loss": 3.4558, + "step": 4462 + }, + { + "epoch": 0.53, + "learning_rate": 4.9269934693082585e-06, + "loss": 3.5077, + "step": 4463 + }, + { + "epoch": 0.53, + "learning_rate": 4.924993438437459e-06, + "loss": 3.4496, + "step": 4464 + }, + { + "epoch": 0.53, + "learning_rate": 4.9229934195707106e-06, + "loss": 3.4201, + "step": 4465 + }, + { + "epoch": 0.53, + "learning_rate": 4.920993413028096e-06, + "loss": 3.537, + "step": 4466 + }, + { + "epoch": 0.53, + "learning_rate": 4.918993419129695e-06, + "loss": 3.3748, + "step": 4467 + }, + { + "epoch": 0.53, + "learning_rate": 4.916993438195587e-06, + "loss": 3.4093, + "step": 4468 + }, + { + "epoch": 0.54, + "learning_rate": 4.91499347054585e-06, + "loss": 3.395, + "step": 4469 + }, + { + "epoch": 0.54, + "learning_rate": 4.9129935165005575e-06, + "loss": 3.3709, + "step": 4470 + }, + { + "epoch": 0.54, + "learning_rate": 4.910993576379783e-06, + "loss": 3.5013, + "step": 4471 + }, + { + "epoch": 0.54, + "learning_rate": 4.908993650503597e-06, + "loss": 3.4385, + "step": 4472 + }, + { + "epoch": 0.54, + "learning_rate": 4.9069937391920665e-06, + "loss": 3.3632, + "step": 4473 + }, + { + "epoch": 0.54, + "learning_rate": 4.904993842765258e-06, + "loss": 3.3813, + "step": 4474 + }, + { + "epoch": 0.54, + "learning_rate": 4.902993961543234e-06, + "loss": 3.4571, + "step": 4475 + }, + { + "epoch": 0.54, + "learning_rate": 4.900994095846059e-06, + "loss": 3.4133, + "step": 4476 + }, + { + "epoch": 0.54, + "learning_rate": 4.898994245993787e-06, + "loss": 3.4378, + "step": 4477 + }, + { + "epoch": 0.54, + "learning_rate": 4.8969944123064755e-06, + "loss": 3.4453, + "step": 4478 + }, + { + "epoch": 0.54, + "learning_rate": 4.894994595104181e-06, + "loss": 3.3973, + "step": 4479 + }, + { + "epoch": 0.54, + "learning_rate": 4.89299479470695e-06, + "loss": 3.3825, + "step": 4480 + }, + { + "epoch": 0.54, + "learning_rate": 4.890995011434833e-06, + "loss": 3.456, + "step": 4481 + }, + { + "epoch": 0.54, + "learning_rate": 4.888995245607873e-06, + "loss": 3.3957, + "step": 4482 + }, + { + "epoch": 0.54, + "learning_rate": 4.8869954975461135e-06, + "loss": 3.4351, + "step": 4483 + }, + { + "epoch": 0.54, + "learning_rate": 4.884995767569597e-06, + "loss": 3.4077, + "step": 4484 + }, + { + "epoch": 0.54, + "learning_rate": 4.882996055998356e-06, + "loss": 3.3258, + "step": 4485 + }, + { + "epoch": 0.54, + "learning_rate": 4.880996363152426e-06, + "loss": 3.4051, + "step": 4486 + }, + { + "epoch": 0.54, + "learning_rate": 4.878996689351841e-06, + "loss": 3.3904, + "step": 4487 + }, + { + "epoch": 0.54, + "learning_rate": 4.876997034916623e-06, + "loss": 3.4466, + "step": 4488 + }, + { + "epoch": 0.54, + "learning_rate": 4.874997400166802e-06, + "loss": 3.3553, + "step": 4489 + }, + { + "epoch": 0.54, + "learning_rate": 4.872997785422395e-06, + "loss": 3.412, + "step": 4490 + }, + { + "epoch": 0.54, + "learning_rate": 4.870998191003422e-06, + "loss": 3.4199, + "step": 4491 + }, + { + "epoch": 0.54, + "learning_rate": 4.868998617229902e-06, + "loss": 3.3391, + "step": 4492 + }, + { + "epoch": 0.54, + "learning_rate": 4.86699906442184e-06, + "loss": 3.4895, + "step": 4493 + }, + { + "epoch": 0.54, + "learning_rate": 4.864999532899249e-06, + "loss": 3.4201, + "step": 4494 + }, + { + "epoch": 0.54, + "learning_rate": 4.863000022982134e-06, + "loss": 3.4412, + "step": 4495 + }, + { + "epoch": 0.54, + "learning_rate": 4.861000534990493e-06, + "loss": 3.468, + "step": 4496 + }, + { + "epoch": 0.54, + "learning_rate": 4.85900106924433e-06, + "loss": 3.4108, + "step": 4497 + }, + { + "epoch": 0.54, + "learning_rate": 4.857001626063633e-06, + "loss": 3.458, + "step": 4498 + }, + { + "epoch": 0.54, + "learning_rate": 4.855002205768396e-06, + "loss": 3.4641, + "step": 4499 + }, + { + "epoch": 0.54, + "learning_rate": 4.853002808678609e-06, + "loss": 3.4305, + "step": 4500 + }, + { + "epoch": 0.54, + "learning_rate": 4.851003435114249e-06, + "loss": 3.3931, + "step": 4501 + }, + { + "epoch": 0.54, + "learning_rate": 4.8490040853953e-06, + "loss": 3.4264, + "step": 4502 + }, + { + "epoch": 0.54, + "learning_rate": 4.847004759841739e-06, + "loss": 3.4196, + "step": 4503 + }, + { + "epoch": 0.54, + "learning_rate": 4.845005458773535e-06, + "loss": 3.3824, + "step": 4504 + }, + { + "epoch": 0.54, + "learning_rate": 4.843006182510658e-06, + "loss": 3.4712, + "step": 4505 + }, + { + "epoch": 0.54, + "learning_rate": 4.841006931373072e-06, + "loss": 3.3693, + "step": 4506 + }, + { + "epoch": 0.54, + "learning_rate": 4.839007705680736e-06, + "loss": 3.455, + "step": 4507 + }, + { + "epoch": 0.54, + "learning_rate": 4.837008505753607e-06, + "loss": 3.3375, + "step": 4508 + }, + { + "epoch": 0.54, + "learning_rate": 4.835009331911638e-06, + "loss": 3.4785, + "step": 4509 + }, + { + "epoch": 0.54, + "learning_rate": 4.833010184474774e-06, + "loss": 3.429, + "step": 4510 + }, + { + "epoch": 0.54, + "learning_rate": 4.8310110637629615e-06, + "loss": 3.4593, + "step": 4511 + }, + { + "epoch": 0.54, + "learning_rate": 4.829011970096138e-06, + "loss": 3.3893, + "step": 4512 + }, + { + "epoch": 0.54, + "learning_rate": 4.8270129037942395e-06, + "loss": 3.4063, + "step": 4513 + }, + { + "epoch": 0.54, + "learning_rate": 4.825013865177194e-06, + "loss": 3.4007, + "step": 4514 + }, + { + "epoch": 0.54, + "learning_rate": 4.823014854564932e-06, + "loss": 3.4651, + "step": 4515 + }, + { + "epoch": 0.54, + "learning_rate": 4.821015872277372e-06, + "loss": 3.4222, + "step": 4516 + }, + { + "epoch": 0.54, + "learning_rate": 4.819016918634431e-06, + "loss": 3.3861, + "step": 4517 + }, + { + "epoch": 0.54, + "learning_rate": 4.817017993956023e-06, + "loss": 3.3661, + "step": 4518 + }, + { + "epoch": 0.54, + "learning_rate": 4.8150190985620564e-06, + "loss": 3.5279, + "step": 4519 + }, + { + "epoch": 0.54, + "learning_rate": 4.813020232772431e-06, + "loss": 3.3817, + "step": 4520 + }, + { + "epoch": 0.54, + "learning_rate": 4.8110213969070515e-06, + "loss": 3.4329, + "step": 4521 + }, + { + "epoch": 0.54, + "learning_rate": 4.809022591285805e-06, + "loss": 3.4482, + "step": 4522 + }, + { + "epoch": 0.54, + "learning_rate": 4.807023816228584e-06, + "loss": 3.428, + "step": 4523 + }, + { + "epoch": 0.54, + "learning_rate": 4.8050250720552715e-06, + "loss": 3.3538, + "step": 4524 + }, + { + "epoch": 0.54, + "learning_rate": 4.803026359085746e-06, + "loss": 3.4351, + "step": 4525 + }, + { + "epoch": 0.54, + "learning_rate": 4.801027677639884e-06, + "loss": 3.4501, + "step": 4526 + }, + { + "epoch": 0.54, + "learning_rate": 4.799029028037551e-06, + "loss": 3.3625, + "step": 4527 + }, + { + "epoch": 0.54, + "learning_rate": 4.797030410598612e-06, + "loss": 3.39, + "step": 4528 + }, + { + "epoch": 0.54, + "learning_rate": 4.795031825642928e-06, + "loss": 3.4519, + "step": 4529 + }, + { + "epoch": 0.54, + "learning_rate": 4.793033273490349e-06, + "loss": 3.4614, + "step": 4530 + }, + { + "epoch": 0.54, + "learning_rate": 4.791034754460724e-06, + "loss": 3.5175, + "step": 4531 + }, + { + "epoch": 0.54, + "learning_rate": 4.789036268873901e-06, + "loss": 3.3056, + "step": 4532 + }, + { + "epoch": 0.54, + "learning_rate": 4.78703781704971e-06, + "loss": 3.418, + "step": 4533 + }, + { + "epoch": 0.54, + "learning_rate": 4.78503939930799e-06, + "loss": 3.3531, + "step": 4534 + }, + { + "epoch": 0.54, + "learning_rate": 4.783041015968563e-06, + "loss": 3.3621, + "step": 4535 + }, + { + "epoch": 0.54, + "learning_rate": 4.781042667351251e-06, + "loss": 3.4547, + "step": 4536 + }, + { + "epoch": 0.54, + "learning_rate": 4.779044353775873e-06, + "loss": 3.4877, + "step": 4537 + }, + { + "epoch": 0.54, + "learning_rate": 4.777046075562235e-06, + "loss": 3.3576, + "step": 4538 + }, + { + "epoch": 0.54, + "learning_rate": 4.775047833030143e-06, + "loss": 3.4565, + "step": 4539 + }, + { + "epoch": 0.54, + "learning_rate": 4.773049626499398e-06, + "loss": 3.3898, + "step": 4540 + }, + { + "epoch": 0.54, + "learning_rate": 4.7710514562897895e-06, + "loss": 3.4463, + "step": 4541 + }, + { + "epoch": 0.54, + "learning_rate": 4.769053322721108e-06, + "loss": 3.4141, + "step": 4542 + }, + { + "epoch": 0.54, + "learning_rate": 4.767055226113129e-06, + "loss": 3.463, + "step": 4543 + }, + { + "epoch": 0.54, + "learning_rate": 4.765057166785633e-06, + "loss": 3.434, + "step": 4544 + }, + { + "epoch": 0.54, + "learning_rate": 4.763059145058392e-06, + "loss": 3.4662, + "step": 4545 + }, + { + "epoch": 0.54, + "learning_rate": 4.7610611612511614e-06, + "loss": 3.4878, + "step": 4546 + }, + { + "epoch": 0.54, + "learning_rate": 4.759063215683703e-06, + "loss": 3.5071, + "step": 4547 + }, + { + "epoch": 0.54, + "learning_rate": 4.757065308675771e-06, + "loss": 3.3909, + "step": 4548 + }, + { + "epoch": 0.54, + "learning_rate": 4.755067440547104e-06, + "loss": 3.4693, + "step": 4549 + }, + { + "epoch": 0.54, + "learning_rate": 4.753069611617445e-06, + "loss": 3.4961, + "step": 4550 + }, + { + "epoch": 0.54, + "learning_rate": 4.751071822206526e-06, + "loss": 3.4101, + "step": 4551 + }, + { + "epoch": 0.54, + "learning_rate": 4.749074072634071e-06, + "loss": 3.3531, + "step": 4552 + }, + { + "epoch": 0.55, + "learning_rate": 4.747076363219804e-06, + "loss": 3.3979, + "step": 4553 + }, + { + "epoch": 0.55, + "learning_rate": 4.745078694283435e-06, + "loss": 3.3295, + "step": 4554 + }, + { + "epoch": 0.55, + "learning_rate": 4.743081066144671e-06, + "loss": 3.5403, + "step": 4555 + }, + { + "epoch": 0.55, + "learning_rate": 4.741083479123214e-06, + "loss": 3.4818, + "step": 4556 + }, + { + "epoch": 0.55, + "learning_rate": 4.739085933538758e-06, + "loss": 3.4285, + "step": 4557 + }, + { + "epoch": 0.55, + "learning_rate": 4.7370884297109895e-06, + "loss": 3.4804, + "step": 4558 + }, + { + "epoch": 0.55, + "learning_rate": 4.735090967959587e-06, + "loss": 3.4856, + "step": 4559 + }, + { + "epoch": 0.55, + "learning_rate": 4.733093548604228e-06, + "loss": 3.4507, + "step": 4560 + }, + { + "epoch": 0.55, + "learning_rate": 4.731096171964577e-06, + "loss": 3.355, + "step": 4561 + }, + { + "epoch": 0.55, + "learning_rate": 4.729098838360294e-06, + "loss": 3.5056, + "step": 4562 + }, + { + "epoch": 0.55, + "learning_rate": 4.727101548111034e-06, + "loss": 3.4307, + "step": 4563 + }, + { + "epoch": 0.55, + "learning_rate": 4.725104301536443e-06, + "loss": 3.3609, + "step": 4564 + }, + { + "epoch": 0.55, + "learning_rate": 4.723107098956159e-06, + "loss": 3.4012, + "step": 4565 + }, + { + "epoch": 0.55, + "learning_rate": 4.7211099406898165e-06, + "loss": 3.3077, + "step": 4566 + }, + { + "epoch": 0.55, + "learning_rate": 4.719112827057038e-06, + "loss": 3.3829, + "step": 4567 + }, + { + "epoch": 0.55, + "learning_rate": 4.717115758377441e-06, + "loss": 3.5303, + "step": 4568 + }, + { + "epoch": 0.55, + "learning_rate": 4.7151187349706425e-06, + "loss": 3.5484, + "step": 4569 + }, + { + "epoch": 0.55, + "learning_rate": 4.7131217571562385e-06, + "loss": 3.4586, + "step": 4570 + }, + { + "epoch": 0.55, + "learning_rate": 4.711124825253829e-06, + "loss": 3.3756, + "step": 4571 + }, + { + "epoch": 0.55, + "learning_rate": 4.7091279395830045e-06, + "loss": 3.4611, + "step": 4572 + }, + { + "epoch": 0.55, + "learning_rate": 4.707131100463343e-06, + "loss": 3.3615, + "step": 4573 + }, + { + "epoch": 0.55, + "learning_rate": 4.705134308214423e-06, + "loss": 3.3549, + "step": 4574 + }, + { + "epoch": 0.55, + "learning_rate": 4.703137563155805e-06, + "loss": 3.2957, + "step": 4575 + }, + { + "epoch": 0.55, + "learning_rate": 4.7011408656070525e-06, + "loss": 3.4313, + "step": 4576 + }, + { + "epoch": 0.55, + "learning_rate": 4.699144215887719e-06, + "loss": 3.453, + "step": 4577 + }, + { + "epoch": 0.55, + "learning_rate": 4.697147614317342e-06, + "loss": 3.5707, + "step": 4578 + }, + { + "epoch": 0.55, + "learning_rate": 4.695151061215461e-06, + "loss": 3.4309, + "step": 4579 + }, + { + "epoch": 0.55, + "learning_rate": 4.693154556901607e-06, + "loss": 3.4387, + "step": 4580 + }, + { + "epoch": 0.55, + "learning_rate": 4.691158101695296e-06, + "loss": 3.4281, + "step": 4581 + }, + { + "epoch": 0.55, + "learning_rate": 4.689161695916045e-06, + "loss": 3.4574, + "step": 4582 + }, + { + "epoch": 0.55, + "learning_rate": 4.687165339883354e-06, + "loss": 3.4277, + "step": 4583 + }, + { + "epoch": 0.55, + "learning_rate": 4.685169033916722e-06, + "loss": 3.3739, + "step": 4584 + }, + { + "epoch": 0.55, + "learning_rate": 4.68317277833564e-06, + "loss": 3.4005, + "step": 4585 + }, + { + "epoch": 0.55, + "learning_rate": 4.681176573459585e-06, + "loss": 3.4325, + "step": 4586 + }, + { + "epoch": 0.55, + "learning_rate": 4.679180419608032e-06, + "loss": 3.4036, + "step": 4587 + }, + { + "epoch": 0.55, + "learning_rate": 4.677184317100446e-06, + "loss": 3.5177, + "step": 4588 + }, + { + "epoch": 0.55, + "learning_rate": 4.67518826625628e-06, + "loss": 3.4883, + "step": 4589 + }, + { + "epoch": 0.55, + "learning_rate": 4.673192267394987e-06, + "loss": 3.3129, + "step": 4590 + }, + { + "epoch": 0.55, + "learning_rate": 4.671196320836001e-06, + "loss": 3.3933, + "step": 4591 + }, + { + "epoch": 0.55, + "learning_rate": 4.669200426898756e-06, + "loss": 3.4986, + "step": 4592 + }, + { + "epoch": 0.55, + "learning_rate": 4.667204585902678e-06, + "loss": 3.3749, + "step": 4593 + }, + { + "epoch": 0.55, + "learning_rate": 4.665208798167175e-06, + "loss": 3.4024, + "step": 4594 + }, + { + "epoch": 0.55, + "learning_rate": 4.663213064011658e-06, + "loss": 3.4539, + "step": 4595 + }, + { + "epoch": 0.55, + "learning_rate": 4.661217383755522e-06, + "loss": 3.3329, + "step": 4596 + }, + { + "epoch": 0.55, + "learning_rate": 4.6592217577181554e-06, + "loss": 3.5052, + "step": 4597 + }, + { + "epoch": 0.55, + "learning_rate": 4.65722618621894e-06, + "loss": 3.3777, + "step": 4598 + }, + { + "epoch": 0.55, + "learning_rate": 4.655230669577245e-06, + "loss": 3.4813, + "step": 4599 + }, + { + "epoch": 0.55, + "learning_rate": 4.653235208112435e-06, + "loss": 3.4298, + "step": 4600 + }, + { + "epoch": 0.55, + "learning_rate": 4.651239802143862e-06, + "loss": 3.4437, + "step": 4601 + }, + { + "epoch": 0.55, + "learning_rate": 4.649244451990871e-06, + "loss": 3.4994, + "step": 4602 + }, + { + "epoch": 0.55, + "learning_rate": 4.6472491579728e-06, + "loss": 3.486, + "step": 4603 + }, + { + "epoch": 0.55, + "learning_rate": 4.645253920408971e-06, + "loss": 3.3877, + "step": 4604 + }, + { + "epoch": 0.55, + "learning_rate": 4.643258739618706e-06, + "loss": 3.4274, + "step": 4605 + }, + { + "epoch": 0.55, + "learning_rate": 4.641263615921315e-06, + "loss": 3.4599, + "step": 4606 + }, + { + "epoch": 0.55, + "learning_rate": 4.639268549636092e-06, + "loss": 3.4821, + "step": 4607 + }, + { + "epoch": 0.55, + "learning_rate": 4.637273541082331e-06, + "loss": 3.2714, + "step": 4608 + }, + { + "epoch": 0.55, + "learning_rate": 4.635278590579315e-06, + "loss": 3.5013, + "step": 4609 + }, + { + "epoch": 0.55, + "learning_rate": 4.633283698446311e-06, + "loss": 3.541, + "step": 4610 + }, + { + "epoch": 0.55, + "learning_rate": 4.631288865002587e-06, + "loss": 3.4395, + "step": 4611 + }, + { + "epoch": 0.55, + "learning_rate": 4.62929409056739e-06, + "loss": 3.3064, + "step": 4612 + }, + { + "epoch": 0.55, + "learning_rate": 4.627299375459968e-06, + "loss": 3.4569, + "step": 4613 + }, + { + "epoch": 0.55, + "learning_rate": 4.625304719999557e-06, + "loss": 3.3893, + "step": 4614 + }, + { + "epoch": 0.55, + "learning_rate": 4.623310124505376e-06, + "loss": 3.391, + "step": 4615 + }, + { + "epoch": 0.55, + "learning_rate": 4.621315589296643e-06, + "loss": 3.4662, + "step": 4616 + }, + { + "epoch": 0.55, + "learning_rate": 4.619321114692565e-06, + "loss": 3.4006, + "step": 4617 + }, + { + "epoch": 0.55, + "learning_rate": 4.6173267010123345e-06, + "loss": 3.3912, + "step": 4618 + }, + { + "epoch": 0.55, + "learning_rate": 4.615332348575141e-06, + "loss": 3.4977, + "step": 4619 + }, + { + "epoch": 0.55, + "learning_rate": 4.613338057700156e-06, + "loss": 3.3374, + "step": 4620 + }, + { + "epoch": 0.55, + "learning_rate": 4.6113438287065495e-06, + "loss": 3.4178, + "step": 4621 + }, + { + "epoch": 0.55, + "learning_rate": 4.609349661913479e-06, + "loss": 3.2924, + "step": 4622 + }, + { + "epoch": 0.55, + "learning_rate": 4.6073555576400865e-06, + "loss": 3.3928, + "step": 4623 + }, + { + "epoch": 0.55, + "learning_rate": 4.605361516205511e-06, + "loss": 3.4307, + "step": 4624 + }, + { + "epoch": 0.55, + "learning_rate": 4.60336753792888e-06, + "loss": 3.4099, + "step": 4625 + }, + { + "epoch": 0.55, + "learning_rate": 4.6013736231293074e-06, + "loss": 3.4344, + "step": 4626 + }, + { + "epoch": 0.55, + "learning_rate": 4.599379772125902e-06, + "loss": 3.4114, + "step": 4627 + }, + { + "epoch": 0.55, + "learning_rate": 4.597385985237757e-06, + "loss": 3.4118, + "step": 4628 + }, + { + "epoch": 0.55, + "learning_rate": 4.595392262783958e-06, + "loss": 3.4574, + "step": 4629 + }, + { + "epoch": 0.55, + "learning_rate": 4.593398605083584e-06, + "loss": 3.3931, + "step": 4630 + }, + { + "epoch": 0.55, + "learning_rate": 4.591405012455697e-06, + "loss": 3.4251, + "step": 4631 + }, + { + "epoch": 0.55, + "learning_rate": 4.58941148521935e-06, + "loss": 3.3385, + "step": 4632 + }, + { + "epoch": 0.55, + "learning_rate": 4.587418023693591e-06, + "loss": 3.4381, + "step": 4633 + }, + { + "epoch": 0.55, + "learning_rate": 4.585424628197452e-06, + "loss": 3.4767, + "step": 4634 + }, + { + "epoch": 0.55, + "learning_rate": 4.583431299049956e-06, + "loss": 3.32, + "step": 4635 + }, + { + "epoch": 0.56, + "learning_rate": 4.5814380365701135e-06, + "loss": 3.3471, + "step": 4636 + }, + { + "epoch": 0.56, + "learning_rate": 4.5794448410769285e-06, + "loss": 3.3835, + "step": 4637 + }, + { + "epoch": 0.56, + "learning_rate": 4.577451712889392e-06, + "loss": 3.3481, + "step": 4638 + }, + { + "epoch": 0.56, + "learning_rate": 4.575458652326485e-06, + "loss": 3.5097, + "step": 4639 + }, + { + "epoch": 0.56, + "learning_rate": 4.573465659707172e-06, + "loss": 3.4494, + "step": 4640 + }, + { + "epoch": 0.56, + "learning_rate": 4.571472735350418e-06, + "loss": 3.5277, + "step": 4641 + }, + { + "epoch": 0.56, + "learning_rate": 4.569479879575168e-06, + "loss": 3.441, + "step": 4642 + }, + { + "epoch": 0.56, + "learning_rate": 4.567487092700357e-06, + "loss": 3.3337, + "step": 4643 + }, + { + "epoch": 0.56, + "learning_rate": 4.565494375044912e-06, + "loss": 3.3581, + "step": 4644 + }, + { + "epoch": 0.56, + "learning_rate": 4.5635017269277485e-06, + "loss": 3.3991, + "step": 4645 + }, + { + "epoch": 0.56, + "learning_rate": 4.561509148667768e-06, + "loss": 3.3865, + "step": 4646 + }, + { + "epoch": 0.56, + "learning_rate": 4.5595166405838636e-06, + "loss": 3.4591, + "step": 4647 + }, + { + "epoch": 0.56, + "learning_rate": 4.557524202994917e-06, + "loss": 3.348, + "step": 4648 + }, + { + "epoch": 0.56, + "learning_rate": 4.555531836219798e-06, + "loss": 3.4451, + "step": 4649 + }, + { + "epoch": 0.56, + "learning_rate": 4.553539540577362e-06, + "loss": 3.4887, + "step": 4650 + }, + { + "epoch": 0.56, + "learning_rate": 4.551547316386461e-06, + "loss": 3.3429, + "step": 4651 + }, + { + "epoch": 0.56, + "learning_rate": 4.5495551639659256e-06, + "loss": 3.4061, + "step": 4652 + }, + { + "epoch": 0.56, + "learning_rate": 4.547563083634582e-06, + "loss": 3.4179, + "step": 4653 + }, + { + "epoch": 0.56, + "learning_rate": 4.5455710757112455e-06, + "loss": 3.5102, + "step": 4654 + }, + { + "epoch": 0.56, + "learning_rate": 4.5435791405147116e-06, + "loss": 3.4107, + "step": 4655 + }, + { + "epoch": 0.56, + "learning_rate": 4.541587278363775e-06, + "loss": 3.3435, + "step": 4656 + }, + { + "epoch": 0.56, + "learning_rate": 4.539595489577208e-06, + "loss": 3.4695, + "step": 4657 + }, + { + "epoch": 0.56, + "learning_rate": 4.537603774473779e-06, + "loss": 3.4741, + "step": 4658 + }, + { + "epoch": 0.56, + "learning_rate": 4.535612133372246e-06, + "loss": 3.391, + "step": 4659 + }, + { + "epoch": 0.56, + "learning_rate": 4.533620566591344e-06, + "loss": 3.5329, + "step": 4660 + }, + { + "epoch": 0.56, + "learning_rate": 4.531629074449807e-06, + "loss": 3.399, + "step": 4661 + }, + { + "epoch": 0.56, + "learning_rate": 4.529637657266356e-06, + "loss": 3.4486, + "step": 4662 + }, + { + "epoch": 0.56, + "learning_rate": 4.527646315359692e-06, + "loss": 3.4408, + "step": 4663 + }, + { + "epoch": 0.56, + "learning_rate": 4.525655049048514e-06, + "loss": 3.3503, + "step": 4664 + }, + { + "epoch": 0.56, + "learning_rate": 4.5236638586515e-06, + "loss": 3.4704, + "step": 4665 + }, + { + "epoch": 0.56, + "learning_rate": 4.521672744487323e-06, + "loss": 3.4328, + "step": 4666 + }, + { + "epoch": 0.56, + "learning_rate": 4.5196817068746424e-06, + "loss": 3.4447, + "step": 4667 + }, + { + "epoch": 0.56, + "learning_rate": 4.5176907461321e-06, + "loss": 3.4343, + "step": 4668 + }, + { + "epoch": 0.56, + "learning_rate": 4.51569986257833e-06, + "loss": 3.4, + "step": 4669 + }, + { + "epoch": 0.56, + "learning_rate": 4.513709056531957e-06, + "loss": 3.357, + "step": 4670 + }, + { + "epoch": 0.56, + "learning_rate": 4.511718328311585e-06, + "loss": 3.5133, + "step": 4671 + }, + { + "epoch": 0.56, + "learning_rate": 4.509727678235814e-06, + "loss": 3.4221, + "step": 4672 + }, + { + "epoch": 0.56, + "learning_rate": 4.507737106623224e-06, + "loss": 3.4699, + "step": 4673 + }, + { + "epoch": 0.56, + "learning_rate": 4.505746613792387e-06, + "loss": 3.3692, + "step": 4674 + }, + { + "epoch": 0.56, + "learning_rate": 4.503756200061865e-06, + "loss": 3.466, + "step": 4675 + }, + { + "epoch": 0.56, + "learning_rate": 4.501765865750199e-06, + "loss": 3.4636, + "step": 4676 + }, + { + "epoch": 0.56, + "learning_rate": 4.499775611175924e-06, + "loss": 3.4688, + "step": 4677 + }, + { + "epoch": 0.56, + "learning_rate": 4.497785436657562e-06, + "loss": 3.3522, + "step": 4678 + }, + { + "epoch": 0.56, + "learning_rate": 4.495795342513618e-06, + "loss": 3.397, + "step": 4679 + }, + { + "epoch": 0.56, + "learning_rate": 4.493805329062589e-06, + "loss": 3.328, + "step": 4680 + }, + { + "epoch": 0.56, + "learning_rate": 4.491815396622955e-06, + "loss": 3.3496, + "step": 4681 + }, + { + "epoch": 0.56, + "learning_rate": 4.489825545513184e-06, + "loss": 3.3266, + "step": 4682 + }, + { + "epoch": 0.56, + "learning_rate": 4.487835776051734e-06, + "loss": 3.4839, + "step": 4683 + }, + { + "epoch": 0.56, + "learning_rate": 4.485846088557048e-06, + "loss": 3.3658, + "step": 4684 + }, + { + "epoch": 0.56, + "learning_rate": 4.483856483347551e-06, + "loss": 3.3399, + "step": 4685 + }, + { + "epoch": 0.56, + "learning_rate": 4.481866960741666e-06, + "loss": 3.4354, + "step": 4686 + }, + { + "epoch": 0.56, + "learning_rate": 4.479877521057792e-06, + "loss": 3.427, + "step": 4687 + }, + { + "epoch": 0.56, + "learning_rate": 4.477888164614321e-06, + "loss": 3.327, + "step": 4688 + }, + { + "epoch": 0.56, + "learning_rate": 4.475898891729627e-06, + "loss": 3.4392, + "step": 4689 + }, + { + "epoch": 0.56, + "learning_rate": 4.473909702722074e-06, + "loss": 3.4619, + "step": 4690 + }, + { + "epoch": 0.56, + "learning_rate": 4.471920597910015e-06, + "loss": 3.4089, + "step": 4691 + }, + { + "epoch": 0.56, + "learning_rate": 4.469931577611781e-06, + "loss": 3.4752, + "step": 4692 + }, + { + "epoch": 0.56, + "learning_rate": 4.467942642145699e-06, + "loss": 3.4455, + "step": 4693 + }, + { + "epoch": 0.56, + "learning_rate": 4.465953791830079e-06, + "loss": 3.5028, + "step": 4694 + }, + { + "epoch": 0.56, + "learning_rate": 4.463965026983212e-06, + "loss": 3.4126, + "step": 4695 + }, + { + "epoch": 0.56, + "learning_rate": 4.461976347923385e-06, + "loss": 3.4236, + "step": 4696 + }, + { + "epoch": 0.56, + "learning_rate": 4.459987754968861e-06, + "loss": 3.4803, + "step": 4697 + }, + { + "epoch": 0.56, + "learning_rate": 4.457999248437897e-06, + "loss": 3.4138, + "step": 4698 + }, + { + "epoch": 0.56, + "learning_rate": 4.456010828648736e-06, + "loss": 3.4897, + "step": 4699 + }, + { + "epoch": 0.56, + "learning_rate": 4.454022495919601e-06, + "loss": 3.3091, + "step": 4700 + }, + { + "epoch": 0.56, + "learning_rate": 4.452034250568705e-06, + "loss": 3.3688, + "step": 4701 + }, + { + "epoch": 0.56, + "learning_rate": 4.450046092914251e-06, + "loss": 3.4317, + "step": 4702 + }, + { + "epoch": 0.56, + "learning_rate": 4.448058023274417e-06, + "loss": 3.4212, + "step": 4703 + }, + { + "epoch": 0.56, + "learning_rate": 4.446070041967381e-06, + "loss": 3.3182, + "step": 4704 + }, + { + "epoch": 0.56, + "learning_rate": 4.444082149311294e-06, + "loss": 3.3952, + "step": 4705 + }, + { + "epoch": 0.56, + "learning_rate": 4.442094345624299e-06, + "loss": 3.4541, + "step": 4706 + }, + { + "epoch": 0.56, + "learning_rate": 4.440106631224528e-06, + "loss": 3.3545, + "step": 4707 + }, + { + "epoch": 0.56, + "learning_rate": 4.43811900643009e-06, + "loss": 3.4662, + "step": 4708 + }, + { + "epoch": 0.56, + "learning_rate": 4.436131471559087e-06, + "loss": 3.4733, + "step": 4709 + }, + { + "epoch": 0.56, + "learning_rate": 4.434144026929606e-06, + "loss": 3.4856, + "step": 4710 + }, + { + "epoch": 0.56, + "learning_rate": 4.432156672859712e-06, + "loss": 3.4782, + "step": 4711 + }, + { + "epoch": 0.56, + "learning_rate": 4.430169409667468e-06, + "loss": 3.4282, + "step": 4712 + }, + { + "epoch": 0.56, + "learning_rate": 4.428182237670909e-06, + "loss": 3.3912, + "step": 4713 + }, + { + "epoch": 0.56, + "learning_rate": 4.426195157188065e-06, + "loss": 3.4167, + "step": 4714 + }, + { + "epoch": 0.56, + "learning_rate": 4.424208168536951e-06, + "loss": 3.4506, + "step": 4715 + }, + { + "epoch": 0.56, + "learning_rate": 4.42222127203556e-06, + "loss": 3.3783, + "step": 4716 + }, + { + "epoch": 0.56, + "learning_rate": 4.420234468001877e-06, + "loss": 3.4776, + "step": 4717 + }, + { + "epoch": 0.56, + "learning_rate": 4.418247756753873e-06, + "loss": 3.4114, + "step": 4718 + }, + { + "epoch": 0.56, + "learning_rate": 4.416261138609496e-06, + "loss": 3.3611, + "step": 4719 + }, + { + "epoch": 0.57, + "learning_rate": 4.414274613886688e-06, + "loss": 3.4338, + "step": 4720 + }, + { + "epoch": 0.57, + "learning_rate": 4.41228818290337e-06, + "loss": 3.4325, + "step": 4721 + }, + { + "epoch": 0.57, + "learning_rate": 4.410301845977451e-06, + "loss": 3.4388, + "step": 4722 + }, + { + "epoch": 0.57, + "learning_rate": 4.408315603426826e-06, + "loss": 3.3908, + "step": 4723 + }, + { + "epoch": 0.57, + "learning_rate": 4.406329455569373e-06, + "loss": 3.4044, + "step": 4724 + }, + { + "epoch": 0.57, + "learning_rate": 4.4043434027229545e-06, + "loss": 3.3043, + "step": 4725 + }, + { + "epoch": 0.57, + "learning_rate": 4.402357445205417e-06, + "loss": 3.4108, + "step": 4726 + }, + { + "epoch": 0.57, + "learning_rate": 4.400371583334594e-06, + "loss": 3.4031, + "step": 4727 + }, + { + "epoch": 0.57, + "learning_rate": 4.398385817428305e-06, + "loss": 3.4474, + "step": 4728 + }, + { + "epoch": 0.57, + "learning_rate": 4.396400147804348e-06, + "loss": 3.3927, + "step": 4729 + }, + { + "epoch": 0.57, + "learning_rate": 4.394414574780514e-06, + "loss": 3.5145, + "step": 4730 + }, + { + "epoch": 0.57, + "learning_rate": 4.392429098674572e-06, + "loss": 3.4358, + "step": 4731 + }, + { + "epoch": 0.57, + "learning_rate": 4.390443719804275e-06, + "loss": 3.4436, + "step": 4732 + }, + { + "epoch": 0.57, + "learning_rate": 4.388458438487368e-06, + "loss": 3.488, + "step": 4733 + }, + { + "epoch": 0.57, + "learning_rate": 4.386473255041572e-06, + "loss": 3.4118, + "step": 4734 + }, + { + "epoch": 0.57, + "learning_rate": 4.384488169784596e-06, + "loss": 3.3829, + "step": 4735 + }, + { + "epoch": 0.57, + "learning_rate": 4.382503183034136e-06, + "loss": 3.3654, + "step": 4736 + }, + { + "epoch": 0.57, + "learning_rate": 4.380518295107864e-06, + "loss": 3.4163, + "step": 4737 + }, + { + "epoch": 0.57, + "learning_rate": 4.378533506323446e-06, + "loss": 3.4585, + "step": 4738 + }, + { + "epoch": 0.57, + "learning_rate": 4.376548816998527e-06, + "loss": 3.3367, + "step": 4739 + }, + { + "epoch": 0.57, + "learning_rate": 4.374564227450734e-06, + "loss": 3.4776, + "step": 4740 + }, + { + "epoch": 0.57, + "learning_rate": 4.372579737997685e-06, + "loss": 3.3655, + "step": 4741 + }, + { + "epoch": 0.57, + "learning_rate": 4.370595348956973e-06, + "loss": 3.3849, + "step": 4742 + }, + { + "epoch": 0.57, + "learning_rate": 4.368611060646181e-06, + "loss": 3.3733, + "step": 4743 + }, + { + "epoch": 0.57, + "learning_rate": 4.366626873382879e-06, + "loss": 3.4249, + "step": 4744 + }, + { + "epoch": 0.57, + "learning_rate": 4.3646427874846096e-06, + "loss": 3.4419, + "step": 4745 + }, + { + "epoch": 0.57, + "learning_rate": 4.362658803268909e-06, + "loss": 3.34, + "step": 4746 + }, + { + "epoch": 0.57, + "learning_rate": 4.360674921053296e-06, + "loss": 3.4472, + "step": 4747 + }, + { + "epoch": 0.57, + "learning_rate": 4.3586911411552665e-06, + "loss": 3.3958, + "step": 4748 + }, + { + "epoch": 0.57, + "learning_rate": 4.35670746389231e-06, + "loss": 3.3155, + "step": 4749 + }, + { + "epoch": 0.57, + "learning_rate": 4.354723889581889e-06, + "loss": 3.5447, + "step": 4750 + }, + { + "epoch": 0.57, + "learning_rate": 4.352740418541458e-06, + "loss": 3.4739, + "step": 4751 + }, + { + "epoch": 0.57, + "learning_rate": 4.350757051088452e-06, + "loss": 3.375, + "step": 4752 + }, + { + "epoch": 0.57, + "learning_rate": 4.348773787540286e-06, + "loss": 3.3992, + "step": 4753 + }, + { + "epoch": 0.57, + "learning_rate": 4.3467906282143645e-06, + "loss": 3.4457, + "step": 4754 + }, + { + "epoch": 0.57, + "learning_rate": 4.344807573428072e-06, + "loss": 3.4147, + "step": 4755 + }, + { + "epoch": 0.57, + "learning_rate": 4.342824623498776e-06, + "loss": 3.4474, + "step": 4756 + }, + { + "epoch": 0.57, + "learning_rate": 4.340841778743828e-06, + "loss": 3.359, + "step": 4757 + }, + { + "epoch": 0.57, + "learning_rate": 4.33885903948056e-06, + "loss": 3.3449, + "step": 4758 + }, + { + "epoch": 0.57, + "learning_rate": 4.336876406026293e-06, + "loss": 3.3488, + "step": 4759 + }, + { + "epoch": 0.57, + "learning_rate": 4.334893878698328e-06, + "loss": 3.412, + "step": 4760 + }, + { + "epoch": 0.57, + "learning_rate": 4.3329114578139445e-06, + "loss": 3.4746, + "step": 4761 + }, + { + "epoch": 0.57, + "learning_rate": 4.330929143690412e-06, + "loss": 3.383, + "step": 4762 + }, + { + "epoch": 0.57, + "learning_rate": 4.328946936644983e-06, + "loss": 3.4734, + "step": 4763 + }, + { + "epoch": 0.57, + "learning_rate": 4.3269648369948836e-06, + "loss": 3.4069, + "step": 4764 + }, + { + "epoch": 0.57, + "learning_rate": 4.3249828450573336e-06, + "loss": 3.4027, + "step": 4765 + }, + { + "epoch": 0.57, + "learning_rate": 4.32300096114953e-06, + "loss": 3.3573, + "step": 4766 + }, + { + "epoch": 0.57, + "learning_rate": 4.321019185588651e-06, + "loss": 3.4076, + "step": 4767 + }, + { + "epoch": 0.57, + "learning_rate": 4.319037518691863e-06, + "loss": 3.4522, + "step": 4768 + }, + { + "epoch": 0.57, + "learning_rate": 4.317055960776312e-06, + "loss": 3.4175, + "step": 4769 + }, + { + "epoch": 0.57, + "learning_rate": 4.315074512159124e-06, + "loss": 3.4955, + "step": 4770 + }, + { + "epoch": 0.57, + "learning_rate": 4.3130931731574126e-06, + "loss": 3.3286, + "step": 4771 + }, + { + "epoch": 0.57, + "learning_rate": 4.31111194408827e-06, + "loss": 3.3808, + "step": 4772 + }, + { + "epoch": 0.57, + "learning_rate": 4.309130825268773e-06, + "loss": 3.5057, + "step": 4773 + }, + { + "epoch": 0.57, + "learning_rate": 4.307149817015977e-06, + "loss": 3.465, + "step": 4774 + }, + { + "epoch": 0.57, + "learning_rate": 4.305168919646928e-06, + "loss": 3.4896, + "step": 4775 + }, + { + "epoch": 0.57, + "learning_rate": 4.303188133478644e-06, + "loss": 3.4564, + "step": 4776 + }, + { + "epoch": 0.57, + "learning_rate": 4.301207458828131e-06, + "loss": 3.4404, + "step": 4777 + }, + { + "epoch": 0.57, + "learning_rate": 4.299226896012377e-06, + "loss": 3.3477, + "step": 4778 + }, + { + "epoch": 0.57, + "learning_rate": 4.297246445348352e-06, + "loss": 3.4354, + "step": 4779 + }, + { + "epoch": 0.57, + "learning_rate": 4.295266107153006e-06, + "loss": 3.4641, + "step": 4780 + }, + { + "epoch": 0.57, + "learning_rate": 4.293285881743274e-06, + "loss": 3.4255, + "step": 4781 + }, + { + "epoch": 0.57, + "learning_rate": 4.291305769436068e-06, + "loss": 3.3577, + "step": 4782 + }, + { + "epoch": 0.57, + "learning_rate": 4.289325770548287e-06, + "loss": 3.4145, + "step": 4783 + }, + { + "epoch": 0.57, + "learning_rate": 4.287345885396812e-06, + "loss": 3.4322, + "step": 4784 + }, + { + "epoch": 0.57, + "learning_rate": 4.2853661142985005e-06, + "loss": 3.4507, + "step": 4785 + }, + { + "epoch": 0.57, + "learning_rate": 4.283386457570199e-06, + "loss": 3.4377, + "step": 4786 + }, + { + "epoch": 0.57, + "learning_rate": 4.281406915528726e-06, + "loss": 3.4256, + "step": 4787 + }, + { + "epoch": 0.57, + "learning_rate": 4.279427488490892e-06, + "loss": 3.4399, + "step": 4788 + }, + { + "epoch": 0.57, + "learning_rate": 4.277448176773484e-06, + "loss": 3.4288, + "step": 4789 + }, + { + "epoch": 0.57, + "learning_rate": 4.27546898069327e-06, + "loss": 3.3903, + "step": 4790 + }, + { + "epoch": 0.57, + "learning_rate": 4.273489900567e-06, + "loss": 3.396, + "step": 4791 + }, + { + "epoch": 0.57, + "learning_rate": 4.271510936711409e-06, + "loss": 3.4554, + "step": 4792 + }, + { + "epoch": 0.57, + "learning_rate": 4.269532089443206e-06, + "loss": 3.4896, + "step": 4793 + }, + { + "epoch": 0.57, + "learning_rate": 4.2675533590790895e-06, + "loss": 3.4332, + "step": 4794 + }, + { + "epoch": 0.57, + "learning_rate": 4.265574745935733e-06, + "loss": 3.5434, + "step": 4795 + }, + { + "epoch": 0.57, + "learning_rate": 4.263596250329794e-06, + "loss": 3.4423, + "step": 4796 + }, + { + "epoch": 0.57, + "learning_rate": 4.261617872577914e-06, + "loss": 3.4809, + "step": 4797 + }, + { + "epoch": 0.57, + "learning_rate": 4.259639612996708e-06, + "loss": 3.4191, + "step": 4798 + }, + { + "epoch": 0.57, + "learning_rate": 4.257661471902778e-06, + "loss": 3.445, + "step": 4799 + }, + { + "epoch": 0.57, + "learning_rate": 4.25568344961271e-06, + "loss": 3.4037, + "step": 4800 + }, + { + "epoch": 0.57, + "learning_rate": 4.253705546443061e-06, + "loss": 3.4565, + "step": 4801 + }, + { + "epoch": 0.57, + "learning_rate": 4.251727762710379e-06, + "loss": 3.4413, + "step": 4802 + }, + { + "epoch": 0.58, + "learning_rate": 4.249750098731184e-06, + "loss": 3.4038, + "step": 4803 + }, + { + "epoch": 0.58, + "learning_rate": 4.247772554821983e-06, + "loss": 3.5171, + "step": 4804 + }, + { + "epoch": 0.58, + "learning_rate": 4.245795131299266e-06, + "loss": 3.3538, + "step": 4805 + }, + { + "epoch": 0.58, + "learning_rate": 4.243817828479494e-06, + "loss": 3.397, + "step": 4806 + }, + { + "epoch": 0.58, + "learning_rate": 4.241840646679118e-06, + "loss": 3.3647, + "step": 4807 + }, + { + "epoch": 0.58, + "learning_rate": 4.2398635862145675e-06, + "loss": 3.3603, + "step": 4808 + }, + { + "epoch": 0.58, + "learning_rate": 4.237886647402247e-06, + "loss": 3.3889, + "step": 4809 + }, + { + "epoch": 0.58, + "learning_rate": 4.235909830558549e-06, + "loss": 3.4987, + "step": 4810 + }, + { + "epoch": 0.58, + "learning_rate": 4.233933135999842e-06, + "loss": 3.3829, + "step": 4811 + }, + { + "epoch": 0.58, + "learning_rate": 4.231956564042475e-06, + "loss": 3.4567, + "step": 4812 + }, + { + "epoch": 0.58, + "learning_rate": 4.229980115002782e-06, + "loss": 3.4483, + "step": 4813 + }, + { + "epoch": 0.58, + "learning_rate": 4.228003789197071e-06, + "loss": 3.3642, + "step": 4814 + }, + { + "epoch": 0.58, + "learning_rate": 4.2260275869416344e-06, + "loss": 3.3367, + "step": 4815 + }, + { + "epoch": 0.58, + "learning_rate": 4.224051508552743e-06, + "loss": 3.3598, + "step": 4816 + }, + { + "epoch": 0.58, + "learning_rate": 4.222075554346649e-06, + "loss": 3.4787, + "step": 4817 + }, + { + "epoch": 0.58, + "learning_rate": 4.220099724639585e-06, + "loss": 3.3055, + "step": 4818 + }, + { + "epoch": 0.58, + "learning_rate": 4.21812401974776e-06, + "loss": 3.4358, + "step": 4819 + }, + { + "epoch": 0.58, + "learning_rate": 4.216148439987368e-06, + "loss": 3.4609, + "step": 4820 + }, + { + "epoch": 0.58, + "learning_rate": 4.214172985674581e-06, + "loss": 3.4285, + "step": 4821 + }, + { + "epoch": 0.58, + "learning_rate": 4.212197657125549e-06, + "loss": 3.3426, + "step": 4822 + }, + { + "epoch": 0.58, + "learning_rate": 4.210222454656404e-06, + "loss": 3.3868, + "step": 4823 + }, + { + "epoch": 0.58, + "learning_rate": 4.2082473785832605e-06, + "loss": 3.3851, + "step": 4824 + }, + { + "epoch": 0.58, + "learning_rate": 4.206272429222204e-06, + "loss": 3.4639, + "step": 4825 + }, + { + "epoch": 0.58, + "learning_rate": 4.2042976068893115e-06, + "loss": 3.3573, + "step": 4826 + }, + { + "epoch": 0.58, + "learning_rate": 4.202322911900628e-06, + "loss": 3.4448, + "step": 4827 + }, + { + "epoch": 0.58, + "learning_rate": 4.200348344572186e-06, + "loss": 3.441, + "step": 4828 + }, + { + "epoch": 0.58, + "learning_rate": 4.198373905219999e-06, + "loss": 3.3993, + "step": 4829 + }, + { + "epoch": 0.58, + "learning_rate": 4.19639959416005e-06, + "loss": 3.3911, + "step": 4830 + }, + { + "epoch": 0.58, + "learning_rate": 4.19442541170831e-06, + "loss": 3.4962, + "step": 4831 + }, + { + "epoch": 0.58, + "learning_rate": 4.192451358180731e-06, + "loss": 3.4257, + "step": 4832 + }, + { + "epoch": 0.58, + "learning_rate": 4.190477433893233e-06, + "loss": 3.3969, + "step": 4833 + }, + { + "epoch": 0.58, + "learning_rate": 4.188503639161731e-06, + "loss": 3.4914, + "step": 4834 + }, + { + "epoch": 0.58, + "learning_rate": 4.186529974302105e-06, + "loss": 3.4125, + "step": 4835 + }, + { + "epoch": 0.58, + "learning_rate": 4.184556439630221e-06, + "loss": 3.4834, + "step": 4836 + }, + { + "epoch": 0.58, + "learning_rate": 4.1825830354619285e-06, + "loss": 3.4511, + "step": 4837 + }, + { + "epoch": 0.58, + "learning_rate": 4.180609762113045e-06, + "loss": 3.3376, + "step": 4838 + }, + { + "epoch": 0.58, + "learning_rate": 4.178636619899376e-06, + "loss": 3.3766, + "step": 4839 + }, + { + "epoch": 0.58, + "learning_rate": 4.176663609136705e-06, + "loss": 3.4568, + "step": 4840 + }, + { + "epoch": 0.58, + "learning_rate": 4.174690730140788e-06, + "loss": 3.4818, + "step": 4841 + }, + { + "epoch": 0.58, + "learning_rate": 4.17271798322737e-06, + "loss": 3.3929, + "step": 4842 + }, + { + "epoch": 0.58, + "learning_rate": 4.170745368712164e-06, + "loss": 3.4085, + "step": 4843 + }, + { + "epoch": 0.58, + "learning_rate": 4.16877288691087e-06, + "loss": 3.3632, + "step": 4844 + }, + { + "epoch": 0.58, + "learning_rate": 4.166800538139166e-06, + "loss": 3.4022, + "step": 4845 + }, + { + "epoch": 0.58, + "learning_rate": 4.164828322712702e-06, + "loss": 3.4007, + "step": 4846 + }, + { + "epoch": 0.58, + "learning_rate": 4.162856240947114e-06, + "loss": 3.4086, + "step": 4847 + }, + { + "epoch": 0.58, + "learning_rate": 4.1608842931580165e-06, + "loss": 3.3919, + "step": 4848 + }, + { + "epoch": 0.58, + "learning_rate": 4.158912479660995e-06, + "loss": 3.4338, + "step": 4849 + }, + { + "epoch": 0.58, + "learning_rate": 4.156940800771621e-06, + "loss": 3.5204, + "step": 4850 + }, + { + "epoch": 0.58, + "learning_rate": 4.154969256805442e-06, + "loss": 3.3975, + "step": 4851 + }, + { + "epoch": 0.58, + "learning_rate": 4.1529978480779835e-06, + "loss": 3.3438, + "step": 4852 + }, + { + "epoch": 0.58, + "learning_rate": 4.151026574904751e-06, + "loss": 3.3909, + "step": 4853 + }, + { + "epoch": 0.58, + "learning_rate": 4.149055437601226e-06, + "loss": 3.3882, + "step": 4854 + }, + { + "epoch": 0.58, + "learning_rate": 4.1470844364828684e-06, + "loss": 3.3907, + "step": 4855 + }, + { + "epoch": 0.58, + "learning_rate": 4.1451135718651185e-06, + "loss": 3.4826, + "step": 4856 + }, + { + "epoch": 0.58, + "learning_rate": 4.143142844063394e-06, + "loss": 3.4577, + "step": 4857 + }, + { + "epoch": 0.58, + "learning_rate": 4.14117225339309e-06, + "loss": 3.4168, + "step": 4858 + }, + { + "epoch": 0.58, + "learning_rate": 4.139201800169577e-06, + "loss": 3.3713, + "step": 4859 + }, + { + "epoch": 0.58, + "learning_rate": 4.137231484708209e-06, + "loss": 3.3984, + "step": 4860 + }, + { + "epoch": 0.58, + "learning_rate": 4.135261307324316e-06, + "loss": 3.4338, + "step": 4861 + }, + { + "epoch": 0.58, + "learning_rate": 4.133291268333202e-06, + "loss": 3.3804, + "step": 4862 + }, + { + "epoch": 0.58, + "learning_rate": 4.131321368050157e-06, + "loss": 3.4391, + "step": 4863 + }, + { + "epoch": 0.58, + "learning_rate": 4.129351606790439e-06, + "loss": 3.4231, + "step": 4864 + }, + { + "epoch": 0.58, + "learning_rate": 4.12738198486929e-06, + "loss": 3.3806, + "step": 4865 + }, + { + "epoch": 0.58, + "learning_rate": 4.125412502601931e-06, + "loss": 3.406, + "step": 4866 + }, + { + "epoch": 0.58, + "learning_rate": 4.123443160303553e-06, + "loss": 3.3456, + "step": 4867 + }, + { + "epoch": 0.58, + "learning_rate": 4.121473958289333e-06, + "loss": 3.3683, + "step": 4868 + }, + { + "epoch": 0.58, + "learning_rate": 4.1195048968744235e-06, + "loss": 3.3738, + "step": 4869 + }, + { + "epoch": 0.58, + "learning_rate": 4.117535976373948e-06, + "loss": 3.3438, + "step": 4870 + }, + { + "epoch": 0.58, + "learning_rate": 4.115567197103019e-06, + "loss": 3.3856, + "step": 4871 + }, + { + "epoch": 0.58, + "learning_rate": 4.113598559376713e-06, + "loss": 3.4072, + "step": 4872 + }, + { + "epoch": 0.58, + "learning_rate": 4.111630063510095e-06, + "loss": 3.3911, + "step": 4873 + }, + { + "epoch": 0.58, + "learning_rate": 4.109661709818203e-06, + "loss": 3.4231, + "step": 4874 + }, + { + "epoch": 0.58, + "learning_rate": 4.107693498616051e-06, + "loss": 3.4618, + "step": 4875 + }, + { + "epoch": 0.58, + "learning_rate": 4.105725430218632e-06, + "loss": 3.3671, + "step": 4876 + }, + { + "epoch": 0.58, + "learning_rate": 4.103757504940918e-06, + "loss": 3.3464, + "step": 4877 + }, + { + "epoch": 0.58, + "learning_rate": 4.101789723097851e-06, + "loss": 3.4058, + "step": 4878 + }, + { + "epoch": 0.58, + "learning_rate": 4.099822085004359e-06, + "loss": 3.3875, + "step": 4879 + }, + { + "epoch": 0.58, + "learning_rate": 4.09785459097534e-06, + "loss": 3.4857, + "step": 4880 + }, + { + "epoch": 0.58, + "learning_rate": 4.095887241325672e-06, + "loss": 3.4277, + "step": 4881 + }, + { + "epoch": 0.58, + "learning_rate": 4.093920036370214e-06, + "loss": 3.379, + "step": 4882 + }, + { + "epoch": 0.58, + "learning_rate": 4.0919529764237905e-06, + "loss": 3.4134, + "step": 4883 + }, + { + "epoch": 0.58, + "learning_rate": 4.089986061801214e-06, + "loss": 3.4546, + "step": 4884 + }, + { + "epoch": 0.58, + "learning_rate": 4.088019292817271e-06, + "loss": 3.4818, + "step": 4885 + }, + { + "epoch": 0.58, + "learning_rate": 4.086052669786719e-06, + "loss": 3.4997, + "step": 4886 + }, + { + "epoch": 0.59, + "learning_rate": 4.084086193024301e-06, + "loss": 3.3953, + "step": 4887 + }, + { + "epoch": 0.59, + "learning_rate": 4.082119862844726e-06, + "loss": 3.3669, + "step": 4888 + }, + { + "epoch": 0.59, + "learning_rate": 4.08015367956269e-06, + "loss": 3.4095, + "step": 4889 + }, + { + "epoch": 0.59, + "learning_rate": 4.078187643492862e-06, + "loss": 3.3977, + "step": 4890 + }, + { + "epoch": 0.59, + "learning_rate": 4.0762217549498805e-06, + "loss": 3.3569, + "step": 4891 + }, + { + "epoch": 0.59, + "learning_rate": 4.074256014248371e-06, + "loss": 3.3664, + "step": 4892 + }, + { + "epoch": 0.59, + "learning_rate": 4.072290421702931e-06, + "loss": 3.4662, + "step": 4893 + }, + { + "epoch": 0.59, + "learning_rate": 4.070324977628132e-06, + "loss": 3.4288, + "step": 4894 + }, + { + "epoch": 0.59, + "learning_rate": 4.068359682338524e-06, + "loss": 3.3363, + "step": 4895 + }, + { + "epoch": 0.59, + "learning_rate": 4.066394536148634e-06, + "loss": 3.3875, + "step": 4896 + }, + { + "epoch": 0.59, + "learning_rate": 4.064429539372961e-06, + "loss": 3.4653, + "step": 4897 + }, + { + "epoch": 0.59, + "learning_rate": 4.062464692325987e-06, + "loss": 3.4138, + "step": 4898 + }, + { + "epoch": 0.59, + "learning_rate": 4.060499995322164e-06, + "loss": 3.4106, + "step": 4899 + }, + { + "epoch": 0.59, + "learning_rate": 4.058535448675923e-06, + "loss": 3.4088, + "step": 4900 + }, + { + "epoch": 0.59, + "learning_rate": 4.056571052701669e-06, + "loss": 3.436, + "step": 4901 + }, + { + "epoch": 0.59, + "learning_rate": 4.054606807713784e-06, + "loss": 3.5796, + "step": 4902 + }, + { + "epoch": 0.59, + "learning_rate": 4.052642714026627e-06, + "loss": 3.3506, + "step": 4903 + }, + { + "epoch": 0.59, + "learning_rate": 4.050678771954529e-06, + "loss": 3.3697, + "step": 4904 + }, + { + "epoch": 0.59, + "learning_rate": 4.048714981811803e-06, + "loss": 3.3325, + "step": 4905 + }, + { + "epoch": 0.59, + "learning_rate": 4.0467513439127315e-06, + "loss": 3.4692, + "step": 4906 + }, + { + "epoch": 0.59, + "learning_rate": 4.044787858571575e-06, + "loss": 3.4382, + "step": 4907 + }, + { + "epoch": 0.59, + "learning_rate": 4.04282452610257e-06, + "loss": 3.369, + "step": 4908 + }, + { + "epoch": 0.59, + "learning_rate": 4.04086134681993e-06, + "loss": 3.4435, + "step": 4909 + }, + { + "epoch": 0.59, + "learning_rate": 4.038898321037839e-06, + "loss": 3.45, + "step": 4910 + }, + { + "epoch": 0.59, + "learning_rate": 4.036935449070464e-06, + "loss": 3.4368, + "step": 4911 + }, + { + "epoch": 0.59, + "learning_rate": 4.0349727312319386e-06, + "loss": 3.355, + "step": 4912 + }, + { + "epoch": 0.59, + "learning_rate": 4.033010167836378e-06, + "loss": 3.2986, + "step": 4913 + }, + { + "epoch": 0.59, + "learning_rate": 4.031047759197872e-06, + "loss": 3.3486, + "step": 4914 + }, + { + "epoch": 0.59, + "learning_rate": 4.029085505630481e-06, + "loss": 3.5228, + "step": 4915 + }, + { + "epoch": 0.59, + "learning_rate": 4.02712340744825e-06, + "loss": 3.3983, + "step": 4916 + }, + { + "epoch": 0.59, + "learning_rate": 4.025161464965186e-06, + "loss": 3.4684, + "step": 4917 + }, + { + "epoch": 0.59, + "learning_rate": 4.023199678495283e-06, + "loss": 3.3639, + "step": 4918 + }, + { + "epoch": 0.59, + "learning_rate": 4.021238048352505e-06, + "loss": 3.417, + "step": 4919 + }, + { + "epoch": 0.59, + "learning_rate": 4.019276574850789e-06, + "loss": 3.4536, + "step": 4920 + }, + { + "epoch": 0.59, + "learning_rate": 4.0173152583040515e-06, + "loss": 3.3598, + "step": 4921 + }, + { + "epoch": 0.59, + "learning_rate": 4.015354099026182e-06, + "loss": 3.4577, + "step": 4922 + }, + { + "epoch": 0.59, + "learning_rate": 4.013393097331041e-06, + "loss": 3.3479, + "step": 4923 + }, + { + "epoch": 0.59, + "learning_rate": 4.011432253532472e-06, + "loss": 3.4685, + "step": 4924 + }, + { + "epoch": 0.59, + "learning_rate": 4.0094715679442835e-06, + "loss": 3.4252, + "step": 4925 + }, + { + "epoch": 0.59, + "learning_rate": 4.007511040880267e-06, + "loss": 3.464, + "step": 4926 + }, + { + "epoch": 0.59, + "learning_rate": 4.005550672654185e-06, + "loss": 3.4519, + "step": 4927 + }, + { + "epoch": 0.59, + "learning_rate": 4.003590463579772e-06, + "loss": 3.4938, + "step": 4928 + }, + { + "epoch": 0.59, + "learning_rate": 4.001630413970742e-06, + "loss": 3.4333, + "step": 4929 + }, + { + "epoch": 0.59, + "learning_rate": 3.999670524140784e-06, + "loss": 3.4726, + "step": 4930 + }, + { + "epoch": 0.59, + "learning_rate": 3.9977107944035534e-06, + "loss": 3.4351, + "step": 4931 + }, + { + "epoch": 0.59, + "learning_rate": 3.99575122507269e-06, + "loss": 3.4583, + "step": 4932 + }, + { + "epoch": 0.59, + "learning_rate": 3.9937918164618e-06, + "loss": 3.3779, + "step": 4933 + }, + { + "epoch": 0.59, + "learning_rate": 3.9918325688844684e-06, + "loss": 3.4745, + "step": 4934 + }, + { + "epoch": 0.59, + "learning_rate": 3.989873482654255e-06, + "loss": 3.3177, + "step": 4935 + }, + { + "epoch": 0.59, + "learning_rate": 3.987914558084688e-06, + "loss": 3.3213, + "step": 4936 + }, + { + "epoch": 0.59, + "learning_rate": 3.985955795489275e-06, + "loss": 3.4054, + "step": 4937 + }, + { + "epoch": 0.59, + "learning_rate": 3.983997195181499e-06, + "loss": 3.3356, + "step": 4938 + }, + { + "epoch": 0.59, + "learning_rate": 3.982038757474813e-06, + "loss": 3.4058, + "step": 4939 + }, + { + "epoch": 0.59, + "learning_rate": 3.980080482682645e-06, + "loss": 3.4051, + "step": 4940 + }, + { + "epoch": 0.59, + "learning_rate": 3.978122371118396e-06, + "loss": 3.4618, + "step": 4941 + }, + { + "epoch": 0.59, + "learning_rate": 3.976164423095444e-06, + "loss": 3.4268, + "step": 4942 + }, + { + "epoch": 0.59, + "learning_rate": 3.974206638927139e-06, + "loss": 3.421, + "step": 4943 + }, + { + "epoch": 0.59, + "learning_rate": 3.9722490189268035e-06, + "loss": 3.3757, + "step": 4944 + }, + { + "epoch": 0.59, + "learning_rate": 3.970291563407736e-06, + "loss": 3.4656, + "step": 4945 + }, + { + "epoch": 0.59, + "learning_rate": 3.968334272683209e-06, + "loss": 3.4278, + "step": 4946 + }, + { + "epoch": 0.59, + "learning_rate": 3.966377147066463e-06, + "loss": 3.3748, + "step": 4947 + }, + { + "epoch": 0.59, + "learning_rate": 3.964420186870722e-06, + "loss": 3.3923, + "step": 4948 + }, + { + "epoch": 0.59, + "learning_rate": 3.962463392409173e-06, + "loss": 3.358, + "step": 4949 + }, + { + "epoch": 0.59, + "learning_rate": 3.960506763994983e-06, + "loss": 3.4375, + "step": 4950 + }, + { + "epoch": 0.59, + "learning_rate": 3.958550301941294e-06, + "loss": 3.3423, + "step": 4951 + }, + { + "epoch": 0.59, + "learning_rate": 3.956594006561213e-06, + "loss": 3.3225, + "step": 4952 + }, + { + "epoch": 0.59, + "learning_rate": 3.9546378781678275e-06, + "loss": 3.4092, + "step": 4953 + }, + { + "epoch": 0.59, + "learning_rate": 3.952681917074199e-06, + "loss": 3.5328, + "step": 4954 + }, + { + "epoch": 0.59, + "learning_rate": 3.950726123593355e-06, + "loss": 3.368, + "step": 4955 + }, + { + "epoch": 0.59, + "learning_rate": 3.948770498038306e-06, + "loss": 3.3611, + "step": 4956 + }, + { + "epoch": 0.59, + "learning_rate": 3.946815040722025e-06, + "loss": 3.3384, + "step": 4957 + }, + { + "epoch": 0.59, + "learning_rate": 3.944859751957465e-06, + "loss": 3.3769, + "step": 4958 + }, + { + "epoch": 0.59, + "learning_rate": 3.942904632057553e-06, + "loss": 3.4564, + "step": 4959 + }, + { + "epoch": 0.59, + "learning_rate": 3.940949681335183e-06, + "loss": 3.5006, + "step": 4960 + }, + { + "epoch": 0.59, + "learning_rate": 3.938994900103227e-06, + "loss": 3.4281, + "step": 4961 + }, + { + "epoch": 0.59, + "learning_rate": 3.937040288674529e-06, + "loss": 3.3906, + "step": 4962 + }, + { + "epoch": 0.59, + "learning_rate": 3.935085847361902e-06, + "loss": 3.5009, + "step": 4963 + }, + { + "epoch": 0.59, + "learning_rate": 3.93313157647814e-06, + "loss": 3.3637, + "step": 4964 + }, + { + "epoch": 0.59, + "learning_rate": 3.931177476335998e-06, + "loss": 3.5026, + "step": 4965 + }, + { + "epoch": 0.59, + "learning_rate": 3.929223547248214e-06, + "loss": 3.3111, + "step": 4966 + }, + { + "epoch": 0.59, + "learning_rate": 3.927269789527497e-06, + "loss": 3.249, + "step": 4967 + }, + { + "epoch": 0.59, + "learning_rate": 3.925316203486521e-06, + "loss": 3.3084, + "step": 4968 + }, + { + "epoch": 0.59, + "learning_rate": 3.92336278943794e-06, + "loss": 3.3807, + "step": 4969 + }, + { + "epoch": 0.59, + "learning_rate": 3.921409547694382e-06, + "loss": 3.3974, + "step": 4970 + }, + { + "epoch": 0.6, + "learning_rate": 3.919456478568439e-06, + "loss": 3.4723, + "step": 4971 + }, + { + "epoch": 0.6, + "learning_rate": 3.9175035823726825e-06, + "loss": 3.3933, + "step": 4972 + }, + { + "epoch": 0.6, + "learning_rate": 3.915550859419652e-06, + "loss": 3.4342, + "step": 4973 + }, + { + "epoch": 0.6, + "learning_rate": 3.913598310021863e-06, + "loss": 3.3155, + "step": 4974 + }, + { + "epoch": 0.6, + "learning_rate": 3.911645934491803e-06, + "loss": 3.3404, + "step": 4975 + }, + { + "epoch": 0.6, + "learning_rate": 3.909693733141927e-06, + "loss": 3.3315, + "step": 4976 + }, + { + "epoch": 0.6, + "learning_rate": 3.907741706284667e-06, + "loss": 3.4248, + "step": 4977 + }, + { + "epoch": 0.6, + "learning_rate": 3.9057898542324265e-06, + "loss": 3.4433, + "step": 4978 + }, + { + "epoch": 0.6, + "learning_rate": 3.903838177297577e-06, + "loss": 3.4193, + "step": 4979 + }, + { + "epoch": 0.6, + "learning_rate": 3.901886675792467e-06, + "loss": 3.3894, + "step": 4980 + }, + { + "epoch": 0.6, + "learning_rate": 3.8999353500294135e-06, + "loss": 3.4235, + "step": 4981 + }, + { + "epoch": 0.6, + "learning_rate": 3.897984200320707e-06, + "loss": 3.4373, + "step": 4982 + }, + { + "epoch": 0.6, + "learning_rate": 3.896033226978611e-06, + "loss": 3.4324, + "step": 4983 + }, + { + "epoch": 0.6, + "learning_rate": 3.8940824303153585e-06, + "loss": 3.376, + "step": 4984 + }, + { + "epoch": 0.6, + "learning_rate": 3.892131810643154e-06, + "loss": 3.4693, + "step": 4985 + }, + { + "epoch": 0.6, + "learning_rate": 3.890181368274175e-06, + "loss": 3.46, + "step": 4986 + }, + { + "epoch": 0.6, + "learning_rate": 3.888231103520571e-06, + "loss": 3.4583, + "step": 4987 + }, + { + "epoch": 0.6, + "learning_rate": 3.886281016694463e-06, + "loss": 3.3331, + "step": 4988 + }, + { + "epoch": 0.6, + "learning_rate": 3.884331108107939e-06, + "loss": 3.3719, + "step": 4989 + }, + { + "epoch": 0.6, + "learning_rate": 3.882381378073068e-06, + "loss": 3.3957, + "step": 4990 + }, + { + "epoch": 0.6, + "learning_rate": 3.880431826901881e-06, + "loss": 3.4637, + "step": 4991 + }, + { + "epoch": 0.6, + "learning_rate": 3.878482454906386e-06, + "loss": 3.3394, + "step": 4992 + }, + { + "epoch": 0.6, + "learning_rate": 3.876533262398559e-06, + "loss": 3.518, + "step": 4993 + }, + { + "epoch": 0.6, + "learning_rate": 3.87458424969035e-06, + "loss": 3.5307, + "step": 4994 + }, + { + "epoch": 0.6, + "learning_rate": 3.872635417093678e-06, + "loss": 3.3533, + "step": 4995 + }, + { + "epoch": 0.6, + "learning_rate": 3.870686764920436e-06, + "loss": 3.3346, + "step": 4996 + }, + { + "epoch": 0.6, + "learning_rate": 3.868738293482483e-06, + "loss": 3.4706, + "step": 4997 + }, + { + "epoch": 0.6, + "learning_rate": 3.866790003091654e-06, + "loss": 3.4303, + "step": 4998 + }, + { + "epoch": 0.6, + "learning_rate": 3.864841894059757e-06, + "loss": 3.4103, + "step": 4999 + }, + { + "epoch": 0.6, + "learning_rate": 3.862893966698561e-06, + "loss": 3.5004, + "step": 5000 + }, + { + "epoch": 0.6, + "learning_rate": 3.860946221319818e-06, + "loss": 3.4257, + "step": 5001 + }, + { + "epoch": 0.6, + "learning_rate": 3.85899865823524e-06, + "loss": 3.3662, + "step": 5002 + }, + { + "epoch": 0.6, + "learning_rate": 3.857051277756517e-06, + "loss": 3.3715, + "step": 5003 + }, + { + "epoch": 0.6, + "learning_rate": 3.855104080195312e-06, + "loss": 3.41, + "step": 5004 + }, + { + "epoch": 0.6, + "learning_rate": 3.853157065863248e-06, + "loss": 3.4475, + "step": 5005 + }, + { + "epoch": 0.6, + "learning_rate": 3.851210235071928e-06, + "loss": 3.4081, + "step": 5006 + }, + { + "epoch": 0.6, + "learning_rate": 3.849263588132925e-06, + "loss": 3.3999, + "step": 5007 + }, + { + "epoch": 0.6, + "learning_rate": 3.847317125357776e-06, + "loss": 3.4501, + "step": 5008 + }, + { + "epoch": 0.6, + "learning_rate": 3.8453708470579975e-06, + "loss": 3.4037, + "step": 5009 + }, + { + "epoch": 0.6, + "learning_rate": 3.843424753545067e-06, + "loss": 3.4873, + "step": 5010 + }, + { + "epoch": 0.6, + "learning_rate": 3.84147884513044e-06, + "loss": 3.3553, + "step": 5011 + }, + { + "epoch": 0.6, + "learning_rate": 3.839533122125542e-06, + "loss": 3.452, + "step": 5012 + }, + { + "epoch": 0.6, + "learning_rate": 3.837587584841761e-06, + "loss": 3.5397, + "step": 5013 + }, + { + "epoch": 0.6, + "learning_rate": 3.8356422335904645e-06, + "loss": 3.4447, + "step": 5014 + }, + { + "epoch": 0.6, + "learning_rate": 3.8336970686829874e-06, + "loss": 3.3707, + "step": 5015 + }, + { + "epoch": 0.6, + "learning_rate": 3.831752090430631e-06, + "loss": 3.4631, + "step": 5016 + }, + { + "epoch": 0.6, + "learning_rate": 3.829807299144672e-06, + "loss": 3.3387, + "step": 5017 + }, + { + "epoch": 0.6, + "learning_rate": 3.827862695136352e-06, + "loss": 3.3242, + "step": 5018 + }, + { + "epoch": 0.6, + "learning_rate": 3.825918278716886e-06, + "loss": 3.5218, + "step": 5019 + }, + { + "epoch": 0.6, + "learning_rate": 3.8239740501974635e-06, + "loss": 3.3865, + "step": 5020 + }, + { + "epoch": 0.6, + "learning_rate": 3.822030009889231e-06, + "loss": 3.4684, + "step": 5021 + }, + { + "epoch": 0.6, + "learning_rate": 3.820086158103319e-06, + "loss": 3.3884, + "step": 5022 + }, + { + "epoch": 0.6, + "learning_rate": 3.818142495150819e-06, + "loss": 3.3424, + "step": 5023 + }, + { + "epoch": 0.6, + "learning_rate": 3.816199021342793e-06, + "loss": 3.3958, + "step": 5024 + }, + { + "epoch": 0.6, + "learning_rate": 3.814255736990279e-06, + "loss": 3.4174, + "step": 5025 + }, + { + "epoch": 0.6, + "learning_rate": 3.8123126424042768e-06, + "loss": 3.3903, + "step": 5026 + }, + { + "epoch": 0.6, + "learning_rate": 3.8103697378957587e-06, + "loss": 3.4305, + "step": 5027 + }, + { + "epoch": 0.6, + "learning_rate": 3.8084270237756705e-06, + "loss": 3.4296, + "step": 5028 + }, + { + "epoch": 0.6, + "learning_rate": 3.806484500354922e-06, + "loss": 3.3266, + "step": 5029 + }, + { + "epoch": 0.6, + "learning_rate": 3.804542167944394e-06, + "loss": 3.432, + "step": 5030 + }, + { + "epoch": 0.6, + "learning_rate": 3.802600026854939e-06, + "loss": 3.4348, + "step": 5031 + }, + { + "epoch": 0.6, + "learning_rate": 3.8006580773973755e-06, + "loss": 3.4236, + "step": 5032 + }, + { + "epoch": 0.6, + "learning_rate": 3.7987163198824967e-06, + "loss": 3.3808, + "step": 5033 + }, + { + "epoch": 0.6, + "learning_rate": 3.7967747546210563e-06, + "loss": 3.4239, + "step": 5034 + }, + { + "epoch": 0.6, + "learning_rate": 3.794833381923786e-06, + "loss": 3.3601, + "step": 5035 + }, + { + "epoch": 0.6, + "learning_rate": 3.7928922021013835e-06, + "loss": 3.4292, + "step": 5036 + }, + { + "epoch": 0.6, + "learning_rate": 3.7909512154645116e-06, + "loss": 3.4224, + "step": 5037 + }, + { + "epoch": 0.6, + "learning_rate": 3.7890104223238084e-06, + "loss": 3.4642, + "step": 5038 + }, + { + "epoch": 0.6, + "learning_rate": 3.7870698229898805e-06, + "loss": 3.3923, + "step": 5039 + }, + { + "epoch": 0.6, + "learning_rate": 3.785129417773296e-06, + "loss": 3.3463, + "step": 5040 + }, + { + "epoch": 0.6, + "learning_rate": 3.783189206984603e-06, + "loss": 3.397, + "step": 5041 + }, + { + "epoch": 0.6, + "learning_rate": 3.7812491909343085e-06, + "loss": 3.4351, + "step": 5042 + }, + { + "epoch": 0.6, + "learning_rate": 3.779309369932894e-06, + "loss": 3.3151, + "step": 5043 + }, + { + "epoch": 0.6, + "learning_rate": 3.777369744290811e-06, + "loss": 3.4856, + "step": 5044 + }, + { + "epoch": 0.6, + "learning_rate": 3.775430314318473e-06, + "loss": 3.392, + "step": 5045 + }, + { + "epoch": 0.6, + "learning_rate": 3.7734910803262705e-06, + "loss": 3.4366, + "step": 5046 + }, + { + "epoch": 0.6, + "learning_rate": 3.7715520426245537e-06, + "loss": 3.4385, + "step": 5047 + }, + { + "epoch": 0.6, + "learning_rate": 3.7696132015236495e-06, + "loss": 3.3832, + "step": 5048 + }, + { + "epoch": 0.6, + "learning_rate": 3.7676745573338516e-06, + "loss": 3.4517, + "step": 5049 + }, + { + "epoch": 0.6, + "learning_rate": 3.7657361103654156e-06, + "loss": 3.4378, + "step": 5050 + }, + { + "epoch": 0.6, + "learning_rate": 3.763797860928574e-06, + "loss": 3.3924, + "step": 5051 + }, + { + "epoch": 0.6, + "learning_rate": 3.7618598093335257e-06, + "loss": 3.5564, + "step": 5052 + }, + { + "epoch": 0.6, + "learning_rate": 3.7599219558904323e-06, + "loss": 3.4074, + "step": 5053 + }, + { + "epoch": 0.61, + "learning_rate": 3.757984300909432e-06, + "loss": 3.3106, + "step": 5054 + }, + { + "epoch": 0.61, + "learning_rate": 3.756046844700623e-06, + "loss": 3.5244, + "step": 5055 + }, + { + "epoch": 0.61, + "learning_rate": 3.7541095875740778e-06, + "loss": 3.4203, + "step": 5056 + }, + { + "epoch": 0.61, + "learning_rate": 3.7521725298398374e-06, + "loss": 3.3634, + "step": 5057 + }, + { + "epoch": 0.61, + "learning_rate": 3.7502356718079047e-06, + "loss": 3.3879, + "step": 5058 + }, + { + "epoch": 0.61, + "learning_rate": 3.7482990137882547e-06, + "loss": 3.4082, + "step": 5059 + }, + { + "epoch": 0.61, + "learning_rate": 3.7463625560908345e-06, + "loss": 3.4767, + "step": 5060 + }, + { + "epoch": 0.61, + "learning_rate": 3.74442629902555e-06, + "loss": 3.4135, + "step": 5061 + }, + { + "epoch": 0.61, + "learning_rate": 3.7424902429022812e-06, + "loss": 3.3389, + "step": 5062 + }, + { + "epoch": 0.61, + "learning_rate": 3.740554388030876e-06, + "loss": 3.4443, + "step": 5063 + }, + { + "epoch": 0.61, + "learning_rate": 3.7386187347211457e-06, + "loss": 3.4485, + "step": 5064 + }, + { + "epoch": 0.61, + "learning_rate": 3.7366832832828757e-06, + "loss": 3.389, + "step": 5065 + }, + { + "epoch": 0.61, + "learning_rate": 3.7347480340258137e-06, + "loss": 3.3567, + "step": 5066 + }, + { + "epoch": 0.61, + "learning_rate": 3.7328129872596764e-06, + "loss": 3.4153, + "step": 5067 + }, + { + "epoch": 0.61, + "learning_rate": 3.7308781432941497e-06, + "loss": 3.449, + "step": 5068 + }, + { + "epoch": 0.61, + "learning_rate": 3.7289435024388863e-06, + "loss": 3.3858, + "step": 5069 + }, + { + "epoch": 0.61, + "learning_rate": 3.727009065003506e-06, + "loss": 3.4421, + "step": 5070 + }, + { + "epoch": 0.61, + "learning_rate": 3.725074831297595e-06, + "loss": 3.402, + "step": 5071 + }, + { + "epoch": 0.61, + "learning_rate": 3.72314080163071e-06, + "loss": 3.3791, + "step": 5072 + }, + { + "epoch": 0.61, + "learning_rate": 3.7212069763123713e-06, + "loss": 3.3858, + "step": 5073 + }, + { + "epoch": 0.61, + "learning_rate": 3.719273355652069e-06, + "loss": 3.3956, + "step": 5074 + }, + { + "epoch": 0.61, + "learning_rate": 3.7173399399592603e-06, + "loss": 3.3956, + "step": 5075 + }, + { + "epoch": 0.61, + "learning_rate": 3.715406729543368e-06, + "loss": 3.3746, + "step": 5076 + }, + { + "epoch": 0.61, + "learning_rate": 3.713473724713783e-06, + "loss": 3.4955, + "step": 5077 + }, + { + "epoch": 0.61, + "learning_rate": 3.7115409257798667e-06, + "loss": 3.3674, + "step": 5078 + }, + { + "epoch": 0.61, + "learning_rate": 3.709608333050939e-06, + "loss": 3.4264, + "step": 5079 + }, + { + "epoch": 0.61, + "learning_rate": 3.707675946836295e-06, + "loss": 3.3602, + "step": 5080 + }, + { + "epoch": 0.61, + "learning_rate": 3.705743767445195e-06, + "loss": 3.4025, + "step": 5081 + }, + { + "epoch": 0.61, + "learning_rate": 3.703811795186861e-06, + "loss": 3.3751, + "step": 5082 + }, + { + "epoch": 0.61, + "learning_rate": 3.7018800303704895e-06, + "loss": 3.4279, + "step": 5083 + }, + { + "epoch": 0.61, + "learning_rate": 3.6999484733052405e-06, + "loss": 3.3101, + "step": 5084 + }, + { + "epoch": 0.61, + "learning_rate": 3.698017124300236e-06, + "loss": 3.3096, + "step": 5085 + }, + { + "epoch": 0.61, + "learning_rate": 3.6960859836645746e-06, + "loss": 3.4025, + "step": 5086 + }, + { + "epoch": 0.61, + "learning_rate": 3.694155051707311e-06, + "loss": 3.3746, + "step": 5087 + }, + { + "epoch": 0.61, + "learning_rate": 3.6922243287374736e-06, + "loss": 3.3817, + "step": 5088 + }, + { + "epoch": 0.61, + "learning_rate": 3.6902938150640583e-06, + "loss": 3.4346, + "step": 5089 + }, + { + "epoch": 0.61, + "learning_rate": 3.6883635109960194e-06, + "loss": 3.4483, + "step": 5090 + }, + { + "epoch": 0.61, + "learning_rate": 3.686433416842285e-06, + "loss": 3.4572, + "step": 5091 + }, + { + "epoch": 0.61, + "learning_rate": 3.6845035329117494e-06, + "loss": 3.3695, + "step": 5092 + }, + { + "epoch": 0.61, + "learning_rate": 3.6825738595132675e-06, + "loss": 3.4119, + "step": 5093 + }, + { + "epoch": 0.61, + "learning_rate": 3.680644396955669e-06, + "loss": 3.376, + "step": 5094 + }, + { + "epoch": 0.61, + "learning_rate": 3.678715145547739e-06, + "loss": 3.408, + "step": 5095 + }, + { + "epoch": 0.61, + "learning_rate": 3.676786105598239e-06, + "loss": 3.5172, + "step": 5096 + }, + { + "epoch": 0.61, + "learning_rate": 3.674857277415893e-06, + "loss": 3.4649, + "step": 5097 + }, + { + "epoch": 0.61, + "learning_rate": 3.6729286613093874e-06, + "loss": 3.3935, + "step": 5098 + }, + { + "epoch": 0.61, + "learning_rate": 3.6710002575873793e-06, + "loss": 3.3554, + "step": 5099 + }, + { + "epoch": 0.61, + "learning_rate": 3.669072066558494e-06, + "loss": 3.4061, + "step": 5100 + }, + { + "epoch": 0.61, + "learning_rate": 3.6671440885313135e-06, + "loss": 3.3858, + "step": 5101 + }, + { + "epoch": 0.61, + "learning_rate": 3.665216323814396e-06, + "loss": 3.431, + "step": 5102 + }, + { + "epoch": 0.61, + "learning_rate": 3.6632887727162575e-06, + "loss": 3.3541, + "step": 5103 + }, + { + "epoch": 0.61, + "learning_rate": 3.661361435545384e-06, + "loss": 3.5195, + "step": 5104 + }, + { + "epoch": 0.61, + "learning_rate": 3.6594343126102305e-06, + "loss": 3.4545, + "step": 5105 + }, + { + "epoch": 0.61, + "learning_rate": 3.6575074042192078e-06, + "loss": 3.3884, + "step": 5106 + }, + { + "epoch": 0.61, + "learning_rate": 3.6555807106807006e-06, + "loss": 3.4547, + "step": 5107 + }, + { + "epoch": 0.61, + "learning_rate": 3.6536542323030606e-06, + "loss": 3.4744, + "step": 5108 + }, + { + "epoch": 0.61, + "learning_rate": 3.6517279693945956e-06, + "loss": 3.4458, + "step": 5109 + }, + { + "epoch": 0.61, + "learning_rate": 3.6498019222635884e-06, + "loss": 3.4135, + "step": 5110 + }, + { + "epoch": 0.61, + "learning_rate": 3.647876091218283e-06, + "loss": 3.474, + "step": 5111 + }, + { + "epoch": 0.61, + "learning_rate": 3.6459504765668872e-06, + "loss": 3.4522, + "step": 5112 + }, + { + "epoch": 0.61, + "learning_rate": 3.6440250786175796e-06, + "loss": 3.3673, + "step": 5113 + }, + { + "epoch": 0.61, + "learning_rate": 3.6420998976784994e-06, + "loss": 3.4383, + "step": 5114 + }, + { + "epoch": 0.61, + "learning_rate": 3.6401749340577526e-06, + "loss": 3.5302, + "step": 5115 + }, + { + "epoch": 0.61, + "learning_rate": 3.638250188063409e-06, + "loss": 3.4663, + "step": 5116 + }, + { + "epoch": 0.61, + "learning_rate": 3.636325660003508e-06, + "loss": 3.4817, + "step": 5117 + }, + { + "epoch": 0.61, + "learning_rate": 3.6344013501860486e-06, + "loss": 3.4395, + "step": 5118 + }, + { + "epoch": 0.61, + "learning_rate": 3.632477258918997e-06, + "loss": 3.3827, + "step": 5119 + }, + { + "epoch": 0.61, + "learning_rate": 3.6305533865102875e-06, + "loss": 3.3607, + "step": 5120 + }, + { + "epoch": 0.61, + "learning_rate": 3.628629733267815e-06, + "loss": 3.3554, + "step": 5121 + }, + { + "epoch": 0.61, + "learning_rate": 3.6267062994994394e-06, + "loss": 3.5051, + "step": 5122 + }, + { + "epoch": 0.61, + "learning_rate": 3.624783085512992e-06, + "loss": 3.4466, + "step": 5123 + }, + { + "epoch": 0.61, + "learning_rate": 3.622860091616257e-06, + "loss": 3.4422, + "step": 5124 + }, + { + "epoch": 0.61, + "learning_rate": 3.6209373181169943e-06, + "loss": 3.4974, + "step": 5125 + }, + { + "epoch": 0.61, + "learning_rate": 3.6190147653229267e-06, + "loss": 3.5308, + "step": 5126 + }, + { + "epoch": 0.61, + "learning_rate": 3.6170924335417344e-06, + "loss": 3.5309, + "step": 5127 + }, + { + "epoch": 0.61, + "learning_rate": 3.615170323081071e-06, + "loss": 3.3592, + "step": 5128 + }, + { + "epoch": 0.61, + "learning_rate": 3.6132484342485518e-06, + "loss": 3.3932, + "step": 5129 + }, + { + "epoch": 0.61, + "learning_rate": 3.6113267673517515e-06, + "loss": 3.4157, + "step": 5130 + }, + { + "epoch": 0.61, + "learning_rate": 3.6094053226982196e-06, + "loss": 3.388, + "step": 5131 + }, + { + "epoch": 0.61, + "learning_rate": 3.6074841005954575e-06, + "loss": 3.4807, + "step": 5132 + }, + { + "epoch": 0.61, + "learning_rate": 3.605563101350942e-06, + "loss": 3.3921, + "step": 5133 + }, + { + "epoch": 0.61, + "learning_rate": 3.6036423252721104e-06, + "loss": 3.3969, + "step": 5134 + }, + { + "epoch": 0.61, + "learning_rate": 3.60172177266636e-06, + "loss": 3.4827, + "step": 5135 + }, + { + "epoch": 0.61, + "learning_rate": 3.5998014438410576e-06, + "loss": 3.3589, + "step": 5136 + }, + { + "epoch": 0.61, + "learning_rate": 3.5978813391035363e-06, + "loss": 3.4204, + "step": 5137 + }, + { + "epoch": 0.62, + "learning_rate": 3.595961458761084e-06, + "loss": 3.4549, + "step": 5138 + }, + { + "epoch": 0.62, + "learning_rate": 3.594041803120963e-06, + "loss": 3.4296, + "step": 5139 + }, + { + "epoch": 0.62, + "learning_rate": 3.5921223724903907e-06, + "loss": 3.4152, + "step": 5140 + }, + { + "epoch": 0.62, + "learning_rate": 3.590203167176555e-06, + "loss": 3.4235, + "step": 5141 + }, + { + "epoch": 0.62, + "learning_rate": 3.588284187486607e-06, + "loss": 3.4137, + "step": 5142 + }, + { + "epoch": 0.62, + "learning_rate": 3.586365433727657e-06, + "loss": 3.3022, + "step": 5143 + }, + { + "epoch": 0.62, + "learning_rate": 3.5844469062067845e-06, + "loss": 3.4151, + "step": 5144 + }, + { + "epoch": 0.62, + "learning_rate": 3.5825286052310315e-06, + "loss": 3.3876, + "step": 5145 + }, + { + "epoch": 0.62, + "learning_rate": 3.5806105311074002e-06, + "loss": 3.4322, + "step": 5146 + }, + { + "epoch": 0.62, + "learning_rate": 3.5786926841428617e-06, + "loss": 3.4977, + "step": 5147 + }, + { + "epoch": 0.62, + "learning_rate": 3.5767750646443456e-06, + "loss": 3.4041, + "step": 5148 + }, + { + "epoch": 0.62, + "learning_rate": 3.574857672918749e-06, + "loss": 3.4082, + "step": 5149 + }, + { + "epoch": 0.62, + "learning_rate": 3.572940509272934e-06, + "loss": 3.3156, + "step": 5150 + }, + { + "epoch": 0.62, + "learning_rate": 3.5710235740137177e-06, + "loss": 3.4858, + "step": 5151 + }, + { + "epoch": 0.62, + "learning_rate": 3.5691068674478914e-06, + "loss": 3.4009, + "step": 5152 + }, + { + "epoch": 0.62, + "learning_rate": 3.5671903898822035e-06, + "loss": 3.4135, + "step": 5153 + }, + { + "epoch": 0.62, + "learning_rate": 3.5652741416233655e-06, + "loss": 3.3922, + "step": 5154 + }, + { + "epoch": 0.62, + "learning_rate": 3.5633581229780558e-06, + "loss": 3.3942, + "step": 5155 + }, + { + "epoch": 0.62, + "learning_rate": 3.561442334252912e-06, + "loss": 3.3756, + "step": 5156 + }, + { + "epoch": 0.62, + "learning_rate": 3.5595267757545393e-06, + "loss": 3.4887, + "step": 5157 + }, + { + "epoch": 0.62, + "learning_rate": 3.5576114477895018e-06, + "loss": 3.4984, + "step": 5158 + }, + { + "epoch": 0.62, + "learning_rate": 3.5556963506643294e-06, + "loss": 3.3499, + "step": 5159 + }, + { + "epoch": 0.62, + "learning_rate": 3.5537814846855147e-06, + "loss": 3.4524, + "step": 5160 + }, + { + "epoch": 0.62, + "learning_rate": 3.5518668501595116e-06, + "loss": 3.34, + "step": 5161 + }, + { + "epoch": 0.62, + "learning_rate": 3.549952447392738e-06, + "loss": 3.3527, + "step": 5162 + }, + { + "epoch": 0.62, + "learning_rate": 3.5480382766915784e-06, + "loss": 3.3376, + "step": 5163 + }, + { + "epoch": 0.62, + "learning_rate": 3.546124338362371e-06, + "loss": 3.4248, + "step": 5164 + }, + { + "epoch": 0.62, + "learning_rate": 3.5442106327114267e-06, + "loss": 3.3813, + "step": 5165 + }, + { + "epoch": 0.62, + "learning_rate": 3.542297160045015e-06, + "loss": 3.4358, + "step": 5166 + }, + { + "epoch": 0.62, + "learning_rate": 3.540383920669366e-06, + "loss": 3.4117, + "step": 5167 + }, + { + "epoch": 0.62, + "learning_rate": 3.5384709148906744e-06, + "loss": 3.4114, + "step": 5168 + }, + { + "epoch": 0.62, + "learning_rate": 3.536558143015101e-06, + "loss": 3.4976, + "step": 5169 + }, + { + "epoch": 0.62, + "learning_rate": 3.534645605348761e-06, + "loss": 3.5251, + "step": 5170 + }, + { + "epoch": 0.62, + "learning_rate": 3.5327333021977415e-06, + "loss": 3.4668, + "step": 5171 + }, + { + "epoch": 0.62, + "learning_rate": 3.5308212338680842e-06, + "loss": 3.4647, + "step": 5172 + }, + { + "epoch": 0.62, + "learning_rate": 3.528909400665797e-06, + "loss": 3.411, + "step": 5173 + }, + { + "epoch": 0.62, + "learning_rate": 3.5269978028968526e-06, + "loss": 3.4382, + "step": 5174 + }, + { + "epoch": 0.62, + "learning_rate": 3.5250864408671786e-06, + "loss": 3.5914, + "step": 5175 + }, + { + "epoch": 0.62, + "learning_rate": 3.523175314882674e-06, + "loss": 3.4337, + "step": 5176 + }, + { + "epoch": 0.62, + "learning_rate": 3.5212644252491912e-06, + "loss": 3.4818, + "step": 5177 + }, + { + "epoch": 0.62, + "learning_rate": 3.51935377227255e-06, + "loss": 3.4009, + "step": 5178 + }, + { + "epoch": 0.62, + "learning_rate": 3.5174433562585353e-06, + "loss": 3.3668, + "step": 5179 + }, + { + "epoch": 0.62, + "learning_rate": 3.5155331775128844e-06, + "loss": 3.4176, + "step": 5180 + }, + { + "epoch": 0.62, + "learning_rate": 3.513623236341305e-06, + "loss": 3.3761, + "step": 5181 + }, + { + "epoch": 0.62, + "learning_rate": 3.5117135330494663e-06, + "loss": 3.4815, + "step": 5182 + }, + { + "epoch": 0.62, + "learning_rate": 3.5098040679429923e-06, + "loss": 3.4474, + "step": 5183 + }, + { + "epoch": 0.62, + "learning_rate": 3.5078948413274783e-06, + "loss": 3.457, + "step": 5184 + }, + { + "epoch": 0.62, + "learning_rate": 3.5059858535084733e-06, + "loss": 3.4308, + "step": 5185 + }, + { + "epoch": 0.62, + "learning_rate": 3.5040771047914934e-06, + "loss": 3.4282, + "step": 5186 + }, + { + "epoch": 0.62, + "learning_rate": 3.502168595482016e-06, + "loss": 3.4347, + "step": 5187 + }, + { + "epoch": 0.62, + "learning_rate": 3.5002603258854765e-06, + "loss": 3.3595, + "step": 5188 + }, + { + "epoch": 0.62, + "learning_rate": 3.4983522963072756e-06, + "loss": 3.5134, + "step": 5189 + }, + { + "epoch": 0.62, + "learning_rate": 3.4964445070527764e-06, + "loss": 3.4815, + "step": 5190 + }, + { + "epoch": 0.62, + "learning_rate": 3.494536958427297e-06, + "loss": 3.421, + "step": 5191 + }, + { + "epoch": 0.62, + "learning_rate": 3.4926296507361246e-06, + "loss": 3.3535, + "step": 5192 + }, + { + "epoch": 0.62, + "learning_rate": 3.4907225842845045e-06, + "loss": 3.4062, + "step": 5193 + }, + { + "epoch": 0.62, + "learning_rate": 3.4888157593776416e-06, + "loss": 3.4457, + "step": 5194 + }, + { + "epoch": 0.62, + "learning_rate": 3.4869091763207073e-06, + "loss": 3.2062, + "step": 5195 + }, + { + "epoch": 0.62, + "learning_rate": 3.4850028354188293e-06, + "loss": 3.4138, + "step": 5196 + }, + { + "epoch": 0.62, + "learning_rate": 3.4830967369770976e-06, + "loss": 3.3742, + "step": 5197 + }, + { + "epoch": 0.62, + "learning_rate": 3.481190881300566e-06, + "loss": 3.3635, + "step": 5198 + }, + { + "epoch": 0.62, + "learning_rate": 3.479285268694247e-06, + "loss": 3.33, + "step": 5199 + }, + { + "epoch": 0.62, + "learning_rate": 3.477379899463115e-06, + "loss": 3.4853, + "step": 5200 + }, + { + "epoch": 0.62, + "learning_rate": 3.475474773912103e-06, + "loss": 3.4293, + "step": 5201 + }, + { + "epoch": 0.62, + "learning_rate": 3.4735698923461113e-06, + "loss": 3.3664, + "step": 5202 + }, + { + "epoch": 0.62, + "learning_rate": 3.4716652550699946e-06, + "loss": 3.3939, + "step": 5203 + }, + { + "epoch": 0.62, + "learning_rate": 3.4697608623885716e-06, + "loss": 3.42, + "step": 5204 + }, + { + "epoch": 0.62, + "learning_rate": 3.467856714606621e-06, + "loss": 3.3332, + "step": 5205 + }, + { + "epoch": 0.62, + "learning_rate": 3.4659528120288837e-06, + "loss": 3.4626, + "step": 5206 + }, + { + "epoch": 0.62, + "learning_rate": 3.4640491549600585e-06, + "loss": 3.4429, + "step": 5207 + }, + { + "epoch": 0.62, + "learning_rate": 3.4621457437048096e-06, + "loss": 3.3634, + "step": 5208 + }, + { + "epoch": 0.62, + "learning_rate": 3.4602425785677548e-06, + "loss": 3.3456, + "step": 5209 + }, + { + "epoch": 0.62, + "learning_rate": 3.4583396598534793e-06, + "loss": 3.4241, + "step": 5210 + }, + { + "epoch": 0.62, + "learning_rate": 3.456436987866527e-06, + "loss": 3.3055, + "step": 5211 + }, + { + "epoch": 0.62, + "learning_rate": 3.4545345629113985e-06, + "loss": 3.5001, + "step": 5212 + }, + { + "epoch": 0.62, + "learning_rate": 3.45263238529256e-06, + "loss": 3.3615, + "step": 5213 + }, + { + "epoch": 0.62, + "learning_rate": 3.450730455314437e-06, + "loss": 3.4452, + "step": 5214 + }, + { + "epoch": 0.62, + "learning_rate": 3.448828773281412e-06, + "loss": 3.4307, + "step": 5215 + }, + { + "epoch": 0.62, + "learning_rate": 3.446927339497833e-06, + "loss": 3.3417, + "step": 5216 + }, + { + "epoch": 0.62, + "learning_rate": 3.4450261542680015e-06, + "loss": 3.3962, + "step": 5217 + }, + { + "epoch": 0.62, + "learning_rate": 3.4431252178961854e-06, + "loss": 3.4394, + "step": 5218 + }, + { + "epoch": 0.62, + "learning_rate": 3.4412245306866126e-06, + "loss": 3.4449, + "step": 5219 + }, + { + "epoch": 0.62, + "learning_rate": 3.439324092943465e-06, + "loss": 3.4973, + "step": 5220 + }, + { + "epoch": 0.63, + "learning_rate": 3.4374239049708914e-06, + "loss": 3.3951, + "step": 5221 + }, + { + "epoch": 0.63, + "learning_rate": 3.4355239670729983e-06, + "loss": 3.4891, + "step": 5222 + }, + { + "epoch": 0.63, + "learning_rate": 3.4336242795538487e-06, + "loss": 3.4609, + "step": 5223 + }, + { + "epoch": 0.63, + "learning_rate": 3.431724842717473e-06, + "loss": 3.352, + "step": 5224 + }, + { + "epoch": 0.63, + "learning_rate": 3.4298256568678522e-06, + "loss": 3.4418, + "step": 5225 + }, + { + "epoch": 0.63, + "learning_rate": 3.4279267223089353e-06, + "loss": 3.4661, + "step": 5226 + }, + { + "epoch": 0.63, + "learning_rate": 3.4260280393446277e-06, + "loss": 3.3474, + "step": 5227 + }, + { + "epoch": 0.63, + "learning_rate": 3.4241296082787922e-06, + "loss": 3.4428, + "step": 5228 + }, + { + "epoch": 0.63, + "learning_rate": 3.422231429415256e-06, + "loss": 3.3355, + "step": 5229 + }, + { + "epoch": 0.63, + "learning_rate": 3.4203335030578043e-06, + "loss": 3.4329, + "step": 5230 + }, + { + "epoch": 0.63, + "learning_rate": 3.418435829510178e-06, + "loss": 3.3619, + "step": 5231 + }, + { + "epoch": 0.63, + "learning_rate": 3.4165384090760857e-06, + "loss": 3.4007, + "step": 5232 + }, + { + "epoch": 0.63, + "learning_rate": 3.4146412420591845e-06, + "loss": 3.4262, + "step": 5233 + }, + { + "epoch": 0.63, + "learning_rate": 3.4127443287631003e-06, + "loss": 3.4094, + "step": 5234 + }, + { + "epoch": 0.63, + "learning_rate": 3.410847669491417e-06, + "loss": 3.4301, + "step": 5235 + }, + { + "epoch": 0.63, + "learning_rate": 3.4089512645476713e-06, + "loss": 3.384, + "step": 5236 + }, + { + "epoch": 0.63, + "learning_rate": 3.407055114235367e-06, + "loss": 3.3876, + "step": 5237 + }, + { + "epoch": 0.63, + "learning_rate": 3.4051592188579653e-06, + "loss": 3.3564, + "step": 5238 + }, + { + "epoch": 0.63, + "learning_rate": 3.403263578718881e-06, + "loss": 3.3423, + "step": 5239 + }, + { + "epoch": 0.63, + "learning_rate": 3.401368194121496e-06, + "loss": 3.3763, + "step": 5240 + }, + { + "epoch": 0.63, + "learning_rate": 3.3994730653691465e-06, + "loss": 3.2961, + "step": 5241 + }, + { + "epoch": 0.63, + "learning_rate": 3.3975781927651274e-06, + "loss": 3.478, + "step": 5242 + }, + { + "epoch": 0.63, + "learning_rate": 3.395683576612697e-06, + "loss": 3.3633, + "step": 5243 + }, + { + "epoch": 0.63, + "learning_rate": 3.3937892172150678e-06, + "loss": 3.3842, + "step": 5244 + }, + { + "epoch": 0.63, + "learning_rate": 3.3918951148754136e-06, + "loss": 3.3612, + "step": 5245 + }, + { + "epoch": 0.63, + "learning_rate": 3.3900012698968656e-06, + "loss": 3.4209, + "step": 5246 + }, + { + "epoch": 0.63, + "learning_rate": 3.3881076825825155e-06, + "loss": 3.397, + "step": 5247 + }, + { + "epoch": 0.63, + "learning_rate": 3.3862143532354148e-06, + "loss": 3.4214, + "step": 5248 + }, + { + "epoch": 0.63, + "learning_rate": 3.3843212821585682e-06, + "loss": 3.389, + "step": 5249 + }, + { + "epoch": 0.63, + "learning_rate": 3.3824284696549448e-06, + "loss": 3.4225, + "step": 5250 + }, + { + "epoch": 0.63, + "learning_rate": 3.380535916027472e-06, + "loss": 3.4303, + "step": 5251 + }, + { + "epoch": 0.63, + "learning_rate": 3.3786436215790308e-06, + "loss": 3.3998, + "step": 5252 + }, + { + "epoch": 0.63, + "learning_rate": 3.376751586612468e-06, + "loss": 3.4228, + "step": 5253 + }, + { + "epoch": 0.63, + "learning_rate": 3.3748598114305795e-06, + "loss": 3.4407, + "step": 5254 + }, + { + "epoch": 0.63, + "learning_rate": 3.372968296336129e-06, + "loss": 3.6102, + "step": 5255 + }, + { + "epoch": 0.63, + "learning_rate": 3.371077041631835e-06, + "loss": 3.4455, + "step": 5256 + }, + { + "epoch": 0.63, + "learning_rate": 3.36918604762037e-06, + "loss": 3.4381, + "step": 5257 + }, + { + "epoch": 0.63, + "learning_rate": 3.367295314604372e-06, + "loss": 3.4292, + "step": 5258 + }, + { + "epoch": 0.63, + "learning_rate": 3.365404842886435e-06, + "loss": 3.4971, + "step": 5259 + }, + { + "epoch": 0.63, + "learning_rate": 3.3635146327691053e-06, + "loss": 3.3762, + "step": 5260 + }, + { + "epoch": 0.63, + "learning_rate": 3.361624684554897e-06, + "loss": 3.3819, + "step": 5261 + }, + { + "epoch": 0.63, + "learning_rate": 3.3597349985462747e-06, + "loss": 3.456, + "step": 5262 + }, + { + "epoch": 0.63, + "learning_rate": 3.357845575045663e-06, + "loss": 3.4578, + "step": 5263 + }, + { + "epoch": 0.63, + "learning_rate": 3.3559564143554495e-06, + "loss": 3.317, + "step": 5264 + }, + { + "epoch": 0.63, + "learning_rate": 3.3540675167779702e-06, + "loss": 3.3466, + "step": 5265 + }, + { + "epoch": 0.63, + "learning_rate": 3.3521788826155267e-06, + "loss": 3.4561, + "step": 5266 + }, + { + "epoch": 0.63, + "learning_rate": 3.350290512170379e-06, + "loss": 3.3477, + "step": 5267 + }, + { + "epoch": 0.63, + "learning_rate": 3.3484024057447356e-06, + "loss": 3.4294, + "step": 5268 + }, + { + "epoch": 0.63, + "learning_rate": 3.3465145636407746e-06, + "loss": 3.3971, + "step": 5269 + }, + { + "epoch": 0.63, + "learning_rate": 3.344626986160623e-06, + "loss": 3.4476, + "step": 5270 + }, + { + "epoch": 0.63, + "learning_rate": 3.342739673606369e-06, + "loss": 3.3623, + "step": 5271 + }, + { + "epoch": 0.63, + "learning_rate": 3.3408526262800612e-06, + "loss": 3.4193, + "step": 5272 + }, + { + "epoch": 0.63, + "learning_rate": 3.338965844483698e-06, + "loss": 3.4433, + "step": 5273 + }, + { + "epoch": 0.63, + "learning_rate": 3.337079328519243e-06, + "loss": 3.3492, + "step": 5274 + }, + { + "epoch": 0.63, + "learning_rate": 3.3351930786886144e-06, + "loss": 3.4018, + "step": 5275 + }, + { + "epoch": 0.63, + "learning_rate": 3.3333070952936853e-06, + "loss": 3.4812, + "step": 5276 + }, + { + "epoch": 0.63, + "learning_rate": 3.3314213786362905e-06, + "loss": 3.3703, + "step": 5277 + }, + { + "epoch": 0.63, + "learning_rate": 3.3295359290182195e-06, + "loss": 3.3577, + "step": 5278 + }, + { + "epoch": 0.63, + "learning_rate": 3.327650746741219e-06, + "loss": 3.301, + "step": 5279 + }, + { + "epoch": 0.63, + "learning_rate": 3.3257658321069952e-06, + "loss": 3.3752, + "step": 5280 + }, + { + "epoch": 0.63, + "learning_rate": 3.323881185417209e-06, + "loss": 3.4086, + "step": 5281 + }, + { + "epoch": 0.63, + "learning_rate": 3.321996806973478e-06, + "loss": 3.4713, + "step": 5282 + }, + { + "epoch": 0.63, + "learning_rate": 3.3201126970773794e-06, + "loss": 3.3687, + "step": 5283 + }, + { + "epoch": 0.63, + "learning_rate": 3.3182288560304465e-06, + "loss": 3.4056, + "step": 5284 + }, + { + "epoch": 0.63, + "learning_rate": 3.316345284134169e-06, + "loss": 3.4816, + "step": 5285 + }, + { + "epoch": 0.63, + "learning_rate": 3.3144619816899924e-06, + "loss": 3.4333, + "step": 5286 + }, + { + "epoch": 0.63, + "learning_rate": 3.312578948999323e-06, + "loss": 3.418, + "step": 5287 + }, + { + "epoch": 0.63, + "learning_rate": 3.310696186363519e-06, + "loss": 3.4083, + "step": 5288 + }, + { + "epoch": 0.63, + "learning_rate": 3.3088136940838977e-06, + "loss": 3.3839, + "step": 5289 + }, + { + "epoch": 0.63, + "learning_rate": 3.3069314724617353e-06, + "loss": 3.4428, + "step": 5290 + }, + { + "epoch": 0.63, + "learning_rate": 3.3050495217982613e-06, + "loss": 3.431, + "step": 5291 + }, + { + "epoch": 0.63, + "learning_rate": 3.303167842394662e-06, + "loss": 3.4289, + "step": 5292 + }, + { + "epoch": 0.63, + "learning_rate": 3.301286434552084e-06, + "loss": 3.4201, + "step": 5293 + }, + { + "epoch": 0.63, + "learning_rate": 3.2994052985716245e-06, + "loss": 3.4554, + "step": 5294 + }, + { + "epoch": 0.63, + "learning_rate": 3.2975244347543422e-06, + "loss": 3.4121, + "step": 5295 + }, + { + "epoch": 0.63, + "learning_rate": 3.295643843401253e-06, + "loss": 3.4248, + "step": 5296 + }, + { + "epoch": 0.63, + "learning_rate": 3.2937635248133227e-06, + "loss": 3.4477, + "step": 5297 + }, + { + "epoch": 0.63, + "learning_rate": 3.2918834792914777e-06, + "loss": 3.474, + "step": 5298 + }, + { + "epoch": 0.63, + "learning_rate": 3.2900037071366055e-06, + "loss": 3.3484, + "step": 5299 + }, + { + "epoch": 0.63, + "learning_rate": 3.288124208649538e-06, + "loss": 3.4306, + "step": 5300 + }, + { + "epoch": 0.63, + "learning_rate": 3.2862449841310763e-06, + "loss": 3.4807, + "step": 5301 + }, + { + "epoch": 0.63, + "learning_rate": 3.284366033881966e-06, + "loss": 3.4212, + "step": 5302 + }, + { + "epoch": 0.63, + "learning_rate": 3.282487358202917e-06, + "loss": 3.4204, + "step": 5303 + }, + { + "epoch": 0.63, + "learning_rate": 3.2806089573945956e-06, + "loss": 3.4759, + "step": 5304 + }, + { + "epoch": 0.64, + "learning_rate": 3.2787308317576144e-06, + "loss": 3.4666, + "step": 5305 + }, + { + "epoch": 0.64, + "learning_rate": 3.276852981592554e-06, + "loss": 3.409, + "step": 5306 + }, + { + "epoch": 0.64, + "learning_rate": 3.274975407199943e-06, + "loss": 3.5012, + "step": 5307 + }, + { + "epoch": 0.64, + "learning_rate": 3.2730981088802684e-06, + "loss": 3.2959, + "step": 5308 + }, + { + "epoch": 0.64, + "learning_rate": 3.2712210869339755e-06, + "loss": 3.3926, + "step": 5309 + }, + { + "epoch": 0.64, + "learning_rate": 3.2693443416614595e-06, + "loss": 3.3701, + "step": 5310 + }, + { + "epoch": 0.64, + "learning_rate": 3.267467873363076e-06, + "loss": 3.3759, + "step": 5311 + }, + { + "epoch": 0.64, + "learning_rate": 3.2655916823391374e-06, + "loss": 3.3852, + "step": 5312 + }, + { + "epoch": 0.64, + "learning_rate": 3.2637157688899057e-06, + "loss": 3.4607, + "step": 5313 + }, + { + "epoch": 0.64, + "learning_rate": 3.2618401333156054e-06, + "loss": 3.4501, + "step": 5314 + }, + { + "epoch": 0.64, + "learning_rate": 3.2599647759164087e-06, + "loss": 3.4561, + "step": 5315 + }, + { + "epoch": 0.64, + "learning_rate": 3.2580896969924512e-06, + "loss": 3.4408, + "step": 5316 + }, + { + "epoch": 0.64, + "learning_rate": 3.2562148968438224e-06, + "loss": 3.3419, + "step": 5317 + }, + { + "epoch": 0.64, + "learning_rate": 3.25434037577056e-06, + "loss": 3.4068, + "step": 5318 + }, + { + "epoch": 0.64, + "learning_rate": 3.252466134072665e-06, + "loss": 3.3737, + "step": 5319 + }, + { + "epoch": 0.64, + "learning_rate": 3.2505921720500938e-06, + "loss": 3.3501, + "step": 5320 + }, + { + "epoch": 0.64, + "learning_rate": 3.24871849000275e-06, + "loss": 3.4196, + "step": 5321 + }, + { + "epoch": 0.64, + "learning_rate": 3.246845088230502e-06, + "loss": 3.3288, + "step": 5322 + }, + { + "epoch": 0.64, + "learning_rate": 3.244971967033167e-06, + "loss": 3.4208, + "step": 5323 + }, + { + "epoch": 0.64, + "learning_rate": 3.2430991267105187e-06, + "loss": 3.3847, + "step": 5324 + }, + { + "epoch": 0.64, + "learning_rate": 3.2412265675622887e-06, + "loss": 3.3179, + "step": 5325 + }, + { + "epoch": 0.64, + "learning_rate": 3.2393542898881597e-06, + "loss": 3.4777, + "step": 5326 + }, + { + "epoch": 0.64, + "learning_rate": 3.2374822939877704e-06, + "loss": 3.3927, + "step": 5327 + }, + { + "epoch": 0.64, + "learning_rate": 3.2356105801607176e-06, + "loss": 3.4778, + "step": 5328 + }, + { + "epoch": 0.64, + "learning_rate": 3.2337391487065485e-06, + "loss": 3.424, + "step": 5329 + }, + { + "epoch": 0.64, + "learning_rate": 3.2318679999247673e-06, + "loss": 3.4273, + "step": 5330 + }, + { + "epoch": 0.64, + "learning_rate": 3.229997134114832e-06, + "loss": 3.288, + "step": 5331 + }, + { + "epoch": 0.64, + "learning_rate": 3.2281265515761572e-06, + "loss": 3.3739, + "step": 5332 + }, + { + "epoch": 0.64, + "learning_rate": 3.226256252608111e-06, + "loss": 3.3694, + "step": 5333 + }, + { + "epoch": 0.64, + "learning_rate": 3.2243862375100137e-06, + "loss": 3.379, + "step": 5334 + }, + { + "epoch": 0.64, + "learning_rate": 3.222516506581146e-06, + "loss": 3.3784, + "step": 5335 + }, + { + "epoch": 0.64, + "learning_rate": 3.2206470601207375e-06, + "loss": 3.4703, + "step": 5336 + }, + { + "epoch": 0.64, + "learning_rate": 3.2187778984279732e-06, + "loss": 3.3869, + "step": 5337 + }, + { + "epoch": 0.64, + "learning_rate": 3.2169090218019984e-06, + "loss": 3.4608, + "step": 5338 + }, + { + "epoch": 0.64, + "learning_rate": 3.215040430541902e-06, + "loss": 3.4703, + "step": 5339 + }, + { + "epoch": 0.64, + "learning_rate": 3.2131721249467373e-06, + "loss": 3.4298, + "step": 5340 + }, + { + "epoch": 0.64, + "learning_rate": 3.211304105315508e-06, + "loss": 3.4598, + "step": 5341 + }, + { + "epoch": 0.64, + "learning_rate": 3.20943637194717e-06, + "loss": 3.3434, + "step": 5342 + }, + { + "epoch": 0.64, + "learning_rate": 3.207568925140636e-06, + "loss": 3.363, + "step": 5343 + }, + { + "epoch": 0.64, + "learning_rate": 3.205701765194775e-06, + "loss": 3.3467, + "step": 5344 + }, + { + "epoch": 0.64, + "learning_rate": 3.2038348924084017e-06, + "loss": 3.4217, + "step": 5345 + }, + { + "epoch": 0.64, + "learning_rate": 3.2019683070802965e-06, + "loss": 3.3877, + "step": 5346 + }, + { + "epoch": 0.64, + "learning_rate": 3.200102009509182e-06, + "loss": 3.4587, + "step": 5347 + }, + { + "epoch": 0.64, + "learning_rate": 3.198235999993744e-06, + "loss": 3.3993, + "step": 5348 + }, + { + "epoch": 0.64, + "learning_rate": 3.1963702788326194e-06, + "loss": 3.3956, + "step": 5349 + }, + { + "epoch": 0.64, + "learning_rate": 3.194504846324395e-06, + "loss": 3.4591, + "step": 5350 + }, + { + "epoch": 0.64, + "learning_rate": 3.192639702767616e-06, + "loss": 3.4047, + "step": 5351 + }, + { + "epoch": 0.64, + "learning_rate": 3.1907748484607824e-06, + "loss": 3.5004, + "step": 5352 + }, + { + "epoch": 0.64, + "learning_rate": 3.1889102837023413e-06, + "loss": 3.4512, + "step": 5353 + }, + { + "epoch": 0.64, + "learning_rate": 3.1870460087907016e-06, + "loss": 3.3806, + "step": 5354 + }, + { + "epoch": 0.64, + "learning_rate": 3.185182024024219e-06, + "loss": 3.4103, + "step": 5355 + }, + { + "epoch": 0.64, + "learning_rate": 3.1833183297012056e-06, + "loss": 3.3848, + "step": 5356 + }, + { + "epoch": 0.64, + "learning_rate": 3.1814549261199303e-06, + "loss": 3.4926, + "step": 5357 + }, + { + "epoch": 0.64, + "learning_rate": 3.1795918135786076e-06, + "loss": 3.3887, + "step": 5358 + }, + { + "epoch": 0.64, + "learning_rate": 3.177728992375413e-06, + "loss": 3.487, + "step": 5359 + }, + { + "epoch": 0.64, + "learning_rate": 3.1758664628084735e-06, + "loss": 3.4713, + "step": 5360 + }, + { + "epoch": 0.64, + "learning_rate": 3.174004225175864e-06, + "loss": 3.3446, + "step": 5361 + }, + { + "epoch": 0.64, + "learning_rate": 3.172142279775623e-06, + "loss": 3.2557, + "step": 5362 + }, + { + "epoch": 0.64, + "learning_rate": 3.1702806269057296e-06, + "loss": 3.3868, + "step": 5363 + }, + { + "epoch": 0.64, + "learning_rate": 3.1684192668641267e-06, + "loss": 3.4144, + "step": 5364 + }, + { + "epoch": 0.64, + "learning_rate": 3.1665581999487074e-06, + "loss": 3.4271, + "step": 5365 + }, + { + "epoch": 0.64, + "learning_rate": 3.1646974264573132e-06, + "loss": 3.3723, + "step": 5366 + }, + { + "epoch": 0.64, + "learning_rate": 3.162836946687744e-06, + "loss": 3.437, + "step": 5367 + }, + { + "epoch": 0.64, + "learning_rate": 3.160976760937754e-06, + "loss": 3.3523, + "step": 5368 + }, + { + "epoch": 0.64, + "learning_rate": 3.1591168695050418e-06, + "loss": 3.3341, + "step": 5369 + }, + { + "epoch": 0.64, + "learning_rate": 3.1572572726872684e-06, + "loss": 3.3557, + "step": 5370 + }, + { + "epoch": 0.64, + "learning_rate": 3.1553979707820413e-06, + "loss": 3.3484, + "step": 5371 + }, + { + "epoch": 0.64, + "learning_rate": 3.1535389640869253e-06, + "loss": 3.4384, + "step": 5372 + }, + { + "epoch": 0.64, + "learning_rate": 3.151680252899435e-06, + "loss": 3.3916, + "step": 5373 + }, + { + "epoch": 0.64, + "learning_rate": 3.1498218375170368e-06, + "loss": 3.4007, + "step": 5374 + }, + { + "epoch": 0.64, + "learning_rate": 3.1479637182371557e-06, + "loss": 3.4144, + "step": 5375 + }, + { + "epoch": 0.64, + "learning_rate": 3.1461058953571605e-06, + "loss": 3.4474, + "step": 5376 + }, + { + "epoch": 0.64, + "learning_rate": 3.1442483691743786e-06, + "loss": 3.408, + "step": 5377 + }, + { + "epoch": 0.64, + "learning_rate": 3.142391139986092e-06, + "loss": 3.4695, + "step": 5378 + }, + { + "epoch": 0.64, + "learning_rate": 3.140534208089526e-06, + "loss": 3.3176, + "step": 5379 + }, + { + "epoch": 0.64, + "learning_rate": 3.1386775737818664e-06, + "loss": 3.4836, + "step": 5380 + }, + { + "epoch": 0.64, + "learning_rate": 3.1368212373602525e-06, + "loss": 3.4279, + "step": 5381 + }, + { + "epoch": 0.64, + "learning_rate": 3.1349651991217653e-06, + "loss": 3.4353, + "step": 5382 + }, + { + "epoch": 0.64, + "learning_rate": 3.1331094593634528e-06, + "loss": 3.5562, + "step": 5383 + }, + { + "epoch": 0.64, + "learning_rate": 3.131254018382301e-06, + "loss": 3.3554, + "step": 5384 + }, + { + "epoch": 0.64, + "learning_rate": 3.129398876475257e-06, + "loss": 3.367, + "step": 5385 + }, + { + "epoch": 0.64, + "learning_rate": 3.127544033939221e-06, + "loss": 3.5343, + "step": 5386 + }, + { + "epoch": 0.64, + "learning_rate": 3.1256894910710365e-06, + "loss": 3.3651, + "step": 5387 + }, + { + "epoch": 0.65, + "learning_rate": 3.1238352481675084e-06, + "loss": 3.4205, + "step": 5388 + }, + { + "epoch": 0.65, + "learning_rate": 3.121981305525389e-06, + "loss": 3.4975, + "step": 5389 + }, + { + "epoch": 0.65, + "learning_rate": 3.1201276634413817e-06, + "loss": 3.418, + "step": 5390 + }, + { + "epoch": 0.65, + "learning_rate": 3.1182743222121463e-06, + "loss": 3.4425, + "step": 5391 + }, + { + "epoch": 0.65, + "learning_rate": 3.1164212821342876e-06, + "loss": 3.3383, + "step": 5392 + }, + { + "epoch": 0.65, + "learning_rate": 3.114568543504369e-06, + "loss": 3.4563, + "step": 5393 + }, + { + "epoch": 0.65, + "learning_rate": 3.1127161066189037e-06, + "loss": 3.3934, + "step": 5394 + }, + { + "epoch": 0.65, + "learning_rate": 3.1108639717743516e-06, + "loss": 3.5136, + "step": 5395 + }, + { + "epoch": 0.65, + "learning_rate": 3.109012139267131e-06, + "loss": 3.4319, + "step": 5396 + }, + { + "epoch": 0.65, + "learning_rate": 3.1071606093936114e-06, + "loss": 3.3591, + "step": 5397 + }, + { + "epoch": 0.65, + "learning_rate": 3.1053093824501066e-06, + "loss": 3.3183, + "step": 5398 + }, + { + "epoch": 0.65, + "learning_rate": 3.103458458732893e-06, + "loss": 3.4456, + "step": 5399 + }, + { + "epoch": 0.65, + "learning_rate": 3.101607838538186e-06, + "loss": 3.4651, + "step": 5400 + }, + { + "epoch": 0.65, + "learning_rate": 3.099757522162162e-06, + "loss": 3.4326, + "step": 5401 + }, + { + "epoch": 0.65, + "learning_rate": 3.0979075099009475e-06, + "loss": 3.4335, + "step": 5402 + }, + { + "epoch": 0.65, + "learning_rate": 3.096057802050615e-06, + "loss": 3.3765, + "step": 5403 + }, + { + "epoch": 0.65, + "learning_rate": 3.0942083989071923e-06, + "loss": 3.3315, + "step": 5404 + }, + { + "epoch": 0.65, + "learning_rate": 3.092359300766662e-06, + "loss": 3.3685, + "step": 5405 + }, + { + "epoch": 0.65, + "learning_rate": 3.0905105079249477e-06, + "loss": 3.3213, + "step": 5406 + }, + { + "epoch": 0.65, + "learning_rate": 3.088662020677934e-06, + "loss": 3.4577, + "step": 5407 + }, + { + "epoch": 0.65, + "learning_rate": 3.0868138393214525e-06, + "loss": 3.5689, + "step": 5408 + }, + { + "epoch": 0.65, + "learning_rate": 3.084965964151284e-06, + "loss": 3.385, + "step": 5409 + }, + { + "epoch": 0.65, + "learning_rate": 3.083118395463166e-06, + "loss": 3.4447, + "step": 5410 + }, + { + "epoch": 0.65, + "learning_rate": 3.0812711335527807e-06, + "loss": 3.4268, + "step": 5411 + }, + { + "epoch": 0.65, + "learning_rate": 3.079424178715763e-06, + "loss": 3.4134, + "step": 5412 + }, + { + "epoch": 0.65, + "learning_rate": 3.077577531247703e-06, + "loss": 3.422, + "step": 5413 + }, + { + "epoch": 0.65, + "learning_rate": 3.0757311914441364e-06, + "loss": 3.4341, + "step": 5414 + }, + { + "epoch": 0.65, + "learning_rate": 3.073885159600551e-06, + "loss": 3.3194, + "step": 5415 + }, + { + "epoch": 0.65, + "learning_rate": 3.072039436012385e-06, + "loss": 3.3638, + "step": 5416 + }, + { + "epoch": 0.65, + "learning_rate": 3.0701940209750304e-06, + "loss": 3.3985, + "step": 5417 + }, + { + "epoch": 0.65, + "learning_rate": 3.0683489147838253e-06, + "loss": 3.4323, + "step": 5418 + }, + { + "epoch": 0.65, + "learning_rate": 3.066504117734061e-06, + "loss": 3.4479, + "step": 5419 + }, + { + "epoch": 0.65, + "learning_rate": 3.0646596301209795e-06, + "loss": 3.3784, + "step": 5420 + }, + { + "epoch": 0.65, + "learning_rate": 3.0628154522397737e-06, + "loss": 3.471, + "step": 5421 + }, + { + "epoch": 0.65, + "learning_rate": 3.060971584385582e-06, + "loss": 3.4717, + "step": 5422 + }, + { + "epoch": 0.65, + "learning_rate": 3.0591280268535016e-06, + "loss": 3.4105, + "step": 5423 + }, + { + "epoch": 0.65, + "learning_rate": 3.057284779938571e-06, + "loss": 3.3562, + "step": 5424 + }, + { + "epoch": 0.65, + "learning_rate": 3.0554418439357858e-06, + "loss": 3.4427, + "step": 5425 + }, + { + "epoch": 0.65, + "learning_rate": 3.053599219140092e-06, + "loss": 3.4204, + "step": 5426 + }, + { + "epoch": 0.65, + "learning_rate": 3.0517569058463773e-06, + "loss": 3.4884, + "step": 5427 + }, + { + "epoch": 0.65, + "learning_rate": 3.04991490434949e-06, + "loss": 3.3668, + "step": 5428 + }, + { + "epoch": 0.65, + "learning_rate": 3.0480732149442237e-06, + "loss": 3.4133, + "step": 5429 + }, + { + "epoch": 0.65, + "learning_rate": 3.0462318379253207e-06, + "loss": 3.4246, + "step": 5430 + }, + { + "epoch": 0.65, + "learning_rate": 3.0443907735874774e-06, + "loss": 3.3974, + "step": 5431 + }, + { + "epoch": 0.65, + "learning_rate": 3.0425500222253333e-06, + "loss": 3.447, + "step": 5432 + }, + { + "epoch": 0.65, + "learning_rate": 3.0407095841334865e-06, + "loss": 3.4438, + "step": 5433 + }, + { + "epoch": 0.65, + "learning_rate": 3.0388694596064804e-06, + "loss": 3.4212, + "step": 5434 + }, + { + "epoch": 0.65, + "learning_rate": 3.037029648938805e-06, + "loss": 3.397, + "step": 5435 + }, + { + "epoch": 0.65, + "learning_rate": 3.035190152424909e-06, + "loss": 3.3813, + "step": 5436 + }, + { + "epoch": 0.65, + "learning_rate": 3.03335097035918e-06, + "loss": 3.4089, + "step": 5437 + }, + { + "epoch": 0.65, + "learning_rate": 3.031512103035963e-06, + "loss": 3.4575, + "step": 5438 + }, + { + "epoch": 0.65, + "learning_rate": 3.029673550749553e-06, + "loss": 3.4554, + "step": 5439 + }, + { + "epoch": 0.65, + "learning_rate": 3.0278353137941867e-06, + "loss": 3.4096, + "step": 5440 + }, + { + "epoch": 0.65, + "learning_rate": 3.0259973924640586e-06, + "loss": 3.3482, + "step": 5441 + }, + { + "epoch": 0.65, + "learning_rate": 3.024159787053311e-06, + "loss": 3.4957, + "step": 5442 + }, + { + "epoch": 0.65, + "learning_rate": 3.0223224978560306e-06, + "loss": 3.3852, + "step": 5443 + }, + { + "epoch": 0.65, + "learning_rate": 3.020485525166261e-06, + "loss": 3.4182, + "step": 5444 + }, + { + "epoch": 0.65, + "learning_rate": 3.018648869277988e-06, + "loss": 3.4395, + "step": 5445 + }, + { + "epoch": 0.65, + "learning_rate": 3.0168125304851512e-06, + "loss": 3.4241, + "step": 5446 + }, + { + "epoch": 0.65, + "learning_rate": 3.01497650908164e-06, + "loss": 3.3591, + "step": 5447 + }, + { + "epoch": 0.65, + "learning_rate": 3.013140805361288e-06, + "loss": 3.3752, + "step": 5448 + }, + { + "epoch": 0.65, + "learning_rate": 3.011305419617883e-06, + "loss": 3.5222, + "step": 5449 + }, + { + "epoch": 0.65, + "learning_rate": 3.0094703521451626e-06, + "loss": 3.4358, + "step": 5450 + }, + { + "epoch": 0.65, + "learning_rate": 3.0076356032368057e-06, + "loss": 3.435, + "step": 5451 + }, + { + "epoch": 0.65, + "learning_rate": 3.0058011731864493e-06, + "loss": 3.3526, + "step": 5452 + }, + { + "epoch": 0.65, + "learning_rate": 3.003967062287675e-06, + "loss": 3.4482, + "step": 5453 + }, + { + "epoch": 0.65, + "learning_rate": 3.002133270834012e-06, + "loss": 3.3275, + "step": 5454 + }, + { + "epoch": 0.65, + "learning_rate": 3.0002997991189434e-06, + "loss": 3.4259, + "step": 5455 + }, + { + "epoch": 0.65, + "learning_rate": 2.9984666474358963e-06, + "loss": 3.3928, + "step": 5456 + }, + { + "epoch": 0.65, + "learning_rate": 2.9966338160782477e-06, + "loss": 3.3528, + "step": 5457 + }, + { + "epoch": 0.65, + "learning_rate": 2.9948013053393253e-06, + "loss": 3.3341, + "step": 5458 + }, + { + "epoch": 0.65, + "learning_rate": 2.992969115512403e-06, + "loss": 3.4583, + "step": 5459 + }, + { + "epoch": 0.65, + "learning_rate": 2.991137246890706e-06, + "loss": 3.4512, + "step": 5460 + }, + { + "epoch": 0.65, + "learning_rate": 2.989305699767403e-06, + "loss": 3.4097, + "step": 5461 + }, + { + "epoch": 0.65, + "learning_rate": 2.98747447443562e-06, + "loss": 3.4464, + "step": 5462 + }, + { + "epoch": 0.65, + "learning_rate": 2.9856435711884225e-06, + "loss": 3.4181, + "step": 5463 + }, + { + "epoch": 0.65, + "learning_rate": 2.9838129903188283e-06, + "loss": 3.3303, + "step": 5464 + }, + { + "epoch": 0.65, + "learning_rate": 2.981982732119805e-06, + "loss": 3.4777, + "step": 5465 + }, + { + "epoch": 0.65, + "learning_rate": 2.9801527968842682e-06, + "loss": 3.4417, + "step": 5466 + }, + { + "epoch": 0.65, + "learning_rate": 2.9783231849050777e-06, + "loss": 3.3723, + "step": 5467 + }, + { + "epoch": 0.65, + "learning_rate": 2.976493896475048e-06, + "loss": 3.4718, + "step": 5468 + }, + { + "epoch": 0.65, + "learning_rate": 2.9746649318869346e-06, + "loss": 3.2979, + "step": 5469 + }, + { + "epoch": 0.65, + "learning_rate": 2.9728362914334473e-06, + "loss": 3.3194, + "step": 5470 + }, + { + "epoch": 0.65, + "learning_rate": 2.971007975407243e-06, + "loss": 3.4682, + "step": 5471 + }, + { + "epoch": 0.66, + "learning_rate": 2.9691799841009216e-06, + "loss": 3.4099, + "step": 5472 + }, + { + "epoch": 0.66, + "learning_rate": 2.967352317807038e-06, + "loss": 3.3772, + "step": 5473 + }, + { + "epoch": 0.66, + "learning_rate": 2.965524976818092e-06, + "loss": 3.425, + "step": 5474 + }, + { + "epoch": 0.66, + "learning_rate": 2.9636979614265287e-06, + "loss": 3.493, + "step": 5475 + }, + { + "epoch": 0.66, + "learning_rate": 2.9618712719247466e-06, + "loss": 3.4659, + "step": 5476 + }, + { + "epoch": 0.66, + "learning_rate": 2.9600449086050863e-06, + "loss": 3.4143, + "step": 5477 + }, + { + "epoch": 0.66, + "learning_rate": 2.9582188717598405e-06, + "loss": 3.4552, + "step": 5478 + }, + { + "epoch": 0.66, + "learning_rate": 2.956393161681249e-06, + "loss": 3.3956, + "step": 5479 + }, + { + "epoch": 0.66, + "learning_rate": 2.9545677786614957e-06, + "loss": 3.4477, + "step": 5480 + }, + { + "epoch": 0.66, + "learning_rate": 2.952742722992717e-06, + "loss": 3.464, + "step": 5481 + }, + { + "epoch": 0.66, + "learning_rate": 2.9509179949669964e-06, + "loss": 3.346, + "step": 5482 + }, + { + "epoch": 0.66, + "learning_rate": 2.9490935948763586e-06, + "loss": 3.3009, + "step": 5483 + }, + { + "epoch": 0.66, + "learning_rate": 2.947269523012786e-06, + "loss": 3.401, + "step": 5484 + }, + { + "epoch": 0.66, + "learning_rate": 2.945445779668199e-06, + "loss": 3.4549, + "step": 5485 + }, + { + "epoch": 0.66, + "learning_rate": 2.9436223651344697e-06, + "loss": 3.3461, + "step": 5486 + }, + { + "epoch": 0.66, + "learning_rate": 2.941799279703421e-06, + "loss": 3.3544, + "step": 5487 + }, + { + "epoch": 0.66, + "learning_rate": 2.9399765236668155e-06, + "loss": 3.3809, + "step": 5488 + }, + { + "epoch": 0.66, + "learning_rate": 2.9381540973163684e-06, + "loss": 3.4076, + "step": 5489 + }, + { + "epoch": 0.66, + "learning_rate": 2.9363320009437436e-06, + "loss": 3.2837, + "step": 5490 + }, + { + "epoch": 0.66, + "learning_rate": 2.934510234840544e-06, + "loss": 3.4479, + "step": 5491 + }, + { + "epoch": 0.66, + "learning_rate": 2.9326887992983305e-06, + "loss": 3.416, + "step": 5492 + }, + { + "epoch": 0.66, + "learning_rate": 2.9308676946086013e-06, + "loss": 3.4711, + "step": 5493 + }, + { + "epoch": 0.66, + "learning_rate": 2.929046921062807e-06, + "loss": 3.3341, + "step": 5494 + }, + { + "epoch": 0.66, + "learning_rate": 2.9272264789523465e-06, + "loss": 3.4603, + "step": 5495 + }, + { + "epoch": 0.66, + "learning_rate": 2.9254063685685613e-06, + "loss": 3.4744, + "step": 5496 + }, + { + "epoch": 0.66, + "learning_rate": 2.9235865902027417e-06, + "loss": 3.469, + "step": 5497 + }, + { + "epoch": 0.66, + "learning_rate": 2.921767144146127e-06, + "loss": 3.362, + "step": 5498 + }, + { + "epoch": 0.66, + "learning_rate": 2.9199480306898996e-06, + "loss": 3.4054, + "step": 5499 + }, + { + "epoch": 0.66, + "learning_rate": 2.9181292501251924e-06, + "loss": 3.4413, + "step": 5500 + }, + { + "epoch": 0.66, + "learning_rate": 2.9163108027430782e-06, + "loss": 3.4547, + "step": 5501 + }, + { + "epoch": 0.66, + "learning_rate": 2.914492688834587e-06, + "loss": 3.3803, + "step": 5502 + }, + { + "epoch": 0.66, + "learning_rate": 2.912674908690687e-06, + "loss": 3.4058, + "step": 5503 + }, + { + "epoch": 0.66, + "learning_rate": 2.9108574626022955e-06, + "loss": 3.4433, + "step": 5504 + }, + { + "epoch": 0.66, + "learning_rate": 2.909040350860278e-06, + "loss": 3.3705, + "step": 5505 + }, + { + "epoch": 0.66, + "learning_rate": 2.9072235737554443e-06, + "loss": 3.3475, + "step": 5506 + }, + { + "epoch": 0.66, + "learning_rate": 2.9054071315785496e-06, + "loss": 3.4369, + "step": 5507 + }, + { + "epoch": 0.66, + "learning_rate": 2.903591024620303e-06, + "loss": 3.3713, + "step": 5508 + }, + { + "epoch": 0.66, + "learning_rate": 2.901775253171346e-06, + "loss": 3.4057, + "step": 5509 + }, + { + "epoch": 0.66, + "learning_rate": 2.899959817522281e-06, + "loss": 3.3904, + "step": 5510 + }, + { + "epoch": 0.66, + "learning_rate": 2.898144717963649e-06, + "loss": 3.3328, + "step": 5511 + }, + { + "epoch": 0.66, + "learning_rate": 2.896329954785937e-06, + "loss": 3.3994, + "step": 5512 + }, + { + "epoch": 0.66, + "learning_rate": 2.894515528279581e-06, + "loss": 3.4162, + "step": 5513 + }, + { + "epoch": 0.66, + "learning_rate": 2.892701438734962e-06, + "loss": 3.296, + "step": 5514 + }, + { + "epoch": 0.66, + "learning_rate": 2.890887686442403e-06, + "loss": 3.415, + "step": 5515 + }, + { + "epoch": 0.66, + "learning_rate": 2.8890742716921834e-06, + "loss": 3.3307, + "step": 5516 + }, + { + "epoch": 0.66, + "learning_rate": 2.8872611947745175e-06, + "loss": 3.4262, + "step": 5517 + }, + { + "epoch": 0.66, + "learning_rate": 2.8854484559795733e-06, + "loss": 3.3856, + "step": 5518 + }, + { + "epoch": 0.66, + "learning_rate": 2.8836360555974583e-06, + "loss": 3.4422, + "step": 5519 + }, + { + "epoch": 0.66, + "learning_rate": 2.8818239939182313e-06, + "loss": 3.3032, + "step": 5520 + }, + { + "epoch": 0.66, + "learning_rate": 2.8800122712318936e-06, + "loss": 3.4437, + "step": 5521 + }, + { + "epoch": 0.66, + "learning_rate": 2.8782008878283916e-06, + "loss": 3.4471, + "step": 5522 + }, + { + "epoch": 0.66, + "learning_rate": 2.8763898439976233e-06, + "loss": 3.3843, + "step": 5523 + }, + { + "epoch": 0.66, + "learning_rate": 2.874579140029425e-06, + "loss": 3.4385, + "step": 5524 + }, + { + "epoch": 0.66, + "learning_rate": 2.8727687762135837e-06, + "loss": 3.4598, + "step": 5525 + }, + { + "epoch": 0.66, + "learning_rate": 2.8709587528398285e-06, + "loss": 3.4595, + "step": 5526 + }, + { + "epoch": 0.66, + "learning_rate": 2.8691490701978365e-06, + "loss": 3.3873, + "step": 5527 + }, + { + "epoch": 0.66, + "learning_rate": 2.8673397285772264e-06, + "loss": 3.3046, + "step": 5528 + }, + { + "epoch": 0.66, + "learning_rate": 2.865530728267572e-06, + "loss": 3.344, + "step": 5529 + }, + { + "epoch": 0.66, + "learning_rate": 2.8637220695583774e-06, + "loss": 3.4373, + "step": 5530 + }, + { + "epoch": 0.66, + "learning_rate": 2.861913752739106e-06, + "loss": 3.3786, + "step": 5531 + }, + { + "epoch": 0.66, + "learning_rate": 2.860105778099159e-06, + "loss": 3.4661, + "step": 5532 + }, + { + "epoch": 0.66, + "learning_rate": 2.858298145927886e-06, + "loss": 3.4161, + "step": 5533 + }, + { + "epoch": 0.66, + "learning_rate": 2.8564908565145767e-06, + "loss": 3.4311, + "step": 5534 + }, + { + "epoch": 0.66, + "learning_rate": 2.854683910148476e-06, + "loss": 3.3358, + "step": 5535 + }, + { + "epoch": 0.66, + "learning_rate": 2.8528773071187597e-06, + "loss": 3.4601, + "step": 5536 + }, + { + "epoch": 0.66, + "learning_rate": 2.8510710477145653e-06, + "loss": 3.3826, + "step": 5537 + }, + { + "epoch": 0.66, + "learning_rate": 2.8492651322249577e-06, + "loss": 3.416, + "step": 5538 + }, + { + "epoch": 0.66, + "learning_rate": 2.847459560938962e-06, + "loss": 3.4313, + "step": 5539 + }, + { + "epoch": 0.66, + "learning_rate": 2.84565433414554e-06, + "loss": 3.4552, + "step": 5540 + }, + { + "epoch": 0.66, + "learning_rate": 2.843849452133599e-06, + "loss": 3.3656, + "step": 5541 + }, + { + "epoch": 0.66, + "learning_rate": 2.842044915191992e-06, + "loss": 3.3039, + "step": 5542 + }, + { + "epoch": 0.66, + "learning_rate": 2.8402407236095223e-06, + "loss": 3.3443, + "step": 5543 + }, + { + "epoch": 0.66, + "learning_rate": 2.838436877674925e-06, + "loss": 3.4098, + "step": 5544 + }, + { + "epoch": 0.66, + "learning_rate": 2.836633377676895e-06, + "loss": 3.3526, + "step": 5545 + }, + { + "epoch": 0.66, + "learning_rate": 2.834830223904057e-06, + "loss": 3.3869, + "step": 5546 + }, + { + "epoch": 0.66, + "learning_rate": 2.8330274166449933e-06, + "loss": 3.4326, + "step": 5547 + }, + { + "epoch": 0.66, + "learning_rate": 2.831224956188224e-06, + "loss": 3.4223, + "step": 5548 + }, + { + "epoch": 0.66, + "learning_rate": 2.8294228428222136e-06, + "loss": 3.4032, + "step": 5549 + }, + { + "epoch": 0.66, + "learning_rate": 2.827621076835371e-06, + "loss": 3.4919, + "step": 5550 + }, + { + "epoch": 0.66, + "learning_rate": 2.8258196585160574e-06, + "loss": 3.3891, + "step": 5551 + }, + { + "epoch": 0.66, + "learning_rate": 2.824018588152563e-06, + "loss": 3.4419, + "step": 5552 + }, + { + "epoch": 0.66, + "learning_rate": 2.8222178660331394e-06, + "loss": 3.311, + "step": 5553 + }, + { + "epoch": 0.66, + "learning_rate": 2.820417492445966e-06, + "loss": 3.3937, + "step": 5554 + }, + { + "epoch": 0.67, + "learning_rate": 2.818617467679181e-06, + "loss": 3.3658, + "step": 5555 + }, + { + "epoch": 0.67, + "learning_rate": 2.816817792020858e-06, + "loss": 3.3745, + "step": 5556 + }, + { + "epoch": 0.67, + "learning_rate": 2.815018465759017e-06, + "loss": 3.4338, + "step": 5557 + }, + { + "epoch": 0.67, + "learning_rate": 2.81321948918162e-06, + "loss": 3.4195, + "step": 5558 + }, + { + "epoch": 0.67, + "learning_rate": 2.811420862576582e-06, + "loss": 3.4586, + "step": 5559 + }, + { + "epoch": 0.67, + "learning_rate": 2.8096225862317467e-06, + "loss": 3.3109, + "step": 5560 + }, + { + "epoch": 0.67, + "learning_rate": 2.8078246604349164e-06, + "loss": 3.4422, + "step": 5561 + }, + { + "epoch": 0.67, + "learning_rate": 2.8060270854738287e-06, + "loss": 3.4197, + "step": 5562 + }, + { + "epoch": 0.67, + "learning_rate": 2.804229861636168e-06, + "loss": 3.4767, + "step": 5563 + }, + { + "epoch": 0.67, + "learning_rate": 2.802432989209562e-06, + "loss": 3.379, + "step": 5564 + }, + { + "epoch": 0.67, + "learning_rate": 2.800636468481582e-06, + "loss": 3.388, + "step": 5565 + }, + { + "epoch": 0.67, + "learning_rate": 2.798840299739743e-06, + "loss": 3.4071, + "step": 5566 + }, + { + "epoch": 0.67, + "learning_rate": 2.797044483271502e-06, + "loss": 3.5156, + "step": 5567 + }, + { + "epoch": 0.67, + "learning_rate": 2.7952490193642657e-06, + "loss": 3.4179, + "step": 5568 + }, + { + "epoch": 0.67, + "learning_rate": 2.793453908305378e-06, + "loss": 3.4683, + "step": 5569 + }, + { + "epoch": 0.67, + "learning_rate": 2.7916591503821285e-06, + "loss": 3.3832, + "step": 5570 + }, + { + "epoch": 0.67, + "learning_rate": 2.7898647458817474e-06, + "loss": 3.4608, + "step": 5571 + }, + { + "epoch": 0.67, + "learning_rate": 2.7880706950914196e-06, + "loss": 3.473, + "step": 5572 + }, + { + "epoch": 0.67, + "learning_rate": 2.786276998298254e-06, + "loss": 3.3951, + "step": 5573 + }, + { + "epoch": 0.67, + "learning_rate": 2.7844836557893236e-06, + "loss": 3.4602, + "step": 5574 + }, + { + "epoch": 0.67, + "learning_rate": 2.782690667851626e-06, + "loss": 3.3709, + "step": 5575 + }, + { + "epoch": 0.67, + "learning_rate": 2.780898034772118e-06, + "loss": 3.4117, + "step": 5576 + }, + { + "epoch": 0.67, + "learning_rate": 2.7791057568376905e-06, + "loss": 3.4003, + "step": 5577 + }, + { + "epoch": 0.67, + "learning_rate": 2.777313834335179e-06, + "loss": 3.2915, + "step": 5578 + }, + { + "epoch": 0.67, + "learning_rate": 2.7755222675513616e-06, + "loss": 3.3362, + "step": 5579 + }, + { + "epoch": 0.67, + "learning_rate": 2.7737310567729652e-06, + "loss": 3.331, + "step": 5580 + }, + { + "epoch": 0.67, + "learning_rate": 2.7719402022866483e-06, + "loss": 3.3699, + "step": 5581 + }, + { + "epoch": 0.67, + "learning_rate": 2.770149704379027e-06, + "loss": 3.5099, + "step": 5582 + }, + { + "epoch": 0.67, + "learning_rate": 2.7683595633366456e-06, + "loss": 3.5185, + "step": 5583 + }, + { + "epoch": 0.67, + "learning_rate": 2.7665697794460023e-06, + "loss": 3.4332, + "step": 5584 + }, + { + "epoch": 0.67, + "learning_rate": 2.7647803529935337e-06, + "loss": 3.3615, + "step": 5585 + }, + { + "epoch": 0.67, + "learning_rate": 2.7629912842656187e-06, + "loss": 3.4069, + "step": 5586 + }, + { + "epoch": 0.67, + "learning_rate": 2.761202573548578e-06, + "loss": 3.3344, + "step": 5587 + }, + { + "epoch": 0.67, + "learning_rate": 2.7594142211286834e-06, + "loss": 3.4359, + "step": 5588 + }, + { + "epoch": 0.67, + "learning_rate": 2.757626227292135e-06, + "loss": 3.4101, + "step": 5589 + }, + { + "epoch": 0.67, + "learning_rate": 2.7558385923250897e-06, + "loss": 3.42, + "step": 5590 + }, + { + "epoch": 0.67, + "learning_rate": 2.7540513165136345e-06, + "loss": 3.375, + "step": 5591 + }, + { + "epoch": 0.67, + "learning_rate": 2.7522644001438104e-06, + "loss": 3.3416, + "step": 5592 + }, + { + "epoch": 0.67, + "learning_rate": 2.7504778435015925e-06, + "loss": 3.342, + "step": 5593 + }, + { + "epoch": 0.67, + "learning_rate": 2.748691646872903e-06, + "loss": 3.3128, + "step": 5594 + }, + { + "epoch": 0.67, + "learning_rate": 2.746905810543602e-06, + "loss": 3.419, + "step": 5595 + }, + { + "epoch": 0.67, + "learning_rate": 2.7451203347995005e-06, + "loss": 3.4226, + "step": 5596 + }, + { + "epoch": 0.67, + "learning_rate": 2.7433352199263373e-06, + "loss": 3.4995, + "step": 5597 + }, + { + "epoch": 0.67, + "learning_rate": 2.7415504662098124e-06, + "loss": 3.2569, + "step": 5598 + }, + { + "epoch": 0.67, + "learning_rate": 2.739766073935548e-06, + "loss": 3.4978, + "step": 5599 + }, + { + "epoch": 0.67, + "learning_rate": 2.7379820433891245e-06, + "loss": 3.4194, + "step": 5600 + }, + { + "epoch": 0.67, + "learning_rate": 2.7361983748560572e-06, + "loss": 3.4532, + "step": 5601 + }, + { + "epoch": 0.67, + "learning_rate": 2.7344150686218036e-06, + "loss": 3.4811, + "step": 5602 + }, + { + "epoch": 0.67, + "learning_rate": 2.7326321249717615e-06, + "loss": 3.451, + "step": 5603 + }, + { + "epoch": 0.67, + "learning_rate": 2.7308495441912786e-06, + "loss": 3.3133, + "step": 5604 + }, + { + "epoch": 0.67, + "learning_rate": 2.729067326565635e-06, + "loss": 3.361, + "step": 5605 + }, + { + "epoch": 0.67, + "learning_rate": 2.727285472380059e-06, + "loss": 3.4933, + "step": 5606 + }, + { + "epoch": 0.67, + "learning_rate": 2.725503981919718e-06, + "loss": 3.3867, + "step": 5607 + }, + { + "epoch": 0.67, + "learning_rate": 2.7237228554697203e-06, + "loss": 3.4515, + "step": 5608 + }, + { + "epoch": 0.67, + "learning_rate": 2.72194209331512e-06, + "loss": 3.3558, + "step": 5609 + }, + { + "epoch": 0.67, + "learning_rate": 2.7201616957409055e-06, + "loss": 3.4155, + "step": 5610 + }, + { + "epoch": 0.67, + "learning_rate": 2.718381663032017e-06, + "loss": 3.3943, + "step": 5611 + }, + { + "epoch": 0.67, + "learning_rate": 2.7166019954733293e-06, + "loss": 3.3433, + "step": 5612 + }, + { + "epoch": 0.67, + "learning_rate": 2.7148226933496597e-06, + "loss": 3.4373, + "step": 5613 + }, + { + "epoch": 0.67, + "learning_rate": 2.7130437569457678e-06, + "loss": 3.4758, + "step": 5614 + }, + { + "epoch": 0.67, + "learning_rate": 2.711265186546355e-06, + "loss": 3.4629, + "step": 5615 + }, + { + "epoch": 0.67, + "learning_rate": 2.7094869824360615e-06, + "loss": 3.4235, + "step": 5616 + }, + { + "epoch": 0.67, + "learning_rate": 2.707709144899476e-06, + "loss": 3.5101, + "step": 5617 + }, + { + "epoch": 0.67, + "learning_rate": 2.7059316742211177e-06, + "loss": 3.4082, + "step": 5618 + }, + { + "epoch": 0.67, + "learning_rate": 2.7041545706854576e-06, + "loss": 3.362, + "step": 5619 + }, + { + "epoch": 0.67, + "learning_rate": 2.7023778345769014e-06, + "loss": 3.4597, + "step": 5620 + }, + { + "epoch": 0.67, + "learning_rate": 2.7006014661797985e-06, + "loss": 3.426, + "step": 5621 + }, + { + "epoch": 0.67, + "learning_rate": 2.698825465778439e-06, + "loss": 3.4382, + "step": 5622 + }, + { + "epoch": 0.67, + "learning_rate": 2.697049833657053e-06, + "loss": 3.3946, + "step": 5623 + }, + { + "epoch": 0.67, + "learning_rate": 2.695274570099812e-06, + "loss": 3.3991, + "step": 5624 + }, + { + "epoch": 0.67, + "learning_rate": 2.6934996753908336e-06, + "loss": 3.4194, + "step": 5625 + }, + { + "epoch": 0.67, + "learning_rate": 2.691725149814166e-06, + "loss": 3.4418, + "step": 5626 + }, + { + "epoch": 0.67, + "learning_rate": 2.6899509936538083e-06, + "loss": 3.3955, + "step": 5627 + }, + { + "epoch": 0.67, + "learning_rate": 2.6881772071936952e-06, + "loss": 3.5197, + "step": 5628 + }, + { + "epoch": 0.67, + "learning_rate": 2.686403790717704e-06, + "loss": 3.4461, + "step": 5629 + }, + { + "epoch": 0.67, + "learning_rate": 2.6846307445096515e-06, + "loss": 3.3969, + "step": 5630 + }, + { + "epoch": 0.67, + "learning_rate": 2.6828580688532964e-06, + "loss": 3.3562, + "step": 5631 + }, + { + "epoch": 0.67, + "learning_rate": 2.6810857640323356e-06, + "loss": 3.4519, + "step": 5632 + }, + { + "epoch": 0.67, + "learning_rate": 2.6793138303304143e-06, + "loss": 3.4643, + "step": 5633 + }, + { + "epoch": 0.67, + "learning_rate": 2.6775422680311062e-06, + "loss": 3.4978, + "step": 5634 + }, + { + "epoch": 0.67, + "learning_rate": 2.675771077417939e-06, + "loss": 3.4619, + "step": 5635 + }, + { + "epoch": 0.67, + "learning_rate": 2.6740002587743664e-06, + "loss": 3.3931, + "step": 5636 + }, + { + "epoch": 0.67, + "learning_rate": 2.672229812383796e-06, + "loss": 3.4296, + "step": 5637 + }, + { + "epoch": 0.67, + "learning_rate": 2.670459738529568e-06, + "loss": 3.2801, + "step": 5638 + }, + { + "epoch": 0.68, + "learning_rate": 2.6686900374949655e-06, + "loss": 3.3839, + "step": 5639 + }, + { + "epoch": 0.68, + "learning_rate": 2.666920709563208e-06, + "loss": 3.4143, + "step": 5640 + }, + { + "epoch": 0.68, + "learning_rate": 2.6651517550174666e-06, + "loss": 3.4027, + "step": 5641 + }, + { + "epoch": 0.68, + "learning_rate": 2.6633831741408365e-06, + "loss": 3.4534, + "step": 5642 + }, + { + "epoch": 0.68, + "learning_rate": 2.661614967216366e-06, + "loss": 3.4057, + "step": 5643 + }, + { + "epoch": 0.68, + "learning_rate": 2.6598471345270383e-06, + "loss": 3.5032, + "step": 5644 + }, + { + "epoch": 0.68, + "learning_rate": 2.658079676355777e-06, + "loss": 3.3414, + "step": 5645 + }, + { + "epoch": 0.68, + "learning_rate": 2.656312592985446e-06, + "loss": 3.4442, + "step": 5646 + }, + { + "epoch": 0.68, + "learning_rate": 2.6545458846988494e-06, + "loss": 3.414, + "step": 5647 + }, + { + "epoch": 0.68, + "learning_rate": 2.6527795517787293e-06, + "loss": 3.3741, + "step": 5648 + }, + { + "epoch": 0.68, + "learning_rate": 2.651013594507773e-06, + "loss": 3.4676, + "step": 5649 + }, + { + "epoch": 0.68, + "learning_rate": 2.6492480131686027e-06, + "loss": 3.4281, + "step": 5650 + }, + { + "epoch": 0.68, + "learning_rate": 2.647482808043782e-06, + "loss": 3.394, + "step": 5651 + }, + { + "epoch": 0.68, + "learning_rate": 2.645717979415815e-06, + "loss": 3.4745, + "step": 5652 + }, + { + "epoch": 0.68, + "learning_rate": 2.6439535275671437e-06, + "loss": 3.3654, + "step": 5653 + }, + { + "epoch": 0.68, + "learning_rate": 2.642189452780152e-06, + "loss": 3.3814, + "step": 5654 + }, + { + "epoch": 0.68, + "learning_rate": 2.6404257553371603e-06, + "loss": 3.5507, + "step": 5655 + }, + { + "epoch": 0.68, + "learning_rate": 2.6386624355204336e-06, + "loss": 3.459, + "step": 5656 + }, + { + "epoch": 0.68, + "learning_rate": 2.6368994936121726e-06, + "loss": 3.3028, + "step": 5657 + }, + { + "epoch": 0.68, + "learning_rate": 2.635136929894519e-06, + "loss": 3.3256, + "step": 5658 + }, + { + "epoch": 0.68, + "learning_rate": 2.6333747446495526e-06, + "loss": 3.3088, + "step": 5659 + }, + { + "epoch": 0.68, + "learning_rate": 2.6316129381592935e-06, + "loss": 3.3281, + "step": 5660 + }, + { + "epoch": 0.68, + "learning_rate": 2.6298515107057e-06, + "loss": 3.3554, + "step": 5661 + }, + { + "epoch": 0.68, + "learning_rate": 2.628090462570676e-06, + "loss": 3.3705, + "step": 5662 + }, + { + "epoch": 0.68, + "learning_rate": 2.6263297940360523e-06, + "loss": 3.4714, + "step": 5663 + }, + { + "epoch": 0.68, + "learning_rate": 2.624569505383612e-06, + "loss": 3.4288, + "step": 5664 + }, + { + "epoch": 0.68, + "learning_rate": 2.6228095968950697e-06, + "loss": 3.3916, + "step": 5665 + }, + { + "epoch": 0.68, + "learning_rate": 2.621050068852081e-06, + "loss": 3.3085, + "step": 5666 + }, + { + "epoch": 0.68, + "learning_rate": 2.619290921536242e-06, + "loss": 3.4197, + "step": 5667 + }, + { + "epoch": 0.68, + "learning_rate": 2.617532155229085e-06, + "loss": 3.3872, + "step": 5668 + }, + { + "epoch": 0.68, + "learning_rate": 2.6157737702120817e-06, + "loss": 3.3467, + "step": 5669 + }, + { + "epoch": 0.68, + "learning_rate": 2.61401576676665e-06, + "loss": 3.4296, + "step": 5670 + }, + { + "epoch": 0.68, + "learning_rate": 2.6122581451741323e-06, + "loss": 3.4106, + "step": 5671 + }, + { + "epoch": 0.68, + "learning_rate": 2.610500905715826e-06, + "loss": 3.4557, + "step": 5672 + }, + { + "epoch": 0.68, + "learning_rate": 2.608744048672955e-06, + "loss": 3.3529, + "step": 5673 + }, + { + "epoch": 0.68, + "learning_rate": 2.6069875743266894e-06, + "loss": 3.4633, + "step": 5674 + }, + { + "epoch": 0.68, + "learning_rate": 2.6052314829581337e-06, + "loss": 3.5049, + "step": 5675 + }, + { + "epoch": 0.68, + "learning_rate": 2.603475774848333e-06, + "loss": 3.2917, + "step": 5676 + }, + { + "epoch": 0.68, + "learning_rate": 2.601720450278269e-06, + "loss": 3.4252, + "step": 5677 + }, + { + "epoch": 0.68, + "learning_rate": 2.5999655095288702e-06, + "loss": 3.4223, + "step": 5678 + }, + { + "epoch": 0.68, + "learning_rate": 2.598210952880989e-06, + "loss": 3.3697, + "step": 5679 + }, + { + "epoch": 0.68, + "learning_rate": 2.596456780615431e-06, + "loss": 3.4695, + "step": 5680 + }, + { + "epoch": 0.68, + "learning_rate": 2.594702993012931e-06, + "loss": 3.5218, + "step": 5681 + }, + { + "epoch": 0.68, + "learning_rate": 2.5929495903541655e-06, + "loss": 3.3794, + "step": 5682 + }, + { + "epoch": 0.68, + "learning_rate": 2.5911965729197496e-06, + "loss": 3.4059, + "step": 5683 + }, + { + "epoch": 0.68, + "learning_rate": 2.589443940990236e-06, + "loss": 3.3801, + "step": 5684 + }, + { + "epoch": 0.68, + "learning_rate": 2.587691694846114e-06, + "loss": 3.4039, + "step": 5685 + }, + { + "epoch": 0.68, + "learning_rate": 2.585939834767819e-06, + "loss": 3.4404, + "step": 5686 + }, + { + "epoch": 0.68, + "learning_rate": 2.5841883610357103e-06, + "loss": 3.4018, + "step": 5687 + }, + { + "epoch": 0.68, + "learning_rate": 2.5824372739301007e-06, + "loss": 3.3953, + "step": 5688 + }, + { + "epoch": 0.68, + "learning_rate": 2.5806865737312304e-06, + "loss": 3.4492, + "step": 5689 + }, + { + "epoch": 0.68, + "learning_rate": 2.5789362607192837e-06, + "loss": 3.5228, + "step": 5690 + }, + { + "epoch": 0.68, + "learning_rate": 2.577186335174379e-06, + "loss": 3.4795, + "step": 5691 + }, + { + "epoch": 0.68, + "learning_rate": 2.5754367973765747e-06, + "loss": 3.4021, + "step": 5692 + }, + { + "epoch": 0.68, + "learning_rate": 2.573687647605866e-06, + "loss": 3.4178, + "step": 5693 + }, + { + "epoch": 0.68, + "learning_rate": 2.5719388861421894e-06, + "loss": 3.4354, + "step": 5694 + }, + { + "epoch": 0.68, + "learning_rate": 2.570190513265416e-06, + "loss": 3.4332, + "step": 5695 + }, + { + "epoch": 0.68, + "learning_rate": 2.568442529255354e-06, + "loss": 3.3872, + "step": 5696 + }, + { + "epoch": 0.68, + "learning_rate": 2.5666949343917514e-06, + "loss": 3.3306, + "step": 5697 + }, + { + "epoch": 0.68, + "learning_rate": 2.564947728954291e-06, + "loss": 3.3872, + "step": 5698 + }, + { + "epoch": 0.68, + "learning_rate": 2.563200913222602e-06, + "loss": 3.5253, + "step": 5699 + }, + { + "epoch": 0.68, + "learning_rate": 2.561454487476237e-06, + "loss": 3.4855, + "step": 5700 + }, + { + "epoch": 0.68, + "learning_rate": 2.5597084519946993e-06, + "loss": 3.3746, + "step": 5701 + }, + { + "epoch": 0.68, + "learning_rate": 2.5579628070574227e-06, + "loss": 3.417, + "step": 5702 + }, + { + "epoch": 0.68, + "learning_rate": 2.5562175529437805e-06, + "loss": 3.3661, + "step": 5703 + }, + { + "epoch": 0.68, + "learning_rate": 2.5544726899330824e-06, + "loss": 3.3903, + "step": 5704 + }, + { + "epoch": 0.68, + "learning_rate": 2.552728218304577e-06, + "loss": 3.4119, + "step": 5705 + }, + { + "epoch": 0.68, + "learning_rate": 2.550984138337448e-06, + "loss": 3.3748, + "step": 5706 + }, + { + "epoch": 0.68, + "learning_rate": 2.5492404503108226e-06, + "loss": 3.3674, + "step": 5707 + }, + { + "epoch": 0.68, + "learning_rate": 2.547497154503753e-06, + "loss": 3.4422, + "step": 5708 + }, + { + "epoch": 0.68, + "learning_rate": 2.5457542511952436e-06, + "loss": 3.3903, + "step": 5709 + }, + { + "epoch": 0.68, + "learning_rate": 2.544011740664225e-06, + "loss": 3.4348, + "step": 5710 + }, + { + "epoch": 0.68, + "learning_rate": 2.542269623189569e-06, + "loss": 3.3647, + "step": 5711 + }, + { + "epoch": 0.68, + "learning_rate": 2.540527899050085e-06, + "loss": 3.3948, + "step": 5712 + }, + { + "epoch": 0.68, + "learning_rate": 2.5387865685245172e-06, + "loss": 3.4473, + "step": 5713 + }, + { + "epoch": 0.68, + "learning_rate": 2.537045631891547e-06, + "loss": 3.4978, + "step": 5714 + }, + { + "epoch": 0.68, + "learning_rate": 2.5353050894298e-06, + "loss": 3.4752, + "step": 5715 + }, + { + "epoch": 0.68, + "learning_rate": 2.5335649414178244e-06, + "loss": 3.3942, + "step": 5716 + }, + { + "epoch": 0.68, + "learning_rate": 2.5318251881341183e-06, + "loss": 3.3416, + "step": 5717 + }, + { + "epoch": 0.68, + "learning_rate": 2.5300858298571117e-06, + "loss": 3.4343, + "step": 5718 + }, + { + "epoch": 0.68, + "learning_rate": 2.5283468668651704e-06, + "loss": 3.4736, + "step": 5719 + }, + { + "epoch": 0.68, + "learning_rate": 2.5266082994365987e-06, + "loss": 3.3942, + "step": 5720 + }, + { + "epoch": 0.68, + "learning_rate": 2.524870127849636e-06, + "loss": 3.2963, + "step": 5721 + }, + { + "epoch": 0.69, + "learning_rate": 2.5231323523824587e-06, + "loss": 3.3742, + "step": 5722 + }, + { + "epoch": 0.69, + "learning_rate": 2.521394973313185e-06, + "loss": 3.3808, + "step": 5723 + }, + { + "epoch": 0.69, + "learning_rate": 2.5196579909198575e-06, + "loss": 3.3796, + "step": 5724 + }, + { + "epoch": 0.69, + "learning_rate": 2.517921405480469e-06, + "loss": 3.4513, + "step": 5725 + }, + { + "epoch": 0.69, + "learning_rate": 2.5161852172729405e-06, + "loss": 3.3883, + "step": 5726 + }, + { + "epoch": 0.69, + "learning_rate": 2.514449426575132e-06, + "loss": 3.4271, + "step": 5727 + }, + { + "epoch": 0.69, + "learning_rate": 2.512714033664839e-06, + "loss": 3.4631, + "step": 5728 + }, + { + "epoch": 0.69, + "learning_rate": 2.5109790388197942e-06, + "loss": 3.3655, + "step": 5729 + }, + { + "epoch": 0.69, + "learning_rate": 2.5092444423176633e-06, + "loss": 3.4235, + "step": 5730 + }, + { + "epoch": 0.69, + "learning_rate": 2.5075102444360555e-06, + "loss": 3.3724, + "step": 5731 + }, + { + "epoch": 0.69, + "learning_rate": 2.50577644545251e-06, + "loss": 3.449, + "step": 5732 + }, + { + "epoch": 0.69, + "learning_rate": 2.5040430456445047e-06, + "loss": 3.3162, + "step": 5733 + }, + { + "epoch": 0.69, + "learning_rate": 2.5023100452894516e-06, + "loss": 3.4069, + "step": 5734 + }, + { + "epoch": 0.69, + "learning_rate": 2.5005774446647012e-06, + "loss": 3.3659, + "step": 5735 + }, + { + "epoch": 0.69, + "learning_rate": 2.498845244047538e-06, + "loss": 3.2956, + "step": 5736 + }, + { + "epoch": 0.69, + "learning_rate": 2.4971134437151816e-06, + "loss": 3.4168, + "step": 5737 + }, + { + "epoch": 0.69, + "learning_rate": 2.495382043944794e-06, + "loss": 3.4288, + "step": 5738 + }, + { + "epoch": 0.69, + "learning_rate": 2.4936510450134654e-06, + "loss": 3.4138, + "step": 5739 + }, + { + "epoch": 0.69, + "learning_rate": 2.4919204471982256e-06, + "loss": 3.4523, + "step": 5740 + }, + { + "epoch": 0.69, + "learning_rate": 2.4901902507760396e-06, + "loss": 3.4171, + "step": 5741 + }, + { + "epoch": 0.69, + "learning_rate": 2.4884604560238072e-06, + "loss": 3.3624, + "step": 5742 + }, + { + "epoch": 0.69, + "learning_rate": 2.4867310632183634e-06, + "loss": 3.3099, + "step": 5743 + }, + { + "epoch": 0.69, + "learning_rate": 2.4850020726364858e-06, + "loss": 3.3816, + "step": 5744 + }, + { + "epoch": 0.69, + "learning_rate": 2.483273484554875e-06, + "loss": 3.363, + "step": 5745 + }, + { + "epoch": 0.69, + "learning_rate": 2.481545299250179e-06, + "loss": 3.4625, + "step": 5746 + }, + { + "epoch": 0.69, + "learning_rate": 2.479817516998976e-06, + "loss": 3.4579, + "step": 5747 + }, + { + "epoch": 0.69, + "learning_rate": 2.4780901380777784e-06, + "loss": 3.4195, + "step": 5748 + }, + { + "epoch": 0.69, + "learning_rate": 2.4763631627630358e-06, + "loss": 3.3349, + "step": 5749 + }, + { + "epoch": 0.69, + "learning_rate": 2.4746365913311376e-06, + "loss": 3.3718, + "step": 5750 + }, + { + "epoch": 0.69, + "learning_rate": 2.472910424058398e-06, + "loss": 3.5, + "step": 5751 + }, + { + "epoch": 0.69, + "learning_rate": 2.4711846612210793e-06, + "loss": 3.3358, + "step": 5752 + }, + { + "epoch": 0.69, + "learning_rate": 2.4694593030953654e-06, + "loss": 3.3525, + "step": 5753 + }, + { + "epoch": 0.69, + "learning_rate": 2.4677343499573887e-06, + "loss": 3.3445, + "step": 5754 + }, + { + "epoch": 0.69, + "learning_rate": 2.4660098020832087e-06, + "loss": 3.4443, + "step": 5755 + }, + { + "epoch": 0.69, + "learning_rate": 2.464285659748821e-06, + "loss": 3.3494, + "step": 5756 + }, + { + "epoch": 0.69, + "learning_rate": 2.462561923230159e-06, + "loss": 3.336, + "step": 5757 + }, + { + "epoch": 0.69, + "learning_rate": 2.4608385928030885e-06, + "loss": 3.4013, + "step": 5758 + }, + { + "epoch": 0.69, + "learning_rate": 2.459115668743409e-06, + "loss": 3.4677, + "step": 5759 + }, + { + "epoch": 0.69, + "learning_rate": 2.4573931513268643e-06, + "loss": 3.3808, + "step": 5760 + }, + { + "epoch": 0.69, + "learning_rate": 2.4556710408291174e-06, + "loss": 3.4046, + "step": 5761 + }, + { + "epoch": 0.69, + "learning_rate": 2.453949337525781e-06, + "loss": 3.3394, + "step": 5762 + }, + { + "epoch": 0.69, + "learning_rate": 2.4522280416923943e-06, + "loss": 3.4265, + "step": 5763 + }, + { + "epoch": 0.69, + "learning_rate": 2.450507153604434e-06, + "loss": 3.3019, + "step": 5764 + }, + { + "epoch": 0.69, + "learning_rate": 2.44878667353731e-06, + "loss": 3.4258, + "step": 5765 + }, + { + "epoch": 0.69, + "learning_rate": 2.4470666017663693e-06, + "loss": 3.319, + "step": 5766 + }, + { + "epoch": 0.69, + "learning_rate": 2.4453469385668894e-06, + "loss": 3.5336, + "step": 5767 + }, + { + "epoch": 0.69, + "learning_rate": 2.4436276842140912e-06, + "loss": 3.4314, + "step": 5768 + }, + { + "epoch": 0.69, + "learning_rate": 2.4419088389831164e-06, + "loss": 3.327, + "step": 5769 + }, + { + "epoch": 0.69, + "learning_rate": 2.440190403149055e-06, + "loss": 3.4546, + "step": 5770 + }, + { + "epoch": 0.69, + "learning_rate": 2.4384723769869228e-06, + "loss": 3.5348, + "step": 5771 + }, + { + "epoch": 0.69, + "learning_rate": 2.4367547607716736e-06, + "loss": 3.3871, + "step": 5772 + }, + { + "epoch": 0.69, + "learning_rate": 2.435037554778194e-06, + "loss": 3.4191, + "step": 5773 + }, + { + "epoch": 0.69, + "learning_rate": 2.433320759281307e-06, + "loss": 3.4001, + "step": 5774 + }, + { + "epoch": 0.69, + "learning_rate": 2.431604374555765e-06, + "loss": 3.3296, + "step": 5775 + }, + { + "epoch": 0.69, + "learning_rate": 2.4298884008762624e-06, + "loss": 3.4706, + "step": 5776 + }, + { + "epoch": 0.69, + "learning_rate": 2.4281728385174226e-06, + "loss": 3.3588, + "step": 5777 + }, + { + "epoch": 0.69, + "learning_rate": 2.4264576877538033e-06, + "loss": 3.4319, + "step": 5778 + }, + { + "epoch": 0.69, + "learning_rate": 2.424742948859897e-06, + "loss": 3.5016, + "step": 5779 + }, + { + "epoch": 0.69, + "learning_rate": 2.423028622110132e-06, + "loss": 3.4209, + "step": 5780 + }, + { + "epoch": 0.69, + "learning_rate": 2.4213147077788672e-06, + "loss": 3.4265, + "step": 5781 + }, + { + "epoch": 0.69, + "learning_rate": 2.4196012061403974e-06, + "loss": 3.3558, + "step": 5782 + }, + { + "epoch": 0.69, + "learning_rate": 2.417888117468954e-06, + "loss": 3.4259, + "step": 5783 + }, + { + "epoch": 0.69, + "learning_rate": 2.4161754420386983e-06, + "loss": 3.403, + "step": 5784 + }, + { + "epoch": 0.69, + "learning_rate": 2.4144631801237267e-06, + "loss": 3.4386, + "step": 5785 + }, + { + "epoch": 0.69, + "learning_rate": 2.41275133199807e-06, + "loss": 3.3601, + "step": 5786 + }, + { + "epoch": 0.69, + "learning_rate": 2.4110398979356915e-06, + "loss": 3.4748, + "step": 5787 + }, + { + "epoch": 0.69, + "learning_rate": 2.409328878210488e-06, + "loss": 3.4301, + "step": 5788 + }, + { + "epoch": 0.69, + "learning_rate": 2.407618273096296e-06, + "loss": 3.3578, + "step": 5789 + }, + { + "epoch": 0.69, + "learning_rate": 2.4059080828668747e-06, + "loss": 3.3201, + "step": 5790 + }, + { + "epoch": 0.69, + "learning_rate": 2.404198307795927e-06, + "loss": 3.4244, + "step": 5791 + }, + { + "epoch": 0.69, + "learning_rate": 2.402488948157084e-06, + "loss": 3.4204, + "step": 5792 + }, + { + "epoch": 0.69, + "learning_rate": 2.4007800042239117e-06, + "loss": 3.4092, + "step": 5793 + }, + { + "epoch": 0.69, + "learning_rate": 2.3990714762699074e-06, + "loss": 3.4256, + "step": 5794 + }, + { + "epoch": 0.69, + "learning_rate": 2.3973633645685097e-06, + "loss": 3.454, + "step": 5795 + }, + { + "epoch": 0.69, + "learning_rate": 2.3956556693930773e-06, + "loss": 3.3572, + "step": 5796 + }, + { + "epoch": 0.69, + "learning_rate": 2.3939483910169177e-06, + "loss": 3.4812, + "step": 5797 + }, + { + "epoch": 0.69, + "learning_rate": 2.392241529713255e-06, + "loss": 3.3889, + "step": 5798 + }, + { + "epoch": 0.69, + "learning_rate": 2.3905350857552624e-06, + "loss": 3.4072, + "step": 5799 + }, + { + "epoch": 0.69, + "learning_rate": 2.388829059416036e-06, + "loss": 3.3556, + "step": 5800 + }, + { + "epoch": 0.69, + "learning_rate": 2.3871234509686094e-06, + "loss": 3.3996, + "step": 5801 + }, + { + "epoch": 0.69, + "learning_rate": 2.385418260685945e-06, + "loss": 3.4145, + "step": 5802 + }, + { + "epoch": 0.69, + "learning_rate": 2.383713488840948e-06, + "loss": 3.4166, + "step": 5803 + }, + { + "epoch": 0.69, + "learning_rate": 2.382009135706443e-06, + "loss": 3.476, + "step": 5804 + }, + { + "epoch": 0.69, + "learning_rate": 2.380305201555201e-06, + "loss": 3.5554, + "step": 5805 + }, + { + "epoch": 0.7, + "learning_rate": 2.378601686659913e-06, + "loss": 3.4525, + "step": 5806 + }, + { + "epoch": 0.7, + "learning_rate": 2.3768985912932146e-06, + "loss": 3.4531, + "step": 5807 + }, + { + "epoch": 0.7, + "learning_rate": 2.3751959157276676e-06, + "loss": 3.4306, + "step": 5808 + }, + { + "epoch": 0.7, + "learning_rate": 2.3734936602357688e-06, + "loss": 3.4788, + "step": 5809 + }, + { + "epoch": 0.7, + "learning_rate": 2.371791825089944e-06, + "loss": 3.3354, + "step": 5810 + }, + { + "epoch": 0.7, + "learning_rate": 2.3700904105625617e-06, + "loss": 3.3801, + "step": 5811 + }, + { + "epoch": 0.7, + "learning_rate": 2.3683894169259076e-06, + "loss": 3.4695, + "step": 5812 + }, + { + "epoch": 0.7, + "learning_rate": 2.3666888444522172e-06, + "loss": 3.4054, + "step": 5813 + }, + { + "epoch": 0.7, + "learning_rate": 2.3649886934136424e-06, + "loss": 3.3624, + "step": 5814 + }, + { + "epoch": 0.7, + "learning_rate": 2.363288964082281e-06, + "loss": 3.4356, + "step": 5815 + }, + { + "epoch": 0.7, + "learning_rate": 2.3615896567301557e-06, + "loss": 3.4208, + "step": 5816 + }, + { + "epoch": 0.7, + "learning_rate": 2.3598907716292236e-06, + "loss": 3.3309, + "step": 5817 + }, + { + "epoch": 0.7, + "learning_rate": 2.3581923090513718e-06, + "loss": 3.3638, + "step": 5818 + }, + { + "epoch": 0.7, + "learning_rate": 2.3564942692684274e-06, + "loss": 3.514, + "step": 5819 + }, + { + "epoch": 0.7, + "learning_rate": 2.3547966525521423e-06, + "loss": 3.3493, + "step": 5820 + }, + { + "epoch": 0.7, + "learning_rate": 2.3530994591742023e-06, + "loss": 3.4127, + "step": 5821 + }, + { + "epoch": 0.7, + "learning_rate": 2.351402689406227e-06, + "loss": 3.3756, + "step": 5822 + }, + { + "epoch": 0.7, + "learning_rate": 2.349706343519767e-06, + "loss": 3.3504, + "step": 5823 + }, + { + "epoch": 0.7, + "learning_rate": 2.348010421786306e-06, + "loss": 3.3535, + "step": 5824 + }, + { + "epoch": 0.7, + "learning_rate": 2.346314924477257e-06, + "loss": 3.3703, + "step": 5825 + }, + { + "epoch": 0.7, + "learning_rate": 2.3446198518639733e-06, + "loss": 3.4609, + "step": 5826 + }, + { + "epoch": 0.7, + "learning_rate": 2.342925204217727e-06, + "loss": 3.5311, + "step": 5827 + }, + { + "epoch": 0.7, + "learning_rate": 2.3412309818097345e-06, + "loss": 3.4474, + "step": 5828 + }, + { + "epoch": 0.7, + "learning_rate": 2.3395371849111377e-06, + "loss": 3.3576, + "step": 5829 + }, + { + "epoch": 0.7, + "learning_rate": 2.3378438137930116e-06, + "loss": 3.3425, + "step": 5830 + }, + { + "epoch": 0.7, + "learning_rate": 2.336150868726362e-06, + "loss": 3.4844, + "step": 5831 + }, + { + "epoch": 0.7, + "learning_rate": 2.334458349982132e-06, + "loss": 3.451, + "step": 5832 + }, + { + "epoch": 0.7, + "learning_rate": 2.3327662578311873e-06, + "loss": 3.3472, + "step": 5833 + }, + { + "epoch": 0.7, + "learning_rate": 2.3310745925443357e-06, + "loss": 3.3995, + "step": 5834 + }, + { + "epoch": 0.7, + "learning_rate": 2.3293833543923046e-06, + "loss": 3.4301, + "step": 5835 + }, + { + "epoch": 0.7, + "learning_rate": 2.3276925436457647e-06, + "loss": 3.3976, + "step": 5836 + }, + { + "epoch": 0.7, + "learning_rate": 2.326002160575313e-06, + "loss": 3.312, + "step": 5837 + }, + { + "epoch": 0.7, + "learning_rate": 2.324312205451477e-06, + "loss": 3.375, + "step": 5838 + }, + { + "epoch": 0.7, + "learning_rate": 2.3226226785447158e-06, + "loss": 3.4585, + "step": 5839 + }, + { + "epoch": 0.7, + "learning_rate": 2.320933580125427e-06, + "loss": 3.5112, + "step": 5840 + }, + { + "epoch": 0.7, + "learning_rate": 2.3192449104639257e-06, + "loss": 3.3802, + "step": 5841 + }, + { + "epoch": 0.7, + "learning_rate": 2.317556669830475e-06, + "loss": 3.395, + "step": 5842 + }, + { + "epoch": 0.7, + "learning_rate": 2.3158688584952534e-06, + "loss": 3.3395, + "step": 5843 + }, + { + "epoch": 0.7, + "learning_rate": 2.314181476728383e-06, + "loss": 3.2933, + "step": 5844 + }, + { + "epoch": 0.7, + "learning_rate": 2.3124945247999116e-06, + "loss": 3.3726, + "step": 5845 + }, + { + "epoch": 0.7, + "learning_rate": 2.310808002979818e-06, + "loss": 3.3938, + "step": 5846 + }, + { + "epoch": 0.7, + "learning_rate": 2.309121911538012e-06, + "loss": 3.2864, + "step": 5847 + }, + { + "epoch": 0.7, + "learning_rate": 2.3074362507443405e-06, + "loss": 3.4422, + "step": 5848 + }, + { + "epoch": 0.7, + "learning_rate": 2.3057510208685702e-06, + "loss": 3.3183, + "step": 5849 + }, + { + "epoch": 0.7, + "learning_rate": 2.304066222180412e-06, + "loss": 3.3624, + "step": 5850 + }, + { + "epoch": 0.7, + "learning_rate": 2.3023818549494937e-06, + "loss": 3.5279, + "step": 5851 + }, + { + "epoch": 0.7, + "learning_rate": 2.3006979194453873e-06, + "loss": 3.4222, + "step": 5852 + }, + { + "epoch": 0.7, + "learning_rate": 2.2990144159375878e-06, + "loss": 3.4811, + "step": 5853 + }, + { + "epoch": 0.7, + "learning_rate": 2.297331344695523e-06, + "loss": 3.3791, + "step": 5854 + }, + { + "epoch": 0.7, + "learning_rate": 2.29564870598855e-06, + "loss": 3.3703, + "step": 5855 + }, + { + "epoch": 0.7, + "learning_rate": 2.293966500085964e-06, + "loss": 3.4605, + "step": 5856 + }, + { + "epoch": 0.7, + "learning_rate": 2.292284727256977e-06, + "loss": 3.3076, + "step": 5857 + }, + { + "epoch": 0.7, + "learning_rate": 2.290603387770747e-06, + "loss": 3.4802, + "step": 5858 + }, + { + "epoch": 0.7, + "learning_rate": 2.2889224818963522e-06, + "loss": 3.4496, + "step": 5859 + }, + { + "epoch": 0.7, + "learning_rate": 2.287242009902805e-06, + "loss": 3.5172, + "step": 5860 + }, + { + "epoch": 0.7, + "learning_rate": 2.2855619720590495e-06, + "loss": 3.3801, + "step": 5861 + }, + { + "epoch": 0.7, + "learning_rate": 2.2838823686339574e-06, + "loss": 3.4978, + "step": 5862 + }, + { + "epoch": 0.7, + "learning_rate": 2.2822031998963313e-06, + "loss": 3.4584, + "step": 5863 + }, + { + "epoch": 0.7, + "learning_rate": 2.280524466114909e-06, + "loss": 3.2943, + "step": 5864 + }, + { + "epoch": 0.7, + "learning_rate": 2.2788461675583527e-06, + "loss": 3.5181, + "step": 5865 + }, + { + "epoch": 0.7, + "learning_rate": 2.2771683044952587e-06, + "loss": 3.5758, + "step": 5866 + }, + { + "epoch": 0.7, + "learning_rate": 2.2754908771941504e-06, + "loss": 3.3937, + "step": 5867 + }, + { + "epoch": 0.7, + "learning_rate": 2.2738138859234845e-06, + "loss": 3.4279, + "step": 5868 + }, + { + "epoch": 0.7, + "learning_rate": 2.272137330951646e-06, + "loss": 3.4503, + "step": 5869 + }, + { + "epoch": 0.7, + "learning_rate": 2.2704612125469487e-06, + "loss": 3.2704, + "step": 5870 + }, + { + "epoch": 0.7, + "learning_rate": 2.2687855309776437e-06, + "loss": 3.3091, + "step": 5871 + }, + { + "epoch": 0.7, + "learning_rate": 2.2671102865119045e-06, + "loss": 3.3667, + "step": 5872 + }, + { + "epoch": 0.7, + "learning_rate": 2.2654354794178363e-06, + "loss": 3.338, + "step": 5873 + }, + { + "epoch": 0.7, + "learning_rate": 2.263761109963476e-06, + "loss": 3.3779, + "step": 5874 + }, + { + "epoch": 0.7, + "learning_rate": 2.2620871784167895e-06, + "loss": 3.4028, + "step": 5875 + }, + { + "epoch": 0.7, + "learning_rate": 2.260413685045672e-06, + "loss": 3.3309, + "step": 5876 + }, + { + "epoch": 0.7, + "learning_rate": 2.258740630117954e-06, + "loss": 3.3574, + "step": 5877 + }, + { + "epoch": 0.7, + "learning_rate": 2.2570680139013827e-06, + "loss": 3.4461, + "step": 5878 + }, + { + "epoch": 0.7, + "learning_rate": 2.2553958366636513e-06, + "loss": 3.5275, + "step": 5879 + }, + { + "epoch": 0.7, + "learning_rate": 2.253724098672372e-06, + "loss": 3.4655, + "step": 5880 + }, + { + "epoch": 0.7, + "learning_rate": 2.2520528001950898e-06, + "loss": 3.3132, + "step": 5881 + }, + { + "epoch": 0.7, + "learning_rate": 2.2503819414992795e-06, + "loss": 3.4462, + "step": 5882 + }, + { + "epoch": 0.7, + "learning_rate": 2.2487115228523454e-06, + "loss": 3.3846, + "step": 5883 + }, + { + "epoch": 0.7, + "learning_rate": 2.247041544521619e-06, + "loss": 3.3907, + "step": 5884 + }, + { + "epoch": 0.7, + "learning_rate": 2.2453720067743702e-06, + "loss": 3.4, + "step": 5885 + }, + { + "epoch": 0.7, + "learning_rate": 2.2437029098777833e-06, + "loss": 3.3307, + "step": 5886 + }, + { + "epoch": 0.7, + "learning_rate": 2.2420342540989888e-06, + "loss": 3.4718, + "step": 5887 + }, + { + "epoch": 0.7, + "learning_rate": 2.2403660397050303e-06, + "loss": 3.442, + "step": 5888 + }, + { + "epoch": 0.71, + "learning_rate": 2.2386982669628945e-06, + "loss": 3.4002, + "step": 5889 + }, + { + "epoch": 0.71, + "learning_rate": 2.2370309361394906e-06, + "loss": 3.3952, + "step": 5890 + }, + { + "epoch": 0.71, + "learning_rate": 2.235364047501657e-06, + "loss": 3.3132, + "step": 5891 + }, + { + "epoch": 0.71, + "learning_rate": 2.2336976013161614e-06, + "loss": 3.4145, + "step": 5892 + }, + { + "epoch": 0.71, + "learning_rate": 2.232031597849708e-06, + "loss": 3.386, + "step": 5893 + }, + { + "epoch": 0.71, + "learning_rate": 2.2303660373689157e-06, + "loss": 3.4237, + "step": 5894 + }, + { + "epoch": 0.71, + "learning_rate": 2.2287009201403486e-06, + "loss": 3.4215, + "step": 5895 + }, + { + "epoch": 0.71, + "learning_rate": 2.2270362464304846e-06, + "loss": 3.4805, + "step": 5896 + }, + { + "epoch": 0.71, + "learning_rate": 2.2253720165057435e-06, + "loss": 3.3912, + "step": 5897 + }, + { + "epoch": 0.71, + "learning_rate": 2.2237082306324668e-06, + "loss": 3.2877, + "step": 5898 + }, + { + "epoch": 0.71, + "learning_rate": 2.222044889076927e-06, + "loss": 3.4452, + "step": 5899 + }, + { + "epoch": 0.71, + "learning_rate": 2.2203819921053237e-06, + "loss": 3.3585, + "step": 5900 + }, + { + "epoch": 0.71, + "learning_rate": 2.218719539983792e-06, + "loss": 3.4036, + "step": 5901 + }, + { + "epoch": 0.71, + "learning_rate": 2.217057532978383e-06, + "loss": 3.4366, + "step": 5902 + }, + { + "epoch": 0.71, + "learning_rate": 2.2153959713550905e-06, + "loss": 3.5056, + "step": 5903 + }, + { + "epoch": 0.71, + "learning_rate": 2.2137348553798286e-06, + "loss": 3.4068, + "step": 5904 + }, + { + "epoch": 0.71, + "learning_rate": 2.212074185318443e-06, + "loss": 3.4433, + "step": 5905 + }, + { + "epoch": 0.71, + "learning_rate": 2.2104139614367067e-06, + "loss": 3.4166, + "step": 5906 + }, + { + "epoch": 0.71, + "learning_rate": 2.2087541840003216e-06, + "loss": 3.4412, + "step": 5907 + }, + { + "epoch": 0.71, + "learning_rate": 2.2070948532749175e-06, + "loss": 3.3014, + "step": 5908 + }, + { + "epoch": 0.71, + "learning_rate": 2.205435969526057e-06, + "loss": 3.4385, + "step": 5909 + }, + { + "epoch": 0.71, + "learning_rate": 2.2037775330192256e-06, + "loss": 3.3709, + "step": 5910 + }, + { + "epoch": 0.71, + "learning_rate": 2.2021195440198403e-06, + "loss": 3.4967, + "step": 5911 + }, + { + "epoch": 0.71, + "learning_rate": 2.200462002793245e-06, + "loss": 3.4835, + "step": 5912 + }, + { + "epoch": 0.71, + "learning_rate": 2.1988049096047107e-06, + "loss": 3.448, + "step": 5913 + }, + { + "epoch": 0.71, + "learning_rate": 2.1971482647194446e-06, + "loss": 3.4681, + "step": 5914 + }, + { + "epoch": 0.71, + "learning_rate": 2.195492068402569e-06, + "loss": 3.4227, + "step": 5915 + }, + { + "epoch": 0.71, + "learning_rate": 2.193836320919146e-06, + "loss": 3.4286, + "step": 5916 + }, + { + "epoch": 0.71, + "learning_rate": 2.1921810225341605e-06, + "loss": 3.4147, + "step": 5917 + }, + { + "epoch": 0.71, + "learning_rate": 2.1905261735125265e-06, + "loss": 3.4764, + "step": 5918 + }, + { + "epoch": 0.71, + "learning_rate": 2.188871774119086e-06, + "loss": 3.3678, + "step": 5919 + }, + { + "epoch": 0.71, + "learning_rate": 2.1872178246186083e-06, + "loss": 3.4862, + "step": 5920 + }, + { + "epoch": 0.71, + "learning_rate": 2.1855643252757896e-06, + "loss": 3.3352, + "step": 5921 + }, + { + "epoch": 0.71, + "learning_rate": 2.1839112763552628e-06, + "loss": 3.4296, + "step": 5922 + }, + { + "epoch": 0.71, + "learning_rate": 2.182258678121574e-06, + "loss": 3.3998, + "step": 5923 + }, + { + "epoch": 0.71, + "learning_rate": 2.1806065308392094e-06, + "loss": 3.5374, + "step": 5924 + }, + { + "epoch": 0.71, + "learning_rate": 2.178954834772578e-06, + "loss": 3.5078, + "step": 5925 + }, + { + "epoch": 0.71, + "learning_rate": 2.1773035901860173e-06, + "loss": 3.3648, + "step": 5926 + }, + { + "epoch": 0.71, + "learning_rate": 2.175652797343792e-06, + "loss": 3.4265, + "step": 5927 + }, + { + "epoch": 0.71, + "learning_rate": 2.174002456510095e-06, + "loss": 3.3792, + "step": 5928 + }, + { + "epoch": 0.71, + "learning_rate": 2.1723525679490442e-06, + "loss": 3.3888, + "step": 5929 + }, + { + "epoch": 0.71, + "learning_rate": 2.170703131924695e-06, + "loss": 3.3109, + "step": 5930 + }, + { + "epoch": 0.71, + "learning_rate": 2.1690541487010153e-06, + "loss": 3.3574, + "step": 5931 + }, + { + "epoch": 0.71, + "learning_rate": 2.167405618541913e-06, + "loss": 3.3829, + "step": 5932 + }, + { + "epoch": 0.71, + "learning_rate": 2.1657575417112177e-06, + "loss": 3.4148, + "step": 5933 + }, + { + "epoch": 0.71, + "learning_rate": 2.164109918472687e-06, + "loss": 3.4211, + "step": 5934 + }, + { + "epoch": 0.71, + "learning_rate": 2.162462749090008e-06, + "loss": 3.4805, + "step": 5935 + }, + { + "epoch": 0.71, + "learning_rate": 2.160816033826792e-06, + "loss": 3.4411, + "step": 5936 + }, + { + "epoch": 0.71, + "learning_rate": 2.159169772946579e-06, + "loss": 3.4143, + "step": 5937 + }, + { + "epoch": 0.71, + "learning_rate": 2.15752396671284e-06, + "loss": 3.4271, + "step": 5938 + }, + { + "epoch": 0.71, + "learning_rate": 2.1558786153889647e-06, + "loss": 3.4159, + "step": 5939 + }, + { + "epoch": 0.71, + "learning_rate": 2.1542337192382793e-06, + "loss": 3.3644, + "step": 5940 + }, + { + "epoch": 0.71, + "learning_rate": 2.152589278524032e-06, + "loss": 3.3975, + "step": 5941 + }, + { + "epoch": 0.71, + "learning_rate": 2.1509452935093983e-06, + "loss": 3.3904, + "step": 5942 + }, + { + "epoch": 0.71, + "learning_rate": 2.149301764457482e-06, + "loss": 3.3248, + "step": 5943 + }, + { + "epoch": 0.71, + "learning_rate": 2.1476586916313135e-06, + "loss": 3.3616, + "step": 5944 + }, + { + "epoch": 0.71, + "learning_rate": 2.146016075293848e-06, + "loss": 3.4168, + "step": 5945 + }, + { + "epoch": 0.71, + "learning_rate": 2.144373915707974e-06, + "loss": 3.4236, + "step": 5946 + }, + { + "epoch": 0.71, + "learning_rate": 2.1427322131365007e-06, + "loss": 3.3852, + "step": 5947 + }, + { + "epoch": 0.71, + "learning_rate": 2.141090967842166e-06, + "loss": 3.4197, + "step": 5948 + }, + { + "epoch": 0.71, + "learning_rate": 2.1394501800876346e-06, + "loss": 3.484, + "step": 5949 + }, + { + "epoch": 0.71, + "learning_rate": 2.1378098501354986e-06, + "loss": 3.3861, + "step": 5950 + }, + { + "epoch": 0.71, + "learning_rate": 2.1361699782482765e-06, + "loss": 3.3874, + "step": 5951 + }, + { + "epoch": 0.71, + "learning_rate": 2.134530564688411e-06, + "loss": 3.4406, + "step": 5952 + }, + { + "epoch": 0.71, + "learning_rate": 2.1328916097182783e-06, + "loss": 3.394, + "step": 5953 + }, + { + "epoch": 0.71, + "learning_rate": 2.131253113600174e-06, + "loss": 3.3174, + "step": 5954 + }, + { + "epoch": 0.71, + "learning_rate": 2.1296150765963234e-06, + "loss": 3.443, + "step": 5955 + }, + { + "epoch": 0.71, + "learning_rate": 2.1279774989688787e-06, + "loss": 3.4938, + "step": 5956 + }, + { + "epoch": 0.71, + "learning_rate": 2.1263403809799166e-06, + "loss": 3.3376, + "step": 5957 + }, + { + "epoch": 0.71, + "learning_rate": 2.1247037228914407e-06, + "loss": 3.4056, + "step": 5958 + }, + { + "epoch": 0.71, + "learning_rate": 2.1230675249653863e-06, + "loss": 3.4071, + "step": 5959 + }, + { + "epoch": 0.71, + "learning_rate": 2.1214317874636044e-06, + "loss": 3.4552, + "step": 5960 + }, + { + "epoch": 0.71, + "learning_rate": 2.1197965106478822e-06, + "loss": 3.33, + "step": 5961 + }, + { + "epoch": 0.71, + "learning_rate": 2.118161694779929e-06, + "loss": 3.4963, + "step": 5962 + }, + { + "epoch": 0.71, + "learning_rate": 2.11652734012138e-06, + "loss": 3.4879, + "step": 5963 + }, + { + "epoch": 0.71, + "learning_rate": 2.114893446933798e-06, + "loss": 3.3665, + "step": 5964 + }, + { + "epoch": 0.71, + "learning_rate": 2.1132600154786702e-06, + "loss": 3.3622, + "step": 5965 + }, + { + "epoch": 0.71, + "learning_rate": 2.1116270460174102e-06, + "loss": 3.4405, + "step": 5966 + }, + { + "epoch": 0.71, + "learning_rate": 2.109994538811364e-06, + "loss": 3.3824, + "step": 5967 + }, + { + "epoch": 0.71, + "learning_rate": 2.10836249412179e-06, + "loss": 3.3758, + "step": 5968 + }, + { + "epoch": 0.71, + "learning_rate": 2.1067309122098863e-06, + "loss": 3.3345, + "step": 5969 + }, + { + "epoch": 0.71, + "learning_rate": 2.1050997933367697e-06, + "loss": 3.4452, + "step": 5970 + }, + { + "epoch": 0.71, + "learning_rate": 2.1034691377634838e-06, + "loss": 3.3466, + "step": 5971 + }, + { + "epoch": 0.71, + "learning_rate": 2.101838945750999e-06, + "loss": 3.4448, + "step": 5972 + }, + { + "epoch": 0.72, + "learning_rate": 2.1002092175602125e-06, + "loss": 3.3838, + "step": 5973 + }, + { + "epoch": 0.72, + "learning_rate": 2.0985799534519426e-06, + "loss": 3.4009, + "step": 5974 + }, + { + "epoch": 0.72, + "learning_rate": 2.0969511536869426e-06, + "loss": 3.3327, + "step": 5975 + }, + { + "epoch": 0.72, + "learning_rate": 2.0953228185258783e-06, + "loss": 3.3595, + "step": 5976 + }, + { + "epoch": 0.72, + "learning_rate": 2.0936949482293543e-06, + "loss": 3.4386, + "step": 5977 + }, + { + "epoch": 0.72, + "learning_rate": 2.092067543057893e-06, + "loss": 3.3709, + "step": 5978 + }, + { + "epoch": 0.72, + "learning_rate": 2.0904406032719437e-06, + "loss": 3.4113, + "step": 5979 + }, + { + "epoch": 0.72, + "learning_rate": 2.088814129131883e-06, + "loss": 3.3359, + "step": 5980 + }, + { + "epoch": 0.72, + "learning_rate": 2.087188120898011e-06, + "loss": 3.3001, + "step": 5981 + }, + { + "epoch": 0.72, + "learning_rate": 2.085562578830552e-06, + "loss": 3.3608, + "step": 5982 + }, + { + "epoch": 0.72, + "learning_rate": 2.0839375031896645e-06, + "loss": 3.4735, + "step": 5983 + }, + { + "epoch": 0.72, + "learning_rate": 2.082312894235417e-06, + "loss": 3.4513, + "step": 5984 + }, + { + "epoch": 0.72, + "learning_rate": 2.080688752227818e-06, + "loss": 3.3645, + "step": 5985 + }, + { + "epoch": 0.72, + "learning_rate": 2.0790650774267936e-06, + "loss": 3.4347, + "step": 5986 + }, + { + "epoch": 0.72, + "learning_rate": 2.0774418700921956e-06, + "loss": 3.4413, + "step": 5987 + }, + { + "epoch": 0.72, + "learning_rate": 2.0758191304838034e-06, + "loss": 3.3866, + "step": 5988 + }, + { + "epoch": 0.72, + "learning_rate": 2.0741968588613192e-06, + "loss": 3.5174, + "step": 5989 + }, + { + "epoch": 0.72, + "learning_rate": 2.0725750554843704e-06, + "loss": 3.4333, + "step": 5990 + }, + { + "epoch": 0.72, + "learning_rate": 2.0709537206125137e-06, + "loss": 3.3906, + "step": 5991 + }, + { + "epoch": 0.72, + "learning_rate": 2.0693328545052253e-06, + "loss": 3.3453, + "step": 5992 + }, + { + "epoch": 0.72, + "learning_rate": 2.067712457421909e-06, + "loss": 3.4465, + "step": 5993 + }, + { + "epoch": 0.72, + "learning_rate": 2.0660925296218936e-06, + "loss": 3.5013, + "step": 5994 + }, + { + "epoch": 0.72, + "learning_rate": 2.064473071364432e-06, + "loss": 3.4851, + "step": 5995 + }, + { + "epoch": 0.72, + "learning_rate": 2.062854082908702e-06, + "loss": 3.3521, + "step": 5996 + }, + { + "epoch": 0.72, + "learning_rate": 2.0612355645138044e-06, + "loss": 3.5141, + "step": 5997 + }, + { + "epoch": 0.72, + "learning_rate": 2.0596175164387717e-06, + "loss": 3.4126, + "step": 5998 + }, + { + "epoch": 0.72, + "learning_rate": 2.0579999389425537e-06, + "loss": 3.443, + "step": 5999 + }, + { + "epoch": 0.72, + "learning_rate": 2.0563828322840274e-06, + "loss": 3.392, + "step": 6000 + }, + { + "epoch": 0.72, + "learning_rate": 2.0547661967219944e-06, + "loss": 3.4838, + "step": 6001 + }, + { + "epoch": 0.72, + "learning_rate": 2.053150032515182e-06, + "loss": 3.3557, + "step": 6002 + }, + { + "epoch": 0.72, + "learning_rate": 2.051534339922239e-06, + "loss": 3.3053, + "step": 6003 + }, + { + "epoch": 0.72, + "learning_rate": 2.0499191192017455e-06, + "loss": 3.4389, + "step": 6004 + }, + { + "epoch": 0.72, + "learning_rate": 2.0483043706121947e-06, + "loss": 3.4111, + "step": 6005 + }, + { + "epoch": 0.72, + "learning_rate": 2.0466900944120164e-06, + "loss": 3.4085, + "step": 6006 + }, + { + "epoch": 0.72, + "learning_rate": 2.045076290859558e-06, + "loss": 3.441, + "step": 6007 + }, + { + "epoch": 0.72, + "learning_rate": 2.0434629602130918e-06, + "loss": 3.4888, + "step": 6008 + }, + { + "epoch": 0.72, + "learning_rate": 2.041850102730814e-06, + "loss": 3.4081, + "step": 6009 + }, + { + "epoch": 0.72, + "learning_rate": 2.0402377186708516e-06, + "loss": 3.4132, + "step": 6010 + }, + { + "epoch": 0.72, + "learning_rate": 2.038625808291243e-06, + "loss": 3.4757, + "step": 6011 + }, + { + "epoch": 0.72, + "learning_rate": 2.0370143718499657e-06, + "loss": 3.3887, + "step": 6012 + }, + { + "epoch": 0.72, + "learning_rate": 2.0354034096049073e-06, + "loss": 3.5163, + "step": 6013 + }, + { + "epoch": 0.72, + "learning_rate": 2.0337929218138906e-06, + "loss": 3.3386, + "step": 6014 + }, + { + "epoch": 0.72, + "learning_rate": 2.032182908734657e-06, + "loss": 3.3852, + "step": 6015 + }, + { + "epoch": 0.72, + "learning_rate": 2.030573370624874e-06, + "loss": 3.3523, + "step": 6016 + }, + { + "epoch": 0.72, + "learning_rate": 2.0289643077421294e-06, + "loss": 3.4324, + "step": 6017 + }, + { + "epoch": 0.72, + "learning_rate": 2.0273557203439396e-06, + "loss": 3.4984, + "step": 6018 + }, + { + "epoch": 0.72, + "learning_rate": 2.0257476086877415e-06, + "loss": 3.4565, + "step": 6019 + }, + { + "epoch": 0.72, + "learning_rate": 2.0241399730309013e-06, + "loss": 3.3975, + "step": 6020 + }, + { + "epoch": 0.72, + "learning_rate": 2.0225328136306982e-06, + "loss": 3.2984, + "step": 6021 + }, + { + "epoch": 0.72, + "learning_rate": 2.0209261307443477e-06, + "loss": 3.3878, + "step": 6022 + }, + { + "epoch": 0.72, + "learning_rate": 2.0193199246289814e-06, + "loss": 3.4333, + "step": 6023 + }, + { + "epoch": 0.72, + "learning_rate": 2.017714195541657e-06, + "loss": 3.4454, + "step": 6024 + }, + { + "epoch": 0.72, + "learning_rate": 2.0161089437393543e-06, + "loss": 3.4025, + "step": 6025 + }, + { + "epoch": 0.72, + "learning_rate": 2.0145041694789786e-06, + "loss": 3.3961, + "step": 6026 + }, + { + "epoch": 0.72, + "learning_rate": 2.012899873017356e-06, + "loss": 3.3906, + "step": 6027 + }, + { + "epoch": 0.72, + "learning_rate": 2.0112960546112435e-06, + "loss": 3.4058, + "step": 6028 + }, + { + "epoch": 0.72, + "learning_rate": 2.009692714517309e-06, + "loss": 3.4647, + "step": 6029 + }, + { + "epoch": 0.72, + "learning_rate": 2.0080898529921567e-06, + "loss": 3.4494, + "step": 6030 + }, + { + "epoch": 0.72, + "learning_rate": 2.0064874702923067e-06, + "loss": 3.4042, + "step": 6031 + }, + { + "epoch": 0.72, + "learning_rate": 2.0048855666742035e-06, + "loss": 3.3324, + "step": 6032 + }, + { + "epoch": 0.72, + "learning_rate": 2.0032841423942166e-06, + "loss": 3.3441, + "step": 6033 + }, + { + "epoch": 0.72, + "learning_rate": 2.0016831977086377e-06, + "loss": 3.4434, + "step": 6034 + }, + { + "epoch": 0.72, + "learning_rate": 2.0000827328736803e-06, + "loss": 3.4181, + "step": 6035 + }, + { + "epoch": 0.72, + "learning_rate": 1.9984827481454865e-06, + "loss": 3.3976, + "step": 6036 + }, + { + "epoch": 0.72, + "learning_rate": 1.9968832437801154e-06, + "loss": 3.3772, + "step": 6037 + }, + { + "epoch": 0.72, + "learning_rate": 1.995284220033553e-06, + "loss": 3.4474, + "step": 6038 + }, + { + "epoch": 0.72, + "learning_rate": 1.993685677161706e-06, + "loss": 3.4306, + "step": 6039 + }, + { + "epoch": 0.72, + "learning_rate": 1.992087615420403e-06, + "loss": 3.3488, + "step": 6040 + }, + { + "epoch": 0.72, + "learning_rate": 1.9904900350654043e-06, + "loss": 3.332, + "step": 6041 + }, + { + "epoch": 0.72, + "learning_rate": 1.988892936352379e-06, + "loss": 3.4941, + "step": 6042 + }, + { + "epoch": 0.72, + "learning_rate": 1.9872963195369315e-06, + "loss": 3.3787, + "step": 6043 + }, + { + "epoch": 0.72, + "learning_rate": 1.9857001848745837e-06, + "loss": 3.4521, + "step": 6044 + }, + { + "epoch": 0.72, + "learning_rate": 1.984104532620781e-06, + "loss": 3.4099, + "step": 6045 + }, + { + "epoch": 0.72, + "learning_rate": 1.982509363030889e-06, + "loss": 3.3685, + "step": 6046 + }, + { + "epoch": 0.72, + "learning_rate": 1.9809146763602044e-06, + "loss": 3.3523, + "step": 6047 + }, + { + "epoch": 0.72, + "learning_rate": 1.979320472863933e-06, + "loss": 3.4096, + "step": 6048 + }, + { + "epoch": 0.72, + "learning_rate": 1.9777267527972196e-06, + "loss": 3.4571, + "step": 6049 + }, + { + "epoch": 0.72, + "learning_rate": 1.9761335164151152e-06, + "loss": 3.5029, + "step": 6050 + }, + { + "epoch": 0.72, + "learning_rate": 1.9745407639726065e-06, + "loss": 3.3935, + "step": 6051 + }, + { + "epoch": 0.72, + "learning_rate": 1.9729484957245965e-06, + "loss": 3.4544, + "step": 6052 + }, + { + "epoch": 0.72, + "learning_rate": 1.9713567119259113e-06, + "loss": 3.4088, + "step": 6053 + }, + { + "epoch": 0.72, + "learning_rate": 1.969765412831298e-06, + "loss": 3.4708, + "step": 6054 + }, + { + "epoch": 0.72, + "learning_rate": 1.968174598695435e-06, + "loss": 3.3887, + "step": 6055 + }, + { + "epoch": 0.73, + "learning_rate": 1.9665842697729065e-06, + "loss": 3.3661, + "step": 6056 + }, + { + "epoch": 0.73, + "learning_rate": 1.964994426318238e-06, + "loss": 3.5221, + "step": 6057 + }, + { + "epoch": 0.73, + "learning_rate": 1.9634050685858595e-06, + "loss": 3.4236, + "step": 6058 + }, + { + "epoch": 0.73, + "learning_rate": 1.9618161968301388e-06, + "loss": 3.4459, + "step": 6059 + }, + { + "epoch": 0.73, + "learning_rate": 1.960227811305356e-06, + "loss": 3.4383, + "step": 6060 + }, + { + "epoch": 0.73, + "learning_rate": 1.9586399122657156e-06, + "loss": 3.4816, + "step": 6061 + }, + { + "epoch": 0.73, + "learning_rate": 1.9570524999653453e-06, + "loss": 3.3374, + "step": 6062 + }, + { + "epoch": 0.73, + "learning_rate": 1.955465574658298e-06, + "loss": 3.3575, + "step": 6063 + }, + { + "epoch": 0.73, + "learning_rate": 1.9538791365985386e-06, + "loss": 3.5255, + "step": 6064 + }, + { + "epoch": 0.73, + "learning_rate": 1.952293186039968e-06, + "loss": 3.399, + "step": 6065 + }, + { + "epoch": 0.73, + "learning_rate": 1.9507077232363954e-06, + "loss": 3.4023, + "step": 6066 + }, + { + "epoch": 0.73, + "learning_rate": 1.949122748441562e-06, + "loss": 3.4599, + "step": 6067 + }, + { + "epoch": 0.73, + "learning_rate": 1.947538261909126e-06, + "loss": 3.4256, + "step": 6068 + }, + { + "epoch": 0.73, + "learning_rate": 1.9459542638926693e-06, + "loss": 3.3792, + "step": 6069 + }, + { + "epoch": 0.73, + "learning_rate": 1.9443707546456923e-06, + "loss": 3.4139, + "step": 6070 + }, + { + "epoch": 0.73, + "learning_rate": 1.9427877344216264e-06, + "loss": 3.3848, + "step": 6071 + }, + { + "epoch": 0.73, + "learning_rate": 1.9412052034738087e-06, + "loss": 3.384, + "step": 6072 + }, + { + "epoch": 0.73, + "learning_rate": 1.939623162055516e-06, + "loss": 3.314, + "step": 6073 + }, + { + "epoch": 0.73, + "learning_rate": 1.9380416104199334e-06, + "loss": 3.4047, + "step": 6074 + }, + { + "epoch": 0.73, + "learning_rate": 1.9364605488201736e-06, + "loss": 3.3796, + "step": 6075 + }, + { + "epoch": 0.73, + "learning_rate": 1.93487997750927e-06, + "loss": 3.4501, + "step": 6076 + }, + { + "epoch": 0.73, + "learning_rate": 1.933299896740178e-06, + "loss": 3.3475, + "step": 6077 + }, + { + "epoch": 0.73, + "learning_rate": 1.93172030676577e-06, + "loss": 3.3587, + "step": 6078 + }, + { + "epoch": 0.73, + "learning_rate": 1.9301412078388485e-06, + "loss": 3.3697, + "step": 6079 + }, + { + "epoch": 0.73, + "learning_rate": 1.9285626002121304e-06, + "loss": 3.4661, + "step": 6080 + }, + { + "epoch": 0.73, + "learning_rate": 1.9269844841382557e-06, + "loss": 3.4513, + "step": 6081 + }, + { + "epoch": 0.73, + "learning_rate": 1.9254068598697866e-06, + "loss": 3.4066, + "step": 6082 + }, + { + "epoch": 0.73, + "learning_rate": 1.923829727659206e-06, + "loss": 3.4058, + "step": 6083 + }, + { + "epoch": 0.73, + "learning_rate": 1.9222530877589178e-06, + "loss": 3.4122, + "step": 6084 + }, + { + "epoch": 0.73, + "learning_rate": 1.920676940421246e-06, + "loss": 3.4596, + "step": 6085 + }, + { + "epoch": 0.73, + "learning_rate": 1.9191012858984426e-06, + "loss": 3.3307, + "step": 6086 + }, + { + "epoch": 0.73, + "learning_rate": 1.917526124442668e-06, + "loss": 3.4573, + "step": 6087 + }, + { + "epoch": 0.73, + "learning_rate": 1.915951456306016e-06, + "loss": 3.3786, + "step": 6088 + }, + { + "epoch": 0.73, + "learning_rate": 1.914377281740496e-06, + "loss": 3.4484, + "step": 6089 + }, + { + "epoch": 0.73, + "learning_rate": 1.912803600998038e-06, + "loss": 3.3008, + "step": 6090 + }, + { + "epoch": 0.73, + "learning_rate": 1.9112304143304915e-06, + "loss": 3.3844, + "step": 6091 + }, + { + "epoch": 0.73, + "learning_rate": 1.9096577219896356e-06, + "loss": 3.4274, + "step": 6092 + }, + { + "epoch": 0.73, + "learning_rate": 1.908085524227157e-06, + "loss": 3.4442, + "step": 6093 + }, + { + "epoch": 0.73, + "learning_rate": 1.9065138212946765e-06, + "loss": 3.3839, + "step": 6094 + }, + { + "epoch": 0.73, + "learning_rate": 1.9049426134437227e-06, + "loss": 3.4059, + "step": 6095 + }, + { + "epoch": 0.73, + "learning_rate": 1.9033719009257572e-06, + "loss": 3.4274, + "step": 6096 + }, + { + "epoch": 0.73, + "learning_rate": 1.901801683992155e-06, + "loss": 3.3993, + "step": 6097 + }, + { + "epoch": 0.73, + "learning_rate": 1.9002319628942135e-06, + "loss": 3.4586, + "step": 6098 + }, + { + "epoch": 0.73, + "learning_rate": 1.8986627378831491e-06, + "loss": 3.4749, + "step": 6099 + }, + { + "epoch": 0.73, + "learning_rate": 1.8970940092101059e-06, + "loss": 3.4628, + "step": 6100 + }, + { + "epoch": 0.73, + "learning_rate": 1.8955257771261365e-06, + "loss": 3.4107, + "step": 6101 + }, + { + "epoch": 0.73, + "learning_rate": 1.8939580418822273e-06, + "loss": 3.4434, + "step": 6102 + }, + { + "epoch": 0.73, + "learning_rate": 1.8923908037292721e-06, + "loss": 3.4059, + "step": 6103 + }, + { + "epoch": 0.73, + "learning_rate": 1.8908240629180963e-06, + "loss": 3.3138, + "step": 6104 + }, + { + "epoch": 0.73, + "learning_rate": 1.8892578196994398e-06, + "loss": 3.445, + "step": 6105 + }, + { + "epoch": 0.73, + "learning_rate": 1.8876920743239647e-06, + "loss": 3.3243, + "step": 6106 + }, + { + "epoch": 0.73, + "learning_rate": 1.88612682704225e-06, + "loss": 3.4027, + "step": 6107 + }, + { + "epoch": 0.73, + "learning_rate": 1.8845620781048036e-06, + "loss": 3.4434, + "step": 6108 + }, + { + "epoch": 0.73, + "learning_rate": 1.8829978277620409e-06, + "loss": 3.3963, + "step": 6109 + }, + { + "epoch": 0.73, + "learning_rate": 1.8814340762643118e-06, + "loss": 3.3542, + "step": 6110 + }, + { + "epoch": 0.73, + "learning_rate": 1.8798708238618725e-06, + "loss": 3.2475, + "step": 6111 + }, + { + "epoch": 0.73, + "learning_rate": 1.8783080708049101e-06, + "loss": 3.4079, + "step": 6112 + }, + { + "epoch": 0.73, + "learning_rate": 1.8767458173435261e-06, + "loss": 3.4476, + "step": 6113 + }, + { + "epoch": 0.73, + "learning_rate": 1.875184063727744e-06, + "loss": 3.4401, + "step": 6114 + }, + { + "epoch": 0.73, + "learning_rate": 1.873622810207505e-06, + "loss": 3.3321, + "step": 6115 + }, + { + "epoch": 0.73, + "learning_rate": 1.872062057032677e-06, + "loss": 3.4938, + "step": 6116 + }, + { + "epoch": 0.73, + "learning_rate": 1.8705018044530365e-06, + "loss": 3.358, + "step": 6117 + }, + { + "epoch": 0.73, + "learning_rate": 1.868942052718291e-06, + "loss": 3.3962, + "step": 6118 + }, + { + "epoch": 0.73, + "learning_rate": 1.8673828020780615e-06, + "loss": 3.4149, + "step": 6119 + }, + { + "epoch": 0.73, + "learning_rate": 1.865824052781891e-06, + "loss": 3.381, + "step": 6120 + }, + { + "epoch": 0.73, + "learning_rate": 1.8642658050792412e-06, + "loss": 3.4243, + "step": 6121 + }, + { + "epoch": 0.73, + "learning_rate": 1.862708059219494e-06, + "loss": 3.4302, + "step": 6122 + }, + { + "epoch": 0.73, + "learning_rate": 1.8611508154519504e-06, + "loss": 3.4564, + "step": 6123 + }, + { + "epoch": 0.73, + "learning_rate": 1.8595940740258333e-06, + "loss": 3.427, + "step": 6124 + }, + { + "epoch": 0.73, + "learning_rate": 1.8580378351902834e-06, + "loss": 3.4259, + "step": 6125 + }, + { + "epoch": 0.73, + "learning_rate": 1.8564820991943605e-06, + "loss": 3.5106, + "step": 6126 + }, + { + "epoch": 0.73, + "learning_rate": 1.8549268662870451e-06, + "loss": 3.4676, + "step": 6127 + }, + { + "epoch": 0.73, + "learning_rate": 1.853372136717234e-06, + "loss": 3.4148, + "step": 6128 + }, + { + "epoch": 0.73, + "learning_rate": 1.8518179107337524e-06, + "loss": 3.383, + "step": 6129 + }, + { + "epoch": 0.73, + "learning_rate": 1.850264188585331e-06, + "loss": 3.3823, + "step": 6130 + }, + { + "epoch": 0.73, + "learning_rate": 1.8487109705206336e-06, + "loss": 3.4542, + "step": 6131 + }, + { + "epoch": 0.73, + "learning_rate": 1.847158256788234e-06, + "loss": 3.413, + "step": 6132 + }, + { + "epoch": 0.73, + "learning_rate": 1.8456060476366295e-06, + "loss": 3.5033, + "step": 6133 + }, + { + "epoch": 0.73, + "learning_rate": 1.844054343314235e-06, + "loss": 3.4896, + "step": 6134 + }, + { + "epoch": 0.73, + "learning_rate": 1.8425031440693863e-06, + "loss": 3.3869, + "step": 6135 + }, + { + "epoch": 0.73, + "learning_rate": 1.8409524501503345e-06, + "loss": 3.4701, + "step": 6136 + }, + { + "epoch": 0.73, + "learning_rate": 1.8394022618052587e-06, + "loss": 3.476, + "step": 6137 + }, + { + "epoch": 0.73, + "learning_rate": 1.8378525792822437e-06, + "loss": 3.3428, + "step": 6138 + }, + { + "epoch": 0.73, + "learning_rate": 1.8363034028293058e-06, + "loss": 3.4677, + "step": 6139 + }, + { + "epoch": 0.74, + "learning_rate": 1.8347547326943744e-06, + "loss": 3.3895, + "step": 6140 + }, + { + "epoch": 0.74, + "learning_rate": 1.833206569125297e-06, + "loss": 3.3434, + "step": 6141 + }, + { + "epoch": 0.74, + "learning_rate": 1.831658912369843e-06, + "loss": 3.3502, + "step": 6142 + }, + { + "epoch": 0.74, + "learning_rate": 1.8301117626756987e-06, + "loss": 3.3889, + "step": 6143 + }, + { + "epoch": 0.74, + "learning_rate": 1.8285651202904686e-06, + "loss": 3.4749, + "step": 6144 + }, + { + "epoch": 0.74, + "learning_rate": 1.8270189854616827e-06, + "loss": 3.3809, + "step": 6145 + }, + { + "epoch": 0.74, + "learning_rate": 1.8254733584367768e-06, + "loss": 3.5387, + "step": 6146 + }, + { + "epoch": 0.74, + "learning_rate": 1.8239282394631202e-06, + "loss": 3.5078, + "step": 6147 + }, + { + "epoch": 0.74, + "learning_rate": 1.8223836287879876e-06, + "loss": 3.4563, + "step": 6148 + }, + { + "epoch": 0.74, + "learning_rate": 1.8208395266585832e-06, + "loss": 3.3716, + "step": 6149 + }, + { + "epoch": 0.74, + "learning_rate": 1.8192959333220227e-06, + "loss": 3.4117, + "step": 6150 + }, + { + "epoch": 0.74, + "learning_rate": 1.8177528490253438e-06, + "loss": 3.3922, + "step": 6151 + }, + { + "epoch": 0.74, + "learning_rate": 1.8162102740154997e-06, + "loss": 3.4623, + "step": 6152 + }, + { + "epoch": 0.74, + "learning_rate": 1.8146682085393686e-06, + "loss": 3.561, + "step": 6153 + }, + { + "epoch": 0.74, + "learning_rate": 1.8131266528437363e-06, + "loss": 3.3402, + "step": 6154 + }, + { + "epoch": 0.74, + "learning_rate": 1.8115856071753207e-06, + "loss": 3.4327, + "step": 6155 + }, + { + "epoch": 0.74, + "learning_rate": 1.8100450717807432e-06, + "loss": 3.3896, + "step": 6156 + }, + { + "epoch": 0.74, + "learning_rate": 1.8085050469065563e-06, + "loss": 3.4052, + "step": 6157 + }, + { + "epoch": 0.74, + "learning_rate": 1.8069655327992242e-06, + "loss": 3.4014, + "step": 6158 + }, + { + "epoch": 0.74, + "learning_rate": 1.8054265297051305e-06, + "loss": 3.3937, + "step": 6159 + }, + { + "epoch": 0.74, + "learning_rate": 1.803888037870576e-06, + "loss": 3.4108, + "step": 6160 + }, + { + "epoch": 0.74, + "learning_rate": 1.8023500575417835e-06, + "loss": 3.303, + "step": 6161 + }, + { + "epoch": 0.74, + "learning_rate": 1.8008125889648896e-06, + "loss": 3.3438, + "step": 6162 + }, + { + "epoch": 0.74, + "learning_rate": 1.7992756323859522e-06, + "loss": 3.3545, + "step": 6163 + }, + { + "epoch": 0.74, + "learning_rate": 1.7977391880509448e-06, + "loss": 3.3712, + "step": 6164 + }, + { + "epoch": 0.74, + "learning_rate": 1.7962032562057602e-06, + "loss": 3.4163, + "step": 6165 + }, + { + "epoch": 0.74, + "learning_rate": 1.7946678370962084e-06, + "loss": 3.4012, + "step": 6166 + }, + { + "epoch": 0.74, + "learning_rate": 1.7931329309680168e-06, + "loss": 3.4694, + "step": 6167 + }, + { + "epoch": 0.74, + "learning_rate": 1.7915985380668348e-06, + "loss": 3.3978, + "step": 6168 + }, + { + "epoch": 0.74, + "learning_rate": 1.7900646586382253e-06, + "loss": 3.4012, + "step": 6169 + }, + { + "epoch": 0.74, + "learning_rate": 1.7885312929276704e-06, + "loss": 3.2952, + "step": 6170 + }, + { + "epoch": 0.74, + "learning_rate": 1.7869984411805692e-06, + "loss": 3.4421, + "step": 6171 + }, + { + "epoch": 0.74, + "learning_rate": 1.78546610364224e-06, + "loss": 3.4806, + "step": 6172 + }, + { + "epoch": 0.74, + "learning_rate": 1.7839342805579163e-06, + "loss": 3.3762, + "step": 6173 + }, + { + "epoch": 0.74, + "learning_rate": 1.7824029721727565e-06, + "loss": 3.4365, + "step": 6174 + }, + { + "epoch": 0.74, + "learning_rate": 1.7808721787318234e-06, + "loss": 3.4777, + "step": 6175 + }, + { + "epoch": 0.74, + "learning_rate": 1.779341900480111e-06, + "loss": 3.3942, + "step": 6176 + }, + { + "epoch": 0.74, + "learning_rate": 1.7778121376625234e-06, + "loss": 3.4263, + "step": 6177 + }, + { + "epoch": 0.74, + "learning_rate": 1.7762828905238833e-06, + "loss": 3.3632, + "step": 6178 + }, + { + "epoch": 0.74, + "learning_rate": 1.7747541593089324e-06, + "loss": 3.3995, + "step": 6179 + }, + { + "epoch": 0.74, + "learning_rate": 1.7732259442623273e-06, + "loss": 3.3219, + "step": 6180 + }, + { + "epoch": 0.74, + "learning_rate": 1.7716982456286435e-06, + "loss": 3.4055, + "step": 6181 + }, + { + "epoch": 0.74, + "learning_rate": 1.7701710636523778e-06, + "loss": 3.3537, + "step": 6182 + }, + { + "epoch": 0.74, + "learning_rate": 1.7686443985779344e-06, + "loss": 3.3735, + "step": 6183 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671182506496454e-06, + "loss": 3.3361, + "step": 6184 + }, + { + "epoch": 0.74, + "learning_rate": 1.7655926201117535e-06, + "loss": 3.3891, + "step": 6185 + }, + { + "epoch": 0.74, + "learning_rate": 1.7640675072084213e-06, + "loss": 3.3931, + "step": 6186 + }, + { + "epoch": 0.74, + "learning_rate": 1.7625429121837279e-06, + "loss": 3.4209, + "step": 6187 + }, + { + "epoch": 0.74, + "learning_rate": 1.7610188352816687e-06, + "loss": 3.3161, + "step": 6188 + }, + { + "epoch": 0.74, + "learning_rate": 1.7594952767461565e-06, + "loss": 3.4257, + "step": 6189 + }, + { + "epoch": 0.74, + "learning_rate": 1.7579722368210257e-06, + "loss": 3.407, + "step": 6190 + }, + { + "epoch": 0.74, + "learning_rate": 1.7564497157500166e-06, + "loss": 3.3883, + "step": 6191 + }, + { + "epoch": 0.74, + "learning_rate": 1.7549277137767995e-06, + "loss": 3.3155, + "step": 6192 + }, + { + "epoch": 0.74, + "learning_rate": 1.7534062311449535e-06, + "loss": 3.3822, + "step": 6193 + }, + { + "epoch": 0.74, + "learning_rate": 1.7518852680979763e-06, + "loss": 3.3826, + "step": 6194 + }, + { + "epoch": 0.74, + "learning_rate": 1.7503648248792832e-06, + "loss": 3.3785, + "step": 6195 + }, + { + "epoch": 0.74, + "learning_rate": 1.7488449017322062e-06, + "loss": 3.4863, + "step": 6196 + }, + { + "epoch": 0.74, + "learning_rate": 1.7473254988999915e-06, + "loss": 3.3881, + "step": 6197 + }, + { + "epoch": 0.74, + "learning_rate": 1.7458066166258097e-06, + "loss": 3.4747, + "step": 6198 + }, + { + "epoch": 0.74, + "learning_rate": 1.7442882551527362e-06, + "loss": 3.3361, + "step": 6199 + }, + { + "epoch": 0.74, + "learning_rate": 1.7427704147237745e-06, + "loss": 3.4146, + "step": 6200 + }, + { + "epoch": 0.74, + "learning_rate": 1.741253095581838e-06, + "loss": 3.3968, + "step": 6201 + }, + { + "epoch": 0.74, + "learning_rate": 1.7397362979697586e-06, + "loss": 3.432, + "step": 6202 + }, + { + "epoch": 0.74, + "learning_rate": 1.7382200221302842e-06, + "loss": 3.4439, + "step": 6203 + }, + { + "epoch": 0.74, + "learning_rate": 1.7367042683060796e-06, + "loss": 3.3551, + "step": 6204 + }, + { + "epoch": 0.74, + "learning_rate": 1.735189036739725e-06, + "loss": 3.3959, + "step": 6205 + }, + { + "epoch": 0.74, + "learning_rate": 1.7336743276737206e-06, + "loss": 3.4115, + "step": 6206 + }, + { + "epoch": 0.74, + "learning_rate": 1.7321601413504796e-06, + "loss": 3.465, + "step": 6207 + }, + { + "epoch": 0.74, + "learning_rate": 1.7306464780123322e-06, + "loss": 3.3814, + "step": 6208 + }, + { + "epoch": 0.74, + "learning_rate": 1.729133337901524e-06, + "loss": 3.4503, + "step": 6209 + }, + { + "epoch": 0.74, + "learning_rate": 1.7276207212602198e-06, + "loss": 3.3997, + "step": 6210 + }, + { + "epoch": 0.74, + "learning_rate": 1.7261086283304968e-06, + "loss": 3.4613, + "step": 6211 + }, + { + "epoch": 0.74, + "learning_rate": 1.7245970593543503e-06, + "loss": 3.4916, + "step": 6212 + }, + { + "epoch": 0.74, + "learning_rate": 1.7230860145736943e-06, + "loss": 3.2642, + "step": 6213 + }, + { + "epoch": 0.74, + "learning_rate": 1.7215754942303548e-06, + "loss": 3.5145, + "step": 6214 + }, + { + "epoch": 0.74, + "learning_rate": 1.7200654985660758e-06, + "loss": 3.4227, + "step": 6215 + }, + { + "epoch": 0.74, + "learning_rate": 1.7185560278225166e-06, + "loss": 3.36, + "step": 6216 + }, + { + "epoch": 0.74, + "learning_rate": 1.7170470822412533e-06, + "loss": 3.4501, + "step": 6217 + }, + { + "epoch": 0.74, + "learning_rate": 1.7155386620637758e-06, + "loss": 3.4691, + "step": 6218 + }, + { + "epoch": 0.74, + "learning_rate": 1.714030767531496e-06, + "loss": 3.4903, + "step": 6219 + }, + { + "epoch": 0.74, + "learning_rate": 1.712523398885732e-06, + "loss": 3.419, + "step": 6220 + }, + { + "epoch": 0.74, + "learning_rate": 1.7110165563677266e-06, + "loss": 3.4387, + "step": 6221 + }, + { + "epoch": 0.74, + "learning_rate": 1.7095102402186342e-06, + "loss": 3.3725, + "step": 6222 + }, + { + "epoch": 0.75, + "learning_rate": 1.7080044506795257e-06, + "loss": 3.4695, + "step": 6223 + }, + { + "epoch": 0.75, + "learning_rate": 1.7064991879913873e-06, + "loss": 3.3696, + "step": 6224 + }, + { + "epoch": 0.75, + "learning_rate": 1.7049944523951207e-06, + "loss": 3.3853, + "step": 6225 + }, + { + "epoch": 0.75, + "learning_rate": 1.7034902441315432e-06, + "loss": 3.4344, + "step": 6226 + }, + { + "epoch": 0.75, + "learning_rate": 1.7019865634413923e-06, + "loss": 3.4043, + "step": 6227 + }, + { + "epoch": 0.75, + "learning_rate": 1.700483410565311e-06, + "loss": 3.432, + "step": 6228 + }, + { + "epoch": 0.75, + "learning_rate": 1.698980785743869e-06, + "loss": 3.375, + "step": 6229 + }, + { + "epoch": 0.75, + "learning_rate": 1.6974786892175443e-06, + "loss": 3.4637, + "step": 6230 + }, + { + "epoch": 0.75, + "learning_rate": 1.6959771212267328e-06, + "loss": 3.5046, + "step": 6231 + }, + { + "epoch": 0.75, + "learning_rate": 1.694476082011745e-06, + "loss": 3.2886, + "step": 6232 + }, + { + "epoch": 0.75, + "learning_rate": 1.6929755718128077e-06, + "loss": 3.46, + "step": 6233 + }, + { + "epoch": 0.75, + "learning_rate": 1.6914755908700604e-06, + "loss": 3.394, + "step": 6234 + }, + { + "epoch": 0.75, + "learning_rate": 1.6899761394235653e-06, + "loss": 3.3929, + "step": 6235 + }, + { + "epoch": 0.75, + "learning_rate": 1.688477217713288e-06, + "loss": 3.4139, + "step": 6236 + }, + { + "epoch": 0.75, + "learning_rate": 1.6869788259791208e-06, + "loss": 3.4337, + "step": 6237 + }, + { + "epoch": 0.75, + "learning_rate": 1.6854809644608645e-06, + "loss": 3.4698, + "step": 6238 + }, + { + "epoch": 0.75, + "learning_rate": 1.6839836333982373e-06, + "loss": 3.4474, + "step": 6239 + }, + { + "epoch": 0.75, + "learning_rate": 1.682486833030872e-06, + "loss": 3.4848, + "step": 6240 + }, + { + "epoch": 0.75, + "learning_rate": 1.6809905635983164e-06, + "loss": 3.4057, + "step": 6241 + }, + { + "epoch": 0.75, + "learning_rate": 1.6794948253400318e-06, + "loss": 3.4551, + "step": 6242 + }, + { + "epoch": 0.75, + "learning_rate": 1.6779996184954005e-06, + "loss": 3.5017, + "step": 6243 + }, + { + "epoch": 0.75, + "learning_rate": 1.67650494330371e-06, + "loss": 3.4326, + "step": 6244 + }, + { + "epoch": 0.75, + "learning_rate": 1.6750108000041721e-06, + "loss": 3.4995, + "step": 6245 + }, + { + "epoch": 0.75, + "learning_rate": 1.673517188835908e-06, + "loss": 3.4304, + "step": 6246 + }, + { + "epoch": 0.75, + "learning_rate": 1.6720241100379558e-06, + "loss": 3.3703, + "step": 6247 + }, + { + "epoch": 0.75, + "learning_rate": 1.6705315638492676e-06, + "loss": 3.4227, + "step": 6248 + }, + { + "epoch": 0.75, + "learning_rate": 1.6690395505087103e-06, + "loss": 3.4217, + "step": 6249 + }, + { + "epoch": 0.75, + "learning_rate": 1.6675480702550639e-06, + "loss": 3.3254, + "step": 6250 + }, + { + "epoch": 0.75, + "learning_rate": 1.6660571233270284e-06, + "loss": 3.3908, + "step": 6251 + }, + { + "epoch": 0.75, + "learning_rate": 1.6645667099632134e-06, + "loss": 3.3966, + "step": 6252 + }, + { + "epoch": 0.75, + "learning_rate": 1.663076830402145e-06, + "loss": 3.3578, + "step": 6253 + }, + { + "epoch": 0.75, + "learning_rate": 1.6615874848822628e-06, + "loss": 3.4499, + "step": 6254 + }, + { + "epoch": 0.75, + "learning_rate": 1.66009867364192e-06, + "loss": 3.437, + "step": 6255 + }, + { + "epoch": 0.75, + "learning_rate": 1.6586103969193917e-06, + "loss": 3.4369, + "step": 6256 + }, + { + "epoch": 0.75, + "learning_rate": 1.657122654952854e-06, + "loss": 3.4012, + "step": 6257 + }, + { + "epoch": 0.75, + "learning_rate": 1.655635447980411e-06, + "loss": 3.5131, + "step": 6258 + }, + { + "epoch": 0.75, + "learning_rate": 1.6541487762400738e-06, + "loss": 3.4736, + "step": 6259 + }, + { + "epoch": 0.75, + "learning_rate": 1.6526626399697682e-06, + "loss": 3.3016, + "step": 6260 + }, + { + "epoch": 0.75, + "learning_rate": 1.6511770394073352e-06, + "loss": 3.3658, + "step": 6261 + }, + { + "epoch": 0.75, + "learning_rate": 1.6496919747905337e-06, + "loss": 3.3435, + "step": 6262 + }, + { + "epoch": 0.75, + "learning_rate": 1.6482074463570285e-06, + "loss": 3.4018, + "step": 6263 + }, + { + "epoch": 0.75, + "learning_rate": 1.6467234543444089e-06, + "loss": 3.4593, + "step": 6264 + }, + { + "epoch": 0.75, + "learning_rate": 1.6452399989901669e-06, + "loss": 3.402, + "step": 6265 + }, + { + "epoch": 0.75, + "learning_rate": 1.6437570805317198e-06, + "loss": 3.4463, + "step": 6266 + }, + { + "epoch": 0.75, + "learning_rate": 1.6422746992063914e-06, + "loss": 3.4222, + "step": 6267 + }, + { + "epoch": 0.75, + "learning_rate": 1.6407928552514236e-06, + "loss": 3.472, + "step": 6268 + }, + { + "epoch": 0.75, + "learning_rate": 1.6393115489039668e-06, + "loss": 3.3732, + "step": 6269 + }, + { + "epoch": 0.75, + "learning_rate": 1.6378307804010962e-06, + "loss": 3.3866, + "step": 6270 + }, + { + "epoch": 0.75, + "learning_rate": 1.6363505499797865e-06, + "loss": 3.3862, + "step": 6271 + }, + { + "epoch": 0.75, + "learning_rate": 1.6348708578769407e-06, + "loss": 3.3242, + "step": 6272 + }, + { + "epoch": 0.75, + "learning_rate": 1.6333917043293623e-06, + "loss": 3.4118, + "step": 6273 + }, + { + "epoch": 0.75, + "learning_rate": 1.631913089573779e-06, + "loss": 3.3481, + "step": 6274 + }, + { + "epoch": 0.75, + "learning_rate": 1.6304350138468277e-06, + "loss": 3.3645, + "step": 6275 + }, + { + "epoch": 0.75, + "learning_rate": 1.628957477385059e-06, + "loss": 3.3622, + "step": 6276 + }, + { + "epoch": 0.75, + "learning_rate": 1.6274804804249384e-06, + "loss": 3.4279, + "step": 6277 + }, + { + "epoch": 0.75, + "learning_rate": 1.6260040232028435e-06, + "loss": 3.4137, + "step": 6278 + }, + { + "epoch": 0.75, + "learning_rate": 1.6245281059550661e-06, + "loss": 3.4216, + "step": 6279 + }, + { + "epoch": 0.75, + "learning_rate": 1.6230527289178161e-06, + "loss": 3.4243, + "step": 6280 + }, + { + "epoch": 0.75, + "learning_rate": 1.6215778923272068e-06, + "loss": 3.31, + "step": 6281 + }, + { + "epoch": 0.75, + "learning_rate": 1.6201035964192757e-06, + "loss": 3.4418, + "step": 6282 + }, + { + "epoch": 0.75, + "learning_rate": 1.6186298414299673e-06, + "loss": 3.4498, + "step": 6283 + }, + { + "epoch": 0.75, + "learning_rate": 1.6171566275951417e-06, + "loss": 3.4076, + "step": 6284 + }, + { + "epoch": 0.75, + "learning_rate": 1.6156839551505721e-06, + "loss": 3.345, + "step": 6285 + }, + { + "epoch": 0.75, + "learning_rate": 1.6142118243319444e-06, + "loss": 3.3766, + "step": 6286 + }, + { + "epoch": 0.75, + "learning_rate": 1.6127402353748578e-06, + "loss": 3.3858, + "step": 6287 + }, + { + "epoch": 0.75, + "learning_rate": 1.6112691885148274e-06, + "loss": 3.4181, + "step": 6288 + }, + { + "epoch": 0.75, + "learning_rate": 1.6097986839872787e-06, + "loss": 3.4026, + "step": 6289 + }, + { + "epoch": 0.75, + "learning_rate": 1.6083287220275511e-06, + "loss": 3.3736, + "step": 6290 + }, + { + "epoch": 0.75, + "learning_rate": 1.6068593028708972e-06, + "loss": 3.4217, + "step": 6291 + }, + { + "epoch": 0.75, + "learning_rate": 1.6053904267524834e-06, + "loss": 3.3971, + "step": 6292 + }, + { + "epoch": 0.75, + "learning_rate": 1.6039220939073879e-06, + "loss": 3.516, + "step": 6293 + }, + { + "epoch": 0.75, + "learning_rate": 1.6024543045706016e-06, + "loss": 3.3882, + "step": 6294 + }, + { + "epoch": 0.75, + "learning_rate": 1.600987058977032e-06, + "loss": 3.4582, + "step": 6295 + }, + { + "epoch": 0.75, + "learning_rate": 1.5995203573614958e-06, + "loss": 3.3604, + "step": 6296 + }, + { + "epoch": 0.75, + "learning_rate": 1.5980541999587246e-06, + "loss": 3.4197, + "step": 6297 + }, + { + "epoch": 0.75, + "learning_rate": 1.596588587003362e-06, + "loss": 3.3487, + "step": 6298 + }, + { + "epoch": 0.75, + "learning_rate": 1.595123518729964e-06, + "loss": 3.4426, + "step": 6299 + }, + { + "epoch": 0.75, + "learning_rate": 1.5936589953729991e-06, + "loss": 3.3963, + "step": 6300 + }, + { + "epoch": 0.75, + "learning_rate": 1.5921950171668539e-06, + "loss": 3.3289, + "step": 6301 + }, + { + "epoch": 0.75, + "learning_rate": 1.5907315843458176e-06, + "loss": 3.4451, + "step": 6302 + }, + { + "epoch": 0.75, + "learning_rate": 1.589268697144103e-06, + "loss": 3.4833, + "step": 6303 + }, + { + "epoch": 0.75, + "learning_rate": 1.5878063557958285e-06, + "loss": 3.3762, + "step": 6304 + }, + { + "epoch": 0.75, + "learning_rate": 1.586344560535027e-06, + "loss": 3.3638, + "step": 6305 + }, + { + "epoch": 0.75, + "learning_rate": 1.584883311595643e-06, + "loss": 3.3518, + "step": 6306 + }, + { + "epoch": 0.76, + "learning_rate": 1.5834226092115396e-06, + "loss": 3.4181, + "step": 6307 + }, + { + "epoch": 0.76, + "learning_rate": 1.5819624536164813e-06, + "loss": 3.4204, + "step": 6308 + }, + { + "epoch": 0.76, + "learning_rate": 1.5805028450441573e-06, + "loss": 3.3629, + "step": 6309 + }, + { + "epoch": 0.76, + "learning_rate": 1.5790437837281575e-06, + "loss": 3.3787, + "step": 6310 + }, + { + "epoch": 0.76, + "learning_rate": 1.5775852699019946e-06, + "loss": 3.3444, + "step": 6311 + }, + { + "epoch": 0.76, + "learning_rate": 1.5761273037990876e-06, + "loss": 3.4501, + "step": 6312 + }, + { + "epoch": 0.76, + "learning_rate": 1.5746698856527692e-06, + "loss": 3.3775, + "step": 6313 + }, + { + "epoch": 0.76, + "learning_rate": 1.5732130156962839e-06, + "loss": 3.3601, + "step": 6314 + }, + { + "epoch": 0.76, + "learning_rate": 1.5717566941627927e-06, + "loss": 3.3614, + "step": 6315 + }, + { + "epoch": 0.76, + "learning_rate": 1.5703009212853593e-06, + "loss": 3.291, + "step": 6316 + }, + { + "epoch": 0.76, + "learning_rate": 1.5688456972969723e-06, + "loss": 3.3838, + "step": 6317 + }, + { + "epoch": 0.76, + "learning_rate": 1.5673910224305194e-06, + "loss": 3.5239, + "step": 6318 + }, + { + "epoch": 0.76, + "learning_rate": 1.5659368969188115e-06, + "loss": 3.4311, + "step": 6319 + }, + { + "epoch": 0.76, + "learning_rate": 1.5644833209945648e-06, + "loss": 3.3829, + "step": 6320 + }, + { + "epoch": 0.76, + "learning_rate": 1.5630302948904102e-06, + "loss": 3.3649, + "step": 6321 + }, + { + "epoch": 0.76, + "learning_rate": 1.561577818838888e-06, + "loss": 3.4669, + "step": 6322 + }, + { + "epoch": 0.76, + "learning_rate": 1.5601258930724572e-06, + "loss": 3.4025, + "step": 6323 + }, + { + "epoch": 0.76, + "learning_rate": 1.558674517823478e-06, + "loss": 3.3958, + "step": 6324 + }, + { + "epoch": 0.76, + "learning_rate": 1.5572236933242346e-06, + "loss": 3.3649, + "step": 6325 + }, + { + "epoch": 0.76, + "learning_rate": 1.5557734198069107e-06, + "loss": 3.2997, + "step": 6326 + }, + { + "epoch": 0.76, + "learning_rate": 1.554323697503613e-06, + "loss": 3.3899, + "step": 6327 + }, + { + "epoch": 0.76, + "learning_rate": 1.552874526646353e-06, + "loss": 3.3007, + "step": 6328 + }, + { + "epoch": 0.76, + "learning_rate": 1.5514259074670562e-06, + "loss": 3.2949, + "step": 6329 + }, + { + "epoch": 0.76, + "learning_rate": 1.5499778401975584e-06, + "loss": 3.3765, + "step": 6330 + }, + { + "epoch": 0.76, + "learning_rate": 1.5485303250696126e-06, + "loss": 3.2951, + "step": 6331 + }, + { + "epoch": 0.76, + "learning_rate": 1.5470833623148728e-06, + "loss": 3.3779, + "step": 6332 + }, + { + "epoch": 0.76, + "learning_rate": 1.5456369521649161e-06, + "loss": 3.391, + "step": 6333 + }, + { + "epoch": 0.76, + "learning_rate": 1.544191094851224e-06, + "loss": 3.3492, + "step": 6334 + }, + { + "epoch": 0.76, + "learning_rate": 1.5427457906051912e-06, + "loss": 3.329, + "step": 6335 + }, + { + "epoch": 0.76, + "learning_rate": 1.5413010396581251e-06, + "loss": 3.3799, + "step": 6336 + }, + { + "epoch": 0.76, + "learning_rate": 1.539856842241243e-06, + "loss": 3.3212, + "step": 6337 + }, + { + "epoch": 0.76, + "learning_rate": 1.5384131985856726e-06, + "loss": 3.4231, + "step": 6338 + }, + { + "epoch": 0.76, + "learning_rate": 1.5369701089224587e-06, + "loss": 3.5129, + "step": 6339 + }, + { + "epoch": 0.76, + "learning_rate": 1.5355275734825504e-06, + "loss": 3.4533, + "step": 6340 + }, + { + "epoch": 0.76, + "learning_rate": 1.5340855924968118e-06, + "loss": 3.446, + "step": 6341 + }, + { + "epoch": 0.76, + "learning_rate": 1.5326441661960179e-06, + "loss": 3.4603, + "step": 6342 + }, + { + "epoch": 0.76, + "learning_rate": 1.531203294810854e-06, + "loss": 3.473, + "step": 6343 + }, + { + "epoch": 0.76, + "learning_rate": 1.529762978571917e-06, + "loss": 3.4341, + "step": 6344 + }, + { + "epoch": 0.76, + "learning_rate": 1.5283232177097146e-06, + "loss": 3.436, + "step": 6345 + }, + { + "epoch": 0.76, + "learning_rate": 1.5268840124546697e-06, + "loss": 3.5289, + "step": 6346 + }, + { + "epoch": 0.76, + "learning_rate": 1.525445363037107e-06, + "loss": 3.3587, + "step": 6347 + }, + { + "epoch": 0.76, + "learning_rate": 1.524007269687272e-06, + "loss": 3.4292, + "step": 6348 + }, + { + "epoch": 0.76, + "learning_rate": 1.5225697326353168e-06, + "loss": 3.4307, + "step": 6349 + }, + { + "epoch": 0.76, + "learning_rate": 1.5211327521113035e-06, + "loss": 3.4495, + "step": 6350 + }, + { + "epoch": 0.76, + "learning_rate": 1.519696328345206e-06, + "loss": 3.4115, + "step": 6351 + }, + { + "epoch": 0.76, + "learning_rate": 1.5182604615669134e-06, + "loss": 3.3568, + "step": 6352 + }, + { + "epoch": 0.76, + "learning_rate": 1.516825152006216e-06, + "loss": 3.401, + "step": 6353 + }, + { + "epoch": 0.76, + "learning_rate": 1.515390399892827e-06, + "loss": 3.2935, + "step": 6354 + }, + { + "epoch": 0.76, + "learning_rate": 1.5139562054563584e-06, + "loss": 3.4579, + "step": 6355 + }, + { + "epoch": 0.76, + "learning_rate": 1.5125225689263424e-06, + "loss": 3.3753, + "step": 6356 + }, + { + "epoch": 0.76, + "learning_rate": 1.5110894905322164e-06, + "loss": 3.3654, + "step": 6357 + }, + { + "epoch": 0.76, + "learning_rate": 1.509656970503332e-06, + "loss": 3.3967, + "step": 6358 + }, + { + "epoch": 0.76, + "learning_rate": 1.508225009068946e-06, + "loss": 3.3738, + "step": 6359 + }, + { + "epoch": 0.76, + "learning_rate": 1.5067936064582362e-06, + "loss": 3.3935, + "step": 6360 + }, + { + "epoch": 0.76, + "learning_rate": 1.5053627629002764e-06, + "loss": 3.4074, + "step": 6361 + }, + { + "epoch": 0.76, + "learning_rate": 1.5039324786240655e-06, + "loss": 3.5039, + "step": 6362 + }, + { + "epoch": 0.76, + "learning_rate": 1.5025027538585007e-06, + "loss": 3.3312, + "step": 6363 + }, + { + "epoch": 0.76, + "learning_rate": 1.501073588832399e-06, + "loss": 3.3891, + "step": 6364 + }, + { + "epoch": 0.76, + "learning_rate": 1.4996449837744831e-06, + "loss": 3.369, + "step": 6365 + }, + { + "epoch": 0.76, + "learning_rate": 1.498216938913386e-06, + "loss": 3.2923, + "step": 6366 + }, + { + "epoch": 0.76, + "learning_rate": 1.4967894544776513e-06, + "loss": 3.4318, + "step": 6367 + }, + { + "epoch": 0.76, + "learning_rate": 1.4953625306957382e-06, + "loss": 3.3449, + "step": 6368 + }, + { + "epoch": 0.76, + "learning_rate": 1.4939361677960045e-06, + "loss": 3.4493, + "step": 6369 + }, + { + "epoch": 0.76, + "learning_rate": 1.4925103660067324e-06, + "loss": 3.518, + "step": 6370 + }, + { + "epoch": 0.76, + "learning_rate": 1.491085125556101e-06, + "loss": 3.459, + "step": 6371 + }, + { + "epoch": 0.76, + "learning_rate": 1.4896604466722104e-06, + "loss": 3.4377, + "step": 6372 + }, + { + "epoch": 0.76, + "learning_rate": 1.4882363295830642e-06, + "loss": 3.3639, + "step": 6373 + }, + { + "epoch": 0.76, + "learning_rate": 1.4868127745165784e-06, + "loss": 3.4636, + "step": 6374 + }, + { + "epoch": 0.76, + "learning_rate": 1.4853897817005768e-06, + "loss": 3.424, + "step": 6375 + }, + { + "epoch": 0.76, + "learning_rate": 1.4839673513627994e-06, + "loss": 3.4538, + "step": 6376 + }, + { + "epoch": 0.76, + "learning_rate": 1.4825454837308894e-06, + "loss": 3.4244, + "step": 6377 + }, + { + "epoch": 0.76, + "learning_rate": 1.4811241790324026e-06, + "loss": 3.3537, + "step": 6378 + }, + { + "epoch": 0.76, + "learning_rate": 1.479703437494805e-06, + "loss": 3.3505, + "step": 6379 + }, + { + "epoch": 0.76, + "learning_rate": 1.4782832593454715e-06, + "loss": 3.2869, + "step": 6380 + }, + { + "epoch": 0.76, + "learning_rate": 1.476863644811688e-06, + "loss": 3.5014, + "step": 6381 + }, + { + "epoch": 0.76, + "learning_rate": 1.475444594120648e-06, + "loss": 3.3949, + "step": 6382 + }, + { + "epoch": 0.76, + "learning_rate": 1.4740261074994588e-06, + "loss": 3.2444, + "step": 6383 + }, + { + "epoch": 0.76, + "learning_rate": 1.4726081851751338e-06, + "loss": 3.4396, + "step": 6384 + }, + { + "epoch": 0.76, + "learning_rate": 1.4711908273745978e-06, + "loss": 3.3739, + "step": 6385 + }, + { + "epoch": 0.76, + "learning_rate": 1.4697740343246846e-06, + "loss": 3.328, + "step": 6386 + }, + { + "epoch": 0.76, + "learning_rate": 1.4683578062521376e-06, + "loss": 3.4126, + "step": 6387 + }, + { + "epoch": 0.76, + "learning_rate": 1.4669421433836079e-06, + "loss": 3.4204, + "step": 6388 + }, + { + "epoch": 0.76, + "learning_rate": 1.4655270459456633e-06, + "loss": 3.3435, + "step": 6389 + }, + { + "epoch": 0.76, + "learning_rate": 1.4641125141647695e-06, + "loss": 3.4024, + "step": 6390 + }, + { + "epoch": 0.77, + "learning_rate": 1.4626985482673134e-06, + "loss": 3.3968, + "step": 6391 + }, + { + "epoch": 0.77, + "learning_rate": 1.4612851484795843e-06, + "loss": 3.4154, + "step": 6392 + }, + { + "epoch": 0.77, + "learning_rate": 1.4598723150277827e-06, + "loss": 3.4507, + "step": 6393 + }, + { + "epoch": 0.77, + "learning_rate": 1.4584600481380179e-06, + "loss": 3.3536, + "step": 6394 + }, + { + "epoch": 0.77, + "learning_rate": 1.4570483480363102e-06, + "loss": 3.3684, + "step": 6395 + }, + { + "epoch": 0.77, + "learning_rate": 1.4556372149485854e-06, + "loss": 3.4855, + "step": 6396 + }, + { + "epoch": 0.77, + "learning_rate": 1.4542266491006868e-06, + "loss": 3.3354, + "step": 6397 + }, + { + "epoch": 0.77, + "learning_rate": 1.4528166507183544e-06, + "loss": 3.342, + "step": 6398 + }, + { + "epoch": 0.77, + "learning_rate": 1.45140722002725e-06, + "loss": 3.2823, + "step": 6399 + }, + { + "epoch": 0.77, + "learning_rate": 1.4499983572529359e-06, + "loss": 3.3267, + "step": 6400 + }, + { + "epoch": 0.77, + "learning_rate": 1.4485900626208876e-06, + "loss": 3.4202, + "step": 6401 + }, + { + "epoch": 0.77, + "learning_rate": 1.4471823363564886e-06, + "loss": 3.3377, + "step": 6402 + }, + { + "epoch": 0.77, + "learning_rate": 1.4457751786850304e-06, + "loss": 3.3629, + "step": 6403 + }, + { + "epoch": 0.77, + "learning_rate": 1.4443685898317145e-06, + "loss": 3.3782, + "step": 6404 + }, + { + "epoch": 0.77, + "learning_rate": 1.4429625700216548e-06, + "loss": 3.5173, + "step": 6405 + }, + { + "epoch": 0.77, + "learning_rate": 1.441557119479865e-06, + "loss": 3.4698, + "step": 6406 + }, + { + "epoch": 0.77, + "learning_rate": 1.4401522384312795e-06, + "loss": 3.3052, + "step": 6407 + }, + { + "epoch": 0.77, + "learning_rate": 1.4387479271007287e-06, + "loss": 3.5359, + "step": 6408 + }, + { + "epoch": 0.77, + "learning_rate": 1.4373441857129644e-06, + "loss": 3.4255, + "step": 6409 + }, + { + "epoch": 0.77, + "learning_rate": 1.4359410144926388e-06, + "loss": 3.33, + "step": 6410 + }, + { + "epoch": 0.77, + "learning_rate": 1.4345384136643164e-06, + "loss": 3.3461, + "step": 6411 + }, + { + "epoch": 0.77, + "learning_rate": 1.433136383452467e-06, + "loss": 3.3579, + "step": 6412 + }, + { + "epoch": 0.77, + "learning_rate": 1.4317349240814766e-06, + "loss": 3.359, + "step": 6413 + }, + { + "epoch": 0.77, + "learning_rate": 1.4303340357756285e-06, + "loss": 3.5336, + "step": 6414 + }, + { + "epoch": 0.77, + "learning_rate": 1.428933718759125e-06, + "loss": 3.3689, + "step": 6415 + }, + { + "epoch": 0.77, + "learning_rate": 1.4275339732560722e-06, + "loss": 3.4045, + "step": 6416 + }, + { + "epoch": 0.77, + "learning_rate": 1.4261347994904851e-06, + "loss": 3.3854, + "step": 6417 + }, + { + "epoch": 0.77, + "learning_rate": 1.4247361976862866e-06, + "loss": 3.4061, + "step": 6418 + }, + { + "epoch": 0.77, + "learning_rate": 1.4233381680673103e-06, + "loss": 3.382, + "step": 6419 + }, + { + "epoch": 0.77, + "learning_rate": 1.4219407108572947e-06, + "loss": 3.3425, + "step": 6420 + }, + { + "epoch": 0.77, + "learning_rate": 1.4205438262798916e-06, + "loss": 3.4267, + "step": 6421 + }, + { + "epoch": 0.77, + "learning_rate": 1.4191475145586569e-06, + "loss": 3.366, + "step": 6422 + }, + { + "epoch": 0.77, + "learning_rate": 1.4177517759170572e-06, + "loss": 3.3225, + "step": 6423 + }, + { + "epoch": 0.77, + "learning_rate": 1.4163566105784653e-06, + "loss": 3.4102, + "step": 6424 + }, + { + "epoch": 0.77, + "learning_rate": 1.4149620187661645e-06, + "loss": 3.4773, + "step": 6425 + }, + { + "epoch": 0.77, + "learning_rate": 1.4135680007033447e-06, + "loss": 3.4474, + "step": 6426 + }, + { + "epoch": 0.77, + "learning_rate": 1.4121745566131034e-06, + "loss": 3.4652, + "step": 6427 + }, + { + "epoch": 0.77, + "learning_rate": 1.41078168671845e-06, + "loss": 3.3754, + "step": 6428 + }, + { + "epoch": 0.77, + "learning_rate": 1.409389391242298e-06, + "loss": 3.388, + "step": 6429 + }, + { + "epoch": 0.77, + "learning_rate": 1.4079976704074704e-06, + "loss": 3.4859, + "step": 6430 + }, + { + "epoch": 0.77, + "learning_rate": 1.4066065244366984e-06, + "loss": 3.2696, + "step": 6431 + }, + { + "epoch": 0.77, + "learning_rate": 1.405215953552621e-06, + "loss": 3.344, + "step": 6432 + }, + { + "epoch": 0.77, + "learning_rate": 1.403825957977783e-06, + "loss": 3.4253, + "step": 6433 + }, + { + "epoch": 0.77, + "learning_rate": 1.402436537934645e-06, + "loss": 3.361, + "step": 6434 + }, + { + "epoch": 0.77, + "learning_rate": 1.4010476936455625e-06, + "loss": 3.3963, + "step": 6435 + }, + { + "epoch": 0.77, + "learning_rate": 1.3996594253328115e-06, + "loss": 3.4256, + "step": 6436 + }, + { + "epoch": 0.77, + "learning_rate": 1.398271733218569e-06, + "loss": 3.4399, + "step": 6437 + }, + { + "epoch": 0.77, + "learning_rate": 1.3968846175249206e-06, + "loss": 3.337, + "step": 6438 + }, + { + "epoch": 0.77, + "learning_rate": 1.395498078473861e-06, + "loss": 3.3773, + "step": 6439 + }, + { + "epoch": 0.77, + "learning_rate": 1.3941121162872912e-06, + "loss": 3.3582, + "step": 6440 + }, + { + "epoch": 0.77, + "learning_rate": 1.392726731187019e-06, + "loss": 3.3454, + "step": 6441 + }, + { + "epoch": 0.77, + "learning_rate": 1.3913419233947672e-06, + "loss": 3.4487, + "step": 6442 + }, + { + "epoch": 0.77, + "learning_rate": 1.3899576931321534e-06, + "loss": 3.4022, + "step": 6443 + }, + { + "epoch": 0.77, + "learning_rate": 1.3885740406207144e-06, + "loss": 3.3408, + "step": 6444 + }, + { + "epoch": 0.77, + "learning_rate": 1.3871909660818883e-06, + "loss": 3.3983, + "step": 6445 + }, + { + "epoch": 0.77, + "learning_rate": 1.3858084697370233e-06, + "loss": 3.5411, + "step": 6446 + }, + { + "epoch": 0.77, + "learning_rate": 1.3844265518073729e-06, + "loss": 3.4747, + "step": 6447 + }, + { + "epoch": 0.77, + "learning_rate": 1.3830452125140997e-06, + "loss": 3.3787, + "step": 6448 + }, + { + "epoch": 0.77, + "learning_rate": 1.3816644520782725e-06, + "loss": 3.381, + "step": 6449 + }, + { + "epoch": 0.77, + "learning_rate": 1.3802842707208713e-06, + "loss": 3.3638, + "step": 6450 + }, + { + "epoch": 0.77, + "learning_rate": 1.3789046686627756e-06, + "loss": 3.3899, + "step": 6451 + }, + { + "epoch": 0.77, + "learning_rate": 1.377525646124781e-06, + "loss": 3.2876, + "step": 6452 + }, + { + "epoch": 0.77, + "learning_rate": 1.3761472033275841e-06, + "loss": 3.3705, + "step": 6453 + }, + { + "epoch": 0.77, + "learning_rate": 1.3747693404917917e-06, + "loss": 3.3959, + "step": 6454 + }, + { + "epoch": 0.77, + "learning_rate": 1.3733920578379167e-06, + "loss": 3.403, + "step": 6455 + }, + { + "epoch": 0.77, + "learning_rate": 1.3720153555863792e-06, + "loss": 3.4397, + "step": 6456 + }, + { + "epoch": 0.77, + "learning_rate": 1.370639233957506e-06, + "loss": 3.5059, + "step": 6457 + }, + { + "epoch": 0.77, + "learning_rate": 1.3692636931715348e-06, + "loss": 3.4856, + "step": 6458 + }, + { + "epoch": 0.77, + "learning_rate": 1.367888733448602e-06, + "loss": 3.4467, + "step": 6459 + }, + { + "epoch": 0.77, + "learning_rate": 1.36651435500876e-06, + "loss": 3.3702, + "step": 6460 + }, + { + "epoch": 0.77, + "learning_rate": 1.365140558071964e-06, + "loss": 3.4189, + "step": 6461 + }, + { + "epoch": 0.77, + "learning_rate": 1.3637673428580756e-06, + "loss": 3.3411, + "step": 6462 + }, + { + "epoch": 0.77, + "learning_rate": 1.3623947095868639e-06, + "loss": 3.3648, + "step": 6463 + }, + { + "epoch": 0.77, + "learning_rate": 1.361022658478006e-06, + "loss": 3.4028, + "step": 6464 + }, + { + "epoch": 0.77, + "learning_rate": 1.3596511897510827e-06, + "loss": 3.3824, + "step": 6465 + }, + { + "epoch": 0.77, + "learning_rate": 1.358280303625587e-06, + "loss": 3.417, + "step": 6466 + }, + { + "epoch": 0.77, + "learning_rate": 1.3569100003209147e-06, + "loss": 3.3572, + "step": 6467 + }, + { + "epoch": 0.77, + "learning_rate": 1.3555402800563683e-06, + "loss": 3.4186, + "step": 6468 + }, + { + "epoch": 0.77, + "learning_rate": 1.3541711430511584e-06, + "loss": 3.3928, + "step": 6469 + }, + { + "epoch": 0.77, + "learning_rate": 1.3528025895243995e-06, + "loss": 3.4121, + "step": 6470 + }, + { + "epoch": 0.77, + "learning_rate": 1.35143461969512e-06, + "loss": 3.4627, + "step": 6471 + }, + { + "epoch": 0.77, + "learning_rate": 1.350067233782244e-06, + "loss": 3.3651, + "step": 6472 + }, + { + "epoch": 0.77, + "learning_rate": 1.3487004320046114e-06, + "loss": 3.446, + "step": 6473 + }, + { + "epoch": 0.78, + "learning_rate": 1.3473342145809649e-06, + "loss": 3.3477, + "step": 6474 + }, + { + "epoch": 0.78, + "learning_rate": 1.3459685817299535e-06, + "loss": 3.3339, + "step": 6475 + }, + { + "epoch": 0.78, + "learning_rate": 1.3446035336701325e-06, + "loss": 3.3598, + "step": 6476 + }, + { + "epoch": 0.78, + "learning_rate": 1.3432390706199643e-06, + "loss": 3.3817, + "step": 6477 + }, + { + "epoch": 0.78, + "learning_rate": 1.3418751927978168e-06, + "loss": 3.3822, + "step": 6478 + }, + { + "epoch": 0.78, + "learning_rate": 1.3405119004219697e-06, + "loss": 3.383, + "step": 6479 + }, + { + "epoch": 0.78, + "learning_rate": 1.3391491937105972e-06, + "loss": 3.4609, + "step": 6480 + }, + { + "epoch": 0.78, + "learning_rate": 1.3377870728817916e-06, + "loss": 3.4412, + "step": 6481 + }, + { + "epoch": 0.78, + "learning_rate": 1.3364255381535462e-06, + "loss": 3.4205, + "step": 6482 + }, + { + "epoch": 0.78, + "learning_rate": 1.3350645897437598e-06, + "loss": 3.3308, + "step": 6483 + }, + { + "epoch": 0.78, + "learning_rate": 1.3337042278702396e-06, + "loss": 3.3505, + "step": 6484 + }, + { + "epoch": 0.78, + "learning_rate": 1.3323444527506974e-06, + "loss": 3.4098, + "step": 6485 + }, + { + "epoch": 0.78, + "learning_rate": 1.3309852646027499e-06, + "loss": 3.3894, + "step": 6486 + }, + { + "epoch": 0.78, + "learning_rate": 1.3296266636439265e-06, + "loss": 3.3967, + "step": 6487 + }, + { + "epoch": 0.78, + "learning_rate": 1.3282686500916513e-06, + "loss": 3.5477, + "step": 6488 + }, + { + "epoch": 0.78, + "learning_rate": 1.3269112241632654e-06, + "loss": 3.3076, + "step": 6489 + }, + { + "epoch": 0.78, + "learning_rate": 1.3255543860760106e-06, + "loss": 3.2986, + "step": 6490 + }, + { + "epoch": 0.78, + "learning_rate": 1.324198136047034e-06, + "loss": 3.306, + "step": 6491 + }, + { + "epoch": 0.78, + "learning_rate": 1.3228424742933904e-06, + "loss": 3.5014, + "step": 6492 + }, + { + "epoch": 0.78, + "learning_rate": 1.32148740103204e-06, + "loss": 3.3867, + "step": 6493 + }, + { + "epoch": 0.78, + "learning_rate": 1.3201329164798472e-06, + "loss": 3.4565, + "step": 6494 + }, + { + "epoch": 0.78, + "learning_rate": 1.3187790208535878e-06, + "loss": 3.5365, + "step": 6495 + }, + { + "epoch": 0.78, + "learning_rate": 1.3174257143699342e-06, + "loss": 3.4632, + "step": 6496 + }, + { + "epoch": 0.78, + "learning_rate": 1.3160729972454729e-06, + "loss": 3.4173, + "step": 6497 + }, + { + "epoch": 0.78, + "learning_rate": 1.3147208696966923e-06, + "loss": 3.3959, + "step": 6498 + }, + { + "epoch": 0.78, + "learning_rate": 1.313369331939986e-06, + "loss": 3.4403, + "step": 6499 + }, + { + "epoch": 0.78, + "learning_rate": 1.3120183841916544e-06, + "loss": 3.4765, + "step": 6500 + }, + { + "epoch": 0.78, + "learning_rate": 1.310668026667904e-06, + "loss": 3.3919, + "step": 6501 + }, + { + "epoch": 0.78, + "learning_rate": 1.309318259584843e-06, + "loss": 3.4006, + "step": 6502 + }, + { + "epoch": 0.78, + "learning_rate": 1.3079690831584923e-06, + "loss": 3.438, + "step": 6503 + }, + { + "epoch": 0.78, + "learning_rate": 1.3066204976047725e-06, + "loss": 3.3095, + "step": 6504 + }, + { + "epoch": 0.78, + "learning_rate": 1.3052725031395103e-06, + "loss": 3.3905, + "step": 6505 + }, + { + "epoch": 0.78, + "learning_rate": 1.3039250999784397e-06, + "loss": 3.4497, + "step": 6506 + }, + { + "epoch": 0.78, + "learning_rate": 1.302578288337199e-06, + "loss": 3.4142, + "step": 6507 + }, + { + "epoch": 0.78, + "learning_rate": 1.3012320684313318e-06, + "loss": 3.4714, + "step": 6508 + }, + { + "epoch": 0.78, + "learning_rate": 1.2998864404762855e-06, + "loss": 3.516, + "step": 6509 + }, + { + "epoch": 0.78, + "learning_rate": 1.2985414046874172e-06, + "loss": 3.3847, + "step": 6510 + }, + { + "epoch": 0.78, + "learning_rate": 1.2971969612799856e-06, + "loss": 3.3223, + "step": 6511 + }, + { + "epoch": 0.78, + "learning_rate": 1.2958531104691547e-06, + "loss": 3.3276, + "step": 6512 + }, + { + "epoch": 0.78, + "learning_rate": 1.294509852469995e-06, + "loss": 3.4448, + "step": 6513 + }, + { + "epoch": 0.78, + "learning_rate": 1.2931671874974811e-06, + "loss": 3.3859, + "step": 6514 + }, + { + "epoch": 0.78, + "learning_rate": 1.2918251157664913e-06, + "loss": 3.507, + "step": 6515 + }, + { + "epoch": 0.78, + "learning_rate": 1.2904836374918161e-06, + "loss": 3.5502, + "step": 6516 + }, + { + "epoch": 0.78, + "learning_rate": 1.2891427528881396e-06, + "loss": 3.2946, + "step": 6517 + }, + { + "epoch": 0.78, + "learning_rate": 1.2878024621700603e-06, + "loss": 3.4782, + "step": 6518 + }, + { + "epoch": 0.78, + "learning_rate": 1.2864627655520783e-06, + "loss": 3.3539, + "step": 6519 + }, + { + "epoch": 0.78, + "learning_rate": 1.285123663248598e-06, + "loss": 3.4442, + "step": 6520 + }, + { + "epoch": 0.78, + "learning_rate": 1.2837851554739271e-06, + "loss": 3.4008, + "step": 6521 + }, + { + "epoch": 0.78, + "learning_rate": 1.2824472424422868e-06, + "loss": 3.4384, + "step": 6522 + }, + { + "epoch": 0.78, + "learning_rate": 1.2811099243677888e-06, + "loss": 3.4493, + "step": 6523 + }, + { + "epoch": 0.78, + "learning_rate": 1.2797732014644643e-06, + "loss": 3.3786, + "step": 6524 + }, + { + "epoch": 0.78, + "learning_rate": 1.2784370739462364e-06, + "loss": 3.3825, + "step": 6525 + }, + { + "epoch": 0.78, + "learning_rate": 1.2771015420269434e-06, + "loss": 3.4773, + "step": 6526 + }, + { + "epoch": 0.78, + "learning_rate": 1.2757666059203227e-06, + "loss": 3.3501, + "step": 6527 + }, + { + "epoch": 0.78, + "learning_rate": 1.2744322658400166e-06, + "loss": 3.3586, + "step": 6528 + }, + { + "epoch": 0.78, + "learning_rate": 1.2730985219995723e-06, + "loss": 3.464, + "step": 6529 + }, + { + "epoch": 0.78, + "learning_rate": 1.271765374612446e-06, + "loss": 3.4631, + "step": 6530 + }, + { + "epoch": 0.78, + "learning_rate": 1.270432823891989e-06, + "loss": 3.355, + "step": 6531 + }, + { + "epoch": 0.78, + "learning_rate": 1.2691008700514685e-06, + "loss": 3.3544, + "step": 6532 + }, + { + "epoch": 0.78, + "learning_rate": 1.2677695133040446e-06, + "loss": 3.3207, + "step": 6533 + }, + { + "epoch": 0.78, + "learning_rate": 1.266438753862792e-06, + "loss": 3.4686, + "step": 6534 + }, + { + "epoch": 0.78, + "learning_rate": 1.2651085919406836e-06, + "loss": 3.3386, + "step": 6535 + }, + { + "epoch": 0.78, + "learning_rate": 1.263779027750599e-06, + "loss": 3.3655, + "step": 6536 + }, + { + "epoch": 0.78, + "learning_rate": 1.262450061505322e-06, + "loss": 3.4404, + "step": 6537 + }, + { + "epoch": 0.78, + "learning_rate": 1.2611216934175397e-06, + "loss": 3.5241, + "step": 6538 + }, + { + "epoch": 0.78, + "learning_rate": 1.2597939236998424e-06, + "loss": 3.4088, + "step": 6539 + }, + { + "epoch": 0.78, + "learning_rate": 1.258466752564732e-06, + "loss": 3.379, + "step": 6540 + }, + { + "epoch": 0.78, + "learning_rate": 1.2571401802246019e-06, + "loss": 3.51, + "step": 6541 + }, + { + "epoch": 0.78, + "learning_rate": 1.2558142068917611e-06, + "loss": 3.4931, + "step": 6542 + }, + { + "epoch": 0.78, + "learning_rate": 1.2544888327784184e-06, + "loss": 3.4409, + "step": 6543 + }, + { + "epoch": 0.78, + "learning_rate": 1.2531640580966848e-06, + "loss": 3.4869, + "step": 6544 + }, + { + "epoch": 0.78, + "learning_rate": 1.2518398830585782e-06, + "loss": 3.3879, + "step": 6545 + }, + { + "epoch": 0.78, + "learning_rate": 1.2505163078760196e-06, + "loss": 3.4088, + "step": 6546 + }, + { + "epoch": 0.78, + "learning_rate": 1.2491933327608325e-06, + "loss": 3.3039, + "step": 6547 + }, + { + "epoch": 0.78, + "learning_rate": 1.2478709579247484e-06, + "loss": 3.4194, + "step": 6548 + }, + { + "epoch": 0.78, + "learning_rate": 1.2465491835793992e-06, + "loss": 3.3938, + "step": 6549 + }, + { + "epoch": 0.78, + "learning_rate": 1.2452280099363212e-06, + "loss": 3.4172, + "step": 6550 + }, + { + "epoch": 0.78, + "learning_rate": 1.2439074372069554e-06, + "loss": 3.3788, + "step": 6551 + }, + { + "epoch": 0.78, + "learning_rate": 1.242587465602646e-06, + "loss": 3.3954, + "step": 6552 + }, + { + "epoch": 0.78, + "learning_rate": 1.2412680953346412e-06, + "loss": 3.4079, + "step": 6553 + }, + { + "epoch": 0.78, + "learning_rate": 1.2399493266140917e-06, + "loss": 3.3962, + "step": 6554 + }, + { + "epoch": 0.78, + "learning_rate": 1.238631159652056e-06, + "loss": 3.4867, + "step": 6555 + }, + { + "epoch": 0.78, + "learning_rate": 1.2373135946594928e-06, + "loss": 3.4209, + "step": 6556 + }, + { + "epoch": 0.78, + "learning_rate": 1.2359966318472645e-06, + "loss": 3.4996, + "step": 6557 + }, + { + "epoch": 0.79, + "learning_rate": 1.234680271426138e-06, + "loss": 3.5133, + "step": 6558 + }, + { + "epoch": 0.79, + "learning_rate": 1.2333645136067835e-06, + "loss": 3.5196, + "step": 6559 + }, + { + "epoch": 0.79, + "learning_rate": 1.2320493585997734e-06, + "loss": 3.3696, + "step": 6560 + }, + { + "epoch": 0.79, + "learning_rate": 1.23073480661559e-06, + "loss": 3.4505, + "step": 6561 + }, + { + "epoch": 0.79, + "learning_rate": 1.2294208578646078e-06, + "loss": 3.432, + "step": 6562 + }, + { + "epoch": 0.79, + "learning_rate": 1.2281075125571157e-06, + "loss": 3.3821, + "step": 6563 + }, + { + "epoch": 0.79, + "learning_rate": 1.2267947709033002e-06, + "loss": 3.3001, + "step": 6564 + }, + { + "epoch": 0.79, + "learning_rate": 1.2254826331132519e-06, + "loss": 3.4438, + "step": 6565 + }, + { + "epoch": 0.79, + "learning_rate": 1.2241710993969647e-06, + "loss": 3.3637, + "step": 6566 + }, + { + "epoch": 0.79, + "learning_rate": 1.2228601699643405e-06, + "loss": 3.2993, + "step": 6567 + }, + { + "epoch": 0.79, + "learning_rate": 1.2215498450251744e-06, + "loss": 3.4224, + "step": 6568 + }, + { + "epoch": 0.79, + "learning_rate": 1.2202401247891766e-06, + "loss": 3.2483, + "step": 6569 + }, + { + "epoch": 0.79, + "learning_rate": 1.218931009465949e-06, + "loss": 3.3947, + "step": 6570 + }, + { + "epoch": 0.79, + "learning_rate": 1.2176224992650072e-06, + "loss": 3.4697, + "step": 6571 + }, + { + "epoch": 0.79, + "learning_rate": 1.2163145943957633e-06, + "loss": 3.434, + "step": 6572 + }, + { + "epoch": 0.79, + "learning_rate": 1.2150072950675345e-06, + "loss": 3.371, + "step": 6573 + }, + { + "epoch": 0.79, + "learning_rate": 1.2137006014895398e-06, + "loss": 3.3788, + "step": 6574 + }, + { + "epoch": 0.79, + "learning_rate": 1.2123945138709059e-06, + "loss": 3.3873, + "step": 6575 + }, + { + "epoch": 0.79, + "learning_rate": 1.2110890324206543e-06, + "loss": 3.3969, + "step": 6576 + }, + { + "epoch": 0.79, + "learning_rate": 1.2097841573477192e-06, + "loss": 3.4074, + "step": 6577 + }, + { + "epoch": 0.79, + "learning_rate": 1.2084798888609277e-06, + "loss": 3.4691, + "step": 6578 + }, + { + "epoch": 0.79, + "learning_rate": 1.207176227169019e-06, + "loss": 3.4551, + "step": 6579 + }, + { + "epoch": 0.79, + "learning_rate": 1.20587317248063e-06, + "loss": 3.3847, + "step": 6580 + }, + { + "epoch": 0.79, + "learning_rate": 1.2045707250043009e-06, + "loss": 3.4046, + "step": 6581 + }, + { + "epoch": 0.79, + "learning_rate": 1.2032688849484742e-06, + "loss": 3.4996, + "step": 6582 + }, + { + "epoch": 0.79, + "learning_rate": 1.2019676525215007e-06, + "loss": 3.4503, + "step": 6583 + }, + { + "epoch": 0.79, + "learning_rate": 1.2006670279316246e-06, + "loss": 3.3815, + "step": 6584 + }, + { + "epoch": 0.79, + "learning_rate": 1.1993670113870026e-06, + "loss": 3.4291, + "step": 6585 + }, + { + "epoch": 0.79, + "learning_rate": 1.198067603095684e-06, + "loss": 3.471, + "step": 6586 + }, + { + "epoch": 0.79, + "learning_rate": 1.1967688032656305e-06, + "loss": 3.3462, + "step": 6587 + }, + { + "epoch": 0.79, + "learning_rate": 1.1954706121047004e-06, + "loss": 3.4313, + "step": 6588 + }, + { + "epoch": 0.79, + "learning_rate": 1.1941730298206566e-06, + "loss": 3.3287, + "step": 6589 + }, + { + "epoch": 0.79, + "learning_rate": 1.1928760566211623e-06, + "loss": 3.4066, + "step": 6590 + }, + { + "epoch": 0.79, + "learning_rate": 1.1915796927137879e-06, + "loss": 3.4206, + "step": 6591 + }, + { + "epoch": 0.79, + "learning_rate": 1.1902839383060021e-06, + "loss": 3.3809, + "step": 6592 + }, + { + "epoch": 0.79, + "learning_rate": 1.188988793605178e-06, + "loss": 3.4597, + "step": 6593 + }, + { + "epoch": 0.79, + "learning_rate": 1.1876942588185902e-06, + "loss": 3.4229, + "step": 6594 + }, + { + "epoch": 0.79, + "learning_rate": 1.1864003341534158e-06, + "loss": 3.414, + "step": 6595 + }, + { + "epoch": 0.79, + "learning_rate": 1.1851070198167346e-06, + "loss": 3.449, + "step": 6596 + }, + { + "epoch": 0.79, + "learning_rate": 1.183814316015528e-06, + "loss": 3.3446, + "step": 6597 + }, + { + "epoch": 0.79, + "learning_rate": 1.1825222229566818e-06, + "loss": 3.5515, + "step": 6598 + }, + { + "epoch": 0.79, + "learning_rate": 1.181230740846982e-06, + "loss": 3.3999, + "step": 6599 + }, + { + "epoch": 0.79, + "learning_rate": 1.1799398698931175e-06, + "loss": 3.3885, + "step": 6600 + }, + { + "epoch": 0.79, + "learning_rate": 1.1786496103016786e-06, + "loss": 3.4485, + "step": 6601 + }, + { + "epoch": 0.79, + "learning_rate": 1.1773599622791594e-06, + "loss": 3.4203, + "step": 6602 + }, + { + "epoch": 0.79, + "learning_rate": 1.1760709260319525e-06, + "loss": 3.3919, + "step": 6603 + }, + { + "epoch": 0.79, + "learning_rate": 1.1747825017663606e-06, + "loss": 3.4385, + "step": 6604 + }, + { + "epoch": 0.79, + "learning_rate": 1.1734946896885768e-06, + "loss": 3.3466, + "step": 6605 + }, + { + "epoch": 0.79, + "learning_rate": 1.1722074900047093e-06, + "loss": 3.5036, + "step": 6606 + }, + { + "epoch": 0.79, + "learning_rate": 1.1709209029207541e-06, + "loss": 3.3926, + "step": 6607 + }, + { + "epoch": 0.79, + "learning_rate": 1.1696349286426224e-06, + "loss": 3.477, + "step": 6608 + }, + { + "epoch": 0.79, + "learning_rate": 1.168349567376119e-06, + "loss": 3.4467, + "step": 6609 + }, + { + "epoch": 0.79, + "learning_rate": 1.1670648193269545e-06, + "loss": 3.3649, + "step": 6610 + }, + { + "epoch": 0.79, + "learning_rate": 1.165780684700737e-06, + "loss": 3.3771, + "step": 6611 + }, + { + "epoch": 0.79, + "learning_rate": 1.1644971637029844e-06, + "loss": 3.4626, + "step": 6612 + }, + { + "epoch": 0.79, + "learning_rate": 1.1632142565391057e-06, + "loss": 3.3556, + "step": 6613 + }, + { + "epoch": 0.79, + "learning_rate": 1.1619319634144232e-06, + "loss": 3.4726, + "step": 6614 + }, + { + "epoch": 0.79, + "learning_rate": 1.1606502845341494e-06, + "loss": 3.3649, + "step": 6615 + }, + { + "epoch": 0.79, + "learning_rate": 1.1593692201034085e-06, + "loss": 3.3411, + "step": 6616 + }, + { + "epoch": 0.79, + "learning_rate": 1.1580887703272197e-06, + "loss": 3.4676, + "step": 6617 + }, + { + "epoch": 0.79, + "learning_rate": 1.156808935410507e-06, + "loss": 3.4423, + "step": 6618 + }, + { + "epoch": 0.79, + "learning_rate": 1.155529715558094e-06, + "loss": 3.3887, + "step": 6619 + }, + { + "epoch": 0.79, + "learning_rate": 1.1542511109747113e-06, + "loss": 3.4251, + "step": 6620 + }, + { + "epoch": 0.79, + "learning_rate": 1.1529731218649803e-06, + "loss": 3.4253, + "step": 6621 + }, + { + "epoch": 0.79, + "learning_rate": 1.1516957484334362e-06, + "loss": 3.5815, + "step": 6622 + }, + { + "epoch": 0.79, + "learning_rate": 1.1504189908845047e-06, + "loss": 3.4606, + "step": 6623 + }, + { + "epoch": 0.79, + "learning_rate": 1.1491428494225227e-06, + "loss": 3.306, + "step": 6624 + }, + { + "epoch": 0.79, + "learning_rate": 1.1478673242517214e-06, + "loss": 3.372, + "step": 6625 + }, + { + "epoch": 0.79, + "learning_rate": 1.1465924155762364e-06, + "loss": 3.3102, + "step": 6626 + }, + { + "epoch": 0.79, + "learning_rate": 1.1453181236001026e-06, + "loss": 3.3133, + "step": 6627 + }, + { + "epoch": 0.79, + "learning_rate": 1.1440444485272622e-06, + "loss": 3.5559, + "step": 6628 + }, + { + "epoch": 0.79, + "learning_rate": 1.1427713905615478e-06, + "loss": 3.4368, + "step": 6629 + }, + { + "epoch": 0.79, + "learning_rate": 1.1414989499067048e-06, + "loss": 3.5039, + "step": 6630 + }, + { + "epoch": 0.79, + "learning_rate": 1.1402271267663722e-06, + "loss": 3.3996, + "step": 6631 + }, + { + "epoch": 0.79, + "learning_rate": 1.1389559213440937e-06, + "loss": 3.3294, + "step": 6632 + }, + { + "epoch": 0.79, + "learning_rate": 1.137685333843312e-06, + "loss": 3.2611, + "step": 6633 + }, + { + "epoch": 0.79, + "learning_rate": 1.136415364467372e-06, + "loss": 3.4784, + "step": 6634 + }, + { + "epoch": 0.79, + "learning_rate": 1.1351460134195186e-06, + "loss": 3.434, + "step": 6635 + }, + { + "epoch": 0.79, + "learning_rate": 1.133877280902902e-06, + "loss": 3.5377, + "step": 6636 + }, + { + "epoch": 0.79, + "learning_rate": 1.1326091671205674e-06, + "loss": 3.4341, + "step": 6637 + }, + { + "epoch": 0.79, + "learning_rate": 1.131341672275465e-06, + "loss": 3.3888, + "step": 6638 + }, + { + "epoch": 0.79, + "learning_rate": 1.130074796570444e-06, + "loss": 3.3746, + "step": 6639 + }, + { + "epoch": 0.79, + "learning_rate": 1.1288085402082555e-06, + "loss": 3.4433, + "step": 6640 + }, + { + "epoch": 0.8, + "learning_rate": 1.1275429033915515e-06, + "loss": 3.3912, + "step": 6641 + }, + { + "epoch": 0.8, + "learning_rate": 1.126277886322882e-06, + "loss": 3.3813, + "step": 6642 + }, + { + "epoch": 0.8, + "learning_rate": 1.1250134892047039e-06, + "loss": 3.4223, + "step": 6643 + }, + { + "epoch": 0.8, + "learning_rate": 1.1237497122393699e-06, + "loss": 3.4724, + "step": 6644 + }, + { + "epoch": 0.8, + "learning_rate": 1.1224865556291349e-06, + "loss": 3.4574, + "step": 6645 + }, + { + "epoch": 0.8, + "learning_rate": 1.1212240195761543e-06, + "loss": 3.4543, + "step": 6646 + }, + { + "epoch": 0.8, + "learning_rate": 1.1199621042824843e-06, + "loss": 3.4352, + "step": 6647 + }, + { + "epoch": 0.8, + "learning_rate": 1.1187008099500807e-06, + "loss": 3.4583, + "step": 6648 + }, + { + "epoch": 0.8, + "learning_rate": 1.1174401367808051e-06, + "loss": 3.3115, + "step": 6649 + }, + { + "epoch": 0.8, + "learning_rate": 1.1161800849764104e-06, + "loss": 3.3633, + "step": 6650 + }, + { + "epoch": 0.8, + "learning_rate": 1.1149206547385588e-06, + "loss": 3.366, + "step": 6651 + }, + { + "epoch": 0.8, + "learning_rate": 1.1136618462688081e-06, + "loss": 3.3213, + "step": 6652 + }, + { + "epoch": 0.8, + "learning_rate": 1.1124036597686188e-06, + "loss": 3.4566, + "step": 6653 + }, + { + "epoch": 0.8, + "learning_rate": 1.1111460954393506e-06, + "loss": 3.474, + "step": 6654 + }, + { + "epoch": 0.8, + "learning_rate": 1.1098891534822642e-06, + "loss": 3.3672, + "step": 6655 + }, + { + "epoch": 0.8, + "learning_rate": 1.1086328340985192e-06, + "loss": 3.4357, + "step": 6656 + }, + { + "epoch": 0.8, + "learning_rate": 1.1073771374891805e-06, + "loss": 3.4391, + "step": 6657 + }, + { + "epoch": 0.8, + "learning_rate": 1.1061220638552055e-06, + "loss": 3.374, + "step": 6658 + }, + { + "epoch": 0.8, + "learning_rate": 1.104867613397459e-06, + "loss": 3.5188, + "step": 6659 + }, + { + "epoch": 0.8, + "learning_rate": 1.1036137863167028e-06, + "loss": 3.4429, + "step": 6660 + }, + { + "epoch": 0.8, + "learning_rate": 1.102360582813599e-06, + "loss": 3.3964, + "step": 6661 + }, + { + "epoch": 0.8, + "learning_rate": 1.1011080030887107e-06, + "loss": 3.4615, + "step": 6662 + }, + { + "epoch": 0.8, + "learning_rate": 1.0998560473425003e-06, + "loss": 3.2944, + "step": 6663 + }, + { + "epoch": 0.8, + "learning_rate": 1.0986047157753293e-06, + "loss": 3.4066, + "step": 6664 + }, + { + "epoch": 0.8, + "learning_rate": 1.0973540085874657e-06, + "loss": 3.4037, + "step": 6665 + }, + { + "epoch": 0.8, + "learning_rate": 1.0961039259790662e-06, + "loss": 3.2916, + "step": 6666 + }, + { + "epoch": 0.8, + "learning_rate": 1.0948544681502e-06, + "loss": 3.3273, + "step": 6667 + }, + { + "epoch": 0.8, + "learning_rate": 1.093605635300825e-06, + "loss": 3.4696, + "step": 6668 + }, + { + "epoch": 0.8, + "learning_rate": 1.0923574276308085e-06, + "loss": 3.4095, + "step": 6669 + }, + { + "epoch": 0.8, + "learning_rate": 1.0911098453399116e-06, + "loss": 3.4465, + "step": 6670 + }, + { + "epoch": 0.8, + "learning_rate": 1.0898628886277984e-06, + "loss": 3.3673, + "step": 6671 + }, + { + "epoch": 0.8, + "learning_rate": 1.0886165576940299e-06, + "loss": 3.4913, + "step": 6672 + }, + { + "epoch": 0.8, + "learning_rate": 1.0873708527380733e-06, + "loss": 3.3586, + "step": 6673 + }, + { + "epoch": 0.8, + "learning_rate": 1.086125773959285e-06, + "loss": 3.402, + "step": 6674 + }, + { + "epoch": 0.8, + "learning_rate": 1.0848813215569325e-06, + "loss": 3.3241, + "step": 6675 + }, + { + "epoch": 0.8, + "learning_rate": 1.083637495730176e-06, + "loss": 3.4116, + "step": 6676 + }, + { + "epoch": 0.8, + "learning_rate": 1.0823942966780777e-06, + "loss": 3.5082, + "step": 6677 + }, + { + "epoch": 0.8, + "learning_rate": 1.0811517245995989e-06, + "loss": 3.4517, + "step": 6678 + }, + { + "epoch": 0.8, + "learning_rate": 1.0799097796936008e-06, + "loss": 3.3959, + "step": 6679 + }, + { + "epoch": 0.8, + "learning_rate": 1.0786684621588433e-06, + "loss": 3.3701, + "step": 6680 + }, + { + "epoch": 0.8, + "learning_rate": 1.0774277721939892e-06, + "loss": 3.4379, + "step": 6681 + }, + { + "epoch": 0.8, + "learning_rate": 1.0761877099975977e-06, + "loss": 3.3619, + "step": 6682 + }, + { + "epoch": 0.8, + "learning_rate": 1.0749482757681273e-06, + "loss": 3.3725, + "step": 6683 + }, + { + "epoch": 0.8, + "learning_rate": 1.0737094697039379e-06, + "loss": 3.4395, + "step": 6684 + }, + { + "epoch": 0.8, + "learning_rate": 1.072471292003287e-06, + "loss": 3.3759, + "step": 6685 + }, + { + "epoch": 0.8, + "learning_rate": 1.0712337428643355e-06, + "loss": 3.4546, + "step": 6686 + }, + { + "epoch": 0.8, + "learning_rate": 1.0699968224851365e-06, + "loss": 3.3761, + "step": 6687 + }, + { + "epoch": 0.8, + "learning_rate": 1.06876053106365e-06, + "loss": 3.4604, + "step": 6688 + }, + { + "epoch": 0.8, + "learning_rate": 1.0675248687977307e-06, + "loss": 3.385, + "step": 6689 + }, + { + "epoch": 0.8, + "learning_rate": 1.0662898358851348e-06, + "loss": 3.4176, + "step": 6690 + }, + { + "epoch": 0.8, + "learning_rate": 1.0650554325235158e-06, + "loss": 3.4405, + "step": 6691 + }, + { + "epoch": 0.8, + "learning_rate": 1.0638216589104288e-06, + "loss": 3.3994, + "step": 6692 + }, + { + "epoch": 0.8, + "learning_rate": 1.0625885152433246e-06, + "loss": 3.412, + "step": 6693 + }, + { + "epoch": 0.8, + "learning_rate": 1.0613560017195607e-06, + "loss": 3.3675, + "step": 6694 + }, + { + "epoch": 0.8, + "learning_rate": 1.060124118536382e-06, + "loss": 3.36, + "step": 6695 + }, + { + "epoch": 0.8, + "learning_rate": 1.0588928658909435e-06, + "loss": 3.465, + "step": 6696 + }, + { + "epoch": 0.8, + "learning_rate": 1.0576622439802937e-06, + "loss": 3.4291, + "step": 6697 + }, + { + "epoch": 0.8, + "learning_rate": 1.0564322530013815e-06, + "loss": 3.3416, + "step": 6698 + }, + { + "epoch": 0.8, + "learning_rate": 1.0552028931510538e-06, + "loss": 3.3491, + "step": 6699 + }, + { + "epoch": 0.8, + "learning_rate": 1.0539741646260586e-06, + "loss": 3.4371, + "step": 6700 + }, + { + "epoch": 0.8, + "learning_rate": 1.0527460676230393e-06, + "loss": 3.4487, + "step": 6701 + }, + { + "epoch": 0.8, + "learning_rate": 1.0515186023385455e-06, + "loss": 3.3379, + "step": 6702 + }, + { + "epoch": 0.8, + "learning_rate": 1.0502917689690139e-06, + "loss": 3.3937, + "step": 6703 + }, + { + "epoch": 0.8, + "learning_rate": 1.0490655677107925e-06, + "loss": 3.4508, + "step": 6704 + }, + { + "epoch": 0.8, + "learning_rate": 1.0478399987601202e-06, + "loss": 3.3985, + "step": 6705 + }, + { + "epoch": 0.8, + "learning_rate": 1.0466150623131378e-06, + "loss": 3.4345, + "step": 6706 + }, + { + "epoch": 0.8, + "learning_rate": 1.045390758565883e-06, + "loss": 3.5171, + "step": 6707 + }, + { + "epoch": 0.8, + "learning_rate": 1.0441670877142944e-06, + "loss": 3.307, + "step": 6708 + }, + { + "epoch": 0.8, + "learning_rate": 1.042944049954207e-06, + "loss": 3.401, + "step": 6709 + }, + { + "epoch": 0.8, + "learning_rate": 1.0417216454813588e-06, + "loss": 3.3658, + "step": 6710 + }, + { + "epoch": 0.8, + "learning_rate": 1.040499874491379e-06, + "loss": 3.402, + "step": 6711 + }, + { + "epoch": 0.8, + "learning_rate": 1.0392787371798035e-06, + "loss": 3.3563, + "step": 6712 + }, + { + "epoch": 0.8, + "learning_rate": 1.038058233742062e-06, + "loss": 3.5231, + "step": 6713 + }, + { + "epoch": 0.8, + "learning_rate": 1.0368383643734835e-06, + "loss": 3.3203, + "step": 6714 + }, + { + "epoch": 0.8, + "learning_rate": 1.0356191292692963e-06, + "loss": 3.4507, + "step": 6715 + }, + { + "epoch": 0.8, + "learning_rate": 1.0344005286246262e-06, + "loss": 3.4591, + "step": 6716 + }, + { + "epoch": 0.8, + "learning_rate": 1.033182562634497e-06, + "loss": 3.279, + "step": 6717 + }, + { + "epoch": 0.8, + "learning_rate": 1.031965231493835e-06, + "loss": 3.4315, + "step": 6718 + }, + { + "epoch": 0.8, + "learning_rate": 1.03074853539746e-06, + "loss": 3.3652, + "step": 6719 + }, + { + "epoch": 0.8, + "learning_rate": 1.0295324745400925e-06, + "loss": 3.314, + "step": 6720 + }, + { + "epoch": 0.8, + "learning_rate": 1.0283170491163509e-06, + "loss": 3.3581, + "step": 6721 + }, + { + "epoch": 0.8, + "learning_rate": 1.0271022593207508e-06, + "loss": 3.363, + "step": 6722 + }, + { + "epoch": 0.8, + "learning_rate": 1.0258881053477089e-06, + "loss": 3.4345, + "step": 6723 + }, + { + "epoch": 0.8, + "learning_rate": 1.0246745873915358e-06, + "loss": 3.4086, + "step": 6724 + }, + { + "epoch": 0.81, + "learning_rate": 1.0234617056464464e-06, + "loss": 3.4328, + "step": 6725 + }, + { + "epoch": 0.81, + "learning_rate": 1.022249460306548e-06, + "loss": 3.5711, + "step": 6726 + }, + { + "epoch": 0.81, + "learning_rate": 1.0210378515658492e-06, + "loss": 3.3715, + "step": 6727 + }, + { + "epoch": 0.81, + "learning_rate": 1.0198268796182552e-06, + "loss": 3.3858, + "step": 6728 + }, + { + "epoch": 0.81, + "learning_rate": 1.0186165446575707e-06, + "loss": 3.3813, + "step": 6729 + }, + { + "epoch": 0.81, + "learning_rate": 1.0174068468774955e-06, + "loss": 3.3848, + "step": 6730 + }, + { + "epoch": 0.81, + "learning_rate": 1.0161977864716344e-06, + "loss": 3.3867, + "step": 6731 + }, + { + "epoch": 0.81, + "learning_rate": 1.0149893636334801e-06, + "loss": 3.3345, + "step": 6732 + }, + { + "epoch": 0.81, + "learning_rate": 1.013781578556432e-06, + "loss": 3.485, + "step": 6733 + }, + { + "epoch": 0.81, + "learning_rate": 1.0125744314337827e-06, + "loss": 3.4265, + "step": 6734 + }, + { + "epoch": 0.81, + "learning_rate": 1.0113679224587247e-06, + "loss": 3.43, + "step": 6735 + }, + { + "epoch": 0.81, + "learning_rate": 1.010162051824347e-06, + "loss": 3.4543, + "step": 6736 + }, + { + "epoch": 0.81, + "learning_rate": 1.0089568197236378e-06, + "loss": 3.5113, + "step": 6737 + }, + { + "epoch": 0.81, + "learning_rate": 1.0077522263494805e-06, + "loss": 3.3707, + "step": 6738 + }, + { + "epoch": 0.81, + "learning_rate": 1.0065482718946628e-06, + "loss": 3.3899, + "step": 6739 + }, + { + "epoch": 0.81, + "learning_rate": 1.0053449565518597e-06, + "loss": 3.3871, + "step": 6740 + }, + { + "epoch": 0.81, + "learning_rate": 1.0041422805136542e-06, + "loss": 3.3307, + "step": 6741 + }, + { + "epoch": 0.81, + "learning_rate": 1.0029402439725206e-06, + "loss": 3.3945, + "step": 6742 + }, + { + "epoch": 0.81, + "learning_rate": 1.0017388471208333e-06, + "loss": 3.3986, + "step": 6743 + }, + { + "epoch": 0.81, + "learning_rate": 1.0005380901508643e-06, + "loss": 3.4117, + "step": 6744 + }, + { + "epoch": 0.81, + "learning_rate": 9.99337973254782e-07, + "loss": 3.5041, + "step": 6745 + }, + { + "epoch": 0.81, + "learning_rate": 9.981384966246522e-07, + "loss": 3.4318, + "step": 6746 + }, + { + "epoch": 0.81, + "learning_rate": 9.969396604524429e-07, + "loss": 3.4711, + "step": 6747 + }, + { + "epoch": 0.81, + "learning_rate": 9.957414649300107e-07, + "loss": 3.3757, + "step": 6748 + }, + { + "epoch": 0.81, + "learning_rate": 9.945439102491183e-07, + "loss": 3.5498, + "step": 6749 + }, + { + "epoch": 0.81, + "learning_rate": 9.93346996601422e-07, + "loss": 3.4491, + "step": 6750 + }, + { + "epoch": 0.81, + "learning_rate": 9.921507241784751e-07, + "loss": 3.4497, + "step": 6751 + }, + { + "epoch": 0.81, + "learning_rate": 9.909550931717287e-07, + "loss": 3.3848, + "step": 6752 + }, + { + "epoch": 0.81, + "learning_rate": 9.89760103772533e-07, + "loss": 3.3145, + "step": 6753 + }, + { + "epoch": 0.81, + "learning_rate": 9.885657561721313e-07, + "loss": 3.5104, + "step": 6754 + }, + { + "epoch": 0.81, + "learning_rate": 9.873720505616719e-07, + "loss": 3.4435, + "step": 6755 + }, + { + "epoch": 0.81, + "learning_rate": 9.861789871321897e-07, + "loss": 3.3624, + "step": 6756 + }, + { + "epoch": 0.81, + "learning_rate": 9.849865660746266e-07, + "loss": 3.3318, + "step": 6757 + }, + { + "epoch": 0.81, + "learning_rate": 9.837947875798165e-07, + "loss": 3.335, + "step": 6758 + }, + { + "epoch": 0.81, + "learning_rate": 9.826036518384912e-07, + "loss": 3.4711, + "step": 6759 + }, + { + "epoch": 0.81, + "learning_rate": 9.814131590412806e-07, + "loss": 3.3085, + "step": 6760 + }, + { + "epoch": 0.81, + "learning_rate": 9.80223309378711e-07, + "loss": 3.4995, + "step": 6761 + }, + { + "epoch": 0.81, + "learning_rate": 9.790341030412048e-07, + "loss": 3.4485, + "step": 6762 + }, + { + "epoch": 0.81, + "learning_rate": 9.778455402190844e-07, + "loss": 3.4846, + "step": 6763 + }, + { + "epoch": 0.81, + "learning_rate": 9.766576211025674e-07, + "loss": 3.461, + "step": 6764 + }, + { + "epoch": 0.81, + "learning_rate": 9.754703458817671e-07, + "loss": 3.4457, + "step": 6765 + }, + { + "epoch": 0.81, + "learning_rate": 9.742837147466961e-07, + "loss": 3.3085, + "step": 6766 + }, + { + "epoch": 0.81, + "learning_rate": 9.73097727887262e-07, + "loss": 3.5041, + "step": 6767 + }, + { + "epoch": 0.81, + "learning_rate": 9.719123854932706e-07, + "loss": 3.3783, + "step": 6768 + }, + { + "epoch": 0.81, + "learning_rate": 9.707276877544224e-07, + "loss": 3.3647, + "step": 6769 + }, + { + "epoch": 0.81, + "learning_rate": 9.695436348603198e-07, + "loss": 3.3761, + "step": 6770 + }, + { + "epoch": 0.81, + "learning_rate": 9.683602270004567e-07, + "loss": 3.3834, + "step": 6771 + }, + { + "epoch": 0.81, + "learning_rate": 9.671774643642257e-07, + "loss": 3.2575, + "step": 6772 + }, + { + "epoch": 0.81, + "learning_rate": 9.659953471409161e-07, + "loss": 3.2792, + "step": 6773 + }, + { + "epoch": 0.81, + "learning_rate": 9.648138755197145e-07, + "loss": 3.4247, + "step": 6774 + }, + { + "epoch": 0.81, + "learning_rate": 9.636330496897011e-07, + "loss": 3.3299, + "step": 6775 + }, + { + "epoch": 0.81, + "learning_rate": 9.624528698398606e-07, + "loss": 3.4587, + "step": 6776 + }, + { + "epoch": 0.81, + "learning_rate": 9.612733361590631e-07, + "loss": 3.5099, + "step": 6777 + }, + { + "epoch": 0.81, + "learning_rate": 9.600944488360854e-07, + "loss": 3.3716, + "step": 6778 + }, + { + "epoch": 0.81, + "learning_rate": 9.589162080595949e-07, + "loss": 3.4279, + "step": 6779 + }, + { + "epoch": 0.81, + "learning_rate": 9.57738614018157e-07, + "loss": 3.4543, + "step": 6780 + }, + { + "epoch": 0.81, + "learning_rate": 9.565616669002331e-07, + "loss": 3.405, + "step": 6781 + }, + { + "epoch": 0.81, + "learning_rate": 9.553853668941858e-07, + "loss": 3.3339, + "step": 6782 + }, + { + "epoch": 0.81, + "learning_rate": 9.54209714188265e-07, + "loss": 3.453, + "step": 6783 + }, + { + "epoch": 0.81, + "learning_rate": 9.530347089706265e-07, + "loss": 3.4119, + "step": 6784 + }, + { + "epoch": 0.81, + "learning_rate": 9.51860351429314e-07, + "loss": 3.4034, + "step": 6785 + }, + { + "epoch": 0.81, + "learning_rate": 9.506866417522747e-07, + "loss": 3.3707, + "step": 6786 + }, + { + "epoch": 0.81, + "learning_rate": 9.495135801273487e-07, + "loss": 3.3775, + "step": 6787 + }, + { + "epoch": 0.81, + "learning_rate": 9.48341166742272e-07, + "loss": 3.3247, + "step": 6788 + }, + { + "epoch": 0.81, + "learning_rate": 9.471694017846767e-07, + "loss": 3.3913, + "step": 6789 + }, + { + "epoch": 0.81, + "learning_rate": 9.459982854420957e-07, + "loss": 3.439, + "step": 6790 + }, + { + "epoch": 0.81, + "learning_rate": 9.448278179019498e-07, + "loss": 3.3966, + "step": 6791 + }, + { + "epoch": 0.81, + "learning_rate": 9.436579993515654e-07, + "loss": 3.3631, + "step": 6792 + }, + { + "epoch": 0.81, + "learning_rate": 9.424888299781548e-07, + "loss": 3.3545, + "step": 6793 + }, + { + "epoch": 0.81, + "learning_rate": 9.413203099688367e-07, + "loss": 3.4826, + "step": 6794 + }, + { + "epoch": 0.81, + "learning_rate": 9.401524395106187e-07, + "loss": 3.4208, + "step": 6795 + }, + { + "epoch": 0.81, + "learning_rate": 9.389852187904075e-07, + "loss": 3.4619, + "step": 6796 + }, + { + "epoch": 0.81, + "learning_rate": 9.378186479950047e-07, + "loss": 3.3709, + "step": 6797 + }, + { + "epoch": 0.81, + "learning_rate": 9.366527273111086e-07, + "loss": 3.4522, + "step": 6798 + }, + { + "epoch": 0.81, + "learning_rate": 9.354874569253119e-07, + "loss": 3.4989, + "step": 6799 + }, + { + "epoch": 0.81, + "learning_rate": 9.343228370241075e-07, + "loss": 3.5079, + "step": 6800 + }, + { + "epoch": 0.81, + "learning_rate": 9.331588677938774e-07, + "loss": 3.4635, + "step": 6801 + }, + { + "epoch": 0.81, + "learning_rate": 9.31995549420906e-07, + "loss": 3.3402, + "step": 6802 + }, + { + "epoch": 0.81, + "learning_rate": 9.308328820913698e-07, + "loss": 3.3524, + "step": 6803 + }, + { + "epoch": 0.81, + "learning_rate": 9.296708659913422e-07, + "loss": 3.4018, + "step": 6804 + }, + { + "epoch": 0.81, + "learning_rate": 9.285095013067919e-07, + "loss": 3.3986, + "step": 6805 + }, + { + "epoch": 0.81, + "learning_rate": 9.273487882235844e-07, + "loss": 3.4357, + "step": 6806 + }, + { + "epoch": 0.81, + "learning_rate": 9.261887269274783e-07, + "loss": 3.3722, + "step": 6807 + }, + { + "epoch": 0.82, + "learning_rate": 9.250293176041325e-07, + "loss": 3.4166, + "step": 6808 + }, + { + "epoch": 0.82, + "learning_rate": 9.23870560439098e-07, + "loss": 3.3957, + "step": 6809 + }, + { + "epoch": 0.82, + "learning_rate": 9.227124556178219e-07, + "loss": 3.4224, + "step": 6810 + }, + { + "epoch": 0.82, + "learning_rate": 9.21555003325647e-07, + "loss": 3.3939, + "step": 6811 + }, + { + "epoch": 0.82, + "learning_rate": 9.203982037478116e-07, + "loss": 3.393, + "step": 6812 + }, + { + "epoch": 0.82, + "learning_rate": 9.192420570694532e-07, + "loss": 3.3027, + "step": 6813 + }, + { + "epoch": 0.82, + "learning_rate": 9.180865634755964e-07, + "loss": 3.4117, + "step": 6814 + }, + { + "epoch": 0.82, + "learning_rate": 9.169317231511698e-07, + "loss": 3.3689, + "step": 6815 + }, + { + "epoch": 0.82, + "learning_rate": 9.157775362809935e-07, + "loss": 3.5202, + "step": 6816 + }, + { + "epoch": 0.82, + "learning_rate": 9.146240030497832e-07, + "loss": 3.411, + "step": 6817 + }, + { + "epoch": 0.82, + "learning_rate": 9.134711236421495e-07, + "loss": 3.3275, + "step": 6818 + }, + { + "epoch": 0.82, + "learning_rate": 9.123188982426023e-07, + "loss": 3.4247, + "step": 6819 + }, + { + "epoch": 0.82, + "learning_rate": 9.111673270355392e-07, + "loss": 3.423, + "step": 6820 + }, + { + "epoch": 0.82, + "learning_rate": 9.100164102052628e-07, + "loss": 3.3494, + "step": 6821 + }, + { + "epoch": 0.82, + "learning_rate": 9.088661479359601e-07, + "loss": 3.4793, + "step": 6822 + }, + { + "epoch": 0.82, + "learning_rate": 9.077165404117238e-07, + "loss": 3.4857, + "step": 6823 + }, + { + "epoch": 0.82, + "learning_rate": 9.065675878165353e-07, + "loss": 3.375, + "step": 6824 + }, + { + "epoch": 0.82, + "learning_rate": 9.054192903342729e-07, + "loss": 3.4028, + "step": 6825 + }, + { + "epoch": 0.82, + "learning_rate": 9.042716481487091e-07, + "loss": 3.4382, + "step": 6826 + }, + { + "epoch": 0.82, + "learning_rate": 9.031246614435163e-07, + "loss": 3.4553, + "step": 6827 + }, + { + "epoch": 0.82, + "learning_rate": 9.019783304022528e-07, + "loss": 3.3671, + "step": 6828 + }, + { + "epoch": 0.82, + "learning_rate": 9.008326552083829e-07, + "loss": 3.4134, + "step": 6829 + }, + { + "epoch": 0.82, + "learning_rate": 8.996876360452556e-07, + "loss": 3.4404, + "step": 6830 + }, + { + "epoch": 0.82, + "learning_rate": 8.98543273096123e-07, + "loss": 3.3531, + "step": 6831 + }, + { + "epoch": 0.82, + "learning_rate": 8.973995665441282e-07, + "loss": 3.4154, + "step": 6832 + }, + { + "epoch": 0.82, + "learning_rate": 8.962565165723097e-07, + "loss": 3.334, + "step": 6833 + }, + { + "epoch": 0.82, + "learning_rate": 8.951141233636001e-07, + "loss": 3.4089, + "step": 6834 + }, + { + "epoch": 0.82, + "learning_rate": 8.939723871008321e-07, + "loss": 3.5248, + "step": 6835 + }, + { + "epoch": 0.82, + "learning_rate": 8.928313079667234e-07, + "loss": 3.4254, + "step": 6836 + }, + { + "epoch": 0.82, + "learning_rate": 8.916908861438972e-07, + "loss": 3.4239, + "step": 6837 + }, + { + "epoch": 0.82, + "learning_rate": 8.905511218148627e-07, + "loss": 3.3956, + "step": 6838 + }, + { + "epoch": 0.82, + "learning_rate": 8.894120151620306e-07, + "loss": 3.3962, + "step": 6839 + }, + { + "epoch": 0.82, + "learning_rate": 8.882735663677028e-07, + "loss": 3.3687, + "step": 6840 + }, + { + "epoch": 0.82, + "learning_rate": 8.871357756140763e-07, + "loss": 3.3538, + "step": 6841 + }, + { + "epoch": 0.82, + "learning_rate": 8.859986430832424e-07, + "loss": 3.4222, + "step": 6842 + }, + { + "epoch": 0.82, + "learning_rate": 8.848621689571907e-07, + "loss": 3.3533, + "step": 6843 + }, + { + "epoch": 0.82, + "learning_rate": 8.837263534177981e-07, + "loss": 3.3115, + "step": 6844 + }, + { + "epoch": 0.82, + "learning_rate": 8.825911966468442e-07, + "loss": 3.2913, + "step": 6845 + }, + { + "epoch": 0.82, + "learning_rate": 8.814566988259981e-07, + "loss": 3.4135, + "step": 6846 + }, + { + "epoch": 0.82, + "learning_rate": 8.803228601368252e-07, + "loss": 3.3574, + "step": 6847 + }, + { + "epoch": 0.82, + "learning_rate": 8.791896807607847e-07, + "loss": 3.4845, + "step": 6848 + }, + { + "epoch": 0.82, + "learning_rate": 8.780571608792304e-07, + "loss": 3.287, + "step": 6849 + }, + { + "epoch": 0.82, + "learning_rate": 8.769253006734101e-07, + "loss": 3.3331, + "step": 6850 + }, + { + "epoch": 0.82, + "learning_rate": 8.757941003244691e-07, + "loss": 3.2994, + "step": 6851 + }, + { + "epoch": 0.82, + "learning_rate": 8.746635600134429e-07, + "loss": 3.4147, + "step": 6852 + }, + { + "epoch": 0.82, + "learning_rate": 8.73533679921264e-07, + "loss": 3.3495, + "step": 6853 + }, + { + "epoch": 0.82, + "learning_rate": 8.72404460228758e-07, + "loss": 3.3645, + "step": 6854 + }, + { + "epoch": 0.82, + "learning_rate": 8.71275901116645e-07, + "loss": 3.4184, + "step": 6855 + }, + { + "epoch": 0.82, + "learning_rate": 8.701480027655396e-07, + "loss": 3.4488, + "step": 6856 + }, + { + "epoch": 0.82, + "learning_rate": 8.690207653559496e-07, + "loss": 3.2894, + "step": 6857 + }, + { + "epoch": 0.82, + "learning_rate": 8.678941890682807e-07, + "loss": 3.3642, + "step": 6858 + }, + { + "epoch": 0.82, + "learning_rate": 8.667682740828292e-07, + "loss": 3.4767, + "step": 6859 + }, + { + "epoch": 0.82, + "learning_rate": 8.656430205797856e-07, + "loss": 3.4301, + "step": 6860 + }, + { + "epoch": 0.82, + "learning_rate": 8.645184287392367e-07, + "loss": 3.4051, + "step": 6861 + }, + { + "epoch": 0.82, + "learning_rate": 8.633944987411607e-07, + "loss": 3.4582, + "step": 6862 + }, + { + "epoch": 0.82, + "learning_rate": 8.622712307654313e-07, + "loss": 3.5422, + "step": 6863 + }, + { + "epoch": 0.82, + "learning_rate": 8.611486249918193e-07, + "loss": 3.4492, + "step": 6864 + }, + { + "epoch": 0.82, + "learning_rate": 8.600266815999825e-07, + "loss": 3.3844, + "step": 6865 + }, + { + "epoch": 0.82, + "learning_rate": 8.589054007694808e-07, + "loss": 3.4211, + "step": 6866 + }, + { + "epoch": 0.82, + "learning_rate": 8.577847826797592e-07, + "loss": 3.4124, + "step": 6867 + }, + { + "epoch": 0.82, + "learning_rate": 8.566648275101652e-07, + "loss": 3.3467, + "step": 6868 + }, + { + "epoch": 0.82, + "learning_rate": 8.555455354399351e-07, + "loss": 3.3651, + "step": 6869 + }, + { + "epoch": 0.82, + "learning_rate": 8.544269066482009e-07, + "loss": 3.4197, + "step": 6870 + }, + { + "epoch": 0.82, + "learning_rate": 8.533089413139861e-07, + "loss": 3.402, + "step": 6871 + }, + { + "epoch": 0.82, + "learning_rate": 8.521916396162144e-07, + "loss": 3.4442, + "step": 6872 + }, + { + "epoch": 0.82, + "learning_rate": 8.510750017336927e-07, + "loss": 3.3407, + "step": 6873 + }, + { + "epoch": 0.82, + "learning_rate": 8.499590278451331e-07, + "loss": 3.4292, + "step": 6874 + }, + { + "epoch": 0.82, + "learning_rate": 8.488437181291315e-07, + "loss": 3.3999, + "step": 6875 + }, + { + "epoch": 0.82, + "learning_rate": 8.477290727641857e-07, + "loss": 3.4375, + "step": 6876 + }, + { + "epoch": 0.82, + "learning_rate": 8.466150919286819e-07, + "loss": 3.3416, + "step": 6877 + }, + { + "epoch": 0.82, + "learning_rate": 8.455017758009021e-07, + "loss": 3.4211, + "step": 6878 + }, + { + "epoch": 0.82, + "learning_rate": 8.443891245590197e-07, + "loss": 3.3745, + "step": 6879 + }, + { + "epoch": 0.82, + "learning_rate": 8.432771383811072e-07, + "loss": 3.4096, + "step": 6880 + }, + { + "epoch": 0.82, + "learning_rate": 8.421658174451219e-07, + "loss": 3.362, + "step": 6881 + }, + { + "epoch": 0.82, + "learning_rate": 8.410551619289253e-07, + "loss": 3.473, + "step": 6882 + }, + { + "epoch": 0.82, + "learning_rate": 8.399451720102608e-07, + "loss": 3.3426, + "step": 6883 + }, + { + "epoch": 0.82, + "learning_rate": 8.388358478667752e-07, + "loss": 3.5343, + "step": 6884 + }, + { + "epoch": 0.82, + "learning_rate": 8.377271896760036e-07, + "loss": 3.3811, + "step": 6885 + }, + { + "epoch": 0.82, + "learning_rate": 8.366191976153754e-07, + "loss": 3.451, + "step": 6886 + }, + { + "epoch": 0.82, + "learning_rate": 8.355118718622129e-07, + "loss": 3.3409, + "step": 6887 + }, + { + "epoch": 0.82, + "learning_rate": 8.344052125937358e-07, + "loss": 3.3105, + "step": 6888 + }, + { + "epoch": 0.82, + "learning_rate": 8.332992199870493e-07, + "loss": 3.3425, + "step": 6889 + }, + { + "epoch": 0.82, + "learning_rate": 8.321938942191593e-07, + "loss": 3.5605, + "step": 6890 + }, + { + "epoch": 0.82, + "learning_rate": 8.31089235466962e-07, + "loss": 3.3838, + "step": 6891 + }, + { + "epoch": 0.83, + "learning_rate": 8.299852439072464e-07, + "loss": 3.3835, + "step": 6892 + }, + { + "epoch": 0.83, + "learning_rate": 8.288819197166958e-07, + "loss": 3.2762, + "step": 6893 + }, + { + "epoch": 0.83, + "learning_rate": 8.277792630718856e-07, + "loss": 3.3698, + "step": 6894 + }, + { + "epoch": 0.83, + "learning_rate": 8.266772741492834e-07, + "loss": 3.411, + "step": 6895 + }, + { + "epoch": 0.83, + "learning_rate": 8.25575953125255e-07, + "loss": 3.3701, + "step": 6896 + }, + { + "epoch": 0.83, + "learning_rate": 8.244753001760542e-07, + "loss": 3.4598, + "step": 6897 + }, + { + "epoch": 0.83, + "learning_rate": 8.233753154778296e-07, + "loss": 3.4268, + "step": 6898 + }, + { + "epoch": 0.83, + "learning_rate": 8.222759992066221e-07, + "loss": 3.401, + "step": 6899 + }, + { + "epoch": 0.83, + "learning_rate": 8.211773515383659e-07, + "loss": 3.3735, + "step": 6900 + }, + { + "epoch": 0.83, + "learning_rate": 8.200793726488915e-07, + "loss": 3.2955, + "step": 6901 + }, + { + "epoch": 0.83, + "learning_rate": 8.18982062713915e-07, + "loss": 3.4159, + "step": 6902 + }, + { + "epoch": 0.83, + "learning_rate": 8.178854219090532e-07, + "loss": 3.4105, + "step": 6903 + }, + { + "epoch": 0.83, + "learning_rate": 8.167894504098122e-07, + "loss": 3.468, + "step": 6904 + }, + { + "epoch": 0.83, + "learning_rate": 8.156941483915904e-07, + "loss": 3.4519, + "step": 6905 + }, + { + "epoch": 0.83, + "learning_rate": 8.145995160296799e-07, + "loss": 3.3939, + "step": 6906 + }, + { + "epoch": 0.83, + "learning_rate": 8.135055534992658e-07, + "loss": 3.4353, + "step": 6907 + }, + { + "epoch": 0.83, + "learning_rate": 8.124122609754254e-07, + "loss": 3.2621, + "step": 6908 + }, + { + "epoch": 0.83, + "learning_rate": 8.113196386331318e-07, + "loss": 3.3933, + "step": 6909 + }, + { + "epoch": 0.83, + "learning_rate": 8.102276866472436e-07, + "loss": 3.4037, + "step": 6910 + }, + { + "epoch": 0.83, + "learning_rate": 8.09136405192521e-07, + "loss": 3.4359, + "step": 6911 + }, + { + "epoch": 0.83, + "learning_rate": 8.080457944436115e-07, + "loss": 3.4073, + "step": 6912 + }, + { + "epoch": 0.83, + "learning_rate": 8.069558545750556e-07, + "loss": 3.2914, + "step": 6913 + }, + { + "epoch": 0.83, + "learning_rate": 8.058665857612879e-07, + "loss": 3.3473, + "step": 6914 + }, + { + "epoch": 0.83, + "learning_rate": 8.047779881766355e-07, + "loss": 3.4577, + "step": 6915 + }, + { + "epoch": 0.83, + "learning_rate": 8.036900619953147e-07, + "loss": 3.475, + "step": 6916 + }, + { + "epoch": 0.83, + "learning_rate": 8.026028073914427e-07, + "loss": 3.3942, + "step": 6917 + }, + { + "epoch": 0.83, + "learning_rate": 8.015162245390179e-07, + "loss": 3.473, + "step": 6918 + }, + { + "epoch": 0.83, + "learning_rate": 8.004303136119407e-07, + "loss": 3.4964, + "step": 6919 + }, + { + "epoch": 0.83, + "learning_rate": 7.993450747839987e-07, + "loss": 3.4537, + "step": 6920 + }, + { + "epoch": 0.83, + "learning_rate": 7.982605082288747e-07, + "loss": 3.404, + "step": 6921 + }, + { + "epoch": 0.83, + "learning_rate": 7.97176614120142e-07, + "loss": 3.4152, + "step": 6922 + }, + { + "epoch": 0.83, + "learning_rate": 7.960933926312664e-07, + "loss": 3.2962, + "step": 6923 + }, + { + "epoch": 0.83, + "learning_rate": 7.950108439356069e-07, + "loss": 3.3381, + "step": 6924 + }, + { + "epoch": 0.83, + "learning_rate": 7.939289682064171e-07, + "loss": 3.4285, + "step": 6925 + }, + { + "epoch": 0.83, + "learning_rate": 7.92847765616836e-07, + "loss": 3.392, + "step": 6926 + }, + { + "epoch": 0.83, + "learning_rate": 7.917672363399037e-07, + "loss": 3.4211, + "step": 6927 + }, + { + "epoch": 0.83, + "learning_rate": 7.906873805485438e-07, + "loss": 3.4205, + "step": 6928 + }, + { + "epoch": 0.83, + "learning_rate": 7.896081984155796e-07, + "loss": 3.3904, + "step": 6929 + }, + { + "epoch": 0.83, + "learning_rate": 7.885296901137224e-07, + "loss": 3.4295, + "step": 6930 + }, + { + "epoch": 0.83, + "learning_rate": 7.874518558155769e-07, + "loss": 3.3674, + "step": 6931 + }, + { + "epoch": 0.83, + "learning_rate": 7.86374695693638e-07, + "loss": 3.4557, + "step": 6932 + }, + { + "epoch": 0.83, + "learning_rate": 7.852982099202972e-07, + "loss": 3.4667, + "step": 6933 + }, + { + "epoch": 0.83, + "learning_rate": 7.842223986678343e-07, + "loss": 3.4791, + "step": 6934 + }, + { + "epoch": 0.83, + "learning_rate": 7.831472621084213e-07, + "loss": 3.3263, + "step": 6935 + }, + { + "epoch": 0.83, + "learning_rate": 7.820728004141237e-07, + "loss": 3.4674, + "step": 6936 + }, + { + "epoch": 0.83, + "learning_rate": 7.809990137568984e-07, + "loss": 3.4175, + "step": 6937 + }, + { + "epoch": 0.83, + "learning_rate": 7.799259023085937e-07, + "loss": 3.2648, + "step": 6938 + }, + { + "epoch": 0.83, + "learning_rate": 7.788534662409497e-07, + "loss": 3.4425, + "step": 6939 + }, + { + "epoch": 0.83, + "learning_rate": 7.777817057256016e-07, + "loss": 3.2919, + "step": 6940 + }, + { + "epoch": 0.83, + "learning_rate": 7.767106209340719e-07, + "loss": 3.2884, + "step": 6941 + }, + { + "epoch": 0.83, + "learning_rate": 7.756402120377776e-07, + "loss": 3.4952, + "step": 6942 + }, + { + "epoch": 0.83, + "learning_rate": 7.745704792080267e-07, + "loss": 3.3854, + "step": 6943 + }, + { + "epoch": 0.83, + "learning_rate": 7.735014226160198e-07, + "loss": 3.3794, + "step": 6944 + }, + { + "epoch": 0.83, + "learning_rate": 7.724330424328469e-07, + "loss": 3.5026, + "step": 6945 + }, + { + "epoch": 0.83, + "learning_rate": 7.713653388294956e-07, + "loss": 3.339, + "step": 6946 + }, + { + "epoch": 0.83, + "learning_rate": 7.70298311976836e-07, + "loss": 3.416, + "step": 6947 + }, + { + "epoch": 0.83, + "learning_rate": 7.692319620456384e-07, + "loss": 3.4312, + "step": 6948 + }, + { + "epoch": 0.83, + "learning_rate": 7.681662892065611e-07, + "loss": 3.4452, + "step": 6949 + }, + { + "epoch": 0.83, + "learning_rate": 7.671012936301541e-07, + "loss": 3.4187, + "step": 6950 + }, + { + "epoch": 0.83, + "learning_rate": 7.660369754868591e-07, + "loss": 3.4663, + "step": 6951 + }, + { + "epoch": 0.83, + "learning_rate": 7.649733349470095e-07, + "loss": 3.2702, + "step": 6952 + }, + { + "epoch": 0.83, + "learning_rate": 7.639103721808294e-07, + "loss": 3.3031, + "step": 6953 + }, + { + "epoch": 0.83, + "learning_rate": 7.628480873584387e-07, + "loss": 3.4353, + "step": 6954 + }, + { + "epoch": 0.83, + "learning_rate": 7.61786480649841e-07, + "loss": 3.3093, + "step": 6955 + }, + { + "epoch": 0.83, + "learning_rate": 7.60725552224939e-07, + "loss": 3.4829, + "step": 6956 + }, + { + "epoch": 0.83, + "learning_rate": 7.596653022535227e-07, + "loss": 3.4287, + "step": 6957 + }, + { + "epoch": 0.83, + "learning_rate": 7.586057309052747e-07, + "loss": 3.5388, + "step": 6958 + }, + { + "epoch": 0.83, + "learning_rate": 7.575468383497686e-07, + "loss": 3.4104, + "step": 6959 + }, + { + "epoch": 0.83, + "learning_rate": 7.564886247564696e-07, + "loss": 3.4261, + "step": 6960 + }, + { + "epoch": 0.83, + "learning_rate": 7.554310902947332e-07, + "loss": 3.4591, + "step": 6961 + }, + { + "epoch": 0.83, + "learning_rate": 7.543742351338101e-07, + "loss": 3.3645, + "step": 6962 + }, + { + "epoch": 0.83, + "learning_rate": 7.533180594428357e-07, + "loss": 3.3475, + "step": 6963 + }, + { + "epoch": 0.83, + "learning_rate": 7.522625633908432e-07, + "loss": 3.3661, + "step": 6964 + }, + { + "epoch": 0.83, + "learning_rate": 7.512077471467527e-07, + "loss": 3.418, + "step": 6965 + }, + { + "epoch": 0.83, + "learning_rate": 7.501536108793777e-07, + "loss": 3.4597, + "step": 6966 + }, + { + "epoch": 0.83, + "learning_rate": 7.491001547574222e-07, + "loss": 3.3359, + "step": 6967 + }, + { + "epoch": 0.83, + "learning_rate": 7.480473789494807e-07, + "loss": 3.4584, + "step": 6968 + }, + { + "epoch": 0.83, + "learning_rate": 7.469952836240391e-07, + "loss": 3.4738, + "step": 6969 + }, + { + "epoch": 0.83, + "learning_rate": 7.459438689494775e-07, + "loss": 3.43, + "step": 6970 + }, + { + "epoch": 0.83, + "learning_rate": 7.448931350940597e-07, + "loss": 3.4716, + "step": 6971 + }, + { + "epoch": 0.83, + "learning_rate": 7.438430822259485e-07, + "loss": 3.4389, + "step": 6972 + }, + { + "epoch": 0.83, + "learning_rate": 7.427937105131943e-07, + "loss": 3.4488, + "step": 6973 + }, + { + "epoch": 0.83, + "learning_rate": 7.417450201237375e-07, + "loss": 3.4309, + "step": 6974 + }, + { + "epoch": 0.84, + "learning_rate": 7.406970112254108e-07, + "loss": 3.4139, + "step": 6975 + }, + { + "epoch": 0.84, + "learning_rate": 7.396496839859373e-07, + "loss": 3.377, + "step": 6976 + }, + { + "epoch": 0.84, + "learning_rate": 7.386030385729304e-07, + "loss": 3.43, + "step": 6977 + }, + { + "epoch": 0.84, + "learning_rate": 7.375570751538979e-07, + "loss": 3.3472, + "step": 6978 + }, + { + "epoch": 0.84, + "learning_rate": 7.365117938962341e-07, + "loss": 3.3269, + "step": 6979 + }, + { + "epoch": 0.84, + "learning_rate": 7.354671949672254e-07, + "loss": 3.4002, + "step": 6980 + }, + { + "epoch": 0.84, + "learning_rate": 7.344232785340505e-07, + "loss": 3.4089, + "step": 6981 + }, + { + "epoch": 0.84, + "learning_rate": 7.333800447637768e-07, + "loss": 3.4602, + "step": 6982 + }, + { + "epoch": 0.84, + "learning_rate": 7.32337493823364e-07, + "loss": 3.4034, + "step": 6983 + }, + { + "epoch": 0.84, + "learning_rate": 7.312956258796611e-07, + "loss": 3.455, + "step": 6984 + }, + { + "epoch": 0.84, + "learning_rate": 7.3025444109941e-07, + "loss": 3.3497, + "step": 6985 + }, + { + "epoch": 0.84, + "learning_rate": 7.292139396492414e-07, + "loss": 3.3329, + "step": 6986 + }, + { + "epoch": 0.84, + "learning_rate": 7.281741216956767e-07, + "loss": 3.3502, + "step": 6987 + }, + { + "epoch": 0.84, + "learning_rate": 7.271349874051293e-07, + "loss": 3.3983, + "step": 6988 + }, + { + "epoch": 0.84, + "learning_rate": 7.260965369439011e-07, + "loss": 3.4668, + "step": 6989 + }, + { + "epoch": 0.84, + "learning_rate": 7.250587704781852e-07, + "loss": 3.4568, + "step": 6990 + }, + { + "epoch": 0.84, + "learning_rate": 7.240216881740697e-07, + "loss": 3.2956, + "step": 6991 + }, + { + "epoch": 0.84, + "learning_rate": 7.229852901975232e-07, + "loss": 3.4413, + "step": 6992 + }, + { + "epoch": 0.84, + "learning_rate": 7.219495767144158e-07, + "loss": 3.43, + "step": 6993 + }, + { + "epoch": 0.84, + "learning_rate": 7.209145478905011e-07, + "loss": 3.4059, + "step": 6994 + }, + { + "epoch": 0.84, + "learning_rate": 7.198802038914248e-07, + "loss": 3.3622, + "step": 6995 + }, + { + "epoch": 0.84, + "learning_rate": 7.188465448827242e-07, + "loss": 3.4595, + "step": 6996 + }, + { + "epoch": 0.84, + "learning_rate": 7.178135710298262e-07, + "loss": 3.3569, + "step": 6997 + }, + { + "epoch": 0.84, + "learning_rate": 7.167812824980452e-07, + "loss": 3.4454, + "step": 6998 + }, + { + "epoch": 0.84, + "learning_rate": 7.157496794525943e-07, + "loss": 3.4238, + "step": 6999 + }, + { + "epoch": 0.84, + "learning_rate": 7.147187620585649e-07, + "loss": 3.3769, + "step": 7000 + }, + { + "epoch": 0.84, + "learning_rate": 7.136885304809487e-07, + "loss": 3.3776, + "step": 7001 + }, + { + "epoch": 0.84, + "learning_rate": 7.126589848846238e-07, + "loss": 3.3523, + "step": 7002 + }, + { + "epoch": 0.84, + "learning_rate": 7.116301254343583e-07, + "loss": 3.5041, + "step": 7003 + }, + { + "epoch": 0.84, + "learning_rate": 7.106019522948105e-07, + "loss": 3.4882, + "step": 7004 + }, + { + "epoch": 0.84, + "learning_rate": 7.095744656305286e-07, + "loss": 3.4401, + "step": 7005 + }, + { + "epoch": 0.84, + "learning_rate": 7.08547665605952e-07, + "loss": 3.2977, + "step": 7006 + }, + { + "epoch": 0.84, + "learning_rate": 7.075215523854123e-07, + "loss": 3.4542, + "step": 7007 + }, + { + "epoch": 0.84, + "learning_rate": 7.064961261331243e-07, + "loss": 3.2667, + "step": 7008 + }, + { + "epoch": 0.84, + "learning_rate": 7.054713870132007e-07, + "loss": 3.3072, + "step": 7009 + }, + { + "epoch": 0.84, + "learning_rate": 7.044473351896392e-07, + "loss": 3.3838, + "step": 7010 + }, + { + "epoch": 0.84, + "learning_rate": 7.034239708263291e-07, + "loss": 3.3953, + "step": 7011 + }, + { + "epoch": 0.84, + "learning_rate": 7.024012940870501e-07, + "loss": 3.3352, + "step": 7012 + }, + { + "epoch": 0.84, + "learning_rate": 7.013793051354711e-07, + "loss": 3.3878, + "step": 7013 + }, + { + "epoch": 0.84, + "learning_rate": 7.003580041351498e-07, + "loss": 3.4351, + "step": 7014 + }, + { + "epoch": 0.84, + "learning_rate": 6.993373912495393e-07, + "loss": 3.4264, + "step": 7015 + }, + { + "epoch": 0.84, + "learning_rate": 6.98317466641974e-07, + "loss": 3.4859, + "step": 7016 + }, + { + "epoch": 0.84, + "learning_rate": 6.972982304756848e-07, + "loss": 3.3646, + "step": 7017 + }, + { + "epoch": 0.84, + "learning_rate": 6.962796829137903e-07, + "loss": 3.4343, + "step": 7018 + }, + { + "epoch": 0.84, + "learning_rate": 6.952618241192988e-07, + "loss": 3.3749, + "step": 7019 + }, + { + "epoch": 0.84, + "learning_rate": 6.942446542551073e-07, + "loss": 3.5541, + "step": 7020 + }, + { + "epoch": 0.84, + "learning_rate": 6.932281734840052e-07, + "loss": 3.383, + "step": 7021 + }, + { + "epoch": 0.84, + "learning_rate": 6.922123819686683e-07, + "loss": 3.4433, + "step": 7022 + }, + { + "epoch": 0.84, + "learning_rate": 6.911972798716654e-07, + "loss": 3.4712, + "step": 7023 + }, + { + "epoch": 0.84, + "learning_rate": 6.901828673554533e-07, + "loss": 3.4784, + "step": 7024 + }, + { + "epoch": 0.84, + "learning_rate": 6.891691445823783e-07, + "loss": 3.4218, + "step": 7025 + }, + { + "epoch": 0.84, + "learning_rate": 6.881561117146768e-07, + "loss": 3.3042, + "step": 7026 + }, + { + "epoch": 0.84, + "learning_rate": 6.871437689144733e-07, + "loss": 3.4429, + "step": 7027 + }, + { + "epoch": 0.84, + "learning_rate": 6.861321163437862e-07, + "loss": 3.4082, + "step": 7028 + }, + { + "epoch": 0.84, + "learning_rate": 6.851211541645158e-07, + "loss": 3.3764, + "step": 7029 + }, + { + "epoch": 0.84, + "learning_rate": 6.841108825384601e-07, + "loss": 3.3404, + "step": 7030 + }, + { + "epoch": 0.84, + "learning_rate": 6.831013016273025e-07, + "loss": 3.4841, + "step": 7031 + }, + { + "epoch": 0.84, + "learning_rate": 6.820924115926153e-07, + "loss": 3.4209, + "step": 7032 + }, + { + "epoch": 0.84, + "learning_rate": 6.810842125958611e-07, + "loss": 3.4586, + "step": 7033 + }, + { + "epoch": 0.84, + "learning_rate": 6.800767047983941e-07, + "loss": 3.4861, + "step": 7034 + }, + { + "epoch": 0.84, + "learning_rate": 6.790698883614527e-07, + "loss": 3.4538, + "step": 7035 + }, + { + "epoch": 0.84, + "learning_rate": 6.780637634461717e-07, + "loss": 3.4091, + "step": 7036 + }, + { + "epoch": 0.84, + "learning_rate": 6.77058330213567e-07, + "loss": 3.4157, + "step": 7037 + }, + { + "epoch": 0.84, + "learning_rate": 6.760535888245512e-07, + "loss": 3.3484, + "step": 7038 + }, + { + "epoch": 0.84, + "learning_rate": 6.750495394399226e-07, + "loss": 3.4024, + "step": 7039 + }, + { + "epoch": 0.84, + "learning_rate": 6.740461822203687e-07, + "loss": 3.3942, + "step": 7040 + }, + { + "epoch": 0.84, + "learning_rate": 6.730435173264654e-07, + "loss": 3.4813, + "step": 7041 + }, + { + "epoch": 0.84, + "learning_rate": 6.720415449186835e-07, + "loss": 3.3802, + "step": 7042 + }, + { + "epoch": 0.84, + "learning_rate": 6.710402651573738e-07, + "loss": 3.4308, + "step": 7043 + }, + { + "epoch": 0.84, + "learning_rate": 6.700396782027852e-07, + "loss": 3.3616, + "step": 7044 + }, + { + "epoch": 0.84, + "learning_rate": 6.69039784215047e-07, + "loss": 3.4473, + "step": 7045 + }, + { + "epoch": 0.84, + "learning_rate": 6.68040583354187e-07, + "loss": 3.5026, + "step": 7046 + }, + { + "epoch": 0.84, + "learning_rate": 6.670420757801144e-07, + "loss": 3.4011, + "step": 7047 + }, + { + "epoch": 0.84, + "learning_rate": 6.660442616526314e-07, + "loss": 3.3601, + "step": 7048 + }, + { + "epoch": 0.84, + "learning_rate": 6.650471411314269e-07, + "loss": 3.3541, + "step": 7049 + }, + { + "epoch": 0.84, + "learning_rate": 6.640507143760832e-07, + "loss": 3.4296, + "step": 7050 + }, + { + "epoch": 0.84, + "learning_rate": 6.63054981546064e-07, + "loss": 3.3821, + "step": 7051 + }, + { + "epoch": 0.84, + "learning_rate": 6.620599428007313e-07, + "loss": 3.4212, + "step": 7052 + }, + { + "epoch": 0.84, + "learning_rate": 6.610655982993264e-07, + "loss": 3.4043, + "step": 7053 + }, + { + "epoch": 0.84, + "learning_rate": 6.600719482009871e-07, + "loss": 3.3838, + "step": 7054 + }, + { + "epoch": 0.84, + "learning_rate": 6.590789926647362e-07, + "loss": 3.4562, + "step": 7055 + }, + { + "epoch": 0.84, + "learning_rate": 6.580867318494865e-07, + "loss": 3.3472, + "step": 7056 + }, + { + "epoch": 0.84, + "learning_rate": 6.570951659140401e-07, + "loss": 3.4384, + "step": 7057 + }, + { + "epoch": 0.84, + "learning_rate": 6.561042950170859e-07, + "loss": 3.426, + "step": 7058 + }, + { + "epoch": 0.85, + "learning_rate": 6.551141193172028e-07, + "loss": 3.3465, + "step": 7059 + }, + { + "epoch": 0.85, + "learning_rate": 6.5412463897286e-07, + "loss": 3.4465, + "step": 7060 + }, + { + "epoch": 0.85, + "learning_rate": 6.531358541424132e-07, + "loss": 3.4032, + "step": 7061 + }, + { + "epoch": 0.85, + "learning_rate": 6.521477649841074e-07, + "loss": 3.316, + "step": 7062 + }, + { + "epoch": 0.85, + "learning_rate": 6.511603716560766e-07, + "loss": 3.4124, + "step": 7063 + }, + { + "epoch": 0.85, + "learning_rate": 6.501736743163433e-07, + "loss": 3.3144, + "step": 7064 + }, + { + "epoch": 0.85, + "learning_rate": 6.49187673122818e-07, + "loss": 3.4467, + "step": 7065 + }, + { + "epoch": 0.85, + "learning_rate": 6.482023682333e-07, + "loss": 3.3329, + "step": 7066 + }, + { + "epoch": 0.85, + "learning_rate": 6.472177598054785e-07, + "loss": 3.3595, + "step": 7067 + }, + { + "epoch": 0.85, + "learning_rate": 6.462338479969305e-07, + "loss": 3.4456, + "step": 7068 + }, + { + "epoch": 0.85, + "learning_rate": 6.452506329651209e-07, + "loss": 3.3998, + "step": 7069 + }, + { + "epoch": 0.85, + "learning_rate": 6.442681148674029e-07, + "loss": 3.4429, + "step": 7070 + }, + { + "epoch": 0.85, + "learning_rate": 6.432862938610185e-07, + "loss": 3.313, + "step": 7071 + }, + { + "epoch": 0.85, + "learning_rate": 6.423051701030986e-07, + "loss": 3.3743, + "step": 7072 + }, + { + "epoch": 0.85, + "learning_rate": 6.413247437506637e-07, + "loss": 3.3538, + "step": 7073 + }, + { + "epoch": 0.85, + "learning_rate": 6.403450149606183e-07, + "loss": 3.4583, + "step": 7074 + }, + { + "epoch": 0.85, + "learning_rate": 6.393659838897614e-07, + "loss": 3.4435, + "step": 7075 + }, + { + "epoch": 0.85, + "learning_rate": 6.383876506947751e-07, + "loss": 3.3932, + "step": 7076 + }, + { + "epoch": 0.85, + "learning_rate": 6.374100155322327e-07, + "loss": 3.4333, + "step": 7077 + }, + { + "epoch": 0.85, + "learning_rate": 6.364330785585931e-07, + "loss": 3.4012, + "step": 7078 + }, + { + "epoch": 0.85, + "learning_rate": 6.354568399302092e-07, + "loss": 3.4375, + "step": 7079 + }, + { + "epoch": 0.85, + "learning_rate": 6.344812998033134e-07, + "loss": 3.4439, + "step": 7080 + }, + { + "epoch": 0.85, + "learning_rate": 6.335064583340361e-07, + "loss": 3.3706, + "step": 7081 + }, + { + "epoch": 0.85, + "learning_rate": 6.325323156783864e-07, + "loss": 3.3499, + "step": 7082 + }, + { + "epoch": 0.85, + "learning_rate": 6.315588719922688e-07, + "loss": 3.323, + "step": 7083 + }, + { + "epoch": 0.85, + "learning_rate": 6.305861274314722e-07, + "loss": 3.4399, + "step": 7084 + }, + { + "epoch": 0.85, + "learning_rate": 6.296140821516756e-07, + "loss": 3.4107, + "step": 7085 + }, + { + "epoch": 0.85, + "learning_rate": 6.286427363084429e-07, + "loss": 3.3445, + "step": 7086 + }, + { + "epoch": 0.85, + "learning_rate": 6.276720900572326e-07, + "loss": 3.4538, + "step": 7087 + }, + { + "epoch": 0.85, + "learning_rate": 6.267021435533816e-07, + "loss": 3.4896, + "step": 7088 + }, + { + "epoch": 0.85, + "learning_rate": 6.257328969521254e-07, + "loss": 3.3671, + "step": 7089 + }, + { + "epoch": 0.85, + "learning_rate": 6.24764350408577e-07, + "loss": 3.4007, + "step": 7090 + }, + { + "epoch": 0.85, + "learning_rate": 6.237965040777461e-07, + "loss": 3.393, + "step": 7091 + }, + { + "epoch": 0.85, + "learning_rate": 6.228293581145261e-07, + "loss": 3.3497, + "step": 7092 + }, + { + "epoch": 0.85, + "learning_rate": 6.218629126736992e-07, + "loss": 3.4799, + "step": 7093 + }, + { + "epoch": 0.85, + "learning_rate": 6.208971679099335e-07, + "loss": 3.4525, + "step": 7094 + }, + { + "epoch": 0.85, + "learning_rate": 6.1993212397779e-07, + "loss": 3.3597, + "step": 7095 + }, + { + "epoch": 0.85, + "learning_rate": 6.189677810317113e-07, + "loss": 3.4547, + "step": 7096 + }, + { + "epoch": 0.85, + "learning_rate": 6.180041392260338e-07, + "loss": 3.3551, + "step": 7097 + }, + { + "epoch": 0.85, + "learning_rate": 6.170411987149749e-07, + "loss": 3.4106, + "step": 7098 + }, + { + "epoch": 0.85, + "learning_rate": 6.160789596526468e-07, + "loss": 3.5085, + "step": 7099 + }, + { + "epoch": 0.85, + "learning_rate": 6.151174221930451e-07, + "loss": 3.3424, + "step": 7100 + }, + { + "epoch": 0.85, + "learning_rate": 6.141565864900545e-07, + "loss": 3.35, + "step": 7101 + }, + { + "epoch": 0.85, + "learning_rate": 6.13196452697446e-07, + "loss": 3.4183, + "step": 7102 + }, + { + "epoch": 0.85, + "learning_rate": 6.122370209688827e-07, + "loss": 3.45, + "step": 7103 + }, + { + "epoch": 0.85, + "learning_rate": 6.112782914579074e-07, + "loss": 3.3991, + "step": 7104 + }, + { + "epoch": 0.85, + "learning_rate": 6.103202643179589e-07, + "loss": 3.4327, + "step": 7105 + }, + { + "epoch": 0.85, + "learning_rate": 6.093629397023582e-07, + "loss": 3.4077, + "step": 7106 + }, + { + "epoch": 0.85, + "learning_rate": 6.084063177643156e-07, + "loss": 3.438, + "step": 7107 + }, + { + "epoch": 0.85, + "learning_rate": 6.074503986569297e-07, + "loss": 3.4457, + "step": 7108 + }, + { + "epoch": 0.85, + "learning_rate": 6.064951825331849e-07, + "loss": 3.4019, + "step": 7109 + }, + { + "epoch": 0.85, + "learning_rate": 6.055406695459532e-07, + "loss": 3.434, + "step": 7110 + }, + { + "epoch": 0.85, + "learning_rate": 6.045868598479971e-07, + "loss": 3.3633, + "step": 7111 + }, + { + "epoch": 0.85, + "learning_rate": 6.036337535919634e-07, + "loss": 3.4415, + "step": 7112 + }, + { + "epoch": 0.85, + "learning_rate": 6.026813509303869e-07, + "loss": 3.4236, + "step": 7113 + }, + { + "epoch": 0.85, + "learning_rate": 6.017296520156901e-07, + "loss": 3.3578, + "step": 7114 + }, + { + "epoch": 0.85, + "learning_rate": 6.007786570001828e-07, + "loss": 3.335, + "step": 7115 + }, + { + "epoch": 0.85, + "learning_rate": 5.998283660360627e-07, + "loss": 3.3704, + "step": 7116 + }, + { + "epoch": 0.85, + "learning_rate": 5.988787792754131e-07, + "loss": 3.3622, + "step": 7117 + }, + { + "epoch": 0.85, + "learning_rate": 5.979298968702079e-07, + "loss": 3.4556, + "step": 7118 + }, + { + "epoch": 0.85, + "learning_rate": 5.969817189723048e-07, + "loss": 3.4178, + "step": 7119 + }, + { + "epoch": 0.85, + "learning_rate": 5.960342457334511e-07, + "loss": 3.3783, + "step": 7120 + }, + { + "epoch": 0.85, + "learning_rate": 5.9508747730528e-07, + "loss": 3.3687, + "step": 7121 + }, + { + "epoch": 0.85, + "learning_rate": 5.94141413839312e-07, + "loss": 3.4783, + "step": 7122 + }, + { + "epoch": 0.85, + "learning_rate": 5.931960554869542e-07, + "loss": 3.4226, + "step": 7123 + }, + { + "epoch": 0.85, + "learning_rate": 5.922514023995051e-07, + "loss": 3.4595, + "step": 7124 + }, + { + "epoch": 0.85, + "learning_rate": 5.913074547281428e-07, + "loss": 3.3603, + "step": 7125 + }, + { + "epoch": 0.85, + "learning_rate": 5.903642126239412e-07, + "loss": 3.4407, + "step": 7126 + }, + { + "epoch": 0.85, + "learning_rate": 5.894216762378513e-07, + "loss": 3.4705, + "step": 7127 + }, + { + "epoch": 0.85, + "learning_rate": 5.884798457207214e-07, + "loss": 3.4513, + "step": 7128 + }, + { + "epoch": 0.85, + "learning_rate": 5.875387212232797e-07, + "loss": 3.372, + "step": 7129 + }, + { + "epoch": 0.85, + "learning_rate": 5.865983028961441e-07, + "loss": 3.4989, + "step": 7130 + }, + { + "epoch": 0.85, + "learning_rate": 5.856585908898188e-07, + "loss": 3.3538, + "step": 7131 + }, + { + "epoch": 0.85, + "learning_rate": 5.847195853546978e-07, + "loss": 3.2616, + "step": 7132 + }, + { + "epoch": 0.85, + "learning_rate": 5.837812864410553e-07, + "loss": 3.4364, + "step": 7133 + }, + { + "epoch": 0.85, + "learning_rate": 5.828436942990611e-07, + "loss": 3.4259, + "step": 7134 + }, + { + "epoch": 0.85, + "learning_rate": 5.819068090787633e-07, + "loss": 3.3849, + "step": 7135 + }, + { + "epoch": 0.85, + "learning_rate": 5.809706309301039e-07, + "loss": 3.4467, + "step": 7136 + }, + { + "epoch": 0.85, + "learning_rate": 5.800351600029081e-07, + "loss": 3.3155, + "step": 7137 + }, + { + "epoch": 0.85, + "learning_rate": 5.791003964468883e-07, + "loss": 3.3374, + "step": 7138 + }, + { + "epoch": 0.85, + "learning_rate": 5.781663404116433e-07, + "loss": 3.3994, + "step": 7139 + }, + { + "epoch": 0.85, + "learning_rate": 5.772329920466629e-07, + "loss": 3.3415, + "step": 7140 + }, + { + "epoch": 0.85, + "learning_rate": 5.76300351501315e-07, + "loss": 3.4116, + "step": 7141 + }, + { + "epoch": 0.86, + "learning_rate": 5.753684189248648e-07, + "loss": 3.4196, + "step": 7142 + }, + { + "epoch": 0.86, + "learning_rate": 5.744371944664534e-07, + "loss": 3.3685, + "step": 7143 + }, + { + "epoch": 0.86, + "learning_rate": 5.735066782751181e-07, + "loss": 3.398, + "step": 7144 + }, + { + "epoch": 0.86, + "learning_rate": 5.72576870499777e-07, + "loss": 3.3106, + "step": 7145 + }, + { + "epoch": 0.86, + "learning_rate": 5.716477712892371e-07, + "loss": 3.3389, + "step": 7146 + }, + { + "epoch": 0.86, + "learning_rate": 5.707193807921896e-07, + "loss": 3.3424, + "step": 7147 + }, + { + "epoch": 0.86, + "learning_rate": 5.697916991572172e-07, + "loss": 3.4665, + "step": 7148 + }, + { + "epoch": 0.86, + "learning_rate": 5.688647265327841e-07, + "loss": 3.448, + "step": 7149 + }, + { + "epoch": 0.86, + "learning_rate": 5.679384630672436e-07, + "loss": 3.4051, + "step": 7150 + }, + { + "epoch": 0.86, + "learning_rate": 5.670129089088344e-07, + "loss": 3.4276, + "step": 7151 + }, + { + "epoch": 0.86, + "learning_rate": 5.660880642056832e-07, + "loss": 3.3811, + "step": 7152 + }, + { + "epoch": 0.86, + "learning_rate": 5.651639291058009e-07, + "loss": 3.4314, + "step": 7153 + }, + { + "epoch": 0.86, + "learning_rate": 5.642405037570859e-07, + "loss": 3.4161, + "step": 7154 + }, + { + "epoch": 0.86, + "learning_rate": 5.633177883073243e-07, + "loss": 3.4479, + "step": 7155 + }, + { + "epoch": 0.86, + "learning_rate": 5.623957829041876e-07, + "loss": 3.4094, + "step": 7156 + }, + { + "epoch": 0.86, + "learning_rate": 5.614744876952332e-07, + "loss": 3.4423, + "step": 7157 + }, + { + "epoch": 0.86, + "learning_rate": 5.60553902827905e-07, + "loss": 3.4068, + "step": 7158 + }, + { + "epoch": 0.86, + "learning_rate": 5.596340284495333e-07, + "loss": 3.498, + "step": 7159 + }, + { + "epoch": 0.86, + "learning_rate": 5.587148647073337e-07, + "loss": 3.3939, + "step": 7160 + }, + { + "epoch": 0.86, + "learning_rate": 5.577964117484125e-07, + "loss": 3.4182, + "step": 7161 + }, + { + "epoch": 0.86, + "learning_rate": 5.568786697197547e-07, + "loss": 3.4148, + "step": 7162 + }, + { + "epoch": 0.86, + "learning_rate": 5.559616387682393e-07, + "loss": 3.4096, + "step": 7163 + }, + { + "epoch": 0.86, + "learning_rate": 5.550453190406263e-07, + "loss": 3.3685, + "step": 7164 + }, + { + "epoch": 0.86, + "learning_rate": 5.541297106835635e-07, + "loss": 3.447, + "step": 7165 + }, + { + "epoch": 0.86, + "learning_rate": 5.532148138435855e-07, + "loss": 3.405, + "step": 7166 + }, + { + "epoch": 0.86, + "learning_rate": 5.523006286671118e-07, + "loss": 3.4065, + "step": 7167 + }, + { + "epoch": 0.86, + "learning_rate": 5.513871553004474e-07, + "loss": 3.4031, + "step": 7168 + }, + { + "epoch": 0.86, + "learning_rate": 5.504743938897883e-07, + "loss": 3.3752, + "step": 7169 + }, + { + "epoch": 0.86, + "learning_rate": 5.495623445812082e-07, + "loss": 3.4203, + "step": 7170 + }, + { + "epoch": 0.86, + "learning_rate": 5.486510075206747e-07, + "loss": 3.4519, + "step": 7171 + }, + { + "epoch": 0.86, + "learning_rate": 5.477403828540373e-07, + "loss": 3.3827, + "step": 7172 + }, + { + "epoch": 0.86, + "learning_rate": 5.468304707270322e-07, + "loss": 3.4787, + "step": 7173 + }, + { + "epoch": 0.86, + "learning_rate": 5.459212712852819e-07, + "loss": 3.3936, + "step": 7174 + }, + { + "epoch": 0.86, + "learning_rate": 5.450127846742947e-07, + "loss": 3.37, + "step": 7175 + }, + { + "epoch": 0.86, + "learning_rate": 5.441050110394636e-07, + "loss": 3.5052, + "step": 7176 + }, + { + "epoch": 0.86, + "learning_rate": 5.431979505260715e-07, + "loss": 3.4147, + "step": 7177 + }, + { + "epoch": 0.86, + "learning_rate": 5.422916032792813e-07, + "loss": 3.3523, + "step": 7178 + }, + { + "epoch": 0.86, + "learning_rate": 5.413859694441464e-07, + "loss": 3.5039, + "step": 7179 + }, + { + "epoch": 0.86, + "learning_rate": 5.404810491656048e-07, + "loss": 3.473, + "step": 7180 + }, + { + "epoch": 0.86, + "learning_rate": 5.395768425884796e-07, + "loss": 3.4209, + "step": 7181 + }, + { + "epoch": 0.86, + "learning_rate": 5.386733498574792e-07, + "loss": 3.426, + "step": 7182 + }, + { + "epoch": 0.86, + "learning_rate": 5.377705711171993e-07, + "loss": 3.4691, + "step": 7183 + }, + { + "epoch": 0.86, + "learning_rate": 5.368685065121193e-07, + "loss": 3.4279, + "step": 7184 + }, + { + "epoch": 0.86, + "learning_rate": 5.359671561866092e-07, + "loss": 3.4547, + "step": 7185 + }, + { + "epoch": 0.86, + "learning_rate": 5.350665202849164e-07, + "loss": 3.5377, + "step": 7186 + }, + { + "epoch": 0.86, + "learning_rate": 5.341665989511819e-07, + "loss": 3.4979, + "step": 7187 + }, + { + "epoch": 0.86, + "learning_rate": 5.332673923294285e-07, + "loss": 3.4417, + "step": 7188 + }, + { + "epoch": 0.86, + "learning_rate": 5.323689005635646e-07, + "loss": 3.3605, + "step": 7189 + }, + { + "epoch": 0.86, + "learning_rate": 5.314711237973851e-07, + "loss": 3.3162, + "step": 7190 + }, + { + "epoch": 0.86, + "learning_rate": 5.305740621745703e-07, + "loss": 3.3709, + "step": 7191 + }, + { + "epoch": 0.86, + "learning_rate": 5.29677715838685e-07, + "loss": 3.4239, + "step": 7192 + }, + { + "epoch": 0.86, + "learning_rate": 5.287820849331821e-07, + "loss": 3.5406, + "step": 7193 + }, + { + "epoch": 0.86, + "learning_rate": 5.278871696013976e-07, + "loss": 3.4745, + "step": 7194 + }, + { + "epoch": 0.86, + "learning_rate": 5.269929699865539e-07, + "loss": 3.3877, + "step": 7195 + }, + { + "epoch": 0.86, + "learning_rate": 5.260994862317587e-07, + "loss": 3.4932, + "step": 7196 + }, + { + "epoch": 0.86, + "learning_rate": 5.252067184800047e-07, + "loss": 3.2452, + "step": 7197 + }, + { + "epoch": 0.86, + "learning_rate": 5.243146668741706e-07, + "loss": 3.3909, + "step": 7198 + }, + { + "epoch": 0.86, + "learning_rate": 5.234233315570197e-07, + "loss": 3.359, + "step": 7199 + }, + { + "epoch": 0.86, + "learning_rate": 5.225327126712033e-07, + "loss": 3.3949, + "step": 7200 + }, + { + "epoch": 0.86, + "learning_rate": 5.216428103592547e-07, + "loss": 3.3921, + "step": 7201 + }, + { + "epoch": 0.86, + "learning_rate": 5.207536247635941e-07, + "loss": 3.3756, + "step": 7202 + }, + { + "epoch": 0.86, + "learning_rate": 5.198651560265267e-07, + "loss": 3.4122, + "step": 7203 + }, + { + "epoch": 0.86, + "learning_rate": 5.189774042902423e-07, + "loss": 3.3572, + "step": 7204 + }, + { + "epoch": 0.86, + "learning_rate": 5.18090369696817e-07, + "loss": 3.4201, + "step": 7205 + }, + { + "epoch": 0.86, + "learning_rate": 5.172040523882143e-07, + "loss": 3.3434, + "step": 7206 + }, + { + "epoch": 0.86, + "learning_rate": 5.163184525062764e-07, + "loss": 3.3437, + "step": 7207 + }, + { + "epoch": 0.86, + "learning_rate": 5.154335701927371e-07, + "loss": 3.3957, + "step": 7208 + }, + { + "epoch": 0.86, + "learning_rate": 5.14549405589213e-07, + "loss": 3.4933, + "step": 7209 + }, + { + "epoch": 0.86, + "learning_rate": 5.136659588372051e-07, + "loss": 3.3679, + "step": 7210 + }, + { + "epoch": 0.86, + "learning_rate": 5.127832300781005e-07, + "loss": 3.3414, + "step": 7211 + }, + { + "epoch": 0.86, + "learning_rate": 5.119012194531709e-07, + "loss": 3.4075, + "step": 7212 + }, + { + "epoch": 0.86, + "learning_rate": 5.110199271035726e-07, + "loss": 3.4248, + "step": 7213 + }, + { + "epoch": 0.86, + "learning_rate": 5.101393531703502e-07, + "loss": 3.2786, + "step": 7214 + }, + { + "epoch": 0.86, + "learning_rate": 5.092594977944276e-07, + "loss": 3.4214, + "step": 7215 + }, + { + "epoch": 0.86, + "learning_rate": 5.083803611166189e-07, + "loss": 3.4161, + "step": 7216 + }, + { + "epoch": 0.86, + "learning_rate": 5.075019432776201e-07, + "loss": 3.3013, + "step": 7217 + }, + { + "epoch": 0.86, + "learning_rate": 5.066242444180142e-07, + "loss": 3.3633, + "step": 7218 + }, + { + "epoch": 0.86, + "learning_rate": 5.057472646782668e-07, + "loss": 3.4542, + "step": 7219 + }, + { + "epoch": 0.86, + "learning_rate": 5.04871004198731e-07, + "loss": 3.4019, + "step": 7220 + }, + { + "epoch": 0.86, + "learning_rate": 5.039954631196419e-07, + "loss": 3.4898, + "step": 7221 + }, + { + "epoch": 0.86, + "learning_rate": 5.03120641581124e-07, + "loss": 3.4687, + "step": 7222 + }, + { + "epoch": 0.86, + "learning_rate": 5.022465397231802e-07, + "loss": 3.4953, + "step": 7223 + }, + { + "epoch": 0.86, + "learning_rate": 5.013731576857039e-07, + "loss": 3.3912, + "step": 7224 + }, + { + "epoch": 0.86, + "learning_rate": 5.005004956084713e-07, + "loss": 3.3697, + "step": 7225 + }, + { + "epoch": 0.87, + "learning_rate": 4.996285536311423e-07, + "loss": 3.4746, + "step": 7226 + }, + { + "epoch": 0.87, + "learning_rate": 4.987573318932632e-07, + "loss": 3.33, + "step": 7227 + }, + { + "epoch": 0.87, + "learning_rate": 4.978868305342638e-07, + "loss": 3.4065, + "step": 7228 + }, + { + "epoch": 0.87, + "learning_rate": 4.970170496934584e-07, + "loss": 3.4208, + "step": 7229 + }, + { + "epoch": 0.87, + "learning_rate": 4.961479895100501e-07, + "loss": 3.3721, + "step": 7230 + }, + { + "epoch": 0.87, + "learning_rate": 4.952796501231183e-07, + "loss": 3.4, + "step": 7231 + }, + { + "epoch": 0.87, + "learning_rate": 4.944120316716355e-07, + "loss": 3.3633, + "step": 7232 + }, + { + "epoch": 0.87, + "learning_rate": 4.935451342944548e-07, + "loss": 3.3969, + "step": 7233 + }, + { + "epoch": 0.87, + "learning_rate": 4.926789581303138e-07, + "loss": 3.3199, + "step": 7234 + }, + { + "epoch": 0.87, + "learning_rate": 4.918135033178362e-07, + "loss": 3.3981, + "step": 7235 + }, + { + "epoch": 0.87, + "learning_rate": 4.909487699955279e-07, + "loss": 3.4534, + "step": 7236 + }, + { + "epoch": 0.87, + "learning_rate": 4.900847583017815e-07, + "loss": 3.5247, + "step": 7237 + }, + { + "epoch": 0.87, + "learning_rate": 4.892214683748747e-07, + "loss": 3.3244, + "step": 7238 + }, + { + "epoch": 0.87, + "learning_rate": 4.88358900352967e-07, + "loss": 3.4347, + "step": 7239 + }, + { + "epoch": 0.87, + "learning_rate": 4.874970543741042e-07, + "loss": 3.3345, + "step": 7240 + }, + { + "epoch": 0.87, + "learning_rate": 4.866359305762164e-07, + "loss": 3.3862, + "step": 7241 + }, + { + "epoch": 0.87, + "learning_rate": 4.857755290971161e-07, + "loss": 3.4429, + "step": 7242 + }, + { + "epoch": 0.87, + "learning_rate": 4.84915850074506e-07, + "loss": 3.3098, + "step": 7243 + }, + { + "epoch": 0.87, + "learning_rate": 4.840568936459644e-07, + "loss": 3.3532, + "step": 7244 + }, + { + "epoch": 0.87, + "learning_rate": 4.831986599489619e-07, + "loss": 3.3844, + "step": 7245 + }, + { + "epoch": 0.87, + "learning_rate": 4.823411491208485e-07, + "loss": 3.4, + "step": 7246 + }, + { + "epoch": 0.87, + "learning_rate": 4.814843612988618e-07, + "loss": 3.4145, + "step": 7247 + }, + { + "epoch": 0.87, + "learning_rate": 4.806282966201198e-07, + "loss": 3.3906, + "step": 7248 + }, + { + "epoch": 0.87, + "learning_rate": 4.797729552216302e-07, + "loss": 3.3717, + "step": 7249 + }, + { + "epoch": 0.87, + "learning_rate": 4.789183372402789e-07, + "loss": 3.4123, + "step": 7250 + }, + { + "epoch": 0.87, + "learning_rate": 4.780644428128417e-07, + "loss": 3.403, + "step": 7251 + }, + { + "epoch": 0.87, + "learning_rate": 4.772112720759725e-07, + "loss": 3.4155, + "step": 7252 + }, + { + "epoch": 0.87, + "learning_rate": 4.7635882516621543e-07, + "loss": 3.3663, + "step": 7253 + }, + { + "epoch": 0.87, + "learning_rate": 4.755071022199953e-07, + "loss": 3.5471, + "step": 7254 + }, + { + "epoch": 0.87, + "learning_rate": 4.7465610337362246e-07, + "loss": 3.4997, + "step": 7255 + }, + { + "epoch": 0.87, + "learning_rate": 4.7380582876328964e-07, + "loss": 3.3603, + "step": 7256 + }, + { + "epoch": 0.87, + "learning_rate": 4.729562785250752e-07, + "loss": 3.3289, + "step": 7257 + }, + { + "epoch": 0.87, + "learning_rate": 4.721074527949404e-07, + "loss": 3.4237, + "step": 7258 + }, + { + "epoch": 0.87, + "learning_rate": 4.7125935170873436e-07, + "loss": 3.3348, + "step": 7259 + }, + { + "epoch": 0.87, + "learning_rate": 4.704119754021824e-07, + "loss": 3.3507, + "step": 7260 + }, + { + "epoch": 0.87, + "learning_rate": 4.695653240109027e-07, + "loss": 3.4002, + "step": 7261 + }, + { + "epoch": 0.87, + "learning_rate": 4.6871939767039186e-07, + "loss": 3.51, + "step": 7262 + }, + { + "epoch": 0.87, + "learning_rate": 4.6787419651603216e-07, + "loss": 3.4538, + "step": 7263 + }, + { + "epoch": 0.87, + "learning_rate": 4.670297206830887e-07, + "loss": 3.3809, + "step": 7264 + }, + { + "epoch": 0.87, + "learning_rate": 4.661859703067123e-07, + "loss": 3.3895, + "step": 7265 + }, + { + "epoch": 0.87, + "learning_rate": 4.6534294552193604e-07, + "loss": 3.3528, + "step": 7266 + }, + { + "epoch": 0.87, + "learning_rate": 4.645006464636792e-07, + "loss": 3.3233, + "step": 7267 + }, + { + "epoch": 0.87, + "learning_rate": 4.636590732667412e-07, + "loss": 3.3851, + "step": 7268 + }, + { + "epoch": 0.87, + "learning_rate": 4.628182260658087e-07, + "loss": 3.374, + "step": 7269 + }, + { + "epoch": 0.87, + "learning_rate": 4.619781049954508e-07, + "loss": 3.3188, + "step": 7270 + }, + { + "epoch": 0.87, + "learning_rate": 4.6113871019012045e-07, + "loss": 3.3897, + "step": 7271 + }, + { + "epoch": 0.87, + "learning_rate": 4.603000417841541e-07, + "loss": 3.3583, + "step": 7272 + }, + { + "epoch": 0.87, + "learning_rate": 4.5946209991177216e-07, + "loss": 3.4421, + "step": 7273 + }, + { + "epoch": 0.87, + "learning_rate": 4.5862488470707843e-07, + "loss": 3.4195, + "step": 7274 + }, + { + "epoch": 0.87, + "learning_rate": 4.5778839630406246e-07, + "loss": 3.4343, + "step": 7275 + }, + { + "epoch": 0.87, + "learning_rate": 4.569526348365949e-07, + "loss": 3.402, + "step": 7276 + }, + { + "epoch": 0.87, + "learning_rate": 4.56117600438431e-07, + "loss": 3.3773, + "step": 7277 + }, + { + "epoch": 0.87, + "learning_rate": 4.5528329324320944e-07, + "loss": 3.3549, + "step": 7278 + }, + { + "epoch": 0.87, + "learning_rate": 4.5444971338445276e-07, + "loss": 3.458, + "step": 7279 + }, + { + "epoch": 0.87, + "learning_rate": 4.536168609955677e-07, + "loss": 3.3565, + "step": 7280 + }, + { + "epoch": 0.87, + "learning_rate": 4.5278473620984255e-07, + "loss": 3.3585, + "step": 7281 + }, + { + "epoch": 0.87, + "learning_rate": 4.5195333916045245e-07, + "loss": 3.4539, + "step": 7282 + }, + { + "epoch": 0.87, + "learning_rate": 4.5112266998045273e-07, + "loss": 3.388, + "step": 7283 + }, + { + "epoch": 0.87, + "learning_rate": 4.5029272880278473e-07, + "loss": 3.326, + "step": 7284 + }, + { + "epoch": 0.87, + "learning_rate": 4.494635157602717e-07, + "loss": 3.4708, + "step": 7285 + }, + { + "epoch": 0.87, + "learning_rate": 4.4863503098562135e-07, + "loss": 3.4164, + "step": 7286 + }, + { + "epoch": 0.87, + "learning_rate": 4.478072746114226e-07, + "loss": 3.4423, + "step": 7287 + }, + { + "epoch": 0.87, + "learning_rate": 4.4698024677015284e-07, + "loss": 3.3725, + "step": 7288 + }, + { + "epoch": 0.87, + "learning_rate": 4.4615394759416564e-07, + "loss": 3.4905, + "step": 7289 + }, + { + "epoch": 0.87, + "learning_rate": 4.453283772157052e-07, + "loss": 3.4967, + "step": 7290 + }, + { + "epoch": 0.87, + "learning_rate": 4.445035357668942e-07, + "loss": 3.4903, + "step": 7291 + }, + { + "epoch": 0.87, + "learning_rate": 4.436794233797415e-07, + "loss": 3.3911, + "step": 7292 + }, + { + "epoch": 0.87, + "learning_rate": 4.4285604018613535e-07, + "loss": 3.4162, + "step": 7293 + }, + { + "epoch": 0.87, + "learning_rate": 4.420333863178544e-07, + "loss": 3.4821, + "step": 7294 + }, + { + "epoch": 0.87, + "learning_rate": 4.412114619065516e-07, + "loss": 3.398, + "step": 7295 + }, + { + "epoch": 0.87, + "learning_rate": 4.403902670837712e-07, + "loss": 3.4784, + "step": 7296 + }, + { + "epoch": 0.87, + "learning_rate": 4.395698019809347e-07, + "loss": 3.354, + "step": 7297 + }, + { + "epoch": 0.87, + "learning_rate": 4.3875006672935096e-07, + "loss": 3.425, + "step": 7298 + }, + { + "epoch": 0.87, + "learning_rate": 4.3793106146020945e-07, + "loss": 3.3671, + "step": 7299 + }, + { + "epoch": 0.87, + "learning_rate": 4.3711278630458486e-07, + "loss": 3.3787, + "step": 7300 + }, + { + "epoch": 0.87, + "learning_rate": 4.3629524139343236e-07, + "loss": 3.43, + "step": 7301 + }, + { + "epoch": 0.87, + "learning_rate": 4.35478426857594e-07, + "loss": 3.4038, + "step": 7302 + }, + { + "epoch": 0.87, + "learning_rate": 4.346623428277902e-07, + "loss": 3.4525, + "step": 7303 + }, + { + "epoch": 0.87, + "learning_rate": 4.338469894346303e-07, + "loss": 3.4527, + "step": 7304 + }, + { + "epoch": 0.87, + "learning_rate": 4.3303236680859953e-07, + "loss": 3.3466, + "step": 7305 + }, + { + "epoch": 0.87, + "learning_rate": 4.3221847508007285e-07, + "loss": 3.3815, + "step": 7306 + }, + { + "epoch": 0.87, + "learning_rate": 4.3140531437930513e-07, + "loss": 3.4234, + "step": 7307 + }, + { + "epoch": 0.87, + "learning_rate": 4.305928848364338e-07, + "loss": 3.4585, + "step": 7308 + }, + { + "epoch": 0.88, + "learning_rate": 4.2978118658147995e-07, + "loss": 3.3466, + "step": 7309 + }, + { + "epoch": 0.88, + "learning_rate": 4.2897021974435017e-07, + "loss": 3.3814, + "step": 7310 + }, + { + "epoch": 0.88, + "learning_rate": 4.281599844548279e-07, + "loss": 3.4284, + "step": 7311 + }, + { + "epoch": 0.88, + "learning_rate": 4.2735048084258657e-07, + "loss": 3.4599, + "step": 7312 + }, + { + "epoch": 0.88, + "learning_rate": 4.2654170903717597e-07, + "loss": 3.2868, + "step": 7313 + }, + { + "epoch": 0.88, + "learning_rate": 4.257336691680347e-07, + "loss": 3.3063, + "step": 7314 + }, + { + "epoch": 0.88, + "learning_rate": 4.249263613644805e-07, + "loss": 3.4203, + "step": 7315 + }, + { + "epoch": 0.88, + "learning_rate": 4.2411978575571446e-07, + "loss": 3.4031, + "step": 7316 + }, + { + "epoch": 0.88, + "learning_rate": 4.2331394247082104e-07, + "loss": 3.402, + "step": 7317 + }, + { + "epoch": 0.88, + "learning_rate": 4.2250883163876823e-07, + "loss": 3.4146, + "step": 7318 + }, + { + "epoch": 0.88, + "learning_rate": 4.21704453388404e-07, + "loss": 3.3594, + "step": 7319 + }, + { + "epoch": 0.88, + "learning_rate": 4.209008078484633e-07, + "loss": 3.3999, + "step": 7320 + }, + { + "epoch": 0.88, + "learning_rate": 4.200978951475609e-07, + "loss": 3.4142, + "step": 7321 + }, + { + "epoch": 0.88, + "learning_rate": 4.192957154141947e-07, + "loss": 3.2989, + "step": 7322 + }, + { + "epoch": 0.88, + "learning_rate": 4.1849426877674516e-07, + "loss": 3.4292, + "step": 7323 + }, + { + "epoch": 0.88, + "learning_rate": 4.1769355536347646e-07, + "loss": 3.3666, + "step": 7324 + }, + { + "epoch": 0.88, + "learning_rate": 4.1689357530253494e-07, + "loss": 3.5317, + "step": 7325 + }, + { + "epoch": 0.88, + "learning_rate": 4.1609432872194767e-07, + "loss": 3.3984, + "step": 7326 + }, + { + "epoch": 0.88, + "learning_rate": 4.1529581574962784e-07, + "loss": 3.463, + "step": 7327 + }, + { + "epoch": 0.88, + "learning_rate": 4.1449803651337e-07, + "loss": 3.4807, + "step": 7328 + }, + { + "epoch": 0.88, + "learning_rate": 4.1370099114084903e-07, + "loss": 3.3488, + "step": 7329 + }, + { + "epoch": 0.88, + "learning_rate": 4.129046797596248e-07, + "loss": 3.3466, + "step": 7330 + }, + { + "epoch": 0.88, + "learning_rate": 4.1210910249713907e-07, + "loss": 3.4372, + "step": 7331 + }, + { + "epoch": 0.88, + "learning_rate": 4.113142594807151e-07, + "loss": 3.371, + "step": 7332 + }, + { + "epoch": 0.88, + "learning_rate": 4.1052015083756223e-07, + "loss": 3.4268, + "step": 7333 + }, + { + "epoch": 0.88, + "learning_rate": 4.09726776694766e-07, + "loss": 3.3923, + "step": 7334 + }, + { + "epoch": 0.88, + "learning_rate": 4.0893413717930096e-07, + "loss": 3.4625, + "step": 7335 + }, + { + "epoch": 0.88, + "learning_rate": 4.0814223241801954e-07, + "loss": 3.3517, + "step": 7336 + }, + { + "epoch": 0.88, + "learning_rate": 4.0735106253765866e-07, + "loss": 3.4112, + "step": 7337 + }, + { + "epoch": 0.88, + "learning_rate": 4.0656062766483596e-07, + "loss": 3.3699, + "step": 7338 + }, + { + "epoch": 0.88, + "learning_rate": 4.0577092792605634e-07, + "loss": 3.4306, + "step": 7339 + }, + { + "epoch": 0.88, + "learning_rate": 4.0498196344769814e-07, + "loss": 3.4579, + "step": 7340 + }, + { + "epoch": 0.88, + "learning_rate": 4.0419373435603205e-07, + "loss": 3.3797, + "step": 7341 + }, + { + "epoch": 0.88, + "learning_rate": 4.034062407772021e-07, + "loss": 3.5164, + "step": 7342 + }, + { + "epoch": 0.88, + "learning_rate": 4.0261948283724206e-07, + "loss": 3.3723, + "step": 7343 + }, + { + "epoch": 0.88, + "learning_rate": 4.0183346066206276e-07, + "loss": 3.338, + "step": 7344 + }, + { + "epoch": 0.88, + "learning_rate": 4.010481743774602e-07, + "loss": 3.4902, + "step": 7345 + }, + { + "epoch": 0.88, + "learning_rate": 4.002636241091101e-07, + "loss": 3.3371, + "step": 7346 + }, + { + "epoch": 0.88, + "learning_rate": 3.9947980998257476e-07, + "loss": 3.3788, + "step": 7347 + }, + { + "epoch": 0.88, + "learning_rate": 3.9869673212329263e-07, + "loss": 3.4086, + "step": 7348 + }, + { + "epoch": 0.88, + "learning_rate": 3.979143906565902e-07, + "loss": 3.43, + "step": 7349 + }, + { + "epoch": 0.88, + "learning_rate": 3.971327857076701e-07, + "loss": 3.3519, + "step": 7350 + }, + { + "epoch": 0.88, + "learning_rate": 3.9635191740162336e-07, + "loss": 3.3802, + "step": 7351 + }, + { + "epoch": 0.88, + "learning_rate": 3.955717858634195e-07, + "loss": 3.3895, + "step": 7352 + }, + { + "epoch": 0.88, + "learning_rate": 3.9479239121791023e-07, + "loss": 3.463, + "step": 7353 + }, + { + "epoch": 0.88, + "learning_rate": 3.940137335898292e-07, + "loss": 3.5004, + "step": 7354 + }, + { + "epoch": 0.88, + "learning_rate": 3.93235813103795e-07, + "loss": 3.3977, + "step": 7355 + }, + { + "epoch": 0.88, + "learning_rate": 3.9245862988430304e-07, + "loss": 3.5199, + "step": 7356 + }, + { + "epoch": 0.88, + "learning_rate": 3.916821840557372e-07, + "loss": 3.3544, + "step": 7357 + }, + { + "epoch": 0.88, + "learning_rate": 3.909064757423564e-07, + "loss": 3.4814, + "step": 7358 + }, + { + "epoch": 0.88, + "learning_rate": 3.901315050683069e-07, + "loss": 3.4299, + "step": 7359 + }, + { + "epoch": 0.88, + "learning_rate": 3.89357272157615e-07, + "loss": 3.3779, + "step": 7360 + }, + { + "epoch": 0.88, + "learning_rate": 3.885837771341888e-07, + "loss": 3.3494, + "step": 7361 + }, + { + "epoch": 0.88, + "learning_rate": 3.878110201218177e-07, + "loss": 3.3298, + "step": 7362 + }, + { + "epoch": 0.88, + "learning_rate": 3.870390012441755e-07, + "loss": 3.3733, + "step": 7363 + }, + { + "epoch": 0.88, + "learning_rate": 3.8626772062481334e-07, + "loss": 3.4594, + "step": 7364 + }, + { + "epoch": 0.88, + "learning_rate": 3.8549717838716916e-07, + "loss": 3.4026, + "step": 7365 + }, + { + "epoch": 0.88, + "learning_rate": 3.8472737465456047e-07, + "loss": 3.3253, + "step": 7366 + }, + { + "epoch": 0.88, + "learning_rate": 3.8395830955018587e-07, + "loss": 3.4603, + "step": 7367 + }, + { + "epoch": 0.88, + "learning_rate": 3.831899831971264e-07, + "loss": 3.4144, + "step": 7368 + }, + { + "epoch": 0.88, + "learning_rate": 3.8242239571834483e-07, + "loss": 3.4077, + "step": 7369 + }, + { + "epoch": 0.88, + "learning_rate": 3.8165554723668783e-07, + "loss": 3.3775, + "step": 7370 + }, + { + "epoch": 0.88, + "learning_rate": 3.808894378748795e-07, + "loss": 3.3743, + "step": 7371 + }, + { + "epoch": 0.88, + "learning_rate": 3.801240677555296e-07, + "loss": 3.4262, + "step": 7372 + }, + { + "epoch": 0.88, + "learning_rate": 3.793594370011272e-07, + "loss": 3.3532, + "step": 7373 + }, + { + "epoch": 0.88, + "learning_rate": 3.7859554573404347e-07, + "loss": 3.4578, + "step": 7374 + }, + { + "epoch": 0.88, + "learning_rate": 3.778323940765316e-07, + "loss": 3.3696, + "step": 7375 + }, + { + "epoch": 0.88, + "learning_rate": 3.770699821507279e-07, + "loss": 3.4154, + "step": 7376 + }, + { + "epoch": 0.88, + "learning_rate": 3.7630831007864634e-07, + "loss": 3.3594, + "step": 7377 + }, + { + "epoch": 0.88, + "learning_rate": 3.7554737798218723e-07, + "loss": 3.3969, + "step": 7378 + }, + { + "epoch": 0.88, + "learning_rate": 3.747871859831287e-07, + "loss": 3.4023, + "step": 7379 + }, + { + "epoch": 0.88, + "learning_rate": 3.740277342031323e-07, + "loss": 3.4801, + "step": 7380 + }, + { + "epoch": 0.88, + "learning_rate": 3.7326902276374087e-07, + "loss": 3.3745, + "step": 7381 + }, + { + "epoch": 0.88, + "learning_rate": 3.7251105178637834e-07, + "loss": 3.3938, + "step": 7382 + }, + { + "epoch": 0.88, + "learning_rate": 3.7175382139234997e-07, + "loss": 3.4223, + "step": 7383 + }, + { + "epoch": 0.88, + "learning_rate": 3.709973317028448e-07, + "loss": 3.3433, + "step": 7384 + }, + { + "epoch": 0.88, + "learning_rate": 3.702415828389283e-07, + "loss": 3.3607, + "step": 7385 + }, + { + "epoch": 0.88, + "learning_rate": 3.6948657492155424e-07, + "loss": 3.3812, + "step": 7386 + }, + { + "epoch": 0.88, + "learning_rate": 3.6873230807155046e-07, + "loss": 3.4172, + "step": 7387 + }, + { + "epoch": 0.88, + "learning_rate": 3.6797878240963203e-07, + "loss": 3.4857, + "step": 7388 + }, + { + "epoch": 0.88, + "learning_rate": 3.6722599805639246e-07, + "loss": 3.4081, + "step": 7389 + }, + { + "epoch": 0.88, + "learning_rate": 3.664739551323082e-07, + "loss": 3.2324, + "step": 7390 + }, + { + "epoch": 0.88, + "learning_rate": 3.657226537577341e-07, + "loss": 3.4545, + "step": 7391 + }, + { + "epoch": 0.88, + "learning_rate": 3.6497209405291165e-07, + "loss": 3.3953, + "step": 7392 + }, + { + "epoch": 0.89, + "learning_rate": 3.642222761379571e-07, + "loss": 3.4347, + "step": 7393 + }, + { + "epoch": 0.89, + "learning_rate": 3.6347320013287504e-07, + "loss": 3.4735, + "step": 7394 + }, + { + "epoch": 0.89, + "learning_rate": 3.6272486615754334e-07, + "loss": 3.4068, + "step": 7395 + }, + { + "epoch": 0.89, + "learning_rate": 3.6197727433172793e-07, + "loss": 3.4321, + "step": 7396 + }, + { + "epoch": 0.89, + "learning_rate": 3.6123042477507307e-07, + "loss": 3.2822, + "step": 7397 + }, + { + "epoch": 0.89, + "learning_rate": 3.604843176071049e-07, + "loss": 3.488, + "step": 7398 + }, + { + "epoch": 0.89, + "learning_rate": 3.597389529472289e-07, + "loss": 3.5441, + "step": 7399 + }, + { + "epoch": 0.89, + "learning_rate": 3.5899433091473534e-07, + "loss": 3.3048, + "step": 7400 + }, + { + "epoch": 0.89, + "learning_rate": 3.582504516287916e-07, + "loss": 3.3329, + "step": 7401 + }, + { + "epoch": 0.89, + "learning_rate": 3.575073152084496e-07, + "loss": 3.4194, + "step": 7402 + }, + { + "epoch": 0.89, + "learning_rate": 3.567649217726399e-07, + "loss": 3.4644, + "step": 7403 + }, + { + "epoch": 0.89, + "learning_rate": 3.560232714401757e-07, + "loss": 3.411, + "step": 7404 + }, + { + "epoch": 0.89, + "learning_rate": 3.552823643297509e-07, + "loss": 3.4183, + "step": 7405 + }, + { + "epoch": 0.89, + "learning_rate": 3.5454220055993916e-07, + "loss": 3.4196, + "step": 7406 + }, + { + "epoch": 0.89, + "learning_rate": 3.538027802491967e-07, + "loss": 3.3717, + "step": 7407 + }, + { + "epoch": 0.89, + "learning_rate": 3.530641035158616e-07, + "loss": 3.3897, + "step": 7408 + }, + { + "epoch": 0.89, + "learning_rate": 3.5232617047815053e-07, + "loss": 3.472, + "step": 7409 + }, + { + "epoch": 0.89, + "learning_rate": 3.5158898125416287e-07, + "loss": 3.333, + "step": 7410 + }, + { + "epoch": 0.89, + "learning_rate": 3.508525359618775e-07, + "loss": 3.4747, + "step": 7411 + }, + { + "epoch": 0.89, + "learning_rate": 3.5011683471915635e-07, + "loss": 3.4115, + "step": 7412 + }, + { + "epoch": 0.89, + "learning_rate": 3.4938187764373964e-07, + "loss": 3.3665, + "step": 7413 + }, + { + "epoch": 0.89, + "learning_rate": 3.486476648532505e-07, + "loss": 3.3809, + "step": 7414 + }, + { + "epoch": 0.89, + "learning_rate": 3.4791419646519274e-07, + "loss": 3.3797, + "step": 7415 + }, + { + "epoch": 0.89, + "learning_rate": 3.4718147259695077e-07, + "loss": 3.3756, + "step": 7416 + }, + { + "epoch": 0.89, + "learning_rate": 3.4644949336578916e-07, + "loss": 3.3235, + "step": 7417 + }, + { + "epoch": 0.89, + "learning_rate": 3.457182588888541e-07, + "loss": 3.3912, + "step": 7418 + }, + { + "epoch": 0.89, + "learning_rate": 3.4498776928317214e-07, + "loss": 3.5055, + "step": 7419 + }, + { + "epoch": 0.89, + "learning_rate": 3.442580246656507e-07, + "loss": 3.4704, + "step": 7420 + }, + { + "epoch": 0.89, + "learning_rate": 3.435290251530793e-07, + "loss": 3.3754, + "step": 7421 + }, + { + "epoch": 0.89, + "learning_rate": 3.42800770862125e-07, + "loss": 3.3882, + "step": 7422 + }, + { + "epoch": 0.89, + "learning_rate": 3.420732619093392e-07, + "loss": 3.4101, + "step": 7423 + }, + { + "epoch": 0.89, + "learning_rate": 3.41346498411152e-07, + "loss": 3.3673, + "step": 7424 + }, + { + "epoch": 0.89, + "learning_rate": 3.4062048048387475e-07, + "loss": 3.3593, + "step": 7425 + }, + { + "epoch": 0.89, + "learning_rate": 3.398952082436996e-07, + "loss": 3.5246, + "step": 7426 + }, + { + "epoch": 0.89, + "learning_rate": 3.391706818066981e-07, + "loss": 3.3178, + "step": 7427 + }, + { + "epoch": 0.89, + "learning_rate": 3.384469012888236e-07, + "loss": 3.4603, + "step": 7428 + }, + { + "epoch": 0.89, + "learning_rate": 3.3772386680591186e-07, + "loss": 3.3535, + "step": 7429 + }, + { + "epoch": 0.89, + "learning_rate": 3.370015784736741e-07, + "loss": 3.369, + "step": 7430 + }, + { + "epoch": 0.89, + "learning_rate": 3.3628003640770847e-07, + "loss": 3.3692, + "step": 7431 + }, + { + "epoch": 0.89, + "learning_rate": 3.3555924072348866e-07, + "loss": 3.3891, + "step": 7432 + }, + { + "epoch": 0.89, + "learning_rate": 3.348391915363719e-07, + "loss": 3.4647, + "step": 7433 + }, + { + "epoch": 0.89, + "learning_rate": 3.341198889615943e-07, + "loss": 3.4304, + "step": 7434 + }, + { + "epoch": 0.89, + "learning_rate": 3.334013331142727e-07, + "loss": 3.457, + "step": 7435 + }, + { + "epoch": 0.89, + "learning_rate": 3.3268352410940453e-07, + "loss": 3.3432, + "step": 7436 + }, + { + "epoch": 0.89, + "learning_rate": 3.319664620618701e-07, + "loss": 3.3222, + "step": 7437 + }, + { + "epoch": 0.89, + "learning_rate": 3.312501470864249e-07, + "loss": 3.4101, + "step": 7438 + }, + { + "epoch": 0.89, + "learning_rate": 3.305345792977105e-07, + "loss": 3.3644, + "step": 7439 + }, + { + "epoch": 0.89, + "learning_rate": 3.298197588102459e-07, + "loss": 3.4107, + "step": 7440 + }, + { + "epoch": 0.89, + "learning_rate": 3.291056857384295e-07, + "loss": 3.3922, + "step": 7441 + }, + { + "epoch": 0.89, + "learning_rate": 3.283923601965433e-07, + "loss": 3.3581, + "step": 7442 + }, + { + "epoch": 0.89, + "learning_rate": 3.276797822987471e-07, + "loss": 3.4373, + "step": 7443 + }, + { + "epoch": 0.89, + "learning_rate": 3.2696795215908073e-07, + "loss": 3.5042, + "step": 7444 + }, + { + "epoch": 0.89, + "learning_rate": 3.2625686989146865e-07, + "loss": 3.3764, + "step": 7445 + }, + { + "epoch": 0.89, + "learning_rate": 3.255465356097093e-07, + "loss": 3.2433, + "step": 7446 + }, + { + "epoch": 0.89, + "learning_rate": 3.2483694942748613e-07, + "loss": 3.4075, + "step": 7447 + }, + { + "epoch": 0.89, + "learning_rate": 3.241281114583611e-07, + "loss": 3.4093, + "step": 7448 + }, + { + "epoch": 0.89, + "learning_rate": 3.2342002181577626e-07, + "loss": 3.421, + "step": 7449 + }, + { + "epoch": 0.89, + "learning_rate": 3.2271268061305486e-07, + "loss": 3.5036, + "step": 7450 + }, + { + "epoch": 0.89, + "learning_rate": 3.220060879633996e-07, + "loss": 3.3389, + "step": 7451 + }, + { + "epoch": 0.89, + "learning_rate": 3.213002439798923e-07, + "loss": 3.373, + "step": 7452 + }, + { + "epoch": 0.89, + "learning_rate": 3.2059514877549824e-07, + "loss": 3.4537, + "step": 7453 + }, + { + "epoch": 0.89, + "learning_rate": 3.1989080246306035e-07, + "loss": 3.4468, + "step": 7454 + }, + { + "epoch": 0.89, + "learning_rate": 3.1918720515530133e-07, + "loss": 3.4118, + "step": 7455 + }, + { + "epoch": 0.89, + "learning_rate": 3.1848435696482606e-07, + "loss": 3.3903, + "step": 7456 + }, + { + "epoch": 0.89, + "learning_rate": 3.177822580041162e-07, + "loss": 3.4182, + "step": 7457 + }, + { + "epoch": 0.89, + "learning_rate": 3.1708090838553916e-07, + "loss": 3.4494, + "step": 7458 + }, + { + "epoch": 0.89, + "learning_rate": 3.163803082213357e-07, + "loss": 3.4079, + "step": 7459 + }, + { + "epoch": 0.89, + "learning_rate": 3.1568045762363107e-07, + "loss": 3.4237, + "step": 7460 + }, + { + "epoch": 0.89, + "learning_rate": 3.149813567044302e-07, + "loss": 3.4012, + "step": 7461 + }, + { + "epoch": 0.89, + "learning_rate": 3.142830055756163e-07, + "loss": 3.4682, + "step": 7462 + }, + { + "epoch": 0.89, + "learning_rate": 3.135854043489533e-07, + "loss": 3.4707, + "step": 7463 + }, + { + "epoch": 0.89, + "learning_rate": 3.128885531360859e-07, + "loss": 3.4254, + "step": 7464 + }, + { + "epoch": 0.89, + "learning_rate": 3.12192452048537e-07, + "loss": 3.3442, + "step": 7465 + }, + { + "epoch": 0.89, + "learning_rate": 3.11497101197713e-07, + "loss": 3.4216, + "step": 7466 + }, + { + "epoch": 0.89, + "learning_rate": 3.1080250069489505e-07, + "loss": 3.487, + "step": 7467 + }, + { + "epoch": 0.89, + "learning_rate": 3.1010865065124905e-07, + "loss": 3.4234, + "step": 7468 + }, + { + "epoch": 0.89, + "learning_rate": 3.094155511778174e-07, + "loss": 3.3634, + "step": 7469 + }, + { + "epoch": 0.89, + "learning_rate": 3.0872320238552524e-07, + "loss": 3.4687, + "step": 7470 + }, + { + "epoch": 0.89, + "learning_rate": 3.0803160438517445e-07, + "loss": 3.2946, + "step": 7471 + }, + { + "epoch": 0.89, + "learning_rate": 3.0734075728744983e-07, + "loss": 3.3262, + "step": 7472 + }, + { + "epoch": 0.89, + "learning_rate": 3.0665066120291243e-07, + "loss": 3.392, + "step": 7473 + }, + { + "epoch": 0.89, + "learning_rate": 3.059613162420089e-07, + "loss": 3.4259, + "step": 7474 + }, + { + "epoch": 0.89, + "learning_rate": 3.0527272251505755e-07, + "loss": 3.3406, + "step": 7475 + }, + { + "epoch": 0.9, + "learning_rate": 3.0458488013226426e-07, + "loss": 3.4601, + "step": 7476 + }, + { + "epoch": 0.9, + "learning_rate": 3.038977892037098e-07, + "loss": 3.4764, + "step": 7477 + }, + { + "epoch": 0.9, + "learning_rate": 3.0321144983935734e-07, + "loss": 3.4764, + "step": 7478 + }, + { + "epoch": 0.9, + "learning_rate": 3.025258621490473e-07, + "loss": 3.3156, + "step": 7479 + }, + { + "epoch": 0.9, + "learning_rate": 3.0184102624250143e-07, + "loss": 3.4243, + "step": 7480 + }, + { + "epoch": 0.9, + "learning_rate": 3.011569422293209e-07, + "loss": 3.4171, + "step": 7481 + }, + { + "epoch": 0.9, + "learning_rate": 3.0047361021898815e-07, + "loss": 3.4342, + "step": 7482 + }, + { + "epoch": 0.9, + "learning_rate": 2.997910303208612e-07, + "loss": 3.451, + "step": 7483 + }, + { + "epoch": 0.9, + "learning_rate": 2.991092026441811e-07, + "loss": 3.3967, + "step": 7484 + }, + { + "epoch": 0.9, + "learning_rate": 2.9842812729806824e-07, + "loss": 3.366, + "step": 7485 + }, + { + "epoch": 0.9, + "learning_rate": 2.977478043915211e-07, + "loss": 3.2511, + "step": 7486 + }, + { + "epoch": 0.9, + "learning_rate": 2.970682340334191e-07, + "loss": 3.3657, + "step": 7487 + }, + { + "epoch": 0.9, + "learning_rate": 2.9638941633251985e-07, + "loss": 3.4214, + "step": 7488 + }, + { + "epoch": 0.9, + "learning_rate": 2.9571135139746134e-07, + "loss": 3.4433, + "step": 7489 + }, + { + "epoch": 0.9, + "learning_rate": 2.9503403933676236e-07, + "loss": 3.4117, + "step": 7490 + }, + { + "epoch": 0.9, + "learning_rate": 2.943574802588195e-07, + "loss": 3.4782, + "step": 7491 + }, + { + "epoch": 0.9, + "learning_rate": 2.9368167427190843e-07, + "loss": 3.3421, + "step": 7492 + }, + { + "epoch": 0.9, + "learning_rate": 2.930066214841859e-07, + "loss": 3.3413, + "step": 7493 + }, + { + "epoch": 0.9, + "learning_rate": 2.923323220036867e-07, + "loss": 3.4432, + "step": 7494 + }, + { + "epoch": 0.9, + "learning_rate": 2.916587759383266e-07, + "loss": 3.4399, + "step": 7495 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098598339589836e-07, + "loss": 3.4041, + "step": 7496 + }, + { + "epoch": 0.9, + "learning_rate": 2.9031394448407803e-07, + "loss": 3.3867, + "step": 7497 + }, + { + "epoch": 0.9, + "learning_rate": 2.8964265931041735e-07, + "loss": 3.4767, + "step": 7498 + }, + { + "epoch": 0.9, + "learning_rate": 2.8897212798234875e-07, + "loss": 3.5358, + "step": 7499 + }, + { + "epoch": 0.9, + "learning_rate": 2.8830235060718414e-07, + "loss": 3.4077, + "step": 7500 + }, + { + "epoch": 0.9, + "learning_rate": 2.8763332729211444e-07, + "loss": 3.438, + "step": 7501 + }, + { + "epoch": 0.9, + "learning_rate": 2.8696505814421017e-07, + "loss": 3.4654, + "step": 7502 + }, + { + "epoch": 0.9, + "learning_rate": 2.8629754327042294e-07, + "loss": 3.512, + "step": 7503 + }, + { + "epoch": 0.9, + "learning_rate": 2.856307827775789e-07, + "loss": 3.3385, + "step": 7504 + }, + { + "epoch": 0.9, + "learning_rate": 2.849647767723884e-07, + "loss": 3.4146, + "step": 7505 + }, + { + "epoch": 0.9, + "learning_rate": 2.842995253614389e-07, + "loss": 3.4255, + "step": 7506 + }, + { + "epoch": 0.9, + "learning_rate": 2.836350286511963e-07, + "loss": 3.4281, + "step": 7507 + }, + { + "epoch": 0.9, + "learning_rate": 2.8297128674800665e-07, + "loss": 3.4677, + "step": 7508 + }, + { + "epoch": 0.9, + "learning_rate": 2.8230829975809726e-07, + "loss": 3.3355, + "step": 7509 + }, + { + "epoch": 0.9, + "learning_rate": 2.816460677875693e-07, + "loss": 3.3438, + "step": 7510 + }, + { + "epoch": 0.9, + "learning_rate": 2.809845909424097e-07, + "loss": 3.3074, + "step": 7511 + }, + { + "epoch": 0.9, + "learning_rate": 2.8032386932847823e-07, + "loss": 3.4459, + "step": 7512 + }, + { + "epoch": 0.9, + "learning_rate": 2.796639030515191e-07, + "loss": 3.2886, + "step": 7513 + }, + { + "epoch": 0.9, + "learning_rate": 2.7900469221715185e-07, + "loss": 3.324, + "step": 7514 + }, + { + "epoch": 0.9, + "learning_rate": 2.7834623693087757e-07, + "loss": 3.3999, + "step": 7515 + }, + { + "epoch": 0.9, + "learning_rate": 2.776885372980748e-07, + "loss": 3.3825, + "step": 7516 + }, + { + "epoch": 0.9, + "learning_rate": 2.77031593424002e-07, + "loss": 3.4288, + "step": 7517 + }, + { + "epoch": 0.9, + "learning_rate": 2.763754054137957e-07, + "loss": 3.3162, + "step": 7518 + }, + { + "epoch": 0.9, + "learning_rate": 2.7571997337247415e-07, + "loss": 3.4387, + "step": 7519 + }, + { + "epoch": 0.9, + "learning_rate": 2.750652974049306e-07, + "loss": 3.3469, + "step": 7520 + }, + { + "epoch": 0.9, + "learning_rate": 2.744113776159407e-07, + "loss": 3.4825, + "step": 7521 + }, + { + "epoch": 0.9, + "learning_rate": 2.7375821411015745e-07, + "loss": 3.4089, + "step": 7522 + }, + { + "epoch": 0.9, + "learning_rate": 2.7310580699211277e-07, + "loss": 3.3598, + "step": 7523 + }, + { + "epoch": 0.9, + "learning_rate": 2.7245415636621863e-07, + "loss": 3.4773, + "step": 7524 + }, + { + "epoch": 0.9, + "learning_rate": 2.718032623367645e-07, + "loss": 3.4292, + "step": 7525 + }, + { + "epoch": 0.9, + "learning_rate": 2.711531250079191e-07, + "loss": 3.2654, + "step": 7526 + }, + { + "epoch": 0.9, + "learning_rate": 2.7050374448373276e-07, + "loss": 3.4231, + "step": 7527 + }, + { + "epoch": 0.9, + "learning_rate": 2.698551208681288e-07, + "loss": 3.3627, + "step": 7528 + }, + { + "epoch": 0.9, + "learning_rate": 2.6920725426491544e-07, + "loss": 3.4228, + "step": 7529 + }, + { + "epoch": 0.9, + "learning_rate": 2.685601447777769e-07, + "loss": 3.2965, + "step": 7530 + }, + { + "epoch": 0.9, + "learning_rate": 2.6791379251027595e-07, + "loss": 3.3686, + "step": 7531 + }, + { + "epoch": 0.9, + "learning_rate": 2.6726819756585534e-07, + "loss": 3.3312, + "step": 7532 + }, + { + "epoch": 0.9, + "learning_rate": 2.666233600478357e-07, + "loss": 3.5234, + "step": 7533 + }, + { + "epoch": 0.9, + "learning_rate": 2.6597928005941633e-07, + "loss": 3.3206, + "step": 7534 + }, + { + "epoch": 0.9, + "learning_rate": 2.653359577036768e-07, + "loss": 3.4097, + "step": 7535 + }, + { + "epoch": 0.9, + "learning_rate": 2.646933930835749e-07, + "loss": 3.4207, + "step": 7536 + }, + { + "epoch": 0.9, + "learning_rate": 2.640515863019449e-07, + "loss": 3.3547, + "step": 7537 + }, + { + "epoch": 0.9, + "learning_rate": 2.6341053746150314e-07, + "loss": 3.4449, + "step": 7538 + }, + { + "epoch": 0.9, + "learning_rate": 2.627702466648424e-07, + "loss": 3.4106, + "step": 7539 + }, + { + "epoch": 0.9, + "learning_rate": 2.621307140144341e-07, + "loss": 3.4407, + "step": 7540 + }, + { + "epoch": 0.9, + "learning_rate": 2.6149193961262975e-07, + "loss": 3.443, + "step": 7541 + }, + { + "epoch": 0.9, + "learning_rate": 2.608539235616597e-07, + "loss": 3.4975, + "step": 7542 + }, + { + "epoch": 0.9, + "learning_rate": 2.602166659636307e-07, + "loss": 3.4561, + "step": 7543 + }, + { + "epoch": 0.9, + "learning_rate": 2.5958016692053054e-07, + "loss": 3.3548, + "step": 7544 + }, + { + "epoch": 0.9, + "learning_rate": 2.5894442653422334e-07, + "loss": 3.4322, + "step": 7545 + }, + { + "epoch": 0.9, + "learning_rate": 2.5830944490645383e-07, + "loss": 3.414, + "step": 7546 + }, + { + "epoch": 0.9, + "learning_rate": 2.5767522213884346e-07, + "loss": 3.4693, + "step": 7547 + }, + { + "epoch": 0.9, + "learning_rate": 2.5704175833289544e-07, + "loss": 3.4281, + "step": 7548 + }, + { + "epoch": 0.9, + "learning_rate": 2.5640905358998657e-07, + "loss": 3.3587, + "step": 7549 + }, + { + "epoch": 0.9, + "learning_rate": 2.557771080113769e-07, + "loss": 3.3599, + "step": 7550 + }, + { + "epoch": 0.9, + "learning_rate": 2.551459216982022e-07, + "loss": 3.45, + "step": 7551 + }, + { + "epoch": 0.9, + "learning_rate": 2.545154947514777e-07, + "loss": 3.4406, + "step": 7552 + }, + { + "epoch": 0.9, + "learning_rate": 2.538858272720962e-07, + "loss": 3.4566, + "step": 7553 + }, + { + "epoch": 0.9, + "learning_rate": 2.53256919360832e-07, + "loss": 3.4032, + "step": 7554 + }, + { + "epoch": 0.9, + "learning_rate": 2.526287711183323e-07, + "loss": 3.4759, + "step": 7555 + }, + { + "epoch": 0.9, + "learning_rate": 2.5200138264512895e-07, + "loss": 3.4332, + "step": 7556 + }, + { + "epoch": 0.9, + "learning_rate": 2.5137475404162616e-07, + "loss": 3.3305, + "step": 7557 + }, + { + "epoch": 0.9, + "learning_rate": 2.5074888540811246e-07, + "loss": 3.4041, + "step": 7558 + }, + { + "epoch": 0.9, + "learning_rate": 2.501237768447501e-07, + "loss": 3.4432, + "step": 7559 + }, + { + "epoch": 0.91, + "learning_rate": 2.4949942845158227e-07, + "loss": 3.4739, + "step": 7560 + }, + { + "epoch": 0.91, + "learning_rate": 2.488758403285285e-07, + "loss": 3.4189, + "step": 7561 + }, + { + "epoch": 0.91, + "learning_rate": 2.4825301257539015e-07, + "loss": 3.4795, + "step": 7562 + }, + { + "epoch": 0.91, + "learning_rate": 2.4763094529184185e-07, + "loss": 3.4602, + "step": 7563 + }, + { + "epoch": 0.91, + "learning_rate": 2.4700963857744173e-07, + "loss": 3.3096, + "step": 7564 + }, + { + "epoch": 0.91, + "learning_rate": 2.4638909253162145e-07, + "loss": 3.4068, + "step": 7565 + }, + { + "epoch": 0.91, + "learning_rate": 2.457693072536949e-07, + "loss": 3.4107, + "step": 7566 + }, + { + "epoch": 0.91, + "learning_rate": 2.45150282842852e-07, + "loss": 3.362, + "step": 7567 + }, + { + "epoch": 0.91, + "learning_rate": 2.4453201939816096e-07, + "loss": 3.4164, + "step": 7568 + }, + { + "epoch": 0.91, + "learning_rate": 2.439145170185686e-07, + "loss": 3.4489, + "step": 7569 + }, + { + "epoch": 0.91, + "learning_rate": 2.4329777580290146e-07, + "loss": 3.4531, + "step": 7570 + }, + { + "epoch": 0.91, + "learning_rate": 2.426817958498606e-07, + "loss": 3.4572, + "step": 7571 + }, + { + "epoch": 0.91, + "learning_rate": 2.4206657725802997e-07, + "loss": 3.4101, + "step": 7572 + }, + { + "epoch": 0.91, + "learning_rate": 2.414521201258663e-07, + "loss": 3.3128, + "step": 7573 + }, + { + "epoch": 0.91, + "learning_rate": 2.408384245517098e-07, + "loss": 3.4071, + "step": 7574 + }, + { + "epoch": 0.91, + "learning_rate": 2.4022549063377533e-07, + "loss": 3.4345, + "step": 7575 + }, + { + "epoch": 0.91, + "learning_rate": 2.396133184701566e-07, + "loss": 3.2989, + "step": 7576 + }, + { + "epoch": 0.91, + "learning_rate": 2.390019081588257e-07, + "loss": 3.4989, + "step": 7577 + }, + { + "epoch": 0.91, + "learning_rate": 2.383912597976329e-07, + "loss": 3.4129, + "step": 7578 + }, + { + "epoch": 0.91, + "learning_rate": 2.3778137348430597e-07, + "loss": 3.3868, + "step": 7579 + }, + { + "epoch": 0.91, + "learning_rate": 2.3717224931645244e-07, + "loss": 3.3598, + "step": 7580 + }, + { + "epoch": 0.91, + "learning_rate": 2.365638873915549e-07, + "loss": 3.3764, + "step": 7581 + }, + { + "epoch": 0.91, + "learning_rate": 2.3595628780697655e-07, + "loss": 3.4767, + "step": 7582 + }, + { + "epoch": 0.91, + "learning_rate": 2.3534945065995796e-07, + "loss": 3.3469, + "step": 7583 + }, + { + "epoch": 0.91, + "learning_rate": 2.3474337604761534e-07, + "loss": 3.4358, + "step": 7584 + }, + { + "epoch": 0.91, + "learning_rate": 2.341380640669483e-07, + "loss": 3.4487, + "step": 7585 + }, + { + "epoch": 0.91, + "learning_rate": 2.335335148148271e-07, + "loss": 3.4229, + "step": 7586 + }, + { + "epoch": 0.91, + "learning_rate": 2.3292972838800666e-07, + "loss": 3.5282, + "step": 7587 + }, + { + "epoch": 0.91, + "learning_rate": 2.323267048831157e-07, + "loss": 3.3964, + "step": 7588 + }, + { + "epoch": 0.91, + "learning_rate": 2.3172444439666263e-07, + "loss": 3.4061, + "step": 7589 + }, + { + "epoch": 0.91, + "learning_rate": 2.31122947025032e-07, + "loss": 3.4056, + "step": 7590 + }, + { + "epoch": 0.91, + "learning_rate": 2.3052221286449017e-07, + "loss": 3.3615, + "step": 7591 + }, + { + "epoch": 0.91, + "learning_rate": 2.299222420111752e-07, + "loss": 3.5069, + "step": 7592 + }, + { + "epoch": 0.91, + "learning_rate": 2.293230345611097e-07, + "loss": 3.4495, + "step": 7593 + }, + { + "epoch": 0.91, + "learning_rate": 2.2872459061018758e-07, + "loss": 3.3751, + "step": 7594 + }, + { + "epoch": 0.91, + "learning_rate": 2.281269102541861e-07, + "loss": 3.3925, + "step": 7595 + }, + { + "epoch": 0.91, + "learning_rate": 2.275299935887576e-07, + "loss": 3.3963, + "step": 7596 + }, + { + "epoch": 0.91, + "learning_rate": 2.2693384070943235e-07, + "loss": 3.396, + "step": 7597 + }, + { + "epoch": 0.91, + "learning_rate": 2.263384517116185e-07, + "loss": 3.4128, + "step": 7598 + }, + { + "epoch": 0.91, + "learning_rate": 2.2574382669060313e-07, + "loss": 3.3395, + "step": 7599 + }, + { + "epoch": 0.91, + "learning_rate": 2.2514996574154792e-07, + "loss": 3.4689, + "step": 7600 + }, + { + "epoch": 0.91, + "learning_rate": 2.2455686895949736e-07, + "loss": 3.3764, + "step": 7601 + }, + { + "epoch": 0.91, + "learning_rate": 2.2396453643936723e-07, + "loss": 3.3803, + "step": 7602 + }, + { + "epoch": 0.91, + "learning_rate": 2.2337296827595723e-07, + "loss": 3.3929, + "step": 7603 + }, + { + "epoch": 0.91, + "learning_rate": 2.2278216456394054e-07, + "loss": 3.4134, + "step": 7604 + }, + { + "epoch": 0.91, + "learning_rate": 2.221921253978704e-07, + "loss": 3.4987, + "step": 7605 + }, + { + "epoch": 0.91, + "learning_rate": 2.216028508721746e-07, + "loss": 3.2794, + "step": 7606 + }, + { + "epoch": 0.91, + "learning_rate": 2.2101434108116382e-07, + "loss": 3.485, + "step": 7607 + }, + { + "epoch": 0.91, + "learning_rate": 2.204265961190194e-07, + "loss": 3.3704, + "step": 7608 + }, + { + "epoch": 0.91, + "learning_rate": 2.1983961607980832e-07, + "loss": 3.3786, + "step": 7609 + }, + { + "epoch": 0.91, + "learning_rate": 2.192534010574665e-07, + "loss": 3.4384, + "step": 7610 + }, + { + "epoch": 0.91, + "learning_rate": 2.1866795114581452e-07, + "loss": 3.4063, + "step": 7611 + }, + { + "epoch": 0.91, + "learning_rate": 2.1808326643854738e-07, + "loss": 3.5076, + "step": 7612 + }, + { + "epoch": 0.91, + "learning_rate": 2.1749934702923804e-07, + "loss": 3.4027, + "step": 7613 + }, + { + "epoch": 0.91, + "learning_rate": 2.169161930113356e-07, + "loss": 3.4442, + "step": 7614 + }, + { + "epoch": 0.91, + "learning_rate": 2.1633380447817097e-07, + "loss": 3.339, + "step": 7615 + }, + { + "epoch": 0.91, + "learning_rate": 2.1575218152294576e-07, + "loss": 3.3179, + "step": 7616 + }, + { + "epoch": 0.91, + "learning_rate": 2.1517132423874598e-07, + "loss": 3.3596, + "step": 7617 + }, + { + "epoch": 0.91, + "learning_rate": 2.145912327185312e-07, + "loss": 3.3839, + "step": 7618 + }, + { + "epoch": 0.91, + "learning_rate": 2.1401190705513886e-07, + "loss": 3.3606, + "step": 7619 + }, + { + "epoch": 0.91, + "learning_rate": 2.1343334734128418e-07, + "loss": 3.3567, + "step": 7620 + }, + { + "epoch": 0.91, + "learning_rate": 2.1285555366956033e-07, + "loss": 3.2822, + "step": 7621 + }, + { + "epoch": 0.91, + "learning_rate": 2.1227852613243616e-07, + "loss": 3.575, + "step": 7622 + }, + { + "epoch": 0.91, + "learning_rate": 2.1170226482226108e-07, + "loss": 3.3464, + "step": 7623 + }, + { + "epoch": 0.91, + "learning_rate": 2.1112676983125912e-07, + "loss": 3.4626, + "step": 7624 + }, + { + "epoch": 0.91, + "learning_rate": 2.1055204125153216e-07, + "loss": 3.5865, + "step": 7625 + }, + { + "epoch": 0.91, + "learning_rate": 2.0997807917506053e-07, + "loss": 3.3964, + "step": 7626 + }, + { + "epoch": 0.91, + "learning_rate": 2.0940488369370015e-07, + "loss": 3.4456, + "step": 7627 + }, + { + "epoch": 0.91, + "learning_rate": 2.0883245489918602e-07, + "loss": 3.4417, + "step": 7628 + }, + { + "epoch": 0.91, + "learning_rate": 2.0826079288312817e-07, + "loss": 3.3583, + "step": 7629 + }, + { + "epoch": 0.91, + "learning_rate": 2.0768989773701732e-07, + "loss": 3.3466, + "step": 7630 + }, + { + "epoch": 0.91, + "learning_rate": 2.0711976955221924e-07, + "loss": 3.2955, + "step": 7631 + }, + { + "epoch": 0.91, + "learning_rate": 2.0655040841997654e-07, + "loss": 3.3397, + "step": 7632 + }, + { + "epoch": 0.91, + "learning_rate": 2.0598181443140964e-07, + "loss": 3.4609, + "step": 7633 + }, + { + "epoch": 0.91, + "learning_rate": 2.0541398767751685e-07, + "loss": 3.3216, + "step": 7634 + }, + { + "epoch": 0.91, + "learning_rate": 2.048469282491722e-07, + "loss": 3.3476, + "step": 7635 + }, + { + "epoch": 0.91, + "learning_rate": 2.0428063623713024e-07, + "loss": 3.3555, + "step": 7636 + }, + { + "epoch": 0.91, + "learning_rate": 2.0371511173201741e-07, + "loss": 3.4403, + "step": 7637 + }, + { + "epoch": 0.91, + "learning_rate": 2.0315035482434186e-07, + "loss": 3.5011, + "step": 7638 + }, + { + "epoch": 0.91, + "learning_rate": 2.025863656044874e-07, + "loss": 3.4002, + "step": 7639 + }, + { + "epoch": 0.91, + "learning_rate": 2.0202314416271463e-07, + "loss": 3.3563, + "step": 7640 + }, + { + "epoch": 0.91, + "learning_rate": 2.0146069058916085e-07, + "loss": 3.4213, + "step": 7641 + }, + { + "epoch": 0.91, + "learning_rate": 2.008990049738424e-07, + "loss": 3.3509, + "step": 7642 + }, + { + "epoch": 0.92, + "learning_rate": 2.0033808740664961e-07, + "loss": 3.4364, + "step": 7643 + }, + { + "epoch": 0.92, + "learning_rate": 1.99777937977354e-07, + "loss": 3.3114, + "step": 7644 + }, + { + "epoch": 0.92, + "learning_rate": 1.992185567755994e-07, + "loss": 3.4066, + "step": 7645 + }, + { + "epoch": 0.92, + "learning_rate": 1.9865994389091204e-07, + "loss": 3.4801, + "step": 7646 + }, + { + "epoch": 0.92, + "learning_rate": 1.9810209941268978e-07, + "loss": 3.259, + "step": 7647 + }, + { + "epoch": 0.92, + "learning_rate": 1.975450234302112e-07, + "loss": 3.3645, + "step": 7648 + }, + { + "epoch": 0.92, + "learning_rate": 1.969887160326306e-07, + "loss": 3.3998, + "step": 7649 + }, + { + "epoch": 0.92, + "learning_rate": 1.964331773089795e-07, + "loss": 3.3943, + "step": 7650 + }, + { + "epoch": 0.92, + "learning_rate": 1.9587840734816566e-07, + "loss": 3.4044, + "step": 7651 + }, + { + "epoch": 0.92, + "learning_rate": 1.9532440623897587e-07, + "loss": 3.3528, + "step": 7652 + }, + { + "epoch": 0.92, + "learning_rate": 1.9477117407007084e-07, + "loss": 3.4099, + "step": 7653 + }, + { + "epoch": 0.92, + "learning_rate": 1.9421871092999144e-07, + "loss": 3.3902, + "step": 7654 + }, + { + "epoch": 0.92, + "learning_rate": 1.936670169071525e-07, + "loss": 3.3914, + "step": 7655 + }, + { + "epoch": 0.92, + "learning_rate": 1.9311609208984784e-07, + "loss": 3.3365, + "step": 7656 + }, + { + "epoch": 0.92, + "learning_rate": 1.9256593656624744e-07, + "loss": 3.3858, + "step": 7657 + }, + { + "epoch": 0.92, + "learning_rate": 1.920165504243976e-07, + "loss": 3.3343, + "step": 7658 + }, + { + "epoch": 0.92, + "learning_rate": 1.9146793375222238e-07, + "loss": 3.5093, + "step": 7659 + }, + { + "epoch": 0.92, + "learning_rate": 1.909200866375238e-07, + "loss": 3.5304, + "step": 7660 + }, + { + "epoch": 0.92, + "learning_rate": 1.9037300916797673e-07, + "loss": 3.4439, + "step": 7661 + }, + { + "epoch": 0.92, + "learning_rate": 1.8982670143113778e-07, + "loss": 3.4698, + "step": 7662 + }, + { + "epoch": 0.92, + "learning_rate": 1.8928116351443648e-07, + "loss": 3.2344, + "step": 7663 + }, + { + "epoch": 0.92, + "learning_rate": 1.887363955051813e-07, + "loss": 3.3075, + "step": 7664 + }, + { + "epoch": 0.92, + "learning_rate": 1.8819239749055752e-07, + "loss": 3.2873, + "step": 7665 + }, + { + "epoch": 0.92, + "learning_rate": 1.8764916955762547e-07, + "loss": 3.2888, + "step": 7666 + }, + { + "epoch": 0.92, + "learning_rate": 1.871067117933234e-07, + "loss": 3.3221, + "step": 7667 + }, + { + "epoch": 0.92, + "learning_rate": 1.8656502428446742e-07, + "loss": 3.4053, + "step": 7668 + }, + { + "epoch": 0.92, + "learning_rate": 1.860241071177482e-07, + "loss": 3.5023, + "step": 7669 + }, + { + "epoch": 0.92, + "learning_rate": 1.8548396037973425e-07, + "loss": 3.3364, + "step": 7670 + }, + { + "epoch": 0.92, + "learning_rate": 1.8494458415687145e-07, + "loss": 3.5225, + "step": 7671 + }, + { + "epoch": 0.92, + "learning_rate": 1.8440597853548014e-07, + "loss": 3.3758, + "step": 7672 + }, + { + "epoch": 0.92, + "learning_rate": 1.838681436017603e-07, + "loss": 3.4648, + "step": 7673 + }, + { + "epoch": 0.92, + "learning_rate": 1.833310794417853e-07, + "loss": 3.432, + "step": 7674 + }, + { + "epoch": 0.92, + "learning_rate": 1.8279478614150803e-07, + "loss": 3.3838, + "step": 7675 + }, + { + "epoch": 0.92, + "learning_rate": 1.8225926378675762e-07, + "loss": 3.5301, + "step": 7676 + }, + { + "epoch": 0.92, + "learning_rate": 1.8172451246323718e-07, + "loss": 3.3216, + "step": 7677 + }, + { + "epoch": 0.92, + "learning_rate": 1.8119053225652993e-07, + "loss": 3.3265, + "step": 7678 + }, + { + "epoch": 0.92, + "learning_rate": 1.806573232520936e-07, + "loss": 3.3681, + "step": 7679 + }, + { + "epoch": 0.92, + "learning_rate": 1.8012488553526165e-07, + "loss": 3.4576, + "step": 7680 + }, + { + "epoch": 0.92, + "learning_rate": 1.7959321919124806e-07, + "loss": 3.374, + "step": 7681 + }, + { + "epoch": 0.92, + "learning_rate": 1.7906232430513814e-07, + "loss": 3.3914, + "step": 7682 + }, + { + "epoch": 0.92, + "learning_rate": 1.7853220096189727e-07, + "loss": 3.5028, + "step": 7683 + }, + { + "epoch": 0.92, + "learning_rate": 1.7800284924636701e-07, + "loss": 3.4349, + "step": 7684 + }, + { + "epoch": 0.92, + "learning_rate": 1.774742692432646e-07, + "loss": 3.3923, + "step": 7685 + }, + { + "epoch": 0.92, + "learning_rate": 1.7694646103718293e-07, + "loss": 3.3234, + "step": 7686 + }, + { + "epoch": 0.92, + "learning_rate": 1.7641942471259388e-07, + "loss": 3.4347, + "step": 7687 + }, + { + "epoch": 0.92, + "learning_rate": 1.758931603538422e-07, + "loss": 3.3855, + "step": 7688 + }, + { + "epoch": 0.92, + "learning_rate": 1.7536766804515437e-07, + "loss": 3.3261, + "step": 7689 + }, + { + "epoch": 0.92, + "learning_rate": 1.748429478706276e-07, + "loss": 3.4677, + "step": 7690 + }, + { + "epoch": 0.92, + "learning_rate": 1.7431899991423917e-07, + "loss": 3.3574, + "step": 7691 + }, + { + "epoch": 0.92, + "learning_rate": 1.7379582425984144e-07, + "loss": 3.3218, + "step": 7692 + }, + { + "epoch": 0.92, + "learning_rate": 1.732734209911635e-07, + "loss": 3.4282, + "step": 7693 + }, + { + "epoch": 0.92, + "learning_rate": 1.7275179019181132e-07, + "loss": 3.3381, + "step": 7694 + }, + { + "epoch": 0.92, + "learning_rate": 1.7223093194526586e-07, + "loss": 3.4024, + "step": 7695 + }, + { + "epoch": 0.92, + "learning_rate": 1.717108463348849e-07, + "loss": 3.4543, + "step": 7696 + }, + { + "epoch": 0.92, + "learning_rate": 1.711915334439046e-07, + "loss": 3.3344, + "step": 7697 + }, + { + "epoch": 0.92, + "learning_rate": 1.706729933554341e-07, + "loss": 3.3505, + "step": 7698 + }, + { + "epoch": 0.92, + "learning_rate": 1.7015522615246138e-07, + "loss": 3.4936, + "step": 7699 + }, + { + "epoch": 0.92, + "learning_rate": 1.6963823191785024e-07, + "loss": 3.3164, + "step": 7700 + }, + { + "epoch": 0.92, + "learning_rate": 1.6912201073433943e-07, + "loss": 3.4153, + "step": 7701 + }, + { + "epoch": 0.92, + "learning_rate": 1.6860656268454621e-07, + "loss": 3.3511, + "step": 7702 + }, + { + "epoch": 0.92, + "learning_rate": 1.6809188785096186e-07, + "loss": 3.3468, + "step": 7703 + }, + { + "epoch": 0.92, + "learning_rate": 1.675779863159549e-07, + "loss": 3.4186, + "step": 7704 + }, + { + "epoch": 0.92, + "learning_rate": 1.670648581617712e-07, + "loss": 3.4138, + "step": 7705 + }, + { + "epoch": 0.92, + "learning_rate": 1.6655250347053063e-07, + "loss": 3.543, + "step": 7706 + }, + { + "epoch": 0.92, + "learning_rate": 1.660409223242315e-07, + "loss": 3.3816, + "step": 7707 + }, + { + "epoch": 0.92, + "learning_rate": 1.6553011480474667e-07, + "loss": 3.4286, + "step": 7708 + }, + { + "epoch": 0.92, + "learning_rate": 1.6502008099382572e-07, + "loss": 3.4913, + "step": 7709 + }, + { + "epoch": 0.92, + "learning_rate": 1.6451082097309444e-07, + "loss": 3.3786, + "step": 7710 + }, + { + "epoch": 0.92, + "learning_rate": 1.6400233482405436e-07, + "loss": 3.4048, + "step": 7711 + }, + { + "epoch": 0.92, + "learning_rate": 1.6349462262808425e-07, + "loss": 3.3526, + "step": 7712 + }, + { + "epoch": 0.92, + "learning_rate": 1.629876844664391e-07, + "loss": 3.4204, + "step": 7713 + }, + { + "epoch": 0.92, + "learning_rate": 1.624815204202479e-07, + "loss": 3.4516, + "step": 7714 + }, + { + "epoch": 0.92, + "learning_rate": 1.6197613057051808e-07, + "loss": 3.4832, + "step": 7715 + }, + { + "epoch": 0.92, + "learning_rate": 1.6147151499813162e-07, + "loss": 3.3204, + "step": 7716 + }, + { + "epoch": 0.92, + "learning_rate": 1.609676737838467e-07, + "loss": 3.3746, + "step": 7717 + }, + { + "epoch": 0.92, + "learning_rate": 1.6046460700830048e-07, + "loss": 3.4094, + "step": 7718 + }, + { + "epoch": 0.92, + "learning_rate": 1.5996231475200074e-07, + "loss": 3.3617, + "step": 7719 + }, + { + "epoch": 0.92, + "learning_rate": 1.5946079709533603e-07, + "loss": 3.412, + "step": 7720 + }, + { + "epoch": 0.92, + "learning_rate": 1.5896005411856873e-07, + "loss": 3.3073, + "step": 7721 + }, + { + "epoch": 0.92, + "learning_rate": 1.584600859018387e-07, + "loss": 3.3875, + "step": 7722 + }, + { + "epoch": 0.92, + "learning_rate": 1.579608925251591e-07, + "loss": 3.3666, + "step": 7723 + }, + { + "epoch": 0.92, + "learning_rate": 1.5746247406842275e-07, + "loss": 3.3486, + "step": 7724 + }, + { + "epoch": 0.92, + "learning_rate": 1.5696483061139412e-07, + "loss": 3.415, + "step": 7725 + }, + { + "epoch": 0.92, + "learning_rate": 1.5646796223371951e-07, + "loss": 3.3935, + "step": 7726 + }, + { + "epoch": 0.93, + "learning_rate": 1.5597186901491423e-07, + "loss": 3.3823, + "step": 7727 + }, + { + "epoch": 0.93, + "learning_rate": 1.5547655103437476e-07, + "loss": 3.3871, + "step": 7728 + }, + { + "epoch": 0.93, + "learning_rate": 1.5498200837137213e-07, + "loss": 3.5024, + "step": 7729 + }, + { + "epoch": 0.93, + "learning_rate": 1.5448824110505244e-07, + "loss": 3.3188, + "step": 7730 + }, + { + "epoch": 0.93, + "learning_rate": 1.539952493144381e-07, + "loss": 3.466, + "step": 7731 + }, + { + "epoch": 0.93, + "learning_rate": 1.5350303307842705e-07, + "loss": 3.4748, + "step": 7732 + }, + { + "epoch": 0.93, + "learning_rate": 1.5301159247579413e-07, + "loss": 3.3431, + "step": 7733 + }, + { + "epoch": 0.93, + "learning_rate": 1.5252092758519076e-07, + "loss": 3.3971, + "step": 7734 + }, + { + "epoch": 0.93, + "learning_rate": 1.5203103848514034e-07, + "loss": 3.2659, + "step": 7735 + }, + { + "epoch": 0.93, + "learning_rate": 1.5154192525404677e-07, + "loss": 3.3767, + "step": 7736 + }, + { + "epoch": 0.93, + "learning_rate": 1.510535879701869e-07, + "loss": 3.3615, + "step": 7737 + }, + { + "epoch": 0.93, + "learning_rate": 1.505660267117143e-07, + "loss": 3.4189, + "step": 7738 + }, + { + "epoch": 0.93, + "learning_rate": 1.500792415566582e-07, + "loss": 3.418, + "step": 7739 + }, + { + "epoch": 0.93, + "learning_rate": 1.4959323258292413e-07, + "loss": 3.3848, + "step": 7740 + }, + { + "epoch": 0.93, + "learning_rate": 1.4910799986829206e-07, + "loss": 3.3068, + "step": 7741 + }, + { + "epoch": 0.93, + "learning_rate": 1.486235434904204e-07, + "loss": 3.3854, + "step": 7742 + }, + { + "epoch": 0.93, + "learning_rate": 1.4813986352683886e-07, + "loss": 3.3884, + "step": 7743 + }, + { + "epoch": 0.93, + "learning_rate": 1.4765696005495822e-07, + "loss": 3.3514, + "step": 7744 + }, + { + "epoch": 0.93, + "learning_rate": 1.471748331520606e-07, + "loss": 3.3803, + "step": 7745 + }, + { + "epoch": 0.93, + "learning_rate": 1.466934828953065e-07, + "loss": 3.304, + "step": 7746 + }, + { + "epoch": 0.93, + "learning_rate": 1.4621290936173148e-07, + "loss": 3.4024, + "step": 7747 + }, + { + "epoch": 0.93, + "learning_rate": 1.457331126282452e-07, + "loss": 3.3929, + "step": 7748 + }, + { + "epoch": 0.93, + "learning_rate": 1.4525409277163505e-07, + "loss": 3.2942, + "step": 7749 + }, + { + "epoch": 0.93, + "learning_rate": 1.447758498685642e-07, + "loss": 3.4113, + "step": 7750 + }, + { + "epoch": 0.93, + "learning_rate": 1.4429838399556917e-07, + "loss": 3.431, + "step": 7751 + }, + { + "epoch": 0.93, + "learning_rate": 1.4382169522906498e-07, + "loss": 3.3771, + "step": 7752 + }, + { + "epoch": 0.93, + "learning_rate": 1.4334578364534003e-07, + "loss": 3.3624, + "step": 7753 + }, + { + "epoch": 0.93, + "learning_rate": 1.428706493205595e-07, + "loss": 3.3349, + "step": 7754 + }, + { + "epoch": 0.93, + "learning_rate": 1.423962923307637e-07, + "loss": 3.3416, + "step": 7755 + }, + { + "epoch": 0.93, + "learning_rate": 1.4192271275186798e-07, + "loss": 3.3943, + "step": 7756 + }, + { + "epoch": 0.93, + "learning_rate": 1.4144991065966564e-07, + "loss": 3.3603, + "step": 7757 + }, + { + "epoch": 0.93, + "learning_rate": 1.4097788612982333e-07, + "loss": 3.4544, + "step": 7758 + }, + { + "epoch": 0.93, + "learning_rate": 1.4050663923788342e-07, + "loss": 3.3254, + "step": 7759 + }, + { + "epoch": 0.93, + "learning_rate": 1.4003617005926452e-07, + "loss": 3.4788, + "step": 7760 + }, + { + "epoch": 0.93, + "learning_rate": 1.3956647866926075e-07, + "loss": 3.2882, + "step": 7761 + }, + { + "epoch": 0.93, + "learning_rate": 1.3909756514304094e-07, + "loss": 3.4761, + "step": 7762 + }, + { + "epoch": 0.93, + "learning_rate": 1.386294295556506e-07, + "loss": 3.307, + "step": 7763 + }, + { + "epoch": 0.93, + "learning_rate": 1.3816207198200983e-07, + "loss": 3.4339, + "step": 7764 + }, + { + "epoch": 0.93, + "learning_rate": 1.3769549249691426e-07, + "loss": 3.4176, + "step": 7765 + }, + { + "epoch": 0.93, + "learning_rate": 1.3722969117503648e-07, + "loss": 3.4148, + "step": 7766 + }, + { + "epoch": 0.93, + "learning_rate": 1.3676466809092182e-07, + "loss": 3.3306, + "step": 7767 + }, + { + "epoch": 0.93, + "learning_rate": 1.3630042331899295e-07, + "loss": 3.3403, + "step": 7768 + }, + { + "epoch": 0.93, + "learning_rate": 1.358369569335488e-07, + "loss": 3.2633, + "step": 7769 + }, + { + "epoch": 0.93, + "learning_rate": 1.3537426900876115e-07, + "loss": 3.3297, + "step": 7770 + }, + { + "epoch": 0.93, + "learning_rate": 1.3491235961867966e-07, + "loss": 3.4689, + "step": 7771 + }, + { + "epoch": 0.93, + "learning_rate": 1.344512288372263e-07, + "loss": 3.3657, + "step": 7772 + }, + { + "epoch": 0.93, + "learning_rate": 1.3399087673820255e-07, + "loss": 3.3599, + "step": 7773 + }, + { + "epoch": 0.93, + "learning_rate": 1.3353130339528287e-07, + "loss": 3.394, + "step": 7774 + }, + { + "epoch": 0.93, + "learning_rate": 1.3307250888201674e-07, + "loss": 3.1924, + "step": 7775 + }, + { + "epoch": 0.93, + "learning_rate": 1.3261449327182985e-07, + "loss": 3.4349, + "step": 7776 + }, + { + "epoch": 0.93, + "learning_rate": 1.3215725663802303e-07, + "loss": 3.4531, + "step": 7777 + }, + { + "epoch": 0.93, + "learning_rate": 1.3170079905377164e-07, + "loss": 3.4061, + "step": 7778 + }, + { + "epoch": 0.93, + "learning_rate": 1.312451205921289e-07, + "loss": 3.3824, + "step": 7779 + }, + { + "epoch": 0.93, + "learning_rate": 1.307902213260198e-07, + "loss": 3.3758, + "step": 7780 + }, + { + "epoch": 0.93, + "learning_rate": 1.303361013282478e-07, + "loss": 3.4051, + "step": 7781 + }, + { + "epoch": 0.93, + "learning_rate": 1.2988276067148974e-07, + "loss": 3.4584, + "step": 7782 + }, + { + "epoch": 0.93, + "learning_rate": 1.2943019942829816e-07, + "loss": 3.3956, + "step": 7783 + }, + { + "epoch": 0.93, + "learning_rate": 1.289784176711012e-07, + "loss": 3.4487, + "step": 7784 + }, + { + "epoch": 0.93, + "learning_rate": 1.285274154722016e-07, + "loss": 3.4735, + "step": 7785 + }, + { + "epoch": 0.93, + "learning_rate": 1.2807719290377775e-07, + "loss": 3.3817, + "step": 7786 + }, + { + "epoch": 0.93, + "learning_rate": 1.2762775003788419e-07, + "loss": 3.4053, + "step": 7787 + }, + { + "epoch": 0.93, + "learning_rate": 1.2717908694644843e-07, + "loss": 3.2453, + "step": 7788 + }, + { + "epoch": 0.93, + "learning_rate": 1.2673120370127522e-07, + "loss": 3.3668, + "step": 7789 + }, + { + "epoch": 0.93, + "learning_rate": 1.2628410037404394e-07, + "loss": 3.412, + "step": 7790 + }, + { + "epoch": 0.93, + "learning_rate": 1.2583777703630894e-07, + "loss": 3.4311, + "step": 7791 + }, + { + "epoch": 0.93, + "learning_rate": 1.2539223375949983e-07, + "loss": 3.5071, + "step": 7792 + }, + { + "epoch": 0.93, + "learning_rate": 1.2494747061492064e-07, + "loss": 3.3953, + "step": 7793 + }, + { + "epoch": 0.93, + "learning_rate": 1.2450348767375164e-07, + "loss": 3.4152, + "step": 7794 + }, + { + "epoch": 0.93, + "learning_rate": 1.2406028500704826e-07, + "loss": 3.5039, + "step": 7795 + }, + { + "epoch": 0.93, + "learning_rate": 1.2361786268574095e-07, + "loss": 3.3521, + "step": 7796 + }, + { + "epoch": 0.93, + "learning_rate": 1.231762207806342e-07, + "loss": 3.4279, + "step": 7797 + }, + { + "epoch": 0.93, + "learning_rate": 1.2273535936240866e-07, + "loss": 3.4096, + "step": 7798 + }, + { + "epoch": 0.93, + "learning_rate": 1.2229527850161905e-07, + "loss": 3.3866, + "step": 7799 + }, + { + "epoch": 0.93, + "learning_rate": 1.2185597826869788e-07, + "loss": 3.4313, + "step": 7800 + }, + { + "epoch": 0.93, + "learning_rate": 1.2141745873394839e-07, + "loss": 3.3507, + "step": 7801 + }, + { + "epoch": 0.93, + "learning_rate": 1.2097971996755277e-07, + "loss": 3.3404, + "step": 7802 + }, + { + "epoch": 0.93, + "learning_rate": 1.205427620395666e-07, + "loss": 3.324, + "step": 7803 + }, + { + "epoch": 0.93, + "learning_rate": 1.2010658501992013e-07, + "loss": 3.2783, + "step": 7804 + }, + { + "epoch": 0.93, + "learning_rate": 1.1967118897841912e-07, + "loss": 3.3547, + "step": 7805 + }, + { + "epoch": 0.93, + "learning_rate": 1.1923657398474564e-07, + "loss": 3.3545, + "step": 7806 + }, + { + "epoch": 0.93, + "learning_rate": 1.1880274010845294e-07, + "loss": 3.4271, + "step": 7807 + }, + { + "epoch": 0.93, + "learning_rate": 1.1836968741897492e-07, + "loss": 3.4212, + "step": 7808 + }, + { + "epoch": 0.93, + "learning_rate": 1.1793741598561447e-07, + "loss": 3.395, + "step": 7809 + }, + { + "epoch": 0.93, + "learning_rate": 1.1750592587755405e-07, + "loss": 3.4056, + "step": 7810 + }, + { + "epoch": 0.94, + "learning_rate": 1.170752171638484e-07, + "loss": 3.471, + "step": 7811 + }, + { + "epoch": 0.94, + "learning_rate": 1.1664528991342905e-07, + "loss": 3.4567, + "step": 7812 + }, + { + "epoch": 0.94, + "learning_rate": 1.1621614419510041e-07, + "loss": 3.4109, + "step": 7813 + }, + { + "epoch": 0.94, + "learning_rate": 1.1578778007754477e-07, + "loss": 3.3939, + "step": 7814 + }, + { + "epoch": 0.94, + "learning_rate": 1.1536019762931505e-07, + "loss": 3.3022, + "step": 7815 + }, + { + "epoch": 0.94, + "learning_rate": 1.1493339691884376e-07, + "loss": 3.3896, + "step": 7816 + }, + { + "epoch": 0.94, + "learning_rate": 1.1450737801443457e-07, + "loss": 3.4262, + "step": 7817 + }, + { + "epoch": 0.94, + "learning_rate": 1.140821409842685e-07, + "loss": 3.4144, + "step": 7818 + }, + { + "epoch": 0.94, + "learning_rate": 1.1365768589640059e-07, + "loss": 3.3454, + "step": 7819 + }, + { + "epoch": 0.94, + "learning_rate": 1.1323401281875979e-07, + "loss": 3.4108, + "step": 7820 + }, + { + "epoch": 0.94, + "learning_rate": 1.1281112181915077e-07, + "loss": 3.3745, + "step": 7821 + }, + { + "epoch": 0.94, + "learning_rate": 1.123890129652544e-07, + "loss": 3.3705, + "step": 7822 + }, + { + "epoch": 0.94, + "learning_rate": 1.1196768632462274e-07, + "loss": 3.4959, + "step": 7823 + }, + { + "epoch": 0.94, + "learning_rate": 1.1154714196468741e-07, + "loss": 3.4397, + "step": 7824 + }, + { + "epoch": 0.94, + "learning_rate": 1.1112737995275013e-07, + "loss": 3.274, + "step": 7825 + }, + { + "epoch": 0.94, + "learning_rate": 1.1070840035599051e-07, + "loss": 3.4382, + "step": 7826 + }, + { + "epoch": 0.94, + "learning_rate": 1.1029020324146266e-07, + "loss": 3.4241, + "step": 7827 + }, + { + "epoch": 0.94, + "learning_rate": 1.0987278867609419e-07, + "loss": 3.3499, + "step": 7828 + }, + { + "epoch": 0.94, + "learning_rate": 1.0945615672668719e-07, + "loss": 3.388, + "step": 7829 + }, + { + "epoch": 0.94, + "learning_rate": 1.0904030745992222e-07, + "loss": 3.4261, + "step": 7830 + }, + { + "epoch": 0.94, + "learning_rate": 1.0862524094234827e-07, + "loss": 3.3307, + "step": 7831 + }, + { + "epoch": 0.94, + "learning_rate": 1.0821095724039498e-07, + "loss": 3.3536, + "step": 7832 + }, + { + "epoch": 0.94, + "learning_rate": 1.0779745642036432e-07, + "loss": 3.3372, + "step": 7833 + }, + { + "epoch": 0.94, + "learning_rate": 1.0738473854843167e-07, + "loss": 3.4152, + "step": 7834 + }, + { + "epoch": 0.94, + "learning_rate": 1.0697280369064921e-07, + "loss": 3.4243, + "step": 7835 + }, + { + "epoch": 0.94, + "learning_rate": 1.0656165191294254e-07, + "loss": 3.4773, + "step": 7836 + }, + { + "epoch": 0.94, + "learning_rate": 1.0615128328111346e-07, + "loss": 3.3565, + "step": 7837 + }, + { + "epoch": 0.94, + "learning_rate": 1.0574169786083554e-07, + "loss": 3.4175, + "step": 7838 + }, + { + "epoch": 0.94, + "learning_rate": 1.0533289571766026e-07, + "loss": 3.3129, + "step": 7839 + }, + { + "epoch": 0.94, + "learning_rate": 1.0492487691701192e-07, + "loss": 3.4009, + "step": 7840 + }, + { + "epoch": 0.94, + "learning_rate": 1.0451764152418997e-07, + "loss": 3.3723, + "step": 7841 + }, + { + "epoch": 0.94, + "learning_rate": 1.0411118960436839e-07, + "loss": 3.3787, + "step": 7842 + }, + { + "epoch": 0.94, + "learning_rate": 1.0370552122259569e-07, + "loss": 3.4077, + "step": 7843 + }, + { + "epoch": 0.94, + "learning_rate": 1.033006364437944e-07, + "loss": 3.4513, + "step": 7844 + }, + { + "epoch": 0.94, + "learning_rate": 1.0289653533276378e-07, + "loss": 3.3825, + "step": 7845 + }, + { + "epoch": 0.94, + "learning_rate": 1.0249321795417377e-07, + "loss": 3.4313, + "step": 7846 + }, + { + "epoch": 0.94, + "learning_rate": 1.0209068437257385e-07, + "loss": 3.4905, + "step": 7847 + }, + { + "epoch": 0.94, + "learning_rate": 1.0168893465238416e-07, + "loss": 3.4736, + "step": 7848 + }, + { + "epoch": 0.94, + "learning_rate": 1.0128796885790104e-07, + "loss": 3.322, + "step": 7849 + }, + { + "epoch": 0.94, + "learning_rate": 1.0088778705329427e-07, + "loss": 3.3781, + "step": 7850 + }, + { + "epoch": 0.94, + "learning_rate": 1.0048838930261095e-07, + "loss": 3.3844, + "step": 7851 + }, + { + "epoch": 0.94, + "learning_rate": 1.000897756697683e-07, + "loss": 3.4058, + "step": 7852 + }, + { + "epoch": 0.94, + "learning_rate": 9.969194621856249e-08, + "loss": 3.4262, + "step": 7853 + }, + { + "epoch": 0.94, + "learning_rate": 9.929490101265982e-08, + "loss": 3.4007, + "step": 7854 + }, + { + "epoch": 0.94, + "learning_rate": 9.889864011560557e-08, + "loss": 3.4706, + "step": 7855 + }, + { + "epoch": 0.94, + "learning_rate": 9.85031635908168e-08, + "loss": 3.4723, + "step": 7856 + }, + { + "epoch": 0.94, + "learning_rate": 9.810847150158509e-08, + "loss": 3.3762, + "step": 7857 + }, + { + "epoch": 0.94, + "learning_rate": 9.77145639110766e-08, + "loss": 3.4173, + "step": 7858 + }, + { + "epoch": 0.94, + "learning_rate": 9.732144088233419e-08, + "loss": 3.4615, + "step": 7859 + }, + { + "epoch": 0.94, + "learning_rate": 9.692910247827037e-08, + "loss": 3.4063, + "step": 7860 + }, + { + "epoch": 0.94, + "learning_rate": 9.653754876167821e-08, + "loss": 3.4348, + "step": 7861 + }, + { + "epoch": 0.94, + "learning_rate": 9.614677979521925e-08, + "loss": 3.4597, + "step": 7862 + }, + { + "epoch": 0.94, + "learning_rate": 9.575679564143347e-08, + "loss": 3.3836, + "step": 7863 + }, + { + "epoch": 0.94, + "learning_rate": 9.536759636273373e-08, + "loss": 3.4246, + "step": 7864 + }, + { + "epoch": 0.94, + "learning_rate": 9.497918202140744e-08, + "loss": 3.4964, + "step": 7865 + }, + { + "epoch": 0.94, + "learning_rate": 9.459155267961595e-08, + "loss": 3.386, + "step": 7866 + }, + { + "epoch": 0.94, + "learning_rate": 9.420470839939688e-08, + "loss": 3.3681, + "step": 7867 + }, + { + "epoch": 0.94, + "learning_rate": 9.381864924265904e-08, + "loss": 3.3508, + "step": 7868 + }, + { + "epoch": 0.94, + "learning_rate": 9.343337527118912e-08, + "loss": 3.3512, + "step": 7869 + }, + { + "epoch": 0.94, + "learning_rate": 9.304888654664446e-08, + "loss": 3.3625, + "step": 7870 + }, + { + "epoch": 0.94, + "learning_rate": 9.266518313056027e-08, + "loss": 3.4441, + "step": 7871 + }, + { + "epoch": 0.94, + "learning_rate": 9.228226508434357e-08, + "loss": 3.4436, + "step": 7872 + }, + { + "epoch": 0.94, + "learning_rate": 9.190013246927642e-08, + "loss": 3.4574, + "step": 7873 + }, + { + "epoch": 0.94, + "learning_rate": 9.151878534651603e-08, + "loss": 3.3809, + "step": 7874 + }, + { + "epoch": 0.94, + "learning_rate": 9.113822377709303e-08, + "loss": 3.3962, + "step": 7875 + }, + { + "epoch": 0.94, + "learning_rate": 9.075844782191145e-08, + "loss": 3.3931, + "step": 7876 + }, + { + "epoch": 0.94, + "learning_rate": 9.037945754175214e-08, + "loss": 3.395, + "step": 7877 + }, + { + "epoch": 0.94, + "learning_rate": 9.00012529972677e-08, + "loss": 3.3525, + "step": 7878 + }, + { + "epoch": 0.94, + "learning_rate": 8.962383424898691e-08, + "loss": 3.4418, + "step": 7879 + }, + { + "epoch": 0.94, + "learning_rate": 8.92472013573109e-08, + "loss": 3.3857, + "step": 7880 + }, + { + "epoch": 0.94, + "learning_rate": 8.887135438251648e-08, + "loss": 3.4437, + "step": 7881 + }, + { + "epoch": 0.94, + "learning_rate": 8.84962933847533e-08, + "loss": 3.3415, + "step": 7882 + }, + { + "epoch": 0.94, + "learning_rate": 8.812201842404777e-08, + "loss": 3.3543, + "step": 7883 + }, + { + "epoch": 0.94, + "learning_rate": 8.774852956029756e-08, + "loss": 3.4447, + "step": 7884 + }, + { + "epoch": 0.94, + "learning_rate": 8.73758268532765e-08, + "loss": 3.4206, + "step": 7885 + }, + { + "epoch": 0.94, + "learning_rate": 8.70039103626319e-08, + "loss": 3.4265, + "step": 7886 + }, + { + "epoch": 0.94, + "learning_rate": 8.6632780147885e-08, + "loss": 3.4209, + "step": 7887 + }, + { + "epoch": 0.94, + "learning_rate": 8.626243626843166e-08, + "loss": 3.3934, + "step": 7888 + }, + { + "epoch": 0.94, + "learning_rate": 8.589287878354114e-08, + "loss": 3.4212, + "step": 7889 + }, + { + "epoch": 0.94, + "learning_rate": 8.552410775235831e-08, + "loss": 3.4989, + "step": 7890 + }, + { + "epoch": 0.94, + "learning_rate": 8.515612323390044e-08, + "loss": 3.4735, + "step": 7891 + }, + { + "epoch": 0.94, + "learning_rate": 8.47889252870604e-08, + "loss": 3.434, + "step": 7892 + }, + { + "epoch": 0.94, + "learning_rate": 8.442251397060453e-08, + "loss": 3.4023, + "step": 7893 + }, + { + "epoch": 0.95, + "learning_rate": 8.405688934317313e-08, + "loss": 3.3727, + "step": 7894 + }, + { + "epoch": 0.95, + "learning_rate": 8.369205146328052e-08, + "loss": 3.3133, + "step": 7895 + }, + { + "epoch": 0.95, + "learning_rate": 8.33280003893161e-08, + "loss": 3.4379, + "step": 7896 + }, + { + "epoch": 0.95, + "learning_rate": 8.296473617954159e-08, + "loss": 3.3901, + "step": 7897 + }, + { + "epoch": 0.95, + "learning_rate": 8.26022588920944e-08, + "loss": 3.3525, + "step": 7898 + }, + { + "epoch": 0.95, + "learning_rate": 8.22405685849853e-08, + "loss": 3.342, + "step": 7899 + }, + { + "epoch": 0.95, + "learning_rate": 8.187966531609915e-08, + "loss": 3.4013, + "step": 7900 + }, + { + "epoch": 0.95, + "learning_rate": 8.151954914319527e-08, + "loss": 3.3298, + "step": 7901 + }, + { + "epoch": 0.95, + "learning_rate": 8.11602201239059e-08, + "loss": 3.3183, + "step": 7902 + }, + { + "epoch": 0.95, + "learning_rate": 8.080167831573837e-08, + "loss": 3.4756, + "step": 7903 + }, + { + "epoch": 0.95, + "learning_rate": 8.044392377607458e-08, + "loss": 3.4082, + "step": 7904 + }, + { + "epoch": 0.95, + "learning_rate": 8.008695656216814e-08, + "loss": 3.4924, + "step": 7905 + }, + { + "epoch": 0.95, + "learning_rate": 7.973077673114948e-08, + "loss": 3.3197, + "step": 7906 + }, + { + "epoch": 0.95, + "learning_rate": 7.937538434002024e-08, + "loss": 3.4131, + "step": 7907 + }, + { + "epoch": 0.95, + "learning_rate": 7.902077944565823e-08, + "loss": 3.3885, + "step": 7908 + }, + { + "epoch": 0.95, + "learning_rate": 7.866696210481472e-08, + "loss": 3.3502, + "step": 7909 + }, + { + "epoch": 0.95, + "learning_rate": 7.831393237411389e-08, + "loss": 3.3685, + "step": 7910 + }, + { + "epoch": 0.95, + "learning_rate": 7.79616903100544e-08, + "loss": 3.4061, + "step": 7911 + }, + { + "epoch": 0.95, + "learning_rate": 7.761023596901063e-08, + "loss": 3.4079, + "step": 7912 + }, + { + "epoch": 0.95, + "learning_rate": 7.725956940722701e-08, + "loss": 3.3647, + "step": 7913 + }, + { + "epoch": 0.95, + "learning_rate": 7.690969068082698e-08, + "loss": 3.5029, + "step": 7914 + }, + { + "epoch": 0.95, + "learning_rate": 7.656059984580244e-08, + "loss": 3.4188, + "step": 7915 + }, + { + "epoch": 0.95, + "learning_rate": 7.621229695802368e-08, + "loss": 3.3128, + "step": 7916 + }, + { + "epoch": 0.95, + "learning_rate": 7.586478207323222e-08, + "loss": 3.4407, + "step": 7917 + }, + { + "epoch": 0.95, + "learning_rate": 7.551805524704525e-08, + "loss": 3.4053, + "step": 7918 + }, + { + "epoch": 0.95, + "learning_rate": 7.517211653495115e-08, + "loss": 3.4979, + "step": 7919 + }, + { + "epoch": 0.95, + "learning_rate": 7.482696599231676e-08, + "loss": 3.3881, + "step": 7920 + }, + { + "epoch": 0.95, + "learning_rate": 7.448260367437676e-08, + "loss": 3.3993, + "step": 7921 + }, + { + "epoch": 0.95, + "learning_rate": 7.413902963624542e-08, + "loss": 3.3758, + "step": 7922 + }, + { + "epoch": 0.95, + "learning_rate": 7.379624393290707e-08, + "loss": 3.3908, + "step": 7923 + }, + { + "epoch": 0.95, + "learning_rate": 7.345424661922118e-08, + "loss": 3.3257, + "step": 7924 + }, + { + "epoch": 0.95, + "learning_rate": 7.311303774992173e-08, + "loss": 3.4598, + "step": 7925 + }, + { + "epoch": 0.95, + "learning_rate": 7.277261737961505e-08, + "loss": 3.4603, + "step": 7926 + }, + { + "epoch": 0.95, + "learning_rate": 7.243298556278256e-08, + "loss": 3.4659, + "step": 7927 + }, + { + "epoch": 0.95, + "learning_rate": 7.209414235377854e-08, + "loss": 3.4319, + "step": 7928 + }, + { + "epoch": 0.95, + "learning_rate": 7.17560878068313e-08, + "loss": 3.5176, + "step": 7929 + }, + { + "epoch": 0.95, + "learning_rate": 7.141882197604366e-08, + "loss": 3.4271, + "step": 7930 + }, + { + "epoch": 0.95, + "learning_rate": 7.108234491539134e-08, + "loss": 3.3756, + "step": 7931 + }, + { + "epoch": 0.95, + "learning_rate": 7.074665667872405e-08, + "loss": 3.5107, + "step": 7932 + }, + { + "epoch": 0.95, + "learning_rate": 7.041175731976546e-08, + "loss": 3.5054, + "step": 7933 + }, + { + "epoch": 0.95, + "learning_rate": 7.007764689211272e-08, + "loss": 3.3111, + "step": 7934 + }, + { + "epoch": 0.95, + "learning_rate": 6.974432544923748e-08, + "loss": 3.2756, + "step": 7935 + }, + { + "epoch": 0.95, + "learning_rate": 6.94117930444832e-08, + "loss": 3.3611, + "step": 7936 + }, + { + "epoch": 0.95, + "learning_rate": 6.908004973107007e-08, + "loss": 3.4442, + "step": 7937 + }, + { + "epoch": 0.95, + "learning_rate": 6.874909556208898e-08, + "loss": 3.3835, + "step": 7938 + }, + { + "epoch": 0.95, + "learning_rate": 6.841893059050587e-08, + "loss": 3.3973, + "step": 7939 + }, + { + "epoch": 0.95, + "learning_rate": 6.808955486916068e-08, + "loss": 3.3693, + "step": 7940 + }, + { + "epoch": 0.95, + "learning_rate": 6.776096845076796e-08, + "loss": 3.353, + "step": 7941 + }, + { + "epoch": 0.95, + "learning_rate": 6.743317138791228e-08, + "loss": 3.4935, + "step": 7942 + }, + { + "epoch": 0.95, + "learning_rate": 6.710616373305556e-08, + "loss": 3.4115, + "step": 7943 + }, + { + "epoch": 0.95, + "learning_rate": 6.677994553853207e-08, + "loss": 3.2901, + "step": 7944 + }, + { + "epoch": 0.95, + "learning_rate": 6.645451685655002e-08, + "loss": 3.4465, + "step": 7945 + }, + { + "epoch": 0.95, + "learning_rate": 6.612987773919055e-08, + "loss": 3.3862, + "step": 7946 + }, + { + "epoch": 0.95, + "learning_rate": 6.580602823840932e-08, + "loss": 3.4301, + "step": 7947 + }, + { + "epoch": 0.95, + "learning_rate": 6.54829684060343e-08, + "loss": 3.4353, + "step": 7948 + }, + { + "epoch": 0.95, + "learning_rate": 6.516069829376914e-08, + "loss": 3.3739, + "step": 7949 + }, + { + "epoch": 0.95, + "learning_rate": 6.483921795318927e-08, + "loss": 3.3228, + "step": 7950 + }, + { + "epoch": 0.95, + "learning_rate": 6.45185274357446e-08, + "loss": 3.5302, + "step": 7951 + }, + { + "epoch": 0.95, + "learning_rate": 6.419862679275801e-08, + "loss": 3.3581, + "step": 7952 + }, + { + "epoch": 0.95, + "learning_rate": 6.387951607542742e-08, + "loss": 3.4351, + "step": 7953 + }, + { + "epoch": 0.95, + "learning_rate": 6.356119533482252e-08, + "loss": 3.4443, + "step": 7954 + }, + { + "epoch": 0.95, + "learning_rate": 6.324366462188759e-08, + "loss": 3.3469, + "step": 7955 + }, + { + "epoch": 0.95, + "learning_rate": 6.292692398743971e-08, + "loss": 3.4218, + "step": 7956 + }, + { + "epoch": 0.95, + "learning_rate": 6.261097348217116e-08, + "loss": 3.406, + "step": 7957 + }, + { + "epoch": 0.95, + "learning_rate": 6.229581315664535e-08, + "loss": 3.3641, + "step": 7958 + }, + { + "epoch": 0.95, + "learning_rate": 6.198144306130139e-08, + "loss": 3.4549, + "step": 7959 + }, + { + "epoch": 0.95, + "learning_rate": 6.166786324645124e-08, + "loss": 3.4549, + "step": 7960 + }, + { + "epoch": 0.95, + "learning_rate": 6.135507376227978e-08, + "loss": 3.4145, + "step": 7961 + }, + { + "epoch": 0.95, + "learning_rate": 6.104307465884584e-08, + "loss": 3.3595, + "step": 7962 + }, + { + "epoch": 0.95, + "learning_rate": 6.073186598608172e-08, + "loss": 3.3157, + "step": 7963 + }, + { + "epoch": 0.95, + "learning_rate": 6.042144779379366e-08, + "loss": 3.4435, + "step": 7964 + }, + { + "epoch": 0.95, + "learning_rate": 6.011182013166028e-08, + "loss": 3.4734, + "step": 7965 + }, + { + "epoch": 0.95, + "learning_rate": 5.980298304923526e-08, + "loss": 3.414, + "step": 7966 + }, + { + "epoch": 0.95, + "learning_rate": 5.9494936595944076e-08, + "loss": 3.4518, + "step": 7967 + }, + { + "epoch": 0.95, + "learning_rate": 5.918768082108728e-08, + "loss": 3.3308, + "step": 7968 + }, + { + "epoch": 0.95, + "learning_rate": 5.888121577383776e-08, + "loss": 3.4195, + "step": 7969 + }, + { + "epoch": 0.95, + "learning_rate": 5.857554150324185e-08, + "loss": 3.329, + "step": 7970 + }, + { + "epoch": 0.95, + "learning_rate": 5.82706580582193e-08, + "loss": 3.3898, + "step": 7971 + }, + { + "epoch": 0.95, + "learning_rate": 5.7966565487564965e-08, + "loss": 3.394, + "step": 7972 + }, + { + "epoch": 0.95, + "learning_rate": 5.7663263839944915e-08, + "loss": 3.384, + "step": 7973 + }, + { + "epoch": 0.95, + "learning_rate": 5.7360753163899774e-08, + "loss": 3.3058, + "step": 7974 + }, + { + "epoch": 0.95, + "learning_rate": 5.705903350784303e-08, + "loss": 3.3093, + "step": 7975 + }, + { + "epoch": 0.95, + "learning_rate": 5.675810492006273e-08, + "loss": 3.476, + "step": 7976 + }, + { + "epoch": 0.95, + "learning_rate": 5.645796744871812e-08, + "loss": 3.5225, + "step": 7977 + }, + { + "epoch": 0.96, + "learning_rate": 5.615862114184467e-08, + "loss": 3.4699, + "step": 7978 + }, + { + "epoch": 0.96, + "learning_rate": 5.586006604734906e-08, + "loss": 3.3494, + "step": 7979 + }, + { + "epoch": 0.96, + "learning_rate": 5.556230221301195e-08, + "loss": 3.3992, + "step": 7980 + }, + { + "epoch": 0.96, + "learning_rate": 5.5265329686487455e-08, + "loss": 3.393, + "step": 7981 + }, + { + "epoch": 0.96, + "learning_rate": 5.496914851530366e-08, + "loss": 3.4805, + "step": 7982 + }, + { + "epoch": 0.96, + "learning_rate": 5.467375874686043e-08, + "loss": 3.3458, + "step": 7983 + }, + { + "epoch": 0.96, + "learning_rate": 5.437916042843272e-08, + "loss": 3.4603, + "step": 7984 + }, + { + "epoch": 0.96, + "learning_rate": 5.408535360716782e-08, + "loss": 3.354, + "step": 7985 + }, + { + "epoch": 0.96, + "learning_rate": 5.3792338330086455e-08, + "loss": 3.4039, + "step": 7986 + }, + { + "epoch": 0.96, + "learning_rate": 5.350011464408278e-08, + "loss": 3.2983, + "step": 7987 + }, + { + "epoch": 0.96, + "learning_rate": 5.320868259592493e-08, + "loss": 3.4304, + "step": 7988 + }, + { + "epoch": 0.96, + "learning_rate": 5.291804223225283e-08, + "loss": 3.3501, + "step": 7989 + }, + { + "epoch": 0.96, + "learning_rate": 5.2628193599580377e-08, + "loss": 3.4263, + "step": 7990 + }, + { + "epoch": 0.96, + "learning_rate": 5.2339136744296023e-08, + "loss": 3.4717, + "step": 7991 + }, + { + "epoch": 0.96, + "learning_rate": 5.205087171265999e-08, + "loss": 3.4185, + "step": 7992 + }, + { + "epoch": 0.96, + "learning_rate": 5.176339855080537e-08, + "loss": 3.4593, + "step": 7993 + }, + { + "epoch": 0.96, + "learning_rate": 5.1476717304740905e-08, + "loss": 3.4384, + "step": 7994 + }, + { + "epoch": 0.96, + "learning_rate": 5.1190828020345476e-08, + "loss": 3.3296, + "step": 7995 + }, + { + "epoch": 0.96, + "learning_rate": 5.090573074337357e-08, + "loss": 3.4303, + "step": 7996 + }, + { + "epoch": 0.96, + "learning_rate": 5.06214255194526e-08, + "loss": 3.4142, + "step": 7997 + }, + { + "epoch": 0.96, + "learning_rate": 5.033791239408225e-08, + "loss": 3.4209, + "step": 7998 + }, + { + "epoch": 0.96, + "learning_rate": 5.0055191412635705e-08, + "loss": 3.2888, + "step": 7999 + }, + { + "epoch": 0.96, + "learning_rate": 4.9773262620360084e-08, + "loss": 3.3983, + "step": 8000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9492126062374856e-08, + "loss": 3.3906, + "step": 8001 + }, + { + "epoch": 0.96, + "learning_rate": 4.9211781783674026e-08, + "loss": 3.385, + "step": 8002 + }, + { + "epoch": 0.96, + "learning_rate": 4.893222982912282e-08, + "loss": 3.3948, + "step": 8003 + }, + { + "epoch": 0.96, + "learning_rate": 4.865347024346101e-08, + "loss": 3.4405, + "step": 8004 + }, + { + "epoch": 0.96, + "learning_rate": 4.837550307130179e-08, + "loss": 3.3702, + "step": 8005 + }, + { + "epoch": 0.96, + "learning_rate": 4.809832835713069e-08, + "loss": 3.4415, + "step": 8006 + }, + { + "epoch": 0.96, + "learning_rate": 4.782194614530666e-08, + "loss": 3.4884, + "step": 8007 + }, + { + "epoch": 0.96, + "learning_rate": 4.754635648006212e-08, + "loss": 3.3457, + "step": 8008 + }, + { + "epoch": 0.96, + "learning_rate": 4.727155940550177e-08, + "loss": 3.282, + "step": 8009 + }, + { + "epoch": 0.96, + "learning_rate": 4.699755496560543e-08, + "loss": 3.4198, + "step": 8010 + }, + { + "epoch": 0.96, + "learning_rate": 4.672434320422359e-08, + "loss": 3.3862, + "step": 8011 + }, + { + "epoch": 0.96, + "learning_rate": 4.645192416508182e-08, + "loss": 3.4169, + "step": 8012 + }, + { + "epoch": 0.96, + "learning_rate": 4.618029789177747e-08, + "loss": 3.3276, + "step": 8013 + }, + { + "epoch": 0.96, + "learning_rate": 4.5909464427781324e-08, + "loss": 3.2976, + "step": 8014 + }, + { + "epoch": 0.96, + "learning_rate": 4.5639423816439264e-08, + "loss": 3.4201, + "step": 8015 + }, + { + "epoch": 0.96, + "learning_rate": 4.537017610096672e-08, + "loss": 3.4942, + "step": 8016 + }, + { + "epoch": 0.96, + "learning_rate": 4.510172132445478e-08, + "loss": 3.3441, + "step": 8017 + }, + { + "epoch": 0.96, + "learning_rate": 4.4834059529867414e-08, + "loss": 3.3546, + "step": 8018 + }, + { + "epoch": 0.96, + "learning_rate": 4.4567190760040346e-08, + "loss": 3.3771, + "step": 8019 + }, + { + "epoch": 0.96, + "learning_rate": 4.430111505768331e-08, + "loss": 3.4061, + "step": 8020 + }, + { + "epoch": 0.96, + "learning_rate": 4.4035832465380565e-08, + "loss": 3.3528, + "step": 8021 + }, + { + "epoch": 0.96, + "learning_rate": 4.377134302558594e-08, + "loss": 3.3849, + "step": 8022 + }, + { + "epoch": 0.96, + "learning_rate": 4.350764678063002e-08, + "loss": 3.4174, + "step": 8023 + }, + { + "epoch": 0.96, + "learning_rate": 4.324474377271293e-08, + "loss": 3.3914, + "step": 8024 + }, + { + "epoch": 0.96, + "learning_rate": 4.298263404391156e-08, + "loss": 3.3796, + "step": 8025 + }, + { + "epoch": 0.96, + "learning_rate": 4.2721317636172374e-08, + "loss": 3.3797, + "step": 8026 + }, + { + "epoch": 0.96, + "learning_rate": 4.2460794591317465e-08, + "loss": 3.3383, + "step": 8027 + }, + { + "epoch": 0.96, + "learning_rate": 4.220106495104071e-08, + "loss": 3.3843, + "step": 8028 + }, + { + "epoch": 0.96, + "learning_rate": 4.194212875690884e-08, + "loss": 3.3924, + "step": 8029 + }, + { + "epoch": 0.96, + "learning_rate": 4.1683986050362614e-08, + "loss": 3.3508, + "step": 8030 + }, + { + "epoch": 0.96, + "learning_rate": 4.1426636872715086e-08, + "loss": 3.3198, + "step": 8031 + }, + { + "epoch": 0.96, + "learning_rate": 4.1170081265151654e-08, + "loss": 3.4319, + "step": 8032 + }, + { + "epoch": 0.96, + "learning_rate": 4.091431926873224e-08, + "loss": 3.3997, + "step": 8033 + }, + { + "epoch": 0.96, + "learning_rate": 4.0659350924388555e-08, + "loss": 3.3074, + "step": 8034 + }, + { + "epoch": 0.96, + "learning_rate": 4.0405176272926284e-08, + "loss": 3.3522, + "step": 8035 + }, + { + "epoch": 0.96, + "learning_rate": 4.015179535502345e-08, + "loss": 3.3992, + "step": 8036 + }, + { + "epoch": 0.96, + "learning_rate": 3.9899208211230945e-08, + "loss": 3.3167, + "step": 8037 + }, + { + "epoch": 0.96, + "learning_rate": 3.964741488197199e-08, + "loss": 3.4404, + "step": 8038 + }, + { + "epoch": 0.96, + "learning_rate": 3.939641540754546e-08, + "loss": 3.4335, + "step": 8039 + }, + { + "epoch": 0.96, + "learning_rate": 3.9146209828119233e-08, + "loss": 3.5174, + "step": 8040 + }, + { + "epoch": 0.96, + "learning_rate": 3.8896798183737926e-08, + "loss": 3.3399, + "step": 8041 + }, + { + "epoch": 0.96, + "learning_rate": 3.864818051431629e-08, + "loss": 3.3036, + "step": 8042 + }, + { + "epoch": 0.96, + "learning_rate": 3.840035685964361e-08, + "loss": 3.4727, + "step": 8043 + }, + { + "epoch": 0.96, + "learning_rate": 3.815332725938148e-08, + "loss": 3.4586, + "step": 8044 + }, + { + "epoch": 0.96, + "learning_rate": 3.79070917530644e-08, + "loss": 3.424, + "step": 8045 + }, + { + "epoch": 0.96, + "learning_rate": 3.7661650380099726e-08, + "loss": 3.3945, + "step": 8046 + }, + { + "epoch": 0.96, + "learning_rate": 3.7417003179768265e-08, + "loss": 3.4424, + "step": 8047 + }, + { + "epoch": 0.96, + "learning_rate": 3.7173150191223696e-08, + "loss": 3.3937, + "step": 8048 + }, + { + "epoch": 0.96, + "learning_rate": 3.693009145349147e-08, + "loss": 3.3947, + "step": 8049 + }, + { + "epoch": 0.96, + "learning_rate": 3.6687827005471024e-08, + "loss": 3.4283, + "step": 8050 + }, + { + "epoch": 0.96, + "learning_rate": 3.644635688593468e-08, + "loss": 3.453, + "step": 8051 + }, + { + "epoch": 0.96, + "learning_rate": 3.620568113352651e-08, + "loss": 3.2976, + "step": 8052 + }, + { + "epoch": 0.96, + "learning_rate": 3.5965799786765175e-08, + "loss": 3.4354, + "step": 8053 + }, + { + "epoch": 0.96, + "learning_rate": 3.5726712884040505e-08, + "loss": 3.328, + "step": 8054 + }, + { + "epoch": 0.96, + "learning_rate": 3.548842046361689e-08, + "loss": 3.3531, + "step": 8055 + }, + { + "epoch": 0.96, + "learning_rate": 3.5250922563629386e-08, + "loss": 3.3316, + "step": 8056 + }, + { + "epoch": 0.96, + "learning_rate": 3.5014219222088696e-08, + "loss": 3.4638, + "step": 8057 + }, + { + "epoch": 0.96, + "learning_rate": 3.477831047687508e-08, + "loss": 3.2898, + "step": 8058 + }, + { + "epoch": 0.96, + "learning_rate": 3.454319636574499e-08, + "loss": 3.2586, + "step": 8059 + }, + { + "epoch": 0.96, + "learning_rate": 3.4308876926325005e-08, + "loss": 3.389, + "step": 8060 + }, + { + "epoch": 0.97, + "learning_rate": 3.4075352196116244e-08, + "loss": 3.3652, + "step": 8061 + }, + { + "epoch": 0.97, + "learning_rate": 3.3842622212491575e-08, + "loss": 3.3858, + "step": 8062 + }, + { + "epoch": 0.97, + "learning_rate": 3.361068701269732e-08, + "loss": 3.4727, + "step": 8063 + }, + { + "epoch": 0.97, + "learning_rate": 3.3379546633852125e-08, + "loss": 3.351, + "step": 8064 + }, + { + "epoch": 0.97, + "learning_rate": 3.314920111294806e-08, + "loss": 3.3718, + "step": 8065 + }, + { + "epoch": 0.97, + "learning_rate": 3.291965048685009e-08, + "loss": 3.5028, + "step": 8066 + }, + { + "epoch": 0.97, + "learning_rate": 3.269089479229382e-08, + "loss": 3.3474, + "step": 8067 + }, + { + "epoch": 0.97, + "learning_rate": 3.246293406589163e-08, + "loss": 3.3722, + "step": 8068 + }, + { + "epoch": 0.97, + "learning_rate": 3.223576834412379e-08, + "loss": 3.3867, + "step": 8069 + }, + { + "epoch": 0.97, + "learning_rate": 3.2009397663347874e-08, + "loss": 3.4953, + "step": 8070 + }, + { + "epoch": 0.97, + "learning_rate": 3.178382205979158e-08, + "loss": 3.3797, + "step": 8071 + }, + { + "epoch": 0.97, + "learning_rate": 3.1559041569556026e-08, + "loss": 3.4203, + "step": 8072 + }, + { + "epoch": 0.97, + "learning_rate": 3.133505622861521e-08, + "loss": 3.3826, + "step": 8073 + }, + { + "epoch": 0.97, + "learning_rate": 3.111186607281546e-08, + "loss": 3.3531, + "step": 8074 + }, + { + "epoch": 0.97, + "learning_rate": 3.0889471137875994e-08, + "loss": 3.435, + "step": 8075 + }, + { + "epoch": 0.97, + "learning_rate": 3.066787145938999e-08, + "loss": 3.3625, + "step": 8076 + }, + { + "epoch": 0.97, + "learning_rate": 3.0447067072820745e-08, + "loss": 3.4208, + "step": 8077 + }, + { + "epoch": 0.97, + "learning_rate": 3.022705801350667e-08, + "loss": 3.3122, + "step": 8078 + }, + { + "epoch": 0.97, + "learning_rate": 3.0007844316658486e-08, + "loss": 3.3971, + "step": 8079 + }, + { + "epoch": 0.97, + "learning_rate": 2.978942601735757e-08, + "loss": 3.367, + "step": 8080 + }, + { + "epoch": 0.97, + "learning_rate": 2.9571803150560962e-08, + "loss": 3.3767, + "step": 8081 + }, + { + "epoch": 0.97, + "learning_rate": 2.9354975751096916e-08, + "loss": 3.4297, + "step": 8082 + }, + { + "epoch": 0.97, + "learning_rate": 2.9138943853666e-08, + "loss": 3.4031, + "step": 8083 + }, + { + "epoch": 0.97, + "learning_rate": 2.8923707492842233e-08, + "loss": 3.409, + "step": 8084 + }, + { + "epoch": 0.97, + "learning_rate": 2.870926670307139e-08, + "loss": 3.426, + "step": 8085 + }, + { + "epoch": 0.97, + "learning_rate": 2.84956215186738e-08, + "loss": 3.4199, + "step": 8086 + }, + { + "epoch": 0.97, + "learning_rate": 2.828277197384044e-08, + "loss": 3.4888, + "step": 8087 + }, + { + "epoch": 0.97, + "learning_rate": 2.807071810263573e-08, + "loss": 3.2736, + "step": 8088 + }, + { + "epoch": 0.97, + "learning_rate": 2.7859459938996415e-08, + "loss": 3.3415, + "step": 8089 + }, + { + "epoch": 0.97, + "learning_rate": 2.7648997516733777e-08, + "loss": 3.3956, + "step": 8090 + }, + { + "epoch": 0.97, + "learning_rate": 2.7439330869528658e-08, + "loss": 3.4592, + "step": 8091 + }, + { + "epoch": 0.97, + "learning_rate": 2.723046003093699e-08, + "loss": 3.4941, + "step": 8092 + }, + { + "epoch": 0.97, + "learning_rate": 2.7022385034385924e-08, + "loss": 3.4449, + "step": 8093 + }, + { + "epoch": 0.97, + "learning_rate": 2.6815105913176042e-08, + "loss": 3.378, + "step": 8094 + }, + { + "epoch": 0.97, + "learning_rate": 2.660862270048026e-08, + "loss": 3.4072, + "step": 8095 + }, + { + "epoch": 0.97, + "learning_rate": 2.64029354293438e-08, + "loss": 3.4652, + "step": 8096 + }, + { + "epoch": 0.97, + "learning_rate": 2.619804413268534e-08, + "loss": 3.3398, + "step": 8097 + }, + { + "epoch": 0.97, + "learning_rate": 2.599394884329587e-08, + "loss": 3.3662, + "step": 8098 + }, + { + "epoch": 0.97, + "learning_rate": 2.5790649593838146e-08, + "loss": 3.377, + "step": 8099 + }, + { + "epoch": 0.97, + "learning_rate": 2.558814641684837e-08, + "loss": 3.3652, + "step": 8100 + }, + { + "epoch": 0.97, + "learning_rate": 2.5386439344735614e-08, + "loss": 3.3934, + "step": 8101 + }, + { + "epoch": 0.97, + "learning_rate": 2.5185528409780722e-08, + "loss": 3.4345, + "step": 8102 + }, + { + "epoch": 0.97, + "learning_rate": 2.4985413644137426e-08, + "loss": 3.4239, + "step": 8103 + }, + { + "epoch": 0.97, + "learning_rate": 2.478609507983176e-08, + "loss": 3.483, + "step": 8104 + }, + { + "epoch": 0.97, + "learning_rate": 2.458757274876378e-08, + "loss": 3.3055, + "step": 8105 + }, + { + "epoch": 0.97, + "learning_rate": 2.4389846682704167e-08, + "loss": 3.4577, + "step": 8106 + }, + { + "epoch": 0.97, + "learning_rate": 2.419291691329706e-08, + "loss": 3.4408, + "step": 8107 + }, + { + "epoch": 0.97, + "learning_rate": 2.3996783472058916e-08, + "loss": 3.406, + "step": 8108 + }, + { + "epoch": 0.97, + "learning_rate": 2.3801446390379068e-08, + "loss": 3.4162, + "step": 8109 + }, + { + "epoch": 0.97, + "learning_rate": 2.3606905699519735e-08, + "loss": 3.3739, + "step": 8110 + }, + { + "epoch": 0.97, + "learning_rate": 2.3413161430614895e-08, + "loss": 3.3962, + "step": 8111 + }, + { + "epoch": 0.97, + "learning_rate": 2.3220213614670863e-08, + "loss": 3.4323, + "step": 8112 + }, + { + "epoch": 0.97, + "learning_rate": 2.3028062282568487e-08, + "loss": 3.4417, + "step": 8113 + }, + { + "epoch": 0.97, + "learning_rate": 2.2836707465057617e-08, + "loss": 3.3068, + "step": 8114 + }, + { + "epoch": 0.97, + "learning_rate": 2.264614919276431e-08, + "loss": 3.3016, + "step": 8115 + }, + { + "epoch": 0.97, + "learning_rate": 2.2456387496184728e-08, + "loss": 3.4137, + "step": 8116 + }, + { + "epoch": 0.97, + "learning_rate": 2.2267422405688467e-08, + "loss": 3.427, + "step": 8117 + }, + { + "epoch": 0.97, + "learning_rate": 2.2079253951517442e-08, + "loss": 3.468, + "step": 8118 + }, + { + "epoch": 0.97, + "learning_rate": 2.189188216378646e-08, + "loss": 3.4501, + "step": 8119 + }, + { + "epoch": 0.97, + "learning_rate": 2.1705307072482085e-08, + "loss": 3.4137, + "step": 8120 + }, + { + "epoch": 0.97, + "learning_rate": 2.1519528707464875e-08, + "loss": 3.2783, + "step": 8121 + }, + { + "epoch": 0.97, + "learning_rate": 2.1334547098464943e-08, + "loss": 3.4301, + "step": 8122 + }, + { + "epoch": 0.97, + "learning_rate": 2.1150362275088042e-08, + "loss": 3.3858, + "step": 8123 + }, + { + "epoch": 0.97, + "learning_rate": 2.0966974266810604e-08, + "loss": 3.423, + "step": 8124 + }, + { + "epoch": 0.97, + "learning_rate": 2.0784383102982477e-08, + "loss": 3.3557, + "step": 8125 + }, + { + "epoch": 0.97, + "learning_rate": 2.0602588812825287e-08, + "loss": 3.519, + "step": 8126 + }, + { + "epoch": 0.97, + "learning_rate": 2.042159142543354e-08, + "loss": 3.4067, + "step": 8127 + }, + { + "epoch": 0.97, + "learning_rate": 2.024139096977351e-08, + "loss": 3.3837, + "step": 8128 + }, + { + "epoch": 0.97, + "learning_rate": 2.0061987474685463e-08, + "loss": 3.5269, + "step": 8129 + }, + { + "epoch": 0.97, + "learning_rate": 1.9883380968880316e-08, + "loss": 3.454, + "step": 8130 + }, + { + "epoch": 0.97, + "learning_rate": 1.9705571480942988e-08, + "loss": 3.4408, + "step": 8131 + }, + { + "epoch": 0.97, + "learning_rate": 1.9528559039329044e-08, + "loss": 3.4318, + "step": 8132 + }, + { + "epoch": 0.97, + "learning_rate": 1.9352343672368047e-08, + "loss": 3.4057, + "step": 8133 + }, + { + "epoch": 0.97, + "learning_rate": 1.9176925408261883e-08, + "loss": 3.3113, + "step": 8134 + }, + { + "epoch": 0.97, + "learning_rate": 1.900230427508476e-08, + "loss": 3.4218, + "step": 8135 + }, + { + "epoch": 0.97, + "learning_rate": 1.8828480300781548e-08, + "loss": 3.3533, + "step": 8136 + }, + { + "epoch": 0.97, + "learning_rate": 1.8655453513172217e-08, + "loss": 3.4452, + "step": 8137 + }, + { + "epoch": 0.97, + "learning_rate": 1.8483223939947948e-08, + "loss": 3.449, + "step": 8138 + }, + { + "epoch": 0.97, + "learning_rate": 1.8311791608672246e-08, + "loss": 3.5629, + "step": 8139 + }, + { + "epoch": 0.97, + "learning_rate": 1.8141156546780947e-08, + "loss": 3.3567, + "step": 8140 + }, + { + "epoch": 0.97, + "learning_rate": 1.797131878158276e-08, + "loss": 3.332, + "step": 8141 + }, + { + "epoch": 0.97, + "learning_rate": 1.780227834025816e-08, + "loss": 3.4686, + "step": 8142 + }, + { + "epoch": 0.97, + "learning_rate": 1.7634035249860514e-08, + "loss": 3.3912, + "step": 8143 + }, + { + "epoch": 0.97, + "learning_rate": 1.746658953731606e-08, + "loss": 3.3821, + "step": 8144 + }, + { + "epoch": 0.98, + "learning_rate": 1.729994122942169e-08, + "loss": 3.3637, + "step": 8145 + }, + { + "epoch": 0.98, + "learning_rate": 1.71340903528483e-08, + "loss": 3.3354, + "step": 8146 + }, + { + "epoch": 0.98, + "learning_rate": 1.6969036934139095e-08, + "loss": 3.3117, + "step": 8147 + }, + { + "epoch": 0.98, + "learning_rate": 1.6804780999709058e-08, + "loss": 3.3946, + "step": 8148 + }, + { + "epoch": 0.98, + "learning_rate": 1.6641322575844943e-08, + "loss": 3.4616, + "step": 8149 + }, + { + "epoch": 0.98, + "learning_rate": 1.6478661688707487e-08, + "loss": 3.3881, + "step": 8150 + }, + { + "epoch": 0.98, + "learning_rate": 1.6316798364328645e-08, + "loss": 3.4404, + "step": 8151 + }, + { + "epoch": 0.98, + "learning_rate": 1.6155732628612697e-08, + "loss": 3.3873, + "step": 8152 + }, + { + "epoch": 0.98, + "learning_rate": 1.5995464507337356e-08, + "loss": 3.396, + "step": 8153 + }, + { + "epoch": 0.98, + "learning_rate": 1.5835994026151545e-08, + "loss": 3.3958, + "step": 8154 + }, + { + "epoch": 0.98, + "learning_rate": 1.5677321210576525e-08, + "loss": 3.304, + "step": 8155 + }, + { + "epoch": 0.98, + "learning_rate": 1.5519446086006972e-08, + "loss": 3.438, + "step": 8156 + }, + { + "epoch": 0.98, + "learning_rate": 1.5362368677708795e-08, + "loss": 3.3941, + "step": 8157 + }, + { + "epoch": 0.98, + "learning_rate": 1.520608901082077e-08, + "loss": 3.4059, + "step": 8158 + }, + { + "epoch": 0.98, + "learning_rate": 1.5050607110353997e-08, + "loss": 3.4639, + "step": 8159 + }, + { + "epoch": 0.98, + "learning_rate": 1.4895923001191914e-08, + "loss": 3.3504, + "step": 8160 + }, + { + "epoch": 0.98, + "learning_rate": 1.4742036708089713e-08, + "loss": 3.3976, + "step": 8161 + }, + { + "epoch": 0.98, + "learning_rate": 1.4588948255675473e-08, + "loss": 3.4466, + "step": 8162 + }, + { + "epoch": 0.98, + "learning_rate": 1.4436657668449593e-08, + "loss": 3.3923, + "step": 8163 + }, + { + "epoch": 0.98, + "learning_rate": 1.4285164970785359e-08, + "loss": 3.4011, + "step": 8164 + }, + { + "epoch": 0.98, + "learning_rate": 1.4134470186926153e-08, + "loss": 3.4161, + "step": 8165 + }, + { + "epoch": 0.98, + "learning_rate": 1.398457334099046e-08, + "loss": 3.4057, + "step": 8166 + }, + { + "epoch": 0.98, + "learning_rate": 1.3835474456966868e-08, + "loss": 3.312, + "step": 8167 + }, + { + "epoch": 0.98, + "learning_rate": 1.368717355871796e-08, + "loss": 3.3458, + "step": 8168 + }, + { + "epoch": 0.98, + "learning_rate": 1.3539670669976967e-08, + "loss": 3.4406, + "step": 8169 + }, + { + "epoch": 0.98, + "learning_rate": 1.3392965814351122e-08, + "loss": 3.4417, + "step": 8170 + }, + { + "epoch": 0.98, + "learning_rate": 1.3247059015318864e-08, + "loss": 3.456, + "step": 8171 + }, + { + "epoch": 0.98, + "learning_rate": 1.3101950296230403e-08, + "loss": 3.3334, + "step": 8172 + }, + { + "epoch": 0.98, + "learning_rate": 1.2957639680309942e-08, + "loss": 3.3245, + "step": 8173 + }, + { + "epoch": 0.98, + "learning_rate": 1.2814127190652337e-08, + "loss": 3.4168, + "step": 8174 + }, + { + "epoch": 0.98, + "learning_rate": 1.2671412850225329e-08, + "loss": 3.4026, + "step": 8175 + }, + { + "epoch": 0.98, + "learning_rate": 1.252949668186898e-08, + "loss": 3.434, + "step": 8176 + }, + { + "epoch": 0.98, + "learning_rate": 1.2388378708295678e-08, + "loss": 3.4625, + "step": 8177 + }, + { + "epoch": 0.98, + "learning_rate": 1.2248058952090136e-08, + "loss": 3.404, + "step": 8178 + }, + { + "epoch": 0.98, + "learning_rate": 1.2108537435708834e-08, + "loss": 3.4212, + "step": 8179 + }, + { + "epoch": 0.98, + "learning_rate": 1.1969814181480577e-08, + "loss": 3.3196, + "step": 8180 + }, + { + "epoch": 0.98, + "learning_rate": 1.183188921160705e-08, + "loss": 3.398, + "step": 8181 + }, + { + "epoch": 0.98, + "learning_rate": 1.169476254816171e-08, + "loss": 3.329, + "step": 8182 + }, + { + "epoch": 0.98, + "learning_rate": 1.1558434213090331e-08, + "loss": 3.4433, + "step": 8183 + }, + { + "epoch": 0.98, + "learning_rate": 1.1422904228210464e-08, + "loss": 3.4412, + "step": 8184 + }, + { + "epoch": 0.98, + "learning_rate": 1.128817261521309e-08, + "loss": 3.3371, + "step": 8185 + }, + { + "epoch": 0.98, + "learning_rate": 1.115423939565985e-08, + "loss": 3.4669, + "step": 8186 + }, + { + "epoch": 0.98, + "learning_rate": 1.1021104590985821e-08, + "loss": 3.3795, + "step": 8187 + }, + { + "epoch": 0.98, + "learning_rate": 1.0888768222497848e-08, + "loss": 3.3865, + "step": 8188 + }, + { + "epoch": 0.98, + "learning_rate": 1.07572303113751e-08, + "loss": 3.4131, + "step": 8189 + }, + { + "epoch": 0.98, + "learning_rate": 1.0626490878669071e-08, + "loss": 3.5311, + "step": 8190 + }, + { + "epoch": 0.98, + "learning_rate": 1.049654994530247e-08, + "loss": 3.3203, + "step": 8191 + }, + { + "epoch": 0.98, + "learning_rate": 1.0367407532071993e-08, + "loss": 3.3917, + "step": 8192 + }, + { + "epoch": 0.98, + "learning_rate": 1.0239063659645554e-08, + "loss": 3.4422, + "step": 8193 + }, + { + "epoch": 0.98, + "learning_rate": 1.0111518348562833e-08, + "loss": 3.429, + "step": 8194 + }, + { + "epoch": 0.98, + "learning_rate": 9.98477161923639e-09, + "loss": 3.3027, + "step": 8195 + }, + { + "epoch": 0.98, + "learning_rate": 9.858823491950552e-09, + "loss": 3.3738, + "step": 8196 + }, + { + "epoch": 0.98, + "learning_rate": 9.733673986862536e-09, + "loss": 3.5238, + "step": 8197 + }, + { + "epoch": 0.98, + "learning_rate": 9.609323124001313e-09, + "loss": 3.4413, + "step": 8198 + }, + { + "epoch": 0.98, + "learning_rate": 9.48577092326708e-09, + "loss": 3.3402, + "step": 8199 + }, + { + "epoch": 0.98, + "learning_rate": 9.363017404433461e-09, + "loss": 3.4539, + "step": 8200 + }, + { + "epoch": 0.98, + "learning_rate": 9.241062587146965e-09, + "loss": 3.4017, + "step": 8201 + }, + { + "epoch": 0.98, + "learning_rate": 9.119906490924202e-09, + "loss": 3.3856, + "step": 8202 + }, + { + "epoch": 0.98, + "learning_rate": 8.999549135154661e-09, + "loss": 3.3247, + "step": 8203 + }, + { + "epoch": 0.98, + "learning_rate": 8.879990539101825e-09, + "loss": 3.3646, + "step": 8204 + }, + { + "epoch": 0.98, + "learning_rate": 8.761230721898161e-09, + "loss": 3.3124, + "step": 8205 + }, + { + "epoch": 0.98, + "learning_rate": 8.643269702551248e-09, + "loss": 3.4078, + "step": 8206 + }, + { + "epoch": 0.98, + "learning_rate": 8.526107499939317e-09, + "loss": 3.4398, + "step": 8207 + }, + { + "epoch": 0.98, + "learning_rate": 8.409744132812369e-09, + "loss": 3.4374, + "step": 8208 + }, + { + "epoch": 0.98, + "learning_rate": 8.29417961979384e-09, + "loss": 3.439, + "step": 8209 + }, + { + "epoch": 0.98, + "learning_rate": 8.17941397937838e-09, + "loss": 3.3577, + "step": 8210 + }, + { + "epoch": 0.98, + "learning_rate": 8.065447229932965e-09, + "loss": 3.3421, + "step": 8211 + }, + { + "epoch": 0.98, + "learning_rate": 7.952279389696893e-09, + "loss": 3.3439, + "step": 8212 + }, + { + "epoch": 0.98, + "learning_rate": 7.839910476782341e-09, + "loss": 3.3835, + "step": 8213 + }, + { + "epoch": 0.98, + "learning_rate": 7.728340509171594e-09, + "loss": 3.5001, + "step": 8214 + }, + { + "epoch": 0.98, + "learning_rate": 7.617569504721478e-09, + "loss": 3.3997, + "step": 8215 + }, + { + "epoch": 0.98, + "learning_rate": 7.507597481158368e-09, + "loss": 3.3959, + "step": 8216 + }, + { + "epoch": 0.98, + "learning_rate": 7.398424456083741e-09, + "loss": 3.2891, + "step": 8217 + }, + { + "epoch": 0.98, + "learning_rate": 7.290050446968622e-09, + "loss": 3.4263, + "step": 8218 + }, + { + "epoch": 0.98, + "learning_rate": 7.182475471157468e-09, + "loss": 3.3157, + "step": 8219 + }, + { + "epoch": 0.98, + "learning_rate": 7.07569954586651e-09, + "loss": 3.3928, + "step": 8220 + }, + { + "epoch": 0.98, + "learning_rate": 6.9697226881843e-09, + "loss": 3.3446, + "step": 8221 + }, + { + "epoch": 0.98, + "learning_rate": 6.864544915071158e-09, + "loss": 3.3627, + "step": 8222 + }, + { + "epoch": 0.98, + "learning_rate": 6.760166243360289e-09, + "loss": 3.3897, + "step": 8223 + }, + { + "epoch": 0.98, + "learning_rate": 6.656586689755551e-09, + "loss": 3.3575, + "step": 8224 + }, + { + "epoch": 0.98, + "learning_rate": 6.553806270834795e-09, + "loss": 3.3616, + "step": 8225 + }, + { + "epoch": 0.98, + "learning_rate": 6.45182500304653e-09, + "loss": 3.5377, + "step": 8226 + }, + { + "epoch": 0.98, + "learning_rate": 6.35064290271159e-09, + "loss": 3.3368, + "step": 8227 + }, + { + "epoch": 0.99, + "learning_rate": 6.250259986023688e-09, + "loss": 3.3946, + "step": 8228 + }, + { + "epoch": 0.99, + "learning_rate": 6.15067626904775e-09, + "loss": 3.5265, + "step": 8229 + }, + { + "epoch": 0.99, + "learning_rate": 6.05189176772103e-09, + "loss": 3.4405, + "step": 8230 + }, + { + "epoch": 0.99, + "learning_rate": 5.953906497853101e-09, + "loss": 3.3525, + "step": 8231 + }, + { + "epoch": 0.99, + "learning_rate": 5.856720475125865e-09, + "loss": 3.3958, + "step": 8232 + }, + { + "epoch": 0.99, + "learning_rate": 5.760333715092992e-09, + "loss": 3.3535, + "step": 8233 + }, + { + "epoch": 0.99, + "learning_rate": 5.664746233179918e-09, + "loss": 3.3323, + "step": 8234 + }, + { + "epoch": 0.99, + "learning_rate": 5.569958044683854e-09, + "loss": 3.3425, + "step": 8235 + }, + { + "epoch": 0.99, + "learning_rate": 5.475969164775996e-09, + "loss": 3.4466, + "step": 8236 + }, + { + "epoch": 0.99, + "learning_rate": 5.382779608497646e-09, + "loss": 3.4486, + "step": 8237 + }, + { + "epoch": 0.99, + "learning_rate": 5.29038939076243e-09, + "loss": 3.3546, + "step": 8238 + }, + { + "epoch": 0.99, + "learning_rate": 5.198798526357407e-09, + "loss": 3.5818, + "step": 8239 + }, + { + "epoch": 0.99, + "learning_rate": 5.1080070299402985e-09, + "loss": 3.3204, + "step": 8240 + }, + { + "epoch": 0.99, + "learning_rate": 5.018014916041148e-09, + "loss": 3.4484, + "step": 8241 + }, + { + "epoch": 0.99, + "learning_rate": 4.928822199062877e-09, + "loss": 3.4169, + "step": 8242 + }, + { + "epoch": 0.99, + "learning_rate": 4.84042889327907e-09, + "loss": 3.3727, + "step": 8243 + }, + { + "epoch": 0.99, + "learning_rate": 4.752835012837298e-09, + "loss": 3.41, + "step": 8244 + }, + { + "epoch": 0.99, + "learning_rate": 4.666040571754682e-09, + "loss": 3.3726, + "step": 8245 + }, + { + "epoch": 0.99, + "learning_rate": 4.580045583923443e-09, + "loss": 3.3669, + "step": 8246 + }, + { + "epoch": 0.99, + "learning_rate": 4.494850063104794e-09, + "loss": 3.3438, + "step": 8247 + }, + { + "epoch": 0.99, + "learning_rate": 4.410454022933941e-09, + "loss": 3.3272, + "step": 8248 + }, + { + "epoch": 0.99, + "learning_rate": 4.326857476917856e-09, + "loss": 3.3025, + "step": 8249 + }, + { + "epoch": 0.99, + "learning_rate": 4.244060438435282e-09, + "loss": 3.3608, + "step": 8250 + }, + { + "epoch": 0.99, + "learning_rate": 4.162062920736731e-09, + "loss": 3.3546, + "step": 8251 + }, + { + "epoch": 0.99, + "learning_rate": 4.080864936945595e-09, + "loss": 3.4633, + "step": 8252 + }, + { + "epoch": 0.99, + "learning_rate": 4.000466500055922e-09, + "loss": 3.3551, + "step": 8253 + }, + { + "epoch": 0.99, + "learning_rate": 3.920867622935753e-09, + "loss": 3.412, + "step": 8254 + }, + { + "epoch": 0.99, + "learning_rate": 3.8420683183232335e-09, + "loss": 3.3978, + "step": 8255 + }, + { + "epoch": 0.99, + "learning_rate": 3.764068598830495e-09, + "loss": 3.4137, + "step": 8256 + }, + { + "epoch": 0.99, + "learning_rate": 3.6868684769392203e-09, + "loss": 3.3674, + "step": 8257 + }, + { + "epoch": 0.99, + "learning_rate": 3.6104679650050824e-09, + "loss": 3.419, + "step": 8258 + }, + { + "epoch": 0.99, + "learning_rate": 3.5348670752555216e-09, + "loss": 3.4099, + "step": 8259 + }, + { + "epoch": 0.99, + "learning_rate": 3.4600658197897485e-09, + "loss": 3.5098, + "step": 8260 + }, + { + "epoch": 0.99, + "learning_rate": 3.3860642105781882e-09, + "loss": 3.3474, + "step": 8261 + }, + { + "epoch": 0.99, + "learning_rate": 3.3128622594647e-09, + "loss": 3.467, + "step": 8262 + }, + { + "epoch": 0.99, + "learning_rate": 3.2404599781643565e-09, + "loss": 3.3859, + "step": 8263 + }, + { + "epoch": 0.99, + "learning_rate": 3.168857378264556e-09, + "loss": 3.388, + "step": 8264 + }, + { + "epoch": 0.99, + "learning_rate": 3.0980544712244655e-09, + "loss": 3.3697, + "step": 8265 + }, + { + "epoch": 0.99, + "learning_rate": 3.028051268375576e-09, + "loss": 3.393, + "step": 8266 + }, + { + "epoch": 0.99, + "learning_rate": 2.958847780921148e-09, + "loss": 3.3846, + "step": 8267 + }, + { + "epoch": 0.99, + "learning_rate": 2.8904440199356564e-09, + "loss": 3.4313, + "step": 8268 + }, + { + "epoch": 0.99, + "learning_rate": 2.8228399963675655e-09, + "loss": 3.3865, + "step": 8269 + }, + { + "epoch": 0.99, + "learning_rate": 2.756035721035999e-09, + "loss": 3.3999, + "step": 8270 + }, + { + "epoch": 0.99, + "learning_rate": 2.6900312046318487e-09, + "loss": 3.3989, + "step": 8271 + }, + { + "epoch": 0.99, + "learning_rate": 2.6248264577188876e-09, + "loss": 3.4854, + "step": 8272 + }, + { + "epoch": 0.99, + "learning_rate": 2.560421490732656e-09, + "loss": 3.5021, + "step": 8273 + }, + { + "epoch": 0.99, + "learning_rate": 2.496816313979911e-09, + "loss": 3.5175, + "step": 8274 + }, + { + "epoch": 0.99, + "learning_rate": 2.4340109376402853e-09, + "loss": 3.3725, + "step": 8275 + }, + { + "epoch": 0.99, + "learning_rate": 2.3720053717651846e-09, + "loss": 3.3613, + "step": 8276 + }, + { + "epoch": 0.99, + "learning_rate": 2.310799626277782e-09, + "loss": 3.4202, + "step": 8277 + }, + { + "epoch": 0.99, + "learning_rate": 2.2503937109741304e-09, + "loss": 3.3084, + "step": 8278 + }, + { + "epoch": 0.99, + "learning_rate": 2.1907876355209413e-09, + "loss": 3.3909, + "step": 8279 + }, + { + "epoch": 0.99, + "learning_rate": 2.131981409457806e-09, + "loss": 3.3279, + "step": 8280 + }, + { + "epoch": 0.99, + "learning_rate": 2.0739750421960857e-09, + "loss": 3.403, + "step": 8281 + }, + { + "epoch": 0.99, + "learning_rate": 2.0167685430189098e-09, + "loss": 3.3178, + "step": 8282 + }, + { + "epoch": 0.99, + "learning_rate": 1.9603619210817325e-09, + "loss": 3.3514, + "step": 8283 + }, + { + "epoch": 0.99, + "learning_rate": 1.9047551854117774e-09, + "loss": 3.5304, + "step": 8284 + }, + { + "epoch": 0.99, + "learning_rate": 1.849948344908592e-09, + "loss": 3.4003, + "step": 8285 + }, + { + "epoch": 0.99, + "learning_rate": 1.7959414083434935e-09, + "loss": 3.3713, + "step": 8286 + }, + { + "epoch": 0.99, + "learning_rate": 1.7427343843595679e-09, + "loss": 3.3301, + "step": 8287 + }, + { + "epoch": 0.99, + "learning_rate": 1.690327281471671e-09, + "loss": 3.3596, + "step": 8288 + }, + { + "epoch": 0.99, + "learning_rate": 1.6387201080680926e-09, + "loss": 3.4088, + "step": 8289 + }, + { + "epoch": 0.99, + "learning_rate": 1.5879128724066718e-09, + "loss": 3.4343, + "step": 8290 + }, + { + "epoch": 0.99, + "learning_rate": 1.537905582619792e-09, + "loss": 3.3748, + "step": 8291 + }, + { + "epoch": 0.99, + "learning_rate": 1.488698246710496e-09, + "loss": 3.3045, + "step": 8292 + }, + { + "epoch": 0.99, + "learning_rate": 1.4402908725530406e-09, + "loss": 3.4812, + "step": 8293 + }, + { + "epoch": 0.99, + "learning_rate": 1.3926834678951173e-09, + "loss": 3.3752, + "step": 8294 + }, + { + "epoch": 0.99, + "learning_rate": 1.3458760403561865e-09, + "loss": 3.384, + "step": 8295 + }, + { + "epoch": 0.99, + "learning_rate": 1.299868597426368e-09, + "loss": 3.4033, + "step": 8296 + }, + { + "epoch": 0.99, + "learning_rate": 1.254661146469771e-09, + "loss": 3.3737, + "step": 8297 + }, + { + "epoch": 0.99, + "learning_rate": 1.2102536947206088e-09, + "loss": 3.4217, + "step": 8298 + }, + { + "epoch": 0.99, + "learning_rate": 1.1666462492859743e-09, + "loss": 3.3521, + "step": 8299 + }, + { + "epoch": 0.99, + "learning_rate": 1.123838817144729e-09, + "loss": 3.4237, + "step": 8300 + }, + { + "epoch": 0.99, + "learning_rate": 1.0818314051486145e-09, + "loss": 3.4223, + "step": 8301 + }, + { + "epoch": 0.99, + "learning_rate": 1.0406240200194762e-09, + "loss": 3.3971, + "step": 8302 + }, + { + "epoch": 0.99, + "learning_rate": 1.0002166683525939e-09, + "loss": 3.3196, + "step": 8303 + }, + { + "epoch": 0.99, + "learning_rate": 9.606093566144613e-10, + "loss": 3.3696, + "step": 8304 + }, + { + "epoch": 0.99, + "learning_rate": 9.218020911444525e-10, + "loss": 3.3831, + "step": 8305 + }, + { + "epoch": 0.99, + "learning_rate": 8.837948781525996e-10, + "loss": 3.368, + "step": 8306 + }, + { + "epoch": 0.99, + "learning_rate": 8.465877237218145e-10, + "loss": 3.408, + "step": 8307 + }, + { + "epoch": 0.99, + "learning_rate": 8.101806338073337e-10, + "loss": 3.4649, + "step": 8308 + }, + { + "epoch": 0.99, + "learning_rate": 7.745736142344973e-10, + "loss": 3.3819, + "step": 8309 + }, + { + "epoch": 0.99, + "learning_rate": 7.397666707031903e-10, + "loss": 3.3977, + "step": 8310 + }, + { + "epoch": 0.99, + "learning_rate": 7.057598087828466e-10, + "loss": 3.4683, + "step": 8311 + }, + { + "epoch": 1.0, + "learning_rate": 6.725530339163345e-10, + "loss": 3.3839, + "step": 8312 + }, + { + "epoch": 1.0, + "learning_rate": 6.401463514182915e-10, + "loss": 3.3537, + "step": 8313 + }, + { + "epoch": 1.0, + "learning_rate": 6.085397664745696e-10, + "loss": 3.477, + "step": 8314 + }, + { + "epoch": 1.0, + "learning_rate": 5.777332841444549e-10, + "loss": 3.3781, + "step": 8315 + }, + { + "epoch": 1.0, + "learning_rate": 5.477269093567827e-10, + "loss": 3.4587, + "step": 8316 + }, + { + "epoch": 1.0, + "learning_rate": 5.185206469149329e-10, + "loss": 3.3078, + "step": 8317 + }, + { + "epoch": 1.0, + "learning_rate": 4.901145014929442e-10, + "loss": 3.4327, + "step": 8318 + }, + { + "epoch": 1.0, + "learning_rate": 4.625084776366251e-10, + "loss": 3.45, + "step": 8319 + }, + { + "epoch": 1.0, + "learning_rate": 4.3570257976355277e-10, + "loss": 3.5292, + "step": 8320 + }, + { + "epoch": 1.0, + "learning_rate": 4.096968121647393e-10, + "loss": 3.3593, + "step": 8321 + }, + { + "epoch": 1.0, + "learning_rate": 3.8449117900185575e-10, + "loss": 3.4134, + "step": 8322 + }, + { + "epoch": 1.0, + "learning_rate": 3.6008568430834223e-10, + "loss": 3.2419, + "step": 8323 + }, + { + "epoch": 1.0, + "learning_rate": 3.3648033199051857e-10, + "loss": 3.3705, + "step": 8324 + }, + { + "epoch": 1.0, + "learning_rate": 3.1367512582591853e-10, + "loss": 3.5076, + "step": 8325 + }, + { + "epoch": 1.0, + "learning_rate": 2.9167006946440033e-10, + "loss": 3.3783, + "step": 8326 + }, + { + "epoch": 1.0, + "learning_rate": 2.704651664275915e-10, + "loss": 3.409, + "step": 8327 + }, + { + "epoch": 1.0, + "learning_rate": 2.5006042010944367e-10, + "loss": 3.4945, + "step": 8328 + }, + { + "epoch": 1.0, + "learning_rate": 2.3045583377512282e-10, + "loss": 3.3542, + "step": 8329 + }, + { + "epoch": 1.0, + "learning_rate": 2.1165141056267435e-10, + "loss": 3.3666, + "step": 8330 + }, + { + "epoch": 1.0, + "learning_rate": 1.9364715348080266e-10, + "loss": 3.4228, + "step": 8331 + }, + { + "epoch": 1.0, + "learning_rate": 1.7644306541109157e-10, + "loss": 3.4501, + "step": 8332 + }, + { + "epoch": 1.0, + "learning_rate": 1.6003914910744933e-10, + "loss": 3.4515, + "step": 8333 + }, + { + "epoch": 1.0, + "learning_rate": 1.4443540719499828e-10, + "loss": 3.4505, + "step": 8334 + }, + { + "epoch": 1.0, + "learning_rate": 1.2963184217007486e-10, + "loss": 3.4066, + "step": 8335 + }, + { + "epoch": 1.0, + "learning_rate": 1.1562845640300524e-10, + "loss": 3.4289, + "step": 8336 + }, + { + "epoch": 1.0, + "learning_rate": 1.024252521342195e-10, + "loss": 3.3431, + "step": 8337 + }, + { + "epoch": 1.0, + "learning_rate": 9.002223147702715e-11, + "loss": 3.4867, + "step": 8338 + }, + { + "epoch": 1.0, + "learning_rate": 7.841939641650697e-11, + "loss": 3.3651, + "step": 8339 + }, + { + "epoch": 1.0, + "learning_rate": 6.761674880950697e-11, + "loss": 3.4946, + "step": 8340 + }, + { + "epoch": 1.0, + "learning_rate": 5.761429038464439e-11, + "loss": 3.291, + "step": 8341 + }, + { + "epoch": 1.0, + "learning_rate": 4.841202274230572e-11, + "loss": 3.4418, + "step": 8342 + }, + { + "epoch": 1.0, + "learning_rate": 4.000994735631203e-11, + "loss": 3.3565, + "step": 8343 + }, + { + "epoch": 1.0, + "learning_rate": 3.240806557058829e-11, + "loss": 3.3524, + "step": 8344 + }, + { + "epoch": 1.0, + "learning_rate": 2.5606378602494043e-11, + "loss": 3.3708, + "step": 8345 + }, + { + "epoch": 1.0, + "learning_rate": 1.9604887539492746e-11, + "loss": 3.4234, + "step": 8346 + }, + { + "epoch": 1.0, + "learning_rate": 1.4403593342482424e-11, + "loss": 3.392, + "step": 8347 + }, + { + "epoch": 1.0, + "learning_rate": 1.0002496844130349e-11, + "loss": 3.3468, + "step": 8348 + }, + { + "epoch": 1.0, + "learning_rate": 6.4015987488730284e-12, + "loss": 3.3301, + "step": 8349 + }, + { + "epoch": 1.0, + "learning_rate": 3.6008996323610988e-12, + "loss": 3.3747, + "step": 8350 + }, + { + "epoch": 1.0, + "learning_rate": 1.6003999431246643e-12, + "loss": 3.397, + "step": 8351 + }, + { + "epoch": 1.0, + "learning_rate": 4.001000020181778e-13, + "loss": 3.4441, + "step": 8352 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 3.4411, + "step": 8353 + }, + { + "epoch": 1.0, + "step": 8353, + "total_flos": 1.0769385714635244e+18, + "train_loss": 3.426367837997678, + "train_runtime": 9211.7603, + "train_samples_per_second": 58.029, + "train_steps_per_second": 0.907 + } + ], + "max_steps": 8353, + "num_train_epochs": 1, + "total_flos": 1.0769385714635244e+18, + "trial_name": null, + "trial_params": null +}