{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 3338128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9925557078698005e-05, "loss": 5.3279, "step": 5000 }, { "epoch": 0.0, "eval_accuracy": 0.31327971235572855, "eval_loss": 3.994140625, "eval_runtime": 39.5933, "eval_samples_per_second": 90.116, "eval_steps_per_second": 11.265, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.985067978220128e-05, "loss": 3.5754, "step": 10000 }, { "epoch": 0.0, "eval_accuracy": 0.3823957607318666, "eval_loss": 3.310546875, "eval_runtime": 39.5685, "eval_samples_per_second": 90.173, "eval_steps_per_second": 11.272, "step": 10000 }, { "epoch": 0.0, "learning_rate": 4.9775862399524525e-05, "loss": 3.6102, "step": 15000 }, { "epoch": 0.0, "eval_accuracy": 0.39768097216925513, "eval_loss": 3.166015625, "eval_runtime": 39.5825, "eval_samples_per_second": 90.141, "eval_steps_per_second": 11.268, "step": 15000 }, { "epoch": 0.01, "learning_rate": 4.970101505993779e-05, "loss": 3.0639, "step": 20000 }, { "epoch": 0.01, "eval_accuracy": 0.41336891627105715, "eval_loss": 3.021484375, "eval_runtime": 39.5961, "eval_samples_per_second": 90.11, "eval_steps_per_second": 11.264, "step": 20000 }, { "epoch": 0.01, "learning_rate": 4.962618269880604e-05, "loss": 2.9477, "step": 25000 }, { "epoch": 0.01, "eval_accuracy": 0.425242680676284, "eval_loss": 2.919921875, "eval_runtime": 39.6071, "eval_samples_per_second": 90.085, "eval_steps_per_second": 11.261, "step": 25000 }, { "epoch": 0.01, "learning_rate": 4.9551335359219304e-05, "loss": 2.8589, "step": 30000 }, { "epoch": 0.01, "eval_accuracy": 0.4315466797294513, "eval_loss": 2.8671875, "eval_runtime": 39.6079, "eval_samples_per_second": 90.083, "eval_steps_per_second": 11.26, "step": 30000 }, { "epoch": 0.01, "learning_rate": 4.947647304117757e-05, "loss": 2.8063, "step": 35000 }, { "epoch": 0.01, "eval_accuracy": 0.43875449855125825, "eval_loss": 2.802734375, "eval_runtime": 39.6873, "eval_samples_per_second": 89.903, "eval_steps_per_second": 11.238, "step": 35000 }, { "epoch": 0.01, "learning_rate": 4.940162570159084e-05, "loss": 2.7646, "step": 40000 }, { "epoch": 0.01, "eval_accuracy": 0.44185690990623727, "eval_loss": 2.771484375, "eval_runtime": 39.6738, "eval_samples_per_second": 89.933, "eval_steps_per_second": 11.242, "step": 40000 }, { "epoch": 0.01, "learning_rate": 4.932679334045909e-05, "loss": 2.7306, "step": 45000 }, { "epoch": 0.01, "eval_accuracy": 0.4467244957896629, "eval_loss": 2.736328125, "eval_runtime": 39.7305, "eval_samples_per_second": 89.805, "eval_steps_per_second": 11.226, "step": 45000 }, { "epoch": 0.01, "learning_rate": 4.9251960979327336e-05, "loss": 2.7106, "step": 50000 }, { "epoch": 0.01, "eval_accuracy": 0.4492548623804952, "eval_loss": 2.712890625, "eval_runtime": 39.6605, "eval_samples_per_second": 89.964, "eval_steps_per_second": 11.245, "step": 50000 }, { "epoch": 0.02, "learning_rate": 4.917712861819559e-05, "loss": 2.6829, "step": 55000 }, { "epoch": 0.02, "eval_accuracy": 0.45224302916332426, "eval_loss": 2.689453125, "eval_runtime": 39.6345, "eval_samples_per_second": 90.023, "eval_steps_per_second": 11.253, "step": 55000 }, { "epoch": 0.02, "learning_rate": 4.9102311235518835e-05, "loss": 2.6703, "step": 60000 }, { "epoch": 0.02, "eval_accuracy": 0.45370464737056665, "eval_loss": 2.67578125, "eval_runtime": 39.6085, "eval_samples_per_second": 90.082, "eval_steps_per_second": 11.26, "step": 60000 }, { "epoch": 0.02, "learning_rate": 4.90274489174771e-05, "loss": 2.6522, "step": 65000 }, { "epoch": 0.02, "eval_accuracy": 0.4559714569388372, "eval_loss": 2.66015625, "eval_runtime": 39.7158, "eval_samples_per_second": 89.838, "eval_steps_per_second": 11.23, "step": 65000 }, { "epoch": 0.02, "learning_rate": 4.895260157789037e-05, "loss": 2.6377, "step": 70000 }, { "epoch": 0.02, "eval_accuracy": 0.4573840349100728, "eval_loss": 2.6484375, "eval_runtime": 40.0228, "eval_samples_per_second": 89.149, "eval_steps_per_second": 11.144, "step": 70000 }, { "epoch": 0.02, "learning_rate": 4.8877769216758615e-05, "loss": 2.6241, "step": 75000 }, { "epoch": 0.02, "eval_accuracy": 0.4586505880444836, "eval_loss": 2.634765625, "eval_runtime": 39.747, "eval_samples_per_second": 89.768, "eval_steps_per_second": 11.221, "step": 75000 }, { "epoch": 0.02, "learning_rate": 4.880295183408186e-05, "loss": 2.6159, "step": 80000 }, { "epoch": 0.02, "eval_accuracy": 0.46040042037619067, "eval_loss": 2.625, "eval_runtime": 39.8297, "eval_samples_per_second": 89.581, "eval_steps_per_second": 11.198, "step": 80000 }, { "epoch": 0.03, "learning_rate": 4.872807453758514e-05, "loss": 2.5959, "step": 85000 }, { "epoch": 0.03, "eval_accuracy": 0.4612864322379005, "eval_loss": 2.61328125, "eval_runtime": 39.842, "eval_samples_per_second": 89.554, "eval_steps_per_second": 11.194, "step": 85000 }, { "epoch": 0.03, "learning_rate": 4.86532271979984e-05, "loss": 2.5877, "step": 90000 }, { "epoch": 0.03, "eval_accuracy": 0.4624083303744811, "eval_loss": 2.603515625, "eval_runtime": 39.7194, "eval_samples_per_second": 89.83, "eval_steps_per_second": 11.229, "step": 90000 }, { "epoch": 0.03, "learning_rate": 4.857840981532165e-05, "loss": 2.5832, "step": 95000 }, { "epoch": 0.03, "eval_accuracy": 0.46323187757803697, "eval_loss": 2.599609375, "eval_runtime": 40.0969, "eval_samples_per_second": 88.984, "eval_steps_per_second": 11.123, "step": 95000 }, { "epoch": 0.03, "learning_rate": 4.8503562475734907e-05, "loss": 2.5726, "step": 100000 }, { "epoch": 0.03, "eval_accuracy": 0.46476143979941176, "eval_loss": 2.5859375, "eval_runtime": 39.6873, "eval_samples_per_second": 89.903, "eval_steps_per_second": 11.238, "step": 100000 }, { "epoch": 0.03, "learning_rate": 4.8428775049968125e-05, "loss": 2.5723, "step": 105000 }, { "epoch": 0.03, "eval_accuracy": 0.46553211121777593, "eval_loss": 2.580078125, "eval_runtime": 39.7946, "eval_samples_per_second": 89.66, "eval_steps_per_second": 11.208, "step": 105000 }, { "epoch": 0.03, "learning_rate": 4.83539127319264e-05, "loss": 2.5584, "step": 110000 }, { "epoch": 0.03, "eval_accuracy": 0.46414501225183996, "eval_loss": 2.59375, "eval_runtime": 39.8172, "eval_samples_per_second": 89.61, "eval_steps_per_second": 11.201, "step": 110000 }, { "epoch": 0.03, "learning_rate": 4.827905041388467e-05, "loss": 2.5541, "step": 115000 }, { "epoch": 0.03, "eval_accuracy": 0.4673400247228542, "eval_loss": 2.56640625, "eval_runtime": 39.875, "eval_samples_per_second": 89.48, "eval_steps_per_second": 11.185, "step": 115000 }, { "epoch": 0.04, "learning_rate": 4.820423303120791e-05, "loss": 2.541, "step": 120000 }, { "epoch": 0.04, "eval_accuracy": 0.46835206177206756, "eval_loss": 2.55859375, "eval_runtime": 39.7895, "eval_samples_per_second": 89.672, "eval_steps_per_second": 11.209, "step": 120000 }, { "epoch": 0.04, "learning_rate": 4.812941564853116e-05, "loss": 2.5359, "step": 125000 }, { "epoch": 0.04, "eval_accuracy": 0.4673909827334534, "eval_loss": 2.564453125, "eval_runtime": 39.8856, "eval_samples_per_second": 89.456, "eval_steps_per_second": 11.182, "step": 125000 }, { "epoch": 0.04, "learning_rate": 4.805458328739941e-05, "loss": 2.5298, "step": 130000 }, { "epoch": 0.04, "eval_accuracy": 0.4699054591919484, "eval_loss": 2.544921875, "eval_runtime": 39.8462, "eval_samples_per_second": 89.544, "eval_steps_per_second": 11.193, "step": 130000 }, { "epoch": 0.04, "learning_rate": 4.797972096935768e-05, "loss": 2.5258, "step": 135000 }, { "epoch": 0.04, "eval_accuracy": 0.47030819185636197, "eval_loss": 2.541015625, "eval_runtime": 39.8831, "eval_samples_per_second": 89.461, "eval_steps_per_second": 11.183, "step": 135000 }, { "epoch": 0.04, "learning_rate": 4.790488860822593e-05, "loss": 2.5207, "step": 140000 }, { "epoch": 0.04, "eval_accuracy": 0.47090352388341683, "eval_loss": 2.537109375, "eval_runtime": 40.0239, "eval_samples_per_second": 89.147, "eval_steps_per_second": 11.143, "step": 140000 }, { "epoch": 0.04, "learning_rate": 4.783005624709418e-05, "loss": 2.5167, "step": 145000 }, { "epoch": 0.04, "eval_accuracy": 0.47193419074295684, "eval_loss": 2.53125, "eval_runtime": 39.9144, "eval_samples_per_second": 89.391, "eval_steps_per_second": 11.174, "step": 145000 }, { "epoch": 0.04, "learning_rate": 4.7755223885962435e-05, "loss": 2.5101, "step": 150000 }, { "epoch": 0.04, "eval_accuracy": 0.4701947691876088, "eval_loss": 2.544921875, "eval_runtime": 40.0269, "eval_samples_per_second": 89.14, "eval_steps_per_second": 11.143, "step": 150000 }, { "epoch": 0.05, "learning_rate": 4.768039152483069e-05, "loss": 2.5058, "step": 155000 }, { "epoch": 0.05, "eval_accuracy": 0.4730330755844281, "eval_loss": 2.521484375, "eval_runtime": 39.9333, "eval_samples_per_second": 89.349, "eval_steps_per_second": 11.169, "step": 155000 }, { "epoch": 0.05, "learning_rate": 4.760554418524395e-05, "loss": 2.5021, "step": 160000 }, { "epoch": 0.05, "eval_accuracy": 0.473403480048569, "eval_loss": 2.51953125, "eval_runtime": 40.0331, "eval_samples_per_second": 89.126, "eval_steps_per_second": 11.141, "step": 160000 }, { "epoch": 0.05, "learning_rate": 4.753084663020711e-05, "loss": 2.8135, "step": 165000 }, { "epoch": 0.05, "eval_accuracy": 0.4317466762226635, "eval_loss": 2.83203125, "eval_runtime": 40.0127, "eval_samples_per_second": 89.172, "eval_steps_per_second": 11.146, "step": 165000 }, { "epoch": 0.05, "learning_rate": 4.7455954355255405e-05, "loss": 2.7932, "step": 170000 }, { "epoch": 0.05, "eval_accuracy": 0.4729974597705684, "eval_loss": 2.521484375, "eval_runtime": 39.9612, "eval_samples_per_second": 89.287, "eval_steps_per_second": 11.161, "step": 170000 }, { "epoch": 0.05, "learning_rate": 4.7381077058758686e-05, "loss": 2.4914, "step": 175000 }, { "epoch": 0.05, "eval_accuracy": 0.4751818050313638, "eval_loss": 2.505859375, "eval_runtime": 40.3867, "eval_samples_per_second": 88.346, "eval_steps_per_second": 11.043, "step": 175000 }, { "epoch": 0.05, "learning_rate": 4.730621474071695e-05, "loss": 2.487, "step": 180000 }, { "epoch": 0.05, "eval_accuracy": 0.4753875548483533, "eval_loss": 2.50390625, "eval_runtime": 40.0145, "eval_samples_per_second": 89.168, "eval_steps_per_second": 11.146, "step": 180000 }, { "epoch": 0.06, "learning_rate": 4.723138237958521e-05, "loss": 2.4829, "step": 185000 }, { "epoch": 0.06, "eval_accuracy": 0.47510947753244875, "eval_loss": 2.50390625, "eval_runtime": 40.0338, "eval_samples_per_second": 89.125, "eval_steps_per_second": 11.141, "step": 185000 }, { "epoch": 0.06, "learning_rate": 4.7156505083088486e-05, "loss": 2.4778, "step": 190000 }, { "epoch": 0.06, "eval_accuracy": 0.47625877244892145, "eval_loss": 2.49609375, "eval_runtime": 40.0473, "eval_samples_per_second": 89.095, "eval_steps_per_second": 11.137, "step": 190000 }, { "epoch": 0.06, "learning_rate": 4.708170267886672e-05, "loss": 2.4779, "step": 195000 }, { "epoch": 0.06, "eval_accuracy": 0.4770064305721763, "eval_loss": 2.4921875, "eval_runtime": 40.1061, "eval_samples_per_second": 88.964, "eval_steps_per_second": 11.121, "step": 195000 }, { "epoch": 0.06, "learning_rate": 4.700687031773498e-05, "loss": 2.4685, "step": 200000 }, { "epoch": 0.06, "eval_accuracy": 0.4765894515822188, "eval_loss": 2.494140625, "eval_runtime": 40.1514, "eval_samples_per_second": 88.864, "eval_steps_per_second": 11.108, "step": 200000 }, { "epoch": 0.06, "learning_rate": 4.693202297814823e-05, "loss": 2.4661, "step": 205000 }, { "epoch": 0.06, "eval_accuracy": 0.47763491270290054, "eval_loss": 2.484375, "eval_runtime": 40.1854, "eval_samples_per_second": 88.788, "eval_steps_per_second": 11.099, "step": 205000 }, { "epoch": 0.06, "learning_rate": 4.6857190617016483e-05, "loss": 2.4579, "step": 210000 }, { "epoch": 0.06, "eval_accuracy": 0.47826366880142374, "eval_loss": 2.48046875, "eval_runtime": 40.1794, "eval_samples_per_second": 88.802, "eval_steps_per_second": 11.1, "step": 210000 }, { "epoch": 0.06, "learning_rate": 4.6782358255884736e-05, "loss": 2.4589, "step": 215000 }, { "epoch": 0.06, "eval_accuracy": 0.4787937964923355, "eval_loss": 2.478515625, "eval_runtime": 40.1613, "eval_samples_per_second": 88.842, "eval_steps_per_second": 11.105, "step": 215000 }, { "epoch": 0.07, "learning_rate": 4.6707540873207975e-05, "loss": 2.4571, "step": 220000 }, { "epoch": 0.07, "eval_accuracy": 0.47927871949642525, "eval_loss": 2.474609375, "eval_runtime": 40.1097, "eval_samples_per_second": 88.956, "eval_steps_per_second": 11.119, "step": 220000 }, { "epoch": 0.07, "learning_rate": 4.663269353362124e-05, "loss": 2.4504, "step": 225000 }, { "epoch": 0.07, "eval_accuracy": 0.47965542521994137, "eval_loss": 2.47265625, "eval_runtime": 40.1284, "eval_samples_per_second": 88.915, "eval_steps_per_second": 11.114, "step": 225000 }, { "epoch": 0.07, "learning_rate": 4.65578461940345e-05, "loss": 2.4538, "step": 230000 }, { "epoch": 0.07, "eval_accuracy": 0.47995733773435206, "eval_loss": 2.46875, "eval_runtime": 40.2242, "eval_samples_per_second": 88.703, "eval_steps_per_second": 11.088, "step": 230000 }, { "epoch": 0.07, "learning_rate": 4.6483013832902755e-05, "loss": 2.4481, "step": 235000 }, { "epoch": 0.07, "eval_accuracy": 0.4806011620618159, "eval_loss": 2.466796875, "eval_runtime": 40.1909, "eval_samples_per_second": 88.776, "eval_steps_per_second": 11.097, "step": 235000 }, { "epoch": 0.07, "learning_rate": 4.640815151486103e-05, "loss": 2.4454, "step": 240000 }, { "epoch": 0.07, "eval_accuracy": 0.4809529367156302, "eval_loss": 2.4609375, "eval_runtime": 40.319, "eval_samples_per_second": 88.494, "eval_steps_per_second": 11.062, "step": 240000 }, { "epoch": 0.07, "learning_rate": 4.6333319153729274e-05, "loss": 2.44, "step": 245000 }, { "epoch": 0.07, "eval_accuracy": 0.4811408786256898, "eval_loss": 2.458984375, "eval_runtime": 40.235, "eval_samples_per_second": 88.679, "eval_steps_per_second": 11.085, "step": 245000 }, { "epoch": 0.07, "learning_rate": 4.625850177105252e-05, "loss": 2.4392, "step": 250000 }, { "epoch": 0.07, "eval_accuracy": 0.4810606060606061, "eval_loss": 2.458984375, "eval_runtime": 40.2635, "eval_samples_per_second": 88.616, "eval_steps_per_second": 11.077, "step": 250000 }, { "epoch": 0.08, "learning_rate": 4.618366940992077e-05, "loss": 2.431, "step": 255000 }, { "epoch": 0.08, "eval_accuracy": 0.48131512214580346, "eval_loss": 2.45703125, "eval_runtime": 40.2108, "eval_samples_per_second": 88.732, "eval_steps_per_second": 11.092, "step": 255000 }, { "epoch": 0.08, "learning_rate": 4.610885202724401e-05, "loss": 2.4377, "step": 260000 }, { "epoch": 0.08, "eval_accuracy": 0.482264420569064, "eval_loss": 2.451171875, "eval_runtime": 40.1835, "eval_samples_per_second": 88.793, "eval_steps_per_second": 11.099, "step": 260000 }, { "epoch": 0.08, "learning_rate": 4.6033959752292307e-05, "loss": 2.4299, "step": 265000 }, { "epoch": 0.08, "eval_accuracy": 0.4825972914447528, "eval_loss": 2.447265625, "eval_runtime": 40.2876, "eval_samples_per_second": 88.563, "eval_steps_per_second": 11.07, "step": 265000 }, { "epoch": 0.08, "learning_rate": 4.5959142369615546e-05, "loss": 2.4283, "step": 270000 }, { "epoch": 0.08, "eval_accuracy": 0.482810712360112, "eval_loss": 2.447265625, "eval_runtime": 40.3402, "eval_samples_per_second": 88.448, "eval_steps_per_second": 11.056, "step": 270000 }, { "epoch": 0.08, "learning_rate": 4.5884295030028805e-05, "loss": 2.4256, "step": 275000 }, { "epoch": 0.08, "eval_accuracy": 0.48325371829096697, "eval_loss": 2.443359375, "eval_runtime": 40.3191, "eval_samples_per_second": 88.494, "eval_steps_per_second": 11.062, "step": 275000 }, { "epoch": 0.08, "learning_rate": 4.5809462668897065e-05, "loss": 2.4198, "step": 280000 }, { "epoch": 0.08, "eval_accuracy": 0.48383589986367365, "eval_loss": 2.44140625, "eval_runtime": 40.3148, "eval_samples_per_second": 88.503, "eval_steps_per_second": 11.063, "step": 280000 }, { "epoch": 0.09, "learning_rate": 4.573461532931032e-05, "loss": 2.4174, "step": 285000 }, { "epoch": 0.09, "eval_accuracy": 0.4840175405143581, "eval_loss": 2.44140625, "eval_runtime": 40.703, "eval_samples_per_second": 87.659, "eval_steps_per_second": 10.957, "step": 285000 }, { "epoch": 0.09, "learning_rate": 4.565978296817857e-05, "loss": 2.4151, "step": 290000 }, { "epoch": 0.09, "eval_accuracy": 0.4844402728280929, "eval_loss": 2.435546875, "eval_runtime": 41.2392, "eval_samples_per_second": 86.52, "eval_steps_per_second": 10.815, "step": 290000 }, { "epoch": 0.09, "learning_rate": 4.558493562859184e-05, "loss": 2.4191, "step": 295000 }, { "epoch": 0.09, "eval_accuracy": 0.4847421853425036, "eval_loss": 2.43359375, "eval_runtime": 40.6977, "eval_samples_per_second": 87.671, "eval_steps_per_second": 10.959, "step": 295000 }, { "epoch": 0.09, "learning_rate": 4.55100882890051e-05, "loss": 2.4071, "step": 300000 }, { "epoch": 0.09, "eval_accuracy": 0.4848361562975334, "eval_loss": 2.431640625, "eval_runtime": 40.2058, "eval_samples_per_second": 88.744, "eval_steps_per_second": 11.093, "step": 300000 }, { "epoch": 0.09, "learning_rate": 4.543524094941836e-05, "loss": 2.4126, "step": 305000 }, { "epoch": 0.09, "eval_accuracy": 0.48549011743355736, "eval_loss": 2.427734375, "eval_runtime": 40.4122, "eval_samples_per_second": 88.29, "eval_steps_per_second": 11.036, "step": 305000 }, { "epoch": 0.09, "learning_rate": 4.536045352365158e-05, "loss": 2.4053, "step": 310000 }, { "epoch": 0.09, "eval_accuracy": 0.48513834277974305, "eval_loss": 2.4296875, "eval_runtime": 40.2632, "eval_samples_per_second": 88.617, "eval_steps_per_second": 11.077, "step": 310000 }, { "epoch": 0.09, "learning_rate": 4.528559120560985e-05, "loss": 2.4071, "step": 315000 }, { "epoch": 0.09, "eval_accuracy": 0.4857840848817993, "eval_loss": 2.42578125, "eval_runtime": 40.3587, "eval_samples_per_second": 88.407, "eval_steps_per_second": 11.051, "step": 315000 }, { "epoch": 0.1, "learning_rate": 4.5210743866023116e-05, "loss": 2.4027, "step": 320000 }, { "epoch": 0.1, "eval_accuracy": 0.486615303183725, "eval_loss": 2.421875, "eval_runtime": 40.2594, "eval_samples_per_second": 88.625, "eval_steps_per_second": 11.078, "step": 320000 }, { "epoch": 0.1, "learning_rate": 4.513591150489136e-05, "loss": 2.4013, "step": 325000 }, { "epoch": 0.1, "eval_accuracy": 0.4867040687505753, "eval_loss": 2.41796875, "eval_runtime": 40.2109, "eval_samples_per_second": 88.732, "eval_steps_per_second": 11.092, "step": 325000 }, { "epoch": 0.1, "learning_rate": 4.5061049186849636e-05, "loss": 2.4032, "step": 330000 }, { "epoch": 0.1, "eval_accuracy": 0.4866416040924214, "eval_loss": 2.41796875, "eval_runtime": 40.2756, "eval_samples_per_second": 88.59, "eval_steps_per_second": 11.074, "step": 330000 }, { "epoch": 0.1, "learning_rate": 4.498621682571789e-05, "loss": 2.3919, "step": 335000 }, { "epoch": 0.1, "eval_accuracy": 0.4870514599196069, "eval_loss": 2.416015625, "eval_runtime": 40.3049, "eval_samples_per_second": 88.525, "eval_steps_per_second": 11.066, "step": 335000 }, { "epoch": 0.1, "learning_rate": 4.491136948613115e-05, "loss": 2.3936, "step": 340000 }, { "epoch": 0.1, "eval_accuracy": 0.4872848804842874, "eval_loss": 2.4140625, "eval_runtime": 40.3719, "eval_samples_per_second": 88.378, "eval_steps_per_second": 11.047, "step": 340000 }, { "epoch": 0.1, "learning_rate": 4.48365371249994e-05, "loss": 2.3905, "step": 345000 }, { "epoch": 0.1, "eval_accuracy": 0.48784870621446635, "eval_loss": 2.41015625, "eval_runtime": 40.4162, "eval_samples_per_second": 88.281, "eval_steps_per_second": 11.035, "step": 345000 }, { "epoch": 0.1, "learning_rate": 4.4761704763867654e-05, "loss": 2.3889, "step": 350000 }, { "epoch": 0.1, "eval_accuracy": 0.4881240438523818, "eval_loss": 2.41015625, "eval_runtime": 40.2942, "eval_samples_per_second": 88.549, "eval_steps_per_second": 11.069, "step": 350000 }, { "epoch": 0.11, "learning_rate": 4.468688738119089e-05, "loss": 2.3866, "step": 355000 }, { "epoch": 0.11, "eval_accuracy": 0.48837801200198133, "eval_loss": 2.408203125, "eval_runtime": 40.2476, "eval_samples_per_second": 88.651, "eval_steps_per_second": 11.081, "step": 355000 }, { "epoch": 0.11, "learning_rate": 4.4612025063149173e-05, "loss": 2.3823, "step": 360000 }, { "epoch": 0.11, "eval_accuracy": 0.48875663550008985, "eval_loss": 2.40625, "eval_runtime": 40.3148, "eval_samples_per_second": 88.504, "eval_steps_per_second": 11.063, "step": 360000 }, { "epoch": 0.11, "learning_rate": 4.4537177723562427e-05, "loss": 2.3828, "step": 365000 }, { "epoch": 0.11, "eval_accuracy": 0.48881882619044487, "eval_loss": 2.40234375, "eval_runtime": 40.2734, "eval_samples_per_second": 88.594, "eval_steps_per_second": 11.074, "step": 365000 }, { "epoch": 0.11, "learning_rate": 4.446233038397569e-05, "loss": 2.3795, "step": 370000 }, { "epoch": 0.11, "eval_accuracy": 0.48893718027957866, "eval_loss": 2.400390625, "eval_runtime": 40.2791, "eval_samples_per_second": 88.582, "eval_steps_per_second": 11.073, "step": 370000 }, { "epoch": 0.11, "learning_rate": 4.43875729151189e-05, "loss": 2.3812, "step": 375000 }, { "epoch": 0.11, "eval_accuracy": 0.48680735461076846, "eval_loss": 2.416015625, "eval_runtime": 40.6108, "eval_samples_per_second": 87.858, "eval_steps_per_second": 10.982, "step": 375000 }, { "epoch": 0.11, "learning_rate": 4.431269561862217e-05, "loss": 2.3789, "step": 380000 }, { "epoch": 0.11, "eval_accuracy": 0.4895744293798684, "eval_loss": 2.396484375, "eval_runtime": 40.2591, "eval_samples_per_second": 88.626, "eval_steps_per_second": 11.078, "step": 380000 }, { "epoch": 0.12, "learning_rate": 4.423786325749043e-05, "loss": 2.372, "step": 385000 }, { "epoch": 0.12, "eval_accuracy": 0.48950730726913283, "eval_loss": 2.396484375, "eval_runtime": 40.2108, "eval_samples_per_second": 88.732, "eval_steps_per_second": 11.092, "step": 385000 }, { "epoch": 0.12, "learning_rate": 4.41630009394487e-05, "loss": 2.3732, "step": 390000 }, { "epoch": 0.12, "eval_accuracy": 0.4898645612789255, "eval_loss": 2.396484375, "eval_runtime": 40.4903, "eval_samples_per_second": 88.12, "eval_steps_per_second": 11.015, "step": 390000 }, { "epoch": 0.12, "learning_rate": 4.408815359986196e-05, "loss": 2.3725, "step": 395000 }, { "epoch": 0.12, "eval_accuracy": 0.4903272928913027, "eval_loss": 2.392578125, "eval_runtime": 40.3547, "eval_samples_per_second": 88.416, "eval_steps_per_second": 11.052, "step": 395000 }, { "epoch": 0.12, "learning_rate": 4.401332123873022e-05, "loss": 2.3716, "step": 400000 }, { "epoch": 0.12, "eval_accuracy": 0.49036948393233654, "eval_loss": 2.390625, "eval_runtime": 40.3654, "eval_samples_per_second": 88.392, "eval_steps_per_second": 11.049, "step": 400000 }, { "epoch": 0.12, "learning_rate": 4.393848887759846e-05, "loss": 2.3709, "step": 405000 }, { "epoch": 0.12, "eval_accuracy": 0.49040482577839734, "eval_loss": 2.390625, "eval_runtime": 40.3034, "eval_samples_per_second": 88.529, "eval_steps_per_second": 11.066, "step": 405000 }, { "epoch": 0.12, "learning_rate": 4.3863656516466716e-05, "loss": 2.3619, "step": 410000 }, { "epoch": 0.12, "eval_accuracy": 0.4906423558600616, "eval_loss": 2.388671875, "eval_runtime": 40.362, "eval_samples_per_second": 88.4, "eval_steps_per_second": 11.05, "step": 410000 }, { "epoch": 0.12, "learning_rate": 4.378877921997e-05, "loss": 2.367, "step": 415000 }, { "epoch": 0.12, "eval_accuracy": 0.49115138803045644, "eval_loss": 2.38671875, "eval_runtime": 40.2804, "eval_samples_per_second": 88.579, "eval_steps_per_second": 11.072, "step": 415000 }, { "epoch": 0.13, "learning_rate": 4.3713961837293236e-05, "loss": 2.3639, "step": 420000 }, { "epoch": 0.13, "eval_accuracy": 0.49116152483901654, "eval_loss": 2.384765625, "eval_runtime": 40.3366, "eval_samples_per_second": 88.456, "eval_steps_per_second": 11.057, "step": 420000 }, { "epoch": 0.13, "learning_rate": 4.363914445461648e-05, "loss": 2.3621, "step": 425000 }, { "epoch": 0.13, "eval_accuracy": 0.4918730192128138, "eval_loss": 2.3828125, "eval_runtime": 40.2687, "eval_samples_per_second": 88.605, "eval_steps_per_second": 11.076, "step": 425000 }, { "epoch": 0.13, "learning_rate": 4.3564282136574755e-05, "loss": 2.3578, "step": 430000 }, { "epoch": 0.13, "eval_accuracy": 0.4919609628762674, "eval_loss": 2.380859375, "eval_runtime": 40.2478, "eval_samples_per_second": 88.651, "eval_steps_per_second": 11.081, "step": 430000 }, { "epoch": 0.13, "learning_rate": 4.3489449775443e-05, "loss": 2.3608, "step": 435000 }, { "epoch": 0.13, "eval_accuracy": 0.4921738358560288, "eval_loss": 2.37890625, "eval_runtime": 40.4074, "eval_samples_per_second": 88.301, "eval_steps_per_second": 11.038, "step": 435000 }, { "epoch": 0.13, "learning_rate": 4.341461741431126e-05, "loss": 2.3541, "step": 440000 }, { "epoch": 0.13, "eval_accuracy": 0.4923423260523651, "eval_loss": 2.376953125, "eval_runtime": 40.2757, "eval_samples_per_second": 88.589, "eval_steps_per_second": 11.074, "step": 440000 }, { "epoch": 0.13, "learning_rate": 4.333978505317951e-05, "loss": 2.3556, "step": 445000 }, { "epoch": 0.13, "eval_accuracy": 0.4925938284917744, "eval_loss": 2.376953125, "eval_runtime": 40.292, "eval_samples_per_second": 88.553, "eval_steps_per_second": 11.069, "step": 445000 }, { "epoch": 0.13, "learning_rate": 4.326493771359277e-05, "loss": 2.3562, "step": 450000 }, { "epoch": 0.13, "eval_accuracy": 0.49278067453063834, "eval_loss": 2.376953125, "eval_runtime": 40.1882, "eval_samples_per_second": 88.782, "eval_steps_per_second": 11.098, "step": 450000 }, { "epoch": 0.14, "learning_rate": 4.9925197595778234e-05, "loss": 2.3641, "step": 455000 }, { "epoch": 0.14, "eval_accuracy": 0.4910004317732511, "eval_loss": 2.38671875, "eval_runtime": 39.6555, "eval_samples_per_second": 89.975, "eval_steps_per_second": 11.247, "step": 455000 }, { "epoch": 0.14, "learning_rate": 4.985036523464649e-05, "loss": 2.3641, "step": 460000 }, { "epoch": 0.14, "eval_accuracy": 0.4911015258910529, "eval_loss": 2.38671875, "eval_runtime": 39.5455, "eval_samples_per_second": 90.225, "eval_steps_per_second": 11.278, "step": 460000 }, { "epoch": 0.14, "learning_rate": 4.977551789505975e-05, "loss": 2.3646, "step": 465000 }, { "epoch": 0.14, "eval_accuracy": 0.4910639923026007, "eval_loss": 2.38671875, "eval_runtime": 39.4635, "eval_samples_per_second": 90.413, "eval_steps_per_second": 11.302, "step": 465000 }, { "epoch": 0.14, "learning_rate": 4.970071549083798e-05, "loss": 2.3629, "step": 470000 }, { "epoch": 0.14, "eval_accuracy": 0.4911439908998856, "eval_loss": 2.384765625, "eval_runtime": 39.5626, "eval_samples_per_second": 90.186, "eval_steps_per_second": 11.273, "step": 470000 }, { "epoch": 0.14, "learning_rate": 4.9625868151251246e-05, "loss": 2.3659, "step": 475000 }, { "epoch": 0.14, "eval_accuracy": 0.4913645349780168, "eval_loss": 2.3828125, "eval_runtime": 39.4888, "eval_samples_per_second": 90.355, "eval_steps_per_second": 11.294, "step": 475000 }, { "epoch": 0.14, "learning_rate": 4.9551020811664506e-05, "loss": 2.3651, "step": 480000 }, { "epoch": 0.14, "eval_accuracy": 0.4916360370667473, "eval_loss": 2.3828125, "eval_runtime": 38.5403, "eval_samples_per_second": 92.578, "eval_steps_per_second": 11.572, "step": 480000 }, { "epoch": 0.15, "learning_rate": 4.9476173472077765e-05, "loss": 2.3608, "step": 485000 }, { "epoch": 0.15, "eval_accuracy": 0.4917579527372671, "eval_loss": 2.380859375, "eval_runtime": 39.5737, "eval_samples_per_second": 90.161, "eval_steps_per_second": 11.27, "step": 485000 }, { "epoch": 0.15, "learning_rate": 4.9401356089401005e-05, "loss": 2.3612, "step": 490000 }, { "epoch": 0.15, "eval_accuracy": 0.49203685195656843, "eval_loss": 2.380859375, "eval_runtime": 38.5594, "eval_samples_per_second": 92.533, "eval_steps_per_second": 11.567, "step": 490000 }, { "epoch": 0.15, "learning_rate": 4.932649377135928e-05, "loss": 2.3569, "step": 495000 }, { "epoch": 0.15, "eval_accuracy": 0.49215328827110977, "eval_loss": 2.37890625, "eval_runtime": 39.5649, "eval_samples_per_second": 90.181, "eval_steps_per_second": 11.273, "step": 495000 }, { "epoch": 0.15, "learning_rate": 4.9251676388682524e-05, "loss": 2.3557, "step": 500000 }, { "epoch": 0.15, "eval_accuracy": 0.4923439698591586, "eval_loss": 2.37890625, "eval_runtime": 39.6114, "eval_samples_per_second": 90.075, "eval_steps_per_second": 11.259, "step": 500000 }, { "epoch": 0.15, "learning_rate": 4.9176829049095784e-05, "loss": 2.3541, "step": 505000 }, { "epoch": 0.15, "eval_accuracy": 0.49218726027817594, "eval_loss": 2.376953125, "eval_runtime": 39.5989, "eval_samples_per_second": 90.104, "eval_steps_per_second": 11.263, "step": 505000 }, { "epoch": 0.15, "learning_rate": 4.910196673105406e-05, "loss": 2.351, "step": 510000 }, { "epoch": 0.15, "eval_accuracy": 0.49274588062017544, "eval_loss": 2.375, "eval_runtime": 38.6221, "eval_samples_per_second": 92.382, "eval_steps_per_second": 11.548, "step": 510000 }, { "epoch": 0.15, "learning_rate": 4.9027134369922304e-05, "loss": 2.3504, "step": 515000 }, { "epoch": 0.15, "eval_accuracy": 0.49260917068851395, "eval_loss": 2.375, "eval_runtime": 39.6516, "eval_samples_per_second": 89.984, "eval_steps_per_second": 11.248, "step": 515000 }, { "epoch": 0.16, "learning_rate": 4.895231698724555e-05, "loss": 2.3479, "step": 520000 }, { "epoch": 0.16, "eval_accuracy": 0.492896014973984, "eval_loss": 2.373046875, "eval_runtime": 39.6699, "eval_samples_per_second": 89.942, "eval_steps_per_second": 11.243, "step": 520000 }, { "epoch": 0.16, "learning_rate": 4.887745466920382e-05, "loss": 2.3451, "step": 525000 }, { "epoch": 0.16, "eval_accuracy": 0.4929382060150178, "eval_loss": 2.37109375, "eval_runtime": 39.6216, "eval_samples_per_second": 90.052, "eval_steps_per_second": 11.256, "step": 525000 }, { "epoch": 0.16, "learning_rate": 4.880262230807207e-05, "loss": 2.3505, "step": 530000 }, { "epoch": 0.16, "eval_accuracy": 0.49343929311924395, "eval_loss": 2.369140625, "eval_runtime": 39.6785, "eval_samples_per_second": 89.923, "eval_steps_per_second": 11.24, "step": 530000 }, { "epoch": 0.16, "learning_rate": 4.8727804925395315e-05, "loss": 2.3457, "step": 535000 }, { "epoch": 0.16, "eval_accuracy": 0.4933929925612263, "eval_loss": 2.369140625, "eval_runtime": 39.6502, "eval_samples_per_second": 89.987, "eval_steps_per_second": 11.248, "step": 535000 }, { "epoch": 0.16, "learning_rate": 4.865297256426357e-05, "loss": 2.3479, "step": 540000 }, { "epoch": 0.16, "eval_accuracy": 0.4937496986354212, "eval_loss": 2.369140625, "eval_runtime": 39.6648, "eval_samples_per_second": 89.954, "eval_steps_per_second": 11.244, "step": 540000 }, { "epoch": 0.16, "learning_rate": 4.8578110246221835e-05, "loss": 2.3421, "step": 545000 }, { "epoch": 0.16, "eval_accuracy": 0.4935861398594655, "eval_loss": 2.3671875, "eval_runtime": 39.7026, "eval_samples_per_second": 89.868, "eval_steps_per_second": 11.234, "step": 545000 }, { "epoch": 0.16, "learning_rate": 4.850327788509009e-05, "loss": 2.3433, "step": 550000 }, { "epoch": 0.16, "eval_accuracy": 0.4937406576980568, "eval_loss": 2.3671875, "eval_runtime": 39.7646, "eval_samples_per_second": 89.728, "eval_steps_per_second": 11.216, "step": 550000 }, { "epoch": 0.17, "learning_rate": 4.842846050241333e-05, "loss": 2.3425, "step": 555000 }, { "epoch": 0.17, "eval_accuracy": 0.4939384624488776, "eval_loss": 2.365234375, "eval_runtime": 39.7934, "eval_samples_per_second": 89.663, "eval_steps_per_second": 11.208, "step": 555000 }, { "epoch": 0.17, "learning_rate": 4.835361316282659e-05, "loss": 2.3403, "step": 560000 }, { "epoch": 0.17, "eval_accuracy": 0.49420284137483617, "eval_loss": 2.36328125, "eval_runtime": 39.7702, "eval_samples_per_second": 89.715, "eval_steps_per_second": 11.214, "step": 560000 }, { "epoch": 0.17, "learning_rate": 4.827876582323985e-05, "loss": 2.3417, "step": 565000 }, { "epoch": 0.17, "eval_accuracy": 0.49440420770704296, "eval_loss": 2.361328125, "eval_runtime": 40.0918, "eval_samples_per_second": 88.996, "eval_steps_per_second": 11.124, "step": 565000 }, { "epoch": 0.17, "learning_rate": 4.82039484405631e-05, "loss": 2.3382, "step": 570000 }, { "epoch": 0.17, "eval_accuracy": 0.4947474893590907, "eval_loss": 2.361328125, "eval_runtime": 39.7167, "eval_samples_per_second": 89.836, "eval_steps_per_second": 11.23, "step": 570000 }, { "epoch": 0.17, "learning_rate": 4.812913105788634e-05, "loss": 2.3354, "step": 575000 }, { "epoch": 0.17, "eval_accuracy": 0.4949266642995849, "eval_loss": 2.359375, "eval_runtime": 39.8562, "eval_samples_per_second": 89.522, "eval_steps_per_second": 11.19, "step": 575000 }, { "epoch": 0.17, "learning_rate": 4.805425376138962e-05, "loss": 2.3366, "step": 580000 }, { "epoch": 0.17, "eval_accuracy": 0.4946513266616695, "eval_loss": 2.359375, "eval_runtime": 38.7841, "eval_samples_per_second": 91.997, "eval_steps_per_second": 11.5, "step": 580000 }, { "epoch": 0.18, "learning_rate": 4.797942140025787e-05, "loss": 2.3373, "step": 585000 }, { "epoch": 0.18, "eval_accuracy": 0.49454119160650334, "eval_loss": 2.359375, "eval_runtime": 38.7732, "eval_samples_per_second": 92.022, "eval_steps_per_second": 11.503, "step": 585000 }, { "epoch": 0.18, "learning_rate": 4.790460401758111e-05, "loss": 2.3365, "step": 590000 }, { "epoch": 0.18, "eval_accuracy": 0.49488255548395865, "eval_loss": 2.359375, "eval_runtime": 39.8158, "eval_samples_per_second": 89.613, "eval_steps_per_second": 11.202, "step": 590000 }, { "epoch": 0.18, "learning_rate": 4.782975667799438e-05, "loss": 2.3318, "step": 595000 }, { "epoch": 0.18, "eval_accuracy": 0.49525295994809954, "eval_loss": 2.35546875, "eval_runtime": 39.8567, "eval_samples_per_second": 89.521, "eval_steps_per_second": 11.19, "step": 595000 }, { "epoch": 0.18, "learning_rate": 4.7754894359952644e-05, "loss": 2.3278, "step": 600000 }, { "epoch": 0.18, "eval_accuracy": 0.4957521292777332, "eval_loss": 2.353515625, "eval_runtime": 39.8687, "eval_samples_per_second": 89.494, "eval_steps_per_second": 11.187, "step": 600000 }, { "epoch": 0.18, "learning_rate": 4.768004702036591e-05, "loss": 2.3277, "step": 605000 }, { "epoch": 0.18, "eval_accuracy": 0.4959060991807267, "eval_loss": 2.3515625, "eval_runtime": 40.0704, "eval_samples_per_second": 89.043, "eval_steps_per_second": 11.13, "step": 605000 }, { "epoch": 0.18, "learning_rate": 4.7605214659234157e-05, "loss": 2.326, "step": 610000 }, { "epoch": 0.18, "eval_accuracy": 0.49614417719798887, "eval_loss": 2.3515625, "eval_runtime": 39.9671, "eval_samples_per_second": 89.273, "eval_steps_per_second": 11.159, "step": 610000 }, { "epoch": 0.18, "learning_rate": 4.753036731964742e-05, "loss": 2.3273, "step": 615000 }, { "epoch": 0.18, "eval_accuracy": 0.49605705543793205, "eval_loss": 2.3515625, "eval_runtime": 39.9201, "eval_samples_per_second": 89.378, "eval_steps_per_second": 11.172, "step": 615000 }, { "epoch": 0.19, "learning_rate": 4.745551998006068e-05, "loss": 2.3284, "step": 620000 }, { "epoch": 0.19, "eval_accuracy": 0.49654992350819055, "eval_loss": 2.349609375, "eval_runtime": 39.956, "eval_samples_per_second": 89.298, "eval_steps_per_second": 11.162, "step": 620000 }, { "epoch": 0.19, "learning_rate": 4.7380687618928936e-05, "loss": 2.3276, "step": 625000 }, { "epoch": 0.19, "eval_accuracy": 0.49658115583726753, "eval_loss": 2.34765625, "eval_runtime": 39.9741, "eval_samples_per_second": 89.258, "eval_steps_per_second": 11.157, "step": 625000 }, { "epoch": 0.19, "learning_rate": 4.7305840279342196e-05, "loss": 2.3228, "step": 630000 }, { "epoch": 0.19, "eval_accuracy": 0.4966945785060207, "eval_loss": 2.345703125, "eval_runtime": 39.9089, "eval_samples_per_second": 89.404, "eval_steps_per_second": 11.175, "step": 630000 }, { "epoch": 0.19, "learning_rate": 4.723103787512043e-05, "loss": 2.3219, "step": 635000 }, { "epoch": 0.19, "eval_accuracy": 0.49684580873102496, "eval_loss": 2.345703125, "eval_runtime": 39.9108, "eval_samples_per_second": 89.399, "eval_steps_per_second": 11.175, "step": 635000 }, { "epoch": 0.19, "learning_rate": 4.715619053553369e-05, "loss": 2.326, "step": 640000 }, { "epoch": 0.19, "eval_accuracy": 0.49703155889869327, "eval_loss": 2.34375, "eval_runtime": 40.0352, "eval_samples_per_second": 89.122, "eval_steps_per_second": 11.14, "step": 640000 }, { "epoch": 0.19, "learning_rate": 4.708135817440194e-05, "loss": 2.3191, "step": 645000 }, { "epoch": 0.19, "eval_accuracy": 0.4972345690376936, "eval_loss": 2.341796875, "eval_runtime": 40.0269, "eval_samples_per_second": 89.14, "eval_steps_per_second": 11.143, "step": 645000 }, { "epoch": 0.19, "learning_rate": 4.70065258132702e-05, "loss": 2.3167, "step": 650000 }, { "epoch": 0.19, "eval_accuracy": 0.4972822394347058, "eval_loss": 2.34375, "eval_runtime": 40.0234, "eval_samples_per_second": 89.148, "eval_steps_per_second": 11.143, "step": 650000 }, { "epoch": 0.2, "learning_rate": 4.693166349522847e-05, "loss": 2.3172, "step": 655000 }, { "epoch": 0.2, "eval_accuracy": 0.4974205931731608, "eval_loss": 2.341796875, "eval_runtime": 40.0416, "eval_samples_per_second": 89.107, "eval_steps_per_second": 11.138, "step": 655000 }, { "epoch": 0.2, "learning_rate": 4.685683113409672e-05, "loss": 2.3194, "step": 660000 }, { "epoch": 0.2, "eval_accuracy": 0.4977205879129791, "eval_loss": 2.337890625, "eval_runtime": 40.5115, "eval_samples_per_second": 88.074, "eval_steps_per_second": 11.009, "step": 660000 }, { "epoch": 0.2, "learning_rate": 4.678198379450998e-05, "loss": 2.3204, "step": 665000 }, { "epoch": 0.2, "eval_accuracy": 0.49760332969504095, "eval_loss": 2.33984375, "eval_runtime": 40.059, "eval_samples_per_second": 89.069, "eval_steps_per_second": 11.134, "step": 665000 }, { "epoch": 0.2, "learning_rate": 4.670716641183322e-05, "loss": 2.309, "step": 670000 }, { "epoch": 0.2, "eval_accuracy": 0.49802085662059625, "eval_loss": 2.3359375, "eval_runtime": 40.172, "eval_samples_per_second": 88.818, "eval_steps_per_second": 11.102, "step": 670000 }, { "epoch": 0.2, "learning_rate": 4.663233405070147e-05, "loss": 2.3147, "step": 675000 }, { "epoch": 0.2, "eval_accuracy": 0.49805729433785273, "eval_loss": 2.337890625, "eval_runtime": 40.0906, "eval_samples_per_second": 88.999, "eval_steps_per_second": 11.125, "step": 675000 }, { "epoch": 0.2, "learning_rate": 4.655745675420475e-05, "loss": 2.3122, "step": 680000 }, { "epoch": 0.2, "eval_accuracy": 0.4980255140731779, "eval_loss": 2.3359375, "eval_runtime": 40.0778, "eval_samples_per_second": 89.027, "eval_steps_per_second": 11.128, "step": 680000 }, { "epoch": 0.21, "learning_rate": 4.6482624393073005e-05, "loss": 2.3096, "step": 685000 }, { "epoch": 0.21, "eval_accuracy": 0.4984096169272648, "eval_loss": 2.333984375, "eval_runtime": 40.3028, "eval_samples_per_second": 88.53, "eval_steps_per_second": 11.066, "step": 685000 }, { "epoch": 0.21, "learning_rate": 4.640780701039625e-05, "loss": 2.3093, "step": 690000 }, { "epoch": 0.21, "eval_accuracy": 0.49861701055104785, "eval_loss": 2.333984375, "eval_runtime": 40.1409, "eval_samples_per_second": 88.887, "eval_steps_per_second": 11.111, "step": 690000 }, { "epoch": 0.21, "learning_rate": 4.633295967080951e-05, "loss": 2.3048, "step": 695000 }, { "epoch": 0.21, "eval_accuracy": 0.498526601177404, "eval_loss": 2.33203125, "eval_runtime": 40.2295, "eval_samples_per_second": 88.691, "eval_steps_per_second": 11.086, "step": 695000 }, { "epoch": 0.21, "learning_rate": 4.6258127309677764e-05, "loss": 2.3111, "step": 700000 }, { "epoch": 0.21, "eval_accuracy": 0.4988186508510536, "eval_loss": 2.330078125, "eval_runtime": 40.2382, "eval_samples_per_second": 88.672, "eval_steps_per_second": 11.084, "step": 700000 }, { "epoch": 0.21, "learning_rate": 4.6183279970091023e-05, "loss": 2.3074, "step": 705000 }, { "epoch": 0.21, "eval_accuracy": 0.4989449500063561, "eval_loss": 2.330078125, "eval_runtime": 40.2221, "eval_samples_per_second": 88.707, "eval_steps_per_second": 11.088, "step": 705000 }, { "epoch": 0.21, "learning_rate": 4.610843263050428e-05, "loss": 2.3082, "step": 710000 }, { "epoch": 0.21, "eval_accuracy": 0.49918768547619985, "eval_loss": 2.330078125, "eval_runtime": 40.2424, "eval_samples_per_second": 88.663, "eval_steps_per_second": 11.083, "step": 710000 }, { "epoch": 0.21, "learning_rate": 4.603357031246256e-05, "loss": 2.3093, "step": 715000 }, { "epoch": 0.21, "eval_accuracy": 0.4993685042234876, "eval_loss": 2.328125, "eval_runtime": 39.2194, "eval_samples_per_second": 90.975, "eval_steps_per_second": 11.372, "step": 715000 }, { "epoch": 0.22, "learning_rate": 4.595873795133081e-05, "loss": 2.3011, "step": 720000 }, { "epoch": 0.22, "eval_accuracy": 0.4995095976399318, "eval_loss": 2.328125, "eval_runtime": 40.3274, "eval_samples_per_second": 88.476, "eval_steps_per_second": 11.059, "step": 720000 }, { "epoch": 0.22, "learning_rate": 4.588390559019906e-05, "loss": 2.2998, "step": 725000 }, { "epoch": 0.22, "eval_accuracy": 0.4994558999513433, "eval_loss": 2.326171875, "eval_runtime": 40.2634, "eval_samples_per_second": 88.616, "eval_steps_per_second": 11.077, "step": 725000 }, { "epoch": 0.22, "learning_rate": 4.580907322906731e-05, "loss": 2.3012, "step": 730000 }, { "epoch": 0.22, "eval_accuracy": 0.49959206194740696, "eval_loss": 2.326171875, "eval_runtime": 40.2894, "eval_samples_per_second": 88.559, "eval_steps_per_second": 11.07, "step": 730000 }, { "epoch": 0.22, "learning_rate": 4.573421091102558e-05, "loss": 2.3002, "step": 735000 }, { "epoch": 0.22, "eval_accuracy": 0.4997052106483612, "eval_loss": 2.32421875, "eval_runtime": 40.3059, "eval_samples_per_second": 88.523, "eval_steps_per_second": 11.065, "step": 735000 }, { "epoch": 0.22, "learning_rate": 4.5659378549893835e-05, "loss": 2.2994, "step": 740000 }, { "epoch": 0.22, "eval_accuracy": 0.5000197256815223, "eval_loss": 2.32421875, "eval_runtime": 40.7124, "eval_samples_per_second": 87.639, "eval_steps_per_second": 10.955, "step": 740000 }, { "epoch": 0.22, "learning_rate": 4.558454618876209e-05, "loss": 2.299, "step": 745000 }, { "epoch": 0.22, "eval_accuracy": 0.5000953407940244, "eval_loss": 2.322265625, "eval_runtime": 40.3194, "eval_samples_per_second": 88.493, "eval_steps_per_second": 11.062, "step": 745000 }, { "epoch": 0.22, "learning_rate": 4.550971382763034e-05, "loss": 2.2969, "step": 750000 }, { "epoch": 0.22, "eval_accuracy": 0.5002605433767736, "eval_loss": 2.322265625, "eval_runtime": 40.3665, "eval_samples_per_second": 88.39, "eval_steps_per_second": 11.049, "step": 750000 }, { "epoch": 0.23, "learning_rate": 4.543489644495358e-05, "loss": 2.2934, "step": 755000 }, { "epoch": 0.23, "eval_accuracy": 0.5003739660455269, "eval_loss": 2.3203125, "eval_runtime": 40.3284, "eval_samples_per_second": 88.474, "eval_steps_per_second": 11.059, "step": 755000 }, { "epoch": 0.23, "learning_rate": 4.536004910536684e-05, "loss": 2.2988, "step": 760000 }, { "epoch": 0.23, "eval_accuracy": 0.5004895804566715, "eval_loss": 2.318359375, "eval_runtime": 40.3305, "eval_samples_per_second": 88.469, "eval_steps_per_second": 11.059, "step": 760000 }, { "epoch": 0.23, "learning_rate": 4.5285186787325113e-05, "loss": 2.2911, "step": 765000 }, { "epoch": 0.23, "eval_accuracy": 0.5007449184452656, "eval_loss": 2.318359375, "eval_runtime": 39.3805, "eval_samples_per_second": 90.603, "eval_steps_per_second": 11.325, "step": 765000 }, { "epoch": 0.23, "learning_rate": 4.5210354426193366e-05, "loss": 2.2929, "step": 770000 }, { "epoch": 0.23, "eval_accuracy": 0.5008427249494803, "eval_loss": 2.318359375, "eval_runtime": 40.4207, "eval_samples_per_second": 88.272, "eval_steps_per_second": 11.034, "step": 770000 }, { "epoch": 0.23, "learning_rate": 4.5135567000426584e-05, "loss": 2.2926, "step": 775000 }, { "epoch": 0.23, "eval_accuracy": 0.5008994362838569, "eval_loss": 2.31640625, "eval_runtime": 40.4103, "eval_samples_per_second": 88.294, "eval_steps_per_second": 11.037, "step": 775000 }, { "epoch": 0.23, "learning_rate": 4.506070468238486e-05, "loss": 2.292, "step": 780000 }, { "epoch": 0.23, "eval_accuracy": 0.5011701164691906, "eval_loss": 2.31640625, "eval_runtime": 40.3936, "eval_samples_per_second": 88.331, "eval_steps_per_second": 11.041, "step": 780000 }, { "epoch": 0.24, "learning_rate": 4.498587232125311e-05, "loss": 2.2932, "step": 785000 }, { "epoch": 0.24, "eval_accuracy": 0.5014183312950129, "eval_loss": 2.314453125, "eval_runtime": 40.4142, "eval_samples_per_second": 88.286, "eval_steps_per_second": 11.036, "step": 785000 }, { "epoch": 0.24, "learning_rate": 4.4911039960121364e-05, "loss": 2.2903, "step": 790000 }, { "epoch": 0.24, "eval_accuracy": 0.5013958659355015, "eval_loss": 2.314453125, "eval_runtime": 40.3913, "eval_samples_per_second": 88.336, "eval_steps_per_second": 11.042, "step": 790000 }, { "epoch": 0.24, "learning_rate": 4.483620759898962e-05, "loss": 2.2886, "step": 795000 }, { "epoch": 0.24, "eval_accuracy": 0.5015205212840104, "eval_loss": 2.3125, "eval_runtime": 40.3641, "eval_samples_per_second": 88.395, "eval_steps_per_second": 11.049, "step": 795000 }, { "epoch": 0.24, "learning_rate": 4.476137523785787e-05, "loss": 2.2924, "step": 800000 }, { "epoch": 0.24, "eval_accuracy": 0.5014750426293895, "eval_loss": 2.3125, "eval_runtime": 40.359, "eval_samples_per_second": 88.407, "eval_steps_per_second": 11.051, "step": 800000 }, { "epoch": 0.24, "learning_rate": 4.468652789827113e-05, "loss": 2.2891, "step": 805000 }, { "epoch": 0.24, "eval_accuracy": 0.5018673645174441, "eval_loss": 2.310546875, "eval_runtime": 40.4253, "eval_samples_per_second": 88.261, "eval_steps_per_second": 11.033, "step": 805000 }, { "epoch": 0.24, "learning_rate": 4.461168055868439e-05, "loss": 2.2862, "step": 810000 }, { "epoch": 0.24, "eval_accuracy": 0.5019873624133714, "eval_loss": 2.30859375, "eval_runtime": 40.3854, "eval_samples_per_second": 88.349, "eval_steps_per_second": 11.044, "step": 810000 }, { "epoch": 0.24, "learning_rate": 4.453686317600763e-05, "loss": 2.2858, "step": 815000 }, { "epoch": 0.24, "eval_accuracy": 0.5021665373538656, "eval_loss": 2.30859375, "eval_runtime": 40.4075, "eval_samples_per_second": 88.301, "eval_steps_per_second": 11.038, "step": 815000 }, { "epoch": 0.25, "learning_rate": 4.44620008579659e-05, "loss": 2.2841, "step": 820000 }, { "epoch": 0.25, "eval_accuracy": 0.502265439729276, "eval_loss": 2.306640625, "eval_runtime": 40.4403, "eval_samples_per_second": 88.229, "eval_steps_per_second": 11.029, "step": 820000 }, { "epoch": 0.25, "learning_rate": 4.438718347528915e-05, "loss": 2.2843, "step": 825000 }, { "epoch": 0.25, "eval_accuracy": 0.5022361251747914, "eval_loss": 2.30859375, "eval_runtime": 40.4536, "eval_samples_per_second": 88.2, "eval_steps_per_second": 11.025, "step": 825000 }, { "epoch": 0.25, "learning_rate": 4.431233613570241e-05, "loss": 2.2832, "step": 830000 }, { "epoch": 0.25, "eval_accuracy": 0.5024813263548256, "eval_loss": 2.306640625, "eval_runtime": 40.4096, "eval_samples_per_second": 88.296, "eval_steps_per_second": 11.037, "step": 830000 }, { "epoch": 0.25, "learning_rate": 4.423756368839062e-05, "loss": 2.2846, "step": 835000 }, { "epoch": 0.25, "eval_accuracy": 0.502600776315155, "eval_loss": 2.306640625, "eval_runtime": 39.3247, "eval_samples_per_second": 90.732, "eval_steps_per_second": 11.341, "step": 835000 }, { "epoch": 0.25, "learning_rate": 4.416267141343891e-05, "loss": 2.2784, "step": 840000 }, { "epoch": 0.25, "eval_accuracy": 0.5026766653954561, "eval_loss": 2.3046875, "eval_runtime": 40.3768, "eval_samples_per_second": 88.367, "eval_steps_per_second": 11.046, "step": 840000 }, { "epoch": 0.25, "learning_rate": 4.408782407385217e-05, "loss": 2.277, "step": 845000 }, { "epoch": 0.25, "eval_accuracy": 0.5028424159138032, "eval_loss": 2.302734375, "eval_runtime": 40.4265, "eval_samples_per_second": 88.259, "eval_steps_per_second": 11.032, "step": 845000 }, { "epoch": 0.25, "learning_rate": 4.4013021669630405e-05, "loss": 2.276, "step": 850000 }, { "epoch": 0.25, "eval_accuracy": 0.5025583113063223, "eval_loss": 2.306640625, "eval_runtime": 40.7923, "eval_samples_per_second": 87.468, "eval_steps_per_second": 10.933, "step": 850000 }, { "epoch": 0.26, "learning_rate": 4.393818930849866e-05, "loss": 2.2802, "step": 855000 }, { "epoch": 0.26, "eval_accuracy": 0.5031032332583757, "eval_loss": 2.302734375, "eval_runtime": 40.3592, "eval_samples_per_second": 88.406, "eval_steps_per_second": 11.051, "step": 855000 }, { "epoch": 0.26, "learning_rate": 4.3863356947366904e-05, "loss": 2.2781, "step": 860000 }, { "epoch": 0.26, "eval_accuracy": 0.5032018616659872, "eval_loss": 2.30078125, "eval_runtime": 40.4036, "eval_samples_per_second": 88.309, "eval_steps_per_second": 11.039, "step": 860000 }, { "epoch": 0.26, "learning_rate": 4.378850960778017e-05, "loss": 2.2749, "step": 865000 }, { "epoch": 0.26, "eval_accuracy": 0.5038193850847547, "eval_loss": 2.298828125, "eval_runtime": 40.3881, "eval_samples_per_second": 88.343, "eval_steps_per_second": 11.043, "step": 865000 }, { "epoch": 0.26, "learning_rate": 4.371366226819343e-05, "loss": 2.2729, "step": 870000 }, { "epoch": 0.26, "eval_accuracy": 0.5037152773211648, "eval_loss": 2.296875, "eval_runtime": 40.4456, "eval_samples_per_second": 88.217, "eval_steps_per_second": 11.027, "step": 870000 }, { "epoch": 0.26, "learning_rate": 4.363882990706168e-05, "loss": 2.2708, "step": 875000 }, { "epoch": 0.26, "eval_accuracy": 0.5038993836820396, "eval_loss": 2.296875, "eval_runtime": 40.4095, "eval_samples_per_second": 88.296, "eval_steps_per_second": 11.037, "step": 875000 }, { "epoch": 0.26, "learning_rate": 4.3563997545929936e-05, "loss": 2.2754, "step": 880000 }, { "epoch": 0.26, "eval_accuracy": 0.5038591104155982, "eval_loss": 2.296875, "eval_runtime": 40.397, "eval_samples_per_second": 88.323, "eval_steps_per_second": 11.04, "step": 880000 }, { "epoch": 0.27, "learning_rate": 4.3489150206343196e-05, "loss": 2.2761, "step": 885000 }, { "epoch": 0.27, "eval_accuracy": 0.5040640383291909, "eval_loss": 2.294921875, "eval_runtime": 40.3166, "eval_samples_per_second": 88.499, "eval_steps_per_second": 11.062, "step": 885000 }, { "epoch": 0.27, "learning_rate": 4.341434780212143e-05, "loss": 2.2742, "step": 890000 }, { "epoch": 0.27, "eval_accuracy": 0.5041032157244366, "eval_loss": 2.294921875, "eval_runtime": 40.3381, "eval_samples_per_second": 88.452, "eval_steps_per_second": 11.057, "step": 890000 }, { "epoch": 0.27, "learning_rate": 4.3339470505624715e-05, "loss": 2.2734, "step": 895000 }, { "epoch": 0.27, "eval_accuracy": 0.5040837640107132, "eval_loss": 2.294921875, "eval_runtime": 40.3833, "eval_samples_per_second": 88.353, "eval_steps_per_second": 11.044, "step": 895000 }, { "epoch": 0.27, "learning_rate": 4.326463814449296e-05, "loss": 2.2682, "step": 900000 }, { "epoch": 0.27, "eval_accuracy": 0.5043944434946894, "eval_loss": 2.29296875, "eval_runtime": 40.4818, "eval_samples_per_second": 88.138, "eval_steps_per_second": 11.017, "step": 900000 }, { "epoch": 0.27, "learning_rate": 4.318982076181621e-05, "loss": 2.2667, "step": 905000 }, { "epoch": 0.27, "eval_accuracy": 0.5045489613332808, "eval_loss": 2.29296875, "eval_runtime": 40.3614, "eval_samples_per_second": 88.401, "eval_steps_per_second": 11.05, "step": 905000 }, { "epoch": 0.27, "learning_rate": 4.311498840068446e-05, "loss": 2.2676, "step": 910000 }, { "epoch": 0.27, "eval_accuracy": 0.5045801936623577, "eval_loss": 2.29296875, "eval_runtime": 40.4147, "eval_samples_per_second": 88.285, "eval_steps_per_second": 11.036, "step": 910000 }, { "epoch": 0.27, "learning_rate": 4.304012608264273e-05, "loss": 2.2707, "step": 915000 }, { "epoch": 0.27, "eval_accuracy": 0.5046547129036641, "eval_loss": 2.291015625, "eval_runtime": 40.4009, "eval_samples_per_second": 88.315, "eval_steps_per_second": 11.039, "step": 915000 }, { "epoch": 0.28, "learning_rate": 4.296529372151098e-05, "loss": 2.265, "step": 920000 }, { "epoch": 0.28, "eval_accuracy": 0.5047700533470098, "eval_loss": 2.291015625, "eval_runtime": 40.3704, "eval_samples_per_second": 88.382, "eval_steps_per_second": 11.048, "step": 920000 }, { "epoch": 0.28, "learning_rate": 4.289046136037923e-05, "loss": 2.2676, "step": 925000 }, { "epoch": 0.28, "eval_accuracy": 0.5046149875728206, "eval_loss": 2.291015625, "eval_runtime": 40.3019, "eval_samples_per_second": 88.532, "eval_steps_per_second": 11.066, "step": 925000 }, { "epoch": 0.28, "learning_rate": 4.281564397770247e-05, "loss": 2.2662, "step": 930000 }, { "epoch": 0.28, "eval_accuracy": 0.5051503206519119, "eval_loss": 2.2890625, "eval_runtime": 40.3203, "eval_samples_per_second": 88.491, "eval_steps_per_second": 11.061, "step": 930000 }, { "epoch": 0.28, "learning_rate": 4.274079663811574e-05, "loss": 2.2706, "step": 935000 }, { "epoch": 0.28, "eval_accuracy": 0.5050968969311223, "eval_loss": 2.2890625, "eval_runtime": 40.3497, "eval_samples_per_second": 88.427, "eval_steps_per_second": 11.053, "step": 935000 }, { "epoch": 0.28, "learning_rate": 4.2665949298529e-05, "loss": 2.2657, "step": 940000 }, { "epoch": 0.28, "eval_accuracy": 0.5048788185631814, "eval_loss": 2.2890625, "eval_runtime": 40.7581, "eval_samples_per_second": 87.541, "eval_steps_per_second": 10.943, "step": 940000 }, { "epoch": 0.28, "learning_rate": 4.259110195894226e-05, "loss": 2.2672, "step": 945000 }, { "epoch": 0.28, "eval_accuracy": 0.5050453909849252, "eval_loss": 2.287109375, "eval_runtime": 40.3332, "eval_samples_per_second": 88.463, "eval_steps_per_second": 11.058, "step": 945000 }, { "epoch": 0.28, "learning_rate": 4.9925167638868255e-05, "loss": 2.2716, "step": 950000 }, { "epoch": 0.28, "eval_accuracy": 0.5037065103515993, "eval_loss": 2.296875, "eval_runtime": 38.5412, "eval_samples_per_second": 92.576, "eval_steps_per_second": 11.572, "step": 950000 }, { "epoch": 0.29, "learning_rate": 4.9850290342371536e-05, "loss": 2.2702, "step": 955000 }, { "epoch": 0.29, "eval_accuracy": 0.5036908941870608, "eval_loss": 2.298828125, "eval_runtime": 39.4253, "eval_samples_per_second": 90.5, "eval_steps_per_second": 11.313, "step": 955000 }, { "epoch": 0.29, "learning_rate": 4.977548793814977e-05, "loss": 2.2708, "step": 960000 }, { "epoch": 0.29, "eval_accuracy": 0.5035029522770011, "eval_loss": 2.298828125, "eval_runtime": 39.5074, "eval_samples_per_second": 90.312, "eval_steps_per_second": 11.289, "step": 960000 }, { "epoch": 0.29, "learning_rate": 4.970064059856303e-05, "loss": 2.2738, "step": 965000 }, { "epoch": 0.29, "eval_accuracy": 0.5035369242840674, "eval_loss": 2.298828125, "eval_runtime": 39.5917, "eval_samples_per_second": 90.12, "eval_steps_per_second": 11.265, "step": 965000 }, { "epoch": 0.29, "learning_rate": 4.962582321588627e-05, "loss": 2.2737, "step": 970000 }, { "epoch": 0.29, "eval_accuracy": 0.5035577458367854, "eval_loss": 2.298828125, "eval_runtime": 39.4627, "eval_samples_per_second": 90.415, "eval_steps_per_second": 11.302, "step": 970000 }, { "epoch": 0.29, "learning_rate": 4.9550990854754526e-05, "loss": 2.2763, "step": 975000 }, { "epoch": 0.29, "eval_accuracy": 0.49873289892999134, "eval_loss": 2.330078125, "eval_runtime": 39.5211, "eval_samples_per_second": 90.281, "eval_steps_per_second": 11.285, "step": 975000 }, { "epoch": 0.29, "learning_rate": 4.9476143515167786e-05, "loss": 2.2738, "step": 980000 }, { "epoch": 0.29, "eval_accuracy": 0.5034662405919458, "eval_loss": 2.296875, "eval_runtime": 39.4696, "eval_samples_per_second": 90.399, "eval_steps_per_second": 11.3, "step": 980000 }, { "epoch": 0.3, "learning_rate": 4.9401296175581046e-05, "loss": 2.2737, "step": 985000 }, { "epoch": 0.3, "eval_accuracy": 0.5036182927203469, "eval_loss": 2.296875, "eval_runtime": 39.6371, "eval_samples_per_second": 90.017, "eval_steps_per_second": 11.252, "step": 985000 }, { "epoch": 0.3, "learning_rate": 4.932644883599431e-05, "loss": 2.2748, "step": 990000 }, { "epoch": 0.3, "eval_accuracy": 0.5036056902015965, "eval_loss": 2.296875, "eval_runtime": 39.6139, "eval_samples_per_second": 90.069, "eval_steps_per_second": 11.259, "step": 990000 }, { "epoch": 0.3, "learning_rate": 4.9251631453317545e-05, "loss": 2.2724, "step": 995000 }, { "epoch": 0.3, "eval_accuracy": 0.5038232206339396, "eval_loss": 2.296875, "eval_runtime": 39.4746, "eval_samples_per_second": 90.387, "eval_steps_per_second": 11.298, "step": 995000 }, { "epoch": 0.3, "learning_rate": 4.917678411373081e-05, "loss": 2.2744, "step": 1000000 }, { "epoch": 0.3, "eval_accuracy": 0.5032999421380009, "eval_loss": 2.298828125, "eval_runtime": 39.6576, "eval_samples_per_second": 89.97, "eval_steps_per_second": 11.246, "step": 1000000 }, { "epoch": 0.3, "learning_rate": 4.9101951752599065e-05, "loss": 2.2694, "step": 1005000 }, { "epoch": 0.3, "eval_accuracy": 0.5033465166638174, "eval_loss": 2.298828125, "eval_runtime": 39.6623, "eval_samples_per_second": 89.959, "eval_steps_per_second": 11.245, "step": 1005000 }, { "epoch": 0.3, "learning_rate": 4.9027104413012324e-05, "loss": 2.2684, "step": 1010000 }, { "epoch": 0.3, "eval_accuracy": 0.5039421226586712, "eval_loss": 2.294921875, "eval_runtime": 39.6275, "eval_samples_per_second": 90.039, "eval_steps_per_second": 11.255, "step": 1010000 }, { "epoch": 0.3, "learning_rate": 4.895231698724555e-05, "loss": 2.2731, "step": 1015000 }, { "epoch": 0.3, "eval_accuracy": 0.5039547251774216, "eval_loss": 2.294921875, "eval_runtime": 39.6334, "eval_samples_per_second": 90.025, "eval_steps_per_second": 11.253, "step": 1015000 }, { "epoch": 0.31, "learning_rate": 4.887746964765881e-05, "loss": 2.2714, "step": 1020000 }, { "epoch": 0.31, "eval_accuracy": 0.5042445831086797, "eval_loss": 2.294921875, "eval_runtime": 39.6384, "eval_samples_per_second": 90.014, "eval_steps_per_second": 11.252, "step": 1020000 }, { "epoch": 0.31, "learning_rate": 4.880263728652706e-05, "loss": 2.2687, "step": 1025000 }, { "epoch": 0.31, "eval_accuracy": 0.5045218385211876, "eval_loss": 2.29296875, "eval_runtime": 39.6654, "eval_samples_per_second": 89.952, "eval_steps_per_second": 11.244, "step": 1025000 }, { "epoch": 0.31, "learning_rate": 4.872778994694032e-05, "loss": 2.2673, "step": 1030000 }, { "epoch": 0.31, "eval_accuracy": 0.5046215627999947, "eval_loss": 2.29296875, "eval_runtime": 39.7655, "eval_samples_per_second": 89.726, "eval_steps_per_second": 11.216, "step": 1030000 }, { "epoch": 0.31, "learning_rate": 4.8652957585808575e-05, "loss": 2.2677, "step": 1035000 }, { "epoch": 0.31, "eval_accuracy": 0.5044056761744452, "eval_loss": 2.29296875, "eval_runtime": 39.7885, "eval_samples_per_second": 89.674, "eval_steps_per_second": 11.209, "step": 1035000 }, { "epoch": 0.31, "learning_rate": 4.8578110246221835e-05, "loss": 2.265, "step": 1040000 }, { "epoch": 0.31, "eval_accuracy": 0.5046538910002674, "eval_loss": 2.291015625, "eval_runtime": 39.7963, "eval_samples_per_second": 89.657, "eval_steps_per_second": 11.207, "step": 1040000 }, { "epoch": 0.31, "learning_rate": 4.850327788509009e-05, "loss": 2.2659, "step": 1045000 }, { "epoch": 0.31, "eval_accuracy": 0.504468688768197, "eval_loss": 2.291015625, "eval_runtime": 40.1905, "eval_samples_per_second": 88.777, "eval_steps_per_second": 11.097, "step": 1045000 }, { "epoch": 0.31, "learning_rate": 4.842849045932331e-05, "loss": 2.2633, "step": 1050000 }, { "epoch": 0.31, "eval_accuracy": 0.5042100631660157, "eval_loss": 2.294921875, "eval_runtime": 39.7629, "eval_samples_per_second": 89.732, "eval_steps_per_second": 11.216, "step": 1050000 }, { "epoch": 0.32, "learning_rate": 4.835361316282659e-05, "loss": 2.2689, "step": 1055000 }, { "epoch": 0.32, "eval_accuracy": 0.5049952548777227, "eval_loss": 2.2890625, "eval_runtime": 39.8901, "eval_samples_per_second": 89.446, "eval_steps_per_second": 11.181, "step": 1055000 }, { "epoch": 0.32, "learning_rate": 4.827876582323985e-05, "loss": 2.2617, "step": 1060000 }, { "epoch": 0.32, "eval_accuracy": 0.5049081331176659, "eval_loss": 2.2890625, "eval_runtime": 39.8913, "eval_samples_per_second": 89.443, "eval_steps_per_second": 11.18, "step": 1060000 }, { "epoch": 0.32, "learning_rate": 4.820390350519813e-05, "loss": 2.2613, "step": 1065000 }, { "epoch": 0.32, "eval_accuracy": 0.5052440176391427, "eval_loss": 2.287109375, "eval_runtime": 39.9185, "eval_samples_per_second": 89.382, "eval_steps_per_second": 11.173, "step": 1065000 }, { "epoch": 0.32, "learning_rate": 4.8129086122521366e-05, "loss": 2.2649, "step": 1070000 }, { "epoch": 0.32, "eval_accuracy": 0.5047081366244537, "eval_loss": 2.2890625, "eval_runtime": 39.8452, "eval_samples_per_second": 89.547, "eval_steps_per_second": 11.193, "step": 1070000 }, { "epoch": 0.32, "learning_rate": 4.805422380447964e-05, "loss": 2.2587, "step": 1075000 }, { "epoch": 0.32, "eval_accuracy": 0.505284564873383, "eval_loss": 2.287109375, "eval_runtime": 39.8746, "eval_samples_per_second": 89.481, "eval_steps_per_second": 11.185, "step": 1075000 }, { "epoch": 0.32, "learning_rate": 4.797939144334789e-05, "loss": 2.2641, "step": 1080000 }, { "epoch": 0.32, "eval_accuracy": 0.5054223706762402, "eval_loss": 2.28515625, "eval_runtime": 39.914, "eval_samples_per_second": 89.392, "eval_steps_per_second": 11.174, "step": 1080000 }, { "epoch": 0.33, "learning_rate": 4.7904559082216145e-05, "loss": 2.2634, "step": 1085000 }, { "epoch": 0.33, "eval_accuracy": 0.5056552433053229, "eval_loss": 2.28515625, "eval_runtime": 39.8319, "eval_samples_per_second": 89.576, "eval_steps_per_second": 11.197, "step": 1085000 }, { "epoch": 0.33, "learning_rate": 4.7829741699539384e-05, "loss": 2.2597, "step": 1090000 }, { "epoch": 0.33, "eval_accuracy": 0.5057451047433689, "eval_loss": 2.283203125, "eval_runtime": 39.976, "eval_samples_per_second": 89.254, "eval_steps_per_second": 11.157, "step": 1090000 }, { "epoch": 0.33, "learning_rate": 4.775487938149766e-05, "loss": 2.2572, "step": 1095000 }, { "epoch": 0.33, "eval_accuracy": 0.5059667446926958, "eval_loss": 2.283203125, "eval_runtime": 40.0116, "eval_samples_per_second": 89.174, "eval_steps_per_second": 11.147, "step": 1095000 }, { "epoch": 0.33, "learning_rate": 4.768003204191092e-05, "loss": 2.2566, "step": 1100000 }, { "epoch": 0.33, "eval_accuracy": 0.5055771624826304, "eval_loss": 2.283203125, "eval_runtime": 39.963, "eval_samples_per_second": 89.283, "eval_steps_per_second": 11.16, "step": 1100000 }, { "epoch": 0.33, "learning_rate": 4.760524461614414e-05, "loss": 2.2576, "step": 1105000 }, { "epoch": 0.33, "eval_accuracy": 0.5055905869047775, "eval_loss": 2.283203125, "eval_runtime": 39.8683, "eval_samples_per_second": 89.495, "eval_steps_per_second": 11.187, "step": 1105000 }, { "epoch": 0.33, "learning_rate": 4.753038229810241e-05, "loss": 2.2612, "step": 1110000 }, { "epoch": 0.33, "eval_accuracy": 0.505675790890242, "eval_loss": 2.283203125, "eval_runtime": 39.9091, "eval_samples_per_second": 89.403, "eval_steps_per_second": 11.175, "step": 1110000 }, { "epoch": 0.33, "learning_rate": 4.7455534958515676e-05, "loss": 2.2585, "step": 1115000 }, { "epoch": 0.33, "eval_accuracy": 0.505924553651662, "eval_loss": 2.28125, "eval_runtime": 39.8428, "eval_samples_per_second": 89.552, "eval_steps_per_second": 11.194, "step": 1115000 }, { "epoch": 0.34, "learning_rate": 4.738073255429391e-05, "loss": 2.2528, "step": 1120000 }, { "epoch": 0.34, "eval_accuracy": 0.5059566078841358, "eval_loss": 2.28125, "eval_runtime": 40.0018, "eval_samples_per_second": 89.196, "eval_steps_per_second": 11.149, "step": 1120000 }, { "epoch": 0.34, "learning_rate": 4.730588521470717e-05, "loss": 2.2599, "step": 1125000 }, { "epoch": 0.34, "eval_accuracy": 0.5059996208285663, "eval_loss": 2.28125, "eval_runtime": 39.9769, "eval_samples_per_second": 89.251, "eval_steps_per_second": 11.156, "step": 1125000 }, { "epoch": 0.34, "learning_rate": 4.723105285357542e-05, "loss": 2.2556, "step": 1130000 }, { "epoch": 0.34, "eval_accuracy": 0.5065730354317075, "eval_loss": 2.27734375, "eval_runtime": 39.975, "eval_samples_per_second": 89.256, "eval_steps_per_second": 11.157, "step": 1130000 }, { "epoch": 0.34, "learning_rate": 4.715619053553369e-05, "loss": 2.2519, "step": 1135000 }, { "epoch": 0.34, "eval_accuracy": 0.5063790662300716, "eval_loss": 2.279296875, "eval_runtime": 40.0596, "eval_samples_per_second": 89.067, "eval_steps_per_second": 11.133, "step": 1135000 }, { "epoch": 0.34, "learning_rate": 4.708135817440194e-05, "loss": 2.2567, "step": 1140000 }, { "epoch": 0.34, "eval_accuracy": 0.5067524843400006, "eval_loss": 2.27734375, "eval_runtime": 40.0409, "eval_samples_per_second": 89.109, "eval_steps_per_second": 11.139, "step": 1140000 }, { "epoch": 0.34, "learning_rate": 4.700651083481521e-05, "loss": 2.2516, "step": 1145000 }, { "epoch": 0.34, "eval_accuracy": 0.506862071459569, "eval_loss": 2.275390625, "eval_runtime": 39.8652, "eval_samples_per_second": 89.502, "eval_steps_per_second": 11.188, "step": 1145000 }, { "epoch": 0.34, "learning_rate": 4.693166349522847e-05, "loss": 2.2533, "step": 1150000 }, { "epoch": 0.34, "eval_accuracy": 0.5067752236673111, "eval_loss": 2.275390625, "eval_runtime": 39.9361, "eval_samples_per_second": 89.343, "eval_steps_per_second": 11.168, "step": 1150000 }, { "epoch": 0.35, "learning_rate": 4.6856846112551706e-05, "loss": 2.2532, "step": 1155000 }, { "epoch": 0.35, "eval_accuracy": 0.5069639874807674, "eval_loss": 2.275390625, "eval_runtime": 40.0505, "eval_samples_per_second": 89.088, "eval_steps_per_second": 11.136, "step": 1155000 }, { "epoch": 0.35, "learning_rate": 4.6781998772964966e-05, "loss": 2.2572, "step": 1160000 }, { "epoch": 0.35, "eval_accuracy": 0.5063629021299353, "eval_loss": 2.279296875, "eval_runtime": 40.0931, "eval_samples_per_second": 88.993, "eval_steps_per_second": 11.124, "step": 1160000 }, { "epoch": 0.35, "learning_rate": 4.670712147646825e-05, "loss": 2.2514, "step": 1165000 }, { "epoch": 0.35, "eval_accuracy": 0.5071686414265613, "eval_loss": 2.2734375, "eval_runtime": 40.4455, "eval_samples_per_second": 88.217, "eval_steps_per_second": 11.027, "step": 1165000 }, { "epoch": 0.35, "learning_rate": 4.66322891153365e-05, "loss": 2.2471, "step": 1170000 }, { "epoch": 0.35, "eval_accuracy": 0.5073409671720824, "eval_loss": 2.2734375, "eval_runtime": 40.0562, "eval_samples_per_second": 89.075, "eval_steps_per_second": 11.134, "step": 1170000 }, { "epoch": 0.35, "learning_rate": 4.655745675420475e-05, "loss": 2.2524, "step": 1175000 }, { "epoch": 0.35, "eval_accuracy": 0.50760562006584, "eval_loss": 2.271484375, "eval_runtime": 40.09, "eval_samples_per_second": 89.0, "eval_steps_per_second": 11.125, "step": 1175000 }, { "epoch": 0.35, "learning_rate": 4.648260941461802e-05, "loss": 2.247, "step": 1180000 }, { "epoch": 0.35, "eval_accuracy": 0.5072979542276519, "eval_loss": 2.271484375, "eval_runtime": 40.3075, "eval_samples_per_second": 88.52, "eval_steps_per_second": 11.065, "step": 1180000 }, { "epoch": 0.35, "learning_rate": 4.640780701039625e-05, "loss": 2.2491, "step": 1185000 }, { "epoch": 0.35, "eval_accuracy": 0.5076653450460047, "eval_loss": 2.271484375, "eval_runtime": 40.1522, "eval_samples_per_second": 88.862, "eval_steps_per_second": 11.108, "step": 1185000 }, { "epoch": 0.36, "learning_rate": 4.63329746492645e-05, "loss": 2.2481, "step": 1190000 }, { "epoch": 0.36, "eval_accuracy": 0.5078031508488619, "eval_loss": 2.26953125, "eval_runtime": 40.1488, "eval_samples_per_second": 88.869, "eval_steps_per_second": 11.109, "step": 1190000 }, { "epoch": 0.36, "learning_rate": 4.992515266041327e-05, "loss": 2.2465, "step": 1195000 }, { "epoch": 0.36, "eval_accuracy": 0.5069333030872883, "eval_loss": 2.2734375, "eval_runtime": 39.5233, "eval_samples_per_second": 90.276, "eval_steps_per_second": 11.284, "step": 1195000 }, { "epoch": 0.36, "learning_rate": 4.985030532082652e-05, "loss": 2.2494, "step": 1200000 }, { "epoch": 0.36, "eval_accuracy": 0.5067053618785863, "eval_loss": 2.279296875, "eval_runtime": 38.5856, "eval_samples_per_second": 92.47, "eval_steps_per_second": 11.559, "step": 1200000 }, { "epoch": 0.36, "learning_rate": 4.977545798123979e-05, "loss": 2.2541, "step": 1205000 }, { "epoch": 0.36, "eval_accuracy": 0.5068631673307646, "eval_loss": 2.275390625, "eval_runtime": 39.5375, "eval_samples_per_second": 90.244, "eval_steps_per_second": 11.28, "step": 1205000 }, { "epoch": 0.36, "learning_rate": 4.9700625620108035e-05, "loss": 2.25, "step": 1210000 }, { "epoch": 0.36, "eval_accuracy": 0.5067459091128265, "eval_loss": 2.275390625, "eval_runtime": 39.4824, "eval_samples_per_second": 90.369, "eval_steps_per_second": 11.296, "step": 1210000 }, { "epoch": 0.36, "learning_rate": 4.962582321588627e-05, "loss": 2.25, "step": 1215000 }, { "epoch": 0.36, "eval_accuracy": 0.5064313940796654, "eval_loss": 2.279296875, "eval_runtime": 39.3855, "eval_samples_per_second": 90.592, "eval_steps_per_second": 11.324, "step": 1215000 }, { "epoch": 0.37, "learning_rate": 4.955096089784455e-05, "loss": 2.2508, "step": 1220000 }, { "epoch": 0.37, "eval_accuracy": 0.507028095945715, "eval_loss": 2.2734375, "eval_runtime": 39.5116, "eval_samples_per_second": 90.303, "eval_steps_per_second": 11.288, "step": 1220000 }, { "epoch": 0.37, "learning_rate": 4.947612853671279e-05, "loss": 2.2496, "step": 1225000 }, { "epoch": 0.37, "eval_accuracy": 0.507010562006584, "eval_loss": 2.2734375, "eval_runtime": 39.5165, "eval_samples_per_second": 90.291, "eval_steps_per_second": 11.286, "step": 1225000 }, { "epoch": 0.37, "learning_rate": 4.940131115403604e-05, "loss": 2.2499, "step": 1230000 }, { "epoch": 0.37, "eval_accuracy": 0.5073475423992566, "eval_loss": 2.2734375, "eval_runtime": 39.4273, "eval_samples_per_second": 90.496, "eval_steps_per_second": 11.312, "step": 1230000 }, { "epoch": 0.37, "learning_rate": 4.932647879290429e-05, "loss": 2.2467, "step": 1235000 }, { "epoch": 0.37, "eval_accuracy": 0.5075538401518439, "eval_loss": 2.271484375, "eval_runtime": 39.5247, "eval_samples_per_second": 90.273, "eval_steps_per_second": 11.284, "step": 1235000 }, { "epoch": 0.37, "learning_rate": 4.925164643177254e-05, "loss": 2.2497, "step": 1240000 }, { "epoch": 0.37, "eval_accuracy": 0.5073163100701796, "eval_loss": 2.271484375, "eval_runtime": 38.6276, "eval_samples_per_second": 92.369, "eval_steps_per_second": 11.546, "step": 1240000 }, { "epoch": 0.37, "learning_rate": 4.917678411373081e-05, "loss": 2.2463, "step": 1245000 }, { "epoch": 0.37, "eval_accuracy": 0.5073409671720824, "eval_loss": 2.271484375, "eval_runtime": 39.5689, "eval_samples_per_second": 90.172, "eval_steps_per_second": 11.271, "step": 1245000 }, { "epoch": 0.37, "learning_rate": 4.910196673105406e-05, "loss": 2.2479, "step": 1250000 }, { "epoch": 0.37, "eval_accuracy": 0.5077573982264421, "eval_loss": 2.26953125, "eval_runtime": 39.6608, "eval_samples_per_second": 89.963, "eval_steps_per_second": 11.245, "step": 1250000 }, { "epoch": 0.38, "learning_rate": 4.902711939146731e-05, "loss": 2.2445, "step": 1255000 }, { "epoch": 0.38, "eval_accuracy": 0.5078749304121791, "eval_loss": 2.26953125, "eval_runtime": 39.6577, "eval_samples_per_second": 89.97, "eval_steps_per_second": 11.246, "step": 1255000 }, { "epoch": 0.38, "learning_rate": 4.89522420949706e-05, "loss": 2.247, "step": 1260000 }, { "epoch": 0.38, "eval_accuracy": 0.5078483555356837, "eval_loss": 2.26953125, "eval_runtime": 39.6203, "eval_samples_per_second": 90.055, "eval_steps_per_second": 11.257, "step": 1260000 }, { "epoch": 0.38, "learning_rate": 4.887743969074883e-05, "loss": 2.2443, "step": 1265000 }, { "epoch": 0.38, "eval_accuracy": 0.5078826015105489, "eval_loss": 2.267578125, "eval_runtime": 39.6508, "eval_samples_per_second": 89.986, "eval_steps_per_second": 11.248, "step": 1265000 }, { "epoch": 0.38, "learning_rate": 4.880262230807207e-05, "loss": 2.243, "step": 1270000 }, { "epoch": 0.38, "eval_accuracy": 0.5080672558070215, "eval_loss": 2.267578125, "eval_runtime": 39.6639, "eval_samples_per_second": 89.956, "eval_steps_per_second": 11.244, "step": 1270000 }, { "epoch": 0.38, "learning_rate": 4.8727804925395315e-05, "loss": 2.2454, "step": 1275000 }, { "epoch": 0.38, "eval_accuracy": 0.5076889062767118, "eval_loss": 2.271484375, "eval_runtime": 39.6373, "eval_samples_per_second": 90.016, "eval_steps_per_second": 11.252, "step": 1275000 }, { "epoch": 0.38, "learning_rate": 4.86529126504436e-05, "loss": 2.2451, "step": 1280000 }, { "epoch": 0.38, "eval_accuracy": 0.5080541053526733, "eval_loss": 2.26953125, "eval_runtime": 39.7457, "eval_samples_per_second": 89.771, "eval_steps_per_second": 11.221, "step": 1280000 }, { "epoch": 0.38, "learning_rate": 4.8578080289311855e-05, "loss": 2.2455, "step": 1285000 }, { "epoch": 0.38, "eval_accuracy": 0.5083853324215685, "eval_loss": 2.265625, "eval_runtime": 39.6403, "eval_samples_per_second": 90.009, "eval_steps_per_second": 11.251, "step": 1285000 }, { "epoch": 0.39, "learning_rate": 4.850324792818011e-05, "loss": 2.241, "step": 1290000 }, { "epoch": 0.39, "eval_accuracy": 0.508259307234065, "eval_loss": 2.267578125, "eval_runtime": 40.1652, "eval_samples_per_second": 88.833, "eval_steps_per_second": 11.104, "step": 1290000 }, { "epoch": 0.39, "learning_rate": 4.8428415567048354e-05, "loss": 2.243, "step": 1295000 }, { "epoch": 0.39, "eval_accuracy": 0.5085801235266012, "eval_loss": 2.263671875, "eval_runtime": 39.68, "eval_samples_per_second": 89.919, "eval_steps_per_second": 11.24, "step": 1295000 }, { "epoch": 0.39, "learning_rate": 4.8353553249006635e-05, "loss": 2.2408, "step": 1300000 }, { "epoch": 0.39, "eval_accuracy": 0.5084371123355645, "eval_loss": 2.263671875, "eval_runtime": 39.9223, "eval_samples_per_second": 89.374, "eval_steps_per_second": 11.172, "step": 1300000 }, { "epoch": 0.39, "learning_rate": 4.827876582323985e-05, "loss": 2.2508, "step": 1305000 }, { "epoch": 0.39, "eval_accuracy": 0.5063459161264021, "eval_loss": 2.279296875, "eval_runtime": 39.7588, "eval_samples_per_second": 89.741, "eval_steps_per_second": 11.218, "step": 1305000 }, { "epoch": 0.39, "learning_rate": 4.820396341901809e-05, "loss": 2.252, "step": 1310000 }, { "epoch": 0.39, "eval_accuracy": 0.504651699257876, "eval_loss": 2.291015625, "eval_runtime": 39.8037, "eval_samples_per_second": 89.64, "eval_steps_per_second": 11.205, "step": 1310000 }, { "epoch": 0.39, "learning_rate": 4.8129146036341324e-05, "loss": 2.7482, "step": 1315000 }, { "epoch": 0.39, "eval_accuracy": 0.4505951128528157, "eval_loss": 2.646484375, "eval_runtime": 39.8831, "eval_samples_per_second": 89.462, "eval_steps_per_second": 11.183, "step": 1315000 }, { "epoch": 0.4, "learning_rate": 4.805426873984461e-05, "loss": 2.4189, "step": 1320000 }, { "epoch": 0.4, "eval_accuracy": 0.5070354930762858, "eval_loss": 2.275390625, "eval_runtime": 39.9021, "eval_samples_per_second": 89.419, "eval_steps_per_second": 11.177, "step": 1320000 }, { "epoch": 0.4, "learning_rate": 4.797939144334789e-05, "loss": 2.2446, "step": 1325000 }, { "epoch": 0.4, "eval_accuracy": 0.5081412271127301, "eval_loss": 2.267578125, "eval_runtime": 39.8679, "eval_samples_per_second": 89.495, "eval_steps_per_second": 11.187, "step": 1325000 }, { "epoch": 0.4, "learning_rate": 4.790457406067113e-05, "loss": 2.2416, "step": 1330000 }, { "epoch": 0.4, "eval_accuracy": 0.5086858750969846, "eval_loss": 2.263671875, "eval_runtime": 39.8423, "eval_samples_per_second": 89.553, "eval_steps_per_second": 11.194, "step": 1330000 }, { "epoch": 0.4, "learning_rate": 4.78297267210844e-05, "loss": 2.2421, "step": 1335000 }, { "epoch": 0.4, "eval_accuracy": 0.5087765584384274, "eval_loss": 2.26171875, "eval_runtime": 39.9585, "eval_samples_per_second": 89.293, "eval_steps_per_second": 11.162, "step": 1335000 }, { "epoch": 0.4, "learning_rate": 4.775487938149766e-05, "loss": 2.2367, "step": 1340000 }, { "epoch": 0.4, "eval_accuracy": 0.509168606358683, "eval_loss": 2.26171875, "eval_runtime": 39.8472, "eval_samples_per_second": 89.542, "eval_steps_per_second": 11.193, "step": 1340000 }, { "epoch": 0.4, "learning_rate": 4.768001706345593e-05, "loss": 2.2355, "step": 1345000 }, { "epoch": 0.4, "eval_accuracy": 0.5090864160190068, "eval_loss": 2.259765625, "eval_runtime": 39.8651, "eval_samples_per_second": 89.502, "eval_steps_per_second": 11.188, "step": 1345000 }, { "epoch": 0.4, "learning_rate": 4.760518470232418e-05, "loss": 2.2379, "step": 1350000 }, { "epoch": 0.4, "eval_accuracy": 0.5093981913741786, "eval_loss": 2.259765625, "eval_runtime": 39.8769, "eval_samples_per_second": 89.475, "eval_steps_per_second": 11.184, "step": 1350000 }, { "epoch": 0.41, "learning_rate": 4.7530337362737444e-05, "loss": 2.2365, "step": 1355000 }, { "epoch": 0.41, "eval_accuracy": 0.509393259953798, "eval_loss": 2.259765625, "eval_runtime": 39.8496, "eval_samples_per_second": 89.537, "eval_steps_per_second": 11.192, "step": 1355000 }, { "epoch": 0.41, "learning_rate": 4.745550500160569e-05, "loss": 2.2379, "step": 1360000 }, { "epoch": 0.41, "eval_accuracy": 0.5091491546449597, "eval_loss": 2.2578125, "eval_runtime": 39.8235, "eval_samples_per_second": 89.595, "eval_steps_per_second": 11.199, "step": 1360000 }, { "epoch": 0.41, "learning_rate": 4.738067264047394e-05, "loss": 2.235, "step": 1365000 }, { "epoch": 0.41, "eval_accuracy": 0.5094527109661638, "eval_loss": 2.2578125, "eval_runtime": 38.8651, "eval_samples_per_second": 91.805, "eval_steps_per_second": 11.476, "step": 1365000 }, { "epoch": 0.41, "learning_rate": 4.730582530088721e-05, "loss": 2.236, "step": 1370000 }, { "epoch": 0.41, "eval_accuracy": 0.5093398362330085, "eval_loss": 2.2578125, "eval_runtime": 40.9049, "eval_samples_per_second": 87.227, "eval_steps_per_second": 10.903, "step": 1370000 }, { "epoch": 0.41, "learning_rate": 4.7230992939755456e-05, "loss": 2.2344, "step": 1375000 }, { "epoch": 0.41, "eval_accuracy": 0.5095472298567916, "eval_loss": 2.2578125, "eval_runtime": 39.9487, "eval_samples_per_second": 89.314, "eval_steps_per_second": 11.164, "step": 1375000 }, { "epoch": 0.41, "learning_rate": 4.715614560016872e-05, "loss": 2.2348, "step": 1380000 }, { "epoch": 0.41, "eval_accuracy": 0.5095688733129063, "eval_loss": 2.255859375, "eval_runtime": 39.951, "eval_samples_per_second": 89.309, "eval_steps_per_second": 11.164, "step": 1380000 }, { "epoch": 0.41, "learning_rate": 4.7081313239036975e-05, "loss": 2.2306, "step": 1385000 }, { "epoch": 0.41, "eval_accuracy": 0.5097368155736447, "eval_loss": 2.255859375, "eval_runtime": 40.0156, "eval_samples_per_second": 89.165, "eval_steps_per_second": 11.146, "step": 1385000 }, { "epoch": 0.42, "learning_rate": 4.7006495856360214e-05, "loss": 2.2293, "step": 1390000 }, { "epoch": 0.42, "eval_accuracy": 0.5097521577703843, "eval_loss": 2.255859375, "eval_runtime": 40.0042, "eval_samples_per_second": 89.191, "eval_steps_per_second": 11.149, "step": 1390000 }, { "epoch": 0.42, "learning_rate": 4.693166349522847e-05, "loss": 2.2311, "step": 1395000 }, { "epoch": 0.42, "eval_accuracy": 0.510102562585204, "eval_loss": 2.25390625, "eval_runtime": 39.9554, "eval_samples_per_second": 89.3, "eval_steps_per_second": 11.162, "step": 1395000 }, { "epoch": 0.42, "learning_rate": 4.685683113409672e-05, "loss": 2.231, "step": 1400000 }, { "epoch": 0.42, "eval_accuracy": 0.5101116035225683, "eval_loss": 2.25390625, "eval_runtime": 39.9882, "eval_samples_per_second": 89.226, "eval_steps_per_second": 11.153, "step": 1400000 }, { "epoch": 0.42, "learning_rate": 4.678201375141996e-05, "loss": 2.2272, "step": 1405000 }, { "epoch": 0.42, "eval_accuracy": 0.5102126976403701, "eval_loss": 2.251953125, "eval_runtime": 40.3186, "eval_samples_per_second": 88.495, "eval_steps_per_second": 11.062, "step": 1405000 }, { "epoch": 0.42, "learning_rate": 4.670718139028821e-05, "loss": 2.2264, "step": 1410000 }, { "epoch": 0.42, "eval_accuracy": 0.5102436560016482, "eval_loss": 2.25390625, "eval_runtime": 40.0007, "eval_samples_per_second": 89.198, "eval_steps_per_second": 11.15, "step": 1410000 }, { "epoch": 0.42, "learning_rate": 4.6632349029156465e-05, "loss": 2.2295, "step": 1415000 }, { "epoch": 0.42, "eval_accuracy": 0.5104469401084474, "eval_loss": 2.251953125, "eval_runtime": 40.1353, "eval_samples_per_second": 88.899, "eval_steps_per_second": 11.112, "step": 1415000 }, { "epoch": 0.43, "learning_rate": 4.655753164647971e-05, "loss": 2.2281, "step": 1420000 }, { "epoch": 0.43, "eval_accuracy": 0.5103937903554567, "eval_loss": 2.251953125, "eval_runtime": 40.0088, "eval_samples_per_second": 89.18, "eval_steps_per_second": 11.148, "step": 1420000 }, { "epoch": 0.43, "learning_rate": 4.6482699285347956e-05, "loss": 2.2234, "step": 1425000 }, { "epoch": 0.43, "eval_accuracy": 0.5106672102187797, "eval_loss": 2.25, "eval_runtime": 40.1918, "eval_samples_per_second": 88.774, "eval_steps_per_second": 11.097, "step": 1425000 }, { "epoch": 0.43, "learning_rate": 4.64078819026712e-05, "loss": 2.2293, "step": 1430000 }, { "epoch": 0.43, "eval_accuracy": 0.5107220037785639, "eval_loss": 2.25, "eval_runtime": 40.1338, "eval_samples_per_second": 88.903, "eval_steps_per_second": 11.113, "step": 1430000 }, { "epoch": 0.43, "learning_rate": 4.633306451999444e-05, "loss": 2.2256, "step": 1435000 }, { "epoch": 0.43, "eval_accuracy": 0.5108652889373995, "eval_loss": 2.25, "eval_runtime": 40.1228, "eval_samples_per_second": 88.927, "eval_steps_per_second": 11.116, "step": 1435000 }, { "epoch": 0.43, "learning_rate": 4.6258232158862694e-05, "loss": 2.2247, "step": 1440000 }, { "epoch": 0.43, "eval_accuracy": 0.5107954271486747, "eval_loss": 2.25, "eval_runtime": 40.0563, "eval_samples_per_second": 89.075, "eval_steps_per_second": 11.134, "step": 1440000 }, { "epoch": 0.43, "learning_rate": 4.6183384819275954e-05, "loss": 2.222, "step": 1445000 }, { "epoch": 0.43, "eval_accuracy": 0.510766386561989, "eval_loss": 2.25, "eval_runtime": 40.2217, "eval_samples_per_second": 88.708, "eval_steps_per_second": 11.089, "step": 1445000 }, { "epoch": 0.43, "learning_rate": 4.61085674365992e-05, "loss": 2.2228, "step": 1450000 }, { "epoch": 0.43, "eval_accuracy": 0.5106184439505719, "eval_loss": 2.248046875, "eval_runtime": 40.5305, "eval_samples_per_second": 88.032, "eval_steps_per_second": 11.004, "step": 1450000 }, { "epoch": 0.44, "learning_rate": 4.603372009701246e-05, "loss": 2.2241, "step": 1455000 }, { "epoch": 0.44, "eval_accuracy": 0.5110554225898505, "eval_loss": 2.248046875, "eval_runtime": 40.2413, "eval_samples_per_second": 88.665, "eval_steps_per_second": 11.083, "step": 1455000 }, { "epoch": 0.44, "learning_rate": 4.595891769279069e-05, "loss": 2.2219, "step": 1460000 }, { "epoch": 0.44, "eval_accuracy": 0.511077613981563, "eval_loss": 2.24609375, "eval_runtime": 40.2762, "eval_samples_per_second": 88.588, "eval_steps_per_second": 11.074, "step": 1460000 }, { "epoch": 0.44, "learning_rate": 4.5884085331658944e-05, "loss": 2.2219, "step": 1465000 }, { "epoch": 0.44, "eval_accuracy": 0.5112833637985525, "eval_loss": 2.24609375, "eval_runtime": 40.2334, "eval_samples_per_second": 88.682, "eval_steps_per_second": 11.085, "step": 1465000 }, { "epoch": 0.44, "learning_rate": 4.58092529705272e-05, "loss": 2.2215, "step": 1470000 }, { "epoch": 0.44, "eval_accuracy": 0.5112595286000464, "eval_loss": 2.24609375, "eval_runtime": 40.2987, "eval_samples_per_second": 88.539, "eval_steps_per_second": 11.067, "step": 1470000 }, { "epoch": 0.44, "learning_rate": 4.573445056630543e-05, "loss": 2.2193, "step": 1475000 }, { "epoch": 0.44, "eval_accuracy": 0.5116091115114694, "eval_loss": 2.244140625, "eval_runtime": 40.1594, "eval_samples_per_second": 88.846, "eval_steps_per_second": 11.106, "step": 1475000 }, { "epoch": 0.44, "learning_rate": 4.56595882482637e-05, "loss": 2.2183, "step": 1480000 }, { "epoch": 0.44, "eval_accuracy": 0.5114707577730144, "eval_loss": 2.244140625, "eval_runtime": 40.9626, "eval_samples_per_second": 87.104, "eval_steps_per_second": 10.888, "step": 1480000 }, { "epoch": 0.44, "learning_rate": 4.5584755887131956e-05, "loss": 2.2177, "step": 1485000 }, { "epoch": 0.44, "eval_accuracy": 0.5116211660946219, "eval_loss": 2.244140625, "eval_runtime": 40.3714, "eval_samples_per_second": 88.379, "eval_steps_per_second": 11.047, "step": 1485000 }, { "epoch": 0.45, "learning_rate": 4.55099235260002e-05, "loss": 2.2211, "step": 1490000 }, { "epoch": 0.45, "eval_accuracy": 0.5115852763129632, "eval_loss": 2.2421875, "eval_runtime": 40.2564, "eval_samples_per_second": 88.632, "eval_steps_per_second": 11.079, "step": 1490000 }, { "epoch": 0.45, "learning_rate": 4.5435091164868455e-05, "loss": 2.2183, "step": 1495000 }, { "epoch": 0.45, "eval_accuracy": 0.5118184229098449, "eval_loss": 2.2421875, "eval_runtime": 40.2315, "eval_samples_per_second": 88.687, "eval_steps_per_second": 11.086, "step": 1495000 }, { "epoch": 0.45, "learning_rate": 4.5360243825281715e-05, "loss": 2.2182, "step": 1500000 }, { "epoch": 0.45, "eval_accuracy": 0.5120186933708559, "eval_loss": 2.240234375, "eval_runtime": 40.3125, "eval_samples_per_second": 88.509, "eval_steps_per_second": 11.064, "step": 1500000 }, { "epoch": 0.45, "learning_rate": 4.528539648569498e-05, "loss": 2.2148, "step": 1505000 }, { "epoch": 0.45, "eval_accuracy": 0.5121849918248009, "eval_loss": 2.240234375, "eval_runtime": 40.3172, "eval_samples_per_second": 88.498, "eval_steps_per_second": 11.062, "step": 1505000 }, { "epoch": 0.45, "learning_rate": 4.521059408147321e-05, "loss": 2.2217, "step": 1510000 }, { "epoch": 0.45, "eval_accuracy": 0.5122685520034718, "eval_loss": 2.240234375, "eval_runtime": 40.2766, "eval_samples_per_second": 88.587, "eval_steps_per_second": 11.073, "step": 1510000 }, { "epoch": 0.45, "learning_rate": 4.5135761720341466e-05, "loss": 2.2117, "step": 1515000 }, { "epoch": 0.45, "eval_accuracy": 0.5123701940568713, "eval_loss": 2.23828125, "eval_runtime": 40.4763, "eval_samples_per_second": 88.15, "eval_steps_per_second": 11.019, "step": 1515000 }, { "epoch": 0.46, "learning_rate": 4.5060944337664705e-05, "loss": 2.2152, "step": 1520000 }, { "epoch": 0.46, "eval_accuracy": 0.5123003322681465, "eval_loss": 2.23828125, "eval_runtime": 40.309, "eval_samples_per_second": 88.516, "eval_steps_per_second": 11.065, "step": 1520000 }, { "epoch": 0.46, "learning_rate": 4.4986096998077965e-05, "loss": 2.2148, "step": 1525000 }, { "epoch": 0.46, "eval_accuracy": 0.5124967671799727, "eval_loss": 2.23828125, "eval_runtime": 40.2971, "eval_samples_per_second": 88.542, "eval_steps_per_second": 11.068, "step": 1525000 }, { "epoch": 0.46, "learning_rate": 4.491127961540121e-05, "loss": 2.2151, "step": 1530000 }, { "epoch": 0.46, "eval_accuracy": 0.5127488175549798, "eval_loss": 2.236328125, "eval_runtime": 40.3675, "eval_samples_per_second": 88.388, "eval_steps_per_second": 11.049, "step": 1530000 }, { "epoch": 0.46, "learning_rate": 4.483646223272445e-05, "loss": 2.2129, "step": 1535000 }, { "epoch": 0.46, "eval_accuracy": 0.5127022430291633, "eval_loss": 2.236328125, "eval_runtime": 40.4605, "eval_samples_per_second": 88.185, "eval_steps_per_second": 11.023, "step": 1535000 }, { "epoch": 0.46, "learning_rate": 4.47616298715927e-05, "loss": 2.2145, "step": 1540000 }, { "epoch": 0.46, "eval_accuracy": 0.5127690911721, "eval_loss": 2.236328125, "eval_runtime": 40.376, "eval_samples_per_second": 88.369, "eval_steps_per_second": 11.046, "step": 1540000 }, { "epoch": 0.46, "learning_rate": 4.468681248891595e-05, "loss": 2.2099, "step": 1545000 }, { "epoch": 0.46, "eval_accuracy": 0.5128871712934349, "eval_loss": 2.236328125, "eval_runtime": 40.3585, "eval_samples_per_second": 88.408, "eval_steps_per_second": 11.051, "step": 1545000 }, { "epoch": 0.46, "learning_rate": 4.46119651493292e-05, "loss": 2.2125, "step": 1550000 }, { "epoch": 0.46, "eval_accuracy": 0.5131964809384164, "eval_loss": 2.234375, "eval_runtime": 40.3163, "eval_samples_per_second": 88.5, "eval_steps_per_second": 11.063, "step": 1550000 }, { "epoch": 0.47, "learning_rate": 4.453713278819746e-05, "loss": 2.2101, "step": 1555000 }, { "epoch": 0.47, "eval_accuracy": 0.5130504561015916, "eval_loss": 2.234375, "eval_runtime": 40.2993, "eval_samples_per_second": 88.537, "eval_steps_per_second": 11.067, "step": 1555000 }, { "epoch": 0.47, "learning_rate": 4.4462300427065714e-05, "loss": 2.211, "step": 1560000 }, { "epoch": 0.47, "eval_accuracy": 0.5132159326521398, "eval_loss": 2.234375, "eval_runtime": 40.3465, "eval_samples_per_second": 88.434, "eval_steps_per_second": 11.054, "step": 1560000 }, { "epoch": 0.47, "learning_rate": 4.438743810902398e-05, "loss": 2.2086, "step": 1565000 }, { "epoch": 0.47, "eval_accuracy": 0.5131567556075729, "eval_loss": 2.234375, "eval_runtime": 40.256, "eval_samples_per_second": 88.633, "eval_steps_per_second": 11.079, "step": 1565000 }, { "epoch": 0.47, "learning_rate": 4.4312605747892234e-05, "loss": 2.2137, "step": 1570000 }, { "epoch": 0.47, "eval_accuracy": 0.5131545638651815, "eval_loss": 2.232421875, "eval_runtime": 40.2935, "eval_samples_per_second": 88.55, "eval_steps_per_second": 11.069, "step": 1570000 }, { "epoch": 0.47, "learning_rate": 4.423778836521548e-05, "loss": 2.2122, "step": 1575000 }, { "epoch": 0.47, "eval_accuracy": 0.5134236002437218, "eval_loss": 2.232421875, "eval_runtime": 40.3698, "eval_samples_per_second": 88.383, "eval_steps_per_second": 11.048, "step": 1575000 }, { "epoch": 0.47, "learning_rate": 4.416297098253872e-05, "loss": 2.2053, "step": 1580000 }, { "epoch": 0.47, "eval_accuracy": 0.5133767517501063, "eval_loss": 2.232421875, "eval_runtime": 40.3058, "eval_samples_per_second": 88.523, "eval_steps_per_second": 11.065, "step": 1580000 }, { "epoch": 0.47, "learning_rate": 4.408813862140697e-05, "loss": 2.208, "step": 1585000 }, { "epoch": 0.47, "eval_accuracy": 0.513388258397661, "eval_loss": 2.23046875, "eval_runtime": 40.3539, "eval_samples_per_second": 88.418, "eval_steps_per_second": 11.052, "step": 1585000 }, { "epoch": 0.48, "learning_rate": 4.401329128182023e-05, "loss": 2.2081, "step": 1590000 }, { "epoch": 0.48, "eval_accuracy": 0.5135512692380189, "eval_loss": 2.23046875, "eval_runtime": 42.8555, "eval_samples_per_second": 83.257, "eval_steps_per_second": 10.407, "step": 1590000 }, { "epoch": 0.48, "learning_rate": 4.3938458920688484e-05, "loss": 2.2077, "step": 1595000 }, { "epoch": 0.48, "eval_accuracy": 0.5137540054092202, "eval_loss": 2.23046875, "eval_runtime": 40.2657, "eval_samples_per_second": 88.611, "eval_steps_per_second": 11.076, "step": 1595000 }, { "epoch": 0.48, "learning_rate": 4.3863611581101744e-05, "loss": 2.2061, "step": 1600000 }, { "epoch": 0.48, "eval_accuracy": 0.5135893507620688, "eval_loss": 2.23046875, "eval_runtime": 41.6767, "eval_samples_per_second": 85.611, "eval_steps_per_second": 10.701, "step": 1600000 }, { "epoch": 0.48, "learning_rate": 4.378880917687998e-05, "loss": 2.2055, "step": 1605000 }, { "epoch": 0.48, "eval_accuracy": 0.5138961946968601, "eval_loss": 2.228515625, "eval_runtime": 40.3609, "eval_samples_per_second": 88.402, "eval_steps_per_second": 11.05, "step": 1605000 }, { "epoch": 0.48, "learning_rate": 4.371397681574823e-05, "loss": 2.2065, "step": 1610000 }, { "epoch": 0.48, "eval_accuracy": 0.5138923591476752, "eval_loss": 2.228515625, "eval_runtime": 40.453, "eval_samples_per_second": 88.201, "eval_steps_per_second": 11.025, "step": 1610000 }, { "epoch": 0.48, "learning_rate": 4.363914445461648e-05, "loss": 2.2054, "step": 1615000 }, { "epoch": 0.48, "eval_accuracy": 0.5138997562782461, "eval_loss": 2.228515625, "eval_runtime": 41.6251, "eval_samples_per_second": 85.718, "eval_steps_per_second": 10.715, "step": 1615000 }, { "epoch": 0.49, "learning_rate": 4.356432707193973e-05, "loss": 2.2035, "step": 1620000 }, { "epoch": 0.49, "eval_accuracy": 0.5140274252725432, "eval_loss": 2.228515625, "eval_runtime": 40.4365, "eval_samples_per_second": 88.237, "eval_steps_per_second": 11.03, "step": 1620000 }, { "epoch": 0.49, "learning_rate": 4.348947973235298e-05, "loss": 2.2021, "step": 1625000 }, { "epoch": 0.49, "eval_accuracy": 0.5139844123281126, "eval_loss": 2.228515625, "eval_runtime": 40.3492, "eval_samples_per_second": 88.428, "eval_steps_per_second": 11.054, "step": 1625000 }, { "epoch": 0.49, "learning_rate": 4.3414662349676226e-05, "loss": 2.2036, "step": 1630000 }, { "epoch": 0.49, "eval_accuracy": 0.5138233192623471, "eval_loss": 2.228515625, "eval_runtime": 40.3662, "eval_samples_per_second": 88.391, "eval_steps_per_second": 11.049, "step": 1630000 }, { "epoch": 0.49, "learning_rate": 4.333981501008949e-05, "loss": 2.204, "step": 1635000 }, { "epoch": 0.49, "eval_accuracy": 0.5139803028111288, "eval_loss": 2.2265625, "eval_runtime": 40.2896, "eval_samples_per_second": 88.559, "eval_steps_per_second": 11.07, "step": 1635000 }, { "epoch": 0.49, "learning_rate": 4.3264967670502746e-05, "loss": 2.2042, "step": 1640000 }, { "epoch": 0.49, "eval_accuracy": 0.5140860543815122, "eval_loss": 2.2265625, "eval_runtime": 42.4068, "eval_samples_per_second": 84.137, "eval_steps_per_second": 10.517, "step": 1640000 }, { "epoch": 0.49, "learning_rate": 4.3190135309371006e-05, "loss": 2.2024, "step": 1645000 }, { "epoch": 0.49, "eval_accuracy": 0.514173450109368, "eval_loss": 2.2265625, "eval_runtime": 40.3018, "eval_samples_per_second": 88.532, "eval_steps_per_second": 11.067, "step": 1645000 }, { "epoch": 0.49, "learning_rate": 4.311530294823926e-05, "loss": 2.2023, "step": 1650000 }, { "epoch": 0.49, "eval_accuracy": 0.5144172814504074, "eval_loss": 2.2265625, "eval_runtime": 40.2694, "eval_samples_per_second": 88.603, "eval_steps_per_second": 11.075, "step": 1650000 }, { "epoch": 0.5, "learning_rate": 4.30404855655625e-05, "loss": 2.1976, "step": 1655000 }, { "epoch": 0.5, "eval_accuracy": 0.5145805662585642, "eval_loss": 2.224609375, "eval_runtime": 40.3729, "eval_samples_per_second": 88.376, "eval_steps_per_second": 11.047, "step": 1655000 }, { "epoch": 0.5, "learning_rate": 4.296565320443075e-05, "loss": 2.2028, "step": 1660000 }, { "epoch": 0.5, "eval_accuracy": 0.5147172761902257, "eval_loss": 2.224609375, "eval_runtime": 40.282, "eval_samples_per_second": 88.576, "eval_steps_per_second": 11.072, "step": 1660000 }, { "epoch": 0.5, "learning_rate": 4.289080586484401e-05, "loss": 2.1971, "step": 1665000 }, { "epoch": 0.5, "eval_accuracy": 0.5146457705947074, "eval_loss": 2.224609375, "eval_runtime": 40.4909, "eval_samples_per_second": 88.119, "eval_steps_per_second": 11.015, "step": 1665000 }, { "epoch": 0.5, "learning_rate": 4.281595852525727e-05, "loss": 2.1978, "step": 1670000 }, { "epoch": 0.5, "eval_accuracy": 0.5146065931994617, "eval_loss": 2.224609375, "eval_runtime": 40.3534, "eval_samples_per_second": 88.419, "eval_steps_per_second": 11.052, "step": 1670000 }, { "epoch": 0.5, "learning_rate": 4.27411561210355e-05, "loss": 2.1955, "step": 1675000 }, { "epoch": 0.5, "eval_accuracy": 0.5148249455352015, "eval_loss": 2.22265625, "eval_runtime": 39.3164, "eval_samples_per_second": 90.751, "eval_steps_per_second": 11.344, "step": 1675000 }, { "epoch": 0.5, "learning_rate": 4.266630878144877e-05, "loss": 2.1967, "step": 1680000 }, { "epoch": 0.5, "eval_accuracy": 0.5146874137001434, "eval_loss": 2.22265625, "eval_runtime": 40.3287, "eval_samples_per_second": 88.473, "eval_steps_per_second": 11.059, "step": 1680000 }, { "epoch": 0.5, "learning_rate": 4.259149139877201e-05, "loss": 2.1975, "step": 1685000 }, { "epoch": 0.5, "eval_accuracy": 0.5151745284466245, "eval_loss": 2.22265625, "eval_runtime": 40.2734, "eval_samples_per_second": 88.594, "eval_steps_per_second": 11.074, "step": 1685000 }, { "epoch": 0.51, "learning_rate": 4.251668899455024e-05, "loss": 2.1972, "step": 1690000 }, { "epoch": 0.51, "eval_accuracy": 0.5148622051558548, "eval_loss": 2.220703125, "eval_runtime": 40.2657, "eval_samples_per_second": 88.611, "eval_steps_per_second": 11.076, "step": 1690000 }, { "epoch": 0.51, "learning_rate": 4.2441841654963506e-05, "loss": 2.1967, "step": 1695000 }, { "epoch": 0.51, "eval_accuracy": 0.5150745302000184, "eval_loss": 2.220703125, "eval_runtime": 40.2491, "eval_samples_per_second": 88.648, "eval_steps_per_second": 11.081, "step": 1695000 }, { "epoch": 0.51, "learning_rate": 4.236699431537676e-05, "loss": 2.194, "step": 1700000 }, { "epoch": 0.51, "eval_accuracy": 0.5150706946508335, "eval_loss": 2.220703125, "eval_runtime": 40.3323, "eval_samples_per_second": 88.465, "eval_steps_per_second": 11.058, "step": 1700000 }, { "epoch": 0.51, "learning_rate": 4.9925167638868255e-05, "loss": 2.2009, "step": 1705000 }, { "epoch": 0.51, "eval_accuracy": 0.5139096191190072, "eval_loss": 2.228515625, "eval_runtime": 39.4304, "eval_samples_per_second": 90.489, "eval_steps_per_second": 11.311, "step": 1705000 }, { "epoch": 0.51, "learning_rate": 4.9850350256191494e-05, "loss": 2.2085, "step": 1710000 }, { "epoch": 0.51, "eval_accuracy": 0.5136279802217166, "eval_loss": 2.23046875, "eval_runtime": 39.2325, "eval_samples_per_second": 90.945, "eval_steps_per_second": 11.368, "step": 1710000 }, { "epoch": 0.51, "learning_rate": 4.977551789505975e-05, "loss": 2.2077, "step": 1715000 }, { "epoch": 0.51, "eval_accuracy": 0.5136983899460393, "eval_loss": 2.23046875, "eval_runtime": 39.2977, "eval_samples_per_second": 90.794, "eval_steps_per_second": 11.349, "step": 1715000 }, { "epoch": 0.52, "learning_rate": 4.9700670555473014e-05, "loss": 2.205, "step": 1720000 }, { "epoch": 0.52, "eval_accuracy": 0.51339866917402, "eval_loss": 2.23046875, "eval_runtime": 39.3375, "eval_samples_per_second": 90.702, "eval_steps_per_second": 11.338, "step": 1720000 }, { "epoch": 0.52, "learning_rate": 4.962583819434126e-05, "loss": 2.2063, "step": 1725000 }, { "epoch": 0.52, "eval_accuracy": 0.5134490792490214, "eval_loss": 2.23046875, "eval_runtime": 39.3134, "eval_samples_per_second": 90.758, "eval_steps_per_second": 11.345, "step": 1725000 }, { "epoch": 0.52, "learning_rate": 4.9551020811664506e-05, "loss": 2.2076, "step": 1730000 }, { "epoch": 0.52, "eval_accuracy": 0.5134718185763318, "eval_loss": 2.23046875, "eval_runtime": 39.2618, "eval_samples_per_second": 90.877, "eval_steps_per_second": 11.36, "step": 1730000 }, { "epoch": 0.52, "learning_rate": 4.9476173472077765e-05, "loss": 2.2036, "step": 1735000 }, { "epoch": 0.52, "eval_accuracy": 0.5133134651885556, "eval_loss": 2.23046875, "eval_runtime": 39.3642, "eval_samples_per_second": 90.641, "eval_steps_per_second": 11.33, "step": 1735000 }, { "epoch": 0.52, "learning_rate": 4.9401356089401005e-05, "loss": 2.2064, "step": 1740000 }, { "epoch": 0.52, "eval_accuracy": 0.5138024977096292, "eval_loss": 2.23046875, "eval_runtime": 39.4343, "eval_samples_per_second": 90.48, "eval_steps_per_second": 11.31, "step": 1740000 }, { "epoch": 0.52, "learning_rate": 4.932650874981427e-05, "loss": 2.2053, "step": 1745000 }, { "epoch": 0.52, "eval_accuracy": 0.5136904448798706, "eval_loss": 2.23046875, "eval_runtime": 39.4148, "eval_samples_per_second": 90.524, "eval_steps_per_second": 11.316, "step": 1745000 }, { "epoch": 0.52, "learning_rate": 4.9251676388682524e-05, "loss": 2.2048, "step": 1750000 }, { "epoch": 0.52, "eval_accuracy": 0.5138953727934633, "eval_loss": 2.23046875, "eval_runtime": 39.3715, "eval_samples_per_second": 90.624, "eval_steps_per_second": 11.328, "step": 1750000 }, { "epoch": 0.53, "learning_rate": 4.917684402755077e-05, "loss": 2.2075, "step": 1755000 }, { "epoch": 0.53, "eval_accuracy": 0.5137868815450907, "eval_loss": 2.23046875, "eval_runtime": 39.4167, "eval_samples_per_second": 90.52, "eval_steps_per_second": 11.315, "step": 1755000 }, { "epoch": 0.53, "learning_rate": 4.910201166641902e-05, "loss": 2.2041, "step": 1760000 }, { "epoch": 0.53, "eval_accuracy": 0.5136414046438638, "eval_loss": 2.228515625, "eval_runtime": 39.46, "eval_samples_per_second": 90.421, "eval_steps_per_second": 11.303, "step": 1760000 }, { "epoch": 0.53, "learning_rate": 4.902716432683229e-05, "loss": 2.2057, "step": 1765000 }, { "epoch": 0.53, "eval_accuracy": 0.5138789347255281, "eval_loss": 2.228515625, "eval_runtime": 39.4616, "eval_samples_per_second": 90.417, "eval_steps_per_second": 11.302, "step": 1765000 }, { "epoch": 0.53, "learning_rate": 4.8952331965700536e-05, "loss": 2.2054, "step": 1770000 }, { "epoch": 0.53, "eval_accuracy": 0.5138929070832731, "eval_loss": 2.228515625, "eval_runtime": 39.5693, "eval_samples_per_second": 90.171, "eval_steps_per_second": 11.271, "step": 1770000 }, { "epoch": 0.53, "learning_rate": 4.887752956147877e-05, "loss": 2.2085, "step": 1775000 }, { "epoch": 0.53, "eval_accuracy": 0.5139266051225403, "eval_loss": 2.228515625, "eval_runtime": 39.4709, "eval_samples_per_second": 90.396, "eval_steps_per_second": 11.299, "step": 1775000 }, { "epoch": 0.53, "learning_rate": 4.880269720034703e-05, "loss": 2.2051, "step": 1780000 }, { "epoch": 0.53, "eval_accuracy": 0.5141471492006715, "eval_loss": 2.2265625, "eval_runtime": 39.4552, "eval_samples_per_second": 90.432, "eval_steps_per_second": 11.304, "step": 1780000 }, { "epoch": 0.53, "learning_rate": 4.872786483921527e-05, "loss": 2.2023, "step": 1785000 }, { "epoch": 0.53, "eval_accuracy": 0.5139211257665619, "eval_loss": 2.2265625, "eval_runtime": 39.5212, "eval_samples_per_second": 90.281, "eval_steps_per_second": 11.285, "step": 1785000 }, { "epoch": 0.54, "learning_rate": 4.8653032478083526e-05, "loss": 2.205, "step": 1790000 }, { "epoch": 0.54, "eval_accuracy": 0.5140561918914298, "eval_loss": 2.2265625, "eval_runtime": 39.4725, "eval_samples_per_second": 90.392, "eval_steps_per_second": 11.299, "step": 1790000 }, { "epoch": 0.54, "learning_rate": 4.857818513849679e-05, "loss": 2.2009, "step": 1795000 }, { "epoch": 0.54, "eval_accuracy": 0.5141463272972748, "eval_loss": 2.2265625, "eval_runtime": 39.6114, "eval_samples_per_second": 90.075, "eval_steps_per_second": 11.259, "step": 1795000 }, { "epoch": 0.54, "learning_rate": 4.850335277736504e-05, "loss": 2.1998, "step": 1800000 }, { "epoch": 0.54, "eval_accuracy": 0.5143134476546165, "eval_loss": 2.2265625, "eval_runtime": 39.8498, "eval_samples_per_second": 89.536, "eval_steps_per_second": 11.192, "step": 1800000 }, { "epoch": 0.54, "learning_rate": 4.8428505437778305e-05, "loss": 2.2009, "step": 1805000 }, { "epoch": 0.54, "eval_accuracy": 0.5143608440838298, "eval_loss": 2.224609375, "eval_runtime": 38.3896, "eval_samples_per_second": 92.942, "eval_steps_per_second": 11.618, "step": 1805000 }, { "epoch": 0.54, "learning_rate": 4.8353688055101544e-05, "loss": 2.2027, "step": 1810000 }, { "epoch": 0.54, "eval_accuracy": 0.5143013930714639, "eval_loss": 2.2265625, "eval_runtime": 39.6231, "eval_samples_per_second": 90.048, "eval_steps_per_second": 11.256, "step": 1810000 }, { "epoch": 0.54, "learning_rate": 4.8278840715514804e-05, "loss": 2.2007, "step": 1815000 }, { "epoch": 0.54, "eval_accuracy": 0.5145857716467437, "eval_loss": 2.224609375, "eval_runtime": 39.6832, "eval_samples_per_second": 89.912, "eval_steps_per_second": 11.239, "step": 1815000 }, { "epoch": 0.55, "learning_rate": 4.820402333283805e-05, "loss": 2.1978, "step": 1820000 }, { "epoch": 0.55, "eval_accuracy": 0.5144972800476924, "eval_loss": 2.224609375, "eval_runtime": 39.721, "eval_samples_per_second": 89.827, "eval_steps_per_second": 11.228, "step": 1820000 }, { "epoch": 0.55, "learning_rate": 4.812920595016129e-05, "loss": 2.1999, "step": 1825000 }, { "epoch": 0.55, "eval_accuracy": 0.5145970043264995, "eval_loss": 2.22265625, "eval_runtime": 39.6521, "eval_samples_per_second": 89.983, "eval_steps_per_second": 11.248, "step": 1825000 }, { "epoch": 0.55, "learning_rate": 4.8054388567484535e-05, "loss": 2.1978, "step": 1830000 }, { "epoch": 0.55, "eval_accuracy": 0.5148150826944404, "eval_loss": 2.22265625, "eval_runtime": 39.5596, "eval_samples_per_second": 90.193, "eval_steps_per_second": 11.274, "step": 1830000 }, { "epoch": 0.55, "learning_rate": 4.79795262494428e-05, "loss": 2.1989, "step": 1835000 }, { "epoch": 0.55, "eval_accuracy": 0.5147271390309869, "eval_loss": 2.22265625, "eval_runtime": 39.6103, "eval_samples_per_second": 90.077, "eval_steps_per_second": 11.26, "step": 1835000 }, { "epoch": 0.55, "learning_rate": 4.790467890985607e-05, "loss": 2.1989, "step": 1840000 }, { "epoch": 0.55, "eval_accuracy": 0.5147980966909074, "eval_loss": 2.22265625, "eval_runtime": 39.6511, "eval_samples_per_second": 89.985, "eval_steps_per_second": 11.248, "step": 1840000 }, { "epoch": 0.55, "learning_rate": 4.7829876505634294e-05, "loss": 2.1982, "step": 1845000 }, { "epoch": 0.55, "eval_accuracy": 0.5149764497280047, "eval_loss": 2.220703125, "eval_runtime": 39.6684, "eval_samples_per_second": 89.946, "eval_steps_per_second": 11.243, "step": 1845000 }, { "epoch": 0.55, "learning_rate": 4.775501418759257e-05, "loss": 2.1974, "step": 1850000 }, { "epoch": 0.55, "eval_accuracy": 0.515062201649067, "eval_loss": 2.220703125, "eval_runtime": 39.5826, "eval_samples_per_second": 90.141, "eval_steps_per_second": 11.268, "step": 1850000 }, { "epoch": 0.56, "learning_rate": 4.768016684800583e-05, "loss": 2.1972, "step": 1855000 }, { "epoch": 0.56, "eval_accuracy": 0.5151389126327648, "eval_loss": 2.220703125, "eval_runtime": 39.774, "eval_samples_per_second": 89.707, "eval_steps_per_second": 11.213, "step": 1855000 }, { "epoch": 0.56, "learning_rate": 4.760533448687408e-05, "loss": 2.1966, "step": 1860000 }, { "epoch": 0.56, "eval_accuracy": 0.515106858400291, "eval_loss": 2.220703125, "eval_runtime": 39.7218, "eval_samples_per_second": 89.825, "eval_steps_per_second": 11.228, "step": 1860000 }, { "epoch": 0.56, "learning_rate": 4.753050212574233e-05, "loss": 2.198, "step": 1865000 }, { "epoch": 0.56, "eval_accuracy": 0.5150282296420008, "eval_loss": 2.220703125, "eval_runtime": 39.6783, "eval_samples_per_second": 89.923, "eval_steps_per_second": 11.24, "step": 1865000 }, { "epoch": 0.56, "learning_rate": 4.7455669764610586e-05, "loss": 2.1978, "step": 1870000 }, { "epoch": 0.56, "eval_accuracy": 0.5151887747721684, "eval_loss": 2.220703125, "eval_runtime": 39.753, "eval_samples_per_second": 89.754, "eval_steps_per_second": 11.219, "step": 1870000 }, { "epoch": 0.56, "learning_rate": 4.738083740347884e-05, "loss": 2.1938, "step": 1875000 }, { "epoch": 0.56, "eval_accuracy": 0.5152000074519242, "eval_loss": 2.220703125, "eval_runtime": 39.7549, "eval_samples_per_second": 89.75, "eval_steps_per_second": 11.219, "step": 1875000 }, { "epoch": 0.56, "learning_rate": 4.730600504234709e-05, "loss": 2.1908, "step": 1880000 }, { "epoch": 0.56, "eval_accuracy": 0.5152191851978486, "eval_loss": 2.21875, "eval_runtime": 39.6445, "eval_samples_per_second": 90.0, "eval_steps_per_second": 11.25, "step": 1880000 }, { "epoch": 0.56, "learning_rate": 4.723118765967034e-05, "loss": 2.1899, "step": 1885000 }, { "epoch": 0.56, "eval_accuracy": 0.5151602821210807, "eval_loss": 2.21875, "eval_runtime": 39.7932, "eval_samples_per_second": 89.664, "eval_steps_per_second": 11.208, "step": 1885000 }, { "epoch": 0.57, "learning_rate": 4.715634032008359e-05, "loss": 2.1938, "step": 1890000 }, { "epoch": 0.57, "eval_accuracy": 0.5151682271872493, "eval_loss": 2.21875, "eval_runtime": 39.7818, "eval_samples_per_second": 89.689, "eval_steps_per_second": 11.211, "step": 1890000 }, { "epoch": 0.57, "learning_rate": 4.708150795895185e-05, "loss": 2.1909, "step": 1895000 }, { "epoch": 0.57, "eval_accuracy": 0.5153520595803252, "eval_loss": 2.21875, "eval_runtime": 39.8181, "eval_samples_per_second": 89.607, "eval_steps_per_second": 11.201, "step": 1895000 }, { "epoch": 0.57, "learning_rate": 4.700669057627509e-05, "loss": 2.1921, "step": 1900000 }, { "epoch": 0.57, "eval_accuracy": 0.5155320564242162, "eval_loss": 2.21875, "eval_runtime": 40.2538, "eval_samples_per_second": 88.638, "eval_steps_per_second": 11.08, "step": 1900000 }, { "epoch": 0.57, "learning_rate": 4.693187319359833e-05, "loss": 2.1926, "step": 1905000 }, { "epoch": 0.57, "eval_accuracy": 0.5155928772755766, "eval_loss": 2.216796875, "eval_runtime": 39.9117, "eval_samples_per_second": 89.397, "eval_steps_per_second": 11.175, "step": 1905000 }, { "epoch": 0.57, "learning_rate": 4.685704083246658e-05, "loss": 2.194, "step": 1910000 }, { "epoch": 0.57, "eval_accuracy": 0.5154164420130716, "eval_loss": 2.216796875, "eval_runtime": 39.9039, "eval_samples_per_second": 89.415, "eval_steps_per_second": 11.177, "step": 1910000 }, { "epoch": 0.57, "learning_rate": 4.992518261732325e-05, "loss": 2.1942, "step": 1915000 }, { "epoch": 0.57, "eval_accuracy": 0.5151652135414612, "eval_loss": 2.21875, "eval_runtime": 39.5459, "eval_samples_per_second": 90.224, "eval_steps_per_second": 11.278, "step": 1915000 }, { "epoch": 0.58, "learning_rate": 4.985036523464649e-05, "loss": 2.1947, "step": 1920000 }, { "epoch": 0.58, "eval_accuracy": 0.5150559003896918, "eval_loss": 2.21875, "eval_runtime": 39.6198, "eval_samples_per_second": 90.056, "eval_steps_per_second": 11.257, "step": 1920000 }, { "epoch": 0.58, "learning_rate": 4.977553287351474e-05, "loss": 2.1941, "step": 1925000 }, { "epoch": 0.58, "eval_accuracy": 0.5150868587509698, "eval_loss": 2.220703125, "eval_runtime": 39.5078, "eval_samples_per_second": 90.311, "eval_steps_per_second": 11.289, "step": 1925000 }, { "epoch": 0.58, "learning_rate": 4.970071549083798e-05, "loss": 2.1984, "step": 1930000 }, { "epoch": 0.58, "eval_accuracy": 0.5151849392229835, "eval_loss": 2.220703125, "eval_runtime": 39.6089, "eval_samples_per_second": 90.081, "eval_steps_per_second": 11.26, "step": 1930000 }, { "epoch": 0.58, "learning_rate": 4.9625868151251246e-05, "loss": 2.1929, "step": 1935000 }, { "epoch": 0.58, "eval_accuracy": 0.5150846670085785, "eval_loss": 2.220703125, "eval_runtime": 39.574, "eval_samples_per_second": 90.16, "eval_steps_per_second": 11.27, "step": 1935000 }, { "epoch": 0.58, "learning_rate": 4.955106574702947e-05, "loss": 2.1921, "step": 1940000 }, { "epoch": 0.58, "eval_accuracy": 0.515442742921768, "eval_loss": 2.21875, "eval_runtime": 39.6485, "eval_samples_per_second": 89.991, "eval_steps_per_second": 11.249, "step": 1940000 }, { "epoch": 0.58, "learning_rate": 4.947621840744274e-05, "loss": 2.1932, "step": 1945000 }, { "epoch": 0.58, "eval_accuracy": 0.5153391830937759, "eval_loss": 2.21875, "eval_runtime": 39.6644, "eval_samples_per_second": 89.955, "eval_steps_per_second": 11.244, "step": 1945000 }, { "epoch": 0.58, "learning_rate": 4.940138604631099e-05, "loss": 2.1959, "step": 1950000 }, { "epoch": 0.58, "eval_accuracy": 0.5154117845604899, "eval_loss": 2.21875, "eval_runtime": 39.6108, "eval_samples_per_second": 90.077, "eval_steps_per_second": 11.26, "step": 1950000 }, { "epoch": 0.59, "learning_rate": 4.932656866363423e-05, "loss": 2.1927, "step": 1955000 }, { "epoch": 0.59, "eval_accuracy": 0.5153761687466302, "eval_loss": 2.21875, "eval_runtime": 39.6613, "eval_samples_per_second": 89.962, "eval_steps_per_second": 11.245, "step": 1955000 }, { "epoch": 0.59, "learning_rate": 4.9251751280957475e-05, "loss": 2.1949, "step": 1960000 }, { "epoch": 0.59, "eval_accuracy": 0.5154917831577748, "eval_loss": 2.21875, "eval_runtime": 39.8742, "eval_samples_per_second": 89.481, "eval_steps_per_second": 11.185, "step": 1960000 }, { "epoch": 0.59, "learning_rate": 4.9176933898280714e-05, "loss": 2.1918, "step": 1965000 }, { "epoch": 0.59, "eval_accuracy": 0.5153786344568205, "eval_loss": 2.216796875, "eval_runtime": 39.9956, "eval_samples_per_second": 89.21, "eval_steps_per_second": 11.151, "step": 1965000 }, { "epoch": 0.59, "learning_rate": 4.910210153714897e-05, "loss": 2.1957, "step": 1970000 }, { "epoch": 0.59, "eval_accuracy": 0.5154745231864427, "eval_loss": 2.216796875, "eval_runtime": 39.6981, "eval_samples_per_second": 89.878, "eval_steps_per_second": 11.235, "step": 1970000 }, { "epoch": 0.59, "learning_rate": 4.902726917601722e-05, "loss": 2.1884, "step": 1975000 }, { "epoch": 0.59, "eval_accuracy": 0.51571890246308, "eval_loss": 2.216796875, "eval_runtime": 39.9761, "eval_samples_per_second": 89.253, "eval_steps_per_second": 11.157, "step": 1975000 }, { "epoch": 0.59, "learning_rate": 4.895245179334046e-05, "loss": 2.1942, "step": 1980000 }, { "epoch": 0.59, "eval_accuracy": 0.5156073975689194, "eval_loss": 2.21484375, "eval_runtime": 39.9647, "eval_samples_per_second": 89.279, "eval_steps_per_second": 11.16, "step": 1980000 }, { "epoch": 0.59, "learning_rate": 4.8877604453753726e-05, "loss": 2.1938, "step": 1985000 }, { "epoch": 0.59, "eval_accuracy": 0.5155802747568262, "eval_loss": 2.216796875, "eval_runtime": 39.7886, "eval_samples_per_second": 89.674, "eval_steps_per_second": 11.209, "step": 1985000 }, { "epoch": 0.6, "learning_rate": 4.8802787071076965e-05, "loss": 2.1935, "step": 1990000 }, { "epoch": 0.6, "eval_accuracy": 0.5160071165875447, "eval_loss": 2.21484375, "eval_runtime": 40.1621, "eval_samples_per_second": 88.84, "eval_steps_per_second": 11.105, "step": 1990000 }, { "epoch": 0.6, "learning_rate": 4.872795470994522e-05, "loss": 2.1902, "step": 1995000 }, { "epoch": 0.6, "eval_accuracy": 0.5157413678225916, "eval_loss": 2.21484375, "eval_runtime": 39.8406, "eval_samples_per_second": 89.557, "eval_steps_per_second": 11.195, "step": 1995000 }, { "epoch": 0.6, "learning_rate": 4.865310737035848e-05, "loss": 2.188, "step": 2000000 }, { "epoch": 0.6, "eval_accuracy": 0.5158473933607739, "eval_loss": 2.21484375, "eval_runtime": 39.7424, "eval_samples_per_second": 89.778, "eval_steps_per_second": 11.222, "step": 2000000 }, { "epoch": 0.6, "learning_rate": 4.8578289987681716e-05, "loss": 2.1862, "step": 2005000 }, { "epoch": 0.6, "eval_accuracy": 0.5159235564088739, "eval_loss": 2.212890625, "eval_runtime": 39.7499, "eval_samples_per_second": 89.761, "eval_steps_per_second": 11.22, "step": 2005000 }, { "epoch": 0.6, "learning_rate": 4.850347260500496e-05, "loss": 2.1886, "step": 2010000 }, { "epoch": 0.6, "eval_accuracy": 0.5160695812456987, "eval_loss": 2.212890625, "eval_runtime": 40.1441, "eval_samples_per_second": 88.88, "eval_steps_per_second": 11.11, "step": 2010000 }, { "epoch": 0.6, "learning_rate": 4.8428610286963236e-05, "loss": 2.1811, "step": 2015000 }, { "epoch": 0.6, "eval_accuracy": 0.516141360809016, "eval_loss": 2.212890625, "eval_runtime": 40.0164, "eval_samples_per_second": 89.163, "eval_steps_per_second": 11.145, "step": 2015000 }, { "epoch": 0.61, "learning_rate": 4.835377792583148e-05, "loss": 2.19, "step": 2020000 }, { "epoch": 0.61, "eval_accuracy": 0.5160435543048012, "eval_loss": 2.212890625, "eval_runtime": 39.7821, "eval_samples_per_second": 89.689, "eval_steps_per_second": 11.211, "step": 2020000 }, { "epoch": 0.61, "learning_rate": 4.827893058624475e-05, "loss": 2.1895, "step": 2025000 }, { "epoch": 0.61, "eval_accuracy": 0.5164950532374227, "eval_loss": 2.212890625, "eval_runtime": 39.7945, "eval_samples_per_second": 89.661, "eval_steps_per_second": 11.208, "step": 2025000 }, { "epoch": 0.61, "learning_rate": 4.820411320356799e-05, "loss": 2.1904, "step": 2030000 }, { "epoch": 0.61, "eval_accuracy": 0.516082457732248, "eval_loss": 2.212890625, "eval_runtime": 39.8538, "eval_samples_per_second": 89.527, "eval_steps_per_second": 11.191, "step": 2030000 }, { "epoch": 0.61, "learning_rate": 4.812928084243624e-05, "loss": 2.1854, "step": 2035000 }, { "epoch": 0.61, "eval_accuracy": 0.516504368142586, "eval_loss": 2.212890625, "eval_runtime": 39.7999, "eval_samples_per_second": 89.649, "eval_steps_per_second": 11.206, "step": 2035000 }, { "epoch": 0.61, "learning_rate": 4.805447843821447e-05, "loss": 2.1883, "step": 2040000 }, { "epoch": 0.61, "eval_accuracy": 0.51652875127669, "eval_loss": 2.2109375, "eval_runtime": 39.8414, "eval_samples_per_second": 89.555, "eval_steps_per_second": 11.194, "step": 2040000 }, { "epoch": 0.61, "learning_rate": 4.7979646077082725e-05, "loss": 2.1859, "step": 2045000 }, { "epoch": 0.61, "eval_accuracy": 0.5165314909546791, "eval_loss": 2.2109375, "eval_runtime": 39.9091, "eval_samples_per_second": 89.403, "eval_steps_per_second": 11.175, "step": 2045000 }, { "epoch": 0.61, "learning_rate": 4.790481371595098e-05, "loss": 2.1849, "step": 2050000 }, { "epoch": 0.61, "eval_accuracy": 0.5167750483279198, "eval_loss": 2.208984375, "eval_runtime": 39.8945, "eval_samples_per_second": 89.436, "eval_steps_per_second": 11.179, "step": 2050000 }, { "epoch": 0.62, "learning_rate": 4.782996637636424e-05, "loss": 2.1844, "step": 2055000 }, { "epoch": 0.62, "eval_accuracy": 0.5167309395122934, "eval_loss": 2.2109375, "eval_runtime": 39.9417, "eval_samples_per_second": 89.33, "eval_steps_per_second": 11.166, "step": 2055000 }, { "epoch": 0.62, "learning_rate": 4.7755148993687484e-05, "loss": 2.1866, "step": 2060000 }, { "epoch": 0.62, "eval_accuracy": 0.5166953236984337, "eval_loss": 2.208984375, "eval_runtime": 39.9499, "eval_samples_per_second": 89.312, "eval_steps_per_second": 11.164, "step": 2060000 }, { "epoch": 0.62, "learning_rate": 4.768031663255573e-05, "loss": 2.1865, "step": 2065000 }, { "epoch": 0.62, "eval_accuracy": 0.5167821714906917, "eval_loss": 2.208984375, "eval_runtime": 39.8531, "eval_samples_per_second": 89.529, "eval_steps_per_second": 11.191, "step": 2065000 }, { "epoch": 0.62, "learning_rate": 4.7605499249878976e-05, "loss": 2.1846, "step": 2070000 }, { "epoch": 0.62, "eval_accuracy": 0.5171333982089081, "eval_loss": 2.20703125, "eval_runtime": 39.9613, "eval_samples_per_second": 89.286, "eval_steps_per_second": 11.161, "step": 2070000 }, { "epoch": 0.62, "learning_rate": 4.753066688874723e-05, "loss": 2.1821, "step": 2075000 }, { "epoch": 0.62, "eval_accuracy": 0.5169824419517027, "eval_loss": 2.20703125, "eval_runtime": 39.9564, "eval_samples_per_second": 89.297, "eval_steps_per_second": 11.162, "step": 2075000 }, { "epoch": 0.62, "learning_rate": 4.7455849506070474e-05, "loss": 2.184, "step": 2080000 }, { "epoch": 0.62, "eval_accuracy": 0.5170257288639323, "eval_loss": 2.20703125, "eval_runtime": 41.0867, "eval_samples_per_second": 86.841, "eval_steps_per_second": 10.855, "step": 2080000 }, { "epoch": 0.62, "learning_rate": 4.738101714493872e-05, "loss": 2.1847, "step": 2085000 }, { "epoch": 0.62, "eval_accuracy": 0.5173227099579624, "eval_loss": 2.205078125, "eval_runtime": 39.9314, "eval_samples_per_second": 89.353, "eval_steps_per_second": 11.169, "step": 2085000 }, { "epoch": 0.63, "learning_rate": 4.730618478380697e-05, "loss": 2.1836, "step": 2090000 }, { "epoch": 0.63, "eval_accuracy": 0.5173509286412512, "eval_loss": 2.205078125, "eval_runtime": 40.0844, "eval_samples_per_second": 89.012, "eval_steps_per_second": 11.127, "step": 2090000 }, { "epoch": 0.63, "learning_rate": 4.723136740113022e-05, "loss": 2.1791, "step": 2095000 }, { "epoch": 0.63, "eval_accuracy": 0.5174243520113619, "eval_loss": 2.205078125, "eval_runtime": 39.9803, "eval_samples_per_second": 89.244, "eval_steps_per_second": 11.156, "step": 2095000 }, { "epoch": 0.63, "learning_rate": 4.715652006154348e-05, "loss": 2.1812, "step": 2100000 }, { "epoch": 0.63, "eval_accuracy": 0.5173136690205979, "eval_loss": 2.205078125, "eval_runtime": 40.0609, "eval_samples_per_second": 89.064, "eval_steps_per_second": 11.133, "step": 2100000 }, { "epoch": 0.63, "learning_rate": 4.708168770041173e-05, "loss": 2.1835, "step": 2105000 }, { "epoch": 0.63, "eval_accuracy": 0.5175572263938386, "eval_loss": 2.205078125, "eval_runtime": 40.0398, "eval_samples_per_second": 89.111, "eval_steps_per_second": 11.139, "step": 2105000 }, { "epoch": 0.63, "learning_rate": 4.700687031773498e-05, "loss": 2.1806, "step": 2110000 }, { "epoch": 0.63, "eval_accuracy": 0.517552294973458, "eval_loss": 2.205078125, "eval_runtime": 40.3766, "eval_samples_per_second": 88.368, "eval_steps_per_second": 11.046, "step": 2110000 }, { "epoch": 0.63, "learning_rate": 4.6932037956603224e-05, "loss": 2.1832, "step": 2115000 }, { "epoch": 0.63, "eval_accuracy": 0.5174777757321516, "eval_loss": 2.205078125, "eval_runtime": 41.019, "eval_samples_per_second": 86.984, "eval_steps_per_second": 10.873, "step": 2115000 }, { "epoch": 0.64, "learning_rate": 4.6857205595471476e-05, "loss": 2.1766, "step": 2120000 }, { "epoch": 0.64, "eval_accuracy": 0.5177577708226486, "eval_loss": 2.203125, "eval_runtime": 40.432, "eval_samples_per_second": 88.247, "eval_steps_per_second": 11.031, "step": 2120000 }, { "epoch": 0.64, "learning_rate": 4.678237323433973e-05, "loss": 2.1775, "step": 2125000 }, { "epoch": 0.64, "eval_accuracy": 0.5178353037097432, "eval_loss": 2.203125, "eval_runtime": 41.1107, "eval_samples_per_second": 86.79, "eval_steps_per_second": 10.849, "step": 2125000 }, { "epoch": 0.64, "learning_rate": 4.6707540873207975e-05, "loss": 2.1801, "step": 2130000 }, { "epoch": 0.64, "eval_accuracy": 0.5176514713166673, "eval_loss": 2.203125, "eval_runtime": 41.0472, "eval_samples_per_second": 86.924, "eval_steps_per_second": 10.866, "step": 2130000 }, { "epoch": 0.64, "learning_rate": 4.6632708512076235e-05, "loss": 2.1789, "step": 2135000 }, { "epoch": 0.64, "eval_accuracy": 0.5177799622143612, "eval_loss": 2.203125, "eval_runtime": 41.4469, "eval_samples_per_second": 86.086, "eval_steps_per_second": 10.761, "step": 2135000 }, { "epoch": 0.64, "learning_rate": 4.655787615094449e-05, "loss": 2.1794, "step": 2140000 }, { "epoch": 0.64, "eval_accuracy": 0.5178131123180306, "eval_loss": 2.203125, "eval_runtime": 40.4764, "eval_samples_per_second": 88.15, "eval_steps_per_second": 11.019, "step": 2140000 }, { "epoch": 0.64, "learning_rate": 4.6483043789812734e-05, "loss": 2.1799, "step": 2145000 }, { "epoch": 0.64, "eval_accuracy": 0.5178917410763209, "eval_loss": 2.201171875, "eval_runtime": 40.376, "eval_samples_per_second": 88.369, "eval_steps_per_second": 11.046, "step": 2145000 }, { "epoch": 0.64, "learning_rate": 4.6408196450226e-05, "loss": 2.1746, "step": 2150000 }, { "epoch": 0.64, "eval_accuracy": 0.5180391357521402, "eval_loss": 2.201171875, "eval_runtime": 40.5387, "eval_samples_per_second": 88.015, "eval_steps_per_second": 11.002, "step": 2150000 }, { "epoch": 0.65, "learning_rate": 4.633336408909425e-05, "loss": 2.1766, "step": 2155000 }, { "epoch": 0.65, "eval_accuracy": 0.5178873575915381, "eval_loss": 2.201171875, "eval_runtime": 40.514, "eval_samples_per_second": 88.068, "eval_steps_per_second": 11.009, "step": 2155000 }, { "epoch": 0.65, "learning_rate": 4.62585317279625e-05, "loss": 2.1754, "step": 2160000 }, { "epoch": 0.65, "eval_accuracy": 0.5177369492699306, "eval_loss": 2.201171875, "eval_runtime": 42.6971, "eval_samples_per_second": 83.565, "eval_steps_per_second": 10.446, "step": 2160000 }, { "epoch": 0.65, "learning_rate": 4.618369936683075e-05, "loss": 2.1764, "step": 2165000 }, { "epoch": 0.65, "eval_accuracy": 0.5177405108513166, "eval_loss": 2.201171875, "eval_runtime": 40.5808, "eval_samples_per_second": 87.923, "eval_steps_per_second": 10.99, "step": 2165000 }, { "epoch": 0.65, "learning_rate": 4.6108881984154e-05, "loss": 2.1745, "step": 2170000 }, { "epoch": 0.65, "eval_accuracy": 0.51831337751886, "eval_loss": 2.19921875, "eval_runtime": 42.1403, "eval_samples_per_second": 84.67, "eval_steps_per_second": 10.584, "step": 2170000 }, { "epoch": 0.65, "learning_rate": 4.6034049623022244e-05, "loss": 2.1735, "step": 2175000 }, { "epoch": 0.65, "eval_accuracy": 0.5180128348434438, "eval_loss": 2.19921875, "eval_runtime": 40.2519, "eval_samples_per_second": 88.642, "eval_steps_per_second": 11.08, "step": 2175000 }, { "epoch": 0.65, "learning_rate": 4.59592172618905e-05, "loss": 2.1778, "step": 2180000 }, { "epoch": 0.65, "eval_accuracy": 0.5180588614336625, "eval_loss": 2.19921875, "eval_runtime": 42.4462, "eval_samples_per_second": 84.059, "eval_steps_per_second": 10.507, "step": 2180000 }, { "epoch": 0.65, "learning_rate": 4.5884384900758757e-05, "loss": 2.1717, "step": 2185000 }, { "epoch": 0.65, "eval_accuracy": 0.5183484453971218, "eval_loss": 2.19921875, "eval_runtime": 41.2843, "eval_samples_per_second": 86.425, "eval_steps_per_second": 10.803, "step": 2185000 }, { "epoch": 0.66, "learning_rate": 4.580953756117201e-05, "loss": 2.1752, "step": 2190000 }, { "epoch": 0.66, "eval_accuracy": 0.518520497174844, "eval_loss": 2.197265625, "eval_runtime": 40.4537, "eval_samples_per_second": 88.2, "eval_steps_per_second": 11.025, "step": 2190000 }, { "epoch": 0.66, "learning_rate": 4.5734720178495255e-05, "loss": 2.1747, "step": 2195000 }, { "epoch": 0.66, "eval_accuracy": 0.5184799499406038, "eval_loss": 2.197265625, "eval_runtime": 43.6458, "eval_samples_per_second": 81.749, "eval_steps_per_second": 10.219, "step": 2195000 }, { "epoch": 0.66, "learning_rate": 4.56599027958185e-05, "loss": 2.1754, "step": 2200000 }, { "epoch": 0.66, "eval_accuracy": 0.5186040573535149, "eval_loss": 2.197265625, "eval_runtime": 41.0029, "eval_samples_per_second": 87.018, "eval_steps_per_second": 10.877, "step": 2200000 }, { "epoch": 0.66, "learning_rate": 4.558508541314174e-05, "loss": 2.1728, "step": 2205000 }, { "epoch": 0.66, "eval_accuracy": 0.5187728215176501, "eval_loss": 2.197265625, "eval_runtime": 40.4446, "eval_samples_per_second": 88.22, "eval_steps_per_second": 11.027, "step": 2205000 }, { "epoch": 0.66, "learning_rate": 4.551025305200999e-05, "loss": 2.1684, "step": 2210000 }, { "epoch": 0.66, "eval_accuracy": 0.5185736469278347, "eval_loss": 2.197265625, "eval_runtime": 40.6605, "eval_samples_per_second": 87.751, "eval_steps_per_second": 10.969, "step": 2210000 }, { "epoch": 0.66, "learning_rate": 4.543540571242325e-05, "loss": 2.1722, "step": 2215000 }, { "epoch": 0.66, "eval_accuracy": 0.5188199439790645, "eval_loss": 2.1953125, "eval_runtime": 40.5747, "eval_samples_per_second": 87.937, "eval_steps_per_second": 10.992, "step": 2215000 }, { "epoch": 0.67, "learning_rate": 4.536058832974649e-05, "loss": 2.1692, "step": 2220000 }, { "epoch": 0.67, "eval_accuracy": 0.519004872243336, "eval_loss": 2.1953125, "eval_runtime": 41.1424, "eval_samples_per_second": 86.723, "eval_steps_per_second": 10.84, "step": 2220000 }, { "epoch": 0.67, "learning_rate": 4.528577094706974e-05, "loss": 2.176, "step": 2225000 }, { "epoch": 0.67, "eval_accuracy": 0.519122404429073, "eval_loss": 2.1953125, "eval_runtime": 42.7268, "eval_samples_per_second": 83.507, "eval_steps_per_second": 10.438, "step": 2225000 }, { "epoch": 0.67, "learning_rate": 4.5210923607483e-05, "loss": 2.1697, "step": 2230000 }, { "epoch": 0.67, "eval_accuracy": 0.5190202144400756, "eval_loss": 2.1953125, "eval_runtime": 40.4249, "eval_samples_per_second": 88.262, "eval_steps_per_second": 11.033, "step": 2230000 }, { "epoch": 0.67, "learning_rate": 4.5136106224806244e-05, "loss": 2.1731, "step": 2235000 }, { "epoch": 0.67, "eval_accuracy": 0.5190837749694251, "eval_loss": 2.1953125, "eval_runtime": 42.0959, "eval_samples_per_second": 84.759, "eval_steps_per_second": 10.595, "step": 2235000 }, { "epoch": 0.67, "learning_rate": 4.506124390676451e-05, "loss": 2.173, "step": 2240000 }, { "epoch": 0.67, "eval_accuracy": 0.5191125415883119, "eval_loss": 2.193359375, "eval_runtime": 40.9494, "eval_samples_per_second": 87.132, "eval_steps_per_second": 10.892, "step": 2240000 }, { "epoch": 0.67, "learning_rate": 4.498644150254274e-05, "loss": 2.1714, "step": 2245000 }, { "epoch": 0.67, "eval_accuracy": 0.5192900727220126, "eval_loss": 2.193359375, "eval_runtime": 40.4515, "eval_samples_per_second": 88.204, "eval_steps_per_second": 11.026, "step": 2245000 }, { "epoch": 0.67, "learning_rate": 4.4911594162956e-05, "loss": 2.1719, "step": 2250000 }, { "epoch": 0.67, "eval_accuracy": 0.5192147315773094, "eval_loss": 2.193359375, "eval_runtime": 40.7422, "eval_samples_per_second": 87.575, "eval_steps_per_second": 10.947, "step": 2250000 }, { "epoch": 0.68, "learning_rate": 4.483674682336927e-05, "loss": 2.1667, "step": 2255000 }, { "epoch": 0.68, "eval_accuracy": 0.5189793932380363, "eval_loss": 2.193359375, "eval_runtime": 40.4467, "eval_samples_per_second": 88.215, "eval_steps_per_second": 11.027, "step": 2255000 }, { "epoch": 0.68, "learning_rate": 4.476191446223752e-05, "loss": 2.1653, "step": 2260000 }, { "epoch": 0.68, "eval_accuracy": 0.5191834992482324, "eval_loss": 2.193359375, "eval_runtime": 40.3493, "eval_samples_per_second": 88.428, "eval_steps_per_second": 11.053, "step": 2260000 }, { "epoch": 0.68, "learning_rate": 4.468709707956076e-05, "loss": 2.1656, "step": 2265000 }, { "epoch": 0.68, "eval_accuracy": 0.5193434964428021, "eval_loss": 2.19140625, "eval_runtime": 44.129, "eval_samples_per_second": 80.854, "eval_steps_per_second": 10.107, "step": 2265000 }, { "epoch": 0.68, "learning_rate": 4.4612264718429014e-05, "loss": 2.1695, "step": 2270000 }, { "epoch": 0.68, "eval_accuracy": 0.5194328099452503, "eval_loss": 2.19140625, "eval_runtime": 42.07, "eval_samples_per_second": 84.811, "eval_steps_per_second": 10.601, "step": 2270000 }, { "epoch": 0.68, "learning_rate": 4.453744733575226e-05, "loss": 2.17, "step": 2275000 }, { "epoch": 0.68, "eval_accuracy": 0.5195643144887323, "eval_loss": 2.19140625, "eval_runtime": 40.4092, "eval_samples_per_second": 88.297, "eval_steps_per_second": 11.037, "step": 2275000 }, { "epoch": 0.68, "learning_rate": 4.44626299530755e-05, "loss": 2.1628, "step": 2280000 }, { "epoch": 0.68, "eval_accuracy": 0.5197062298085732, "eval_loss": 2.19140625, "eval_runtime": 40.3663, "eval_samples_per_second": 88.391, "eval_steps_per_second": 11.049, "step": 2280000 }, { "epoch": 0.68, "learning_rate": 4.438779759194375e-05, "loss": 2.1648, "step": 2285000 }, { "epoch": 0.68, "eval_accuracy": 0.5196210258231089, "eval_loss": 2.189453125, "eval_runtime": 41.4265, "eval_samples_per_second": 86.128, "eval_steps_per_second": 10.766, "step": 2285000 }, { "epoch": 0.69, "learning_rate": 4.431298020926699e-05, "loss": 2.1647, "step": 2290000 }, { "epoch": 0.69, "eval_accuracy": 0.5199106097865681, "eval_loss": 2.189453125, "eval_runtime": 43.7368, "eval_samples_per_second": 81.579, "eval_steps_per_second": 10.197, "step": 2290000 }, { "epoch": 0.69, "learning_rate": 4.423813286968026e-05, "loss": 2.1648, "step": 2295000 }, { "epoch": 0.69, "eval_accuracy": 0.5198015706025977, "eval_loss": 2.189453125, "eval_runtime": 40.3741, "eval_samples_per_second": 88.374, "eval_steps_per_second": 11.047, "step": 2295000 }, { "epoch": 0.69, "learning_rate": 4.4163315487003496e-05, "loss": 2.168, "step": 2300000 }, { "epoch": 0.69, "eval_accuracy": 0.5196733536727027, "eval_loss": 2.189453125, "eval_runtime": 40.4104, "eval_samples_per_second": 88.294, "eval_steps_per_second": 11.037, "step": 2300000 }, { "epoch": 0.69, "learning_rate": 4.4088498104326735e-05, "loss": 2.1607, "step": 2305000 }, { "epoch": 0.69, "eval_accuracy": 0.5197840366634667, "eval_loss": 2.189453125, "eval_runtime": 40.7993, "eval_samples_per_second": 87.452, "eval_steps_per_second": 10.932, "step": 2305000 }, { "epoch": 0.69, "learning_rate": 4.401365076474e-05, "loss": 2.1674, "step": 2310000 }, { "epoch": 0.69, "eval_accuracy": 0.5199684169921404, "eval_loss": 2.1875, "eval_runtime": 40.3824, "eval_samples_per_second": 88.355, "eval_steps_per_second": 11.044, "step": 2310000 }, { "epoch": 0.69, "learning_rate": 4.393881840360825e-05, "loss": 2.1656, "step": 2315000 }, { "epoch": 0.69, "eval_accuracy": 0.5199689649277383, "eval_loss": 2.1875, "eval_runtime": 40.7535, "eval_samples_per_second": 87.551, "eval_steps_per_second": 10.944, "step": 2315000 }, { "epoch": 0.7, "learning_rate": 4.38639860424765e-05, "loss": 2.1637, "step": 2320000 }, { "epoch": 0.7, "eval_accuracy": 0.5201547150954066, "eval_loss": 2.1875, "eval_runtime": 40.6083, "eval_samples_per_second": 87.864, "eval_steps_per_second": 10.983, "step": 2320000 }, { "epoch": 0.7, "learning_rate": 4.3789168659799746e-05, "loss": 2.1649, "step": 2325000 }, { "epoch": 0.7, "eval_accuracy": 0.5201152637323619, "eval_loss": 2.1875, "eval_runtime": 41.4342, "eval_samples_per_second": 86.112, "eval_steps_per_second": 10.764, "step": 2325000 }, { "epoch": 0.7, "learning_rate": 4.371436625557798e-05, "loss": 2.1625, "step": 2330000 }, { "epoch": 0.7, "eval_accuracy": 0.5200544428810016, "eval_loss": 2.1875, "eval_runtime": 44.1316, "eval_samples_per_second": 80.849, "eval_steps_per_second": 10.106, "step": 2330000 }, { "epoch": 0.7, "learning_rate": 4.363950393753625e-05, "loss": 2.1627, "step": 2335000 }, { "epoch": 0.7, "eval_accuracy": 0.5202593707945943, "eval_loss": 2.1875, "eval_runtime": 42.6548, "eval_samples_per_second": 83.648, "eval_steps_per_second": 10.456, "step": 2335000 }, { "epoch": 0.7, "learning_rate": 4.356468655485949e-05, "loss": 2.1598, "step": 2340000 }, { "epoch": 0.7, "eval_accuracy": 0.5203048494492152, "eval_loss": 2.185546875, "eval_runtime": 43.9859, "eval_samples_per_second": 81.117, "eval_steps_per_second": 10.14, "step": 2340000 }, { "epoch": 0.7, "learning_rate": 4.348982423681776e-05, "loss": 2.1638, "step": 2345000 }, { "epoch": 0.7, "eval_accuracy": 0.5201473179648357, "eval_loss": 2.1875, "eval_runtime": 44.5632, "eval_samples_per_second": 80.066, "eval_steps_per_second": 10.008, "step": 2345000 }, { "epoch": 0.7, "learning_rate": 4.341499187568602e-05, "loss": 2.1588, "step": 2350000 }, { "epoch": 0.7, "eval_accuracy": 0.5204785450337309, "eval_loss": 2.185546875, "eval_runtime": 41.6749, "eval_samples_per_second": 85.615, "eval_steps_per_second": 10.702, "step": 2350000 }, { "epoch": 0.71, "learning_rate": 4.334015951455427e-05, "loss": 2.1633, "step": 2355000 }, { "epoch": 0.71, "eval_accuracy": 0.5204648466437849, "eval_loss": 2.185546875, "eval_runtime": 40.4241, "eval_samples_per_second": 88.264, "eval_steps_per_second": 11.033, "step": 2355000 }, { "epoch": 0.71, "learning_rate": 4.326534213187751e-05, "loss": 2.1621, "step": 2360000 }, { "epoch": 0.71, "eval_accuracy": 0.5205470369834612, "eval_loss": 2.185546875, "eval_runtime": 40.3566, "eval_samples_per_second": 88.412, "eval_steps_per_second": 11.051, "step": 2360000 }, { "epoch": 0.71, "learning_rate": 4.319049479229077e-05, "loss": 2.165, "step": 2365000 }, { "epoch": 0.71, "eval_accuracy": 0.5207188147933844, "eval_loss": 2.18359375, "eval_runtime": 40.8467, "eval_samples_per_second": 87.351, "eval_steps_per_second": 10.919, "step": 2365000 }, { "epoch": 0.71, "learning_rate": 4.311566243115902e-05, "loss": 2.159, "step": 2370000 }, { "epoch": 0.71, "eval_accuracy": 0.52062347399936, "eval_loss": 2.18359375, "eval_runtime": 43.216, "eval_samples_per_second": 82.562, "eval_steps_per_second": 10.32, "step": 2370000 }, { "epoch": 0.71, "learning_rate": 4.3040830070027275e-05, "loss": 2.1573, "step": 2375000 }, { "epoch": 0.71, "eval_accuracy": 0.5207314173121348, "eval_loss": 2.18359375, "eval_runtime": 44.6428, "eval_samples_per_second": 79.923, "eval_steps_per_second": 9.99, "step": 2375000 }, { "epoch": 0.71, "learning_rate": 4.2966012687350514e-05, "loss": 2.1556, "step": 2380000 }, { "epoch": 0.71, "eval_accuracy": 0.520848401562274, "eval_loss": 2.18359375, "eval_runtime": 40.6164, "eval_samples_per_second": 87.846, "eval_steps_per_second": 10.981, "step": 2380000 }, { "epoch": 0.71, "learning_rate": 4.289119530467376e-05, "loss": 2.1562, "step": 2385000 }, { "epoch": 0.71, "eval_accuracy": 0.5209563448750487, "eval_loss": 2.18359375, "eval_runtime": 40.356, "eval_samples_per_second": 88.413, "eval_steps_per_second": 11.052, "step": 2385000 }, { "epoch": 0.72, "learning_rate": 4.281636294354201e-05, "loss": 2.1572, "step": 2390000 }, { "epoch": 0.72, "eval_accuracy": 0.5209188112865967, "eval_loss": 2.18359375, "eval_runtime": 44.3639, "eval_samples_per_second": 80.426, "eval_steps_per_second": 10.053, "step": 2390000 }, { "epoch": 0.72, "learning_rate": 4.274154556086525e-05, "loss": 2.1577, "step": 2395000 }, { "epoch": 0.72, "eval_accuracy": 0.5208686751793942, "eval_loss": 2.181640625, "eval_runtime": 42.2402, "eval_samples_per_second": 84.469, "eval_steps_per_second": 10.559, "step": 2395000 }, { "epoch": 0.72, "learning_rate": 4.2666713199733505e-05, "loss": 2.1529, "step": 2400000 }, { "epoch": 0.72, "eval_accuracy": 0.5209933305279031, "eval_loss": 2.181640625, "eval_runtime": 42.0012, "eval_samples_per_second": 84.95, "eval_steps_per_second": 10.619, "step": 2400000 }, { "epoch": 0.72, "learning_rate": 4.259188083860176e-05, "loss": 2.1636, "step": 2405000 }, { "epoch": 0.72, "eval_accuracy": 0.5210516856690732, "eval_loss": 2.181640625, "eval_runtime": 40.8766, "eval_samples_per_second": 87.287, "eval_steps_per_second": 10.911, "step": 2405000 }, { "epoch": 0.72, "learning_rate": 4.2517063455924996e-05, "loss": 2.1521, "step": 2410000 }, { "epoch": 0.72, "eval_accuracy": 0.5212941471711181, "eval_loss": 2.181640625, "eval_runtime": 44.8987, "eval_samples_per_second": 79.468, "eval_steps_per_second": 9.933, "step": 2410000 }, { "epoch": 0.72, "learning_rate": 4.244221611633826e-05, "loss": 2.1574, "step": 2415000 }, { "epoch": 0.72, "eval_accuracy": 0.5213697622836202, "eval_loss": 2.181640625, "eval_runtime": 40.8755, "eval_samples_per_second": 87.289, "eval_steps_per_second": 10.911, "step": 2415000 }, { "epoch": 0.72, "learning_rate": 4.236736877675152e-05, "loss": 2.1546, "step": 2420000 }, { "epoch": 0.72, "eval_accuracy": 0.5213067496898685, "eval_loss": 2.1796875, "eval_runtime": 43.2862, "eval_samples_per_second": 82.428, "eval_steps_per_second": 10.304, "step": 2420000 }, { "epoch": 0.73, "learning_rate": 4.2292536415619776e-05, "loss": 2.1572, "step": 2425000 }, { "epoch": 0.73, "eval_accuracy": 0.521194148924512, "eval_loss": 2.1796875, "eval_runtime": 42.5113, "eval_samples_per_second": 83.931, "eval_steps_per_second": 10.491, "step": 2425000 }, { "epoch": 0.73, "learning_rate": 4.2217719032943015e-05, "loss": 2.1544, "step": 2430000 }, { "epoch": 0.73, "eval_accuracy": 0.5212160663484257, "eval_loss": 2.1796875, "eval_runtime": 42.4347, "eval_samples_per_second": 84.082, "eval_steps_per_second": 10.51, "step": 2430000 }, { "epoch": 0.73, "learning_rate": 4.214290165026626e-05, "loss": 2.15, "step": 2435000 }, { "epoch": 0.73, "eval_accuracy": 0.5213122290458468, "eval_loss": 2.1796875, "eval_runtime": 41.657, "eval_samples_per_second": 85.652, "eval_steps_per_second": 10.706, "step": 2435000 }, { "epoch": 0.73, "learning_rate": 4.206805431067952e-05, "loss": 2.1537, "step": 2440000 }, { "epoch": 0.73, "eval_accuracy": 0.5217242766154238, "eval_loss": 2.177734375, "eval_runtime": 45.9893, "eval_samples_per_second": 77.583, "eval_steps_per_second": 9.698, "step": 2440000 }, { "epoch": 0.73, "learning_rate": 4.199322194954777e-05, "loss": 2.1552, "step": 2445000 }, { "epoch": 0.73, "eval_accuracy": 0.5215689368734356, "eval_loss": 2.177734375, "eval_runtime": 43.8458, "eval_samples_per_second": 81.376, "eval_steps_per_second": 10.172, "step": 2445000 }, { "epoch": 0.73, "learning_rate": 4.1918389588416026e-05, "loss": 2.1522, "step": 2450000 }, { "epoch": 0.73, "eval_accuracy": 0.5215245540900105, "eval_loss": 2.177734375, "eval_runtime": 45.304, "eval_samples_per_second": 78.757, "eval_steps_per_second": 9.845, "step": 2450000 }, { "epoch": 0.74, "learning_rate": 4.1843542248829286e-05, "loss": 2.1487, "step": 2455000 }, { "epoch": 0.74, "eval_accuracy": 0.5214875684371562, "eval_loss": 2.177734375, "eval_runtime": 41.9232, "eval_samples_per_second": 85.108, "eval_steps_per_second": 10.639, "step": 2455000 }, { "epoch": 0.74, "learning_rate": 4.176870988769754e-05, "loss": 2.1582, "step": 2460000 }, { "epoch": 0.74, "eval_accuracy": 0.52146373323865, "eval_loss": 2.177734375, "eval_runtime": 44.2104, "eval_samples_per_second": 80.705, "eval_steps_per_second": 10.088, "step": 2460000 }, { "epoch": 0.74, "learning_rate": 4.169390748347577e-05, "loss": 2.1582, "step": 2465000 }, { "epoch": 0.74, "eval_accuracy": 0.5218020834703172, "eval_loss": 2.177734375, "eval_runtime": 43.1549, "eval_samples_per_second": 82.679, "eval_steps_per_second": 10.335, "step": 2465000 }, { "epoch": 0.74, "learning_rate": 4.161907512234402e-05, "loss": 2.1529, "step": 2470000 }, { "epoch": 0.74, "eval_accuracy": 0.5217976999855345, "eval_loss": 2.177734375, "eval_runtime": 45.1081, "eval_samples_per_second": 79.099, "eval_steps_per_second": 9.887, "step": 2470000 }, { "epoch": 0.74, "learning_rate": 4.1544242761212276e-05, "loss": 2.1549, "step": 2475000 }, { "epoch": 0.74, "eval_accuracy": 0.5219418070477668, "eval_loss": 2.17578125, "eval_runtime": 40.8292, "eval_samples_per_second": 87.388, "eval_steps_per_second": 10.924, "step": 2475000 }, { "epoch": 0.74, "learning_rate": 4.1469425378535516e-05, "loss": 2.1525, "step": 2480000 }, { "epoch": 0.74, "eval_accuracy": 0.521930574368011, "eval_loss": 2.17578125, "eval_runtime": 43.1177, "eval_samples_per_second": 82.75, "eval_steps_per_second": 10.344, "step": 2480000 }, { "epoch": 0.74, "learning_rate": 4.139459301740377e-05, "loss": 2.1478, "step": 2485000 }, { "epoch": 0.74, "eval_accuracy": 0.5221272832476362, "eval_loss": 2.17578125, "eval_runtime": 44.4519, "eval_samples_per_second": 80.266, "eval_steps_per_second": 10.033, "step": 2485000 }, { "epoch": 0.75, "learning_rate": 4.131977563472701e-05, "loss": 2.1524, "step": 2490000 }, { "epoch": 0.75, "eval_accuracy": 0.5219949568007575, "eval_loss": 2.17578125, "eval_runtime": 43.8567, "eval_samples_per_second": 81.356, "eval_steps_per_second": 10.169, "step": 2490000 }, { "epoch": 0.75, "learning_rate": 4.124494327359526e-05, "loss": 2.1477, "step": 2495000 }, { "epoch": 0.75, "eval_accuracy": 0.5220256411942366, "eval_loss": 2.173828125, "eval_runtime": 40.3311, "eval_samples_per_second": 88.468, "eval_steps_per_second": 11.058, "step": 2495000 }, { "epoch": 0.75, "learning_rate": 4.117011091246352e-05, "loss": 2.1524, "step": 2500000 }, { "epoch": 0.75, "eval_accuracy": 0.5221511184461423, "eval_loss": 2.173828125, "eval_runtime": 42.2823, "eval_samples_per_second": 84.385, "eval_steps_per_second": 10.548, "step": 2500000 }, { "epoch": 0.75, "learning_rate": 4.1095278551331766e-05, "loss": 2.147, "step": 2505000 }, { "epoch": 0.75, "eval_accuracy": 0.5221815288718226, "eval_loss": 2.173828125, "eval_runtime": 40.7144, "eval_samples_per_second": 87.635, "eval_steps_per_second": 10.954, "step": 2505000 }, { "epoch": 0.75, "learning_rate": 4.102046116865501e-05, "loss": 2.1481, "step": 2510000 }, { "epoch": 0.75, "eval_accuracy": 0.5222954994761736, "eval_loss": 2.173828125, "eval_runtime": 42.7002, "eval_samples_per_second": 83.559, "eval_steps_per_second": 10.445, "step": 2510000 }, { "epoch": 0.75, "learning_rate": 4.094559885061328e-05, "loss": 2.1494, "step": 2515000 }, { "epoch": 0.75, "eval_accuracy": 0.5222659109538901, "eval_loss": 2.173828125, "eval_runtime": 42.0725, "eval_samples_per_second": 84.806, "eval_steps_per_second": 10.601, "step": 2515000 }, { "epoch": 0.75, "learning_rate": 4.087078146793652e-05, "loss": 2.1484, "step": 2520000 }, { "epoch": 0.75, "eval_accuracy": 0.5222675547606836, "eval_loss": 2.173828125, "eval_runtime": 44.1957, "eval_samples_per_second": 80.732, "eval_steps_per_second": 10.091, "step": 2520000 }, { "epoch": 0.76, "learning_rate": 4.079594910680478e-05, "loss": 2.1474, "step": 2525000 }, { "epoch": 0.76, "eval_accuracy": 0.5223297454510387, "eval_loss": 2.173828125, "eval_runtime": 43.3427, "eval_samples_per_second": 82.321, "eval_steps_per_second": 10.29, "step": 2525000 }, { "epoch": 0.76, "learning_rate": 4.072110176721804e-05, "loss": 2.1487, "step": 2530000 }, { "epoch": 0.76, "eval_accuracy": 0.5222724861810643, "eval_loss": 2.173828125, "eval_runtime": 44.0468, "eval_samples_per_second": 81.005, "eval_steps_per_second": 10.126, "step": 2530000 }, { "epoch": 0.76, "learning_rate": 4.064626940608628e-05, "loss": 2.1465, "step": 2535000 }, { "epoch": 0.76, "eval_accuracy": 0.5224768661590591, "eval_loss": 2.171875, "eval_runtime": 40.4028, "eval_samples_per_second": 88.311, "eval_steps_per_second": 11.039, "step": 2535000 }, { "epoch": 0.76, "learning_rate": 4.057145202340953e-05, "loss": 2.1456, "step": 2540000 }, { "epoch": 0.76, "eval_accuracy": 0.5226201513178947, "eval_loss": 2.171875, "eval_runtime": 42.5959, "eval_samples_per_second": 83.764, "eval_steps_per_second": 10.47, "step": 2540000 }, { "epoch": 0.76, "learning_rate": 4.049661966227778e-05, "loss": 2.1482, "step": 2545000 }, { "epoch": 0.76, "eval_accuracy": 0.5223516628749524, "eval_loss": 2.171875, "eval_runtime": 43.342, "eval_samples_per_second": 82.322, "eval_steps_per_second": 10.29, "step": 2545000 }, { "epoch": 0.76, "learning_rate": 4.042180227960102e-05, "loss": 2.1451, "step": 2550000 }, { "epoch": 0.76, "eval_accuracy": 0.5226286443196613, "eval_loss": 2.171875, "eval_runtime": 42.1401, "eval_samples_per_second": 84.67, "eval_steps_per_second": 10.584, "step": 2550000 }, { "epoch": 0.77, "learning_rate": 4.0346969918469274e-05, "loss": 2.143, "step": 2555000 }, { "epoch": 0.77, "eval_accuracy": 0.5225609742733278, "eval_loss": 2.171875, "eval_runtime": 44.8351, "eval_samples_per_second": 79.581, "eval_steps_per_second": 9.948, "step": 2555000 }, { "epoch": 0.77, "learning_rate": 4.027210760042755e-05, "loss": 2.1463, "step": 2560000 }, { "epoch": 0.77, "eval_accuracy": 0.5225061807135436, "eval_loss": 2.171875, "eval_runtime": 42.3643, "eval_samples_per_second": 84.222, "eval_steps_per_second": 10.528, "step": 2560000 }, { "epoch": 0.77, "learning_rate": 4.0197290217750786e-05, "loss": 2.1466, "step": 2565000 }, { "epoch": 0.77, "eval_accuracy": 0.5227820662870569, "eval_loss": 2.169921875, "eval_runtime": 44.9199, "eval_samples_per_second": 79.43, "eval_steps_per_second": 9.929, "step": 2565000 }, { "epoch": 0.77, "learning_rate": 4.012247283507403e-05, "loss": 2.1423, "step": 2570000 }, { "epoch": 0.77, "eval_accuracy": 0.5229272692204849, "eval_loss": 2.169921875, "eval_runtime": 43.6123, "eval_samples_per_second": 81.812, "eval_steps_per_second": 10.226, "step": 2570000 }, { "epoch": 0.77, "learning_rate": 4.004765545239728e-05, "loss": 2.1423, "step": 2575000 }, { "epoch": 0.77, "eval_accuracy": 0.5230557601181788, "eval_loss": 2.169921875, "eval_runtime": 44.819, "eval_samples_per_second": 79.609, "eval_steps_per_second": 9.951, "step": 2575000 }, { "epoch": 0.77, "learning_rate": 3.997283806972052e-05, "loss": 2.1444, "step": 2580000 }, { "epoch": 0.77, "eval_accuracy": 0.5230245277891018, "eval_loss": 2.169921875, "eval_runtime": 42.2873, "eval_samples_per_second": 84.375, "eval_steps_per_second": 10.547, "step": 2580000 }, { "epoch": 0.77, "learning_rate": 3.9898020687043756e-05, "loss": 2.1402, "step": 2585000 }, { "epoch": 0.77, "eval_accuracy": 0.5230417877604338, "eval_loss": 2.16796875, "eval_runtime": 44.6974, "eval_samples_per_second": 79.826, "eval_steps_per_second": 9.978, "step": 2585000 }, { "epoch": 0.78, "learning_rate": 3.9823203304367e-05, "loss": 2.1376, "step": 2590000 }, { "epoch": 0.78, "eval_accuracy": 0.5230924718032341, "eval_loss": 2.16796875, "eval_runtime": 43.4573, "eval_samples_per_second": 82.104, "eval_steps_per_second": 10.263, "step": 2590000 }, { "epoch": 0.78, "learning_rate": 3.974838592169024e-05, "loss": 2.1395, "step": 2595000 }, { "epoch": 0.78, "eval_accuracy": 0.5231894564040521, "eval_loss": 2.16796875, "eval_runtime": 43.8137, "eval_samples_per_second": 81.436, "eval_steps_per_second": 10.179, "step": 2595000 }, { "epoch": 0.78, "learning_rate": 3.9673553560558494e-05, "loss": 2.1399, "step": 2600000 }, { "epoch": 0.78, "eval_accuracy": 0.5232801397454949, "eval_loss": 2.16796875, "eval_runtime": 41.9685, "eval_samples_per_second": 85.016, "eval_steps_per_second": 10.627, "step": 2600000 }, { "epoch": 0.78, "learning_rate": 3.9598721199426747e-05, "loss": 2.1379, "step": 2605000 }, { "epoch": 0.78, "eval_accuracy": 0.5230908279964406, "eval_loss": 2.16796875, "eval_runtime": 42.9652, "eval_samples_per_second": 83.044, "eval_steps_per_second": 10.38, "step": 2605000 }, { "epoch": 0.78, "learning_rate": 3.952390381674999e-05, "loss": 2.1411, "step": 2610000 }, { "epoch": 0.78, "eval_accuracy": 0.5233708230869376, "eval_loss": 2.166015625, "eval_runtime": 41.4989, "eval_samples_per_second": 85.978, "eval_steps_per_second": 10.747, "step": 2610000 }, { "epoch": 0.78, "learning_rate": 3.944908643407323e-05, "loss": 2.1421, "step": 2615000 }, { "epoch": 0.78, "eval_accuracy": 0.5232478115452223, "eval_loss": 2.166015625, "eval_runtime": 41.1264, "eval_samples_per_second": 86.757, "eval_steps_per_second": 10.845, "step": 2615000 }, { "epoch": 0.78, "learning_rate": 3.9374254072941484e-05, "loss": 2.1412, "step": 2620000 }, { "epoch": 0.78, "eval_accuracy": 0.5236650645029786, "eval_loss": 2.166015625, "eval_runtime": 43.7159, "eval_samples_per_second": 81.618, "eval_steps_per_second": 10.202, "step": 2620000 }, { "epoch": 0.79, "learning_rate": 3.929942171180974e-05, "loss": 2.1381, "step": 2625000 }, { "epoch": 0.79, "eval_accuracy": 0.5235886274870797, "eval_loss": 2.166015625, "eval_runtime": 43.6678, "eval_samples_per_second": 81.708, "eval_steps_per_second": 10.213, "step": 2625000 }, { "epoch": 0.79, "learning_rate": 3.922458935067799e-05, "loss": 2.142, "step": 2630000 }, { "epoch": 0.79, "eval_accuracy": 0.523625339172135, "eval_loss": 2.166015625, "eval_runtime": 42.3354, "eval_samples_per_second": 84.279, "eval_steps_per_second": 10.535, "step": 2630000 }, { "epoch": 0.79, "learning_rate": 3.9149756989546236e-05, "loss": 2.1394, "step": 2635000 }, { "epoch": 0.79, "eval_accuracy": 0.5236212296551512, "eval_loss": 2.1640625, "eval_runtime": 42.9131, "eval_samples_per_second": 83.145, "eval_steps_per_second": 10.393, "step": 2635000 }, { "epoch": 0.79, "learning_rate": 3.907493960686948e-05, "loss": 2.1384, "step": 2640000 }, { "epoch": 0.79, "eval_accuracy": 0.5233793160887042, "eval_loss": 2.1640625, "eval_runtime": 40.2525, "eval_samples_per_second": 88.64, "eval_steps_per_second": 11.08, "step": 2640000 }, { "epoch": 0.79, "learning_rate": 3.900012222419273e-05, "loss": 2.138, "step": 2645000 }, { "epoch": 0.79, "eval_accuracy": 0.5235653402241714, "eval_loss": 2.1640625, "eval_runtime": 43.9803, "eval_samples_per_second": 81.127, "eval_steps_per_second": 10.141, "step": 2645000 }, { "epoch": 0.79, "learning_rate": 3.892530484151597e-05, "loss": 2.1346, "step": 2650000 }, { "epoch": 0.79, "eval_accuracy": 0.523867526706381, "eval_loss": 2.1640625, "eval_runtime": 44.5226, "eval_samples_per_second": 80.139, "eval_steps_per_second": 10.017, "step": 2650000 }, { "epoch": 0.8, "learning_rate": 3.885047248038421e-05, "loss": 2.1376, "step": 2655000 }, { "epoch": 0.8, "eval_accuracy": 0.5239204024915728, "eval_loss": 2.1640625, "eval_runtime": 42.646, "eval_samples_per_second": 83.666, "eval_steps_per_second": 10.458, "step": 2655000 }, { "epoch": 0.8, "learning_rate": 3.877564011925247e-05, "loss": 2.1409, "step": 2660000 }, { "epoch": 0.8, "eval_accuracy": 0.5239705385987753, "eval_loss": 2.1640625, "eval_runtime": 40.3234, "eval_samples_per_second": 88.485, "eval_steps_per_second": 11.061, "step": 2660000 }, { "epoch": 0.8, "learning_rate": 3.8700807758120725e-05, "loss": 2.1343, "step": 2665000 }, { "epoch": 0.8, "eval_accuracy": 0.5239877985701072, "eval_loss": 2.1640625, "eval_runtime": 42.9773, "eval_samples_per_second": 83.021, "eval_steps_per_second": 10.378, "step": 2665000 }, { "epoch": 0.8, "learning_rate": 3.862597539698897e-05, "loss": 2.1363, "step": 2670000 }, { "epoch": 0.8, "eval_accuracy": 0.5240447838722828, "eval_loss": 2.162109375, "eval_runtime": 44.9261, "eval_samples_per_second": 79.419, "eval_steps_per_second": 9.927, "step": 2670000 }, { "epoch": 0.8, "learning_rate": 3.855112805740224e-05, "loss": 2.1343, "step": 2675000 }, { "epoch": 0.8, "eval_accuracy": 0.5241869731599227, "eval_loss": 2.162109375, "eval_runtime": 43.4218, "eval_samples_per_second": 82.171, "eval_steps_per_second": 10.271, "step": 2675000 }, { "epoch": 0.8, "learning_rate": 3.847631067472548e-05, "loss": 2.1381, "step": 2680000 }, { "epoch": 0.8, "eval_accuracy": 0.5243439567087043, "eval_loss": 2.162109375, "eval_runtime": 43.0405, "eval_samples_per_second": 82.899, "eval_steps_per_second": 10.362, "step": 2680000 }, { "epoch": 0.8, "learning_rate": 3.840147831359373e-05, "loss": 2.1355, "step": 2685000 }, { "epoch": 0.8, "eval_accuracy": 0.5241456040222856, "eval_loss": 2.162109375, "eval_runtime": 43.7247, "eval_samples_per_second": 81.601, "eval_steps_per_second": 10.2, "step": 2685000 }, { "epoch": 0.81, "learning_rate": 3.8326615995552e-05, "loss": 2.1394, "step": 2690000 }, { "epoch": 0.81, "eval_accuracy": 0.5242305340399511, "eval_loss": 2.16015625, "eval_runtime": 40.8207, "eval_samples_per_second": 87.407, "eval_steps_per_second": 10.926, "step": 2690000 }, { "epoch": 0.81, "learning_rate": 3.825179861287524e-05, "loss": 2.1359, "step": 2695000 }, { "epoch": 0.81, "eval_accuracy": 0.5244875158353388, "eval_loss": 2.16015625, "eval_runtime": 44.3125, "eval_samples_per_second": 80.519, "eval_steps_per_second": 10.065, "step": 2695000 }, { "epoch": 0.81, "learning_rate": 3.817698123019848e-05, "loss": 2.1365, "step": 2700000 }, { "epoch": 0.81, "eval_accuracy": 0.524362312551232, "eval_loss": 2.16015625, "eval_runtime": 42.9757, "eval_samples_per_second": 83.024, "eval_steps_per_second": 10.378, "step": 2700000 }, { "epoch": 0.81, "learning_rate": 3.810216384752173e-05, "loss": 2.131, "step": 2705000 }, { "epoch": 0.81, "eval_accuracy": 0.5244225854669946, "eval_loss": 2.16015625, "eval_runtime": 44.5469, "eval_samples_per_second": 80.095, "eval_steps_per_second": 10.012, "step": 2705000 }, { "epoch": 0.81, "learning_rate": 3.802731650793499e-05, "loss": 2.1337, "step": 2710000 }, { "epoch": 0.81, "eval_accuracy": 0.5244025858176733, "eval_loss": 2.16015625, "eval_runtime": 43.6203, "eval_samples_per_second": 81.797, "eval_steps_per_second": 10.225, "step": 2710000 }, { "epoch": 0.81, "learning_rate": 3.7952499125258226e-05, "loss": 2.1307, "step": 2715000 }, { "epoch": 0.81, "eval_accuracy": 0.5245954591481136, "eval_loss": 2.158203125, "eval_runtime": 43.7473, "eval_samples_per_second": 81.559, "eval_steps_per_second": 10.195, "step": 2715000 }, { "epoch": 0.81, "learning_rate": 3.787765178567149e-05, "loss": 2.1333, "step": 2720000 }, { "epoch": 0.81, "eval_accuracy": 0.524749429051107, "eval_loss": 2.158203125, "eval_runtime": 43.6989, "eval_samples_per_second": 81.65, "eval_steps_per_second": 10.206, "step": 2720000 }, { "epoch": 0.82, "learning_rate": 3.780283440299474e-05, "loss": 2.1354, "step": 2725000 }, { "epoch": 0.82, "eval_accuracy": 0.5246316228975711, "eval_loss": 2.158203125, "eval_runtime": 43.001, "eval_samples_per_second": 82.975, "eval_steps_per_second": 10.372, "step": 2725000 }, { "epoch": 0.82, "learning_rate": 3.7728002041862985e-05, "loss": 2.1372, "step": 2730000 }, { "epoch": 0.82, "eval_accuracy": 0.5248077841922771, "eval_loss": 2.158203125, "eval_runtime": 41.555, "eval_samples_per_second": 85.862, "eval_steps_per_second": 10.733, "step": 2730000 }, { "epoch": 0.82, "learning_rate": 3.765316968073124e-05, "loss": 2.1323, "step": 2735000 }, { "epoch": 0.82, "eval_accuracy": 0.5248480574587185, "eval_loss": 2.158203125, "eval_runtime": 41.1337, "eval_samples_per_second": 86.742, "eval_steps_per_second": 10.843, "step": 2735000 }, { "epoch": 0.82, "learning_rate": 3.757835229805448e-05, "loss": 2.1315, "step": 2740000 }, { "epoch": 0.82, "eval_accuracy": 0.5249064125998887, "eval_loss": 2.15625, "eval_runtime": 43.6977, "eval_samples_per_second": 81.652, "eval_steps_per_second": 10.206, "step": 2740000 }, { "epoch": 0.82, "learning_rate": 3.750351993692273e-05, "loss": 2.1341, "step": 2745000 }, { "epoch": 0.82, "eval_accuracy": 0.5249143576660573, "eval_loss": 2.15625, "eval_runtime": 44.5031, "eval_samples_per_second": 80.174, "eval_steps_per_second": 10.022, "step": 2745000 }, { "epoch": 0.82, "learning_rate": 3.7428702554245975e-05, "loss": 2.132, "step": 2750000 }, { "epoch": 0.82, "eval_accuracy": 0.5249768223242113, "eval_loss": 2.15625, "eval_runtime": 40.671, "eval_samples_per_second": 87.728, "eval_steps_per_second": 10.966, "step": 2750000 }, { "epoch": 0.83, "learning_rate": 3.7353885171569214e-05, "loss": 2.1322, "step": 2755000 }, { "epoch": 0.83, "eval_accuracy": 0.5251954486277501, "eval_loss": 2.15625, "eval_runtime": 43.2998, "eval_samples_per_second": 82.402, "eval_steps_per_second": 10.3, "step": 2755000 }, { "epoch": 0.83, "learning_rate": 3.727906778889246e-05, "loss": 2.1298, "step": 2760000 }, { "epoch": 0.83, "eval_accuracy": 0.5252044895651145, "eval_loss": 2.15625, "eval_runtime": 44.6651, "eval_samples_per_second": 79.883, "eval_steps_per_second": 9.985, "step": 2760000 }, { "epoch": 0.83, "learning_rate": 3.720422044930572e-05, "loss": 2.1285, "step": 2765000 }, { "epoch": 0.83, "eval_accuracy": 0.5252362698297893, "eval_loss": 2.154296875, "eval_runtime": 43.7277, "eval_samples_per_second": 81.596, "eval_steps_per_second": 10.199, "step": 2765000 }, { "epoch": 0.83, "learning_rate": 3.7129403066628966e-05, "loss": 2.1299, "step": 2770000 }, { "epoch": 0.83, "eval_accuracy": 0.5251601067816893, "eval_loss": 2.15625, "eval_runtime": 43.8556, "eval_samples_per_second": 81.358, "eval_steps_per_second": 10.17, "step": 2770000 }, { "epoch": 0.83, "learning_rate": 3.7054585683952205e-05, "loss": 2.1304, "step": 2775000 }, { "epoch": 0.83, "eval_accuracy": 0.5252631186740835, "eval_loss": 2.154296875, "eval_runtime": 44.0024, "eval_samples_per_second": 81.086, "eval_steps_per_second": 10.136, "step": 2775000 }, { "epoch": 0.83, "learning_rate": 3.6979768301275444e-05, "loss": 2.1288, "step": 2780000 }, { "epoch": 0.83, "eval_accuracy": 0.5254154447702835, "eval_loss": 2.154296875, "eval_runtime": 43.1187, "eval_samples_per_second": 82.748, "eval_steps_per_second": 10.344, "step": 2780000 }, { "epoch": 0.83, "learning_rate": 3.6904935940143697e-05, "loss": 2.1295, "step": 2785000 }, { "epoch": 0.83, "eval_accuracy": 0.5253255833322374, "eval_loss": 2.154296875, "eval_runtime": 44.6338, "eval_samples_per_second": 79.939, "eval_steps_per_second": 9.992, "step": 2785000 }, { "epoch": 0.84, "learning_rate": 3.683010357901195e-05, "loss": 2.129, "step": 2790000 }, { "epoch": 0.84, "eval_accuracy": 0.5255368125052053, "eval_loss": 2.154296875, "eval_runtime": 44.4544, "eval_samples_per_second": 80.262, "eval_steps_per_second": 10.033, "step": 2790000 }, { "epoch": 0.84, "learning_rate": 3.6755286196335195e-05, "loss": 2.1285, "step": 2795000 }, { "epoch": 0.84, "eval_accuracy": 0.5253598293071026, "eval_loss": 2.154296875, "eval_runtime": 44.2524, "eval_samples_per_second": 80.628, "eval_steps_per_second": 10.079, "step": 2795000 }, { "epoch": 0.84, "learning_rate": 3.668045383520344e-05, "loss": 2.1292, "step": 2800000 }, { "epoch": 0.84, "eval_accuracy": 0.5252880497437853, "eval_loss": 2.154296875, "eval_runtime": 40.6928, "eval_samples_per_second": 87.681, "eval_steps_per_second": 10.96, "step": 2800000 }, { "epoch": 0.84, "learning_rate": 3.660563645252669e-05, "loss": 2.1278, "step": 2805000 }, { "epoch": 0.84, "eval_accuracy": 0.5256472215281704, "eval_loss": 2.15234375, "eval_runtime": 43.3931, "eval_samples_per_second": 82.225, "eval_steps_per_second": 10.278, "step": 2805000 }, { "epoch": 0.84, "learning_rate": 3.653080409139494e-05, "loss": 2.1239, "step": 2810000 }, { "epoch": 0.84, "eval_accuracy": 0.5255110595321069, "eval_loss": 2.15234375, "eval_runtime": 42.2966, "eval_samples_per_second": 84.357, "eval_steps_per_second": 10.545, "step": 2810000 }, { "epoch": 0.84, "learning_rate": 3.645598670871818e-05, "loss": 2.1241, "step": 2815000 }, { "epoch": 0.84, "eval_accuracy": 0.5259063950659495, "eval_loss": 2.15234375, "eval_runtime": 42.6516, "eval_samples_per_second": 83.655, "eval_steps_per_second": 10.457, "step": 2815000 }, { "epoch": 0.84, "learning_rate": 3.638115434758643e-05, "loss": 2.1232, "step": 2820000 }, { "epoch": 0.84, "eval_accuracy": 0.5256781798894485, "eval_loss": 2.15234375, "eval_runtime": 46.2947, "eval_samples_per_second": 77.071, "eval_steps_per_second": 9.634, "step": 2820000 }, { "epoch": 0.85, "learning_rate": 3.6306321986454685e-05, "loss": 2.1241, "step": 2825000 }, { "epoch": 0.85, "eval_accuracy": 0.525697357635373, "eval_loss": 2.150390625, "eval_runtime": 43.287, "eval_samples_per_second": 82.427, "eval_steps_per_second": 10.303, "step": 2825000 }, { "epoch": 0.85, "learning_rate": 3.623148962532294e-05, "loss": 2.1236, "step": 2830000 }, { "epoch": 0.85, "eval_accuracy": 0.5259439286544016, "eval_loss": 2.150390625, "eval_runtime": 43.0484, "eval_samples_per_second": 82.884, "eval_steps_per_second": 10.36, "step": 2830000 }, { "epoch": 0.85, "learning_rate": 3.6156672242646177e-05, "loss": 2.1272, "step": 2835000 }, { "epoch": 0.85, "eval_accuracy": 0.5259442026222007, "eval_loss": 2.150390625, "eval_runtime": 42.9436, "eval_samples_per_second": 83.086, "eval_steps_per_second": 10.386, "step": 2835000 }, { "epoch": 0.85, "learning_rate": 3.608183988151443e-05, "loss": 2.1271, "step": 2840000 }, { "epoch": 0.85, "eval_accuracy": 0.5260592690977474, "eval_loss": 2.150390625, "eval_runtime": 40.596, "eval_samples_per_second": 87.89, "eval_steps_per_second": 10.986, "step": 2840000 }, { "epoch": 0.85, "learning_rate": 3.6007022498837675e-05, "loss": 2.1249, "step": 2845000 }, { "epoch": 0.85, "eval_accuracy": 0.52616776034612, "eval_loss": 2.1484375, "eval_runtime": 43.4159, "eval_samples_per_second": 82.182, "eval_steps_per_second": 10.273, "step": 2845000 }, { "epoch": 0.85, "learning_rate": 3.5932175159250935e-05, "loss": 2.1245, "step": 2850000 }, { "epoch": 0.85, "eval_accuracy": 0.5260307764466595, "eval_loss": 2.1484375, "eval_runtime": 43.8356, "eval_samples_per_second": 81.395, "eval_steps_per_second": 10.174, "step": 2850000 }, { "epoch": 0.86, "learning_rate": 3.585735777657418e-05, "loss": 2.1222, "step": 2855000 }, { "epoch": 0.86, "eval_accuracy": 0.5261137886897326, "eval_loss": 2.1484375, "eval_runtime": 43.0232, "eval_samples_per_second": 82.932, "eval_steps_per_second": 10.367, "step": 2855000 }, { "epoch": 0.86, "learning_rate": 3.5782525415442434e-05, "loss": 2.125, "step": 2860000 }, { "epoch": 0.86, "eval_accuracy": 0.5263050182133793, "eval_loss": 2.1484375, "eval_runtime": 43.0096, "eval_samples_per_second": 82.958, "eval_steps_per_second": 10.37, "step": 2860000 }, { "epoch": 0.86, "learning_rate": 3.570770803276567e-05, "loss": 2.1261, "step": 2865000 }, { "epoch": 0.86, "eval_accuracy": 0.5260803646182642, "eval_loss": 2.1484375, "eval_runtime": 40.5259, "eval_samples_per_second": 88.042, "eval_steps_per_second": 11.005, "step": 2865000 }, { "epoch": 0.86, "learning_rate": 3.563289065008891e-05, "loss": 2.1247, "step": 2870000 }, { "epoch": 0.86, "eval_accuracy": 0.5262392659416383, "eval_loss": 2.1484375, "eval_runtime": 46.7614, "eval_samples_per_second": 76.302, "eval_steps_per_second": 9.538, "step": 2870000 }, { "epoch": 0.86, "learning_rate": 3.5558028332047185e-05, "loss": 2.1225, "step": 2875000 }, { "epoch": 0.86, "eval_accuracy": 0.5263167988287328, "eval_loss": 2.1484375, "eval_runtime": 43.7319, "eval_samples_per_second": 81.588, "eval_steps_per_second": 10.199, "step": 2875000 }, { "epoch": 0.86, "learning_rate": 3.5483210949370424e-05, "loss": 2.122, "step": 2880000 }, { "epoch": 0.86, "eval_accuracy": 0.5261085833015531, "eval_loss": 2.1484375, "eval_runtime": 46.8934, "eval_samples_per_second": 76.087, "eval_steps_per_second": 9.511, "step": 2880000 }, { "epoch": 0.86, "learning_rate": 3.540839356669367e-05, "loss": 2.1237, "step": 2885000 }, { "epoch": 0.86, "eval_accuracy": 0.5261107750439444, "eval_loss": 2.146484375, "eval_runtime": 42.8485, "eval_samples_per_second": 83.27, "eval_steps_per_second": 10.409, "step": 2885000 }, { "epoch": 0.87, "learning_rate": 3.533356120556192e-05, "loss": 2.1219, "step": 2890000 }, { "epoch": 0.87, "eval_accuracy": 0.5261825546072617, "eval_loss": 2.146484375, "eval_runtime": 44.3826, "eval_samples_per_second": 80.392, "eval_steps_per_second": 10.049, "step": 2890000 }, { "epoch": 0.87, "learning_rate": 3.525874382288516e-05, "loss": 2.1248, "step": 2895000 }, { "epoch": 0.87, "eval_accuracy": 0.526191595544626, "eval_loss": 2.146484375, "eval_runtime": 40.9038, "eval_samples_per_second": 87.229, "eval_steps_per_second": 10.904, "step": 2895000 }, { "epoch": 0.87, "learning_rate": 3.51839264402084e-05, "loss": 2.1191, "step": 2900000 }, { "epoch": 0.87, "eval_accuracy": 0.526354332417185, "eval_loss": 2.146484375, "eval_runtime": 44.4026, "eval_samples_per_second": 80.356, "eval_steps_per_second": 10.044, "step": 2900000 }, { "epoch": 0.87, "learning_rate": 3.510909407907666e-05, "loss": 2.1181, "step": 2905000 }, { "epoch": 0.87, "eval_accuracy": 0.5264343310144699, "eval_loss": 2.146484375, "eval_runtime": 43.5492, "eval_samples_per_second": 81.93, "eval_steps_per_second": 10.241, "step": 2905000 }, { "epoch": 0.87, "learning_rate": 3.50342766963999e-05, "loss": 2.1176, "step": 2910000 }, { "epoch": 0.87, "eval_accuracy": 0.5263020045675911, "eval_loss": 2.146484375, "eval_runtime": 46.4423, "eval_samples_per_second": 76.827, "eval_steps_per_second": 9.603, "step": 2910000 }, { "epoch": 0.87, "learning_rate": 3.495944433526815e-05, "loss": 2.1191, "step": 2915000 }, { "epoch": 0.87, "eval_accuracy": 0.5266524093824109, "eval_loss": 2.146484375, "eval_runtime": 42.6675, "eval_samples_per_second": 83.623, "eval_steps_per_second": 10.453, "step": 2915000 }, { "epoch": 0.87, "learning_rate": 3.488462695259139e-05, "loss": 2.1206, "step": 2920000 }, { "epoch": 0.87, "eval_accuracy": 0.5267954205734475, "eval_loss": 2.14453125, "eval_runtime": 42.9737, "eval_samples_per_second": 83.028, "eval_steps_per_second": 10.378, "step": 2920000 }, { "epoch": 0.88, "learning_rate": 3.4809794591459645e-05, "loss": 2.1148, "step": 2925000 }, { "epoch": 0.88, "eval_accuracy": 0.5267219972033367, "eval_loss": 2.14453125, "eval_runtime": 44.1462, "eval_samples_per_second": 80.822, "eval_steps_per_second": 10.103, "step": 2925000 }, { "epoch": 0.88, "learning_rate": 3.473497720878289e-05, "loss": 2.1188, "step": 2930000 }, { "epoch": 0.88, "eval_accuracy": 0.5270244576533453, "eval_loss": 2.14453125, "eval_runtime": 44.0298, "eval_samples_per_second": 81.036, "eval_steps_per_second": 10.129, "step": 2930000 }, { "epoch": 0.88, "learning_rate": 3.466015982610613e-05, "loss": 2.1118, "step": 2935000 }, { "epoch": 0.88, "eval_accuracy": 0.5270036361006273, "eval_loss": 2.14453125, "eval_runtime": 41.5147, "eval_samples_per_second": 85.945, "eval_steps_per_second": 10.743, "step": 2935000 }, { "epoch": 0.88, "learning_rate": 4.992518261732325e-05, "loss": 2.1283, "step": 2940000 }, { "epoch": 0.88, "eval_accuracy": 0.5243740931665856, "eval_loss": 2.158203125, "eval_runtime": 39.6741, "eval_samples_per_second": 89.933, "eval_steps_per_second": 11.242, "step": 2940000 }, { "epoch": 0.88, "learning_rate": 4.9850350256191494e-05, "loss": 2.1336, "step": 2945000 }, { "epoch": 0.88, "eval_accuracy": 0.5240346470637227, "eval_loss": 2.162109375, "eval_runtime": 39.6588, "eval_samples_per_second": 89.967, "eval_steps_per_second": 11.246, "step": 2945000 }, { "epoch": 0.88, "learning_rate": 4.977551789505975e-05, "loss": 2.1311, "step": 2950000 }, { "epoch": 0.88, "eval_accuracy": 0.5237494465850462, "eval_loss": 2.162109375, "eval_runtime": 39.7632, "eval_samples_per_second": 89.731, "eval_steps_per_second": 11.216, "step": 2950000 }, { "epoch": 0.89, "learning_rate": 4.970070051238299e-05, "loss": 2.1377, "step": 2955000 }, { "epoch": 0.89, "eval_accuracy": 0.523618763944961, "eval_loss": 2.1640625, "eval_runtime": 39.7355, "eval_samples_per_second": 89.794, "eval_steps_per_second": 11.224, "step": 2955000 }, { "epoch": 0.89, "learning_rate": 4.962588312970624e-05, "loss": 2.136, "step": 2960000 }, { "epoch": 0.89, "eval_accuracy": 0.523584244002297, "eval_loss": 2.1640625, "eval_runtime": 39.6172, "eval_samples_per_second": 90.062, "eval_steps_per_second": 11.258, "step": 2960000 }, { "epoch": 0.89, "learning_rate": 4.955103579011949e-05, "loss": 2.1394, "step": 2965000 }, { "epoch": 0.89, "eval_accuracy": 0.5233458920172359, "eval_loss": 2.1640625, "eval_runtime": 39.7138, "eval_samples_per_second": 89.843, "eval_steps_per_second": 11.23, "step": 2965000 }, { "epoch": 0.89, "learning_rate": 4.947621840744274e-05, "loss": 2.1405, "step": 2970000 }, { "epoch": 0.89, "eval_accuracy": 0.5233305498204963, "eval_loss": 2.166015625, "eval_runtime": 39.7403, "eval_samples_per_second": 89.783, "eval_steps_per_second": 11.223, "step": 2970000 }, { "epoch": 0.89, "learning_rate": 4.9401401024765983e-05, "loss": 2.1391, "step": 2975000 }, { "epoch": 0.89, "eval_accuracy": 0.5235795865497153, "eval_loss": 2.166015625, "eval_runtime": 39.791, "eval_samples_per_second": 89.668, "eval_steps_per_second": 11.209, "step": 2975000 }, { "epoch": 0.89, "learning_rate": 4.9326553685179237e-05, "loss": 2.1353, "step": 2980000 }, { "epoch": 0.89, "eval_accuracy": 0.5233823297344923, "eval_loss": 2.166015625, "eval_runtime": 39.6943, "eval_samples_per_second": 89.887, "eval_steps_per_second": 11.236, "step": 2980000 }, { "epoch": 0.89, "learning_rate": 4.925173630250248e-05, "loss": 2.1392, "step": 2985000 }, { "epoch": 0.89, "eval_accuracy": 0.5233889049616665, "eval_loss": 2.166015625, "eval_runtime": 39.7244, "eval_samples_per_second": 89.819, "eval_steps_per_second": 11.227, "step": 2985000 }, { "epoch": 0.9, "learning_rate": 4.917691891982573e-05, "loss": 2.1384, "step": 2990000 }, { "epoch": 0.9, "eval_accuracy": 0.5235058892118056, "eval_loss": 2.166015625, "eval_runtime": 39.7383, "eval_samples_per_second": 89.787, "eval_steps_per_second": 11.223, "step": 2990000 }, { "epoch": 0.9, "learning_rate": 4.910210153714897e-05, "loss": 2.1373, "step": 2995000 }, { "epoch": 0.9, "eval_accuracy": 0.523321234915333, "eval_loss": 2.166015625, "eval_runtime": 39.8394, "eval_samples_per_second": 89.56, "eval_steps_per_second": 11.195, "step": 2995000 }, { "epoch": 0.9, "learning_rate": 4.9027284154472206e-05, "loss": 2.1346, "step": 3000000 }, { "epoch": 0.9, "eval_accuracy": 0.523394110349846, "eval_loss": 2.166015625, "eval_runtime": 39.7248, "eval_samples_per_second": 89.818, "eval_steps_per_second": 11.227, "step": 3000000 }, { "epoch": 0.9, "learning_rate": 4.895246677179545e-05, "loss": 2.1368, "step": 3005000 }, { "epoch": 0.9, "eval_accuracy": 0.523494108596452, "eval_loss": 2.166015625, "eval_runtime": 39.8116, "eval_samples_per_second": 89.622, "eval_steps_per_second": 11.203, "step": 3005000 }, { "epoch": 0.9, "learning_rate": 4.8877634410663705e-05, "loss": 2.1383, "step": 3010000 }, { "epoch": 0.9, "eval_accuracy": 0.5232738384861197, "eval_loss": 2.166015625, "eval_runtime": 39.8277, "eval_samples_per_second": 89.586, "eval_steps_per_second": 11.198, "step": 3010000 }, { "epoch": 0.9, "learning_rate": 4.880280204953195e-05, "loss": 2.1447, "step": 3015000 }, { "epoch": 0.9, "eval_accuracy": 0.5233116460423708, "eval_loss": 2.166015625, "eval_runtime": 39.9037, "eval_samples_per_second": 89.415, "eval_steps_per_second": 11.177, "step": 3015000 }, { "epoch": 0.9, "learning_rate": 4.87279846668552e-05, "loss": 2.1392, "step": 3020000 }, { "epoch": 0.9, "eval_accuracy": 0.5234119182567758, "eval_loss": 2.166015625, "eval_runtime": 39.8551, "eval_samples_per_second": 89.524, "eval_steps_per_second": 11.191, "step": 3020000 }, { "epoch": 0.91, "learning_rate": 4.865315230572345e-05, "loss": 2.1359, "step": 3025000 }, { "epoch": 0.91, "eval_accuracy": 0.5233072625575881, "eval_loss": 2.166015625, "eval_runtime": 39.8861, "eval_samples_per_second": 89.455, "eval_steps_per_second": 11.182, "step": 3025000 }, { "epoch": 0.91, "learning_rate": 4.8578334923046695e-05, "loss": 2.1408, "step": 3030000 }, { "epoch": 0.91, "eval_accuracy": 0.5233184952373438, "eval_loss": 2.166015625, "eval_runtime": 39.8536, "eval_samples_per_second": 89.528, "eval_steps_per_second": 11.191, "step": 3030000 }, { "epoch": 0.91, "learning_rate": 4.850350256191494e-05, "loss": 2.1437, "step": 3035000 }, { "epoch": 0.91, "eval_accuracy": 0.5232642496131574, "eval_loss": 2.166015625, "eval_runtime": 39.8602, "eval_samples_per_second": 89.513, "eval_steps_per_second": 11.189, "step": 3035000 }, { "epoch": 0.91, "learning_rate": 4.8428715136148166e-05, "loss": 2.1354, "step": 3040000 }, { "epoch": 0.91, "eval_accuracy": 0.5233253444323168, "eval_loss": 2.166015625, "eval_runtime": 39.8447, "eval_samples_per_second": 89.548, "eval_steps_per_second": 11.193, "step": 3040000 }, { "epoch": 0.91, "learning_rate": 4.835383783965145e-05, "loss": 2.1371, "step": 3045000 }, { "epoch": 0.91, "eval_accuracy": 0.5234626022995761, "eval_loss": 2.166015625, "eval_runtime": 40.0007, "eval_samples_per_second": 89.199, "eval_steps_per_second": 11.15, "step": 3045000 }, { "epoch": 0.91, "learning_rate": 4.82790054785197e-05, "loss": 2.1399, "step": 3050000 }, { "epoch": 0.91, "eval_accuracy": 0.5233957541566394, "eval_loss": 2.166015625, "eval_runtime": 39.8876, "eval_samples_per_second": 89.451, "eval_steps_per_second": 11.181, "step": 3050000 }, { "epoch": 0.92, "learning_rate": 4.820420307429793e-05, "loss": 2.1387, "step": 3055000 }, { "epoch": 0.92, "eval_accuracy": 0.5234242468077273, "eval_loss": 2.166015625, "eval_runtime": 39.9685, "eval_samples_per_second": 89.27, "eval_steps_per_second": 11.159, "step": 3055000 }, { "epoch": 0.92, "learning_rate": 4.8129340756256206e-05, "loss": 2.1406, "step": 3060000 }, { "epoch": 0.92, "eval_accuracy": 0.5232119217635636, "eval_loss": 2.166015625, "eval_runtime": 39.9746, "eval_samples_per_second": 89.257, "eval_steps_per_second": 11.157, "step": 3060000 }, { "epoch": 0.92, "learning_rate": 4.805450839512445e-05, "loss": 2.1387, "step": 3065000 }, { "epoch": 0.92, "eval_accuracy": 0.5234650680097664, "eval_loss": 2.166015625, "eval_runtime": 40.0463, "eval_samples_per_second": 89.097, "eval_steps_per_second": 11.137, "step": 3065000 }, { "epoch": 0.92, "learning_rate": 4.797966105553772e-05, "loss": 2.1413, "step": 3070000 }, { "epoch": 0.92, "eval_accuracy": 0.5234842457556909, "eval_loss": 2.166015625, "eval_runtime": 39.9277, "eval_samples_per_second": 89.362, "eval_steps_per_second": 11.17, "step": 3070000 }, { "epoch": 0.92, "learning_rate": 4.790484367286096e-05, "loss": 2.1371, "step": 3075000 }, { "epoch": 0.92, "eval_accuracy": 0.523473561011533, "eval_loss": 2.1640625, "eval_runtime": 39.9812, "eval_samples_per_second": 89.242, "eval_steps_per_second": 11.155, "step": 3075000 }, { "epoch": 0.92, "learning_rate": 4.782999633327422e-05, "loss": 2.138, "step": 3080000 }, { "epoch": 0.92, "eval_accuracy": 0.5234982181134358, "eval_loss": 2.1640625, "eval_runtime": 39.9703, "eval_samples_per_second": 89.266, "eval_steps_per_second": 11.158, "step": 3080000 }, { "epoch": 0.92, "learning_rate": 4.7755148993687484e-05, "loss": 2.1385, "step": 3085000 }, { "epoch": 0.92, "eval_accuracy": 0.5236360239162929, "eval_loss": 2.1640625, "eval_runtime": 40.1115, "eval_samples_per_second": 88.952, "eval_steps_per_second": 11.119, "step": 3085000 }, { "epoch": 0.93, "learning_rate": 4.768031663255573e-05, "loss": 2.135, "step": 3090000 }, { "epoch": 0.93, "eval_accuracy": 0.5233746586361225, "eval_loss": 2.166015625, "eval_runtime": 40.1304, "eval_samples_per_second": 88.91, "eval_steps_per_second": 11.114, "step": 3090000 }, { "epoch": 0.93, "learning_rate": 4.760548427142398e-05, "loss": 2.1401, "step": 3095000 }, { "epoch": 0.93, "eval_accuracy": 0.5235689018055574, "eval_loss": 2.1640625, "eval_runtime": 40.1365, "eval_samples_per_second": 88.897, "eval_steps_per_second": 11.112, "step": 3095000 }, { "epoch": 0.93, "learning_rate": 4.753066688874723e-05, "loss": 2.1374, "step": 3100000 }, { "epoch": 0.93, "eval_accuracy": 0.5235552034156113, "eval_loss": 2.1640625, "eval_runtime": 40.133, "eval_samples_per_second": 88.904, "eval_steps_per_second": 11.113, "step": 3100000 }, { "epoch": 0.93, "learning_rate": 4.745586448452546e-05, "loss": 2.1358, "step": 3105000 }, { "epoch": 0.93, "eval_accuracy": 0.5237198580627628, "eval_loss": 2.1640625, "eval_runtime": 40.0267, "eval_samples_per_second": 89.141, "eval_steps_per_second": 11.143, "step": 3105000 }, { "epoch": 0.93, "learning_rate": 4.7381032123393713e-05, "loss": 2.1344, "step": 3110000 }, { "epoch": 0.93, "eval_accuracy": 0.5239288954933393, "eval_loss": 2.162109375, "eval_runtime": 40.1156, "eval_samples_per_second": 88.943, "eval_steps_per_second": 11.118, "step": 3110000 }, { "epoch": 0.93, "learning_rate": 4.730621474071695e-05, "loss": 2.1368, "step": 3115000 }, { "epoch": 0.93, "eval_accuracy": 0.5238793073217346, "eval_loss": 2.162109375, "eval_runtime": 40.1212, "eval_samples_per_second": 88.93, "eval_steps_per_second": 11.116, "step": 3115000 }, { "epoch": 0.93, "learning_rate": 4.723136740113022e-05, "loss": 2.1345, "step": 3120000 }, { "epoch": 0.93, "eval_accuracy": 0.5236836943133052, "eval_loss": 2.162109375, "eval_runtime": 40.203, "eval_samples_per_second": 88.75, "eval_steps_per_second": 11.094, "step": 3120000 }, { "epoch": 0.94, "learning_rate": 4.7156535039998465e-05, "loss": 2.1358, "step": 3125000 }, { "epoch": 0.94, "eval_accuracy": 0.5238979371320612, "eval_loss": 2.162109375, "eval_runtime": 40.2344, "eval_samples_per_second": 88.68, "eval_steps_per_second": 11.085, "step": 3125000 }, { "epoch": 0.94, "learning_rate": 4.708171765732171e-05, "loss": 2.1395, "step": 3130000 }, { "epoch": 0.94, "eval_accuracy": 0.5239330050103231, "eval_loss": 2.162109375, "eval_runtime": 40.4542, "eval_samples_per_second": 88.199, "eval_steps_per_second": 11.025, "step": 3130000 }, { "epoch": 0.94, "learning_rate": 4.700690027464496e-05, "loss": 2.1359, "step": 3135000 }, { "epoch": 0.94, "eval_accuracy": 0.5242872453743277, "eval_loss": 2.162109375, "eval_runtime": 40.3355, "eval_samples_per_second": 88.458, "eval_steps_per_second": 11.057, "step": 3135000 }, { "epoch": 0.94, "learning_rate": 4.693205293505821e-05, "loss": 2.1373, "step": 3140000 }, { "epoch": 0.94, "eval_accuracy": 0.5241631379614166, "eval_loss": 2.16015625, "eval_runtime": 40.2137, "eval_samples_per_second": 88.726, "eval_steps_per_second": 11.091, "step": 3140000 }, { "epoch": 0.94, "learning_rate": 4.6857235552381456e-05, "loss": 2.1357, "step": 3145000 }, { "epoch": 0.94, "eval_accuracy": 0.5243412170307151, "eval_loss": 2.16015625, "eval_runtime": 40.3184, "eval_samples_per_second": 88.496, "eval_steps_per_second": 11.062, "step": 3145000 }, { "epoch": 0.94, "learning_rate": 4.67824181697047e-05, "loss": 2.1354, "step": 3150000 }, { "epoch": 0.94, "eval_accuracy": 0.5243636823902266, "eval_loss": 2.16015625, "eval_runtime": 40.1892, "eval_samples_per_second": 88.78, "eval_steps_per_second": 11.098, "step": 3150000 }, { "epoch": 0.95, "learning_rate": 4.670757083011796e-05, "loss": 2.1323, "step": 3155000 }, { "epoch": 0.95, "eval_accuracy": 0.5243839560073468, "eval_loss": 2.16015625, "eval_runtime": 40.3635, "eval_samples_per_second": 88.397, "eval_steps_per_second": 11.05, "step": 3155000 }, { "epoch": 0.95, "learning_rate": 4.66327534474412e-05, "loss": 2.133, "step": 3160000 }, { "epoch": 0.95, "eval_accuracy": 0.5242327257823425, "eval_loss": 2.16015625, "eval_runtime": 41.1497, "eval_samples_per_second": 86.708, "eval_steps_per_second": 10.838, "step": 3160000 }, { "epoch": 0.95, "learning_rate": 4.655790610785447e-05, "loss": 2.1315, "step": 3165000 }, { "epoch": 0.95, "eval_accuracy": 0.524407243270255, "eval_loss": 2.16015625, "eval_runtime": 41.4927, "eval_samples_per_second": 85.991, "eval_steps_per_second": 10.749, "step": 3165000 }, { "epoch": 0.95, "learning_rate": 4.6483088725177706e-05, "loss": 2.1363, "step": 3170000 }, { "epoch": 0.95, "eval_accuracy": 0.5242845056963384, "eval_loss": 2.16015625, "eval_runtime": 43.5552, "eval_samples_per_second": 81.919, "eval_steps_per_second": 10.24, "step": 3170000 }, { "epoch": 0.95, "learning_rate": 4.640825636404596e-05, "loss": 2.1349, "step": 3175000 }, { "epoch": 0.95, "eval_accuracy": 0.5245045018388719, "eval_loss": 2.16015625, "eval_runtime": 41.5498, "eval_samples_per_second": 85.873, "eval_steps_per_second": 10.734, "step": 3175000 }, { "epoch": 0.95, "learning_rate": 4.633342400291421e-05, "loss": 2.1336, "step": 3180000 }, { "epoch": 0.95, "eval_accuracy": 0.524365600164819, "eval_loss": 2.16015625, "eval_runtime": 45.093, "eval_samples_per_second": 79.125, "eval_steps_per_second": 9.891, "step": 3180000 }, { "epoch": 0.95, "learning_rate": 4.625860662023746e-05, "loss": 2.1364, "step": 3185000 }, { "epoch": 0.95, "eval_accuracy": 0.5243612166800363, "eval_loss": 2.158203125, "eval_runtime": 42.4027, "eval_samples_per_second": 84.146, "eval_steps_per_second": 10.518, "step": 3185000 }, { "epoch": 0.96, "learning_rate": 4.618375928065071e-05, "loss": 2.133, "step": 3190000 }, { "epoch": 0.96, "eval_accuracy": 0.5243256008661766, "eval_loss": 2.158203125, "eval_runtime": 40.6817, "eval_samples_per_second": 87.705, "eval_steps_per_second": 10.963, "step": 3190000 }, { "epoch": 0.96, "learning_rate": 4.6108941897973956e-05, "loss": 2.1349, "step": 3195000 }, { "epoch": 0.96, "eval_accuracy": 0.5245045018388719, "eval_loss": 2.158203125, "eval_runtime": 42.2213, "eval_samples_per_second": 84.507, "eval_steps_per_second": 10.563, "step": 3195000 }, { "epoch": 0.96, "learning_rate": 4.60341245152972e-05, "loss": 2.134, "step": 3200000 }, { "epoch": 0.96, "eval_accuracy": 0.5245702541106129, "eval_loss": 2.158203125, "eval_runtime": 41.8771, "eval_samples_per_second": 85.202, "eval_steps_per_second": 10.65, "step": 3200000 }, { "epoch": 0.96, "learning_rate": 4.5959277175710455e-05, "loss": 2.1308, "step": 3205000 }, { "epoch": 0.96, "eval_accuracy": 0.5249423023815473, "eval_loss": 2.15625, "eval_runtime": 42.7291, "eval_samples_per_second": 83.503, "eval_steps_per_second": 10.438, "step": 3205000 }, { "epoch": 0.96, "learning_rate": 4.5884444814578715e-05, "loss": 2.1302, "step": 3210000 }, { "epoch": 0.96, "eval_accuracy": 0.5246628552266481, "eval_loss": 2.15625, "eval_runtime": 41.7473, "eval_samples_per_second": 85.467, "eval_steps_per_second": 10.683, "step": 3210000 }, { "epoch": 0.96, "learning_rate": 4.580964241035695e-05, "loss": 2.1302, "step": 3215000 }, { "epoch": 0.96, "eval_accuracy": 0.5246708002928168, "eval_loss": 2.15625, "eval_runtime": 43.5638, "eval_samples_per_second": 81.903, "eval_steps_per_second": 10.238, "step": 3215000 }, { "epoch": 0.96, "learning_rate": 4.5734825027680186e-05, "loss": 2.1331, "step": 3220000 }, { "epoch": 0.96, "eval_accuracy": 0.5247650452156455, "eval_loss": 2.15625, "eval_runtime": 42.8533, "eval_samples_per_second": 83.261, "eval_steps_per_second": 10.408, "step": 3220000 }, { "epoch": 0.97, "learning_rate": 4.565999266654844e-05, "loss": 2.1273, "step": 3225000 }, { "epoch": 0.97, "eval_accuracy": 0.5247250459170031, "eval_loss": 2.15625, "eval_runtime": 42.4534, "eval_samples_per_second": 84.045, "eval_steps_per_second": 10.506, "step": 3225000 }, { "epoch": 0.97, "learning_rate": 4.5585175283871685e-05, "loss": 2.1286, "step": 3230000 }, { "epoch": 0.97, "eval_accuracy": 0.5249765483564124, "eval_loss": 2.15625, "eval_runtime": 43.0626, "eval_samples_per_second": 82.856, "eval_steps_per_second": 10.357, "step": 3230000 }, { "epoch": 0.97, "learning_rate": 4.551034292273994e-05, "loss": 2.1282, "step": 3235000 }, { "epoch": 0.97, "eval_accuracy": 0.525047232048534, "eval_loss": 2.154296875, "eval_runtime": 42.4424, "eval_samples_per_second": 84.067, "eval_steps_per_second": 10.508, "step": 3235000 }, { "epoch": 0.97, "learning_rate": 4.543552554006317e-05, "loss": 2.1309, "step": 3240000 }, { "epoch": 0.97, "eval_accuracy": 0.5250951764133451, "eval_loss": 2.154296875, "eval_runtime": 44.2507, "eval_samples_per_second": 80.631, "eval_steps_per_second": 10.079, "step": 3240000 }, { "epoch": 0.97, "learning_rate": 4.5360708157386415e-05, "loss": 2.1295, "step": 3245000 }, { "epoch": 0.97, "eval_accuracy": 0.5253592813715047, "eval_loss": 2.154296875, "eval_runtime": 44.2616, "eval_samples_per_second": 80.612, "eval_steps_per_second": 10.076, "step": 3245000 }, { "epoch": 0.97, "learning_rate": 4.528589077470966e-05, "loss": 2.1275, "step": 3250000 }, { "epoch": 0.97, "eval_accuracy": 0.5253707880190593, "eval_loss": 2.154296875, "eval_runtime": 43.0103, "eval_samples_per_second": 82.957, "eval_steps_per_second": 10.37, "step": 3250000 }, { "epoch": 0.98, "learning_rate": 4.5211058413577914e-05, "loss": 2.133, "step": 3255000 }, { "epoch": 0.98, "eval_accuracy": 0.5253924314751741, "eval_loss": 2.154296875, "eval_runtime": 41.4169, "eval_samples_per_second": 86.148, "eval_steps_per_second": 10.769, "step": 3255000 }, { "epoch": 0.98, "learning_rate": 4.513622605244616e-05, "loss": 2.1301, "step": 3260000 }, { "epoch": 0.98, "eval_accuracy": 0.5251132582880739, "eval_loss": 2.154296875, "eval_runtime": 42.1862, "eval_samples_per_second": 84.577, "eval_steps_per_second": 10.572, "step": 3260000 }, { "epoch": 0.98, "learning_rate": 4.5061408669769406e-05, "loss": 2.1314, "step": 3265000 }, { "epoch": 0.98, "eval_accuracy": 0.5253096931999001, "eval_loss": 2.15234375, "eval_runtime": 43.9727, "eval_samples_per_second": 81.141, "eval_steps_per_second": 10.143, "step": 3265000 }, { "epoch": 0.98, "learning_rate": 4.498656133018267e-05, "loss": 2.1258, "step": 3270000 }, { "epoch": 0.98, "eval_accuracy": 0.5254751697504482, "eval_loss": 2.15234375, "eval_runtime": 42.0779, "eval_samples_per_second": 84.795, "eval_steps_per_second": 10.599, "step": 3270000 }, { "epoch": 0.98, "learning_rate": 4.491172896905092e-05, "loss": 2.1286, "step": 3275000 }, { "epoch": 0.98, "eval_accuracy": 0.525354897886722, "eval_loss": 2.15234375, "eval_runtime": 41.8893, "eval_samples_per_second": 85.177, "eval_steps_per_second": 10.647, "step": 3275000 }, { "epoch": 0.98, "learning_rate": 4.4836911586374165e-05, "loss": 2.1267, "step": 3280000 }, { "epoch": 0.98, "eval_accuracy": 0.5253622950172928, "eval_loss": 2.15234375, "eval_runtime": 42.0241, "eval_samples_per_second": 84.904, "eval_steps_per_second": 10.613, "step": 3280000 }, { "epoch": 0.98, "learning_rate": 4.476207922524242e-05, "loss": 2.13, "step": 3285000 }, { "epoch": 0.98, "eval_accuracy": 0.5254332526772133, "eval_loss": 2.15234375, "eval_runtime": 43.4043, "eval_samples_per_second": 82.204, "eval_steps_per_second": 10.275, "step": 3285000 }, { "epoch": 0.99, "learning_rate": 4.4687246864110663e-05, "loss": 2.1284, "step": 3290000 }, { "epoch": 0.99, "eval_accuracy": 0.5254976351099597, "eval_loss": 2.15234375, "eval_runtime": 43.4747, "eval_samples_per_second": 82.071, "eval_steps_per_second": 10.259, "step": 3290000 }, { "epoch": 0.99, "learning_rate": 4.461242948143391e-05, "loss": 2.1295, "step": 3295000 }, { "epoch": 0.99, "eval_accuracy": 0.5254190063516695, "eval_loss": 2.15234375, "eval_runtime": 44.0831, "eval_samples_per_second": 80.938, "eval_steps_per_second": 10.117, "step": 3295000 }, { "epoch": 0.99, "learning_rate": 4.453759712030216e-05, "loss": 2.1241, "step": 3300000 }, { "epoch": 0.99, "eval_accuracy": 0.5255573600901244, "eval_loss": 2.15234375, "eval_runtime": 43.4229, "eval_samples_per_second": 82.169, "eval_steps_per_second": 10.271, "step": 3300000 }, { "epoch": 0.99, "learning_rate": 4.446276475917041e-05, "loss": 2.1297, "step": 3305000 }, { "epoch": 0.99, "eval_accuracy": 0.5257825616208374, "eval_loss": 2.15234375, "eval_runtime": 41.6675, "eval_samples_per_second": 85.63, "eval_steps_per_second": 10.704, "step": 3305000 }, { "epoch": 0.99, "learning_rate": 4.4387947376493654e-05, "loss": 2.126, "step": 3310000 }, { "epoch": 0.99, "eval_accuracy": 0.5256425640755888, "eval_loss": 2.150390625, "eval_runtime": 43.5829, "eval_samples_per_second": 81.867, "eval_steps_per_second": 10.233, "step": 3310000 }, { "epoch": 0.99, "learning_rate": 4.43131299938169e-05, "loss": 2.1263, "step": 3315000 }, { "epoch": 0.99, "eval_accuracy": 0.5255672229308856, "eval_loss": 2.150390625, "eval_runtime": 40.6138, "eval_samples_per_second": 87.852, "eval_steps_per_second": 10.981, "step": 3315000 }, { "epoch": 0.99, "learning_rate": 4.423828265423016e-05, "loss": 2.1273, "step": 3320000 }, { "epoch": 0.99, "eval_accuracy": 0.525589962258196, "eval_loss": 2.150390625, "eval_runtime": 41.1643, "eval_samples_per_second": 86.677, "eval_steps_per_second": 10.835, "step": 3320000 }, { "epoch": 1.0, "learning_rate": 4.4163450293098406e-05, "loss": 2.1214, "step": 3325000 }, { "epoch": 1.0, "eval_accuracy": 0.5255433877323795, "eval_loss": 2.150390625, "eval_runtime": 40.8274, "eval_samples_per_second": 87.392, "eval_steps_per_second": 10.924, "step": 3325000 }, { "epoch": 1.0, "learning_rate": 4.408863291042165e-05, "loss": 2.1275, "step": 3330000 }, { "epoch": 1.0, "eval_accuracy": 0.5255872225802068, "eval_loss": 2.150390625, "eval_runtime": 42.63, "eval_samples_per_second": 83.697, "eval_steps_per_second": 10.462, "step": 3330000 }, { "epoch": 1.0, "learning_rate": 4.40138155277449e-05, "loss": 2.1227, "step": 3335000 }, { "epoch": 1.0, "eval_accuracy": 0.5258348894704312, "eval_loss": 2.150390625, "eval_runtime": 42.4569, "eval_samples_per_second": 84.038, "eval_steps_per_second": 10.505, "step": 3335000 }, { "epoch": 1.0, "step": 3338128, "total_flos": 1.0872066371139498e+21, "train_loss": 0.2576859601399347, "train_runtime": 150388.7494, "train_samples_per_second": 177.573, "train_steps_per_second": 22.197 } ], "max_steps": 3338128, "num_train_epochs": 1, "total_flos": 1.0872066371139498e+21, "trial_name": null, "trial_params": null }