diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10030 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 3338128, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9925557078698005e-05, + "loss": 5.3279, + "step": 5000 + }, + { + "epoch": 0.0, + "eval_accuracy": 0.31327971235572855, + "eval_loss": 3.994140625, + "eval_runtime": 39.5933, + "eval_samples_per_second": 90.116, + "eval_steps_per_second": 11.265, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 4.985067978220128e-05, + "loss": 3.5754, + "step": 10000 + }, + { + "epoch": 0.0, + "eval_accuracy": 0.3823957607318666, + "eval_loss": 3.310546875, + "eval_runtime": 39.5685, + "eval_samples_per_second": 90.173, + "eval_steps_per_second": 11.272, + "step": 10000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9775862399524525e-05, + "loss": 3.6102, + "step": 15000 + }, + { + "epoch": 0.0, + "eval_accuracy": 0.39768097216925513, + "eval_loss": 3.166015625, + "eval_runtime": 39.5825, + "eval_samples_per_second": 90.141, + "eval_steps_per_second": 11.268, + "step": 15000 + }, + { + "epoch": 0.01, + "learning_rate": 4.970101505993779e-05, + "loss": 3.0639, + "step": 20000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.41336891627105715, + "eval_loss": 3.021484375, + "eval_runtime": 39.5961, + "eval_samples_per_second": 90.11, + "eval_steps_per_second": 11.264, + "step": 20000 + }, + { + "epoch": 0.01, + "learning_rate": 4.962618269880604e-05, + "loss": 2.9477, + "step": 25000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.425242680676284, + "eval_loss": 2.919921875, + "eval_runtime": 39.6071, + "eval_samples_per_second": 90.085, + "eval_steps_per_second": 11.261, + "step": 25000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9551335359219304e-05, + "loss": 2.8589, + "step": 30000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.4315466797294513, + "eval_loss": 2.8671875, + "eval_runtime": 39.6079, + "eval_samples_per_second": 90.083, + "eval_steps_per_second": 11.26, + "step": 30000 + }, + { + "epoch": 0.01, + "learning_rate": 4.947647304117757e-05, + "loss": 2.8063, + "step": 35000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.43875449855125825, + "eval_loss": 2.802734375, + "eval_runtime": 39.6873, + "eval_samples_per_second": 89.903, + "eval_steps_per_second": 11.238, + "step": 35000 + }, + { + "epoch": 0.01, + "learning_rate": 4.940162570159084e-05, + "loss": 2.7646, + "step": 40000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.44185690990623727, + "eval_loss": 2.771484375, + "eval_runtime": 39.6738, + "eval_samples_per_second": 89.933, + "eval_steps_per_second": 11.242, + "step": 40000 + }, + { + "epoch": 0.01, + "learning_rate": 4.932679334045909e-05, + "loss": 2.7306, + "step": 45000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.4467244957896629, + "eval_loss": 2.736328125, + "eval_runtime": 39.7305, + "eval_samples_per_second": 89.805, + "eval_steps_per_second": 11.226, + "step": 45000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9251960979327336e-05, + "loss": 2.7106, + "step": 50000 + }, + { + "epoch": 0.01, + "eval_accuracy": 0.4492548623804952, + "eval_loss": 2.712890625, + "eval_runtime": 39.6605, + "eval_samples_per_second": 89.964, + "eval_steps_per_second": 11.245, + "step": 50000 + }, + { + "epoch": 0.02, + "learning_rate": 4.917712861819559e-05, + "loss": 2.6829, + "step": 55000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.45224302916332426, + "eval_loss": 2.689453125, + "eval_runtime": 39.6345, + "eval_samples_per_second": 90.023, + "eval_steps_per_second": 11.253, + "step": 55000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9102311235518835e-05, + "loss": 2.6703, + "step": 60000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.45370464737056665, + "eval_loss": 2.67578125, + "eval_runtime": 39.6085, + "eval_samples_per_second": 90.082, + "eval_steps_per_second": 11.26, + "step": 60000 + }, + { + "epoch": 0.02, + "learning_rate": 4.90274489174771e-05, + "loss": 2.6522, + "step": 65000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.4559714569388372, + "eval_loss": 2.66015625, + "eval_runtime": 39.7158, + "eval_samples_per_second": 89.838, + "eval_steps_per_second": 11.23, + "step": 65000 + }, + { + "epoch": 0.02, + "learning_rate": 4.895260157789037e-05, + "loss": 2.6377, + "step": 70000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.4573840349100728, + "eval_loss": 2.6484375, + "eval_runtime": 40.0228, + "eval_samples_per_second": 89.149, + "eval_steps_per_second": 11.144, + "step": 70000 + }, + { + "epoch": 0.02, + "learning_rate": 4.8877769216758615e-05, + "loss": 2.6241, + "step": 75000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.4586505880444836, + "eval_loss": 2.634765625, + "eval_runtime": 39.747, + "eval_samples_per_second": 89.768, + "eval_steps_per_second": 11.221, + "step": 75000 + }, + { + "epoch": 0.02, + "learning_rate": 4.880295183408186e-05, + "loss": 2.6159, + "step": 80000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.46040042037619067, + "eval_loss": 2.625, + "eval_runtime": 39.8297, + "eval_samples_per_second": 89.581, + "eval_steps_per_second": 11.198, + "step": 80000 + }, + { + "epoch": 0.03, + "learning_rate": 4.872807453758514e-05, + "loss": 2.5959, + "step": 85000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.4612864322379005, + "eval_loss": 2.61328125, + "eval_runtime": 39.842, + "eval_samples_per_second": 89.554, + "eval_steps_per_second": 11.194, + "step": 85000 + }, + { + "epoch": 0.03, + "learning_rate": 4.86532271979984e-05, + "loss": 2.5877, + "step": 90000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.4624083303744811, + "eval_loss": 2.603515625, + "eval_runtime": 39.7194, + "eval_samples_per_second": 89.83, + "eval_steps_per_second": 11.229, + "step": 90000 + }, + { + "epoch": 0.03, + "learning_rate": 4.857840981532165e-05, + "loss": 2.5832, + "step": 95000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.46323187757803697, + "eval_loss": 2.599609375, + "eval_runtime": 40.0969, + "eval_samples_per_second": 88.984, + "eval_steps_per_second": 11.123, + "step": 95000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8503562475734907e-05, + "loss": 2.5726, + "step": 100000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.46476143979941176, + "eval_loss": 2.5859375, + "eval_runtime": 39.6873, + "eval_samples_per_second": 89.903, + "eval_steps_per_second": 11.238, + "step": 100000 + }, + { + "epoch": 0.03, + "learning_rate": 4.8428775049968125e-05, + "loss": 2.5723, + "step": 105000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.46553211121777593, + "eval_loss": 2.580078125, + "eval_runtime": 39.7946, + "eval_samples_per_second": 89.66, + "eval_steps_per_second": 11.208, + "step": 105000 + }, + { + "epoch": 0.03, + "learning_rate": 4.83539127319264e-05, + "loss": 2.5584, + "step": 110000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.46414501225183996, + "eval_loss": 2.59375, + "eval_runtime": 39.8172, + "eval_samples_per_second": 89.61, + "eval_steps_per_second": 11.201, + "step": 110000 + }, + { + "epoch": 0.03, + "learning_rate": 4.827905041388467e-05, + "loss": 2.5541, + "step": 115000 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.4673400247228542, + "eval_loss": 2.56640625, + "eval_runtime": 39.875, + "eval_samples_per_second": 89.48, + "eval_steps_per_second": 11.185, + "step": 115000 + }, + { + "epoch": 0.04, + "learning_rate": 4.820423303120791e-05, + "loss": 2.541, + "step": 120000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.46835206177206756, + "eval_loss": 2.55859375, + "eval_runtime": 39.7895, + "eval_samples_per_second": 89.672, + "eval_steps_per_second": 11.209, + "step": 120000 + }, + { + "epoch": 0.04, + "learning_rate": 4.812941564853116e-05, + "loss": 2.5359, + "step": 125000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.4673909827334534, + "eval_loss": 2.564453125, + "eval_runtime": 39.8856, + "eval_samples_per_second": 89.456, + "eval_steps_per_second": 11.182, + "step": 125000 + }, + { + "epoch": 0.04, + "learning_rate": 4.805458328739941e-05, + "loss": 2.5298, + "step": 130000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.4699054591919484, + "eval_loss": 2.544921875, + "eval_runtime": 39.8462, + "eval_samples_per_second": 89.544, + "eval_steps_per_second": 11.193, + "step": 130000 + }, + { + "epoch": 0.04, + "learning_rate": 4.797972096935768e-05, + "loss": 2.5258, + "step": 135000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.47030819185636197, + "eval_loss": 2.541015625, + "eval_runtime": 39.8831, + "eval_samples_per_second": 89.461, + "eval_steps_per_second": 11.183, + "step": 135000 + }, + { + "epoch": 0.04, + "learning_rate": 4.790488860822593e-05, + "loss": 2.5207, + "step": 140000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.47090352388341683, + "eval_loss": 2.537109375, + "eval_runtime": 40.0239, + "eval_samples_per_second": 89.147, + "eval_steps_per_second": 11.143, + "step": 140000 + }, + { + "epoch": 0.04, + "learning_rate": 4.783005624709418e-05, + "loss": 2.5167, + "step": 145000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.47193419074295684, + "eval_loss": 2.53125, + "eval_runtime": 39.9144, + "eval_samples_per_second": 89.391, + "eval_steps_per_second": 11.174, + "step": 145000 + }, + { + "epoch": 0.04, + "learning_rate": 4.7755223885962435e-05, + "loss": 2.5101, + "step": 150000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.4701947691876088, + "eval_loss": 2.544921875, + "eval_runtime": 40.0269, + "eval_samples_per_second": 89.14, + "eval_steps_per_second": 11.143, + "step": 150000 + }, + { + "epoch": 0.05, + "learning_rate": 4.768039152483069e-05, + "loss": 2.5058, + "step": 155000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.4730330755844281, + "eval_loss": 2.521484375, + "eval_runtime": 39.9333, + "eval_samples_per_second": 89.349, + "eval_steps_per_second": 11.169, + "step": 155000 + }, + { + "epoch": 0.05, + "learning_rate": 4.760554418524395e-05, + "loss": 2.5021, + "step": 160000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.473403480048569, + "eval_loss": 2.51953125, + "eval_runtime": 40.0331, + "eval_samples_per_second": 89.126, + "eval_steps_per_second": 11.141, + "step": 160000 + }, + { + "epoch": 0.05, + "learning_rate": 4.753084663020711e-05, + "loss": 2.8135, + "step": 165000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.4317466762226635, + "eval_loss": 2.83203125, + "eval_runtime": 40.0127, + "eval_samples_per_second": 89.172, + "eval_steps_per_second": 11.146, + "step": 165000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7455954355255405e-05, + "loss": 2.7932, + "step": 170000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.4729974597705684, + "eval_loss": 2.521484375, + "eval_runtime": 39.9612, + "eval_samples_per_second": 89.287, + "eval_steps_per_second": 11.161, + "step": 170000 + }, + { + "epoch": 0.05, + "learning_rate": 4.7381077058758686e-05, + "loss": 2.4914, + "step": 175000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.4751818050313638, + "eval_loss": 2.505859375, + "eval_runtime": 40.3867, + "eval_samples_per_second": 88.346, + "eval_steps_per_second": 11.043, + "step": 175000 + }, + { + "epoch": 0.05, + "learning_rate": 4.730621474071695e-05, + "loss": 2.487, + "step": 180000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.4753875548483533, + "eval_loss": 2.50390625, + "eval_runtime": 40.0145, + "eval_samples_per_second": 89.168, + "eval_steps_per_second": 11.146, + "step": 180000 + }, + { + "epoch": 0.06, + "learning_rate": 4.723138237958521e-05, + "loss": 2.4829, + "step": 185000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.47510947753244875, + "eval_loss": 2.50390625, + "eval_runtime": 40.0338, + "eval_samples_per_second": 89.125, + "eval_steps_per_second": 11.141, + "step": 185000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7156505083088486e-05, + "loss": 2.4778, + "step": 190000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.47625877244892145, + "eval_loss": 2.49609375, + "eval_runtime": 40.0473, + "eval_samples_per_second": 89.095, + "eval_steps_per_second": 11.137, + "step": 190000 + }, + { + "epoch": 0.06, + "learning_rate": 4.708170267886672e-05, + "loss": 2.4779, + "step": 195000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.4770064305721763, + "eval_loss": 2.4921875, + "eval_runtime": 40.1061, + "eval_samples_per_second": 88.964, + "eval_steps_per_second": 11.121, + "step": 195000 + }, + { + "epoch": 0.06, + "learning_rate": 4.700687031773498e-05, + "loss": 2.4685, + "step": 200000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.4765894515822188, + "eval_loss": 2.494140625, + "eval_runtime": 40.1514, + "eval_samples_per_second": 88.864, + "eval_steps_per_second": 11.108, + "step": 200000 + }, + { + "epoch": 0.06, + "learning_rate": 4.693202297814823e-05, + "loss": 2.4661, + "step": 205000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.47763491270290054, + "eval_loss": 2.484375, + "eval_runtime": 40.1854, + "eval_samples_per_second": 88.788, + "eval_steps_per_second": 11.099, + "step": 205000 + }, + { + "epoch": 0.06, + "learning_rate": 4.6857190617016483e-05, + "loss": 2.4579, + "step": 210000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.47826366880142374, + "eval_loss": 2.48046875, + "eval_runtime": 40.1794, + "eval_samples_per_second": 88.802, + "eval_steps_per_second": 11.1, + "step": 210000 + }, + { + "epoch": 0.06, + "learning_rate": 4.6782358255884736e-05, + "loss": 2.4589, + "step": 215000 + }, + { + "epoch": 0.06, + "eval_accuracy": 0.4787937964923355, + "eval_loss": 2.478515625, + "eval_runtime": 40.1613, + "eval_samples_per_second": 88.842, + "eval_steps_per_second": 11.105, + "step": 215000 + }, + { + "epoch": 0.07, + "learning_rate": 4.6707540873207975e-05, + "loss": 2.4571, + "step": 220000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.47927871949642525, + "eval_loss": 2.474609375, + "eval_runtime": 40.1097, + "eval_samples_per_second": 88.956, + "eval_steps_per_second": 11.119, + "step": 220000 + }, + { + "epoch": 0.07, + "learning_rate": 4.663269353362124e-05, + "loss": 2.4504, + "step": 225000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.47965542521994137, + "eval_loss": 2.47265625, + "eval_runtime": 40.1284, + "eval_samples_per_second": 88.915, + "eval_steps_per_second": 11.114, + "step": 225000 + }, + { + "epoch": 0.07, + "learning_rate": 4.65578461940345e-05, + "loss": 2.4538, + "step": 230000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.47995733773435206, + "eval_loss": 2.46875, + "eval_runtime": 40.2242, + "eval_samples_per_second": 88.703, + "eval_steps_per_second": 11.088, + "step": 230000 + }, + { + "epoch": 0.07, + "learning_rate": 4.6483013832902755e-05, + "loss": 2.4481, + "step": 235000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.4806011620618159, + "eval_loss": 2.466796875, + "eval_runtime": 40.1909, + "eval_samples_per_second": 88.776, + "eval_steps_per_second": 11.097, + "step": 235000 + }, + { + "epoch": 0.07, + "learning_rate": 4.640815151486103e-05, + "loss": 2.4454, + "step": 240000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.4809529367156302, + "eval_loss": 2.4609375, + "eval_runtime": 40.319, + "eval_samples_per_second": 88.494, + "eval_steps_per_second": 11.062, + "step": 240000 + }, + { + "epoch": 0.07, + "learning_rate": 4.6333319153729274e-05, + "loss": 2.44, + "step": 245000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.4811408786256898, + "eval_loss": 2.458984375, + "eval_runtime": 40.235, + "eval_samples_per_second": 88.679, + "eval_steps_per_second": 11.085, + "step": 245000 + }, + { + "epoch": 0.07, + "learning_rate": 4.625850177105252e-05, + "loss": 2.4392, + "step": 250000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.4810606060606061, + "eval_loss": 2.458984375, + "eval_runtime": 40.2635, + "eval_samples_per_second": 88.616, + "eval_steps_per_second": 11.077, + "step": 250000 + }, + { + "epoch": 0.08, + "learning_rate": 4.618366940992077e-05, + "loss": 2.431, + "step": 255000 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.48131512214580346, + "eval_loss": 2.45703125, + "eval_runtime": 40.2108, + "eval_samples_per_second": 88.732, + "eval_steps_per_second": 11.092, + "step": 255000 + }, + { + "epoch": 0.08, + "learning_rate": 4.610885202724401e-05, + "loss": 2.4377, + "step": 260000 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.482264420569064, + "eval_loss": 2.451171875, + "eval_runtime": 40.1835, + "eval_samples_per_second": 88.793, + "eval_steps_per_second": 11.099, + "step": 260000 + }, + { + "epoch": 0.08, + "learning_rate": 4.6033959752292307e-05, + "loss": 2.4299, + "step": 265000 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.4825972914447528, + "eval_loss": 2.447265625, + "eval_runtime": 40.2876, + "eval_samples_per_second": 88.563, + "eval_steps_per_second": 11.07, + "step": 265000 + }, + { + "epoch": 0.08, + "learning_rate": 4.5959142369615546e-05, + "loss": 2.4283, + "step": 270000 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.482810712360112, + "eval_loss": 2.447265625, + "eval_runtime": 40.3402, + "eval_samples_per_second": 88.448, + "eval_steps_per_second": 11.056, + "step": 270000 + }, + { + "epoch": 0.08, + "learning_rate": 4.5884295030028805e-05, + "loss": 2.4256, + "step": 275000 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.48325371829096697, + "eval_loss": 2.443359375, + "eval_runtime": 40.3191, + "eval_samples_per_second": 88.494, + "eval_steps_per_second": 11.062, + "step": 275000 + }, + { + "epoch": 0.08, + "learning_rate": 4.5809462668897065e-05, + "loss": 2.4198, + "step": 280000 + }, + { + "epoch": 0.08, + "eval_accuracy": 0.48383589986367365, + "eval_loss": 2.44140625, + "eval_runtime": 40.3148, + "eval_samples_per_second": 88.503, + "eval_steps_per_second": 11.063, + "step": 280000 + }, + { + "epoch": 0.09, + "learning_rate": 4.573461532931032e-05, + "loss": 2.4174, + "step": 285000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.4840175405143581, + "eval_loss": 2.44140625, + "eval_runtime": 40.703, + "eval_samples_per_second": 87.659, + "eval_steps_per_second": 10.957, + "step": 285000 + }, + { + "epoch": 0.09, + "learning_rate": 4.565978296817857e-05, + "loss": 2.4151, + "step": 290000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.4844402728280929, + "eval_loss": 2.435546875, + "eval_runtime": 41.2392, + "eval_samples_per_second": 86.52, + "eval_steps_per_second": 10.815, + "step": 290000 + }, + { + "epoch": 0.09, + "learning_rate": 4.558493562859184e-05, + "loss": 2.4191, + "step": 295000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.4847421853425036, + "eval_loss": 2.43359375, + "eval_runtime": 40.6977, + "eval_samples_per_second": 87.671, + "eval_steps_per_second": 10.959, + "step": 295000 + }, + { + "epoch": 0.09, + "learning_rate": 4.55100882890051e-05, + "loss": 2.4071, + "step": 300000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.4848361562975334, + "eval_loss": 2.431640625, + "eval_runtime": 40.2058, + "eval_samples_per_second": 88.744, + "eval_steps_per_second": 11.093, + "step": 300000 + }, + { + "epoch": 0.09, + "learning_rate": 4.543524094941836e-05, + "loss": 2.4126, + "step": 305000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.48549011743355736, + "eval_loss": 2.427734375, + "eval_runtime": 40.4122, + "eval_samples_per_second": 88.29, + "eval_steps_per_second": 11.036, + "step": 305000 + }, + { + "epoch": 0.09, + "learning_rate": 4.536045352365158e-05, + "loss": 2.4053, + "step": 310000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.48513834277974305, + "eval_loss": 2.4296875, + "eval_runtime": 40.2632, + "eval_samples_per_second": 88.617, + "eval_steps_per_second": 11.077, + "step": 310000 + }, + { + "epoch": 0.09, + "learning_rate": 4.528559120560985e-05, + "loss": 2.4071, + "step": 315000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.4857840848817993, + "eval_loss": 2.42578125, + "eval_runtime": 40.3587, + "eval_samples_per_second": 88.407, + "eval_steps_per_second": 11.051, + "step": 315000 + }, + { + "epoch": 0.1, + "learning_rate": 4.5210743866023116e-05, + "loss": 2.4027, + "step": 320000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.486615303183725, + "eval_loss": 2.421875, + "eval_runtime": 40.2594, + "eval_samples_per_second": 88.625, + "eval_steps_per_second": 11.078, + "step": 320000 + }, + { + "epoch": 0.1, + "learning_rate": 4.513591150489136e-05, + "loss": 2.4013, + "step": 325000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.4867040687505753, + "eval_loss": 2.41796875, + "eval_runtime": 40.2109, + "eval_samples_per_second": 88.732, + "eval_steps_per_second": 11.092, + "step": 325000 + }, + { + "epoch": 0.1, + "learning_rate": 4.5061049186849636e-05, + "loss": 2.4032, + "step": 330000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.4866416040924214, + "eval_loss": 2.41796875, + "eval_runtime": 40.2756, + "eval_samples_per_second": 88.59, + "eval_steps_per_second": 11.074, + "step": 330000 + }, + { + "epoch": 0.1, + "learning_rate": 4.498621682571789e-05, + "loss": 2.3919, + "step": 335000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.4870514599196069, + "eval_loss": 2.416015625, + "eval_runtime": 40.3049, + "eval_samples_per_second": 88.525, + "eval_steps_per_second": 11.066, + "step": 335000 + }, + { + "epoch": 0.1, + "learning_rate": 4.491136948613115e-05, + "loss": 2.3936, + "step": 340000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.4872848804842874, + "eval_loss": 2.4140625, + "eval_runtime": 40.3719, + "eval_samples_per_second": 88.378, + "eval_steps_per_second": 11.047, + "step": 340000 + }, + { + "epoch": 0.1, + "learning_rate": 4.48365371249994e-05, + "loss": 2.3905, + "step": 345000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.48784870621446635, + "eval_loss": 2.41015625, + "eval_runtime": 40.4162, + "eval_samples_per_second": 88.281, + "eval_steps_per_second": 11.035, + "step": 345000 + }, + { + "epoch": 0.1, + "learning_rate": 4.4761704763867654e-05, + "loss": 2.3889, + "step": 350000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.4881240438523818, + "eval_loss": 2.41015625, + "eval_runtime": 40.2942, + "eval_samples_per_second": 88.549, + "eval_steps_per_second": 11.069, + "step": 350000 + }, + { + "epoch": 0.11, + "learning_rate": 4.468688738119089e-05, + "loss": 2.3866, + "step": 355000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.48837801200198133, + "eval_loss": 2.408203125, + "eval_runtime": 40.2476, + "eval_samples_per_second": 88.651, + "eval_steps_per_second": 11.081, + "step": 355000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4612025063149173e-05, + "loss": 2.3823, + "step": 360000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.48875663550008985, + "eval_loss": 2.40625, + "eval_runtime": 40.3148, + "eval_samples_per_second": 88.504, + "eval_steps_per_second": 11.063, + "step": 360000 + }, + { + "epoch": 0.11, + "learning_rate": 4.4537177723562427e-05, + "loss": 2.3828, + "step": 365000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.48881882619044487, + "eval_loss": 2.40234375, + "eval_runtime": 40.2734, + "eval_samples_per_second": 88.594, + "eval_steps_per_second": 11.074, + "step": 365000 + }, + { + "epoch": 0.11, + "learning_rate": 4.446233038397569e-05, + "loss": 2.3795, + "step": 370000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.48893718027957866, + "eval_loss": 2.400390625, + "eval_runtime": 40.2791, + "eval_samples_per_second": 88.582, + "eval_steps_per_second": 11.073, + "step": 370000 + }, + { + "epoch": 0.11, + "learning_rate": 4.43875729151189e-05, + "loss": 2.3812, + "step": 375000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.48680735461076846, + "eval_loss": 2.416015625, + "eval_runtime": 40.6108, + "eval_samples_per_second": 87.858, + "eval_steps_per_second": 10.982, + "step": 375000 + }, + { + "epoch": 0.11, + "learning_rate": 4.431269561862217e-05, + "loss": 2.3789, + "step": 380000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.4895744293798684, + "eval_loss": 2.396484375, + "eval_runtime": 40.2591, + "eval_samples_per_second": 88.626, + "eval_steps_per_second": 11.078, + "step": 380000 + }, + { + "epoch": 0.12, + "learning_rate": 4.423786325749043e-05, + "loss": 2.372, + "step": 385000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.48950730726913283, + "eval_loss": 2.396484375, + "eval_runtime": 40.2108, + "eval_samples_per_second": 88.732, + "eval_steps_per_second": 11.092, + "step": 385000 + }, + { + "epoch": 0.12, + "learning_rate": 4.41630009394487e-05, + "loss": 2.3732, + "step": 390000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.4898645612789255, + "eval_loss": 2.396484375, + "eval_runtime": 40.4903, + "eval_samples_per_second": 88.12, + "eval_steps_per_second": 11.015, + "step": 390000 + }, + { + "epoch": 0.12, + "learning_rate": 4.408815359986196e-05, + "loss": 2.3725, + "step": 395000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.4903272928913027, + "eval_loss": 2.392578125, + "eval_runtime": 40.3547, + "eval_samples_per_second": 88.416, + "eval_steps_per_second": 11.052, + "step": 395000 + }, + { + "epoch": 0.12, + "learning_rate": 4.401332123873022e-05, + "loss": 2.3716, + "step": 400000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.49036948393233654, + "eval_loss": 2.390625, + "eval_runtime": 40.3654, + "eval_samples_per_second": 88.392, + "eval_steps_per_second": 11.049, + "step": 400000 + }, + { + "epoch": 0.12, + "learning_rate": 4.393848887759846e-05, + "loss": 2.3709, + "step": 405000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.49040482577839734, + "eval_loss": 2.390625, + "eval_runtime": 40.3034, + "eval_samples_per_second": 88.529, + "eval_steps_per_second": 11.066, + "step": 405000 + }, + { + "epoch": 0.12, + "learning_rate": 4.3863656516466716e-05, + "loss": 2.3619, + "step": 410000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.4906423558600616, + "eval_loss": 2.388671875, + "eval_runtime": 40.362, + "eval_samples_per_second": 88.4, + "eval_steps_per_second": 11.05, + "step": 410000 + }, + { + "epoch": 0.12, + "learning_rate": 4.378877921997e-05, + "loss": 2.367, + "step": 415000 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.49115138803045644, + "eval_loss": 2.38671875, + "eval_runtime": 40.2804, + "eval_samples_per_second": 88.579, + "eval_steps_per_second": 11.072, + "step": 415000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3713961837293236e-05, + "loss": 2.3639, + "step": 420000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.49116152483901654, + "eval_loss": 2.384765625, + "eval_runtime": 40.3366, + "eval_samples_per_second": 88.456, + "eval_steps_per_second": 11.057, + "step": 420000 + }, + { + "epoch": 0.13, + "learning_rate": 4.363914445461648e-05, + "loss": 2.3621, + "step": 425000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.4918730192128138, + "eval_loss": 2.3828125, + "eval_runtime": 40.2687, + "eval_samples_per_second": 88.605, + "eval_steps_per_second": 11.076, + "step": 425000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3564282136574755e-05, + "loss": 2.3578, + "step": 430000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.4919609628762674, + "eval_loss": 2.380859375, + "eval_runtime": 40.2478, + "eval_samples_per_second": 88.651, + "eval_steps_per_second": 11.081, + "step": 430000 + }, + { + "epoch": 0.13, + "learning_rate": 4.3489449775443e-05, + "loss": 2.3608, + "step": 435000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.4921738358560288, + "eval_loss": 2.37890625, + "eval_runtime": 40.4074, + "eval_samples_per_second": 88.301, + "eval_steps_per_second": 11.038, + "step": 435000 + }, + { + "epoch": 0.13, + "learning_rate": 4.341461741431126e-05, + "loss": 2.3541, + "step": 440000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.4923423260523651, + "eval_loss": 2.376953125, + "eval_runtime": 40.2757, + "eval_samples_per_second": 88.589, + "eval_steps_per_second": 11.074, + "step": 440000 + }, + { + "epoch": 0.13, + "learning_rate": 4.333978505317951e-05, + "loss": 2.3556, + "step": 445000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.4925938284917744, + "eval_loss": 2.376953125, + "eval_runtime": 40.292, + "eval_samples_per_second": 88.553, + "eval_steps_per_second": 11.069, + "step": 445000 + }, + { + "epoch": 0.13, + "learning_rate": 4.326493771359277e-05, + "loss": 2.3562, + "step": 450000 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.49278067453063834, + "eval_loss": 2.376953125, + "eval_runtime": 40.1882, + "eval_samples_per_second": 88.782, + "eval_steps_per_second": 11.098, + "step": 450000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9925197595778234e-05, + "loss": 2.3641, + "step": 455000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4910004317732511, + "eval_loss": 2.38671875, + "eval_runtime": 39.6555, + "eval_samples_per_second": 89.975, + "eval_steps_per_second": 11.247, + "step": 455000 + }, + { + "epoch": 0.14, + "learning_rate": 4.985036523464649e-05, + "loss": 2.3641, + "step": 460000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4911015258910529, + "eval_loss": 2.38671875, + "eval_runtime": 39.5455, + "eval_samples_per_second": 90.225, + "eval_steps_per_second": 11.278, + "step": 460000 + }, + { + "epoch": 0.14, + "learning_rate": 4.977551789505975e-05, + "loss": 2.3646, + "step": 465000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4910639923026007, + "eval_loss": 2.38671875, + "eval_runtime": 39.4635, + "eval_samples_per_second": 90.413, + "eval_steps_per_second": 11.302, + "step": 465000 + }, + { + "epoch": 0.14, + "learning_rate": 4.970071549083798e-05, + "loss": 2.3629, + "step": 470000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4911439908998856, + "eval_loss": 2.384765625, + "eval_runtime": 39.5626, + "eval_samples_per_second": 90.186, + "eval_steps_per_second": 11.273, + "step": 470000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9625868151251246e-05, + "loss": 2.3659, + "step": 475000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4913645349780168, + "eval_loss": 2.3828125, + "eval_runtime": 39.4888, + "eval_samples_per_second": 90.355, + "eval_steps_per_second": 11.294, + "step": 475000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9551020811664506e-05, + "loss": 2.3651, + "step": 480000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4916360370667473, + "eval_loss": 2.3828125, + "eval_runtime": 38.5403, + "eval_samples_per_second": 92.578, + "eval_steps_per_second": 11.572, + "step": 480000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9476173472077765e-05, + "loss": 2.3608, + "step": 485000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.4917579527372671, + "eval_loss": 2.380859375, + "eval_runtime": 39.5737, + "eval_samples_per_second": 90.161, + "eval_steps_per_second": 11.27, + "step": 485000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9401356089401005e-05, + "loss": 2.3612, + "step": 490000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.49203685195656843, + "eval_loss": 2.380859375, + "eval_runtime": 38.5594, + "eval_samples_per_second": 92.533, + "eval_steps_per_second": 11.567, + "step": 490000 + }, + { + "epoch": 0.15, + "learning_rate": 4.932649377135928e-05, + "loss": 2.3569, + "step": 495000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.49215328827110977, + "eval_loss": 2.37890625, + "eval_runtime": 39.5649, + "eval_samples_per_second": 90.181, + "eval_steps_per_second": 11.273, + "step": 495000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9251676388682524e-05, + "loss": 2.3557, + "step": 500000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.4923439698591586, + "eval_loss": 2.37890625, + "eval_runtime": 39.6114, + "eval_samples_per_second": 90.075, + "eval_steps_per_second": 11.259, + "step": 500000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9176829049095784e-05, + "loss": 2.3541, + "step": 505000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.49218726027817594, + "eval_loss": 2.376953125, + "eval_runtime": 39.5989, + "eval_samples_per_second": 90.104, + "eval_steps_per_second": 11.263, + "step": 505000 + }, + { + "epoch": 0.15, + "learning_rate": 4.910196673105406e-05, + "loss": 2.351, + "step": 510000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.49274588062017544, + "eval_loss": 2.375, + "eval_runtime": 38.6221, + "eval_samples_per_second": 92.382, + "eval_steps_per_second": 11.548, + "step": 510000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9027134369922304e-05, + "loss": 2.3504, + "step": 515000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.49260917068851395, + "eval_loss": 2.375, + "eval_runtime": 39.6516, + "eval_samples_per_second": 89.984, + "eval_steps_per_second": 11.248, + "step": 515000 + }, + { + "epoch": 0.16, + "learning_rate": 4.895231698724555e-05, + "loss": 2.3479, + "step": 520000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.492896014973984, + "eval_loss": 2.373046875, + "eval_runtime": 39.6699, + "eval_samples_per_second": 89.942, + "eval_steps_per_second": 11.243, + "step": 520000 + }, + { + "epoch": 0.16, + "learning_rate": 4.887745466920382e-05, + "loss": 2.3451, + "step": 525000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.4929382060150178, + "eval_loss": 2.37109375, + "eval_runtime": 39.6216, + "eval_samples_per_second": 90.052, + "eval_steps_per_second": 11.256, + "step": 525000 + }, + { + "epoch": 0.16, + "learning_rate": 4.880262230807207e-05, + "loss": 2.3505, + "step": 530000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.49343929311924395, + "eval_loss": 2.369140625, + "eval_runtime": 39.6785, + "eval_samples_per_second": 89.923, + "eval_steps_per_second": 11.24, + "step": 530000 + }, + { + "epoch": 0.16, + "learning_rate": 4.8727804925395315e-05, + "loss": 2.3457, + "step": 535000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.4933929925612263, + "eval_loss": 2.369140625, + "eval_runtime": 39.6502, + "eval_samples_per_second": 89.987, + "eval_steps_per_second": 11.248, + "step": 535000 + }, + { + "epoch": 0.16, + "learning_rate": 4.865297256426357e-05, + "loss": 2.3479, + "step": 540000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.4937496986354212, + "eval_loss": 2.369140625, + "eval_runtime": 39.6648, + "eval_samples_per_second": 89.954, + "eval_steps_per_second": 11.244, + "step": 540000 + }, + { + "epoch": 0.16, + "learning_rate": 4.8578110246221835e-05, + "loss": 2.3421, + "step": 545000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.4935861398594655, + "eval_loss": 2.3671875, + "eval_runtime": 39.7026, + "eval_samples_per_second": 89.868, + "eval_steps_per_second": 11.234, + "step": 545000 + }, + { + "epoch": 0.16, + "learning_rate": 4.850327788509009e-05, + "loss": 2.3433, + "step": 550000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.4937406576980568, + "eval_loss": 2.3671875, + "eval_runtime": 39.7646, + "eval_samples_per_second": 89.728, + "eval_steps_per_second": 11.216, + "step": 550000 + }, + { + "epoch": 0.17, + "learning_rate": 4.842846050241333e-05, + "loss": 2.3425, + "step": 555000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.4939384624488776, + "eval_loss": 2.365234375, + "eval_runtime": 39.7934, + "eval_samples_per_second": 89.663, + "eval_steps_per_second": 11.208, + "step": 555000 + }, + { + "epoch": 0.17, + "learning_rate": 4.835361316282659e-05, + "loss": 2.3403, + "step": 560000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.49420284137483617, + "eval_loss": 2.36328125, + "eval_runtime": 39.7702, + "eval_samples_per_second": 89.715, + "eval_steps_per_second": 11.214, + "step": 560000 + }, + { + "epoch": 0.17, + "learning_rate": 4.827876582323985e-05, + "loss": 2.3417, + "step": 565000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.49440420770704296, + "eval_loss": 2.361328125, + "eval_runtime": 40.0918, + "eval_samples_per_second": 88.996, + "eval_steps_per_second": 11.124, + "step": 565000 + }, + { + "epoch": 0.17, + "learning_rate": 4.82039484405631e-05, + "loss": 2.3382, + "step": 570000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.4947474893590907, + "eval_loss": 2.361328125, + "eval_runtime": 39.7167, + "eval_samples_per_second": 89.836, + "eval_steps_per_second": 11.23, + "step": 570000 + }, + { + "epoch": 0.17, + "learning_rate": 4.812913105788634e-05, + "loss": 2.3354, + "step": 575000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.4949266642995849, + "eval_loss": 2.359375, + "eval_runtime": 39.8562, + "eval_samples_per_second": 89.522, + "eval_steps_per_second": 11.19, + "step": 575000 + }, + { + "epoch": 0.17, + "learning_rate": 4.805425376138962e-05, + "loss": 2.3366, + "step": 580000 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.4946513266616695, + "eval_loss": 2.359375, + "eval_runtime": 38.7841, + "eval_samples_per_second": 91.997, + "eval_steps_per_second": 11.5, + "step": 580000 + }, + { + "epoch": 0.18, + "learning_rate": 4.797942140025787e-05, + "loss": 2.3373, + "step": 585000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.49454119160650334, + "eval_loss": 2.359375, + "eval_runtime": 38.7732, + "eval_samples_per_second": 92.022, + "eval_steps_per_second": 11.503, + "step": 585000 + }, + { + "epoch": 0.18, + "learning_rate": 4.790460401758111e-05, + "loss": 2.3365, + "step": 590000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.49488255548395865, + "eval_loss": 2.359375, + "eval_runtime": 39.8158, + "eval_samples_per_second": 89.613, + "eval_steps_per_second": 11.202, + "step": 590000 + }, + { + "epoch": 0.18, + "learning_rate": 4.782975667799438e-05, + "loss": 2.3318, + "step": 595000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.49525295994809954, + "eval_loss": 2.35546875, + "eval_runtime": 39.8567, + "eval_samples_per_second": 89.521, + "eval_steps_per_second": 11.19, + "step": 595000 + }, + { + "epoch": 0.18, + "learning_rate": 4.7754894359952644e-05, + "loss": 2.3278, + "step": 600000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.4957521292777332, + "eval_loss": 2.353515625, + "eval_runtime": 39.8687, + "eval_samples_per_second": 89.494, + "eval_steps_per_second": 11.187, + "step": 600000 + }, + { + "epoch": 0.18, + "learning_rate": 4.768004702036591e-05, + "loss": 2.3277, + "step": 605000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.4959060991807267, + "eval_loss": 2.3515625, + "eval_runtime": 40.0704, + "eval_samples_per_second": 89.043, + "eval_steps_per_second": 11.13, + "step": 605000 + }, + { + "epoch": 0.18, + "learning_rate": 4.7605214659234157e-05, + "loss": 2.326, + "step": 610000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.49614417719798887, + "eval_loss": 2.3515625, + "eval_runtime": 39.9671, + "eval_samples_per_second": 89.273, + "eval_steps_per_second": 11.159, + "step": 610000 + }, + { + "epoch": 0.18, + "learning_rate": 4.753036731964742e-05, + "loss": 2.3273, + "step": 615000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.49605705543793205, + "eval_loss": 2.3515625, + "eval_runtime": 39.9201, + "eval_samples_per_second": 89.378, + "eval_steps_per_second": 11.172, + "step": 615000 + }, + { + "epoch": 0.19, + "learning_rate": 4.745551998006068e-05, + "loss": 2.3284, + "step": 620000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.49654992350819055, + "eval_loss": 2.349609375, + "eval_runtime": 39.956, + "eval_samples_per_second": 89.298, + "eval_steps_per_second": 11.162, + "step": 620000 + }, + { + "epoch": 0.19, + "learning_rate": 4.7380687618928936e-05, + "loss": 2.3276, + "step": 625000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.49658115583726753, + "eval_loss": 2.34765625, + "eval_runtime": 39.9741, + "eval_samples_per_second": 89.258, + "eval_steps_per_second": 11.157, + "step": 625000 + }, + { + "epoch": 0.19, + "learning_rate": 4.7305840279342196e-05, + "loss": 2.3228, + "step": 630000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.4966945785060207, + "eval_loss": 2.345703125, + "eval_runtime": 39.9089, + "eval_samples_per_second": 89.404, + "eval_steps_per_second": 11.175, + "step": 630000 + }, + { + "epoch": 0.19, + "learning_rate": 4.723103787512043e-05, + "loss": 2.3219, + "step": 635000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.49684580873102496, + "eval_loss": 2.345703125, + "eval_runtime": 39.9108, + "eval_samples_per_second": 89.399, + "eval_steps_per_second": 11.175, + "step": 635000 + }, + { + "epoch": 0.19, + "learning_rate": 4.715619053553369e-05, + "loss": 2.326, + "step": 640000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.49703155889869327, + "eval_loss": 2.34375, + "eval_runtime": 40.0352, + "eval_samples_per_second": 89.122, + "eval_steps_per_second": 11.14, + "step": 640000 + }, + { + "epoch": 0.19, + "learning_rate": 4.708135817440194e-05, + "loss": 2.3191, + "step": 645000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.4972345690376936, + "eval_loss": 2.341796875, + "eval_runtime": 40.0269, + "eval_samples_per_second": 89.14, + "eval_steps_per_second": 11.143, + "step": 645000 + }, + { + "epoch": 0.19, + "learning_rate": 4.70065258132702e-05, + "loss": 2.3167, + "step": 650000 + }, + { + "epoch": 0.19, + "eval_accuracy": 0.4972822394347058, + "eval_loss": 2.34375, + "eval_runtime": 40.0234, + "eval_samples_per_second": 89.148, + "eval_steps_per_second": 11.143, + "step": 650000 + }, + { + "epoch": 0.2, + "learning_rate": 4.693166349522847e-05, + "loss": 2.3172, + "step": 655000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.4974205931731608, + "eval_loss": 2.341796875, + "eval_runtime": 40.0416, + "eval_samples_per_second": 89.107, + "eval_steps_per_second": 11.138, + "step": 655000 + }, + { + "epoch": 0.2, + "learning_rate": 4.685683113409672e-05, + "loss": 2.3194, + "step": 660000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.4977205879129791, + "eval_loss": 2.337890625, + "eval_runtime": 40.5115, + "eval_samples_per_second": 88.074, + "eval_steps_per_second": 11.009, + "step": 660000 + }, + { + "epoch": 0.2, + "learning_rate": 4.678198379450998e-05, + "loss": 2.3204, + "step": 665000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.49760332969504095, + "eval_loss": 2.33984375, + "eval_runtime": 40.059, + "eval_samples_per_second": 89.069, + "eval_steps_per_second": 11.134, + "step": 665000 + }, + { + "epoch": 0.2, + "learning_rate": 4.670716641183322e-05, + "loss": 2.309, + "step": 670000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.49802085662059625, + "eval_loss": 2.3359375, + "eval_runtime": 40.172, + "eval_samples_per_second": 88.818, + "eval_steps_per_second": 11.102, + "step": 670000 + }, + { + "epoch": 0.2, + "learning_rate": 4.663233405070147e-05, + "loss": 2.3147, + "step": 675000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.49805729433785273, + "eval_loss": 2.337890625, + "eval_runtime": 40.0906, + "eval_samples_per_second": 88.999, + "eval_steps_per_second": 11.125, + "step": 675000 + }, + { + "epoch": 0.2, + "learning_rate": 4.655745675420475e-05, + "loss": 2.3122, + "step": 680000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.4980255140731779, + "eval_loss": 2.3359375, + "eval_runtime": 40.0778, + "eval_samples_per_second": 89.027, + "eval_steps_per_second": 11.128, + "step": 680000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6482624393073005e-05, + "loss": 2.3096, + "step": 685000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.4984096169272648, + "eval_loss": 2.333984375, + "eval_runtime": 40.3028, + "eval_samples_per_second": 88.53, + "eval_steps_per_second": 11.066, + "step": 685000 + }, + { + "epoch": 0.21, + "learning_rate": 4.640780701039625e-05, + "loss": 2.3093, + "step": 690000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.49861701055104785, + "eval_loss": 2.333984375, + "eval_runtime": 40.1409, + "eval_samples_per_second": 88.887, + "eval_steps_per_second": 11.111, + "step": 690000 + }, + { + "epoch": 0.21, + "learning_rate": 4.633295967080951e-05, + "loss": 2.3048, + "step": 695000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.498526601177404, + "eval_loss": 2.33203125, + "eval_runtime": 40.2295, + "eval_samples_per_second": 88.691, + "eval_steps_per_second": 11.086, + "step": 695000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6258127309677764e-05, + "loss": 2.3111, + "step": 700000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.4988186508510536, + "eval_loss": 2.330078125, + "eval_runtime": 40.2382, + "eval_samples_per_second": 88.672, + "eval_steps_per_second": 11.084, + "step": 700000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6183279970091023e-05, + "loss": 2.3074, + "step": 705000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.4989449500063561, + "eval_loss": 2.330078125, + "eval_runtime": 40.2221, + "eval_samples_per_second": 88.707, + "eval_steps_per_second": 11.088, + "step": 705000 + }, + { + "epoch": 0.21, + "learning_rate": 4.610843263050428e-05, + "loss": 2.3082, + "step": 710000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.49918768547619985, + "eval_loss": 2.330078125, + "eval_runtime": 40.2424, + "eval_samples_per_second": 88.663, + "eval_steps_per_second": 11.083, + "step": 710000 + }, + { + "epoch": 0.21, + "learning_rate": 4.603357031246256e-05, + "loss": 2.3093, + "step": 715000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.4993685042234876, + "eval_loss": 2.328125, + "eval_runtime": 39.2194, + "eval_samples_per_second": 90.975, + "eval_steps_per_second": 11.372, + "step": 715000 + }, + { + "epoch": 0.22, + "learning_rate": 4.595873795133081e-05, + "loss": 2.3011, + "step": 720000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.4995095976399318, + "eval_loss": 2.328125, + "eval_runtime": 40.3274, + "eval_samples_per_second": 88.476, + "eval_steps_per_second": 11.059, + "step": 720000 + }, + { + "epoch": 0.22, + "learning_rate": 4.588390559019906e-05, + "loss": 2.2998, + "step": 725000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.4994558999513433, + "eval_loss": 2.326171875, + "eval_runtime": 40.2634, + "eval_samples_per_second": 88.616, + "eval_steps_per_second": 11.077, + "step": 725000 + }, + { + "epoch": 0.22, + "learning_rate": 4.580907322906731e-05, + "loss": 2.3012, + "step": 730000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.49959206194740696, + "eval_loss": 2.326171875, + "eval_runtime": 40.2894, + "eval_samples_per_second": 88.559, + "eval_steps_per_second": 11.07, + "step": 730000 + }, + { + "epoch": 0.22, + "learning_rate": 4.573421091102558e-05, + "loss": 2.3002, + "step": 735000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.4997052106483612, + "eval_loss": 2.32421875, + "eval_runtime": 40.3059, + "eval_samples_per_second": 88.523, + "eval_steps_per_second": 11.065, + "step": 735000 + }, + { + "epoch": 0.22, + "learning_rate": 4.5659378549893835e-05, + "loss": 2.2994, + "step": 740000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.5000197256815223, + "eval_loss": 2.32421875, + "eval_runtime": 40.7124, + "eval_samples_per_second": 87.639, + "eval_steps_per_second": 10.955, + "step": 740000 + }, + { + "epoch": 0.22, + "learning_rate": 4.558454618876209e-05, + "loss": 2.299, + "step": 745000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.5000953407940244, + "eval_loss": 2.322265625, + "eval_runtime": 40.3194, + "eval_samples_per_second": 88.493, + "eval_steps_per_second": 11.062, + "step": 745000 + }, + { + "epoch": 0.22, + "learning_rate": 4.550971382763034e-05, + "loss": 2.2969, + "step": 750000 + }, + { + "epoch": 0.22, + "eval_accuracy": 0.5002605433767736, + "eval_loss": 2.322265625, + "eval_runtime": 40.3665, + "eval_samples_per_second": 88.39, + "eval_steps_per_second": 11.049, + "step": 750000 + }, + { + "epoch": 0.23, + "learning_rate": 4.543489644495358e-05, + "loss": 2.2934, + "step": 755000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.5003739660455269, + "eval_loss": 2.3203125, + "eval_runtime": 40.3284, + "eval_samples_per_second": 88.474, + "eval_steps_per_second": 11.059, + "step": 755000 + }, + { + "epoch": 0.23, + "learning_rate": 4.536004910536684e-05, + "loss": 2.2988, + "step": 760000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.5004895804566715, + "eval_loss": 2.318359375, + "eval_runtime": 40.3305, + "eval_samples_per_second": 88.469, + "eval_steps_per_second": 11.059, + "step": 760000 + }, + { + "epoch": 0.23, + "learning_rate": 4.5285186787325113e-05, + "loss": 2.2911, + "step": 765000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.5007449184452656, + "eval_loss": 2.318359375, + "eval_runtime": 39.3805, + "eval_samples_per_second": 90.603, + "eval_steps_per_second": 11.325, + "step": 765000 + }, + { + "epoch": 0.23, + "learning_rate": 4.5210354426193366e-05, + "loss": 2.2929, + "step": 770000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.5008427249494803, + "eval_loss": 2.318359375, + "eval_runtime": 40.4207, + "eval_samples_per_second": 88.272, + "eval_steps_per_second": 11.034, + "step": 770000 + }, + { + "epoch": 0.23, + "learning_rate": 4.5135567000426584e-05, + "loss": 2.2926, + "step": 775000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.5008994362838569, + "eval_loss": 2.31640625, + "eval_runtime": 40.4103, + "eval_samples_per_second": 88.294, + "eval_steps_per_second": 11.037, + "step": 775000 + }, + { + "epoch": 0.23, + "learning_rate": 4.506070468238486e-05, + "loss": 2.292, + "step": 780000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.5011701164691906, + "eval_loss": 2.31640625, + "eval_runtime": 40.3936, + "eval_samples_per_second": 88.331, + "eval_steps_per_second": 11.041, + "step": 780000 + }, + { + "epoch": 0.24, + "learning_rate": 4.498587232125311e-05, + "loss": 2.2932, + "step": 785000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.5014183312950129, + "eval_loss": 2.314453125, + "eval_runtime": 40.4142, + "eval_samples_per_second": 88.286, + "eval_steps_per_second": 11.036, + "step": 785000 + }, + { + "epoch": 0.24, + "learning_rate": 4.4911039960121364e-05, + "loss": 2.2903, + "step": 790000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.5013958659355015, + "eval_loss": 2.314453125, + "eval_runtime": 40.3913, + "eval_samples_per_second": 88.336, + "eval_steps_per_second": 11.042, + "step": 790000 + }, + { + "epoch": 0.24, + "learning_rate": 4.483620759898962e-05, + "loss": 2.2886, + "step": 795000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.5015205212840104, + "eval_loss": 2.3125, + "eval_runtime": 40.3641, + "eval_samples_per_second": 88.395, + "eval_steps_per_second": 11.049, + "step": 795000 + }, + { + "epoch": 0.24, + "learning_rate": 4.476137523785787e-05, + "loss": 2.2924, + "step": 800000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.5014750426293895, + "eval_loss": 2.3125, + "eval_runtime": 40.359, + "eval_samples_per_second": 88.407, + "eval_steps_per_second": 11.051, + "step": 800000 + }, + { + "epoch": 0.24, + "learning_rate": 4.468652789827113e-05, + "loss": 2.2891, + "step": 805000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.5018673645174441, + "eval_loss": 2.310546875, + "eval_runtime": 40.4253, + "eval_samples_per_second": 88.261, + "eval_steps_per_second": 11.033, + "step": 805000 + }, + { + "epoch": 0.24, + "learning_rate": 4.461168055868439e-05, + "loss": 2.2862, + "step": 810000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.5019873624133714, + "eval_loss": 2.30859375, + "eval_runtime": 40.3854, + "eval_samples_per_second": 88.349, + "eval_steps_per_second": 11.044, + "step": 810000 + }, + { + "epoch": 0.24, + "learning_rate": 4.453686317600763e-05, + "loss": 2.2858, + "step": 815000 + }, + { + "epoch": 0.24, + "eval_accuracy": 0.5021665373538656, + "eval_loss": 2.30859375, + "eval_runtime": 40.4075, + "eval_samples_per_second": 88.301, + "eval_steps_per_second": 11.038, + "step": 815000 + }, + { + "epoch": 0.25, + "learning_rate": 4.44620008579659e-05, + "loss": 2.2841, + "step": 820000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.502265439729276, + "eval_loss": 2.306640625, + "eval_runtime": 40.4403, + "eval_samples_per_second": 88.229, + "eval_steps_per_second": 11.029, + "step": 820000 + }, + { + "epoch": 0.25, + "learning_rate": 4.438718347528915e-05, + "loss": 2.2843, + "step": 825000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.5022361251747914, + "eval_loss": 2.30859375, + "eval_runtime": 40.4536, + "eval_samples_per_second": 88.2, + "eval_steps_per_second": 11.025, + "step": 825000 + }, + { + "epoch": 0.25, + "learning_rate": 4.431233613570241e-05, + "loss": 2.2832, + "step": 830000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.5024813263548256, + "eval_loss": 2.306640625, + "eval_runtime": 40.4096, + "eval_samples_per_second": 88.296, + "eval_steps_per_second": 11.037, + "step": 830000 + }, + { + "epoch": 0.25, + "learning_rate": 4.423756368839062e-05, + "loss": 2.2846, + "step": 835000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.502600776315155, + "eval_loss": 2.306640625, + "eval_runtime": 39.3247, + "eval_samples_per_second": 90.732, + "eval_steps_per_second": 11.341, + "step": 835000 + }, + { + "epoch": 0.25, + "learning_rate": 4.416267141343891e-05, + "loss": 2.2784, + "step": 840000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.5026766653954561, + "eval_loss": 2.3046875, + "eval_runtime": 40.3768, + "eval_samples_per_second": 88.367, + "eval_steps_per_second": 11.046, + "step": 840000 + }, + { + "epoch": 0.25, + "learning_rate": 4.408782407385217e-05, + "loss": 2.277, + "step": 845000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.5028424159138032, + "eval_loss": 2.302734375, + "eval_runtime": 40.4265, + "eval_samples_per_second": 88.259, + "eval_steps_per_second": 11.032, + "step": 845000 + }, + { + "epoch": 0.25, + "learning_rate": 4.4013021669630405e-05, + "loss": 2.276, + "step": 850000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.5025583113063223, + "eval_loss": 2.306640625, + "eval_runtime": 40.7923, + "eval_samples_per_second": 87.468, + "eval_steps_per_second": 10.933, + "step": 850000 + }, + { + "epoch": 0.26, + "learning_rate": 4.393818930849866e-05, + "loss": 2.2802, + "step": 855000 + }, + { + "epoch": 0.26, + "eval_accuracy": 0.5031032332583757, + "eval_loss": 2.302734375, + "eval_runtime": 40.3592, + "eval_samples_per_second": 88.406, + "eval_steps_per_second": 11.051, + "step": 855000 + }, + { + "epoch": 0.26, + "learning_rate": 4.3863356947366904e-05, + "loss": 2.2781, + "step": 860000 + }, + { + "epoch": 0.26, + "eval_accuracy": 0.5032018616659872, + "eval_loss": 2.30078125, + "eval_runtime": 40.4036, + "eval_samples_per_second": 88.309, + "eval_steps_per_second": 11.039, + "step": 860000 + }, + { + "epoch": 0.26, + "learning_rate": 4.378850960778017e-05, + "loss": 2.2749, + "step": 865000 + }, + { + "epoch": 0.26, + "eval_accuracy": 0.5038193850847547, + "eval_loss": 2.298828125, + "eval_runtime": 40.3881, + "eval_samples_per_second": 88.343, + "eval_steps_per_second": 11.043, + "step": 865000 + }, + { + "epoch": 0.26, + "learning_rate": 4.371366226819343e-05, + "loss": 2.2729, + "step": 870000 + }, + { + "epoch": 0.26, + "eval_accuracy": 0.5037152773211648, + "eval_loss": 2.296875, + "eval_runtime": 40.4456, + "eval_samples_per_second": 88.217, + "eval_steps_per_second": 11.027, + "step": 870000 + }, + { + "epoch": 0.26, + "learning_rate": 4.363882990706168e-05, + "loss": 2.2708, + "step": 875000 + }, + { + "epoch": 0.26, + "eval_accuracy": 0.5038993836820396, + "eval_loss": 2.296875, + "eval_runtime": 40.4095, + "eval_samples_per_second": 88.296, + "eval_steps_per_second": 11.037, + "step": 875000 + }, + { + "epoch": 0.26, + "learning_rate": 4.3563997545929936e-05, + "loss": 2.2754, + "step": 880000 + }, + { + "epoch": 0.26, + "eval_accuracy": 0.5038591104155982, + "eval_loss": 2.296875, + "eval_runtime": 40.397, + "eval_samples_per_second": 88.323, + "eval_steps_per_second": 11.04, + "step": 880000 + }, + { + "epoch": 0.27, + "learning_rate": 4.3489150206343196e-05, + "loss": 2.2761, + "step": 885000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5040640383291909, + "eval_loss": 2.294921875, + "eval_runtime": 40.3166, + "eval_samples_per_second": 88.499, + "eval_steps_per_second": 11.062, + "step": 885000 + }, + { + "epoch": 0.27, + "learning_rate": 4.341434780212143e-05, + "loss": 2.2742, + "step": 890000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5041032157244366, + "eval_loss": 2.294921875, + "eval_runtime": 40.3381, + "eval_samples_per_second": 88.452, + "eval_steps_per_second": 11.057, + "step": 890000 + }, + { + "epoch": 0.27, + "learning_rate": 4.3339470505624715e-05, + "loss": 2.2734, + "step": 895000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5040837640107132, + "eval_loss": 2.294921875, + "eval_runtime": 40.3833, + "eval_samples_per_second": 88.353, + "eval_steps_per_second": 11.044, + "step": 895000 + }, + { + "epoch": 0.27, + "learning_rate": 4.326463814449296e-05, + "loss": 2.2682, + "step": 900000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5043944434946894, + "eval_loss": 2.29296875, + "eval_runtime": 40.4818, + "eval_samples_per_second": 88.138, + "eval_steps_per_second": 11.017, + "step": 900000 + }, + { + "epoch": 0.27, + "learning_rate": 4.318982076181621e-05, + "loss": 2.2667, + "step": 905000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5045489613332808, + "eval_loss": 2.29296875, + "eval_runtime": 40.3614, + "eval_samples_per_second": 88.401, + "eval_steps_per_second": 11.05, + "step": 905000 + }, + { + "epoch": 0.27, + "learning_rate": 4.311498840068446e-05, + "loss": 2.2676, + "step": 910000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5045801936623577, + "eval_loss": 2.29296875, + "eval_runtime": 40.4147, + "eval_samples_per_second": 88.285, + "eval_steps_per_second": 11.036, + "step": 910000 + }, + { + "epoch": 0.27, + "learning_rate": 4.304012608264273e-05, + "loss": 2.2707, + "step": 915000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5046547129036641, + "eval_loss": 2.291015625, + "eval_runtime": 40.4009, + "eval_samples_per_second": 88.315, + "eval_steps_per_second": 11.039, + "step": 915000 + }, + { + "epoch": 0.28, + "learning_rate": 4.296529372151098e-05, + "loss": 2.265, + "step": 920000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.5047700533470098, + "eval_loss": 2.291015625, + "eval_runtime": 40.3704, + "eval_samples_per_second": 88.382, + "eval_steps_per_second": 11.048, + "step": 920000 + }, + { + "epoch": 0.28, + "learning_rate": 4.289046136037923e-05, + "loss": 2.2676, + "step": 925000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.5046149875728206, + "eval_loss": 2.291015625, + "eval_runtime": 40.3019, + "eval_samples_per_second": 88.532, + "eval_steps_per_second": 11.066, + "step": 925000 + }, + { + "epoch": 0.28, + "learning_rate": 4.281564397770247e-05, + "loss": 2.2662, + "step": 930000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.5051503206519119, + "eval_loss": 2.2890625, + "eval_runtime": 40.3203, + "eval_samples_per_second": 88.491, + "eval_steps_per_second": 11.061, + "step": 930000 + }, + { + "epoch": 0.28, + "learning_rate": 4.274079663811574e-05, + "loss": 2.2706, + "step": 935000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.5050968969311223, + "eval_loss": 2.2890625, + "eval_runtime": 40.3497, + "eval_samples_per_second": 88.427, + "eval_steps_per_second": 11.053, + "step": 935000 + }, + { + "epoch": 0.28, + "learning_rate": 4.2665949298529e-05, + "loss": 2.2657, + "step": 940000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.5048788185631814, + "eval_loss": 2.2890625, + "eval_runtime": 40.7581, + "eval_samples_per_second": 87.541, + "eval_steps_per_second": 10.943, + "step": 940000 + }, + { + "epoch": 0.28, + "learning_rate": 4.259110195894226e-05, + "loss": 2.2672, + "step": 945000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.5050453909849252, + "eval_loss": 2.287109375, + "eval_runtime": 40.3332, + "eval_samples_per_second": 88.463, + "eval_steps_per_second": 11.058, + "step": 945000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9925167638868255e-05, + "loss": 2.2716, + "step": 950000 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.5037065103515993, + "eval_loss": 2.296875, + "eval_runtime": 38.5412, + "eval_samples_per_second": 92.576, + "eval_steps_per_second": 11.572, + "step": 950000 + }, + { + "epoch": 0.29, + "learning_rate": 4.9850290342371536e-05, + "loss": 2.2702, + "step": 955000 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.5036908941870608, + "eval_loss": 2.298828125, + "eval_runtime": 39.4253, + "eval_samples_per_second": 90.5, + "eval_steps_per_second": 11.313, + "step": 955000 + }, + { + "epoch": 0.29, + "learning_rate": 4.977548793814977e-05, + "loss": 2.2708, + "step": 960000 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.5035029522770011, + "eval_loss": 2.298828125, + "eval_runtime": 39.5074, + "eval_samples_per_second": 90.312, + "eval_steps_per_second": 11.289, + "step": 960000 + }, + { + "epoch": 0.29, + "learning_rate": 4.970064059856303e-05, + "loss": 2.2738, + "step": 965000 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.5035369242840674, + "eval_loss": 2.298828125, + "eval_runtime": 39.5917, + "eval_samples_per_second": 90.12, + "eval_steps_per_second": 11.265, + "step": 965000 + }, + { + "epoch": 0.29, + "learning_rate": 4.962582321588627e-05, + "loss": 2.2737, + "step": 970000 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.5035577458367854, + "eval_loss": 2.298828125, + "eval_runtime": 39.4627, + "eval_samples_per_second": 90.415, + "eval_steps_per_second": 11.302, + "step": 970000 + }, + { + "epoch": 0.29, + "learning_rate": 4.9550990854754526e-05, + "loss": 2.2763, + "step": 975000 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.49873289892999134, + "eval_loss": 2.330078125, + "eval_runtime": 39.5211, + "eval_samples_per_second": 90.281, + "eval_steps_per_second": 11.285, + "step": 975000 + }, + { + "epoch": 0.29, + "learning_rate": 4.9476143515167786e-05, + "loss": 2.2738, + "step": 980000 + }, + { + "epoch": 0.29, + "eval_accuracy": 0.5034662405919458, + "eval_loss": 2.296875, + "eval_runtime": 39.4696, + "eval_samples_per_second": 90.399, + "eval_steps_per_second": 11.3, + "step": 980000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9401296175581046e-05, + "loss": 2.2737, + "step": 985000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5036182927203469, + "eval_loss": 2.296875, + "eval_runtime": 39.6371, + "eval_samples_per_second": 90.017, + "eval_steps_per_second": 11.252, + "step": 985000 + }, + { + "epoch": 0.3, + "learning_rate": 4.932644883599431e-05, + "loss": 2.2748, + "step": 990000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5036056902015965, + "eval_loss": 2.296875, + "eval_runtime": 39.6139, + "eval_samples_per_second": 90.069, + "eval_steps_per_second": 11.259, + "step": 990000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9251631453317545e-05, + "loss": 2.2724, + "step": 995000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5038232206339396, + "eval_loss": 2.296875, + "eval_runtime": 39.4746, + "eval_samples_per_second": 90.387, + "eval_steps_per_second": 11.298, + "step": 995000 + }, + { + "epoch": 0.3, + "learning_rate": 4.917678411373081e-05, + "loss": 2.2744, + "step": 1000000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5032999421380009, + "eval_loss": 2.298828125, + "eval_runtime": 39.6576, + "eval_samples_per_second": 89.97, + "eval_steps_per_second": 11.246, + "step": 1000000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9101951752599065e-05, + "loss": 2.2694, + "step": 1005000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5033465166638174, + "eval_loss": 2.298828125, + "eval_runtime": 39.6623, + "eval_samples_per_second": 89.959, + "eval_steps_per_second": 11.245, + "step": 1005000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9027104413012324e-05, + "loss": 2.2684, + "step": 1010000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5039421226586712, + "eval_loss": 2.294921875, + "eval_runtime": 39.6275, + "eval_samples_per_second": 90.039, + "eval_steps_per_second": 11.255, + "step": 1010000 + }, + { + "epoch": 0.3, + "learning_rate": 4.895231698724555e-05, + "loss": 2.2731, + "step": 1015000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5039547251774216, + "eval_loss": 2.294921875, + "eval_runtime": 39.6334, + "eval_samples_per_second": 90.025, + "eval_steps_per_second": 11.253, + "step": 1015000 + }, + { + "epoch": 0.31, + "learning_rate": 4.887746964765881e-05, + "loss": 2.2714, + "step": 1020000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.5042445831086797, + "eval_loss": 2.294921875, + "eval_runtime": 39.6384, + "eval_samples_per_second": 90.014, + "eval_steps_per_second": 11.252, + "step": 1020000 + }, + { + "epoch": 0.31, + "learning_rate": 4.880263728652706e-05, + "loss": 2.2687, + "step": 1025000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.5045218385211876, + "eval_loss": 2.29296875, + "eval_runtime": 39.6654, + "eval_samples_per_second": 89.952, + "eval_steps_per_second": 11.244, + "step": 1025000 + }, + { + "epoch": 0.31, + "learning_rate": 4.872778994694032e-05, + "loss": 2.2673, + "step": 1030000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.5046215627999947, + "eval_loss": 2.29296875, + "eval_runtime": 39.7655, + "eval_samples_per_second": 89.726, + "eval_steps_per_second": 11.216, + "step": 1030000 + }, + { + "epoch": 0.31, + "learning_rate": 4.8652957585808575e-05, + "loss": 2.2677, + "step": 1035000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.5044056761744452, + "eval_loss": 2.29296875, + "eval_runtime": 39.7885, + "eval_samples_per_second": 89.674, + "eval_steps_per_second": 11.209, + "step": 1035000 + }, + { + "epoch": 0.31, + "learning_rate": 4.8578110246221835e-05, + "loss": 2.265, + "step": 1040000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.5046538910002674, + "eval_loss": 2.291015625, + "eval_runtime": 39.7963, + "eval_samples_per_second": 89.657, + "eval_steps_per_second": 11.207, + "step": 1040000 + }, + { + "epoch": 0.31, + "learning_rate": 4.850327788509009e-05, + "loss": 2.2659, + "step": 1045000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.504468688768197, + "eval_loss": 2.291015625, + "eval_runtime": 40.1905, + "eval_samples_per_second": 88.777, + "eval_steps_per_second": 11.097, + "step": 1045000 + }, + { + "epoch": 0.31, + "learning_rate": 4.842849045932331e-05, + "loss": 2.2633, + "step": 1050000 + }, + { + "epoch": 0.31, + "eval_accuracy": 0.5042100631660157, + "eval_loss": 2.294921875, + "eval_runtime": 39.7629, + "eval_samples_per_second": 89.732, + "eval_steps_per_second": 11.216, + "step": 1050000 + }, + { + "epoch": 0.32, + "learning_rate": 4.835361316282659e-05, + "loss": 2.2689, + "step": 1055000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.5049952548777227, + "eval_loss": 2.2890625, + "eval_runtime": 39.8901, + "eval_samples_per_second": 89.446, + "eval_steps_per_second": 11.181, + "step": 1055000 + }, + { + "epoch": 0.32, + "learning_rate": 4.827876582323985e-05, + "loss": 2.2617, + "step": 1060000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.5049081331176659, + "eval_loss": 2.2890625, + "eval_runtime": 39.8913, + "eval_samples_per_second": 89.443, + "eval_steps_per_second": 11.18, + "step": 1060000 + }, + { + "epoch": 0.32, + "learning_rate": 4.820390350519813e-05, + "loss": 2.2613, + "step": 1065000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.5052440176391427, + "eval_loss": 2.287109375, + "eval_runtime": 39.9185, + "eval_samples_per_second": 89.382, + "eval_steps_per_second": 11.173, + "step": 1065000 + }, + { + "epoch": 0.32, + "learning_rate": 4.8129086122521366e-05, + "loss": 2.2649, + "step": 1070000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.5047081366244537, + "eval_loss": 2.2890625, + "eval_runtime": 39.8452, + "eval_samples_per_second": 89.547, + "eval_steps_per_second": 11.193, + "step": 1070000 + }, + { + "epoch": 0.32, + "learning_rate": 4.805422380447964e-05, + "loss": 2.2587, + "step": 1075000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.505284564873383, + "eval_loss": 2.287109375, + "eval_runtime": 39.8746, + "eval_samples_per_second": 89.481, + "eval_steps_per_second": 11.185, + "step": 1075000 + }, + { + "epoch": 0.32, + "learning_rate": 4.797939144334789e-05, + "loss": 2.2641, + "step": 1080000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.5054223706762402, + "eval_loss": 2.28515625, + "eval_runtime": 39.914, + "eval_samples_per_second": 89.392, + "eval_steps_per_second": 11.174, + "step": 1080000 + }, + { + "epoch": 0.33, + "learning_rate": 4.7904559082216145e-05, + "loss": 2.2634, + "step": 1085000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.5056552433053229, + "eval_loss": 2.28515625, + "eval_runtime": 39.8319, + "eval_samples_per_second": 89.576, + "eval_steps_per_second": 11.197, + "step": 1085000 + }, + { + "epoch": 0.33, + "learning_rate": 4.7829741699539384e-05, + "loss": 2.2597, + "step": 1090000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.5057451047433689, + "eval_loss": 2.283203125, + "eval_runtime": 39.976, + "eval_samples_per_second": 89.254, + "eval_steps_per_second": 11.157, + "step": 1090000 + }, + { + "epoch": 0.33, + "learning_rate": 4.775487938149766e-05, + "loss": 2.2572, + "step": 1095000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.5059667446926958, + "eval_loss": 2.283203125, + "eval_runtime": 40.0116, + "eval_samples_per_second": 89.174, + "eval_steps_per_second": 11.147, + "step": 1095000 + }, + { + "epoch": 0.33, + "learning_rate": 4.768003204191092e-05, + "loss": 2.2566, + "step": 1100000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.5055771624826304, + "eval_loss": 2.283203125, + "eval_runtime": 39.963, + "eval_samples_per_second": 89.283, + "eval_steps_per_second": 11.16, + "step": 1100000 + }, + { + "epoch": 0.33, + "learning_rate": 4.760524461614414e-05, + "loss": 2.2576, + "step": 1105000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.5055905869047775, + "eval_loss": 2.283203125, + "eval_runtime": 39.8683, + "eval_samples_per_second": 89.495, + "eval_steps_per_second": 11.187, + "step": 1105000 + }, + { + "epoch": 0.33, + "learning_rate": 4.753038229810241e-05, + "loss": 2.2612, + "step": 1110000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.505675790890242, + "eval_loss": 2.283203125, + "eval_runtime": 39.9091, + "eval_samples_per_second": 89.403, + "eval_steps_per_second": 11.175, + "step": 1110000 + }, + { + "epoch": 0.33, + "learning_rate": 4.7455534958515676e-05, + "loss": 2.2585, + "step": 1115000 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.505924553651662, + "eval_loss": 2.28125, + "eval_runtime": 39.8428, + "eval_samples_per_second": 89.552, + "eval_steps_per_second": 11.194, + "step": 1115000 + }, + { + "epoch": 0.34, + "learning_rate": 4.738073255429391e-05, + "loss": 2.2528, + "step": 1120000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.5059566078841358, + "eval_loss": 2.28125, + "eval_runtime": 40.0018, + "eval_samples_per_second": 89.196, + "eval_steps_per_second": 11.149, + "step": 1120000 + }, + { + "epoch": 0.34, + "learning_rate": 4.730588521470717e-05, + "loss": 2.2599, + "step": 1125000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.5059996208285663, + "eval_loss": 2.28125, + "eval_runtime": 39.9769, + "eval_samples_per_second": 89.251, + "eval_steps_per_second": 11.156, + "step": 1125000 + }, + { + "epoch": 0.34, + "learning_rate": 4.723105285357542e-05, + "loss": 2.2556, + "step": 1130000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.5065730354317075, + "eval_loss": 2.27734375, + "eval_runtime": 39.975, + "eval_samples_per_second": 89.256, + "eval_steps_per_second": 11.157, + "step": 1130000 + }, + { + "epoch": 0.34, + "learning_rate": 4.715619053553369e-05, + "loss": 2.2519, + "step": 1135000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.5063790662300716, + "eval_loss": 2.279296875, + "eval_runtime": 40.0596, + "eval_samples_per_second": 89.067, + "eval_steps_per_second": 11.133, + "step": 1135000 + }, + { + "epoch": 0.34, + "learning_rate": 4.708135817440194e-05, + "loss": 2.2567, + "step": 1140000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.5067524843400006, + "eval_loss": 2.27734375, + "eval_runtime": 40.0409, + "eval_samples_per_second": 89.109, + "eval_steps_per_second": 11.139, + "step": 1140000 + }, + { + "epoch": 0.34, + "learning_rate": 4.700651083481521e-05, + "loss": 2.2516, + "step": 1145000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.506862071459569, + "eval_loss": 2.275390625, + "eval_runtime": 39.8652, + "eval_samples_per_second": 89.502, + "eval_steps_per_second": 11.188, + "step": 1145000 + }, + { + "epoch": 0.34, + "learning_rate": 4.693166349522847e-05, + "loss": 2.2533, + "step": 1150000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.5067752236673111, + "eval_loss": 2.275390625, + "eval_runtime": 39.9361, + "eval_samples_per_second": 89.343, + "eval_steps_per_second": 11.168, + "step": 1150000 + }, + { + "epoch": 0.35, + "learning_rate": 4.6856846112551706e-05, + "loss": 2.2532, + "step": 1155000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.5069639874807674, + "eval_loss": 2.275390625, + "eval_runtime": 40.0505, + "eval_samples_per_second": 89.088, + "eval_steps_per_second": 11.136, + "step": 1155000 + }, + { + "epoch": 0.35, + "learning_rate": 4.6781998772964966e-05, + "loss": 2.2572, + "step": 1160000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.5063629021299353, + "eval_loss": 2.279296875, + "eval_runtime": 40.0931, + "eval_samples_per_second": 88.993, + "eval_steps_per_second": 11.124, + "step": 1160000 + }, + { + "epoch": 0.35, + "learning_rate": 4.670712147646825e-05, + "loss": 2.2514, + "step": 1165000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.5071686414265613, + "eval_loss": 2.2734375, + "eval_runtime": 40.4455, + "eval_samples_per_second": 88.217, + "eval_steps_per_second": 11.027, + "step": 1165000 + }, + { + "epoch": 0.35, + "learning_rate": 4.66322891153365e-05, + "loss": 2.2471, + "step": 1170000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.5073409671720824, + "eval_loss": 2.2734375, + "eval_runtime": 40.0562, + "eval_samples_per_second": 89.075, + "eval_steps_per_second": 11.134, + "step": 1170000 + }, + { + "epoch": 0.35, + "learning_rate": 4.655745675420475e-05, + "loss": 2.2524, + "step": 1175000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.50760562006584, + "eval_loss": 2.271484375, + "eval_runtime": 40.09, + "eval_samples_per_second": 89.0, + "eval_steps_per_second": 11.125, + "step": 1175000 + }, + { + "epoch": 0.35, + "learning_rate": 4.648260941461802e-05, + "loss": 2.247, + "step": 1180000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.5072979542276519, + "eval_loss": 2.271484375, + "eval_runtime": 40.3075, + "eval_samples_per_second": 88.52, + "eval_steps_per_second": 11.065, + "step": 1180000 + }, + { + "epoch": 0.35, + "learning_rate": 4.640780701039625e-05, + "loss": 2.2491, + "step": 1185000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.5076653450460047, + "eval_loss": 2.271484375, + "eval_runtime": 40.1522, + "eval_samples_per_second": 88.862, + "eval_steps_per_second": 11.108, + "step": 1185000 + }, + { + "epoch": 0.36, + "learning_rate": 4.63329746492645e-05, + "loss": 2.2481, + "step": 1190000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.5078031508488619, + "eval_loss": 2.26953125, + "eval_runtime": 40.1488, + "eval_samples_per_second": 88.869, + "eval_steps_per_second": 11.109, + "step": 1190000 + }, + { + "epoch": 0.36, + "learning_rate": 4.992515266041327e-05, + "loss": 2.2465, + "step": 1195000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.5069333030872883, + "eval_loss": 2.2734375, + "eval_runtime": 39.5233, + "eval_samples_per_second": 90.276, + "eval_steps_per_second": 11.284, + "step": 1195000 + }, + { + "epoch": 0.36, + "learning_rate": 4.985030532082652e-05, + "loss": 2.2494, + "step": 1200000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.5067053618785863, + "eval_loss": 2.279296875, + "eval_runtime": 38.5856, + "eval_samples_per_second": 92.47, + "eval_steps_per_second": 11.559, + "step": 1200000 + }, + { + "epoch": 0.36, + "learning_rate": 4.977545798123979e-05, + "loss": 2.2541, + "step": 1205000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.5068631673307646, + "eval_loss": 2.275390625, + "eval_runtime": 39.5375, + "eval_samples_per_second": 90.244, + "eval_steps_per_second": 11.28, + "step": 1205000 + }, + { + "epoch": 0.36, + "learning_rate": 4.9700625620108035e-05, + "loss": 2.25, + "step": 1210000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.5067459091128265, + "eval_loss": 2.275390625, + "eval_runtime": 39.4824, + "eval_samples_per_second": 90.369, + "eval_steps_per_second": 11.296, + "step": 1210000 + }, + { + "epoch": 0.36, + "learning_rate": 4.962582321588627e-05, + "loss": 2.25, + "step": 1215000 + }, + { + "epoch": 0.36, + "eval_accuracy": 0.5064313940796654, + "eval_loss": 2.279296875, + "eval_runtime": 39.3855, + "eval_samples_per_second": 90.592, + "eval_steps_per_second": 11.324, + "step": 1215000 + }, + { + "epoch": 0.37, + "learning_rate": 4.955096089784455e-05, + "loss": 2.2508, + "step": 1220000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.507028095945715, + "eval_loss": 2.2734375, + "eval_runtime": 39.5116, + "eval_samples_per_second": 90.303, + "eval_steps_per_second": 11.288, + "step": 1220000 + }, + { + "epoch": 0.37, + "learning_rate": 4.947612853671279e-05, + "loss": 2.2496, + "step": 1225000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.507010562006584, + "eval_loss": 2.2734375, + "eval_runtime": 39.5165, + "eval_samples_per_second": 90.291, + "eval_steps_per_second": 11.286, + "step": 1225000 + }, + { + "epoch": 0.37, + "learning_rate": 4.940131115403604e-05, + "loss": 2.2499, + "step": 1230000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.5073475423992566, + "eval_loss": 2.2734375, + "eval_runtime": 39.4273, + "eval_samples_per_second": 90.496, + "eval_steps_per_second": 11.312, + "step": 1230000 + }, + { + "epoch": 0.37, + "learning_rate": 4.932647879290429e-05, + "loss": 2.2467, + "step": 1235000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.5075538401518439, + "eval_loss": 2.271484375, + "eval_runtime": 39.5247, + "eval_samples_per_second": 90.273, + "eval_steps_per_second": 11.284, + "step": 1235000 + }, + { + "epoch": 0.37, + "learning_rate": 4.925164643177254e-05, + "loss": 2.2497, + "step": 1240000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.5073163100701796, + "eval_loss": 2.271484375, + "eval_runtime": 38.6276, + "eval_samples_per_second": 92.369, + "eval_steps_per_second": 11.546, + "step": 1240000 + }, + { + "epoch": 0.37, + "learning_rate": 4.917678411373081e-05, + "loss": 2.2463, + "step": 1245000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.5073409671720824, + "eval_loss": 2.271484375, + "eval_runtime": 39.5689, + "eval_samples_per_second": 90.172, + "eval_steps_per_second": 11.271, + "step": 1245000 + }, + { + "epoch": 0.37, + "learning_rate": 4.910196673105406e-05, + "loss": 2.2479, + "step": 1250000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.5077573982264421, + "eval_loss": 2.26953125, + "eval_runtime": 39.6608, + "eval_samples_per_second": 89.963, + "eval_steps_per_second": 11.245, + "step": 1250000 + }, + { + "epoch": 0.38, + "learning_rate": 4.902711939146731e-05, + "loss": 2.2445, + "step": 1255000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.5078749304121791, + "eval_loss": 2.26953125, + "eval_runtime": 39.6577, + "eval_samples_per_second": 89.97, + "eval_steps_per_second": 11.246, + "step": 1255000 + }, + { + "epoch": 0.38, + "learning_rate": 4.89522420949706e-05, + "loss": 2.247, + "step": 1260000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.5078483555356837, + "eval_loss": 2.26953125, + "eval_runtime": 39.6203, + "eval_samples_per_second": 90.055, + "eval_steps_per_second": 11.257, + "step": 1260000 + }, + { + "epoch": 0.38, + "learning_rate": 4.887743969074883e-05, + "loss": 2.2443, + "step": 1265000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.5078826015105489, + "eval_loss": 2.267578125, + "eval_runtime": 39.6508, + "eval_samples_per_second": 89.986, + "eval_steps_per_second": 11.248, + "step": 1265000 + }, + { + "epoch": 0.38, + "learning_rate": 4.880262230807207e-05, + "loss": 2.243, + "step": 1270000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.5080672558070215, + "eval_loss": 2.267578125, + "eval_runtime": 39.6639, + "eval_samples_per_second": 89.956, + "eval_steps_per_second": 11.244, + "step": 1270000 + }, + { + "epoch": 0.38, + "learning_rate": 4.8727804925395315e-05, + "loss": 2.2454, + "step": 1275000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.5076889062767118, + "eval_loss": 2.271484375, + "eval_runtime": 39.6373, + "eval_samples_per_second": 90.016, + "eval_steps_per_second": 11.252, + "step": 1275000 + }, + { + "epoch": 0.38, + "learning_rate": 4.86529126504436e-05, + "loss": 2.2451, + "step": 1280000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.5080541053526733, + "eval_loss": 2.26953125, + "eval_runtime": 39.7457, + "eval_samples_per_second": 89.771, + "eval_steps_per_second": 11.221, + "step": 1280000 + }, + { + "epoch": 0.38, + "learning_rate": 4.8578080289311855e-05, + "loss": 2.2455, + "step": 1285000 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.5083853324215685, + "eval_loss": 2.265625, + "eval_runtime": 39.6403, + "eval_samples_per_second": 90.009, + "eval_steps_per_second": 11.251, + "step": 1285000 + }, + { + "epoch": 0.39, + "learning_rate": 4.850324792818011e-05, + "loss": 2.241, + "step": 1290000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.508259307234065, + "eval_loss": 2.267578125, + "eval_runtime": 40.1652, + "eval_samples_per_second": 88.833, + "eval_steps_per_second": 11.104, + "step": 1290000 + }, + { + "epoch": 0.39, + "learning_rate": 4.8428415567048354e-05, + "loss": 2.243, + "step": 1295000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.5085801235266012, + "eval_loss": 2.263671875, + "eval_runtime": 39.68, + "eval_samples_per_second": 89.919, + "eval_steps_per_second": 11.24, + "step": 1295000 + }, + { + "epoch": 0.39, + "learning_rate": 4.8353553249006635e-05, + "loss": 2.2408, + "step": 1300000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.5084371123355645, + "eval_loss": 2.263671875, + "eval_runtime": 39.9223, + "eval_samples_per_second": 89.374, + "eval_steps_per_second": 11.172, + "step": 1300000 + }, + { + "epoch": 0.39, + "learning_rate": 4.827876582323985e-05, + "loss": 2.2508, + "step": 1305000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.5063459161264021, + "eval_loss": 2.279296875, + "eval_runtime": 39.7588, + "eval_samples_per_second": 89.741, + "eval_steps_per_second": 11.218, + "step": 1305000 + }, + { + "epoch": 0.39, + "learning_rate": 4.820396341901809e-05, + "loss": 2.252, + "step": 1310000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.504651699257876, + "eval_loss": 2.291015625, + "eval_runtime": 39.8037, + "eval_samples_per_second": 89.64, + "eval_steps_per_second": 11.205, + "step": 1310000 + }, + { + "epoch": 0.39, + "learning_rate": 4.8129146036341324e-05, + "loss": 2.7482, + "step": 1315000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.4505951128528157, + "eval_loss": 2.646484375, + "eval_runtime": 39.8831, + "eval_samples_per_second": 89.462, + "eval_steps_per_second": 11.183, + "step": 1315000 + }, + { + "epoch": 0.4, + "learning_rate": 4.805426873984461e-05, + "loss": 2.4189, + "step": 1320000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.5070354930762858, + "eval_loss": 2.275390625, + "eval_runtime": 39.9021, + "eval_samples_per_second": 89.419, + "eval_steps_per_second": 11.177, + "step": 1320000 + }, + { + "epoch": 0.4, + "learning_rate": 4.797939144334789e-05, + "loss": 2.2446, + "step": 1325000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.5081412271127301, + "eval_loss": 2.267578125, + "eval_runtime": 39.8679, + "eval_samples_per_second": 89.495, + "eval_steps_per_second": 11.187, + "step": 1325000 + }, + { + "epoch": 0.4, + "learning_rate": 4.790457406067113e-05, + "loss": 2.2416, + "step": 1330000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.5086858750969846, + "eval_loss": 2.263671875, + "eval_runtime": 39.8423, + "eval_samples_per_second": 89.553, + "eval_steps_per_second": 11.194, + "step": 1330000 + }, + { + "epoch": 0.4, + "learning_rate": 4.78297267210844e-05, + "loss": 2.2421, + "step": 1335000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.5087765584384274, + "eval_loss": 2.26171875, + "eval_runtime": 39.9585, + "eval_samples_per_second": 89.293, + "eval_steps_per_second": 11.162, + "step": 1335000 + }, + { + "epoch": 0.4, + "learning_rate": 4.775487938149766e-05, + "loss": 2.2367, + "step": 1340000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.509168606358683, + "eval_loss": 2.26171875, + "eval_runtime": 39.8472, + "eval_samples_per_second": 89.542, + "eval_steps_per_second": 11.193, + "step": 1340000 + }, + { + "epoch": 0.4, + "learning_rate": 4.768001706345593e-05, + "loss": 2.2355, + "step": 1345000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.5090864160190068, + "eval_loss": 2.259765625, + "eval_runtime": 39.8651, + "eval_samples_per_second": 89.502, + "eval_steps_per_second": 11.188, + "step": 1345000 + }, + { + "epoch": 0.4, + "learning_rate": 4.760518470232418e-05, + "loss": 2.2379, + "step": 1350000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.5093981913741786, + "eval_loss": 2.259765625, + "eval_runtime": 39.8769, + "eval_samples_per_second": 89.475, + "eval_steps_per_second": 11.184, + "step": 1350000 + }, + { + "epoch": 0.41, + "learning_rate": 4.7530337362737444e-05, + "loss": 2.2365, + "step": 1355000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.509393259953798, + "eval_loss": 2.259765625, + "eval_runtime": 39.8496, + "eval_samples_per_second": 89.537, + "eval_steps_per_second": 11.192, + "step": 1355000 + }, + { + "epoch": 0.41, + "learning_rate": 4.745550500160569e-05, + "loss": 2.2379, + "step": 1360000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.5091491546449597, + "eval_loss": 2.2578125, + "eval_runtime": 39.8235, + "eval_samples_per_second": 89.595, + "eval_steps_per_second": 11.199, + "step": 1360000 + }, + { + "epoch": 0.41, + "learning_rate": 4.738067264047394e-05, + "loss": 2.235, + "step": 1365000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.5094527109661638, + "eval_loss": 2.2578125, + "eval_runtime": 38.8651, + "eval_samples_per_second": 91.805, + "eval_steps_per_second": 11.476, + "step": 1365000 + }, + { + "epoch": 0.41, + "learning_rate": 4.730582530088721e-05, + "loss": 2.236, + "step": 1370000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.5093398362330085, + "eval_loss": 2.2578125, + "eval_runtime": 40.9049, + "eval_samples_per_second": 87.227, + "eval_steps_per_second": 10.903, + "step": 1370000 + }, + { + "epoch": 0.41, + "learning_rate": 4.7230992939755456e-05, + "loss": 2.2344, + "step": 1375000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.5095472298567916, + "eval_loss": 2.2578125, + "eval_runtime": 39.9487, + "eval_samples_per_second": 89.314, + "eval_steps_per_second": 11.164, + "step": 1375000 + }, + { + "epoch": 0.41, + "learning_rate": 4.715614560016872e-05, + "loss": 2.2348, + "step": 1380000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.5095688733129063, + "eval_loss": 2.255859375, + "eval_runtime": 39.951, + "eval_samples_per_second": 89.309, + "eval_steps_per_second": 11.164, + "step": 1380000 + }, + { + "epoch": 0.41, + "learning_rate": 4.7081313239036975e-05, + "loss": 2.2306, + "step": 1385000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.5097368155736447, + "eval_loss": 2.255859375, + "eval_runtime": 40.0156, + "eval_samples_per_second": 89.165, + "eval_steps_per_second": 11.146, + "step": 1385000 + }, + { + "epoch": 0.42, + "learning_rate": 4.7006495856360214e-05, + "loss": 2.2293, + "step": 1390000 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.5097521577703843, + "eval_loss": 2.255859375, + "eval_runtime": 40.0042, + "eval_samples_per_second": 89.191, + "eval_steps_per_second": 11.149, + "step": 1390000 + }, + { + "epoch": 0.42, + "learning_rate": 4.693166349522847e-05, + "loss": 2.2311, + "step": 1395000 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.510102562585204, + "eval_loss": 2.25390625, + "eval_runtime": 39.9554, + "eval_samples_per_second": 89.3, + "eval_steps_per_second": 11.162, + "step": 1395000 + }, + { + "epoch": 0.42, + "learning_rate": 4.685683113409672e-05, + "loss": 2.231, + "step": 1400000 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.5101116035225683, + "eval_loss": 2.25390625, + "eval_runtime": 39.9882, + "eval_samples_per_second": 89.226, + "eval_steps_per_second": 11.153, + "step": 1400000 + }, + { + "epoch": 0.42, + "learning_rate": 4.678201375141996e-05, + "loss": 2.2272, + "step": 1405000 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.5102126976403701, + "eval_loss": 2.251953125, + "eval_runtime": 40.3186, + "eval_samples_per_second": 88.495, + "eval_steps_per_second": 11.062, + "step": 1405000 + }, + { + "epoch": 0.42, + "learning_rate": 4.670718139028821e-05, + "loss": 2.2264, + "step": 1410000 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.5102436560016482, + "eval_loss": 2.25390625, + "eval_runtime": 40.0007, + "eval_samples_per_second": 89.198, + "eval_steps_per_second": 11.15, + "step": 1410000 + }, + { + "epoch": 0.42, + "learning_rate": 4.6632349029156465e-05, + "loss": 2.2295, + "step": 1415000 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.5104469401084474, + "eval_loss": 2.251953125, + "eval_runtime": 40.1353, + "eval_samples_per_second": 88.899, + "eval_steps_per_second": 11.112, + "step": 1415000 + }, + { + "epoch": 0.43, + "learning_rate": 4.655753164647971e-05, + "loss": 2.2281, + "step": 1420000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.5103937903554567, + "eval_loss": 2.251953125, + "eval_runtime": 40.0088, + "eval_samples_per_second": 89.18, + "eval_steps_per_second": 11.148, + "step": 1420000 + }, + { + "epoch": 0.43, + "learning_rate": 4.6482699285347956e-05, + "loss": 2.2234, + "step": 1425000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.5106672102187797, + "eval_loss": 2.25, + "eval_runtime": 40.1918, + "eval_samples_per_second": 88.774, + "eval_steps_per_second": 11.097, + "step": 1425000 + }, + { + "epoch": 0.43, + "learning_rate": 4.64078819026712e-05, + "loss": 2.2293, + "step": 1430000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.5107220037785639, + "eval_loss": 2.25, + "eval_runtime": 40.1338, + "eval_samples_per_second": 88.903, + "eval_steps_per_second": 11.113, + "step": 1430000 + }, + { + "epoch": 0.43, + "learning_rate": 4.633306451999444e-05, + "loss": 2.2256, + "step": 1435000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.5108652889373995, + "eval_loss": 2.25, + "eval_runtime": 40.1228, + "eval_samples_per_second": 88.927, + "eval_steps_per_second": 11.116, + "step": 1435000 + }, + { + "epoch": 0.43, + "learning_rate": 4.6258232158862694e-05, + "loss": 2.2247, + "step": 1440000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.5107954271486747, + "eval_loss": 2.25, + "eval_runtime": 40.0563, + "eval_samples_per_second": 89.075, + "eval_steps_per_second": 11.134, + "step": 1440000 + }, + { + "epoch": 0.43, + "learning_rate": 4.6183384819275954e-05, + "loss": 2.222, + "step": 1445000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.510766386561989, + "eval_loss": 2.25, + "eval_runtime": 40.2217, + "eval_samples_per_second": 88.708, + "eval_steps_per_second": 11.089, + "step": 1445000 + }, + { + "epoch": 0.43, + "learning_rate": 4.61085674365992e-05, + "loss": 2.2228, + "step": 1450000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.5106184439505719, + "eval_loss": 2.248046875, + "eval_runtime": 40.5305, + "eval_samples_per_second": 88.032, + "eval_steps_per_second": 11.004, + "step": 1450000 + }, + { + "epoch": 0.44, + "learning_rate": 4.603372009701246e-05, + "loss": 2.2241, + "step": 1455000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.5110554225898505, + "eval_loss": 2.248046875, + "eval_runtime": 40.2413, + "eval_samples_per_second": 88.665, + "eval_steps_per_second": 11.083, + "step": 1455000 + }, + { + "epoch": 0.44, + "learning_rate": 4.595891769279069e-05, + "loss": 2.2219, + "step": 1460000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.511077613981563, + "eval_loss": 2.24609375, + "eval_runtime": 40.2762, + "eval_samples_per_second": 88.588, + "eval_steps_per_second": 11.074, + "step": 1460000 + }, + { + "epoch": 0.44, + "learning_rate": 4.5884085331658944e-05, + "loss": 2.2219, + "step": 1465000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.5112833637985525, + "eval_loss": 2.24609375, + "eval_runtime": 40.2334, + "eval_samples_per_second": 88.682, + "eval_steps_per_second": 11.085, + "step": 1465000 + }, + { + "epoch": 0.44, + "learning_rate": 4.58092529705272e-05, + "loss": 2.2215, + "step": 1470000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.5112595286000464, + "eval_loss": 2.24609375, + "eval_runtime": 40.2987, + "eval_samples_per_second": 88.539, + "eval_steps_per_second": 11.067, + "step": 1470000 + }, + { + "epoch": 0.44, + "learning_rate": 4.573445056630543e-05, + "loss": 2.2193, + "step": 1475000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.5116091115114694, + "eval_loss": 2.244140625, + "eval_runtime": 40.1594, + "eval_samples_per_second": 88.846, + "eval_steps_per_second": 11.106, + "step": 1475000 + }, + { + "epoch": 0.44, + "learning_rate": 4.56595882482637e-05, + "loss": 2.2183, + "step": 1480000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.5114707577730144, + "eval_loss": 2.244140625, + "eval_runtime": 40.9626, + "eval_samples_per_second": 87.104, + "eval_steps_per_second": 10.888, + "step": 1480000 + }, + { + "epoch": 0.44, + "learning_rate": 4.5584755887131956e-05, + "loss": 2.2177, + "step": 1485000 + }, + { + "epoch": 0.44, + "eval_accuracy": 0.5116211660946219, + "eval_loss": 2.244140625, + "eval_runtime": 40.3714, + "eval_samples_per_second": 88.379, + "eval_steps_per_second": 11.047, + "step": 1485000 + }, + { + "epoch": 0.45, + "learning_rate": 4.55099235260002e-05, + "loss": 2.2211, + "step": 1490000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.5115852763129632, + "eval_loss": 2.2421875, + "eval_runtime": 40.2564, + "eval_samples_per_second": 88.632, + "eval_steps_per_second": 11.079, + "step": 1490000 + }, + { + "epoch": 0.45, + "learning_rate": 4.5435091164868455e-05, + "loss": 2.2183, + "step": 1495000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.5118184229098449, + "eval_loss": 2.2421875, + "eval_runtime": 40.2315, + "eval_samples_per_second": 88.687, + "eval_steps_per_second": 11.086, + "step": 1495000 + }, + { + "epoch": 0.45, + "learning_rate": 4.5360243825281715e-05, + "loss": 2.2182, + "step": 1500000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.5120186933708559, + "eval_loss": 2.240234375, + "eval_runtime": 40.3125, + "eval_samples_per_second": 88.509, + "eval_steps_per_second": 11.064, + "step": 1500000 + }, + { + "epoch": 0.45, + "learning_rate": 4.528539648569498e-05, + "loss": 2.2148, + "step": 1505000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.5121849918248009, + "eval_loss": 2.240234375, + "eval_runtime": 40.3172, + "eval_samples_per_second": 88.498, + "eval_steps_per_second": 11.062, + "step": 1505000 + }, + { + "epoch": 0.45, + "learning_rate": 4.521059408147321e-05, + "loss": 2.2217, + "step": 1510000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.5122685520034718, + "eval_loss": 2.240234375, + "eval_runtime": 40.2766, + "eval_samples_per_second": 88.587, + "eval_steps_per_second": 11.073, + "step": 1510000 + }, + { + "epoch": 0.45, + "learning_rate": 4.5135761720341466e-05, + "loss": 2.2117, + "step": 1515000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.5123701940568713, + "eval_loss": 2.23828125, + "eval_runtime": 40.4763, + "eval_samples_per_second": 88.15, + "eval_steps_per_second": 11.019, + "step": 1515000 + }, + { + "epoch": 0.46, + "learning_rate": 4.5060944337664705e-05, + "loss": 2.2152, + "step": 1520000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5123003322681465, + "eval_loss": 2.23828125, + "eval_runtime": 40.309, + "eval_samples_per_second": 88.516, + "eval_steps_per_second": 11.065, + "step": 1520000 + }, + { + "epoch": 0.46, + "learning_rate": 4.4986096998077965e-05, + "loss": 2.2148, + "step": 1525000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5124967671799727, + "eval_loss": 2.23828125, + "eval_runtime": 40.2971, + "eval_samples_per_second": 88.542, + "eval_steps_per_second": 11.068, + "step": 1525000 + }, + { + "epoch": 0.46, + "learning_rate": 4.491127961540121e-05, + "loss": 2.2151, + "step": 1530000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5127488175549798, + "eval_loss": 2.236328125, + "eval_runtime": 40.3675, + "eval_samples_per_second": 88.388, + "eval_steps_per_second": 11.049, + "step": 1530000 + }, + { + "epoch": 0.46, + "learning_rate": 4.483646223272445e-05, + "loss": 2.2129, + "step": 1535000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5127022430291633, + "eval_loss": 2.236328125, + "eval_runtime": 40.4605, + "eval_samples_per_second": 88.185, + "eval_steps_per_second": 11.023, + "step": 1535000 + }, + { + "epoch": 0.46, + "learning_rate": 4.47616298715927e-05, + "loss": 2.2145, + "step": 1540000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5127690911721, + "eval_loss": 2.236328125, + "eval_runtime": 40.376, + "eval_samples_per_second": 88.369, + "eval_steps_per_second": 11.046, + "step": 1540000 + }, + { + "epoch": 0.46, + "learning_rate": 4.468681248891595e-05, + "loss": 2.2099, + "step": 1545000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5128871712934349, + "eval_loss": 2.236328125, + "eval_runtime": 40.3585, + "eval_samples_per_second": 88.408, + "eval_steps_per_second": 11.051, + "step": 1545000 + }, + { + "epoch": 0.46, + "learning_rate": 4.46119651493292e-05, + "loss": 2.2125, + "step": 1550000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5131964809384164, + "eval_loss": 2.234375, + "eval_runtime": 40.3163, + "eval_samples_per_second": 88.5, + "eval_steps_per_second": 11.063, + "step": 1550000 + }, + { + "epoch": 0.47, + "learning_rate": 4.453713278819746e-05, + "loss": 2.2101, + "step": 1555000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.5130504561015916, + "eval_loss": 2.234375, + "eval_runtime": 40.2993, + "eval_samples_per_second": 88.537, + "eval_steps_per_second": 11.067, + "step": 1555000 + }, + { + "epoch": 0.47, + "learning_rate": 4.4462300427065714e-05, + "loss": 2.211, + "step": 1560000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.5132159326521398, + "eval_loss": 2.234375, + "eval_runtime": 40.3465, + "eval_samples_per_second": 88.434, + "eval_steps_per_second": 11.054, + "step": 1560000 + }, + { + "epoch": 0.47, + "learning_rate": 4.438743810902398e-05, + "loss": 2.2086, + "step": 1565000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.5131567556075729, + "eval_loss": 2.234375, + "eval_runtime": 40.256, + "eval_samples_per_second": 88.633, + "eval_steps_per_second": 11.079, + "step": 1565000 + }, + { + "epoch": 0.47, + "learning_rate": 4.4312605747892234e-05, + "loss": 2.2137, + "step": 1570000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.5131545638651815, + "eval_loss": 2.232421875, + "eval_runtime": 40.2935, + "eval_samples_per_second": 88.55, + "eval_steps_per_second": 11.069, + "step": 1570000 + }, + { + "epoch": 0.47, + "learning_rate": 4.423778836521548e-05, + "loss": 2.2122, + "step": 1575000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.5134236002437218, + "eval_loss": 2.232421875, + "eval_runtime": 40.3698, + "eval_samples_per_second": 88.383, + "eval_steps_per_second": 11.048, + "step": 1575000 + }, + { + "epoch": 0.47, + "learning_rate": 4.416297098253872e-05, + "loss": 2.2053, + "step": 1580000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.5133767517501063, + "eval_loss": 2.232421875, + "eval_runtime": 40.3058, + "eval_samples_per_second": 88.523, + "eval_steps_per_second": 11.065, + "step": 1580000 + }, + { + "epoch": 0.47, + "learning_rate": 4.408813862140697e-05, + "loss": 2.208, + "step": 1585000 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.513388258397661, + "eval_loss": 2.23046875, + "eval_runtime": 40.3539, + "eval_samples_per_second": 88.418, + "eval_steps_per_second": 11.052, + "step": 1585000 + }, + { + "epoch": 0.48, + "learning_rate": 4.401329128182023e-05, + "loss": 2.2081, + "step": 1590000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.5135512692380189, + "eval_loss": 2.23046875, + "eval_runtime": 42.8555, + "eval_samples_per_second": 83.257, + "eval_steps_per_second": 10.407, + "step": 1590000 + }, + { + "epoch": 0.48, + "learning_rate": 4.3938458920688484e-05, + "loss": 2.2077, + "step": 1595000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.5137540054092202, + "eval_loss": 2.23046875, + "eval_runtime": 40.2657, + "eval_samples_per_second": 88.611, + "eval_steps_per_second": 11.076, + "step": 1595000 + }, + { + "epoch": 0.48, + "learning_rate": 4.3863611581101744e-05, + "loss": 2.2061, + "step": 1600000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.5135893507620688, + "eval_loss": 2.23046875, + "eval_runtime": 41.6767, + "eval_samples_per_second": 85.611, + "eval_steps_per_second": 10.701, + "step": 1600000 + }, + { + "epoch": 0.48, + "learning_rate": 4.378880917687998e-05, + "loss": 2.2055, + "step": 1605000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.5138961946968601, + "eval_loss": 2.228515625, + "eval_runtime": 40.3609, + "eval_samples_per_second": 88.402, + "eval_steps_per_second": 11.05, + "step": 1605000 + }, + { + "epoch": 0.48, + "learning_rate": 4.371397681574823e-05, + "loss": 2.2065, + "step": 1610000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.5138923591476752, + "eval_loss": 2.228515625, + "eval_runtime": 40.453, + "eval_samples_per_second": 88.201, + "eval_steps_per_second": 11.025, + "step": 1610000 + }, + { + "epoch": 0.48, + "learning_rate": 4.363914445461648e-05, + "loss": 2.2054, + "step": 1615000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.5138997562782461, + "eval_loss": 2.228515625, + "eval_runtime": 41.6251, + "eval_samples_per_second": 85.718, + "eval_steps_per_second": 10.715, + "step": 1615000 + }, + { + "epoch": 0.49, + "learning_rate": 4.356432707193973e-05, + "loss": 2.2035, + "step": 1620000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.5140274252725432, + "eval_loss": 2.228515625, + "eval_runtime": 40.4365, + "eval_samples_per_second": 88.237, + "eval_steps_per_second": 11.03, + "step": 1620000 + }, + { + "epoch": 0.49, + "learning_rate": 4.348947973235298e-05, + "loss": 2.2021, + "step": 1625000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.5139844123281126, + "eval_loss": 2.228515625, + "eval_runtime": 40.3492, + "eval_samples_per_second": 88.428, + "eval_steps_per_second": 11.054, + "step": 1625000 + }, + { + "epoch": 0.49, + "learning_rate": 4.3414662349676226e-05, + "loss": 2.2036, + "step": 1630000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.5138233192623471, + "eval_loss": 2.228515625, + "eval_runtime": 40.3662, + "eval_samples_per_second": 88.391, + "eval_steps_per_second": 11.049, + "step": 1630000 + }, + { + "epoch": 0.49, + "learning_rate": 4.333981501008949e-05, + "loss": 2.204, + "step": 1635000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.5139803028111288, + "eval_loss": 2.2265625, + "eval_runtime": 40.2896, + "eval_samples_per_second": 88.559, + "eval_steps_per_second": 11.07, + "step": 1635000 + }, + { + "epoch": 0.49, + "learning_rate": 4.3264967670502746e-05, + "loss": 2.2042, + "step": 1640000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.5140860543815122, + "eval_loss": 2.2265625, + "eval_runtime": 42.4068, + "eval_samples_per_second": 84.137, + "eval_steps_per_second": 10.517, + "step": 1640000 + }, + { + "epoch": 0.49, + "learning_rate": 4.3190135309371006e-05, + "loss": 2.2024, + "step": 1645000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.514173450109368, + "eval_loss": 2.2265625, + "eval_runtime": 40.3018, + "eval_samples_per_second": 88.532, + "eval_steps_per_second": 11.067, + "step": 1645000 + }, + { + "epoch": 0.49, + "learning_rate": 4.311530294823926e-05, + "loss": 2.2023, + "step": 1650000 + }, + { + "epoch": 0.49, + "eval_accuracy": 0.5144172814504074, + "eval_loss": 2.2265625, + "eval_runtime": 40.2694, + "eval_samples_per_second": 88.603, + "eval_steps_per_second": 11.075, + "step": 1650000 + }, + { + "epoch": 0.5, + "learning_rate": 4.30404855655625e-05, + "loss": 2.1976, + "step": 1655000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5145805662585642, + "eval_loss": 2.224609375, + "eval_runtime": 40.3729, + "eval_samples_per_second": 88.376, + "eval_steps_per_second": 11.047, + "step": 1655000 + }, + { + "epoch": 0.5, + "learning_rate": 4.296565320443075e-05, + "loss": 2.2028, + "step": 1660000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5147172761902257, + "eval_loss": 2.224609375, + "eval_runtime": 40.282, + "eval_samples_per_second": 88.576, + "eval_steps_per_second": 11.072, + "step": 1660000 + }, + { + "epoch": 0.5, + "learning_rate": 4.289080586484401e-05, + "loss": 2.1971, + "step": 1665000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5146457705947074, + "eval_loss": 2.224609375, + "eval_runtime": 40.4909, + "eval_samples_per_second": 88.119, + "eval_steps_per_second": 11.015, + "step": 1665000 + }, + { + "epoch": 0.5, + "learning_rate": 4.281595852525727e-05, + "loss": 2.1978, + "step": 1670000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5146065931994617, + "eval_loss": 2.224609375, + "eval_runtime": 40.3534, + "eval_samples_per_second": 88.419, + "eval_steps_per_second": 11.052, + "step": 1670000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27411561210355e-05, + "loss": 2.1955, + "step": 1675000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5148249455352015, + "eval_loss": 2.22265625, + "eval_runtime": 39.3164, + "eval_samples_per_second": 90.751, + "eval_steps_per_second": 11.344, + "step": 1675000 + }, + { + "epoch": 0.5, + "learning_rate": 4.266630878144877e-05, + "loss": 2.1967, + "step": 1680000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5146874137001434, + "eval_loss": 2.22265625, + "eval_runtime": 40.3287, + "eval_samples_per_second": 88.473, + "eval_steps_per_second": 11.059, + "step": 1680000 + }, + { + "epoch": 0.5, + "learning_rate": 4.259149139877201e-05, + "loss": 2.1975, + "step": 1685000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5151745284466245, + "eval_loss": 2.22265625, + "eval_runtime": 40.2734, + "eval_samples_per_second": 88.594, + "eval_steps_per_second": 11.074, + "step": 1685000 + }, + { + "epoch": 0.51, + "learning_rate": 4.251668899455024e-05, + "loss": 2.1972, + "step": 1690000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.5148622051558548, + "eval_loss": 2.220703125, + "eval_runtime": 40.2657, + "eval_samples_per_second": 88.611, + "eval_steps_per_second": 11.076, + "step": 1690000 + }, + { + "epoch": 0.51, + "learning_rate": 4.2441841654963506e-05, + "loss": 2.1967, + "step": 1695000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.5150745302000184, + "eval_loss": 2.220703125, + "eval_runtime": 40.2491, + "eval_samples_per_second": 88.648, + "eval_steps_per_second": 11.081, + "step": 1695000 + }, + { + "epoch": 0.51, + "learning_rate": 4.236699431537676e-05, + "loss": 2.194, + "step": 1700000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.5150706946508335, + "eval_loss": 2.220703125, + "eval_runtime": 40.3323, + "eval_samples_per_second": 88.465, + "eval_steps_per_second": 11.058, + "step": 1700000 + }, + { + "epoch": 0.51, + "learning_rate": 4.9925167638868255e-05, + "loss": 2.2009, + "step": 1705000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.5139096191190072, + "eval_loss": 2.228515625, + "eval_runtime": 39.4304, + "eval_samples_per_second": 90.489, + "eval_steps_per_second": 11.311, + "step": 1705000 + }, + { + "epoch": 0.51, + "learning_rate": 4.9850350256191494e-05, + "loss": 2.2085, + "step": 1710000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.5136279802217166, + "eval_loss": 2.23046875, + "eval_runtime": 39.2325, + "eval_samples_per_second": 90.945, + "eval_steps_per_second": 11.368, + "step": 1710000 + }, + { + "epoch": 0.51, + "learning_rate": 4.977551789505975e-05, + "loss": 2.2077, + "step": 1715000 + }, + { + "epoch": 0.51, + "eval_accuracy": 0.5136983899460393, + "eval_loss": 2.23046875, + "eval_runtime": 39.2977, + "eval_samples_per_second": 90.794, + "eval_steps_per_second": 11.349, + "step": 1715000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9700670555473014e-05, + "loss": 2.205, + "step": 1720000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.51339866917402, + "eval_loss": 2.23046875, + "eval_runtime": 39.3375, + "eval_samples_per_second": 90.702, + "eval_steps_per_second": 11.338, + "step": 1720000 + }, + { + "epoch": 0.52, + "learning_rate": 4.962583819434126e-05, + "loss": 2.2063, + "step": 1725000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.5134490792490214, + "eval_loss": 2.23046875, + "eval_runtime": 39.3134, + "eval_samples_per_second": 90.758, + "eval_steps_per_second": 11.345, + "step": 1725000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9551020811664506e-05, + "loss": 2.2076, + "step": 1730000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.5134718185763318, + "eval_loss": 2.23046875, + "eval_runtime": 39.2618, + "eval_samples_per_second": 90.877, + "eval_steps_per_second": 11.36, + "step": 1730000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9476173472077765e-05, + "loss": 2.2036, + "step": 1735000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.5133134651885556, + "eval_loss": 2.23046875, + "eval_runtime": 39.3642, + "eval_samples_per_second": 90.641, + "eval_steps_per_second": 11.33, + "step": 1735000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9401356089401005e-05, + "loss": 2.2064, + "step": 1740000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.5138024977096292, + "eval_loss": 2.23046875, + "eval_runtime": 39.4343, + "eval_samples_per_second": 90.48, + "eval_steps_per_second": 11.31, + "step": 1740000 + }, + { + "epoch": 0.52, + "learning_rate": 4.932650874981427e-05, + "loss": 2.2053, + "step": 1745000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.5136904448798706, + "eval_loss": 2.23046875, + "eval_runtime": 39.4148, + "eval_samples_per_second": 90.524, + "eval_steps_per_second": 11.316, + "step": 1745000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9251676388682524e-05, + "loss": 2.2048, + "step": 1750000 + }, + { + "epoch": 0.52, + "eval_accuracy": 0.5138953727934633, + "eval_loss": 2.23046875, + "eval_runtime": 39.3715, + "eval_samples_per_second": 90.624, + "eval_steps_per_second": 11.328, + "step": 1750000 + }, + { + "epoch": 0.53, + "learning_rate": 4.917684402755077e-05, + "loss": 2.2075, + "step": 1755000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.5137868815450907, + "eval_loss": 2.23046875, + "eval_runtime": 39.4167, + "eval_samples_per_second": 90.52, + "eval_steps_per_second": 11.315, + "step": 1755000 + }, + { + "epoch": 0.53, + "learning_rate": 4.910201166641902e-05, + "loss": 2.2041, + "step": 1760000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.5136414046438638, + "eval_loss": 2.228515625, + "eval_runtime": 39.46, + "eval_samples_per_second": 90.421, + "eval_steps_per_second": 11.303, + "step": 1760000 + }, + { + "epoch": 0.53, + "learning_rate": 4.902716432683229e-05, + "loss": 2.2057, + "step": 1765000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.5138789347255281, + "eval_loss": 2.228515625, + "eval_runtime": 39.4616, + "eval_samples_per_second": 90.417, + "eval_steps_per_second": 11.302, + "step": 1765000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8952331965700536e-05, + "loss": 2.2054, + "step": 1770000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.5138929070832731, + "eval_loss": 2.228515625, + "eval_runtime": 39.5693, + "eval_samples_per_second": 90.171, + "eval_steps_per_second": 11.271, + "step": 1770000 + }, + { + "epoch": 0.53, + "learning_rate": 4.887752956147877e-05, + "loss": 2.2085, + "step": 1775000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.5139266051225403, + "eval_loss": 2.228515625, + "eval_runtime": 39.4709, + "eval_samples_per_second": 90.396, + "eval_steps_per_second": 11.299, + "step": 1775000 + }, + { + "epoch": 0.53, + "learning_rate": 4.880269720034703e-05, + "loss": 2.2051, + "step": 1780000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.5141471492006715, + "eval_loss": 2.2265625, + "eval_runtime": 39.4552, + "eval_samples_per_second": 90.432, + "eval_steps_per_second": 11.304, + "step": 1780000 + }, + { + "epoch": 0.53, + "learning_rate": 4.872786483921527e-05, + "loss": 2.2023, + "step": 1785000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.5139211257665619, + "eval_loss": 2.2265625, + "eval_runtime": 39.5212, + "eval_samples_per_second": 90.281, + "eval_steps_per_second": 11.285, + "step": 1785000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8653032478083526e-05, + "loss": 2.205, + "step": 1790000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.5140561918914298, + "eval_loss": 2.2265625, + "eval_runtime": 39.4725, + "eval_samples_per_second": 90.392, + "eval_steps_per_second": 11.299, + "step": 1790000 + }, + { + "epoch": 0.54, + "learning_rate": 4.857818513849679e-05, + "loss": 2.2009, + "step": 1795000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.5141463272972748, + "eval_loss": 2.2265625, + "eval_runtime": 39.6114, + "eval_samples_per_second": 90.075, + "eval_steps_per_second": 11.259, + "step": 1795000 + }, + { + "epoch": 0.54, + "learning_rate": 4.850335277736504e-05, + "loss": 2.1998, + "step": 1800000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.5143134476546165, + "eval_loss": 2.2265625, + "eval_runtime": 39.8498, + "eval_samples_per_second": 89.536, + "eval_steps_per_second": 11.192, + "step": 1800000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8428505437778305e-05, + "loss": 2.2009, + "step": 1805000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.5143608440838298, + "eval_loss": 2.224609375, + "eval_runtime": 38.3896, + "eval_samples_per_second": 92.942, + "eval_steps_per_second": 11.618, + "step": 1805000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8353688055101544e-05, + "loss": 2.2027, + "step": 1810000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.5143013930714639, + "eval_loss": 2.2265625, + "eval_runtime": 39.6231, + "eval_samples_per_second": 90.048, + "eval_steps_per_second": 11.256, + "step": 1810000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8278840715514804e-05, + "loss": 2.2007, + "step": 1815000 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.5145857716467437, + "eval_loss": 2.224609375, + "eval_runtime": 39.6832, + "eval_samples_per_second": 89.912, + "eval_steps_per_second": 11.239, + "step": 1815000 + }, + { + "epoch": 0.55, + "learning_rate": 4.820402333283805e-05, + "loss": 2.1978, + "step": 1820000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.5144972800476924, + "eval_loss": 2.224609375, + "eval_runtime": 39.721, + "eval_samples_per_second": 89.827, + "eval_steps_per_second": 11.228, + "step": 1820000 + }, + { + "epoch": 0.55, + "learning_rate": 4.812920595016129e-05, + "loss": 2.1999, + "step": 1825000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.5145970043264995, + "eval_loss": 2.22265625, + "eval_runtime": 39.6521, + "eval_samples_per_second": 89.983, + "eval_steps_per_second": 11.248, + "step": 1825000 + }, + { + "epoch": 0.55, + "learning_rate": 4.8054388567484535e-05, + "loss": 2.1978, + "step": 1830000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.5148150826944404, + "eval_loss": 2.22265625, + "eval_runtime": 39.5596, + "eval_samples_per_second": 90.193, + "eval_steps_per_second": 11.274, + "step": 1830000 + }, + { + "epoch": 0.55, + "learning_rate": 4.79795262494428e-05, + "loss": 2.1989, + "step": 1835000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.5147271390309869, + "eval_loss": 2.22265625, + "eval_runtime": 39.6103, + "eval_samples_per_second": 90.077, + "eval_steps_per_second": 11.26, + "step": 1835000 + }, + { + "epoch": 0.55, + "learning_rate": 4.790467890985607e-05, + "loss": 2.1989, + "step": 1840000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.5147980966909074, + "eval_loss": 2.22265625, + "eval_runtime": 39.6511, + "eval_samples_per_second": 89.985, + "eval_steps_per_second": 11.248, + "step": 1840000 + }, + { + "epoch": 0.55, + "learning_rate": 4.7829876505634294e-05, + "loss": 2.1982, + "step": 1845000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.5149764497280047, + "eval_loss": 2.220703125, + "eval_runtime": 39.6684, + "eval_samples_per_second": 89.946, + "eval_steps_per_second": 11.243, + "step": 1845000 + }, + { + "epoch": 0.55, + "learning_rate": 4.775501418759257e-05, + "loss": 2.1974, + "step": 1850000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.515062201649067, + "eval_loss": 2.220703125, + "eval_runtime": 39.5826, + "eval_samples_per_second": 90.141, + "eval_steps_per_second": 11.268, + "step": 1850000 + }, + { + "epoch": 0.56, + "learning_rate": 4.768016684800583e-05, + "loss": 2.1972, + "step": 1855000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.5151389126327648, + "eval_loss": 2.220703125, + "eval_runtime": 39.774, + "eval_samples_per_second": 89.707, + "eval_steps_per_second": 11.213, + "step": 1855000 + }, + { + "epoch": 0.56, + "learning_rate": 4.760533448687408e-05, + "loss": 2.1966, + "step": 1860000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.515106858400291, + "eval_loss": 2.220703125, + "eval_runtime": 39.7218, + "eval_samples_per_second": 89.825, + "eval_steps_per_second": 11.228, + "step": 1860000 + }, + { + "epoch": 0.56, + "learning_rate": 4.753050212574233e-05, + "loss": 2.198, + "step": 1865000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.5150282296420008, + "eval_loss": 2.220703125, + "eval_runtime": 39.6783, + "eval_samples_per_second": 89.923, + "eval_steps_per_second": 11.24, + "step": 1865000 + }, + { + "epoch": 0.56, + "learning_rate": 4.7455669764610586e-05, + "loss": 2.1978, + "step": 1870000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.5151887747721684, + "eval_loss": 2.220703125, + "eval_runtime": 39.753, + "eval_samples_per_second": 89.754, + "eval_steps_per_second": 11.219, + "step": 1870000 + }, + { + "epoch": 0.56, + "learning_rate": 4.738083740347884e-05, + "loss": 2.1938, + "step": 1875000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.5152000074519242, + "eval_loss": 2.220703125, + "eval_runtime": 39.7549, + "eval_samples_per_second": 89.75, + "eval_steps_per_second": 11.219, + "step": 1875000 + }, + { + "epoch": 0.56, + "learning_rate": 4.730600504234709e-05, + "loss": 2.1908, + "step": 1880000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.5152191851978486, + "eval_loss": 2.21875, + "eval_runtime": 39.6445, + "eval_samples_per_second": 90.0, + "eval_steps_per_second": 11.25, + "step": 1880000 + }, + { + "epoch": 0.56, + "learning_rate": 4.723118765967034e-05, + "loss": 2.1899, + "step": 1885000 + }, + { + "epoch": 0.56, + "eval_accuracy": 0.5151602821210807, + "eval_loss": 2.21875, + "eval_runtime": 39.7932, + "eval_samples_per_second": 89.664, + "eval_steps_per_second": 11.208, + "step": 1885000 + }, + { + "epoch": 0.57, + "learning_rate": 4.715634032008359e-05, + "loss": 2.1938, + "step": 1890000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.5151682271872493, + "eval_loss": 2.21875, + "eval_runtime": 39.7818, + "eval_samples_per_second": 89.689, + "eval_steps_per_second": 11.211, + "step": 1890000 + }, + { + "epoch": 0.57, + "learning_rate": 4.708150795895185e-05, + "loss": 2.1909, + "step": 1895000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.5153520595803252, + "eval_loss": 2.21875, + "eval_runtime": 39.8181, + "eval_samples_per_second": 89.607, + "eval_steps_per_second": 11.201, + "step": 1895000 + }, + { + "epoch": 0.57, + "learning_rate": 4.700669057627509e-05, + "loss": 2.1921, + "step": 1900000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.5155320564242162, + "eval_loss": 2.21875, + "eval_runtime": 40.2538, + "eval_samples_per_second": 88.638, + "eval_steps_per_second": 11.08, + "step": 1900000 + }, + { + "epoch": 0.57, + "learning_rate": 4.693187319359833e-05, + "loss": 2.1926, + "step": 1905000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.5155928772755766, + "eval_loss": 2.216796875, + "eval_runtime": 39.9117, + "eval_samples_per_second": 89.397, + "eval_steps_per_second": 11.175, + "step": 1905000 + }, + { + "epoch": 0.57, + "learning_rate": 4.685704083246658e-05, + "loss": 2.194, + "step": 1910000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.5154164420130716, + "eval_loss": 2.216796875, + "eval_runtime": 39.9039, + "eval_samples_per_second": 89.415, + "eval_steps_per_second": 11.177, + "step": 1910000 + }, + { + "epoch": 0.57, + "learning_rate": 4.992518261732325e-05, + "loss": 2.1942, + "step": 1915000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.5151652135414612, + "eval_loss": 2.21875, + "eval_runtime": 39.5459, + "eval_samples_per_second": 90.224, + "eval_steps_per_second": 11.278, + "step": 1915000 + }, + { + "epoch": 0.58, + "learning_rate": 4.985036523464649e-05, + "loss": 2.1947, + "step": 1920000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.5150559003896918, + "eval_loss": 2.21875, + "eval_runtime": 39.6198, + "eval_samples_per_second": 90.056, + "eval_steps_per_second": 11.257, + "step": 1920000 + }, + { + "epoch": 0.58, + "learning_rate": 4.977553287351474e-05, + "loss": 2.1941, + "step": 1925000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.5150868587509698, + "eval_loss": 2.220703125, + "eval_runtime": 39.5078, + "eval_samples_per_second": 90.311, + "eval_steps_per_second": 11.289, + "step": 1925000 + }, + { + "epoch": 0.58, + "learning_rate": 4.970071549083798e-05, + "loss": 2.1984, + "step": 1930000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.5151849392229835, + "eval_loss": 2.220703125, + "eval_runtime": 39.6089, + "eval_samples_per_second": 90.081, + "eval_steps_per_second": 11.26, + "step": 1930000 + }, + { + "epoch": 0.58, + "learning_rate": 4.9625868151251246e-05, + "loss": 2.1929, + "step": 1935000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.5150846670085785, + "eval_loss": 2.220703125, + "eval_runtime": 39.574, + "eval_samples_per_second": 90.16, + "eval_steps_per_second": 11.27, + "step": 1935000 + }, + { + "epoch": 0.58, + "learning_rate": 4.955106574702947e-05, + "loss": 2.1921, + "step": 1940000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.515442742921768, + "eval_loss": 2.21875, + "eval_runtime": 39.6485, + "eval_samples_per_second": 89.991, + "eval_steps_per_second": 11.249, + "step": 1940000 + }, + { + "epoch": 0.58, + "learning_rate": 4.947621840744274e-05, + "loss": 2.1932, + "step": 1945000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.5153391830937759, + "eval_loss": 2.21875, + "eval_runtime": 39.6644, + "eval_samples_per_second": 89.955, + "eval_steps_per_second": 11.244, + "step": 1945000 + }, + { + "epoch": 0.58, + "learning_rate": 4.940138604631099e-05, + "loss": 2.1959, + "step": 1950000 + }, + { + "epoch": 0.58, + "eval_accuracy": 0.5154117845604899, + "eval_loss": 2.21875, + "eval_runtime": 39.6108, + "eval_samples_per_second": 90.077, + "eval_steps_per_second": 11.26, + "step": 1950000 + }, + { + "epoch": 0.59, + "learning_rate": 4.932656866363423e-05, + "loss": 2.1927, + "step": 1955000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.5153761687466302, + "eval_loss": 2.21875, + "eval_runtime": 39.6613, + "eval_samples_per_second": 89.962, + "eval_steps_per_second": 11.245, + "step": 1955000 + }, + { + "epoch": 0.59, + "learning_rate": 4.9251751280957475e-05, + "loss": 2.1949, + "step": 1960000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.5154917831577748, + "eval_loss": 2.21875, + "eval_runtime": 39.8742, + "eval_samples_per_second": 89.481, + "eval_steps_per_second": 11.185, + "step": 1960000 + }, + { + "epoch": 0.59, + "learning_rate": 4.9176933898280714e-05, + "loss": 2.1918, + "step": 1965000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.5153786344568205, + "eval_loss": 2.216796875, + "eval_runtime": 39.9956, + "eval_samples_per_second": 89.21, + "eval_steps_per_second": 11.151, + "step": 1965000 + }, + { + "epoch": 0.59, + "learning_rate": 4.910210153714897e-05, + "loss": 2.1957, + "step": 1970000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.5154745231864427, + "eval_loss": 2.216796875, + "eval_runtime": 39.6981, + "eval_samples_per_second": 89.878, + "eval_steps_per_second": 11.235, + "step": 1970000 + }, + { + "epoch": 0.59, + "learning_rate": 4.902726917601722e-05, + "loss": 2.1884, + "step": 1975000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.51571890246308, + "eval_loss": 2.216796875, + "eval_runtime": 39.9761, + "eval_samples_per_second": 89.253, + "eval_steps_per_second": 11.157, + "step": 1975000 + }, + { + "epoch": 0.59, + "learning_rate": 4.895245179334046e-05, + "loss": 2.1942, + "step": 1980000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.5156073975689194, + "eval_loss": 2.21484375, + "eval_runtime": 39.9647, + "eval_samples_per_second": 89.279, + "eval_steps_per_second": 11.16, + "step": 1980000 + }, + { + "epoch": 0.59, + "learning_rate": 4.8877604453753726e-05, + "loss": 2.1938, + "step": 1985000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.5155802747568262, + "eval_loss": 2.216796875, + "eval_runtime": 39.7886, + "eval_samples_per_second": 89.674, + "eval_steps_per_second": 11.209, + "step": 1985000 + }, + { + "epoch": 0.6, + "learning_rate": 4.8802787071076965e-05, + "loss": 2.1935, + "step": 1990000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.5160071165875447, + "eval_loss": 2.21484375, + "eval_runtime": 40.1621, + "eval_samples_per_second": 88.84, + "eval_steps_per_second": 11.105, + "step": 1990000 + }, + { + "epoch": 0.6, + "learning_rate": 4.872795470994522e-05, + "loss": 2.1902, + "step": 1995000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.5157413678225916, + "eval_loss": 2.21484375, + "eval_runtime": 39.8406, + "eval_samples_per_second": 89.557, + "eval_steps_per_second": 11.195, + "step": 1995000 + }, + { + "epoch": 0.6, + "learning_rate": 4.865310737035848e-05, + "loss": 2.188, + "step": 2000000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.5158473933607739, + "eval_loss": 2.21484375, + "eval_runtime": 39.7424, + "eval_samples_per_second": 89.778, + "eval_steps_per_second": 11.222, + "step": 2000000 + }, + { + "epoch": 0.6, + "learning_rate": 4.8578289987681716e-05, + "loss": 2.1862, + "step": 2005000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.5159235564088739, + "eval_loss": 2.212890625, + "eval_runtime": 39.7499, + "eval_samples_per_second": 89.761, + "eval_steps_per_second": 11.22, + "step": 2005000 + }, + { + "epoch": 0.6, + "learning_rate": 4.850347260500496e-05, + "loss": 2.1886, + "step": 2010000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.5160695812456987, + "eval_loss": 2.212890625, + "eval_runtime": 40.1441, + "eval_samples_per_second": 88.88, + "eval_steps_per_second": 11.11, + "step": 2010000 + }, + { + "epoch": 0.6, + "learning_rate": 4.8428610286963236e-05, + "loss": 2.1811, + "step": 2015000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.516141360809016, + "eval_loss": 2.212890625, + "eval_runtime": 40.0164, + "eval_samples_per_second": 89.163, + "eval_steps_per_second": 11.145, + "step": 2015000 + }, + { + "epoch": 0.61, + "learning_rate": 4.835377792583148e-05, + "loss": 2.19, + "step": 2020000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.5160435543048012, + "eval_loss": 2.212890625, + "eval_runtime": 39.7821, + "eval_samples_per_second": 89.689, + "eval_steps_per_second": 11.211, + "step": 2020000 + }, + { + "epoch": 0.61, + "learning_rate": 4.827893058624475e-05, + "loss": 2.1895, + "step": 2025000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.5164950532374227, + "eval_loss": 2.212890625, + "eval_runtime": 39.7945, + "eval_samples_per_second": 89.661, + "eval_steps_per_second": 11.208, + "step": 2025000 + }, + { + "epoch": 0.61, + "learning_rate": 4.820411320356799e-05, + "loss": 2.1904, + "step": 2030000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.516082457732248, + "eval_loss": 2.212890625, + "eval_runtime": 39.8538, + "eval_samples_per_second": 89.527, + "eval_steps_per_second": 11.191, + "step": 2030000 + }, + { + "epoch": 0.61, + "learning_rate": 4.812928084243624e-05, + "loss": 2.1854, + "step": 2035000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.516504368142586, + "eval_loss": 2.212890625, + "eval_runtime": 39.7999, + "eval_samples_per_second": 89.649, + "eval_steps_per_second": 11.206, + "step": 2035000 + }, + { + "epoch": 0.61, + "learning_rate": 4.805447843821447e-05, + "loss": 2.1883, + "step": 2040000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.51652875127669, + "eval_loss": 2.2109375, + "eval_runtime": 39.8414, + "eval_samples_per_second": 89.555, + "eval_steps_per_second": 11.194, + "step": 2040000 + }, + { + "epoch": 0.61, + "learning_rate": 4.7979646077082725e-05, + "loss": 2.1859, + "step": 2045000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.5165314909546791, + "eval_loss": 2.2109375, + "eval_runtime": 39.9091, + "eval_samples_per_second": 89.403, + "eval_steps_per_second": 11.175, + "step": 2045000 + }, + { + "epoch": 0.61, + "learning_rate": 4.790481371595098e-05, + "loss": 2.1849, + "step": 2050000 + }, + { + "epoch": 0.61, + "eval_accuracy": 0.5167750483279198, + "eval_loss": 2.208984375, + "eval_runtime": 39.8945, + "eval_samples_per_second": 89.436, + "eval_steps_per_second": 11.179, + "step": 2050000 + }, + { + "epoch": 0.62, + "learning_rate": 4.782996637636424e-05, + "loss": 2.1844, + "step": 2055000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5167309395122934, + "eval_loss": 2.2109375, + "eval_runtime": 39.9417, + "eval_samples_per_second": 89.33, + "eval_steps_per_second": 11.166, + "step": 2055000 + }, + { + "epoch": 0.62, + "learning_rate": 4.7755148993687484e-05, + "loss": 2.1866, + "step": 2060000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5166953236984337, + "eval_loss": 2.208984375, + "eval_runtime": 39.9499, + "eval_samples_per_second": 89.312, + "eval_steps_per_second": 11.164, + "step": 2060000 + }, + { + "epoch": 0.62, + "learning_rate": 4.768031663255573e-05, + "loss": 2.1865, + "step": 2065000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5167821714906917, + "eval_loss": 2.208984375, + "eval_runtime": 39.8531, + "eval_samples_per_second": 89.529, + "eval_steps_per_second": 11.191, + "step": 2065000 + }, + { + "epoch": 0.62, + "learning_rate": 4.7605499249878976e-05, + "loss": 2.1846, + "step": 2070000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5171333982089081, + "eval_loss": 2.20703125, + "eval_runtime": 39.9613, + "eval_samples_per_second": 89.286, + "eval_steps_per_second": 11.161, + "step": 2070000 + }, + { + "epoch": 0.62, + "learning_rate": 4.753066688874723e-05, + "loss": 2.1821, + "step": 2075000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5169824419517027, + "eval_loss": 2.20703125, + "eval_runtime": 39.9564, + "eval_samples_per_second": 89.297, + "eval_steps_per_second": 11.162, + "step": 2075000 + }, + { + "epoch": 0.62, + "learning_rate": 4.7455849506070474e-05, + "loss": 2.184, + "step": 2080000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5170257288639323, + "eval_loss": 2.20703125, + "eval_runtime": 41.0867, + "eval_samples_per_second": 86.841, + "eval_steps_per_second": 10.855, + "step": 2080000 + }, + { + "epoch": 0.62, + "learning_rate": 4.738101714493872e-05, + "loss": 2.1847, + "step": 2085000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5173227099579624, + "eval_loss": 2.205078125, + "eval_runtime": 39.9314, + "eval_samples_per_second": 89.353, + "eval_steps_per_second": 11.169, + "step": 2085000 + }, + { + "epoch": 0.63, + "learning_rate": 4.730618478380697e-05, + "loss": 2.1836, + "step": 2090000 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.5173509286412512, + "eval_loss": 2.205078125, + "eval_runtime": 40.0844, + "eval_samples_per_second": 89.012, + "eval_steps_per_second": 11.127, + "step": 2090000 + }, + { + "epoch": 0.63, + "learning_rate": 4.723136740113022e-05, + "loss": 2.1791, + "step": 2095000 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.5174243520113619, + "eval_loss": 2.205078125, + "eval_runtime": 39.9803, + "eval_samples_per_second": 89.244, + "eval_steps_per_second": 11.156, + "step": 2095000 + }, + { + "epoch": 0.63, + "learning_rate": 4.715652006154348e-05, + "loss": 2.1812, + "step": 2100000 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.5173136690205979, + "eval_loss": 2.205078125, + "eval_runtime": 40.0609, + "eval_samples_per_second": 89.064, + "eval_steps_per_second": 11.133, + "step": 2100000 + }, + { + "epoch": 0.63, + "learning_rate": 4.708168770041173e-05, + "loss": 2.1835, + "step": 2105000 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.5175572263938386, + "eval_loss": 2.205078125, + "eval_runtime": 40.0398, + "eval_samples_per_second": 89.111, + "eval_steps_per_second": 11.139, + "step": 2105000 + }, + { + "epoch": 0.63, + "learning_rate": 4.700687031773498e-05, + "loss": 2.1806, + "step": 2110000 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.517552294973458, + "eval_loss": 2.205078125, + "eval_runtime": 40.3766, + "eval_samples_per_second": 88.368, + "eval_steps_per_second": 11.046, + "step": 2110000 + }, + { + "epoch": 0.63, + "learning_rate": 4.6932037956603224e-05, + "loss": 2.1832, + "step": 2115000 + }, + { + "epoch": 0.63, + "eval_accuracy": 0.5174777757321516, + "eval_loss": 2.205078125, + "eval_runtime": 41.019, + "eval_samples_per_second": 86.984, + "eval_steps_per_second": 10.873, + "step": 2115000 + }, + { + "epoch": 0.64, + "learning_rate": 4.6857205595471476e-05, + "loss": 2.1766, + "step": 2120000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5177577708226486, + "eval_loss": 2.203125, + "eval_runtime": 40.432, + "eval_samples_per_second": 88.247, + "eval_steps_per_second": 11.031, + "step": 2120000 + }, + { + "epoch": 0.64, + "learning_rate": 4.678237323433973e-05, + "loss": 2.1775, + "step": 2125000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5178353037097432, + "eval_loss": 2.203125, + "eval_runtime": 41.1107, + "eval_samples_per_second": 86.79, + "eval_steps_per_second": 10.849, + "step": 2125000 + }, + { + "epoch": 0.64, + "learning_rate": 4.6707540873207975e-05, + "loss": 2.1801, + "step": 2130000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5176514713166673, + "eval_loss": 2.203125, + "eval_runtime": 41.0472, + "eval_samples_per_second": 86.924, + "eval_steps_per_second": 10.866, + "step": 2130000 + }, + { + "epoch": 0.64, + "learning_rate": 4.6632708512076235e-05, + "loss": 2.1789, + "step": 2135000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5177799622143612, + "eval_loss": 2.203125, + "eval_runtime": 41.4469, + "eval_samples_per_second": 86.086, + "eval_steps_per_second": 10.761, + "step": 2135000 + }, + { + "epoch": 0.64, + "learning_rate": 4.655787615094449e-05, + "loss": 2.1794, + "step": 2140000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5178131123180306, + "eval_loss": 2.203125, + "eval_runtime": 40.4764, + "eval_samples_per_second": 88.15, + "eval_steps_per_second": 11.019, + "step": 2140000 + }, + { + "epoch": 0.64, + "learning_rate": 4.6483043789812734e-05, + "loss": 2.1799, + "step": 2145000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5178917410763209, + "eval_loss": 2.201171875, + "eval_runtime": 40.376, + "eval_samples_per_second": 88.369, + "eval_steps_per_second": 11.046, + "step": 2145000 + }, + { + "epoch": 0.64, + "learning_rate": 4.6408196450226e-05, + "loss": 2.1746, + "step": 2150000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5180391357521402, + "eval_loss": 2.201171875, + "eval_runtime": 40.5387, + "eval_samples_per_second": 88.015, + "eval_steps_per_second": 11.002, + "step": 2150000 + }, + { + "epoch": 0.65, + "learning_rate": 4.633336408909425e-05, + "loss": 2.1766, + "step": 2155000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.5178873575915381, + "eval_loss": 2.201171875, + "eval_runtime": 40.514, + "eval_samples_per_second": 88.068, + "eval_steps_per_second": 11.009, + "step": 2155000 + }, + { + "epoch": 0.65, + "learning_rate": 4.62585317279625e-05, + "loss": 2.1754, + "step": 2160000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.5177369492699306, + "eval_loss": 2.201171875, + "eval_runtime": 42.6971, + "eval_samples_per_second": 83.565, + "eval_steps_per_second": 10.446, + "step": 2160000 + }, + { + "epoch": 0.65, + "learning_rate": 4.618369936683075e-05, + "loss": 2.1764, + "step": 2165000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.5177405108513166, + "eval_loss": 2.201171875, + "eval_runtime": 40.5808, + "eval_samples_per_second": 87.923, + "eval_steps_per_second": 10.99, + "step": 2165000 + }, + { + "epoch": 0.65, + "learning_rate": 4.6108881984154e-05, + "loss": 2.1745, + "step": 2170000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.51831337751886, + "eval_loss": 2.19921875, + "eval_runtime": 42.1403, + "eval_samples_per_second": 84.67, + "eval_steps_per_second": 10.584, + "step": 2170000 + }, + { + "epoch": 0.65, + "learning_rate": 4.6034049623022244e-05, + "loss": 2.1735, + "step": 2175000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.5180128348434438, + "eval_loss": 2.19921875, + "eval_runtime": 40.2519, + "eval_samples_per_second": 88.642, + "eval_steps_per_second": 11.08, + "step": 2175000 + }, + { + "epoch": 0.65, + "learning_rate": 4.59592172618905e-05, + "loss": 2.1778, + "step": 2180000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.5180588614336625, + "eval_loss": 2.19921875, + "eval_runtime": 42.4462, + "eval_samples_per_second": 84.059, + "eval_steps_per_second": 10.507, + "step": 2180000 + }, + { + "epoch": 0.65, + "learning_rate": 4.5884384900758757e-05, + "loss": 2.1717, + "step": 2185000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.5183484453971218, + "eval_loss": 2.19921875, + "eval_runtime": 41.2843, + "eval_samples_per_second": 86.425, + "eval_steps_per_second": 10.803, + "step": 2185000 + }, + { + "epoch": 0.66, + "learning_rate": 4.580953756117201e-05, + "loss": 2.1752, + "step": 2190000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.518520497174844, + "eval_loss": 2.197265625, + "eval_runtime": 40.4537, + "eval_samples_per_second": 88.2, + "eval_steps_per_second": 11.025, + "step": 2190000 + }, + { + "epoch": 0.66, + "learning_rate": 4.5734720178495255e-05, + "loss": 2.1747, + "step": 2195000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.5184799499406038, + "eval_loss": 2.197265625, + "eval_runtime": 43.6458, + "eval_samples_per_second": 81.749, + "eval_steps_per_second": 10.219, + "step": 2195000 + }, + { + "epoch": 0.66, + "learning_rate": 4.56599027958185e-05, + "loss": 2.1754, + "step": 2200000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.5186040573535149, + "eval_loss": 2.197265625, + "eval_runtime": 41.0029, + "eval_samples_per_second": 87.018, + "eval_steps_per_second": 10.877, + "step": 2200000 + }, + { + "epoch": 0.66, + "learning_rate": 4.558508541314174e-05, + "loss": 2.1728, + "step": 2205000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.5187728215176501, + "eval_loss": 2.197265625, + "eval_runtime": 40.4446, + "eval_samples_per_second": 88.22, + "eval_steps_per_second": 11.027, + "step": 2205000 + }, + { + "epoch": 0.66, + "learning_rate": 4.551025305200999e-05, + "loss": 2.1684, + "step": 2210000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.5185736469278347, + "eval_loss": 2.197265625, + "eval_runtime": 40.6605, + "eval_samples_per_second": 87.751, + "eval_steps_per_second": 10.969, + "step": 2210000 + }, + { + "epoch": 0.66, + "learning_rate": 4.543540571242325e-05, + "loss": 2.1722, + "step": 2215000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.5188199439790645, + "eval_loss": 2.1953125, + "eval_runtime": 40.5747, + "eval_samples_per_second": 87.937, + "eval_steps_per_second": 10.992, + "step": 2215000 + }, + { + "epoch": 0.67, + "learning_rate": 4.536058832974649e-05, + "loss": 2.1692, + "step": 2220000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.519004872243336, + "eval_loss": 2.1953125, + "eval_runtime": 41.1424, + "eval_samples_per_second": 86.723, + "eval_steps_per_second": 10.84, + "step": 2220000 + }, + { + "epoch": 0.67, + "learning_rate": 4.528577094706974e-05, + "loss": 2.176, + "step": 2225000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.519122404429073, + "eval_loss": 2.1953125, + "eval_runtime": 42.7268, + "eval_samples_per_second": 83.507, + "eval_steps_per_second": 10.438, + "step": 2225000 + }, + { + "epoch": 0.67, + "learning_rate": 4.5210923607483e-05, + "loss": 2.1697, + "step": 2230000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.5190202144400756, + "eval_loss": 2.1953125, + "eval_runtime": 40.4249, + "eval_samples_per_second": 88.262, + "eval_steps_per_second": 11.033, + "step": 2230000 + }, + { + "epoch": 0.67, + "learning_rate": 4.5136106224806244e-05, + "loss": 2.1731, + "step": 2235000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.5190837749694251, + "eval_loss": 2.1953125, + "eval_runtime": 42.0959, + "eval_samples_per_second": 84.759, + "eval_steps_per_second": 10.595, + "step": 2235000 + }, + { + "epoch": 0.67, + "learning_rate": 4.506124390676451e-05, + "loss": 2.173, + "step": 2240000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.5191125415883119, + "eval_loss": 2.193359375, + "eval_runtime": 40.9494, + "eval_samples_per_second": 87.132, + "eval_steps_per_second": 10.892, + "step": 2240000 + }, + { + "epoch": 0.67, + "learning_rate": 4.498644150254274e-05, + "loss": 2.1714, + "step": 2245000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.5192900727220126, + "eval_loss": 2.193359375, + "eval_runtime": 40.4515, + "eval_samples_per_second": 88.204, + "eval_steps_per_second": 11.026, + "step": 2245000 + }, + { + "epoch": 0.67, + "learning_rate": 4.4911594162956e-05, + "loss": 2.1719, + "step": 2250000 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.5192147315773094, + "eval_loss": 2.193359375, + "eval_runtime": 40.7422, + "eval_samples_per_second": 87.575, + "eval_steps_per_second": 10.947, + "step": 2250000 + }, + { + "epoch": 0.68, + "learning_rate": 4.483674682336927e-05, + "loss": 2.1667, + "step": 2255000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.5189793932380363, + "eval_loss": 2.193359375, + "eval_runtime": 40.4467, + "eval_samples_per_second": 88.215, + "eval_steps_per_second": 11.027, + "step": 2255000 + }, + { + "epoch": 0.68, + "learning_rate": 4.476191446223752e-05, + "loss": 2.1653, + "step": 2260000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.5191834992482324, + "eval_loss": 2.193359375, + "eval_runtime": 40.3493, + "eval_samples_per_second": 88.428, + "eval_steps_per_second": 11.053, + "step": 2260000 + }, + { + "epoch": 0.68, + "learning_rate": 4.468709707956076e-05, + "loss": 2.1656, + "step": 2265000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.5193434964428021, + "eval_loss": 2.19140625, + "eval_runtime": 44.129, + "eval_samples_per_second": 80.854, + "eval_steps_per_second": 10.107, + "step": 2265000 + }, + { + "epoch": 0.68, + "learning_rate": 4.4612264718429014e-05, + "loss": 2.1695, + "step": 2270000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.5194328099452503, + "eval_loss": 2.19140625, + "eval_runtime": 42.07, + "eval_samples_per_second": 84.811, + "eval_steps_per_second": 10.601, + "step": 2270000 + }, + { + "epoch": 0.68, + "learning_rate": 4.453744733575226e-05, + "loss": 2.17, + "step": 2275000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.5195643144887323, + "eval_loss": 2.19140625, + "eval_runtime": 40.4092, + "eval_samples_per_second": 88.297, + "eval_steps_per_second": 11.037, + "step": 2275000 + }, + { + "epoch": 0.68, + "learning_rate": 4.44626299530755e-05, + "loss": 2.1628, + "step": 2280000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.5197062298085732, + "eval_loss": 2.19140625, + "eval_runtime": 40.3663, + "eval_samples_per_second": 88.391, + "eval_steps_per_second": 11.049, + "step": 2280000 + }, + { + "epoch": 0.68, + "learning_rate": 4.438779759194375e-05, + "loss": 2.1648, + "step": 2285000 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.5196210258231089, + "eval_loss": 2.189453125, + "eval_runtime": 41.4265, + "eval_samples_per_second": 86.128, + "eval_steps_per_second": 10.766, + "step": 2285000 + }, + { + "epoch": 0.69, + "learning_rate": 4.431298020926699e-05, + "loss": 2.1647, + "step": 2290000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.5199106097865681, + "eval_loss": 2.189453125, + "eval_runtime": 43.7368, + "eval_samples_per_second": 81.579, + "eval_steps_per_second": 10.197, + "step": 2290000 + }, + { + "epoch": 0.69, + "learning_rate": 4.423813286968026e-05, + "loss": 2.1648, + "step": 2295000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.5198015706025977, + "eval_loss": 2.189453125, + "eval_runtime": 40.3741, + "eval_samples_per_second": 88.374, + "eval_steps_per_second": 11.047, + "step": 2295000 + }, + { + "epoch": 0.69, + "learning_rate": 4.4163315487003496e-05, + "loss": 2.168, + "step": 2300000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.5196733536727027, + "eval_loss": 2.189453125, + "eval_runtime": 40.4104, + "eval_samples_per_second": 88.294, + "eval_steps_per_second": 11.037, + "step": 2300000 + }, + { + "epoch": 0.69, + "learning_rate": 4.4088498104326735e-05, + "loss": 2.1607, + "step": 2305000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.5197840366634667, + "eval_loss": 2.189453125, + "eval_runtime": 40.7993, + "eval_samples_per_second": 87.452, + "eval_steps_per_second": 10.932, + "step": 2305000 + }, + { + "epoch": 0.69, + "learning_rate": 4.401365076474e-05, + "loss": 2.1674, + "step": 2310000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.5199684169921404, + "eval_loss": 2.1875, + "eval_runtime": 40.3824, + "eval_samples_per_second": 88.355, + "eval_steps_per_second": 11.044, + "step": 2310000 + }, + { + "epoch": 0.69, + "learning_rate": 4.393881840360825e-05, + "loss": 2.1656, + "step": 2315000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.5199689649277383, + "eval_loss": 2.1875, + "eval_runtime": 40.7535, + "eval_samples_per_second": 87.551, + "eval_steps_per_second": 10.944, + "step": 2315000 + }, + { + "epoch": 0.7, + "learning_rate": 4.38639860424765e-05, + "loss": 2.1637, + "step": 2320000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.5201547150954066, + "eval_loss": 2.1875, + "eval_runtime": 40.6083, + "eval_samples_per_second": 87.864, + "eval_steps_per_second": 10.983, + "step": 2320000 + }, + { + "epoch": 0.7, + "learning_rate": 4.3789168659799746e-05, + "loss": 2.1649, + "step": 2325000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.5201152637323619, + "eval_loss": 2.1875, + "eval_runtime": 41.4342, + "eval_samples_per_second": 86.112, + "eval_steps_per_second": 10.764, + "step": 2325000 + }, + { + "epoch": 0.7, + "learning_rate": 4.371436625557798e-05, + "loss": 2.1625, + "step": 2330000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.5200544428810016, + "eval_loss": 2.1875, + "eval_runtime": 44.1316, + "eval_samples_per_second": 80.849, + "eval_steps_per_second": 10.106, + "step": 2330000 + }, + { + "epoch": 0.7, + "learning_rate": 4.363950393753625e-05, + "loss": 2.1627, + "step": 2335000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.5202593707945943, + "eval_loss": 2.1875, + "eval_runtime": 42.6548, + "eval_samples_per_second": 83.648, + "eval_steps_per_second": 10.456, + "step": 2335000 + }, + { + "epoch": 0.7, + "learning_rate": 4.356468655485949e-05, + "loss": 2.1598, + "step": 2340000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.5203048494492152, + "eval_loss": 2.185546875, + "eval_runtime": 43.9859, + "eval_samples_per_second": 81.117, + "eval_steps_per_second": 10.14, + "step": 2340000 + }, + { + "epoch": 0.7, + "learning_rate": 4.348982423681776e-05, + "loss": 2.1638, + "step": 2345000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.5201473179648357, + "eval_loss": 2.1875, + "eval_runtime": 44.5632, + "eval_samples_per_second": 80.066, + "eval_steps_per_second": 10.008, + "step": 2345000 + }, + { + "epoch": 0.7, + "learning_rate": 4.341499187568602e-05, + "loss": 2.1588, + "step": 2350000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.5204785450337309, + "eval_loss": 2.185546875, + "eval_runtime": 41.6749, + "eval_samples_per_second": 85.615, + "eval_steps_per_second": 10.702, + "step": 2350000 + }, + { + "epoch": 0.71, + "learning_rate": 4.334015951455427e-05, + "loss": 2.1633, + "step": 2355000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.5204648466437849, + "eval_loss": 2.185546875, + "eval_runtime": 40.4241, + "eval_samples_per_second": 88.264, + "eval_steps_per_second": 11.033, + "step": 2355000 + }, + { + "epoch": 0.71, + "learning_rate": 4.326534213187751e-05, + "loss": 2.1621, + "step": 2360000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.5205470369834612, + "eval_loss": 2.185546875, + "eval_runtime": 40.3566, + "eval_samples_per_second": 88.412, + "eval_steps_per_second": 11.051, + "step": 2360000 + }, + { + "epoch": 0.71, + "learning_rate": 4.319049479229077e-05, + "loss": 2.165, + "step": 2365000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.5207188147933844, + "eval_loss": 2.18359375, + "eval_runtime": 40.8467, + "eval_samples_per_second": 87.351, + "eval_steps_per_second": 10.919, + "step": 2365000 + }, + { + "epoch": 0.71, + "learning_rate": 4.311566243115902e-05, + "loss": 2.159, + "step": 2370000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.52062347399936, + "eval_loss": 2.18359375, + "eval_runtime": 43.216, + "eval_samples_per_second": 82.562, + "eval_steps_per_second": 10.32, + "step": 2370000 + }, + { + "epoch": 0.71, + "learning_rate": 4.3040830070027275e-05, + "loss": 2.1573, + "step": 2375000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.5207314173121348, + "eval_loss": 2.18359375, + "eval_runtime": 44.6428, + "eval_samples_per_second": 79.923, + "eval_steps_per_second": 9.99, + "step": 2375000 + }, + { + "epoch": 0.71, + "learning_rate": 4.2966012687350514e-05, + "loss": 2.1556, + "step": 2380000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.520848401562274, + "eval_loss": 2.18359375, + "eval_runtime": 40.6164, + "eval_samples_per_second": 87.846, + "eval_steps_per_second": 10.981, + "step": 2380000 + }, + { + "epoch": 0.71, + "learning_rate": 4.289119530467376e-05, + "loss": 2.1562, + "step": 2385000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.5209563448750487, + "eval_loss": 2.18359375, + "eval_runtime": 40.356, + "eval_samples_per_second": 88.413, + "eval_steps_per_second": 11.052, + "step": 2385000 + }, + { + "epoch": 0.72, + "learning_rate": 4.281636294354201e-05, + "loss": 2.1572, + "step": 2390000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.5209188112865967, + "eval_loss": 2.18359375, + "eval_runtime": 44.3639, + "eval_samples_per_second": 80.426, + "eval_steps_per_second": 10.053, + "step": 2390000 + }, + { + "epoch": 0.72, + "learning_rate": 4.274154556086525e-05, + "loss": 2.1577, + "step": 2395000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.5208686751793942, + "eval_loss": 2.181640625, + "eval_runtime": 42.2402, + "eval_samples_per_second": 84.469, + "eval_steps_per_second": 10.559, + "step": 2395000 + }, + { + "epoch": 0.72, + "learning_rate": 4.2666713199733505e-05, + "loss": 2.1529, + "step": 2400000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.5209933305279031, + "eval_loss": 2.181640625, + "eval_runtime": 42.0012, + "eval_samples_per_second": 84.95, + "eval_steps_per_second": 10.619, + "step": 2400000 + }, + { + "epoch": 0.72, + "learning_rate": 4.259188083860176e-05, + "loss": 2.1636, + "step": 2405000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.5210516856690732, + "eval_loss": 2.181640625, + "eval_runtime": 40.8766, + "eval_samples_per_second": 87.287, + "eval_steps_per_second": 10.911, + "step": 2405000 + }, + { + "epoch": 0.72, + "learning_rate": 4.2517063455924996e-05, + "loss": 2.1521, + "step": 2410000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.5212941471711181, + "eval_loss": 2.181640625, + "eval_runtime": 44.8987, + "eval_samples_per_second": 79.468, + "eval_steps_per_second": 9.933, + "step": 2410000 + }, + { + "epoch": 0.72, + "learning_rate": 4.244221611633826e-05, + "loss": 2.1574, + "step": 2415000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.5213697622836202, + "eval_loss": 2.181640625, + "eval_runtime": 40.8755, + "eval_samples_per_second": 87.289, + "eval_steps_per_second": 10.911, + "step": 2415000 + }, + { + "epoch": 0.72, + "learning_rate": 4.236736877675152e-05, + "loss": 2.1546, + "step": 2420000 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.5213067496898685, + "eval_loss": 2.1796875, + "eval_runtime": 43.2862, + "eval_samples_per_second": 82.428, + "eval_steps_per_second": 10.304, + "step": 2420000 + }, + { + "epoch": 0.73, + "learning_rate": 4.2292536415619776e-05, + "loss": 2.1572, + "step": 2425000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.521194148924512, + "eval_loss": 2.1796875, + "eval_runtime": 42.5113, + "eval_samples_per_second": 83.931, + "eval_steps_per_second": 10.491, + "step": 2425000 + }, + { + "epoch": 0.73, + "learning_rate": 4.2217719032943015e-05, + "loss": 2.1544, + "step": 2430000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.5212160663484257, + "eval_loss": 2.1796875, + "eval_runtime": 42.4347, + "eval_samples_per_second": 84.082, + "eval_steps_per_second": 10.51, + "step": 2430000 + }, + { + "epoch": 0.73, + "learning_rate": 4.214290165026626e-05, + "loss": 2.15, + "step": 2435000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.5213122290458468, + "eval_loss": 2.1796875, + "eval_runtime": 41.657, + "eval_samples_per_second": 85.652, + "eval_steps_per_second": 10.706, + "step": 2435000 + }, + { + "epoch": 0.73, + "learning_rate": 4.206805431067952e-05, + "loss": 2.1537, + "step": 2440000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.5217242766154238, + "eval_loss": 2.177734375, + "eval_runtime": 45.9893, + "eval_samples_per_second": 77.583, + "eval_steps_per_second": 9.698, + "step": 2440000 + }, + { + "epoch": 0.73, + "learning_rate": 4.199322194954777e-05, + "loss": 2.1552, + "step": 2445000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.5215689368734356, + "eval_loss": 2.177734375, + "eval_runtime": 43.8458, + "eval_samples_per_second": 81.376, + "eval_steps_per_second": 10.172, + "step": 2445000 + }, + { + "epoch": 0.73, + "learning_rate": 4.1918389588416026e-05, + "loss": 2.1522, + "step": 2450000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.5215245540900105, + "eval_loss": 2.177734375, + "eval_runtime": 45.304, + "eval_samples_per_second": 78.757, + "eval_steps_per_second": 9.845, + "step": 2450000 + }, + { + "epoch": 0.74, + "learning_rate": 4.1843542248829286e-05, + "loss": 2.1487, + "step": 2455000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.5214875684371562, + "eval_loss": 2.177734375, + "eval_runtime": 41.9232, + "eval_samples_per_second": 85.108, + "eval_steps_per_second": 10.639, + "step": 2455000 + }, + { + "epoch": 0.74, + "learning_rate": 4.176870988769754e-05, + "loss": 2.1582, + "step": 2460000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.52146373323865, + "eval_loss": 2.177734375, + "eval_runtime": 44.2104, + "eval_samples_per_second": 80.705, + "eval_steps_per_second": 10.088, + "step": 2460000 + }, + { + "epoch": 0.74, + "learning_rate": 4.169390748347577e-05, + "loss": 2.1582, + "step": 2465000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.5218020834703172, + "eval_loss": 2.177734375, + "eval_runtime": 43.1549, + "eval_samples_per_second": 82.679, + "eval_steps_per_second": 10.335, + "step": 2465000 + }, + { + "epoch": 0.74, + "learning_rate": 4.161907512234402e-05, + "loss": 2.1529, + "step": 2470000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.5217976999855345, + "eval_loss": 2.177734375, + "eval_runtime": 45.1081, + "eval_samples_per_second": 79.099, + "eval_steps_per_second": 9.887, + "step": 2470000 + }, + { + "epoch": 0.74, + "learning_rate": 4.1544242761212276e-05, + "loss": 2.1549, + "step": 2475000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.5219418070477668, + "eval_loss": 2.17578125, + "eval_runtime": 40.8292, + "eval_samples_per_second": 87.388, + "eval_steps_per_second": 10.924, + "step": 2475000 + }, + { + "epoch": 0.74, + "learning_rate": 4.1469425378535516e-05, + "loss": 2.1525, + "step": 2480000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.521930574368011, + "eval_loss": 2.17578125, + "eval_runtime": 43.1177, + "eval_samples_per_second": 82.75, + "eval_steps_per_second": 10.344, + "step": 2480000 + }, + { + "epoch": 0.74, + "learning_rate": 4.139459301740377e-05, + "loss": 2.1478, + "step": 2485000 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.5221272832476362, + "eval_loss": 2.17578125, + "eval_runtime": 44.4519, + "eval_samples_per_second": 80.266, + "eval_steps_per_second": 10.033, + "step": 2485000 + }, + { + "epoch": 0.75, + "learning_rate": 4.131977563472701e-05, + "loss": 2.1524, + "step": 2490000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5219949568007575, + "eval_loss": 2.17578125, + "eval_runtime": 43.8567, + "eval_samples_per_second": 81.356, + "eval_steps_per_second": 10.169, + "step": 2490000 + }, + { + "epoch": 0.75, + "learning_rate": 4.124494327359526e-05, + "loss": 2.1477, + "step": 2495000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5220256411942366, + "eval_loss": 2.173828125, + "eval_runtime": 40.3311, + "eval_samples_per_second": 88.468, + "eval_steps_per_second": 11.058, + "step": 2495000 + }, + { + "epoch": 0.75, + "learning_rate": 4.117011091246352e-05, + "loss": 2.1524, + "step": 2500000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5221511184461423, + "eval_loss": 2.173828125, + "eval_runtime": 42.2823, + "eval_samples_per_second": 84.385, + "eval_steps_per_second": 10.548, + "step": 2500000 + }, + { + "epoch": 0.75, + "learning_rate": 4.1095278551331766e-05, + "loss": 2.147, + "step": 2505000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5221815288718226, + "eval_loss": 2.173828125, + "eval_runtime": 40.7144, + "eval_samples_per_second": 87.635, + "eval_steps_per_second": 10.954, + "step": 2505000 + }, + { + "epoch": 0.75, + "learning_rate": 4.102046116865501e-05, + "loss": 2.1481, + "step": 2510000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5222954994761736, + "eval_loss": 2.173828125, + "eval_runtime": 42.7002, + "eval_samples_per_second": 83.559, + "eval_steps_per_second": 10.445, + "step": 2510000 + }, + { + "epoch": 0.75, + "learning_rate": 4.094559885061328e-05, + "loss": 2.1494, + "step": 2515000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5222659109538901, + "eval_loss": 2.173828125, + "eval_runtime": 42.0725, + "eval_samples_per_second": 84.806, + "eval_steps_per_second": 10.601, + "step": 2515000 + }, + { + "epoch": 0.75, + "learning_rate": 4.087078146793652e-05, + "loss": 2.1484, + "step": 2520000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5222675547606836, + "eval_loss": 2.173828125, + "eval_runtime": 44.1957, + "eval_samples_per_second": 80.732, + "eval_steps_per_second": 10.091, + "step": 2520000 + }, + { + "epoch": 0.76, + "learning_rate": 4.079594910680478e-05, + "loss": 2.1474, + "step": 2525000 + }, + { + "epoch": 0.76, + "eval_accuracy": 0.5223297454510387, + "eval_loss": 2.173828125, + "eval_runtime": 43.3427, + "eval_samples_per_second": 82.321, + "eval_steps_per_second": 10.29, + "step": 2525000 + }, + { + "epoch": 0.76, + "learning_rate": 4.072110176721804e-05, + "loss": 2.1487, + "step": 2530000 + }, + { + "epoch": 0.76, + "eval_accuracy": 0.5222724861810643, + "eval_loss": 2.173828125, + "eval_runtime": 44.0468, + "eval_samples_per_second": 81.005, + "eval_steps_per_second": 10.126, + "step": 2530000 + }, + { + "epoch": 0.76, + "learning_rate": 4.064626940608628e-05, + "loss": 2.1465, + "step": 2535000 + }, + { + "epoch": 0.76, + "eval_accuracy": 0.5224768661590591, + "eval_loss": 2.171875, + "eval_runtime": 40.4028, + "eval_samples_per_second": 88.311, + "eval_steps_per_second": 11.039, + "step": 2535000 + }, + { + "epoch": 0.76, + "learning_rate": 4.057145202340953e-05, + "loss": 2.1456, + "step": 2540000 + }, + { + "epoch": 0.76, + "eval_accuracy": 0.5226201513178947, + "eval_loss": 2.171875, + "eval_runtime": 42.5959, + "eval_samples_per_second": 83.764, + "eval_steps_per_second": 10.47, + "step": 2540000 + }, + { + "epoch": 0.76, + "learning_rate": 4.049661966227778e-05, + "loss": 2.1482, + "step": 2545000 + }, + { + "epoch": 0.76, + "eval_accuracy": 0.5223516628749524, + "eval_loss": 2.171875, + "eval_runtime": 43.342, + "eval_samples_per_second": 82.322, + "eval_steps_per_second": 10.29, + "step": 2545000 + }, + { + "epoch": 0.76, + "learning_rate": 4.042180227960102e-05, + "loss": 2.1451, + "step": 2550000 + }, + { + "epoch": 0.76, + "eval_accuracy": 0.5226286443196613, + "eval_loss": 2.171875, + "eval_runtime": 42.1401, + "eval_samples_per_second": 84.67, + "eval_steps_per_second": 10.584, + "step": 2550000 + }, + { + "epoch": 0.77, + "learning_rate": 4.0346969918469274e-05, + "loss": 2.143, + "step": 2555000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.5225609742733278, + "eval_loss": 2.171875, + "eval_runtime": 44.8351, + "eval_samples_per_second": 79.581, + "eval_steps_per_second": 9.948, + "step": 2555000 + }, + { + "epoch": 0.77, + "learning_rate": 4.027210760042755e-05, + "loss": 2.1463, + "step": 2560000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.5225061807135436, + "eval_loss": 2.171875, + "eval_runtime": 42.3643, + "eval_samples_per_second": 84.222, + "eval_steps_per_second": 10.528, + "step": 2560000 + }, + { + "epoch": 0.77, + "learning_rate": 4.0197290217750786e-05, + "loss": 2.1466, + "step": 2565000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.5227820662870569, + "eval_loss": 2.169921875, + "eval_runtime": 44.9199, + "eval_samples_per_second": 79.43, + "eval_steps_per_second": 9.929, + "step": 2565000 + }, + { + "epoch": 0.77, + "learning_rate": 4.012247283507403e-05, + "loss": 2.1423, + "step": 2570000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.5229272692204849, + "eval_loss": 2.169921875, + "eval_runtime": 43.6123, + "eval_samples_per_second": 81.812, + "eval_steps_per_second": 10.226, + "step": 2570000 + }, + { + "epoch": 0.77, + "learning_rate": 4.004765545239728e-05, + "loss": 2.1423, + "step": 2575000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.5230557601181788, + "eval_loss": 2.169921875, + "eval_runtime": 44.819, + "eval_samples_per_second": 79.609, + "eval_steps_per_second": 9.951, + "step": 2575000 + }, + { + "epoch": 0.77, + "learning_rate": 3.997283806972052e-05, + "loss": 2.1444, + "step": 2580000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.5230245277891018, + "eval_loss": 2.169921875, + "eval_runtime": 42.2873, + "eval_samples_per_second": 84.375, + "eval_steps_per_second": 10.547, + "step": 2580000 + }, + { + "epoch": 0.77, + "learning_rate": 3.9898020687043756e-05, + "loss": 2.1402, + "step": 2585000 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.5230417877604338, + "eval_loss": 2.16796875, + "eval_runtime": 44.6974, + "eval_samples_per_second": 79.826, + "eval_steps_per_second": 9.978, + "step": 2585000 + }, + { + "epoch": 0.78, + "learning_rate": 3.9823203304367e-05, + "loss": 2.1376, + "step": 2590000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5230924718032341, + "eval_loss": 2.16796875, + "eval_runtime": 43.4573, + "eval_samples_per_second": 82.104, + "eval_steps_per_second": 10.263, + "step": 2590000 + }, + { + "epoch": 0.78, + "learning_rate": 3.974838592169024e-05, + "loss": 2.1395, + "step": 2595000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5231894564040521, + "eval_loss": 2.16796875, + "eval_runtime": 43.8137, + "eval_samples_per_second": 81.436, + "eval_steps_per_second": 10.179, + "step": 2595000 + }, + { + "epoch": 0.78, + "learning_rate": 3.9673553560558494e-05, + "loss": 2.1399, + "step": 2600000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5232801397454949, + "eval_loss": 2.16796875, + "eval_runtime": 41.9685, + "eval_samples_per_second": 85.016, + "eval_steps_per_second": 10.627, + "step": 2600000 + }, + { + "epoch": 0.78, + "learning_rate": 3.9598721199426747e-05, + "loss": 2.1379, + "step": 2605000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5230908279964406, + "eval_loss": 2.16796875, + "eval_runtime": 42.9652, + "eval_samples_per_second": 83.044, + "eval_steps_per_second": 10.38, + "step": 2605000 + }, + { + "epoch": 0.78, + "learning_rate": 3.952390381674999e-05, + "loss": 2.1411, + "step": 2610000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5233708230869376, + "eval_loss": 2.166015625, + "eval_runtime": 41.4989, + "eval_samples_per_second": 85.978, + "eval_steps_per_second": 10.747, + "step": 2610000 + }, + { + "epoch": 0.78, + "learning_rate": 3.944908643407323e-05, + "loss": 2.1421, + "step": 2615000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5232478115452223, + "eval_loss": 2.166015625, + "eval_runtime": 41.1264, + "eval_samples_per_second": 86.757, + "eval_steps_per_second": 10.845, + "step": 2615000 + }, + { + "epoch": 0.78, + "learning_rate": 3.9374254072941484e-05, + "loss": 2.1412, + "step": 2620000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5236650645029786, + "eval_loss": 2.166015625, + "eval_runtime": 43.7159, + "eval_samples_per_second": 81.618, + "eval_steps_per_second": 10.202, + "step": 2620000 + }, + { + "epoch": 0.79, + "learning_rate": 3.929942171180974e-05, + "loss": 2.1381, + "step": 2625000 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.5235886274870797, + "eval_loss": 2.166015625, + "eval_runtime": 43.6678, + "eval_samples_per_second": 81.708, + "eval_steps_per_second": 10.213, + "step": 2625000 + }, + { + "epoch": 0.79, + "learning_rate": 3.922458935067799e-05, + "loss": 2.142, + "step": 2630000 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.523625339172135, + "eval_loss": 2.166015625, + "eval_runtime": 42.3354, + "eval_samples_per_second": 84.279, + "eval_steps_per_second": 10.535, + "step": 2630000 + }, + { + "epoch": 0.79, + "learning_rate": 3.9149756989546236e-05, + "loss": 2.1394, + "step": 2635000 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.5236212296551512, + "eval_loss": 2.1640625, + "eval_runtime": 42.9131, + "eval_samples_per_second": 83.145, + "eval_steps_per_second": 10.393, + "step": 2635000 + }, + { + "epoch": 0.79, + "learning_rate": 3.907493960686948e-05, + "loss": 2.1384, + "step": 2640000 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.5233793160887042, + "eval_loss": 2.1640625, + "eval_runtime": 40.2525, + "eval_samples_per_second": 88.64, + "eval_steps_per_second": 11.08, + "step": 2640000 + }, + { + "epoch": 0.79, + "learning_rate": 3.900012222419273e-05, + "loss": 2.138, + "step": 2645000 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.5235653402241714, + "eval_loss": 2.1640625, + "eval_runtime": 43.9803, + "eval_samples_per_second": 81.127, + "eval_steps_per_second": 10.141, + "step": 2645000 + }, + { + "epoch": 0.79, + "learning_rate": 3.892530484151597e-05, + "loss": 2.1346, + "step": 2650000 + }, + { + "epoch": 0.79, + "eval_accuracy": 0.523867526706381, + "eval_loss": 2.1640625, + "eval_runtime": 44.5226, + "eval_samples_per_second": 80.139, + "eval_steps_per_second": 10.017, + "step": 2650000 + }, + { + "epoch": 0.8, + "learning_rate": 3.885047248038421e-05, + "loss": 2.1376, + "step": 2655000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5239204024915728, + "eval_loss": 2.1640625, + "eval_runtime": 42.646, + "eval_samples_per_second": 83.666, + "eval_steps_per_second": 10.458, + "step": 2655000 + }, + { + "epoch": 0.8, + "learning_rate": 3.877564011925247e-05, + "loss": 2.1409, + "step": 2660000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5239705385987753, + "eval_loss": 2.1640625, + "eval_runtime": 40.3234, + "eval_samples_per_second": 88.485, + "eval_steps_per_second": 11.061, + "step": 2660000 + }, + { + "epoch": 0.8, + "learning_rate": 3.8700807758120725e-05, + "loss": 2.1343, + "step": 2665000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5239877985701072, + "eval_loss": 2.1640625, + "eval_runtime": 42.9773, + "eval_samples_per_second": 83.021, + "eval_steps_per_second": 10.378, + "step": 2665000 + }, + { + "epoch": 0.8, + "learning_rate": 3.862597539698897e-05, + "loss": 2.1363, + "step": 2670000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5240447838722828, + "eval_loss": 2.162109375, + "eval_runtime": 44.9261, + "eval_samples_per_second": 79.419, + "eval_steps_per_second": 9.927, + "step": 2670000 + }, + { + "epoch": 0.8, + "learning_rate": 3.855112805740224e-05, + "loss": 2.1343, + "step": 2675000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5241869731599227, + "eval_loss": 2.162109375, + "eval_runtime": 43.4218, + "eval_samples_per_second": 82.171, + "eval_steps_per_second": 10.271, + "step": 2675000 + }, + { + "epoch": 0.8, + "learning_rate": 3.847631067472548e-05, + "loss": 2.1381, + "step": 2680000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5243439567087043, + "eval_loss": 2.162109375, + "eval_runtime": 43.0405, + "eval_samples_per_second": 82.899, + "eval_steps_per_second": 10.362, + "step": 2680000 + }, + { + "epoch": 0.8, + "learning_rate": 3.840147831359373e-05, + "loss": 2.1355, + "step": 2685000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5241456040222856, + "eval_loss": 2.162109375, + "eval_runtime": 43.7247, + "eval_samples_per_second": 81.601, + "eval_steps_per_second": 10.2, + "step": 2685000 + }, + { + "epoch": 0.81, + "learning_rate": 3.8326615995552e-05, + "loss": 2.1394, + "step": 2690000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.5242305340399511, + "eval_loss": 2.16015625, + "eval_runtime": 40.8207, + "eval_samples_per_second": 87.407, + "eval_steps_per_second": 10.926, + "step": 2690000 + }, + { + "epoch": 0.81, + "learning_rate": 3.825179861287524e-05, + "loss": 2.1359, + "step": 2695000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.5244875158353388, + "eval_loss": 2.16015625, + "eval_runtime": 44.3125, + "eval_samples_per_second": 80.519, + "eval_steps_per_second": 10.065, + "step": 2695000 + }, + { + "epoch": 0.81, + "learning_rate": 3.817698123019848e-05, + "loss": 2.1365, + "step": 2700000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.524362312551232, + "eval_loss": 2.16015625, + "eval_runtime": 42.9757, + "eval_samples_per_second": 83.024, + "eval_steps_per_second": 10.378, + "step": 2700000 + }, + { + "epoch": 0.81, + "learning_rate": 3.810216384752173e-05, + "loss": 2.131, + "step": 2705000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.5244225854669946, + "eval_loss": 2.16015625, + "eval_runtime": 44.5469, + "eval_samples_per_second": 80.095, + "eval_steps_per_second": 10.012, + "step": 2705000 + }, + { + "epoch": 0.81, + "learning_rate": 3.802731650793499e-05, + "loss": 2.1337, + "step": 2710000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.5244025858176733, + "eval_loss": 2.16015625, + "eval_runtime": 43.6203, + "eval_samples_per_second": 81.797, + "eval_steps_per_second": 10.225, + "step": 2710000 + }, + { + "epoch": 0.81, + "learning_rate": 3.7952499125258226e-05, + "loss": 2.1307, + "step": 2715000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.5245954591481136, + "eval_loss": 2.158203125, + "eval_runtime": 43.7473, + "eval_samples_per_second": 81.559, + "eval_steps_per_second": 10.195, + "step": 2715000 + }, + { + "epoch": 0.81, + "learning_rate": 3.787765178567149e-05, + "loss": 2.1333, + "step": 2720000 + }, + { + "epoch": 0.81, + "eval_accuracy": 0.524749429051107, + "eval_loss": 2.158203125, + "eval_runtime": 43.6989, + "eval_samples_per_second": 81.65, + "eval_steps_per_second": 10.206, + "step": 2720000 + }, + { + "epoch": 0.82, + "learning_rate": 3.780283440299474e-05, + "loss": 2.1354, + "step": 2725000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.5246316228975711, + "eval_loss": 2.158203125, + "eval_runtime": 43.001, + "eval_samples_per_second": 82.975, + "eval_steps_per_second": 10.372, + "step": 2725000 + }, + { + "epoch": 0.82, + "learning_rate": 3.7728002041862985e-05, + "loss": 2.1372, + "step": 2730000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.5248077841922771, + "eval_loss": 2.158203125, + "eval_runtime": 41.555, + "eval_samples_per_second": 85.862, + "eval_steps_per_second": 10.733, + "step": 2730000 + }, + { + "epoch": 0.82, + "learning_rate": 3.765316968073124e-05, + "loss": 2.1323, + "step": 2735000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.5248480574587185, + "eval_loss": 2.158203125, + "eval_runtime": 41.1337, + "eval_samples_per_second": 86.742, + "eval_steps_per_second": 10.843, + "step": 2735000 + }, + { + "epoch": 0.82, + "learning_rate": 3.757835229805448e-05, + "loss": 2.1315, + "step": 2740000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.5249064125998887, + "eval_loss": 2.15625, + "eval_runtime": 43.6977, + "eval_samples_per_second": 81.652, + "eval_steps_per_second": 10.206, + "step": 2740000 + }, + { + "epoch": 0.82, + "learning_rate": 3.750351993692273e-05, + "loss": 2.1341, + "step": 2745000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.5249143576660573, + "eval_loss": 2.15625, + "eval_runtime": 44.5031, + "eval_samples_per_second": 80.174, + "eval_steps_per_second": 10.022, + "step": 2745000 + }, + { + "epoch": 0.82, + "learning_rate": 3.7428702554245975e-05, + "loss": 2.132, + "step": 2750000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.5249768223242113, + "eval_loss": 2.15625, + "eval_runtime": 40.671, + "eval_samples_per_second": 87.728, + "eval_steps_per_second": 10.966, + "step": 2750000 + }, + { + "epoch": 0.83, + "learning_rate": 3.7353885171569214e-05, + "loss": 2.1322, + "step": 2755000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.5251954486277501, + "eval_loss": 2.15625, + "eval_runtime": 43.2998, + "eval_samples_per_second": 82.402, + "eval_steps_per_second": 10.3, + "step": 2755000 + }, + { + "epoch": 0.83, + "learning_rate": 3.727906778889246e-05, + "loss": 2.1298, + "step": 2760000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.5252044895651145, + "eval_loss": 2.15625, + "eval_runtime": 44.6651, + "eval_samples_per_second": 79.883, + "eval_steps_per_second": 9.985, + "step": 2760000 + }, + { + "epoch": 0.83, + "learning_rate": 3.720422044930572e-05, + "loss": 2.1285, + "step": 2765000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.5252362698297893, + "eval_loss": 2.154296875, + "eval_runtime": 43.7277, + "eval_samples_per_second": 81.596, + "eval_steps_per_second": 10.199, + "step": 2765000 + }, + { + "epoch": 0.83, + "learning_rate": 3.7129403066628966e-05, + "loss": 2.1299, + "step": 2770000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.5251601067816893, + "eval_loss": 2.15625, + "eval_runtime": 43.8556, + "eval_samples_per_second": 81.358, + "eval_steps_per_second": 10.17, + "step": 2770000 + }, + { + "epoch": 0.83, + "learning_rate": 3.7054585683952205e-05, + "loss": 2.1304, + "step": 2775000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.5252631186740835, + "eval_loss": 2.154296875, + "eval_runtime": 44.0024, + "eval_samples_per_second": 81.086, + "eval_steps_per_second": 10.136, + "step": 2775000 + }, + { + "epoch": 0.83, + "learning_rate": 3.6979768301275444e-05, + "loss": 2.1288, + "step": 2780000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.5254154447702835, + "eval_loss": 2.154296875, + "eval_runtime": 43.1187, + "eval_samples_per_second": 82.748, + "eval_steps_per_second": 10.344, + "step": 2780000 + }, + { + "epoch": 0.83, + "learning_rate": 3.6904935940143697e-05, + "loss": 2.1295, + "step": 2785000 + }, + { + "epoch": 0.83, + "eval_accuracy": 0.5253255833322374, + "eval_loss": 2.154296875, + "eval_runtime": 44.6338, + "eval_samples_per_second": 79.939, + "eval_steps_per_second": 9.992, + "step": 2785000 + }, + { + "epoch": 0.84, + "learning_rate": 3.683010357901195e-05, + "loss": 2.129, + "step": 2790000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.5255368125052053, + "eval_loss": 2.154296875, + "eval_runtime": 44.4544, + "eval_samples_per_second": 80.262, + "eval_steps_per_second": 10.033, + "step": 2790000 + }, + { + "epoch": 0.84, + "learning_rate": 3.6755286196335195e-05, + "loss": 2.1285, + "step": 2795000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.5253598293071026, + "eval_loss": 2.154296875, + "eval_runtime": 44.2524, + "eval_samples_per_second": 80.628, + "eval_steps_per_second": 10.079, + "step": 2795000 + }, + { + "epoch": 0.84, + "learning_rate": 3.668045383520344e-05, + "loss": 2.1292, + "step": 2800000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.5252880497437853, + "eval_loss": 2.154296875, + "eval_runtime": 40.6928, + "eval_samples_per_second": 87.681, + "eval_steps_per_second": 10.96, + "step": 2800000 + }, + { + "epoch": 0.84, + "learning_rate": 3.660563645252669e-05, + "loss": 2.1278, + "step": 2805000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.5256472215281704, + "eval_loss": 2.15234375, + "eval_runtime": 43.3931, + "eval_samples_per_second": 82.225, + "eval_steps_per_second": 10.278, + "step": 2805000 + }, + { + "epoch": 0.84, + "learning_rate": 3.653080409139494e-05, + "loss": 2.1239, + "step": 2810000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.5255110595321069, + "eval_loss": 2.15234375, + "eval_runtime": 42.2966, + "eval_samples_per_second": 84.357, + "eval_steps_per_second": 10.545, + "step": 2810000 + }, + { + "epoch": 0.84, + "learning_rate": 3.645598670871818e-05, + "loss": 2.1241, + "step": 2815000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.5259063950659495, + "eval_loss": 2.15234375, + "eval_runtime": 42.6516, + "eval_samples_per_second": 83.655, + "eval_steps_per_second": 10.457, + "step": 2815000 + }, + { + "epoch": 0.84, + "learning_rate": 3.638115434758643e-05, + "loss": 2.1232, + "step": 2820000 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.5256781798894485, + "eval_loss": 2.15234375, + "eval_runtime": 46.2947, + "eval_samples_per_second": 77.071, + "eval_steps_per_second": 9.634, + "step": 2820000 + }, + { + "epoch": 0.85, + "learning_rate": 3.6306321986454685e-05, + "loss": 2.1241, + "step": 2825000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.525697357635373, + "eval_loss": 2.150390625, + "eval_runtime": 43.287, + "eval_samples_per_second": 82.427, + "eval_steps_per_second": 10.303, + "step": 2825000 + }, + { + "epoch": 0.85, + "learning_rate": 3.623148962532294e-05, + "loss": 2.1236, + "step": 2830000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.5259439286544016, + "eval_loss": 2.150390625, + "eval_runtime": 43.0484, + "eval_samples_per_second": 82.884, + "eval_steps_per_second": 10.36, + "step": 2830000 + }, + { + "epoch": 0.85, + "learning_rate": 3.6156672242646177e-05, + "loss": 2.1272, + "step": 2835000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.5259442026222007, + "eval_loss": 2.150390625, + "eval_runtime": 42.9436, + "eval_samples_per_second": 83.086, + "eval_steps_per_second": 10.386, + "step": 2835000 + }, + { + "epoch": 0.85, + "learning_rate": 3.608183988151443e-05, + "loss": 2.1271, + "step": 2840000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.5260592690977474, + "eval_loss": 2.150390625, + "eval_runtime": 40.596, + "eval_samples_per_second": 87.89, + "eval_steps_per_second": 10.986, + "step": 2840000 + }, + { + "epoch": 0.85, + "learning_rate": 3.6007022498837675e-05, + "loss": 2.1249, + "step": 2845000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.52616776034612, + "eval_loss": 2.1484375, + "eval_runtime": 43.4159, + "eval_samples_per_second": 82.182, + "eval_steps_per_second": 10.273, + "step": 2845000 + }, + { + "epoch": 0.85, + "learning_rate": 3.5932175159250935e-05, + "loss": 2.1245, + "step": 2850000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.5260307764466595, + "eval_loss": 2.1484375, + "eval_runtime": 43.8356, + "eval_samples_per_second": 81.395, + "eval_steps_per_second": 10.174, + "step": 2850000 + }, + { + "epoch": 0.86, + "learning_rate": 3.585735777657418e-05, + "loss": 2.1222, + "step": 2855000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.5261137886897326, + "eval_loss": 2.1484375, + "eval_runtime": 43.0232, + "eval_samples_per_second": 82.932, + "eval_steps_per_second": 10.367, + "step": 2855000 + }, + { + "epoch": 0.86, + "learning_rate": 3.5782525415442434e-05, + "loss": 2.125, + "step": 2860000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.5263050182133793, + "eval_loss": 2.1484375, + "eval_runtime": 43.0096, + "eval_samples_per_second": 82.958, + "eval_steps_per_second": 10.37, + "step": 2860000 + }, + { + "epoch": 0.86, + "learning_rate": 3.570770803276567e-05, + "loss": 2.1261, + "step": 2865000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.5260803646182642, + "eval_loss": 2.1484375, + "eval_runtime": 40.5259, + "eval_samples_per_second": 88.042, + "eval_steps_per_second": 11.005, + "step": 2865000 + }, + { + "epoch": 0.86, + "learning_rate": 3.563289065008891e-05, + "loss": 2.1247, + "step": 2870000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.5262392659416383, + "eval_loss": 2.1484375, + "eval_runtime": 46.7614, + "eval_samples_per_second": 76.302, + "eval_steps_per_second": 9.538, + "step": 2870000 + }, + { + "epoch": 0.86, + "learning_rate": 3.5558028332047185e-05, + "loss": 2.1225, + "step": 2875000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.5263167988287328, + "eval_loss": 2.1484375, + "eval_runtime": 43.7319, + "eval_samples_per_second": 81.588, + "eval_steps_per_second": 10.199, + "step": 2875000 + }, + { + "epoch": 0.86, + "learning_rate": 3.5483210949370424e-05, + "loss": 2.122, + "step": 2880000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.5261085833015531, + "eval_loss": 2.1484375, + "eval_runtime": 46.8934, + "eval_samples_per_second": 76.087, + "eval_steps_per_second": 9.511, + "step": 2880000 + }, + { + "epoch": 0.86, + "learning_rate": 3.540839356669367e-05, + "loss": 2.1237, + "step": 2885000 + }, + { + "epoch": 0.86, + "eval_accuracy": 0.5261107750439444, + "eval_loss": 2.146484375, + "eval_runtime": 42.8485, + "eval_samples_per_second": 83.27, + "eval_steps_per_second": 10.409, + "step": 2885000 + }, + { + "epoch": 0.87, + "learning_rate": 3.533356120556192e-05, + "loss": 2.1219, + "step": 2890000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.5261825546072617, + "eval_loss": 2.146484375, + "eval_runtime": 44.3826, + "eval_samples_per_second": 80.392, + "eval_steps_per_second": 10.049, + "step": 2890000 + }, + { + "epoch": 0.87, + "learning_rate": 3.525874382288516e-05, + "loss": 2.1248, + "step": 2895000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.526191595544626, + "eval_loss": 2.146484375, + "eval_runtime": 40.9038, + "eval_samples_per_second": 87.229, + "eval_steps_per_second": 10.904, + "step": 2895000 + }, + { + "epoch": 0.87, + "learning_rate": 3.51839264402084e-05, + "loss": 2.1191, + "step": 2900000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.526354332417185, + "eval_loss": 2.146484375, + "eval_runtime": 44.4026, + "eval_samples_per_second": 80.356, + "eval_steps_per_second": 10.044, + "step": 2900000 + }, + { + "epoch": 0.87, + "learning_rate": 3.510909407907666e-05, + "loss": 2.1181, + "step": 2905000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.5264343310144699, + "eval_loss": 2.146484375, + "eval_runtime": 43.5492, + "eval_samples_per_second": 81.93, + "eval_steps_per_second": 10.241, + "step": 2905000 + }, + { + "epoch": 0.87, + "learning_rate": 3.50342766963999e-05, + "loss": 2.1176, + "step": 2910000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.5263020045675911, + "eval_loss": 2.146484375, + "eval_runtime": 46.4423, + "eval_samples_per_second": 76.827, + "eval_steps_per_second": 9.603, + "step": 2910000 + }, + { + "epoch": 0.87, + "learning_rate": 3.495944433526815e-05, + "loss": 2.1191, + "step": 2915000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.5266524093824109, + "eval_loss": 2.146484375, + "eval_runtime": 42.6675, + "eval_samples_per_second": 83.623, + "eval_steps_per_second": 10.453, + "step": 2915000 + }, + { + "epoch": 0.87, + "learning_rate": 3.488462695259139e-05, + "loss": 2.1206, + "step": 2920000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.5267954205734475, + "eval_loss": 2.14453125, + "eval_runtime": 42.9737, + "eval_samples_per_second": 83.028, + "eval_steps_per_second": 10.378, + "step": 2920000 + }, + { + "epoch": 0.88, + "learning_rate": 3.4809794591459645e-05, + "loss": 2.1148, + "step": 2925000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.5267219972033367, + "eval_loss": 2.14453125, + "eval_runtime": 44.1462, + "eval_samples_per_second": 80.822, + "eval_steps_per_second": 10.103, + "step": 2925000 + }, + { + "epoch": 0.88, + "learning_rate": 3.473497720878289e-05, + "loss": 2.1188, + "step": 2930000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.5270244576533453, + "eval_loss": 2.14453125, + "eval_runtime": 44.0298, + "eval_samples_per_second": 81.036, + "eval_steps_per_second": 10.129, + "step": 2930000 + }, + { + "epoch": 0.88, + "learning_rate": 3.466015982610613e-05, + "loss": 2.1118, + "step": 2935000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.5270036361006273, + "eval_loss": 2.14453125, + "eval_runtime": 41.5147, + "eval_samples_per_second": 85.945, + "eval_steps_per_second": 10.743, + "step": 2935000 + }, + { + "epoch": 0.88, + "learning_rate": 4.992518261732325e-05, + "loss": 2.1283, + "step": 2940000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.5243740931665856, + "eval_loss": 2.158203125, + "eval_runtime": 39.6741, + "eval_samples_per_second": 89.933, + "eval_steps_per_second": 11.242, + "step": 2940000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9850350256191494e-05, + "loss": 2.1336, + "step": 2945000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.5240346470637227, + "eval_loss": 2.162109375, + "eval_runtime": 39.6588, + "eval_samples_per_second": 89.967, + "eval_steps_per_second": 11.246, + "step": 2945000 + }, + { + "epoch": 0.88, + "learning_rate": 4.977551789505975e-05, + "loss": 2.1311, + "step": 2950000 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.5237494465850462, + "eval_loss": 2.162109375, + "eval_runtime": 39.7632, + "eval_samples_per_second": 89.731, + "eval_steps_per_second": 11.216, + "step": 2950000 + }, + { + "epoch": 0.89, + "learning_rate": 4.970070051238299e-05, + "loss": 2.1377, + "step": 2955000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.523618763944961, + "eval_loss": 2.1640625, + "eval_runtime": 39.7355, + "eval_samples_per_second": 89.794, + "eval_steps_per_second": 11.224, + "step": 2955000 + }, + { + "epoch": 0.89, + "learning_rate": 4.962588312970624e-05, + "loss": 2.136, + "step": 2960000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.523584244002297, + "eval_loss": 2.1640625, + "eval_runtime": 39.6172, + "eval_samples_per_second": 90.062, + "eval_steps_per_second": 11.258, + "step": 2960000 + }, + { + "epoch": 0.89, + "learning_rate": 4.955103579011949e-05, + "loss": 2.1394, + "step": 2965000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.5233458920172359, + "eval_loss": 2.1640625, + "eval_runtime": 39.7138, + "eval_samples_per_second": 89.843, + "eval_steps_per_second": 11.23, + "step": 2965000 + }, + { + "epoch": 0.89, + "learning_rate": 4.947621840744274e-05, + "loss": 2.1405, + "step": 2970000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.5233305498204963, + "eval_loss": 2.166015625, + "eval_runtime": 39.7403, + "eval_samples_per_second": 89.783, + "eval_steps_per_second": 11.223, + "step": 2970000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9401401024765983e-05, + "loss": 2.1391, + "step": 2975000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.5235795865497153, + "eval_loss": 2.166015625, + "eval_runtime": 39.791, + "eval_samples_per_second": 89.668, + "eval_steps_per_second": 11.209, + "step": 2975000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9326553685179237e-05, + "loss": 2.1353, + "step": 2980000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.5233823297344923, + "eval_loss": 2.166015625, + "eval_runtime": 39.6943, + "eval_samples_per_second": 89.887, + "eval_steps_per_second": 11.236, + "step": 2980000 + }, + { + "epoch": 0.89, + "learning_rate": 4.925173630250248e-05, + "loss": 2.1392, + "step": 2985000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.5233889049616665, + "eval_loss": 2.166015625, + "eval_runtime": 39.7244, + "eval_samples_per_second": 89.819, + "eval_steps_per_second": 11.227, + "step": 2985000 + }, + { + "epoch": 0.9, + "learning_rate": 4.917691891982573e-05, + "loss": 2.1384, + "step": 2990000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.5235058892118056, + "eval_loss": 2.166015625, + "eval_runtime": 39.7383, + "eval_samples_per_second": 89.787, + "eval_steps_per_second": 11.223, + "step": 2990000 + }, + { + "epoch": 0.9, + "learning_rate": 4.910210153714897e-05, + "loss": 2.1373, + "step": 2995000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.523321234915333, + "eval_loss": 2.166015625, + "eval_runtime": 39.8394, + "eval_samples_per_second": 89.56, + "eval_steps_per_second": 11.195, + "step": 2995000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9027284154472206e-05, + "loss": 2.1346, + "step": 3000000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.523394110349846, + "eval_loss": 2.166015625, + "eval_runtime": 39.7248, + "eval_samples_per_second": 89.818, + "eval_steps_per_second": 11.227, + "step": 3000000 + }, + { + "epoch": 0.9, + "learning_rate": 4.895246677179545e-05, + "loss": 2.1368, + "step": 3005000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.523494108596452, + "eval_loss": 2.166015625, + "eval_runtime": 39.8116, + "eval_samples_per_second": 89.622, + "eval_steps_per_second": 11.203, + "step": 3005000 + }, + { + "epoch": 0.9, + "learning_rate": 4.8877634410663705e-05, + "loss": 2.1383, + "step": 3010000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.5232738384861197, + "eval_loss": 2.166015625, + "eval_runtime": 39.8277, + "eval_samples_per_second": 89.586, + "eval_steps_per_second": 11.198, + "step": 3010000 + }, + { + "epoch": 0.9, + "learning_rate": 4.880280204953195e-05, + "loss": 2.1447, + "step": 3015000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.5233116460423708, + "eval_loss": 2.166015625, + "eval_runtime": 39.9037, + "eval_samples_per_second": 89.415, + "eval_steps_per_second": 11.177, + "step": 3015000 + }, + { + "epoch": 0.9, + "learning_rate": 4.87279846668552e-05, + "loss": 2.1392, + "step": 3020000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.5234119182567758, + "eval_loss": 2.166015625, + "eval_runtime": 39.8551, + "eval_samples_per_second": 89.524, + "eval_steps_per_second": 11.191, + "step": 3020000 + }, + { + "epoch": 0.91, + "learning_rate": 4.865315230572345e-05, + "loss": 2.1359, + "step": 3025000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.5233072625575881, + "eval_loss": 2.166015625, + "eval_runtime": 39.8861, + "eval_samples_per_second": 89.455, + "eval_steps_per_second": 11.182, + "step": 3025000 + }, + { + "epoch": 0.91, + "learning_rate": 4.8578334923046695e-05, + "loss": 2.1408, + "step": 3030000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.5233184952373438, + "eval_loss": 2.166015625, + "eval_runtime": 39.8536, + "eval_samples_per_second": 89.528, + "eval_steps_per_second": 11.191, + "step": 3030000 + }, + { + "epoch": 0.91, + "learning_rate": 4.850350256191494e-05, + "loss": 2.1437, + "step": 3035000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.5232642496131574, + "eval_loss": 2.166015625, + "eval_runtime": 39.8602, + "eval_samples_per_second": 89.513, + "eval_steps_per_second": 11.189, + "step": 3035000 + }, + { + "epoch": 0.91, + "learning_rate": 4.8428715136148166e-05, + "loss": 2.1354, + "step": 3040000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.5233253444323168, + "eval_loss": 2.166015625, + "eval_runtime": 39.8447, + "eval_samples_per_second": 89.548, + "eval_steps_per_second": 11.193, + "step": 3040000 + }, + { + "epoch": 0.91, + "learning_rate": 4.835383783965145e-05, + "loss": 2.1371, + "step": 3045000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.5234626022995761, + "eval_loss": 2.166015625, + "eval_runtime": 40.0007, + "eval_samples_per_second": 89.199, + "eval_steps_per_second": 11.15, + "step": 3045000 + }, + { + "epoch": 0.91, + "learning_rate": 4.82790054785197e-05, + "loss": 2.1399, + "step": 3050000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.5233957541566394, + "eval_loss": 2.166015625, + "eval_runtime": 39.8876, + "eval_samples_per_second": 89.451, + "eval_steps_per_second": 11.181, + "step": 3050000 + }, + { + "epoch": 0.92, + "learning_rate": 4.820420307429793e-05, + "loss": 2.1387, + "step": 3055000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.5234242468077273, + "eval_loss": 2.166015625, + "eval_runtime": 39.9685, + "eval_samples_per_second": 89.27, + "eval_steps_per_second": 11.159, + "step": 3055000 + }, + { + "epoch": 0.92, + "learning_rate": 4.8129340756256206e-05, + "loss": 2.1406, + "step": 3060000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.5232119217635636, + "eval_loss": 2.166015625, + "eval_runtime": 39.9746, + "eval_samples_per_second": 89.257, + "eval_steps_per_second": 11.157, + "step": 3060000 + }, + { + "epoch": 0.92, + "learning_rate": 4.805450839512445e-05, + "loss": 2.1387, + "step": 3065000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.5234650680097664, + "eval_loss": 2.166015625, + "eval_runtime": 40.0463, + "eval_samples_per_second": 89.097, + "eval_steps_per_second": 11.137, + "step": 3065000 + }, + { + "epoch": 0.92, + "learning_rate": 4.797966105553772e-05, + "loss": 2.1413, + "step": 3070000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.5234842457556909, + "eval_loss": 2.166015625, + "eval_runtime": 39.9277, + "eval_samples_per_second": 89.362, + "eval_steps_per_second": 11.17, + "step": 3070000 + }, + { + "epoch": 0.92, + "learning_rate": 4.790484367286096e-05, + "loss": 2.1371, + "step": 3075000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.523473561011533, + "eval_loss": 2.1640625, + "eval_runtime": 39.9812, + "eval_samples_per_second": 89.242, + "eval_steps_per_second": 11.155, + "step": 3075000 + }, + { + "epoch": 0.92, + "learning_rate": 4.782999633327422e-05, + "loss": 2.138, + "step": 3080000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.5234982181134358, + "eval_loss": 2.1640625, + "eval_runtime": 39.9703, + "eval_samples_per_second": 89.266, + "eval_steps_per_second": 11.158, + "step": 3080000 + }, + { + "epoch": 0.92, + "learning_rate": 4.7755148993687484e-05, + "loss": 2.1385, + "step": 3085000 + }, + { + "epoch": 0.92, + "eval_accuracy": 0.5236360239162929, + "eval_loss": 2.1640625, + "eval_runtime": 40.1115, + "eval_samples_per_second": 88.952, + "eval_steps_per_second": 11.119, + "step": 3085000 + }, + { + "epoch": 0.93, + "learning_rate": 4.768031663255573e-05, + "loss": 2.135, + "step": 3090000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.5233746586361225, + "eval_loss": 2.166015625, + "eval_runtime": 40.1304, + "eval_samples_per_second": 88.91, + "eval_steps_per_second": 11.114, + "step": 3090000 + }, + { + "epoch": 0.93, + "learning_rate": 4.760548427142398e-05, + "loss": 2.1401, + "step": 3095000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.5235689018055574, + "eval_loss": 2.1640625, + "eval_runtime": 40.1365, + "eval_samples_per_second": 88.897, + "eval_steps_per_second": 11.112, + "step": 3095000 + }, + { + "epoch": 0.93, + "learning_rate": 4.753066688874723e-05, + "loss": 2.1374, + "step": 3100000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.5235552034156113, + "eval_loss": 2.1640625, + "eval_runtime": 40.133, + "eval_samples_per_second": 88.904, + "eval_steps_per_second": 11.113, + "step": 3100000 + }, + { + "epoch": 0.93, + "learning_rate": 4.745586448452546e-05, + "loss": 2.1358, + "step": 3105000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.5237198580627628, + "eval_loss": 2.1640625, + "eval_runtime": 40.0267, + "eval_samples_per_second": 89.141, + "eval_steps_per_second": 11.143, + "step": 3105000 + }, + { + "epoch": 0.93, + "learning_rate": 4.7381032123393713e-05, + "loss": 2.1344, + "step": 3110000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.5239288954933393, + "eval_loss": 2.162109375, + "eval_runtime": 40.1156, + "eval_samples_per_second": 88.943, + "eval_steps_per_second": 11.118, + "step": 3110000 + }, + { + "epoch": 0.93, + "learning_rate": 4.730621474071695e-05, + "loss": 2.1368, + "step": 3115000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.5238793073217346, + "eval_loss": 2.162109375, + "eval_runtime": 40.1212, + "eval_samples_per_second": 88.93, + "eval_steps_per_second": 11.116, + "step": 3115000 + }, + { + "epoch": 0.93, + "learning_rate": 4.723136740113022e-05, + "loss": 2.1345, + "step": 3120000 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.5236836943133052, + "eval_loss": 2.162109375, + "eval_runtime": 40.203, + "eval_samples_per_second": 88.75, + "eval_steps_per_second": 11.094, + "step": 3120000 + }, + { + "epoch": 0.94, + "learning_rate": 4.7156535039998465e-05, + "loss": 2.1358, + "step": 3125000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.5238979371320612, + "eval_loss": 2.162109375, + "eval_runtime": 40.2344, + "eval_samples_per_second": 88.68, + "eval_steps_per_second": 11.085, + "step": 3125000 + }, + { + "epoch": 0.94, + "learning_rate": 4.708171765732171e-05, + "loss": 2.1395, + "step": 3130000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.5239330050103231, + "eval_loss": 2.162109375, + "eval_runtime": 40.4542, + "eval_samples_per_second": 88.199, + "eval_steps_per_second": 11.025, + "step": 3130000 + }, + { + "epoch": 0.94, + "learning_rate": 4.700690027464496e-05, + "loss": 2.1359, + "step": 3135000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.5242872453743277, + "eval_loss": 2.162109375, + "eval_runtime": 40.3355, + "eval_samples_per_second": 88.458, + "eval_steps_per_second": 11.057, + "step": 3135000 + }, + { + "epoch": 0.94, + "learning_rate": 4.693205293505821e-05, + "loss": 2.1373, + "step": 3140000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.5241631379614166, + "eval_loss": 2.16015625, + "eval_runtime": 40.2137, + "eval_samples_per_second": 88.726, + "eval_steps_per_second": 11.091, + "step": 3140000 + }, + { + "epoch": 0.94, + "learning_rate": 4.6857235552381456e-05, + "loss": 2.1357, + "step": 3145000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.5243412170307151, + "eval_loss": 2.16015625, + "eval_runtime": 40.3184, + "eval_samples_per_second": 88.496, + "eval_steps_per_second": 11.062, + "step": 3145000 + }, + { + "epoch": 0.94, + "learning_rate": 4.67824181697047e-05, + "loss": 2.1354, + "step": 3150000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.5243636823902266, + "eval_loss": 2.16015625, + "eval_runtime": 40.1892, + "eval_samples_per_second": 88.78, + "eval_steps_per_second": 11.098, + "step": 3150000 + }, + { + "epoch": 0.95, + "learning_rate": 4.670757083011796e-05, + "loss": 2.1323, + "step": 3155000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.5243839560073468, + "eval_loss": 2.16015625, + "eval_runtime": 40.3635, + "eval_samples_per_second": 88.397, + "eval_steps_per_second": 11.05, + "step": 3155000 + }, + { + "epoch": 0.95, + "learning_rate": 4.66327534474412e-05, + "loss": 2.133, + "step": 3160000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.5242327257823425, + "eval_loss": 2.16015625, + "eval_runtime": 41.1497, + "eval_samples_per_second": 86.708, + "eval_steps_per_second": 10.838, + "step": 3160000 + }, + { + "epoch": 0.95, + "learning_rate": 4.655790610785447e-05, + "loss": 2.1315, + "step": 3165000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.524407243270255, + "eval_loss": 2.16015625, + "eval_runtime": 41.4927, + "eval_samples_per_second": 85.991, + "eval_steps_per_second": 10.749, + "step": 3165000 + }, + { + "epoch": 0.95, + "learning_rate": 4.6483088725177706e-05, + "loss": 2.1363, + "step": 3170000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.5242845056963384, + "eval_loss": 2.16015625, + "eval_runtime": 43.5552, + "eval_samples_per_second": 81.919, + "eval_steps_per_second": 10.24, + "step": 3170000 + }, + { + "epoch": 0.95, + "learning_rate": 4.640825636404596e-05, + "loss": 2.1349, + "step": 3175000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.5245045018388719, + "eval_loss": 2.16015625, + "eval_runtime": 41.5498, + "eval_samples_per_second": 85.873, + "eval_steps_per_second": 10.734, + "step": 3175000 + }, + { + "epoch": 0.95, + "learning_rate": 4.633342400291421e-05, + "loss": 2.1336, + "step": 3180000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.524365600164819, + "eval_loss": 2.16015625, + "eval_runtime": 45.093, + "eval_samples_per_second": 79.125, + "eval_steps_per_second": 9.891, + "step": 3180000 + }, + { + "epoch": 0.95, + "learning_rate": 4.625860662023746e-05, + "loss": 2.1364, + "step": 3185000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.5243612166800363, + "eval_loss": 2.158203125, + "eval_runtime": 42.4027, + "eval_samples_per_second": 84.146, + "eval_steps_per_second": 10.518, + "step": 3185000 + }, + { + "epoch": 0.96, + "learning_rate": 4.618375928065071e-05, + "loss": 2.133, + "step": 3190000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5243256008661766, + "eval_loss": 2.158203125, + "eval_runtime": 40.6817, + "eval_samples_per_second": 87.705, + "eval_steps_per_second": 10.963, + "step": 3190000 + }, + { + "epoch": 0.96, + "learning_rate": 4.6108941897973956e-05, + "loss": 2.1349, + "step": 3195000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5245045018388719, + "eval_loss": 2.158203125, + "eval_runtime": 42.2213, + "eval_samples_per_second": 84.507, + "eval_steps_per_second": 10.563, + "step": 3195000 + }, + { + "epoch": 0.96, + "learning_rate": 4.60341245152972e-05, + "loss": 2.134, + "step": 3200000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5245702541106129, + "eval_loss": 2.158203125, + "eval_runtime": 41.8771, + "eval_samples_per_second": 85.202, + "eval_steps_per_second": 10.65, + "step": 3200000 + }, + { + "epoch": 0.96, + "learning_rate": 4.5959277175710455e-05, + "loss": 2.1308, + "step": 3205000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5249423023815473, + "eval_loss": 2.15625, + "eval_runtime": 42.7291, + "eval_samples_per_second": 83.503, + "eval_steps_per_second": 10.438, + "step": 3205000 + }, + { + "epoch": 0.96, + "learning_rate": 4.5884444814578715e-05, + "loss": 2.1302, + "step": 3210000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5246628552266481, + "eval_loss": 2.15625, + "eval_runtime": 41.7473, + "eval_samples_per_second": 85.467, + "eval_steps_per_second": 10.683, + "step": 3210000 + }, + { + "epoch": 0.96, + "learning_rate": 4.580964241035695e-05, + "loss": 2.1302, + "step": 3215000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5246708002928168, + "eval_loss": 2.15625, + "eval_runtime": 43.5638, + "eval_samples_per_second": 81.903, + "eval_steps_per_second": 10.238, + "step": 3215000 + }, + { + "epoch": 0.96, + "learning_rate": 4.5734825027680186e-05, + "loss": 2.1331, + "step": 3220000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5247650452156455, + "eval_loss": 2.15625, + "eval_runtime": 42.8533, + "eval_samples_per_second": 83.261, + "eval_steps_per_second": 10.408, + "step": 3220000 + }, + { + "epoch": 0.97, + "learning_rate": 4.565999266654844e-05, + "loss": 2.1273, + "step": 3225000 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.5247250459170031, + "eval_loss": 2.15625, + "eval_runtime": 42.4534, + "eval_samples_per_second": 84.045, + "eval_steps_per_second": 10.506, + "step": 3225000 + }, + { + "epoch": 0.97, + "learning_rate": 4.5585175283871685e-05, + "loss": 2.1286, + "step": 3230000 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.5249765483564124, + "eval_loss": 2.15625, + "eval_runtime": 43.0626, + "eval_samples_per_second": 82.856, + "eval_steps_per_second": 10.357, + "step": 3230000 + }, + { + "epoch": 0.97, + "learning_rate": 4.551034292273994e-05, + "loss": 2.1282, + "step": 3235000 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.525047232048534, + "eval_loss": 2.154296875, + "eval_runtime": 42.4424, + "eval_samples_per_second": 84.067, + "eval_steps_per_second": 10.508, + "step": 3235000 + }, + { + "epoch": 0.97, + "learning_rate": 4.543552554006317e-05, + "loss": 2.1309, + "step": 3240000 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.5250951764133451, + "eval_loss": 2.154296875, + "eval_runtime": 44.2507, + "eval_samples_per_second": 80.631, + "eval_steps_per_second": 10.079, + "step": 3240000 + }, + { + "epoch": 0.97, + "learning_rate": 4.5360708157386415e-05, + "loss": 2.1295, + "step": 3245000 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.5253592813715047, + "eval_loss": 2.154296875, + "eval_runtime": 44.2616, + "eval_samples_per_second": 80.612, + "eval_steps_per_second": 10.076, + "step": 3245000 + }, + { + "epoch": 0.97, + "learning_rate": 4.528589077470966e-05, + "loss": 2.1275, + "step": 3250000 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.5253707880190593, + "eval_loss": 2.154296875, + "eval_runtime": 43.0103, + "eval_samples_per_second": 82.957, + "eval_steps_per_second": 10.37, + "step": 3250000 + }, + { + "epoch": 0.98, + "learning_rate": 4.5211058413577914e-05, + "loss": 2.133, + "step": 3255000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.5253924314751741, + "eval_loss": 2.154296875, + "eval_runtime": 41.4169, + "eval_samples_per_second": 86.148, + "eval_steps_per_second": 10.769, + "step": 3255000 + }, + { + "epoch": 0.98, + "learning_rate": 4.513622605244616e-05, + "loss": 2.1301, + "step": 3260000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.5251132582880739, + "eval_loss": 2.154296875, + "eval_runtime": 42.1862, + "eval_samples_per_second": 84.577, + "eval_steps_per_second": 10.572, + "step": 3260000 + }, + { + "epoch": 0.98, + "learning_rate": 4.5061408669769406e-05, + "loss": 2.1314, + "step": 3265000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.5253096931999001, + "eval_loss": 2.15234375, + "eval_runtime": 43.9727, + "eval_samples_per_second": 81.141, + "eval_steps_per_second": 10.143, + "step": 3265000 + }, + { + "epoch": 0.98, + "learning_rate": 4.498656133018267e-05, + "loss": 2.1258, + "step": 3270000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.5254751697504482, + "eval_loss": 2.15234375, + "eval_runtime": 42.0779, + "eval_samples_per_second": 84.795, + "eval_steps_per_second": 10.599, + "step": 3270000 + }, + { + "epoch": 0.98, + "learning_rate": 4.491172896905092e-05, + "loss": 2.1286, + "step": 3275000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.525354897886722, + "eval_loss": 2.15234375, + "eval_runtime": 41.8893, + "eval_samples_per_second": 85.177, + "eval_steps_per_second": 10.647, + "step": 3275000 + }, + { + "epoch": 0.98, + "learning_rate": 4.4836911586374165e-05, + "loss": 2.1267, + "step": 3280000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.5253622950172928, + "eval_loss": 2.15234375, + "eval_runtime": 42.0241, + "eval_samples_per_second": 84.904, + "eval_steps_per_second": 10.613, + "step": 3280000 + }, + { + "epoch": 0.98, + "learning_rate": 4.476207922524242e-05, + "loss": 2.13, + "step": 3285000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.5254332526772133, + "eval_loss": 2.15234375, + "eval_runtime": 43.4043, + "eval_samples_per_second": 82.204, + "eval_steps_per_second": 10.275, + "step": 3285000 + }, + { + "epoch": 0.99, + "learning_rate": 4.4687246864110663e-05, + "loss": 2.1284, + "step": 3290000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5254976351099597, + "eval_loss": 2.15234375, + "eval_runtime": 43.4747, + "eval_samples_per_second": 82.071, + "eval_steps_per_second": 10.259, + "step": 3290000 + }, + { + "epoch": 0.99, + "learning_rate": 4.461242948143391e-05, + "loss": 2.1295, + "step": 3295000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5254190063516695, + "eval_loss": 2.15234375, + "eval_runtime": 44.0831, + "eval_samples_per_second": 80.938, + "eval_steps_per_second": 10.117, + "step": 3295000 + }, + { + "epoch": 0.99, + "learning_rate": 4.453759712030216e-05, + "loss": 2.1241, + "step": 3300000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5255573600901244, + "eval_loss": 2.15234375, + "eval_runtime": 43.4229, + "eval_samples_per_second": 82.169, + "eval_steps_per_second": 10.271, + "step": 3300000 + }, + { + "epoch": 0.99, + "learning_rate": 4.446276475917041e-05, + "loss": 2.1297, + "step": 3305000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5257825616208374, + "eval_loss": 2.15234375, + "eval_runtime": 41.6675, + "eval_samples_per_second": 85.63, + "eval_steps_per_second": 10.704, + "step": 3305000 + }, + { + "epoch": 0.99, + "learning_rate": 4.4387947376493654e-05, + "loss": 2.126, + "step": 3310000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5256425640755888, + "eval_loss": 2.150390625, + "eval_runtime": 43.5829, + "eval_samples_per_second": 81.867, + "eval_steps_per_second": 10.233, + "step": 3310000 + }, + { + "epoch": 0.99, + "learning_rate": 4.43131299938169e-05, + "loss": 2.1263, + "step": 3315000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.5255672229308856, + "eval_loss": 2.150390625, + "eval_runtime": 40.6138, + "eval_samples_per_second": 87.852, + "eval_steps_per_second": 10.981, + "step": 3315000 + }, + { + "epoch": 0.99, + "learning_rate": 4.423828265423016e-05, + "loss": 2.1273, + "step": 3320000 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.525589962258196, + "eval_loss": 2.150390625, + "eval_runtime": 41.1643, + "eval_samples_per_second": 86.677, + "eval_steps_per_second": 10.835, + "step": 3320000 + }, + { + "epoch": 1.0, + "learning_rate": 4.4163450293098406e-05, + "loss": 2.1214, + "step": 3325000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5255433877323795, + "eval_loss": 2.150390625, + "eval_runtime": 40.8274, + "eval_samples_per_second": 87.392, + "eval_steps_per_second": 10.924, + "step": 3325000 + }, + { + "epoch": 1.0, + "learning_rate": 4.408863291042165e-05, + "loss": 2.1275, + "step": 3330000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5255872225802068, + "eval_loss": 2.150390625, + "eval_runtime": 42.63, + "eval_samples_per_second": 83.697, + "eval_steps_per_second": 10.462, + "step": 3330000 + }, + { + "epoch": 1.0, + "learning_rate": 4.40138155277449e-05, + "loss": 2.1227, + "step": 3335000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.5258348894704312, + "eval_loss": 2.150390625, + "eval_runtime": 42.4569, + "eval_samples_per_second": 84.038, + "eval_steps_per_second": 10.505, + "step": 3335000 + }, + { + "epoch": 1.0, + "step": 3338128, + "total_flos": 1.0872066371139498e+21, + "train_loss": 0.2576859601399347, + "train_runtime": 150388.7494, + "train_samples_per_second": 177.573, + "train_steps_per_second": 22.197 + } + ], + "max_steps": 3338128, + "num_train_epochs": 1, + "total_flos": 1.0872066371139498e+21, + "trial_name": null, + "trial_params": null +}