{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9839005449146736, "global_step": 76000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.0004997842653586619, "loss": 4.023, "step": 2000 }, { "epoch": 0.05, "eval_loss": 2.9591097831726074, "eval_runtime": 235.529, "eval_samples_per_second": 167.083, "eval_steps_per_second": 10.445, "step": 2000 }, { "epoch": 0.1, "learning_rate": 0.0004980606215742609, "loss": 2.9094, "step": 4000 }, { "epoch": 0.1, "eval_loss": 2.7322702407836914, "eval_runtime": 235.6062, "eval_samples_per_second": 167.029, "eval_steps_per_second": 10.441, "step": 4000 }, { "epoch": 0.16, "learning_rate": 0.0004946252280608798, "loss": 2.7599, "step": 6000 }, { "epoch": 0.16, "eval_loss": 2.633601665496826, "eval_runtime": 235.5566, "eval_samples_per_second": 167.064, "eval_steps_per_second": 10.443, "step": 6000 }, { "epoch": 0.21, "learning_rate": 0.0004895017908540765, "loss": 2.6821, "step": 8000 }, { "epoch": 0.21, "eval_loss": 2.571882724761963, "eval_runtime": 235.5446, "eval_samples_per_second": 167.072, "eval_steps_per_second": 10.444, "step": 8000 }, { "epoch": 0.26, "learning_rate": 0.0004827332514022428, "loss": 2.6301, "step": 10000 }, { "epoch": 0.26, "eval_loss": 2.5301053524017334, "eval_runtime": 236.1109, "eval_samples_per_second": 166.672, "eval_steps_per_second": 10.419, "step": 10000 }, { "epoch": 0.31, "learning_rate": 0.0004743573557450783, "loss": 2.5918, "step": 12000 }, { "epoch": 0.31, "eval_loss": 2.4954473972320557, "eval_runtime": 235.5537, "eval_samples_per_second": 167.066, "eval_steps_per_second": 10.443, "step": 12000 }, { "epoch": 0.37, "learning_rate": 0.00046442948254696206, "loss": 2.5616, "step": 14000 }, { "epoch": 0.37, "eval_loss": 2.4691474437713623, "eval_runtime": 235.6133, "eval_samples_per_second": 167.024, "eval_steps_per_second": 10.441, "step": 14000 }, { "epoch": 0.42, "learning_rate": 0.00045302193229931784, "loss": 2.5361, "step": 16000 }, { "epoch": 0.42, "eval_loss": 2.4461960792541504, "eval_runtime": 235.5392, "eval_samples_per_second": 167.076, "eval_steps_per_second": 10.444, "step": 16000 }, { "epoch": 0.47, "learning_rate": 0.00044023364212028847, "loss": 2.5158, "step": 18000 }, { "epoch": 0.47, "eval_loss": 2.4347801208496094, "eval_runtime": 235.4954, "eval_samples_per_second": 167.107, "eval_steps_per_second": 10.446, "step": 18000 }, { "epoch": 0.52, "learning_rate": 0.0004261144576270032, "loss": 2.4993, "step": 20000 }, { "epoch": 0.52, "eval_loss": 2.412471294403076, "eval_runtime": 235.4551, "eval_samples_per_second": 167.136, "eval_steps_per_second": 10.448, "step": 20000 }, { "epoch": 0.57, "learning_rate": 0.00041077999005815623, "loss": 2.4848, "step": 22000 }, { "epoch": 0.57, "eval_loss": 2.3976333141326904, "eval_runtime": 236.1174, "eval_samples_per_second": 166.667, "eval_steps_per_second": 10.419, "step": 22000 }, { "epoch": 0.63, "learning_rate": 0.00039433605536526187, "loss": 2.4689, "step": 24000 }, { "epoch": 0.63, "eval_loss": 2.3828279972076416, "eval_runtime": 235.5611, "eval_samples_per_second": 167.061, "eval_steps_per_second": 10.443, "step": 24000 }, { "epoch": 0.68, "learning_rate": 0.00037690507447365334, "loss": 2.4562, "step": 26000 }, { "epoch": 0.68, "eval_loss": 2.3714354038238525, "eval_runtime": 237.177, "eval_samples_per_second": 165.923, "eval_steps_per_second": 10.372, "step": 26000 }, { "epoch": 0.73, "learning_rate": 0.00035860861171489625, "loss": 2.4415, "step": 28000 }, { "epoch": 0.73, "eval_loss": 2.3587660789489746, "eval_runtime": 235.9789, "eval_samples_per_second": 166.765, "eval_steps_per_second": 10.425, "step": 28000 }, { "epoch": 0.78, "learning_rate": 0.0003395447945183574, "loss": 2.4321, "step": 30000 }, { "epoch": 0.78, "eval_loss": 2.3464348316192627, "eval_runtime": 235.9733, "eval_samples_per_second": 166.769, "eval_steps_per_second": 10.425, "step": 30000 }, { "epoch": 0.84, "learning_rate": 0.0003198730435893789, "loss": 2.4217, "step": 32000 }, { "epoch": 0.84, "eval_loss": 2.3358864784240723, "eval_runtime": 235.8391, "eval_samples_per_second": 166.864, "eval_steps_per_second": 10.431, "step": 32000 }, { "epoch": 0.89, "learning_rate": 0.00029970943417239666, "loss": 2.4062, "step": 34000 }, { "epoch": 0.89, "eval_loss": 2.3258254528045654, "eval_runtime": 235.6751, "eval_samples_per_second": 166.98, "eval_steps_per_second": 10.438, "step": 34000 }, { "epoch": 0.94, "learning_rate": 0.00027922343434053505, "loss": 2.3968, "step": 36000 }, { "epoch": 0.94, "eval_loss": 2.3131136894226074, "eval_runtime": 235.4005, "eval_samples_per_second": 167.175, "eval_steps_per_second": 10.45, "step": 36000 }, { "epoch": 0.99, "learning_rate": 0.000258556941398551, "loss": 2.3882, "step": 38000 }, { "epoch": 0.99, "eval_loss": 2.3022758960723877, "eval_runtime": 234.8551, "eval_samples_per_second": 167.563, "eval_steps_per_second": 10.475, "step": 38000 }, { "epoch": 1.04, "learning_rate": 0.00023779013959131367, "loss": 2.3708, "step": 40000 }, { "epoch": 1.04, "eval_loss": 2.292479991912842, "eval_runtime": 234.8376, "eval_samples_per_second": 167.575, "eval_steps_per_second": 10.475, "step": 40000 }, { "epoch": 1.1, "learning_rate": 0.00021710759229054673, "loss": 2.3597, "step": 42000 }, { "epoch": 1.1, "eval_loss": 2.2818188667297363, "eval_runtime": 234.9302, "eval_samples_per_second": 167.509, "eval_steps_per_second": 10.471, "step": 42000 }, { "epoch": 1.15, "learning_rate": 0.0001966520200317064, "loss": 2.3516, "step": 44000 }, { "epoch": 1.15, "eval_loss": 2.271631956100464, "eval_runtime": 234.8493, "eval_samples_per_second": 167.567, "eval_steps_per_second": 10.475, "step": 44000 }, { "epoch": 1.2, "learning_rate": 0.00017656457710219216, "loss": 2.3402, "step": 46000 }, { "epoch": 1.2, "eval_loss": 2.262141466140747, "eval_runtime": 234.8012, "eval_samples_per_second": 167.601, "eval_steps_per_second": 10.477, "step": 46000 }, { "epoch": 1.25, "learning_rate": 0.0001569838775019702, "loss": 2.3317, "step": 48000 }, { "epoch": 1.25, "eval_loss": 2.250854730606079, "eval_runtime": 234.8456, "eval_samples_per_second": 167.57, "eval_steps_per_second": 10.475, "step": 48000 }, { "epoch": 1.31, "learning_rate": 0.00013804503843352594, "loss": 2.3208, "step": 50000 }, { "epoch": 1.31, "eval_loss": 2.2419745922088623, "eval_runtime": 235.4331, "eval_samples_per_second": 167.151, "eval_steps_per_second": 10.449, "step": 50000 }, { "epoch": 1.36, "learning_rate": 0.00011988761690506509, "loss": 2.3129, "step": 52000 }, { "epoch": 1.36, "eval_loss": 2.232053518295288, "eval_runtime": 235.1368, "eval_samples_per_second": 167.362, "eval_steps_per_second": 10.462, "step": 52000 }, { "epoch": 1.41, "learning_rate": 0.00010262714265881779, "loss": 2.3034, "step": 54000 }, { "epoch": 1.41, "eval_loss": 2.223116874694824, "eval_runtime": 235.501, "eval_samples_per_second": 167.103, "eval_steps_per_second": 10.446, "step": 54000 }, { "epoch": 1.46, "learning_rate": 8.638260310471869e-05, "loss": 2.2944, "step": 56000 }, { "epoch": 1.46, "eval_loss": 2.215869188308716, "eval_runtime": 235.4682, "eval_samples_per_second": 167.127, "eval_steps_per_second": 10.447, "step": 56000 }, { "epoch": 1.51, "learning_rate": 7.126598216971828e-05, "loss": 2.2867, "step": 58000 }, { "epoch": 1.51, "eval_loss": 2.2056751251220703, "eval_runtime": 235.5179, "eval_samples_per_second": 167.091, "eval_steps_per_second": 10.445, "step": 58000 }, { "epoch": 1.57, "learning_rate": 5.737486728530572e-05, "loss": 2.2775, "step": 60000 }, { "epoch": 1.57, "eval_loss": 2.19879150390625, "eval_runtime": 235.4838, "eval_samples_per_second": 167.116, "eval_steps_per_second": 10.447, "step": 60000 }, { "epoch": 1.62, "learning_rate": 4.481890186103055e-05, "loss": 2.2706, "step": 62000 }, { "epoch": 1.62, "eval_loss": 2.191859006881714, "eval_runtime": 235.3807, "eval_samples_per_second": 167.189, "eval_steps_per_second": 10.451, "step": 62000 }, { "epoch": 1.67, "learning_rate": 3.36825867617602e-05, "loss": 2.2653, "step": 64000 }, { "epoch": 1.67, "eval_loss": 2.185826301574707, "eval_runtime": 235.8171, "eval_samples_per_second": 166.879, "eval_steps_per_second": 10.432, "step": 64000 }, { "epoch": 1.72, "learning_rate": 2.4031550628330818e-05, "loss": 2.2595, "step": 66000 }, { "epoch": 1.72, "eval_loss": 2.1810295581817627, "eval_runtime": 235.6965, "eval_samples_per_second": 166.965, "eval_steps_per_second": 10.437, "step": 66000 }, { "epoch": 1.78, "learning_rate": 1.5934609437343344e-05, "loss": 2.2546, "step": 68000 }, { "epoch": 1.78, "eval_loss": 2.1772620677948, "eval_runtime": 234.5679, "eval_samples_per_second": 167.768, "eval_steps_per_second": 10.487, "step": 68000 }, { "epoch": 1.83, "learning_rate": 9.452843465689393e-06, "loss": 2.2515, "step": 70000 }, { "epoch": 1.83, "eval_loss": 2.174480676651001, "eval_runtime": 234.8677, "eval_samples_per_second": 167.554, "eval_steps_per_second": 10.474, "step": 70000 }, { "epoch": 1.88, "learning_rate": 4.632970471463188e-06, "loss": 2.25, "step": 72000 }, { "epoch": 1.88, "eval_loss": 2.1722278594970703, "eval_runtime": 234.8936, "eval_samples_per_second": 167.535, "eval_steps_per_second": 10.473, "step": 72000 }, { "epoch": 1.93, "learning_rate": 1.504568227717773e-06, "loss": 2.2491, "step": 74000 }, { "epoch": 1.93, "eval_loss": 2.1710853576660156, "eval_runtime": 234.9822, "eval_samples_per_second": 167.472, "eval_steps_per_second": 10.469, "step": 74000 }, { "epoch": 1.98, "learning_rate": 8.920279775520568e-08, "loss": 2.2505, "step": 76000 }, { "epoch": 1.98, "eval_loss": 2.1710739135742188, "eval_runtime": 234.8578, "eval_samples_per_second": 167.561, "eval_steps_per_second": 10.474, "step": 76000 } ], "max_steps": 76616, "num_train_epochs": 2, "total_flos": 3.177337760907264e+18, "trial_name": null, "trial_params": null }