{ "best_metric": null, "best_model_checkpoint": null, "epoch": 34.210215142059454, "global_step": 19500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.917429577464789e-05, "loss": 1.7834, "step": 570 }, { "epoch": 1.0, "eval_loss": 0.6923606991767883, "eval_runtime": 1521.6883, "eval_samples_per_second": 921.873, "eval_steps_per_second": 14.404, "step": 570 }, { "epoch": 2.0, "learning_rate": 4.817077464788733e-05, "loss": 0.6994, "step": 1140 }, { "epoch": 2.0, "eval_loss": 0.6767147183418274, "eval_runtime": 1526.5049, "eval_samples_per_second": 918.964, "eval_steps_per_second": 14.359, "step": 1140 }, { "epoch": 3.0, "learning_rate": 4.7167253521126765e-05, "loss": 0.654, "step": 1710 }, { "epoch": 3.0, "eval_loss": 0.6709172129631042, "eval_runtime": 1516.8574, "eval_samples_per_second": 924.809, "eval_steps_per_second": 14.45, "step": 1710 }, { "epoch": 4.0, "learning_rate": 4.61637323943662e-05, "loss": 0.6194, "step": 2280 }, { "epoch": 4.0, "eval_loss": 0.6694589257240295, "eval_runtime": 1517.553, "eval_samples_per_second": 924.385, "eval_steps_per_second": 14.444, "step": 2280 }, { "epoch": 5.0, "learning_rate": 4.516021126760563e-05, "loss": 0.5904, "step": 2850 }, { "epoch": 5.0, "eval_loss": 0.6706073880195618, "eval_runtime": 1514.7995, "eval_samples_per_second": 926.065, "eval_steps_per_second": 14.47, "step": 2850 }, { "epoch": 6.0, "learning_rate": 4.4156690140845075e-05, "loss": 0.5653, "step": 3420 }, { "epoch": 6.0, "eval_loss": 0.6708899736404419, "eval_runtime": 1517.3829, "eval_samples_per_second": 924.488, "eval_steps_per_second": 14.445, "step": 3420 }, { "epoch": 7.0, "learning_rate": 4.315316901408451e-05, "loss": 0.5431, "step": 3990 }, { "epoch": 7.0, "eval_loss": 0.6717029809951782, "eval_runtime": 1515.5706, "eval_samples_per_second": 925.594, "eval_steps_per_second": 14.463, "step": 3990 }, { "epoch": 8.0, "learning_rate": 4.214964788732394e-05, "loss": 0.5237, "step": 4560 }, { "epoch": 8.0, "eval_loss": 0.6746511459350586, "eval_runtime": 1515.027, "eval_samples_per_second": 925.926, "eval_steps_per_second": 14.468, "step": 4560 }, { "epoch": 9.0, "learning_rate": 4.114612676056338e-05, "loss": 0.5061, "step": 5130 }, { "epoch": 9.0, "eval_loss": 0.6748108267784119, "eval_runtime": 1514.6887, "eval_samples_per_second": 926.133, "eval_steps_per_second": 14.471, "step": 5130 }, { "epoch": 10.0, "learning_rate": 4.014260563380282e-05, "loss": 0.4901, "step": 5700 }, { "epoch": 10.0, "eval_loss": 0.6780610680580139, "eval_runtime": 1515.4454, "eval_samples_per_second": 925.67, "eval_steps_per_second": 14.464, "step": 5700 }, { "epoch": 11.0, "learning_rate": 3.913908450704226e-05, "loss": 0.4755, "step": 6270 }, { "epoch": 11.0, "eval_loss": 0.6779971718788147, "eval_runtime": 1517.3277, "eval_samples_per_second": 924.522, "eval_steps_per_second": 14.446, "step": 6270 }, { "epoch": 12.0, "learning_rate": 3.813556338028169e-05, "loss": 0.4624, "step": 6840 }, { "epoch": 12.0, "eval_loss": 0.6860271096229553, "eval_runtime": 1515.1169, "eval_samples_per_second": 925.871, "eval_steps_per_second": 14.467, "step": 6840 }, { "epoch": 13.0, "learning_rate": 3.7132042253521126e-05, "loss": 0.4501, "step": 7410 }, { "epoch": 13.0, "eval_loss": 0.6835764646530151, "eval_runtime": 1513.5665, "eval_samples_per_second": 926.82, "eval_steps_per_second": 14.482, "step": 7410 }, { "epoch": 14.0, "learning_rate": 3.612852112676057e-05, "loss": 0.4389, "step": 7980 }, { "epoch": 14.0, "eval_loss": 0.6895220279693604, "eval_runtime": 1514.0338, "eval_samples_per_second": 926.533, "eval_steps_per_second": 14.477, "step": 7980 }, { "epoch": 15.0, "learning_rate": 3.5125e-05, "loss": 0.4286, "step": 8550 }, { "epoch": 15.0, "eval_loss": 0.6874631643295288, "eval_runtime": 1516.075, "eval_samples_per_second": 925.286, "eval_steps_per_second": 14.458, "step": 8550 }, { "epoch": 16.0, "learning_rate": 3.412147887323944e-05, "loss": 0.4189, "step": 9120 }, { "epoch": 16.0, "eval_loss": 0.6922647953033447, "eval_runtime": 1516.6302, "eval_samples_per_second": 924.947, "eval_steps_per_second": 14.452, "step": 9120 }, { "epoch": 17.0, "learning_rate": 3.3117957746478874e-05, "loss": 0.41, "step": 9690 }, { "epoch": 17.0, "eval_loss": 0.6977216005325317, "eval_runtime": 1513.9466, "eval_samples_per_second": 926.587, "eval_steps_per_second": 14.478, "step": 9690 }, { "epoch": 18.0, "learning_rate": 3.211443661971831e-05, "loss": 0.4017, "step": 10260 }, { "epoch": 18.0, "eval_loss": 0.705147922039032, "eval_runtime": 1513.405, "eval_samples_per_second": 926.918, "eval_steps_per_second": 14.483, "step": 10260 }, { "epoch": 19.0, "learning_rate": 3.111091549295775e-05, "loss": 0.394, "step": 10830 }, { "epoch": 19.0, "eval_loss": 0.6972260475158691, "eval_runtime": 1514.848, "eval_samples_per_second": 926.035, "eval_steps_per_second": 14.469, "step": 10830 }, { "epoch": 20.0, "learning_rate": 3.0109154929577467e-05, "loss": 0.3868, "step": 11400 }, { "epoch": 20.0, "eval_loss": 0.7031562328338623, "eval_runtime": 1513.8461, "eval_samples_per_second": 926.648, "eval_steps_per_second": 14.479, "step": 11400 }, { "epoch": 21.0, "learning_rate": 2.91056338028169e-05, "loss": 0.38, "step": 11970 }, { "epoch": 21.0, "eval_loss": 0.7028641104698181, "eval_runtime": 1513.4864, "eval_samples_per_second": 926.869, "eval_steps_per_second": 14.482, "step": 11970 }, { "epoch": 22.0, "learning_rate": 2.810211267605634e-05, "loss": 0.3738, "step": 12540 }, { "epoch": 22.0, "eval_loss": 0.7074136734008789, "eval_runtime": 1513.6381, "eval_samples_per_second": 926.776, "eval_steps_per_second": 14.481, "step": 12540 }, { "epoch": 23.0, "learning_rate": 2.7098591549295778e-05, "loss": 0.3679, "step": 13110 }, { "epoch": 23.0, "eval_loss": 0.7076618075370789, "eval_runtime": 1513.3948, "eval_samples_per_second": 926.925, "eval_steps_per_second": 14.483, "step": 13110 }, { "epoch": 24.0, "learning_rate": 2.609507042253521e-05, "loss": 0.3623, "step": 13680 }, { "epoch": 24.0, "eval_loss": 0.7148919701576233, "eval_runtime": 1514.4065, "eval_samples_per_second": 926.305, "eval_steps_per_second": 14.474, "step": 13680 }, { "epoch": 25.0, "learning_rate": 2.509507042253521e-05, "loss": 0.3572, "step": 14250 }, { "epoch": 25.0, "eval_loss": 0.7150377631187439, "eval_runtime": 1514.2022, "eval_samples_per_second": 926.43, "eval_steps_per_second": 14.476, "step": 14250 }, { "epoch": 26.0, "learning_rate": 2.409330985915493e-05, "loss": 0.3523, "step": 14820 }, { "epoch": 26.0, "eval_loss": 0.7058804035186768, "eval_runtime": 1514.5913, "eval_samples_per_second": 926.192, "eval_steps_per_second": 14.472, "step": 14820 }, { "epoch": 27.0, "learning_rate": 2.3089788732394367e-05, "loss": 0.3478, "step": 15390 }, { "epoch": 27.0, "eval_loss": 0.7150311470031738, "eval_runtime": 1515.4959, "eval_samples_per_second": 925.64, "eval_steps_per_second": 14.463, "step": 15390 }, { "epoch": 28.0, "learning_rate": 2.2086267605633804e-05, "loss": 0.3436, "step": 15960 }, { "epoch": 28.0, "eval_loss": 0.715233564376831, "eval_runtime": 1515.1776, "eval_samples_per_second": 925.834, "eval_steps_per_second": 14.466, "step": 15960 }, { "epoch": 29.0, "learning_rate": 2.108274647887324e-05, "loss": 0.3396, "step": 16530 }, { "epoch": 29.0, "eval_loss": 0.7200678586959839, "eval_runtime": 1514.8194, "eval_samples_per_second": 926.053, "eval_steps_per_second": 14.47, "step": 16530 }, { "epoch": 30.0, "learning_rate": 2.0079225352112678e-05, "loss": 0.3358, "step": 17100 }, { "epoch": 30.0, "eval_loss": 0.713365375995636, "eval_runtime": 1513.5909, "eval_samples_per_second": 926.805, "eval_steps_per_second": 14.481, "step": 17100 }, { "epoch": 31.0, "learning_rate": 1.9075704225352115e-05, "loss": 0.3323, "step": 17670 }, { "epoch": 31.0, "eval_loss": 0.7232212424278259, "eval_runtime": 1515.152, "eval_samples_per_second": 925.85, "eval_steps_per_second": 14.467, "step": 17670 }, { "epoch": 32.0, "learning_rate": 1.8072183098591548e-05, "loss": 0.3291, "step": 18240 }, { "epoch": 32.0, "eval_loss": 0.7199446558952332, "eval_runtime": 1514.7283, "eval_samples_per_second": 926.109, "eval_steps_per_second": 14.471, "step": 18240 }, { "epoch": 33.0, "learning_rate": 1.706866197183099e-05, "loss": 0.3259, "step": 18810 }, { "epoch": 33.0, "eval_loss": 0.7213279604911804, "eval_runtime": 1513.6184, "eval_samples_per_second": 926.788, "eval_steps_per_second": 14.481, "step": 18810 }, { "epoch": 34.0, "learning_rate": 1.6065140845070422e-05, "loss": 0.3231, "step": 19380 }, { "epoch": 34.0, "eval_loss": 0.7236403226852417, "eval_runtime": 1514.5169, "eval_samples_per_second": 926.238, "eval_steps_per_second": 14.473, "step": 19380 } ], "max_steps": 28500, "num_train_epochs": 50, "total_flos": 3.0481579178459136e+18, "trial_name": null, "trial_params": null }