{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.2067952933391237, "global_step": 700000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 6.893176444637453e-06, "loss": 11.9733, "step": 10000 }, { "epoch": 0.1, "learning_rate": 1.3786352889274907e-05, "loss": 1.1347, "step": 20000 }, { "epoch": 0.05, "learning_rate": 2.067952933391236e-05, "loss": 0.9519, "step": 30000 }, { "epoch": 0.1, "learning_rate": 2.7572705778549813e-05, "loss": 0.8738, "step": 40000 }, { "epoch": 0.16, "learning_rate": 3.446588222318727e-05, "loss": 0.8298, "step": 50000 }, { "epoch": 0.21, "learning_rate": 4.135905866782472e-05, "loss": 0.7905, "step": 60000 }, { "epoch": 0.26, "learning_rate": 4.825223511246217e-05, "loss": 0.6585, "step": 70000 }, { "epoch": 0.05, "learning_rate": 5.5145411557099626e-05, "loss": 0.5349, "step": 80000 }, { "epoch": 0.1, "learning_rate": 6.203858800173708e-05, "loss": 0.4895, "step": 90000 }, { "epoch": 0.16, "learning_rate": 6.893176444637454e-05, "loss": 0.4541, "step": 100000 }, { "epoch": 0.21, "learning_rate": 7.582494089101199e-05, "loss": 0.4319, "step": 110000 }, { "epoch": 0.26, "learning_rate": 8.271811733564945e-05, "loss": 0.4165, "step": 120000 }, { "epoch": 0.05, "learning_rate": 8.96112937802869e-05, "loss": 0.4059, "step": 130000 }, { "epoch": 0.05, "learning_rate": 9.650447022492434e-05, "loss": 0.3957, "step": 140000 }, { "epoch": 0.1, "learning_rate": 9.982117649107569e-05, "loss": 0.388, "step": 150000 }, { "epoch": 0.16, "learning_rate": 9.945837773083162e-05, "loss": 0.3787, "step": 160000 }, { "epoch": 0.21, "learning_rate": 9.909557897058755e-05, "loss": 0.3705, "step": 170000 }, { "epoch": 0.26, "learning_rate": 9.873278021034346e-05, "loss": 0.3678, "step": 180000 }, { "epoch": 0.05, "learning_rate": 9.836998145009939e-05, "loss": 0.3595, "step": 190000 }, { "epoch": 0.1, "learning_rate": 9.800718268985532e-05, "loss": 0.3543, "step": 200000 }, { "epoch": 0.16, "learning_rate": 9.764438392961124e-05, "loss": 0.3495, "step": 210000 }, { "epoch": 0.21, "learning_rate": 9.728158516936716e-05, "loss": 0.3452, "step": 220000 }, { "epoch": 0.26, "learning_rate": 9.691878640912308e-05, "loss": 0.3419, "step": 230000 }, { "epoch": 1.05, "learning_rate": 9.655598764887901e-05, "loss": 0.3365, "step": 240000 }, { "epoch": 1.1, "learning_rate": 9.619318888863494e-05, "loss": 0.3336, "step": 250000 }, { "epoch": 1.05, "learning_rate": 9.583039012839085e-05, "loss": 0.331, "step": 260000 }, { "epoch": 1.1, "learning_rate": 9.546759136814678e-05, "loss": 0.3282, "step": 270000 }, { "epoch": 1.16, "learning_rate": 9.510479260790271e-05, "loss": 0.3263, "step": 280000 }, { "epoch": 1.21, "learning_rate": 9.474199384765862e-05, "loss": 0.3234, "step": 290000 }, { "epoch": 1.26, "learning_rate": 9.437919508741455e-05, "loss": 0.3213, "step": 300000 }, { "epoch": 1.05, "learning_rate": 9.401639632717048e-05, "loss": 0.3173, "step": 310000 }, { "epoch": 1.1, "learning_rate": 9.36535975669264e-05, "loss": 0.3166, "step": 320000 }, { "epoch": 1.16, "learning_rate": 9.329079880668232e-05, "loss": 0.3137, "step": 330000 }, { "epoch": 1.21, "learning_rate": 9.292800004643825e-05, "loss": 0.3129, "step": 340000 }, { "epoch": 1.26, "learning_rate": 9.256520128619417e-05, "loss": 0.3125, "step": 350000 }, { "epoch": 1.05, "learning_rate": 9.22024025259501e-05, "loss": 0.3079, "step": 360000 }, { "epoch": 1.1, "learning_rate": 9.183960376570602e-05, "loss": 0.3074, "step": 370000 }, { "epoch": 1.16, "learning_rate": 9.147680500546194e-05, "loss": 0.3062, "step": 380000 }, { "epoch": 1.21, "learning_rate": 9.111400624521787e-05, "loss": 0.3052, "step": 390000 }, { "epoch": 1.26, "learning_rate": 9.07512074849738e-05, "loss": 0.3037, "step": 400000 }, { "epoch": 2.05, "learning_rate": 9.038840872472971e-05, "loss": 0.3003, "step": 410000 }, { "epoch": 2.1, "learning_rate": 9.002560996448564e-05, "loss": 0.3007, "step": 420000 }, { "epoch": 2.16, "learning_rate": 8.966281120424155e-05, "loss": 0.2991, "step": 430000 }, { "epoch": 2.21, "learning_rate": 8.930001244399748e-05, "loss": 0.2972, "step": 440000 }, { "epoch": 2.26, "learning_rate": 8.893721368375341e-05, "loss": 0.2974, "step": 450000 }, { "epoch": 2.05, "learning_rate": 8.857441492350932e-05, "loss": 0.2938, "step": 460000 }, { "epoch": 2.1, "learning_rate": 8.821161616326525e-05, "loss": 0.2921, "step": 470000 }, { "epoch": 2.16, "learning_rate": 8.784881740302118e-05, "loss": 0.2932, "step": 480000 }, { "epoch": 2.21, "learning_rate": 8.74860186427771e-05, "loss": 0.2914, "step": 490000 }, { "epoch": 2.26, "learning_rate": 8.712321988253302e-05, "loss": 0.292, "step": 500000 }, { "epoch": 2.05, "learning_rate": 8.676042112228895e-05, "loss": 0.2871, "step": 510000 }, { "epoch": 2.1, "learning_rate": 8.639762236204487e-05, "loss": 0.2884, "step": 520000 }, { "epoch": 2.16, "learning_rate": 8.60348236018008e-05, "loss": 0.2874, "step": 530000 }, { "epoch": 2.21, "learning_rate": 8.567202484155672e-05, "loss": 0.2859, "step": 540000 }, { "epoch": 2.26, "learning_rate": 8.530922608131264e-05, "loss": 0.2867, "step": 550000 }, { "epoch": 2.05, "learning_rate": 8.494642732106857e-05, "loss": 0.2828, "step": 560000 }, { "epoch": 2.05, "learning_rate": 8.45836285608245e-05, "loss": 0.2829, "step": 570000 }, { "epoch": 2.1, "learning_rate": 8.422082980058041e-05, "loss": 0.2817, "step": 580000 }, { "epoch": 2.16, "learning_rate": 8.385803104033634e-05, "loss": 0.281, "step": 590000 }, { "epoch": 2.21, "learning_rate": 8.349523228009227e-05, "loss": 0.2819, "step": 600000 }, { "epoch": 2.26, "learning_rate": 8.313243351984818e-05, "loss": 0.2803, "step": 610000 }, { "epoch": 3.05, "learning_rate": 8.276963475960411e-05, "loss": 0.2782, "step": 620000 }, { "epoch": 3.1, "learning_rate": 8.240683599936003e-05, "loss": 0.2779, "step": 630000 }, { "epoch": 3.16, "learning_rate": 8.204403723911594e-05, "loss": 0.2793, "step": 640000 }, { "epoch": 3.21, "learning_rate": 8.168123847887187e-05, "loss": 0.2774, "step": 650000 }, { "epoch": 3.26, "learning_rate": 8.13184397186278e-05, "loss": 0.2765, "step": 660000 }, { "epoch": 3.05, "learning_rate": 8.095564095838371e-05, "loss": 0.2738, "step": 670000 }, { "epoch": 3.1, "learning_rate": 8.059284219813964e-05, "loss": 0.2742, "step": 680000 }, { "epoch": 3.16, "learning_rate": 8.023004343789557e-05, "loss": 0.2746, "step": 690000 }, { "epoch": 3.21, "learning_rate": 7.986724467765148e-05, "loss": 0.274, "step": 700000 } ], "max_steps": 2901420, "num_train_epochs": 15, "total_flos": 1.6750927872e+18, "trial_name": null, "trial_params": null }