{ "best_metric": 3.029510498046875, "best_model_checkpoint": "trained_models/microsoftDialoGPTmedium_crd3/checkpoint-9912", "epoch": 2.957040572792363, "eval_steps": 168, "global_step": 9912, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.949880668257757e-05, "loss": 3.805, "step": 168 }, { "epoch": 0.05, "eval_loss": 3.3926753997802734, "eval_runtime": 142.7844, "eval_samples_per_second": 29.653, "eval_steps_per_second": 3.712, "step": 168 }, { "epoch": 0.1, "learning_rate": 4.899761336515513e-05, "loss": 3.3486, "step": 336 }, { "epoch": 0.1, "eval_loss": 3.2977957725524902, "eval_runtime": 142.7066, "eval_samples_per_second": 29.669, "eval_steps_per_second": 3.714, "step": 336 }, { "epoch": 0.15, "learning_rate": 4.84964200477327e-05, "loss": 3.2539, "step": 504 }, { "epoch": 0.15, "eval_loss": 3.2512104511260986, "eval_runtime": 142.7113, "eval_samples_per_second": 29.668, "eval_steps_per_second": 3.714, "step": 504 }, { "epoch": 0.2, "learning_rate": 4.7995226730310264e-05, "loss": 3.2009, "step": 672 }, { "epoch": 0.2, "eval_loss": 3.221975564956665, "eval_runtime": 142.7296, "eval_samples_per_second": 29.664, "eval_steps_per_second": 3.713, "step": 672 }, { "epoch": 0.25, "learning_rate": 4.749403341288783e-05, "loss": 3.1685, "step": 840 }, { "epoch": 0.25, "eval_loss": 3.197237014770508, "eval_runtime": 142.735, "eval_samples_per_second": 29.663, "eval_steps_per_second": 3.713, "step": 840 }, { "epoch": 0.3, "learning_rate": 4.6992840095465395e-05, "loss": 3.1332, "step": 1008 }, { "epoch": 0.3, "eval_loss": 3.1779494285583496, "eval_runtime": 142.7443, "eval_samples_per_second": 29.661, "eval_steps_per_second": 3.713, "step": 1008 }, { "epoch": 0.35, "learning_rate": 4.649164677804296e-05, "loss": 3.1231, "step": 1176 }, { "epoch": 0.35, "eval_loss": 3.163996696472168, "eval_runtime": 142.7827, "eval_samples_per_second": 29.653, "eval_steps_per_second": 3.712, "step": 1176 }, { "epoch": 0.4, "learning_rate": 4.5990453460620526e-05, "loss": 3.1005, "step": 1344 }, { "epoch": 0.4, "eval_loss": 3.1513357162475586, "eval_runtime": 142.6836, "eval_samples_per_second": 29.674, "eval_steps_per_second": 3.715, "step": 1344 }, { "epoch": 0.45, "learning_rate": 4.548926014319809e-05, "loss": 3.0952, "step": 1512 }, { "epoch": 0.45, "eval_loss": 3.1407930850982666, "eval_runtime": 142.6804, "eval_samples_per_second": 29.675, "eval_steps_per_second": 3.715, "step": 1512 }, { "epoch": 0.5, "learning_rate": 4.498806682577566e-05, "loss": 3.0903, "step": 1680 }, { "epoch": 0.5, "eval_loss": 3.1297414302825928, "eval_runtime": 142.7084, "eval_samples_per_second": 29.669, "eval_steps_per_second": 3.714, "step": 1680 }, { "epoch": 0.55, "learning_rate": 4.448687350835322e-05, "loss": 3.0737, "step": 1848 }, { "epoch": 0.55, "eval_loss": 3.120724678039551, "eval_runtime": 142.7344, "eval_samples_per_second": 29.663, "eval_steps_per_second": 3.713, "step": 1848 }, { "epoch": 0.6, "learning_rate": 4.398568019093079e-05, "loss": 3.0681, "step": 2016 }, { "epoch": 0.6, "eval_loss": 3.1170568466186523, "eval_runtime": 142.7306, "eval_samples_per_second": 29.664, "eval_steps_per_second": 3.713, "step": 2016 }, { "epoch": 0.65, "learning_rate": 4.348448687350836e-05, "loss": 3.034, "step": 2184 }, { "epoch": 0.65, "eval_loss": 3.1076815128326416, "eval_runtime": 142.7258, "eval_samples_per_second": 29.665, "eval_steps_per_second": 3.713, "step": 2184 }, { "epoch": 0.7, "learning_rate": 4.298329355608592e-05, "loss": 3.0319, "step": 2352 }, { "epoch": 0.7, "eval_loss": 3.1014297008514404, "eval_runtime": 142.7172, "eval_samples_per_second": 29.667, "eval_steps_per_second": 3.714, "step": 2352 }, { "epoch": 0.75, "learning_rate": 4.2482100238663484e-05, "loss": 3.0075, "step": 2520 }, { "epoch": 0.75, "eval_loss": 3.0964250564575195, "eval_runtime": 142.7047, "eval_samples_per_second": 29.67, "eval_steps_per_second": 3.714, "step": 2520 }, { "epoch": 0.8, "learning_rate": 4.1980906921241056e-05, "loss": 3.0282, "step": 2688 }, { "epoch": 0.8, "eval_loss": 3.0913186073303223, "eval_runtime": 142.7245, "eval_samples_per_second": 29.666, "eval_steps_per_second": 3.713, "step": 2688 }, { "epoch": 0.85, "learning_rate": 4.1479713603818615e-05, "loss": 3.0055, "step": 2856 }, { "epoch": 0.85, "eval_loss": 3.0834176540374756, "eval_runtime": 142.7795, "eval_samples_per_second": 29.654, "eval_steps_per_second": 3.712, "step": 2856 }, { "epoch": 0.9, "learning_rate": 4.097852028639618e-05, "loss": 3.0101, "step": 3024 }, { "epoch": 0.9, "eval_loss": 3.0793018341064453, "eval_runtime": 142.7762, "eval_samples_per_second": 29.655, "eval_steps_per_second": 3.712, "step": 3024 }, { "epoch": 0.95, "learning_rate": 4.047732696897375e-05, "loss": 3.0017, "step": 3192 }, { "epoch": 0.95, "eval_loss": 3.075371742248535, "eval_runtime": 142.7297, "eval_samples_per_second": 29.664, "eval_steps_per_second": 3.713, "step": 3192 }, { "epoch": 1.0, "learning_rate": 3.997613365155131e-05, "loss": 2.977, "step": 3360 }, { "epoch": 1.0, "eval_loss": 3.0743861198425293, "eval_runtime": 142.7454, "eval_samples_per_second": 29.661, "eval_steps_per_second": 3.713, "step": 3360 }, { "epoch": 1.05, "learning_rate": 3.9474940334128877e-05, "loss": 2.8771, "step": 3528 }, { "epoch": 1.05, "eval_loss": 3.0757055282592773, "eval_runtime": 142.7589, "eval_samples_per_second": 29.658, "eval_steps_per_second": 3.713, "step": 3528 }, { "epoch": 1.1, "learning_rate": 3.897374701670645e-05, "loss": 2.8633, "step": 3696 }, { "epoch": 1.1, "eval_loss": 3.0775437355041504, "eval_runtime": 142.7329, "eval_samples_per_second": 29.664, "eval_steps_per_second": 3.713, "step": 3696 }, { "epoch": 1.15, "learning_rate": 3.8472553699284014e-05, "loss": 2.8591, "step": 3864 }, { "epoch": 1.15, "eval_loss": 3.0708892345428467, "eval_runtime": 142.7479, "eval_samples_per_second": 29.661, "eval_steps_per_second": 3.713, "step": 3864 }, { "epoch": 1.2, "learning_rate": 3.797136038186157e-05, "loss": 2.8625, "step": 4032 }, { "epoch": 1.2, "eval_loss": 3.0683975219726562, "eval_runtime": 142.757, "eval_samples_per_second": 29.659, "eval_steps_per_second": 3.713, "step": 4032 }, { "epoch": 1.25, "learning_rate": 3.7470167064439145e-05, "loss": 2.8605, "step": 4200 }, { "epoch": 1.25, "eval_loss": 3.066983222961426, "eval_runtime": 142.718, "eval_samples_per_second": 29.667, "eval_steps_per_second": 3.714, "step": 4200 }, { "epoch": 1.3, "learning_rate": 3.696897374701671e-05, "loss": 2.8466, "step": 4368 }, { "epoch": 1.3, "eval_loss": 3.0646440982818604, "eval_runtime": 142.755, "eval_samples_per_second": 29.659, "eval_steps_per_second": 3.713, "step": 4368 }, { "epoch": 1.35, "learning_rate": 3.6467780429594276e-05, "loss": 2.8398, "step": 4536 }, { "epoch": 1.35, "eval_loss": 3.0618984699249268, "eval_runtime": 142.7437, "eval_samples_per_second": 29.662, "eval_steps_per_second": 3.713, "step": 4536 }, { "epoch": 1.4, "learning_rate": 3.596658711217184e-05, "loss": 2.8502, "step": 4704 }, { "epoch": 1.4, "eval_loss": 3.059511661529541, "eval_runtime": 142.746, "eval_samples_per_second": 29.661, "eval_steps_per_second": 3.713, "step": 4704 }, { "epoch": 1.45, "learning_rate": 3.546539379474941e-05, "loss": 2.8523, "step": 4872 }, { "epoch": 1.45, "eval_loss": 3.0564985275268555, "eval_runtime": 142.7041, "eval_samples_per_second": 29.67, "eval_steps_per_second": 3.714, "step": 4872 }, { "epoch": 1.5, "learning_rate": 3.496420047732697e-05, "loss": 2.8545, "step": 5040 }, { "epoch": 1.5, "eval_loss": 3.0538723468780518, "eval_runtime": 142.7089, "eval_samples_per_second": 29.669, "eval_steps_per_second": 3.714, "step": 5040 }, { "epoch": 1.55, "learning_rate": 3.446300715990454e-05, "loss": 2.8431, "step": 5208 }, { "epoch": 1.55, "eval_loss": 3.053504705429077, "eval_runtime": 142.7279, "eval_samples_per_second": 29.665, "eval_steps_per_second": 3.713, "step": 5208 }, { "epoch": 1.6, "learning_rate": 3.39618138424821e-05, "loss": 2.865, "step": 5376 }, { "epoch": 1.6, "eval_loss": 3.0489370822906494, "eval_runtime": 142.7484, "eval_samples_per_second": 29.661, "eval_steps_per_second": 3.713, "step": 5376 }, { "epoch": 1.65, "learning_rate": 3.346062052505967e-05, "loss": 2.8412, "step": 5544 }, { "epoch": 1.65, "eval_loss": 3.048715114593506, "eval_runtime": 142.7028, "eval_samples_per_second": 29.67, "eval_steps_per_second": 3.714, "step": 5544 }, { "epoch": 1.7, "learning_rate": 3.2959427207637234e-05, "loss": 2.8382, "step": 5712 }, { "epoch": 1.7, "eval_loss": 3.0453927516937256, "eval_runtime": 142.6981, "eval_samples_per_second": 29.671, "eval_steps_per_second": 3.714, "step": 5712 }, { "epoch": 1.75, "learning_rate": 3.24582338902148e-05, "loss": 2.8356, "step": 5880 }, { "epoch": 1.75, "eval_loss": 3.0460968017578125, "eval_runtime": 142.6846, "eval_samples_per_second": 29.674, "eval_steps_per_second": 3.714, "step": 5880 }, { "epoch": 1.8, "learning_rate": 3.1957040572792365e-05, "loss": 2.8265, "step": 6048 }, { "epoch": 1.8, "eval_loss": 3.042219400405884, "eval_runtime": 142.7165, "eval_samples_per_second": 29.667, "eval_steps_per_second": 3.714, "step": 6048 }, { "epoch": 1.85, "learning_rate": 3.145584725536993e-05, "loss": 2.8301, "step": 6216 }, { "epoch": 1.85, "eval_loss": 3.0397562980651855, "eval_runtime": 142.7319, "eval_samples_per_second": 29.664, "eval_steps_per_second": 3.713, "step": 6216 }, { "epoch": 1.9, "learning_rate": 3.0954653937947496e-05, "loss": 2.8286, "step": 6384 }, { "epoch": 1.9, "eval_loss": 3.038638114929199, "eval_runtime": 142.7562, "eval_samples_per_second": 29.659, "eval_steps_per_second": 3.713, "step": 6384 }, { "epoch": 1.95, "learning_rate": 3.045346062052506e-05, "loss": 2.8416, "step": 6552 }, { "epoch": 1.95, "eval_loss": 3.035384178161621, "eval_runtime": 142.669, "eval_samples_per_second": 29.677, "eval_steps_per_second": 3.715, "step": 6552 }, { "epoch": 2.0, "learning_rate": 2.9952267303102627e-05, "loss": 2.8179, "step": 6720 }, { "epoch": 2.0, "eval_loss": 3.040719509124756, "eval_runtime": 142.723, "eval_samples_per_second": 29.666, "eval_steps_per_second": 3.713, "step": 6720 }, { "epoch": 2.05, "learning_rate": 2.9451073985680195e-05, "loss": 2.6965, "step": 6888 }, { "epoch": 2.05, "eval_loss": 3.0478203296661377, "eval_runtime": 142.7498, "eval_samples_per_second": 29.66, "eval_steps_per_second": 3.713, "step": 6888 }, { "epoch": 2.11, "learning_rate": 2.8949880668257757e-05, "loss": 2.7437, "step": 7056 }, { "epoch": 2.11, "eval_loss": 3.048868179321289, "eval_runtime": 142.7646, "eval_samples_per_second": 29.657, "eval_steps_per_second": 3.712, "step": 7056 }, { "epoch": 2.16, "learning_rate": 2.8448687350835323e-05, "loss": 2.7252, "step": 7224 }, { "epoch": 2.16, "eval_loss": 3.045444965362549, "eval_runtime": 142.7306, "eval_samples_per_second": 29.664, "eval_steps_per_second": 3.713, "step": 7224 }, { "epoch": 2.21, "learning_rate": 2.794749403341289e-05, "loss": 2.718, "step": 7392 }, { "epoch": 2.21, "eval_loss": 3.04612398147583, "eval_runtime": 142.7393, "eval_samples_per_second": 29.662, "eval_steps_per_second": 3.713, "step": 7392 }, { "epoch": 2.26, "learning_rate": 2.7446300715990454e-05, "loss": 2.7246, "step": 7560 }, { "epoch": 2.26, "eval_loss": 3.0447964668273926, "eval_runtime": 142.6751, "eval_samples_per_second": 29.676, "eval_steps_per_second": 3.715, "step": 7560 }, { "epoch": 2.31, "learning_rate": 2.694510739856802e-05, "loss": 2.7365, "step": 7728 }, { "epoch": 2.31, "eval_loss": 3.0433876514434814, "eval_runtime": 142.7111, "eval_samples_per_second": 29.668, "eval_steps_per_second": 3.714, "step": 7728 }, { "epoch": 2.36, "learning_rate": 2.6443914081145588e-05, "loss": 2.7242, "step": 7896 }, { "epoch": 2.36, "eval_loss": 3.044072151184082, "eval_runtime": 142.6648, "eval_samples_per_second": 29.678, "eval_steps_per_second": 3.715, "step": 7896 }, { "epoch": 2.41, "learning_rate": 2.594272076372315e-05, "loss": 2.7343, "step": 8064 }, { "epoch": 2.41, "eval_loss": 3.0415401458740234, "eval_runtime": 142.737, "eval_samples_per_second": 29.663, "eval_steps_per_second": 3.713, "step": 8064 }, { "epoch": 2.46, "learning_rate": 2.5441527446300715e-05, "loss": 2.7312, "step": 8232 }, { "epoch": 2.46, "eval_loss": 3.0406272411346436, "eval_runtime": 142.7448, "eval_samples_per_second": 29.661, "eval_steps_per_second": 3.713, "step": 8232 }, { "epoch": 2.51, "learning_rate": 2.494033412887828e-05, "loss": 2.7359, "step": 8400 }, { "epoch": 2.51, "eval_loss": 3.0369086265563965, "eval_runtime": 142.71, "eval_samples_per_second": 29.669, "eval_steps_per_second": 3.714, "step": 8400 }, { "epoch": 2.56, "learning_rate": 2.443914081145585e-05, "loss": 2.714, "step": 8568 }, { "epoch": 2.56, "eval_loss": 3.039921522140503, "eval_runtime": 142.7829, "eval_samples_per_second": 29.653, "eval_steps_per_second": 3.712, "step": 8568 }, { "epoch": 2.61, "learning_rate": 2.3937947494033415e-05, "loss": 2.7311, "step": 8736 }, { "epoch": 2.61, "eval_loss": 3.0377085208892822, "eval_runtime": 142.7212, "eval_samples_per_second": 29.666, "eval_steps_per_second": 3.714, "step": 8736 }, { "epoch": 2.66, "learning_rate": 2.3436754176610977e-05, "loss": 2.7191, "step": 8904 }, { "epoch": 2.66, "eval_loss": 3.036844253540039, "eval_runtime": 142.7078, "eval_samples_per_second": 29.669, "eval_steps_per_second": 3.714, "step": 8904 }, { "epoch": 2.71, "learning_rate": 2.2935560859188546e-05, "loss": 2.7246, "step": 9072 }, { "epoch": 2.71, "eval_loss": 3.0361039638519287, "eval_runtime": 142.7607, "eval_samples_per_second": 29.658, "eval_steps_per_second": 3.713, "step": 9072 }, { "epoch": 2.76, "learning_rate": 2.243436754176611e-05, "loss": 2.721, "step": 9240 }, { "epoch": 2.76, "eval_loss": 3.0351145267486572, "eval_runtime": 142.7366, "eval_samples_per_second": 29.663, "eval_steps_per_second": 3.713, "step": 9240 }, { "epoch": 2.81, "learning_rate": 2.1933174224343677e-05, "loss": 2.71, "step": 9408 }, { "epoch": 2.81, "eval_loss": 3.032681941986084, "eval_runtime": 142.729, "eval_samples_per_second": 29.665, "eval_steps_per_second": 3.713, "step": 9408 }, { "epoch": 2.86, "learning_rate": 2.1431980906921242e-05, "loss": 2.7252, "step": 9576 }, { "epoch": 2.86, "eval_loss": 3.033348798751831, "eval_runtime": 142.7408, "eval_samples_per_second": 29.662, "eval_steps_per_second": 3.713, "step": 9576 }, { "epoch": 2.91, "learning_rate": 2.0930787589498808e-05, "loss": 2.7181, "step": 9744 }, { "epoch": 2.91, "eval_loss": 3.0330820083618164, "eval_runtime": 142.7512, "eval_samples_per_second": 29.66, "eval_steps_per_second": 3.713, "step": 9744 }, { "epoch": 2.96, "learning_rate": 2.0429594272076373e-05, "loss": 2.7141, "step": 9912 }, { "epoch": 2.96, "eval_loss": 3.029510498046875, "eval_runtime": 142.7075, "eval_samples_per_second": 29.669, "eval_steps_per_second": 3.714, "step": 9912 } ], "logging_steps": 168, "max_steps": 16760, "num_train_epochs": 5, "save_steps": 168, "total_flos": 3.221291229039821e+16, "trial_name": null, "trial_params": null }