{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 1287, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "eval_loss": 2.3746426105499268, "eval_runtime": 1198.5011, "eval_samples_per_second": 10.489, "eval_steps_per_second": 0.164, "step": 25 }, { "epoch": 0.12, "eval_loss": 2.335777521133423, "eval_runtime": 1198.5148, "eval_samples_per_second": 10.489, "eval_steps_per_second": 0.164, "step": 50 }, { "epoch": 0.17, "eval_loss": 2.3206024169921875, "eval_runtime": 1198.298, "eval_samples_per_second": 10.491, "eval_steps_per_second": 0.164, "step": 75 }, { "epoch": 0.23, "eval_loss": 2.3067309856414795, "eval_runtime": 1198.4272, "eval_samples_per_second": 10.49, "eval_steps_per_second": 0.164, "step": 100 }, { "epoch": 0.29, "eval_loss": 2.295646905899048, "eval_runtime": 1198.0238, "eval_samples_per_second": 10.493, "eval_steps_per_second": 0.164, "step": 125 }, { "epoch": 0.35, "eval_loss": 2.288886308670044, "eval_runtime": 1198.6077, "eval_samples_per_second": 10.488, "eval_steps_per_second": 0.164, "step": 150 }, { "epoch": 0.41, "eval_loss": 2.2820334434509277, "eval_runtime": 1197.653, "eval_samples_per_second": 10.496, "eval_steps_per_second": 0.164, "step": 175 }, { "epoch": 0.47, "eval_loss": 2.275322675704956, "eval_runtime": 1197.9481, "eval_samples_per_second": 10.494, "eval_steps_per_second": 0.164, "step": 200 }, { "epoch": 0.52, "eval_loss": 2.269848108291626, "eval_runtime": 1197.9985, "eval_samples_per_second": 10.493, "eval_steps_per_second": 0.164, "step": 225 }, { "epoch": 0.58, "eval_loss": 2.266172170639038, "eval_runtime": 1198.9744, "eval_samples_per_second": 10.485, "eval_steps_per_second": 0.164, "step": 250 }, { "epoch": 0.64, "eval_loss": 2.2608513832092285, "eval_runtime": 1198.2986, "eval_samples_per_second": 10.491, "eval_steps_per_second": 0.164, "step": 275 }, { "epoch": 0.7, "eval_loss": 2.256772756576538, "eval_runtime": 1198.6541, "eval_samples_per_second": 10.488, "eval_steps_per_second": 0.164, "step": 300 }, { "epoch": 0.76, "eval_loss": 2.2526705265045166, "eval_runtime": 1198.4379, "eval_samples_per_second": 10.489, "eval_steps_per_second": 0.164, "step": 325 }, { "epoch": 0.82, "eval_loss": 2.2487242221832275, "eval_runtime": 1198.434, "eval_samples_per_second": 10.49, "eval_steps_per_second": 0.164, "step": 350 }, { "epoch": 0.87, "eval_loss": 2.2462055683135986, "eval_runtime": 1198.2638, "eval_samples_per_second": 10.491, "eval_steps_per_second": 0.164, "step": 375 }, { "epoch": 0.93, "eval_loss": 2.2430307865142822, "eval_runtime": 1198.979, "eval_samples_per_second": 10.485, "eval_steps_per_second": 0.164, "step": 400 }, { "epoch": 0.99, "eval_loss": 2.240046501159668, "eval_runtime": 1198.9558, "eval_samples_per_second": 10.485, "eval_steps_per_second": 0.164, "step": 425 }, { "epoch": 1.05, "eval_loss": 2.239072799682617, "eval_runtime": 1199.2167, "eval_samples_per_second": 10.483, "eval_steps_per_second": 0.164, "step": 450 }, { "epoch": 1.11, "eval_loss": 2.235506296157837, "eval_runtime": 1199.5308, "eval_samples_per_second": 10.48, "eval_steps_per_second": 0.164, "step": 475 }, { "epoch": 1.17, "learning_rate": 5e-06, "loss": 2.1094, "step": 500 }, { "epoch": 1.17, "eval_loss": 2.233208656311035, "eval_runtime": 1198.8451, "eval_samples_per_second": 10.486, "eval_steps_per_second": 0.164, "step": 500 }, { "epoch": 1.22, "eval_loss": 2.2311644554138184, "eval_runtime": 1198.7414, "eval_samples_per_second": 10.487, "eval_steps_per_second": 0.164, "step": 525 }, { "epoch": 1.28, "eval_loss": 2.2293059825897217, "eval_runtime": 1198.5095, "eval_samples_per_second": 10.489, "eval_steps_per_second": 0.164, "step": 550 }, { "epoch": 1.34, "eval_loss": 2.226186990737915, "eval_runtime": 1198.3579, "eval_samples_per_second": 10.49, "eval_steps_per_second": 0.164, "step": 575 }, { "epoch": 1.4, "eval_loss": 2.224607467651367, "eval_runtime": 1198.8254, "eval_samples_per_second": 10.486, "eval_steps_per_second": 0.164, "step": 600 }, { "epoch": 1.46, "eval_loss": 2.222740888595581, "eval_runtime": 1198.8308, "eval_samples_per_second": 10.486, "eval_steps_per_second": 0.164, "step": 625 }, { "epoch": 1.52, "eval_loss": 2.2205824851989746, "eval_runtime": 1197.986, "eval_samples_per_second": 10.493, "eval_steps_per_second": 0.164, "step": 650 }, { "epoch": 1.57, "eval_loss": 2.2183074951171875, "eval_runtime": 1198.7617, "eval_samples_per_second": 10.487, "eval_steps_per_second": 0.164, "step": 675 }, { "epoch": 1.63, "eval_loss": 2.216313123703003, "eval_runtime": 1197.8492, "eval_samples_per_second": 10.495, "eval_steps_per_second": 0.164, "step": 700 }, { "epoch": 1.69, "eval_loss": 2.214667320251465, "eval_runtime": 1198.1314, "eval_samples_per_second": 10.492, "eval_steps_per_second": 0.164, "step": 725 }, { "epoch": 1.75, "eval_loss": 2.2129483222961426, "eval_runtime": 1198.8701, "eval_samples_per_second": 10.486, "eval_steps_per_second": 0.164, "step": 750 }, { "epoch": 1.81, "eval_loss": 2.2119433879852295, "eval_runtime": 1198.3317, "eval_samples_per_second": 10.49, "eval_steps_per_second": 0.164, "step": 775 }, { "epoch": 1.86, "eval_loss": 2.210693836212158, "eval_runtime": 1198.0145, "eval_samples_per_second": 10.493, "eval_steps_per_second": 0.164, "step": 800 }, { "epoch": 1.92, "eval_loss": 2.209263801574707, "eval_runtime": 1198.6585, "eval_samples_per_second": 10.488, "eval_steps_per_second": 0.164, "step": 825 }, { "epoch": 1.98, "eval_loss": 2.20792293548584, "eval_runtime": 1198.1192, "eval_samples_per_second": 10.492, "eval_steps_per_second": 0.164, "step": 850 }, { "epoch": 2.04, "eval_loss": 2.2082931995391846, "eval_runtime": 1197.8097, "eval_samples_per_second": 10.495, "eval_steps_per_second": 0.164, "step": 875 }, { "epoch": 2.1, "eval_loss": 2.205972194671631, "eval_runtime": 1198.0467, "eval_samples_per_second": 10.493, "eval_steps_per_second": 0.164, "step": 900 }, { "epoch": 2.16, "eval_loss": 2.2038400173187256, "eval_runtime": 1197.5865, "eval_samples_per_second": 10.497, "eval_steps_per_second": 0.164, "step": 925 }, { "epoch": 2.21, "eval_loss": 2.2014477252960205, "eval_runtime": 1198.4564, "eval_samples_per_second": 10.489, "eval_steps_per_second": 0.164, "step": 950 }, { "epoch": 2.27, "eval_loss": 2.202155113220215, "eval_runtime": 1198.0855, "eval_samples_per_second": 10.493, "eval_steps_per_second": 0.164, "step": 975 }, { "epoch": 2.33, "learning_rate": 5e-06, "loss": 2.007, "step": 1000 }, { "epoch": 2.33, "eval_loss": 2.2006430625915527, "eval_runtime": 1198.775, "eval_samples_per_second": 10.487, "eval_steps_per_second": 0.164, "step": 1000 }, { "epoch": 2.39, "eval_loss": 2.1995391845703125, "eval_runtime": 1198.5076, "eval_samples_per_second": 10.489, "eval_steps_per_second": 0.164, "step": 1025 }, { "epoch": 2.45, "eval_loss": 2.199397087097168, "eval_runtime": 1198.3593, "eval_samples_per_second": 10.49, "eval_steps_per_second": 0.164, "step": 1050 }, { "epoch": 2.51, "eval_loss": 2.19655179977417, "eval_runtime": 1198.8863, "eval_samples_per_second": 10.486, "eval_steps_per_second": 0.164, "step": 1075 }, { "epoch": 2.56, "eval_loss": 2.196131467819214, "eval_runtime": 1198.1451, "eval_samples_per_second": 10.492, "eval_steps_per_second": 0.164, "step": 1100 }, { "epoch": 2.62, "eval_loss": 2.195343255996704, "eval_runtime": 1198.7177, "eval_samples_per_second": 10.487, "eval_steps_per_second": 0.164, "step": 1125 }, { "epoch": 2.68, "eval_loss": 2.194007158279419, "eval_runtime": 1199.5879, "eval_samples_per_second": 10.479, "eval_steps_per_second": 0.164, "step": 1150 }, { "epoch": 2.74, "eval_loss": 2.192256212234497, "eval_runtime": 1198.4679, "eval_samples_per_second": 10.489, "eval_steps_per_second": 0.164, "step": 1175 }, { "epoch": 2.8, "eval_loss": 2.1922357082366943, "eval_runtime": 1198.561, "eval_samples_per_second": 10.488, "eval_steps_per_second": 0.164, "step": 1200 }, { "epoch": 2.86, "eval_loss": 2.190948724746704, "eval_runtime": 1198.5536, "eval_samples_per_second": 10.488, "eval_steps_per_second": 0.164, "step": 1225 }, { "epoch": 2.91, "eval_loss": 2.188807487487793, "eval_runtime": 1199.0987, "eval_samples_per_second": 10.484, "eval_steps_per_second": 0.164, "step": 1250 }, { "epoch": 2.97, "eval_loss": 2.1877503395080566, "eval_runtime": 1199.1927, "eval_samples_per_second": 10.483, "eval_steps_per_second": 0.164, "step": 1275 }, { "epoch": 3.0, "step": 1287, "total_flos": 5.915384239745925e+18, "train_loss": 2.0404130036088044, "train_runtime": 190042.9753, "train_samples_per_second": 0.867, "train_steps_per_second": 0.007 } ], "max_steps": 1287, "num_train_epochs": 3, "total_flos": 5.915384239745925e+18, "trial_name": null, "trial_params": null }