{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994739610731194, "global_step": 950, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_loss": 2.40625, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 15 }, { "epoch": 0.03, "eval_loss": 2.35546875, "eval_runtime": 1.9286, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 30 }, { "epoch": 0.05, "eval_loss": 2.341796875, "eval_runtime": 1.9286, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 45 }, { "epoch": 0.06, "eval_loss": 2.33203125, "eval_runtime": 1.9285, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 60 }, { "epoch": 0.08, "eval_loss": 2.32421875, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 75 }, { "epoch": 0.09, "eval_loss": 2.318359375, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 90 }, { "epoch": 0.11, "eval_loss": 2.314453125, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 105 }, { "epoch": 0.13, "eval_loss": 2.3125, "eval_runtime": 1.9287, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 120 }, { "epoch": 0.14, "eval_loss": 2.3125, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 135 }, { "epoch": 0.16, "eval_loss": 2.306640625, "eval_runtime": 1.9287, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 150 }, { "epoch": 0.17, "eval_loss": 2.30078125, "eval_runtime": 1.9284, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 165 }, { "epoch": 0.19, "eval_loss": 2.30078125, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 180 }, { "epoch": 0.21, "eval_loss": 2.302734375, "eval_runtime": 1.9292, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 195 }, { "epoch": 0.22, "eval_loss": 2.294921875, "eval_runtime": 1.9287, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 210 }, { "epoch": 0.24, "eval_loss": 2.287109375, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 225 }, { "epoch": 0.25, "eval_loss": 2.283203125, "eval_runtime": 1.9283, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 240 }, { "epoch": 0.27, "eval_loss": 2.279296875, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 255 }, { "epoch": 0.28, "eval_loss": 2.27734375, "eval_runtime": 1.929, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 270 }, { "epoch": 0.3, "eval_loss": 2.248046875, "eval_runtime": 1.9292, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 285 }, { "epoch": 0.32, "eval_loss": 2.2265625, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 300 }, { "epoch": 0.33, "eval_loss": 2.2265625, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 315 }, { "epoch": 0.35, "eval_loss": 2.2265625, "eval_runtime": 1.9287, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 330 }, { "epoch": 0.36, "eval_loss": 2.2265625, "eval_runtime": 1.9316, "eval_samples_per_second": 1.035, "eval_steps_per_second": 0.518, "step": 345 }, { "epoch": 0.38, "eval_loss": 2.228515625, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 360 }, { "epoch": 0.39, "eval_loss": 2.2265625, "eval_runtime": 1.9286, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 375 }, { "epoch": 0.41, "eval_loss": 2.224609375, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 390 }, { "epoch": 0.43, "eval_loss": 2.22265625, "eval_runtime": 1.9294, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 405 }, { "epoch": 0.44, "eval_loss": 2.220703125, "eval_runtime": 1.9284, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 420 }, { "epoch": 0.46, "eval_loss": 2.22265625, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 435 }, { "epoch": 0.47, "eval_loss": 2.22265625, "eval_runtime": 1.9295, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 450 }, { "epoch": 0.49, "eval_loss": 2.220703125, "eval_runtime": 1.9284, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 465 }, { "epoch": 0.5, "eval_loss": 2.220703125, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 480 }, { "epoch": 0.52, "eval_loss": 2.21875, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 495 }, { "epoch": 0.53, "learning_rate": 5e-06, "loss": 2.374, "step": 500 }, { "epoch": 0.54, "eval_loss": 2.203125, "eval_runtime": 1.937, "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.516, "step": 510 }, { "epoch": 0.55, "eval_loss": 2.1875, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 525 }, { "epoch": 0.57, "eval_loss": 2.185546875, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 540 }, { "epoch": 0.58, "eval_loss": 2.140625, "eval_runtime": 1.9294, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 555 }, { "epoch": 0.6, "eval_loss": 2.134765625, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 570 }, { "epoch": 0.62, "eval_loss": 2.134765625, "eval_runtime": 1.9285, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 585 }, { "epoch": 0.63, "eval_loss": 2.134765625, "eval_runtime": 1.929, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 600 }, { "epoch": 0.65, "eval_loss": 2.1328125, "eval_runtime": 1.929, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 615 }, { "epoch": 0.66, "eval_loss": 2.1328125, "eval_runtime": 1.9284, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 630 }, { "epoch": 0.68, "eval_loss": 2.130859375, "eval_runtime": 1.929, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 645 }, { "epoch": 0.69, "eval_loss": 2.12890625, "eval_runtime": 1.9293, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 660 }, { "epoch": 0.71, "eval_loss": 2.12890625, "eval_runtime": 1.9283, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 675 }, { "epoch": 0.73, "eval_loss": 2.1328125, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 690 }, { "epoch": 0.74, "eval_loss": 2.1328125, "eval_runtime": 1.9288, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 705 }, { "epoch": 0.76, "eval_loss": 2.130859375, "eval_runtime": 1.9294, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 720 }, { "epoch": 0.77, "eval_loss": 2.134765625, "eval_runtime": 1.929, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 735 }, { "epoch": 0.79, "eval_loss": 2.1328125, "eval_runtime": 1.9295, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 750 }, { "epoch": 0.8, "eval_loss": 2.134765625, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 765 }, { "epoch": 0.82, "eval_loss": 2.134765625, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 780 }, { "epoch": 0.84, "eval_loss": 2.134765625, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 795 }, { "epoch": 0.85, "eval_loss": 2.1328125, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 810 }, { "epoch": 0.87, "eval_loss": 2.12890625, "eval_runtime": 1.9286, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.519, "step": 825 }, { "epoch": 0.88, "eval_loss": 2.119140625, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 840 }, { "epoch": 0.9, "eval_loss": 2.12109375, "eval_runtime": 1.9287, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 855 }, { "epoch": 0.92, "eval_loss": 2.1171875, "eval_runtime": 1.9289, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 870 }, { "epoch": 0.93, "eval_loss": 2.119140625, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 885 }, { "epoch": 0.95, "eval_loss": 2.123046875, "eval_runtime": 1.9294, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 900 }, { "epoch": 0.96, "eval_loss": 2.12109375, "eval_runtime": 1.9292, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 915 }, { "epoch": 0.98, "eval_loss": 2.119140625, "eval_runtime": 1.9287, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 930 }, { "epoch": 0.99, "eval_loss": 2.12109375, "eval_runtime": 1.9291, "eval_samples_per_second": 1.037, "eval_steps_per_second": 0.518, "step": 945 }, { "epoch": 1.0, "step": 950, "total_flos": 1.2380121253648794e+17, "train_loss": 2.3266365131578945, "train_runtime": 29836.7817, "train_samples_per_second": 0.255, "train_steps_per_second": 0.032 } ], "max_steps": 950, "num_train_epochs": 1, "total_flos": 1.2380121253648794e+17, "trial_name": null, "trial_params": null }