{ "best_metric": 1.2406948804855347, "best_model_checkpoint": "../llama2-9439-21sept/checkpoint-1000", "epoch": 1.7366136034732271, "eval_steps": 200, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 5.9999999999999995e-05, "loss": 3.4609, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.00011999999999999999, "loss": 2.8346, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.00017999999999999998, "loss": 1.7597, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.00023999999999999998, "loss": 1.542, "step": 80 }, { "epoch": 0.17, "learning_rate": 0.0003, "loss": 1.4567, "step": 100 }, { "epoch": 0.21, "learning_rate": 0.0002963076923076923, "loss": 1.4081, "step": 120 }, { "epoch": 0.24, "learning_rate": 0.0002926153846153846, "loss": 1.3805, "step": 140 }, { "epoch": 0.28, "learning_rate": 0.0002889230769230769, "loss": 1.3605, "step": 160 }, { "epoch": 0.31, "learning_rate": 0.00028523076923076923, "loss": 1.3329, "step": 180 }, { "epoch": 0.35, "learning_rate": 0.0002815384615384615, "loss": 1.3367, "step": 200 }, { "epoch": 0.35, "eval_loss": 1.3450770378112793, "eval_runtime": 6.306, "eval_samples_per_second": 47.574, "eval_steps_per_second": 1.269, "step": 200 }, { "epoch": 0.38, "learning_rate": 0.0002778461538461538, "loss": 1.3152, "step": 220 }, { "epoch": 0.42, "learning_rate": 0.0002741538461538461, "loss": 1.312, "step": 240 }, { "epoch": 0.45, "learning_rate": 0.00027046153846153843, "loss": 1.2883, "step": 260 }, { "epoch": 0.49, "learning_rate": 0.00026676923076923074, "loss": 1.2843, "step": 280 }, { "epoch": 0.52, "learning_rate": 0.00026307692307692306, "loss": 1.2609, "step": 300 }, { "epoch": 0.56, "learning_rate": 0.00025938461538461537, "loss": 1.2707, "step": 320 }, { "epoch": 0.59, "learning_rate": 0.0002556923076923077, "loss": 1.2624, "step": 340 }, { "epoch": 0.63, "learning_rate": 0.00025199999999999995, "loss": 1.273, "step": 360 }, { "epoch": 0.66, "learning_rate": 0.0002483076923076923, "loss": 1.251, "step": 380 }, { "epoch": 0.69, "learning_rate": 0.0002446153846153846, "loss": 1.2648, "step": 400 }, { "epoch": 0.69, "eval_loss": 1.292348861694336, "eval_runtime": 6.304, "eval_samples_per_second": 47.589, "eval_steps_per_second": 1.269, "step": 400 }, { "epoch": 0.73, "learning_rate": 0.0002409230769230769, "loss": 1.265, "step": 420 }, { "epoch": 0.76, "learning_rate": 0.0002372307692307692, "loss": 1.2516, "step": 440 }, { "epoch": 0.8, "learning_rate": 0.00023353846153846151, "loss": 1.2441, "step": 460 }, { "epoch": 0.83, "learning_rate": 0.00022984615384615383, "loss": 1.2204, "step": 480 }, { "epoch": 0.87, "learning_rate": 0.00022615384615384614, "loss": 1.2221, "step": 500 }, { "epoch": 0.9, "learning_rate": 0.00022246153846153846, "loss": 1.2246, "step": 520 }, { "epoch": 0.94, "learning_rate": 0.00021876923076923074, "loss": 1.2227, "step": 540 }, { "epoch": 0.97, "learning_rate": 0.00021507692307692306, "loss": 1.2124, "step": 560 }, { "epoch": 1.01, "learning_rate": 0.00021138461538461537, "loss": 1.2065, "step": 580 }, { "epoch": 1.04, "learning_rate": 0.00020769230769230766, "loss": 1.2106, "step": 600 }, { "epoch": 1.04, "eval_loss": 1.2669302225112915, "eval_runtime": 6.3128, "eval_samples_per_second": 47.523, "eval_steps_per_second": 1.267, "step": 600 }, { "epoch": 1.08, "learning_rate": 0.000204, "loss": 1.2046, "step": 620 }, { "epoch": 1.11, "learning_rate": 0.00020030769230769229, "loss": 1.2114, "step": 640 }, { "epoch": 1.15, "learning_rate": 0.0001966153846153846, "loss": 1.2122, "step": 660 }, { "epoch": 1.18, "learning_rate": 0.00019310769230769227, "loss": 1.1963, "step": 680 }, { "epoch": 1.22, "learning_rate": 0.00018941538461538461, "loss": 1.1965, "step": 700 }, { "epoch": 1.25, "learning_rate": 0.0001857230769230769, "loss": 1.1937, "step": 720 }, { "epoch": 1.29, "learning_rate": 0.00018203076923076921, "loss": 1.186, "step": 740 }, { "epoch": 1.32, "learning_rate": 0.00017833846153846153, "loss": 1.1732, "step": 760 }, { "epoch": 1.35, "learning_rate": 0.00017464615384615381, "loss": 1.1911, "step": 780 }, { "epoch": 1.39, "learning_rate": 0.00017095384615384616, "loss": 1.2153, "step": 800 }, { "epoch": 1.39, "eval_loss": 1.2513903379440308, "eval_runtime": 6.3069, "eval_samples_per_second": 47.567, "eval_steps_per_second": 1.268, "step": 800 }, { "epoch": 1.42, "learning_rate": 0.00016726153846153844, "loss": 1.1991, "step": 820 }, { "epoch": 1.46, "learning_rate": 0.00016356923076923073, "loss": 1.1934, "step": 840 }, { "epoch": 1.49, "learning_rate": 0.00015987692307692307, "loss": 1.2047, "step": 860 }, { "epoch": 1.53, "learning_rate": 0.00015618461538461536, "loss": 1.2204, "step": 880 }, { "epoch": 1.56, "learning_rate": 0.0001524923076923077, "loss": 1.1785, "step": 900 }, { "epoch": 1.6, "learning_rate": 0.00014879999999999998, "loss": 1.1756, "step": 920 }, { "epoch": 1.63, "learning_rate": 0.0001451076923076923, "loss": 1.214, "step": 940 }, { "epoch": 1.67, "learning_rate": 0.0001414153846153846, "loss": 1.1794, "step": 960 }, { "epoch": 1.7, "learning_rate": 0.00013772307692307693, "loss": 1.1946, "step": 980 }, { "epoch": 1.74, "learning_rate": 0.0001340307692307692, "loss": 1.175, "step": 1000 }, { "epoch": 1.74, "eval_loss": 1.2406948804855347, "eval_runtime": 6.3315, "eval_samples_per_second": 47.382, "eval_steps_per_second": 1.264, "step": 1000 } ], "logging_steps": 20, "max_steps": 1725, "num_train_epochs": 3, "save_steps": 200, "total_flos": 2.3920781700366336e+18, "trial_name": null, "trial_params": null }