{ "best_metric": 1.2222874164581299, "best_model_checkpoint": "../llama2-9439-21sept/checkpoint-1600", "epoch": 2.7785817655571634, "eval_steps": 200, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 5.9999999999999995e-05, "loss": 3.4609, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.00011999999999999999, "loss": 2.8346, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.00017999999999999998, "loss": 1.7597, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.00023999999999999998, "loss": 1.542, "step": 80 }, { "epoch": 0.17, "learning_rate": 0.0003, "loss": 1.4567, "step": 100 }, { "epoch": 0.21, "learning_rate": 0.0002963076923076923, "loss": 1.4081, "step": 120 }, { "epoch": 0.24, "learning_rate": 0.0002926153846153846, "loss": 1.3805, "step": 140 }, { "epoch": 0.28, "learning_rate": 0.0002889230769230769, "loss": 1.3605, "step": 160 }, { "epoch": 0.31, "learning_rate": 0.00028523076923076923, "loss": 1.3329, "step": 180 }, { "epoch": 0.35, "learning_rate": 0.0002815384615384615, "loss": 1.3367, "step": 200 }, { "epoch": 0.35, "eval_loss": 1.3450770378112793, "eval_runtime": 6.306, "eval_samples_per_second": 47.574, "eval_steps_per_second": 1.269, "step": 200 }, { "epoch": 0.38, "learning_rate": 0.0002778461538461538, "loss": 1.3152, "step": 220 }, { "epoch": 0.42, "learning_rate": 0.0002741538461538461, "loss": 1.312, "step": 240 }, { "epoch": 0.45, "learning_rate": 0.00027046153846153843, "loss": 1.2883, "step": 260 }, { "epoch": 0.49, "learning_rate": 0.00026676923076923074, "loss": 1.2843, "step": 280 }, { "epoch": 0.52, "learning_rate": 0.00026307692307692306, "loss": 1.2609, "step": 300 }, { "epoch": 0.56, "learning_rate": 0.00025938461538461537, "loss": 1.2707, "step": 320 }, { "epoch": 0.59, "learning_rate": 0.0002556923076923077, "loss": 1.2624, "step": 340 }, { "epoch": 0.63, "learning_rate": 0.00025199999999999995, "loss": 1.273, "step": 360 }, { "epoch": 0.66, "learning_rate": 0.0002483076923076923, "loss": 1.251, "step": 380 }, { "epoch": 0.69, "learning_rate": 0.0002446153846153846, "loss": 1.2648, "step": 400 }, { "epoch": 0.69, "eval_loss": 1.292348861694336, "eval_runtime": 6.304, "eval_samples_per_second": 47.589, "eval_steps_per_second": 1.269, "step": 400 }, { "epoch": 0.73, "learning_rate": 0.0002409230769230769, "loss": 1.265, "step": 420 }, { "epoch": 0.76, "learning_rate": 0.0002372307692307692, "loss": 1.2516, "step": 440 }, { "epoch": 0.8, "learning_rate": 0.00023353846153846151, "loss": 1.2441, "step": 460 }, { "epoch": 0.83, "learning_rate": 0.00022984615384615383, "loss": 1.2204, "step": 480 }, { "epoch": 0.87, "learning_rate": 0.00022615384615384614, "loss": 1.2221, "step": 500 }, { "epoch": 0.9, "learning_rate": 0.00022246153846153846, "loss": 1.2246, "step": 520 }, { "epoch": 0.94, "learning_rate": 0.00021876923076923074, "loss": 1.2227, "step": 540 }, { "epoch": 0.97, "learning_rate": 0.00021507692307692306, "loss": 1.2124, "step": 560 }, { "epoch": 1.01, "learning_rate": 0.00021138461538461537, "loss": 1.2065, "step": 580 }, { "epoch": 1.04, "learning_rate": 0.00020769230769230766, "loss": 1.2106, "step": 600 }, { "epoch": 1.04, "eval_loss": 1.2669302225112915, "eval_runtime": 6.3128, "eval_samples_per_second": 47.523, "eval_steps_per_second": 1.267, "step": 600 }, { "epoch": 1.08, "learning_rate": 0.000204, "loss": 1.2046, "step": 620 }, { "epoch": 1.11, "learning_rate": 0.00020030769230769229, "loss": 1.2114, "step": 640 }, { "epoch": 1.15, "learning_rate": 0.0001966153846153846, "loss": 1.2122, "step": 660 }, { "epoch": 1.18, "learning_rate": 0.00019310769230769227, "loss": 1.1963, "step": 680 }, { "epoch": 1.22, "learning_rate": 0.00018941538461538461, "loss": 1.1965, "step": 700 }, { "epoch": 1.25, "learning_rate": 0.0001857230769230769, "loss": 1.1937, "step": 720 }, { "epoch": 1.29, "learning_rate": 0.00018203076923076921, "loss": 1.186, "step": 740 }, { "epoch": 1.32, "learning_rate": 0.00017833846153846153, "loss": 1.1732, "step": 760 }, { "epoch": 1.35, "learning_rate": 0.00017464615384615381, "loss": 1.1911, "step": 780 }, { "epoch": 1.39, "learning_rate": 0.00017095384615384616, "loss": 1.2153, "step": 800 }, { "epoch": 1.39, "eval_loss": 1.2513903379440308, "eval_runtime": 6.3069, "eval_samples_per_second": 47.567, "eval_steps_per_second": 1.268, "step": 800 }, { "epoch": 1.42, "learning_rate": 0.00016726153846153844, "loss": 1.1991, "step": 820 }, { "epoch": 1.46, "learning_rate": 0.00016356923076923073, "loss": 1.1934, "step": 840 }, { "epoch": 1.49, "learning_rate": 0.00015987692307692307, "loss": 1.2047, "step": 860 }, { "epoch": 1.53, "learning_rate": 0.00015618461538461536, "loss": 1.2204, "step": 880 }, { "epoch": 1.56, "learning_rate": 0.0001524923076923077, "loss": 1.1785, "step": 900 }, { "epoch": 1.6, "learning_rate": 0.00014879999999999998, "loss": 1.1756, "step": 920 }, { "epoch": 1.63, "learning_rate": 0.0001451076923076923, "loss": 1.214, "step": 940 }, { "epoch": 1.67, "learning_rate": 0.0001414153846153846, "loss": 1.1794, "step": 960 }, { "epoch": 1.7, "learning_rate": 0.00013772307692307693, "loss": 1.1946, "step": 980 }, { "epoch": 1.74, "learning_rate": 0.0001340307692307692, "loss": 1.175, "step": 1000 }, { "epoch": 1.74, "eval_loss": 1.2406948804855347, "eval_runtime": 6.3315, "eval_samples_per_second": 47.382, "eval_steps_per_second": 1.264, "step": 1000 }, { "epoch": 1.77, "learning_rate": 0.00013033846153846153, "loss": 1.178, "step": 1020 }, { "epoch": 1.81, "learning_rate": 0.00012664615384615384, "loss": 1.1859, "step": 1040 }, { "epoch": 1.84, "learning_rate": 0.00012295384615384615, "loss": 1.1801, "step": 1060 }, { "epoch": 1.88, "learning_rate": 0.00011926153846153845, "loss": 1.1743, "step": 1080 }, { "epoch": 1.91, "learning_rate": 0.00011556923076923076, "loss": 1.1596, "step": 1100 }, { "epoch": 1.95, "learning_rate": 0.00011187692307692307, "loss": 1.1495, "step": 1120 }, { "epoch": 1.98, "learning_rate": 0.00010818461538461537, "loss": 1.1808, "step": 1140 }, { "epoch": 2.01, "learning_rate": 0.00010449230769230768, "loss": 1.1743, "step": 1160 }, { "epoch": 2.05, "learning_rate": 0.0001008, "loss": 1.1354, "step": 1180 }, { "epoch": 2.08, "learning_rate": 9.71076923076923e-05, "loss": 1.1712, "step": 1200 }, { "epoch": 2.08, "eval_loss": 1.2316911220550537, "eval_runtime": 6.3191, "eval_samples_per_second": 47.475, "eval_steps_per_second": 1.266, "step": 1200 }, { "epoch": 2.12, "learning_rate": 9.34153846153846e-05, "loss": 1.1709, "step": 1220 }, { "epoch": 2.15, "learning_rate": 8.972307692307691e-05, "loss": 1.1667, "step": 1240 }, { "epoch": 2.19, "learning_rate": 8.603076923076923e-05, "loss": 1.1781, "step": 1260 }, { "epoch": 2.22, "learning_rate": 8.233846153846154e-05, "loss": 1.1628, "step": 1280 }, { "epoch": 2.26, "learning_rate": 7.864615384615383e-05, "loss": 1.1723, "step": 1300 }, { "epoch": 2.29, "learning_rate": 7.495384615384615e-05, "loss": 1.1578, "step": 1320 }, { "epoch": 2.33, "learning_rate": 7.126153846153845e-05, "loss": 1.1399, "step": 1340 }, { "epoch": 2.36, "learning_rate": 6.756923076923077e-05, "loss": 1.1612, "step": 1360 }, { "epoch": 2.4, "learning_rate": 6.387692307692307e-05, "loss": 1.1582, "step": 1380 }, { "epoch": 2.43, "learning_rate": 6.0184615384615375e-05, "loss": 1.1534, "step": 1400 }, { "epoch": 2.43, "eval_loss": 1.2265406847000122, "eval_runtime": 6.3176, "eval_samples_per_second": 47.486, "eval_steps_per_second": 1.266, "step": 1400 }, { "epoch": 2.47, "learning_rate": 5.649230769230769e-05, "loss": 1.1626, "step": 1420 }, { "epoch": 2.5, "learning_rate": 5.279999999999999e-05, "loss": 1.1619, "step": 1440 }, { "epoch": 2.54, "learning_rate": 4.91076923076923e-05, "loss": 1.168, "step": 1460 }, { "epoch": 2.57, "learning_rate": 4.541538461538462e-05, "loss": 1.1475, "step": 1480 }, { "epoch": 2.6, "learning_rate": 4.172307692307692e-05, "loss": 1.1863, "step": 1500 }, { "epoch": 2.64, "learning_rate": 3.803076923076923e-05, "loss": 1.1452, "step": 1520 }, { "epoch": 2.67, "learning_rate": 3.433846153846154e-05, "loss": 1.1555, "step": 1540 }, { "epoch": 2.71, "learning_rate": 3.0646153846153845e-05, "loss": 1.1113, "step": 1560 }, { "epoch": 2.74, "learning_rate": 2.695384615384615e-05, "loss": 1.1429, "step": 1580 }, { "epoch": 2.78, "learning_rate": 2.326153846153846e-05, "loss": 1.1609, "step": 1600 }, { "epoch": 2.78, "eval_loss": 1.2222874164581299, "eval_runtime": 6.3153, "eval_samples_per_second": 47.504, "eval_steps_per_second": 1.267, "step": 1600 } ], "logging_steps": 20, "max_steps": 1725, "num_train_epochs": 3, "save_steps": 200, "total_flos": 3.827325072058614e+18, "trial_name": null, "trial_params": null }