{ "best_metric": 2.588986396789551, "best_model_checkpoint": "output/radiohead/checkpoint-240", "epoch": 8.0, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 0.0001280093426996125, "loss": 3.2073, "step": 5 }, { "epoch": 0.33, "learning_rate": 0.00010290000000000001, "loss": 2.9899, "step": 10 }, { "epoch": 0.5, "learning_rate": 6.86e-05, "loss": 2.8292, "step": 15 }, { "epoch": 0.67, "learning_rate": 3.4300000000000014e-05, "loss": 2.8291, "step": 20 }, { "epoch": 0.83, "learning_rate": 9.190657300387505e-06, "loss": 2.8479, "step": 25 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 2.9261, "step": 30 }, { "epoch": 1.0, "eval_loss": 2.8060529232025146, "eval_runtime": 2.0848, "eval_samples_per_second": 22.545, "eval_steps_per_second": 2.878, "step": 30 }, { "epoch": 1.17, "learning_rate": 9.190657300387513e-06, "loss": 2.6517, "step": 35 }, { "epoch": 1.33, "learning_rate": 3.429999999999997e-05, "loss": 2.7329, "step": 40 }, { "epoch": 1.5, "learning_rate": 6.859999999999999e-05, "loss": 2.7524, "step": 45 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.5591, "step": 50 }, { "epoch": 1.83, "learning_rate": 0.00012800934269961248, "loss": 2.6201, "step": 55 }, { "epoch": 2.0, "learning_rate": 0.0001372, "loss": 2.5207, "step": 60 }, { "epoch": 2.0, "eval_loss": 2.7088782787323, "eval_runtime": 2.1182, "eval_samples_per_second": 22.189, "eval_steps_per_second": 2.833, "step": 60 }, { "epoch": 2.17, "learning_rate": 0.00012800934269961253, "loss": 2.3985, "step": 65 }, { "epoch": 2.33, "learning_rate": 0.00010289999999999998, "loss": 2.3027, "step": 70 }, { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 2.1903, "step": 75 }, { "epoch": 2.67, "learning_rate": 3.4300000000000054e-05, "loss": 2.2458, "step": 80 }, { "epoch": 2.83, "learning_rate": 9.190657300387498e-06, "loss": 2.2898, "step": 85 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 2.2945, "step": 90 }, { "epoch": 3.0, "eval_loss": 2.6637511253356934, "eval_runtime": 2.1338, "eval_samples_per_second": 22.027, "eval_steps_per_second": 2.812, "step": 90 }, { "epoch": 3.17, "learning_rate": 9.190657300387474e-06, "loss": 2.1611, "step": 95 }, { "epoch": 3.33, "learning_rate": 3.4300000000000014e-05, "loss": 2.0392, "step": 100 }, { "epoch": 3.5, "learning_rate": 6.859999999999997e-05, "loss": 2.3157, "step": 105 }, { "epoch": 3.67, "learning_rate": 0.00010289999999999994, "loss": 2.1789, "step": 110 }, { "epoch": 3.83, "learning_rate": 0.0001280093426996125, "loss": 2.1359, "step": 115 }, { "epoch": 4.0, "learning_rate": 0.0001372, "loss": 2.0964, "step": 120 }, { "epoch": 4.0, "eval_loss": 2.617748737335205, "eval_runtime": 2.1438, "eval_samples_per_second": 21.924, "eval_steps_per_second": 2.799, "step": 120 }, { "epoch": 4.17, "learning_rate": 0.00012800934269961248, "loss": 1.8717, "step": 125 }, { "epoch": 4.33, "learning_rate": 0.00010290000000000009, "loss": 1.9838, "step": 130 }, { "epoch": 4.5, "learning_rate": 6.860000000000003e-05, "loss": 2.0446, "step": 135 }, { "epoch": 4.67, "learning_rate": 3.429999999999996e-05, "loss": 1.8428, "step": 140 }, { "epoch": 4.83, "learning_rate": 9.190657300387574e-06, "loss": 1.9594, "step": 145 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 2.0192, "step": 150 }, { "epoch": 5.0, "eval_loss": 2.6335015296936035, "eval_runtime": 2.1534, "eval_samples_per_second": 21.826, "eval_steps_per_second": 2.786, "step": 150 }, { "epoch": 5.17, "learning_rate": 9.190657300387467e-06, "loss": 1.7401, "step": 155 }, { "epoch": 5.33, "learning_rate": 3.429999999999989e-05, "loss": 1.8589, "step": 160 }, { "epoch": 5.5, "learning_rate": 6.859999999999984e-05, "loss": 1.7454, "step": 165 }, { "epoch": 5.67, "learning_rate": 0.00010290000000000003, "loss": 1.8506, "step": 170 }, { "epoch": 5.83, "learning_rate": 0.00012800934269961248, "loss": 1.7711, "step": 175 }, { "epoch": 6.0, "learning_rate": 0.0001372, "loss": 1.6952, "step": 180 }, { "epoch": 6.0, "eval_loss": 2.604886293411255, "eval_runtime": 2.1277, "eval_samples_per_second": 22.09, "eval_steps_per_second": 2.82, "step": 180 }, { "epoch": 6.17, "learning_rate": 0.00012800934269961253, "loss": 1.7717, "step": 185 }, { "epoch": 6.33, "learning_rate": 0.00010290000000000012, "loss": 1.577, "step": 190 }, { "epoch": 6.5, "learning_rate": 6.859999999999993e-05, "loss": 1.5197, "step": 195 }, { "epoch": 6.67, "learning_rate": 3.429999999999998e-05, "loss": 1.6176, "step": 200 }, { "epoch": 6.83, "learning_rate": 9.19065730038752e-06, "loss": 1.562, "step": 205 }, { "epoch": 7.0, "learning_rate": 0.0, "loss": 1.6157, "step": 210 }, { "epoch": 7.0, "eval_loss": 2.6068503856658936, "eval_runtime": 2.1309, "eval_samples_per_second": 22.056, "eval_steps_per_second": 2.816, "step": 210 }, { "epoch": 7.17, "learning_rate": 9.190657300387459e-06, "loss": 1.5445, "step": 215 }, { "epoch": 7.33, "learning_rate": 3.429999999999988e-05, "loss": 1.5187, "step": 220 }, { "epoch": 7.5, "learning_rate": 6.859999999999982e-05, "loss": 1.4775, "step": 225 }, { "epoch": 7.67, "learning_rate": 0.00010290000000000002, "loss": 1.3245, "step": 230 }, { "epoch": 7.83, "learning_rate": 0.00012800934269961248, "loss": 1.6077, "step": 235 }, { "epoch": 8.0, "learning_rate": 0.0001372, "loss": 1.5085, "step": 240 }, { "epoch": 8.0, "eval_loss": 2.588986396789551, "eval_runtime": 2.1323, "eval_samples_per_second": 22.042, "eval_steps_per_second": 2.814, "step": 240 } ], "max_steps": 300, "num_train_epochs": 10, "total_flos": 243524173824000.0, "trial_name": null, "trial_params": null }