{ "best_metric": 0.8880813121795654, "best_model_checkpoint": "./outputs/checkpoint-3700", "epoch": 4.933333333333334, "eval_steps": 100, "global_step": 3700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0002, "loss": 1.001, "step": 100 }, { "epoch": 0.13, "eval_loss": 1.0910990238189697, "eval_runtime": 728.6528, "eval_samples_per_second": 2.649, "eval_steps_per_second": 0.332, "step": 100 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 0.8971, "step": 200 }, { "epoch": 0.27, "eval_loss": 1.0677164793014526, "eval_runtime": 727.2702, "eval_samples_per_second": 2.654, "eval_steps_per_second": 0.333, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.8815, "step": 300 }, { "epoch": 0.4, "eval_loss": 1.052659273147583, "eval_runtime": 730.164, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.331, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.8695, "step": 400 }, { "epoch": 0.53, "eval_loss": 1.0382879972457886, "eval_runtime": 732.8796, "eval_samples_per_second": 2.633, "eval_steps_per_second": 0.33, "step": 400 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.8548, "step": 500 }, { "epoch": 0.67, "eval_loss": 1.0260452032089233, "eval_runtime": 730.1971, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.331, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.8496, "step": 600 }, { "epoch": 0.8, "eval_loss": 1.0152662992477417, "eval_runtime": 732.4802, "eval_samples_per_second": 2.635, "eval_steps_per_second": 0.33, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.8445, "step": 700 }, { "epoch": 0.93, "eval_loss": 1.0105178356170654, "eval_runtime": 802.0691, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.302, "step": 700 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.8346, "step": 800 }, { "epoch": 1.07, "eval_loss": 1.0066964626312256, "eval_runtime": 789.7964, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.306, "step": 800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.8199, "step": 900 }, { "epoch": 1.2, "eval_loss": 0.9989904165267944, "eval_runtime": 786.9058, "eval_samples_per_second": 2.453, "eval_steps_per_second": 0.308, "step": 900 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.8133, "step": 1000 }, { "epoch": 1.33, "eval_loss": 0.9895688891410828, "eval_runtime": 782.3169, "eval_samples_per_second": 2.467, "eval_steps_per_second": 0.309, "step": 1000 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.8193, "step": 1100 }, { "epoch": 1.47, "eval_loss": 0.9853964447975159, "eval_runtime": 777.984, "eval_samples_per_second": 2.481, "eval_steps_per_second": 0.311, "step": 1100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.8091, "step": 1200 }, { "epoch": 1.6, "eval_loss": 0.9784607887268066, "eval_runtime": 780.7522, "eval_samples_per_second": 2.472, "eval_steps_per_second": 0.31, "step": 1200 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.7983, "step": 1300 }, { "epoch": 1.73, "eval_loss": 0.9710213541984558, "eval_runtime": 787.7479, "eval_samples_per_second": 2.45, "eval_steps_per_second": 0.307, "step": 1300 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.7971, "step": 1400 }, { "epoch": 1.87, "eval_loss": 0.9653750658035278, "eval_runtime": 777.1907, "eval_samples_per_second": 2.483, "eval_steps_per_second": 0.311, "step": 1400 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.7899, "step": 1500 }, { "epoch": 2.0, "eval_loss": 0.9598689675331116, "eval_runtime": 788.3937, "eval_samples_per_second": 2.448, "eval_steps_per_second": 0.307, "step": 1500 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.7689, "step": 1600 }, { "epoch": 2.13, "eval_loss": 0.9570510983467102, "eval_runtime": 782.2868, "eval_samples_per_second": 2.467, "eval_steps_per_second": 0.309, "step": 1600 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.7676, "step": 1700 }, { "epoch": 2.27, "eval_loss": 0.9513885378837585, "eval_runtime": 768.2363, "eval_samples_per_second": 2.512, "eval_steps_per_second": 0.315, "step": 1700 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.766, "step": 1800 }, { "epoch": 2.4, "eval_loss": 0.9479925036430359, "eval_runtime": 792.9294, "eval_samples_per_second": 2.434, "eval_steps_per_second": 0.305, "step": 1800 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.7587, "step": 1900 }, { "epoch": 2.53, "eval_loss": 0.9420929551124573, "eval_runtime": 787.6767, "eval_samples_per_second": 2.45, "eval_steps_per_second": 0.307, "step": 1900 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.7546, "step": 2000 }, { "epoch": 2.67, "eval_loss": 0.9371617436408997, "eval_runtime": 785.7561, "eval_samples_per_second": 2.456, "eval_steps_per_second": 0.308, "step": 2000 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.7635, "step": 2100 }, { "epoch": 2.8, "eval_loss": 0.9326320290565491, "eval_runtime": 789.0027, "eval_samples_per_second": 2.446, "eval_steps_per_second": 0.307, "step": 2100 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.7632, "step": 2200 }, { "epoch": 2.93, "eval_loss": 0.9287375211715698, "eval_runtime": 785.4978, "eval_samples_per_second": 2.457, "eval_steps_per_second": 0.308, "step": 2200 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 0.7409, "step": 2300 }, { "epoch": 3.07, "eval_loss": 0.9274052381515503, "eval_runtime": 779.2439, "eval_samples_per_second": 2.477, "eval_steps_per_second": 0.311, "step": 2300 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 0.7247, "step": 2400 }, { "epoch": 3.2, "eval_loss": 0.9260075688362122, "eval_runtime": 775.8037, "eval_samples_per_second": 2.488, "eval_steps_per_second": 0.312, "step": 2400 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 0.7277, "step": 2500 }, { "epoch": 3.33, "eval_loss": 0.921540379524231, "eval_runtime": 774.2681, "eval_samples_per_second": 2.493, "eval_steps_per_second": 0.313, "step": 2500 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 0.7242, "step": 2600 }, { "epoch": 3.47, "eval_loss": 0.9189460277557373, "eval_runtime": 777.6576, "eval_samples_per_second": 2.482, "eval_steps_per_second": 0.311, "step": 2600 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 0.7246, "step": 2700 }, { "epoch": 3.6, "eval_loss": 0.9128983020782471, "eval_runtime": 776.5971, "eval_samples_per_second": 2.485, "eval_steps_per_second": 0.312, "step": 2700 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 0.7224, "step": 2800 }, { "epoch": 3.73, "eval_loss": 0.9102813601493835, "eval_runtime": 777.5262, "eval_samples_per_second": 2.482, "eval_steps_per_second": 0.311, "step": 2800 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 0.7222, "step": 2900 }, { "epoch": 3.87, "eval_loss": 0.9069462418556213, "eval_runtime": 778.8375, "eval_samples_per_second": 2.478, "eval_steps_per_second": 0.311, "step": 2900 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 0.7253, "step": 3000 }, { "epoch": 4.0, "eval_loss": 0.9039433002471924, "eval_runtime": 773.591, "eval_samples_per_second": 2.495, "eval_steps_per_second": 0.313, "step": 3000 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 0.6845, "step": 3100 }, { "epoch": 4.13, "eval_loss": 0.906529426574707, "eval_runtime": 784.1515, "eval_samples_per_second": 2.461, "eval_steps_per_second": 0.309, "step": 3100 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 0.6886, "step": 3200 }, { "epoch": 4.27, "eval_loss": 0.9038098454475403, "eval_runtime": 786.1544, "eval_samples_per_second": 2.455, "eval_steps_per_second": 0.308, "step": 3200 }, { "epoch": 4.4, "learning_rate": 0.0002, "loss": 0.7049, "step": 3300 }, { "epoch": 4.4, "eval_loss": 0.9004936218261719, "eval_runtime": 777.3844, "eval_samples_per_second": 2.483, "eval_steps_per_second": 0.311, "step": 3300 }, { "epoch": 4.53, "learning_rate": 0.0002, "loss": 0.6887, "step": 3400 }, { "epoch": 4.53, "eval_loss": 0.8973463177680969, "eval_runtime": 777.6425, "eval_samples_per_second": 2.482, "eval_steps_per_second": 0.311, "step": 3400 }, { "epoch": 4.67, "learning_rate": 0.0002, "loss": 0.6963, "step": 3500 }, { "epoch": 4.67, "eval_loss": 0.8955456614494324, "eval_runtime": 796.2741, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.304, "step": 3500 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 0.6882, "step": 3600 }, { "epoch": 4.8, "eval_loss": 0.8900671601295471, "eval_runtime": 776.8193, "eval_samples_per_second": 2.484, "eval_steps_per_second": 0.312, "step": 3600 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 0.6917, "step": 3700 }, { "epoch": 4.93, "eval_loss": 0.8880813121795654, "eval_runtime": 792.6123, "eval_samples_per_second": 2.435, "eval_steps_per_second": 0.305, "step": 3700 } ], "logging_steps": 100, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "total_flos": 9.582968034571469e+17, "trial_name": null, "trial_params": null }