{ "best_metric": 1.0850836038589478, "best_model_checkpoint": "./outputs/checkpoint-4100", "epoch": 2.9879781420765026, "eval_steps": 100, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 1.7654, "step": 100 }, { "epoch": 0.07, "eval_loss": 1.6328892707824707, "eval_runtime": 419.2151, "eval_samples_per_second": 14.966, "eval_steps_per_second": 1.873, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.6081, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.5872766971588135, "eval_runtime": 418.8615, "eval_samples_per_second": 14.979, "eval_steps_per_second": 1.874, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.5756, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.556390643119812, "eval_runtime": 418.8265, "eval_samples_per_second": 14.98, "eval_steps_per_second": 1.874, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.5444, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.5300668478012085, "eval_runtime": 418.8771, "eval_samples_per_second": 14.978, "eval_steps_per_second": 1.874, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.5114, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.5074748992919922, "eval_runtime": 418.9574, "eval_samples_per_second": 14.975, "eval_steps_per_second": 1.874, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.4948, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.4872732162475586, "eval_runtime": 419.0224, "eval_samples_per_second": 14.973, "eval_steps_per_second": 1.873, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.4723, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.4686368703842163, "eval_runtime": 418.747, "eval_samples_per_second": 14.983, "eval_steps_per_second": 1.875, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.4628, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.450691819190979, "eval_runtime": 418.912, "eval_samples_per_second": 14.977, "eval_steps_per_second": 1.874, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.4332, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.432775616645813, "eval_runtime": 418.8955, "eval_samples_per_second": 14.977, "eval_steps_per_second": 1.874, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.4271, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.4167886972427368, "eval_runtime": 418.6904, "eval_samples_per_second": 14.985, "eval_steps_per_second": 1.875, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.4129, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.402354121208191, "eval_runtime": 418.7796, "eval_samples_per_second": 14.982, "eval_steps_per_second": 1.874, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.3853, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.3865565061569214, "eval_runtime": 418.6288, "eval_samples_per_second": 14.987, "eval_steps_per_second": 1.875, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.394, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.372268795967102, "eval_runtime": 418.7001, "eval_samples_per_second": 14.984, "eval_steps_per_second": 1.875, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.356, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.3581469058990479, "eval_runtime": 418.565, "eval_samples_per_second": 14.989, "eval_steps_per_second": 1.875, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.3321, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.3450849056243896, "eval_runtime": 418.8093, "eval_samples_per_second": 14.981, "eval_steps_per_second": 1.874, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.3214, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.3320348262786865, "eval_runtime": 418.8397, "eval_samples_per_second": 14.979, "eval_steps_per_second": 1.874, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.3215, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.3184651136398315, "eval_runtime": 418.7321, "eval_samples_per_second": 14.983, "eval_steps_per_second": 1.875, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.2973, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.306998610496521, "eval_runtime": 418.7252, "eval_samples_per_second": 14.984, "eval_steps_per_second": 1.875, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.2893, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.293902039527893, "eval_runtime": 449.0285, "eval_samples_per_second": 13.972, "eval_steps_per_second": 1.748, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.2763, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.2827019691467285, "eval_runtime": 418.4577, "eval_samples_per_second": 14.993, "eval_steps_per_second": 1.876, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.2665, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.2712739706039429, "eval_runtime": 418.4388, "eval_samples_per_second": 14.994, "eval_steps_per_second": 1.876, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.2452, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.2593406438827515, "eval_runtime": 418.3773, "eval_samples_per_second": 14.996, "eval_steps_per_second": 1.876, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.2438, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.2501240968704224, "eval_runtime": 418.3937, "eval_samples_per_second": 14.995, "eval_steps_per_second": 1.876, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.2416, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.237477421760559, "eval_runtime": 418.5428, "eval_samples_per_second": 14.99, "eval_steps_per_second": 1.876, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.2224, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.2273696660995483, "eval_runtime": 418.5797, "eval_samples_per_second": 14.989, "eval_steps_per_second": 1.875, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.2106, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.2160065174102783, "eval_runtime": 418.6873, "eval_samples_per_second": 14.985, "eval_steps_per_second": 1.875, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.2036, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.2056827545166016, "eval_runtime": 418.5394, "eval_samples_per_second": 14.99, "eval_steps_per_second": 1.876, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.1786, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.196009635925293, "eval_runtime": 418.5266, "eval_samples_per_second": 14.991, "eval_steps_per_second": 1.876, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.1541, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.1857470273971558, "eval_runtime": 418.5437, "eval_samples_per_second": 14.99, "eval_steps_per_second": 1.876, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.1565, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.175557017326355, "eval_runtime": 418.72, "eval_samples_per_second": 14.984, "eval_steps_per_second": 1.875, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.1799, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.1915525197982788, "eval_runtime": 341.6989, "eval_samples_per_second": 18.361, "eval_steps_per_second": 2.297, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.1452, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.1769670248031616, "eval_runtime": 341.7617, "eval_samples_per_second": 18.358, "eval_steps_per_second": 2.297, "step": 3200 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 1.1401, "step": 3300 }, { "epoch": 2.41, "eval_loss": 1.1650751829147339, "eval_runtime": 342.0204, "eval_samples_per_second": 18.344, "eval_steps_per_second": 2.295, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.1249, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.1538294553756714, "eval_runtime": 341.9176, "eval_samples_per_second": 18.349, "eval_steps_per_second": 2.296, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.1227, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.1433058977127075, "eval_runtime": 342.1437, "eval_samples_per_second": 18.337, "eval_steps_per_second": 2.294, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.1161, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.1336331367492676, "eval_runtime": 342.2427, "eval_samples_per_second": 18.332, "eval_steps_per_second": 2.294, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.1109, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.123780369758606, "eval_runtime": 342.8733, "eval_samples_per_second": 18.298, "eval_steps_per_second": 2.289, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.0954, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.114593267440796, "eval_runtime": 342.239, "eval_samples_per_second": 18.332, "eval_steps_per_second": 2.294, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.0901, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.1041266918182373, "eval_runtime": 342.4571, "eval_samples_per_second": 18.321, "eval_steps_per_second": 2.292, "step": 3900 }, { "epoch": 2.92, "learning_rate": 0.0002, "loss": 1.0798, "step": 4000 }, { "epoch": 2.92, "eval_loss": 1.0935524702072144, "eval_runtime": 342.5842, "eval_samples_per_second": 18.314, "eval_steps_per_second": 2.291, "step": 4000 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 1.0688, "step": 4100 }, { "epoch": 2.99, "eval_loss": 1.0850836038589478, "eval_runtime": 396.9254, "eval_samples_per_second": 15.806, "eval_steps_per_second": 1.978, "step": 4100 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.3292919651387863e+18, "trial_name": null, "trial_params": null }