|
{ |
|
"best_metric": 1.9820196628570557, |
|
"best_model_checkpoint": "./outputs/checkpoint-4100", |
|
"epoch": 2.987249544626594, |
|
"eval_steps": 100, |
|
"global_step": 4100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.7398, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.641923666000366, |
|
"eval_runtime": 206.3681, |
|
"eval_samples_per_second": 30.402, |
|
"eval_steps_per_second": 3.804, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6046, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.592717409133911, |
|
"eval_runtime": 206.4891, |
|
"eval_samples_per_second": 30.384, |
|
"eval_steps_per_second": 3.802, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5628, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.5575265884399414, |
|
"eval_runtime": 206.1916, |
|
"eval_samples_per_second": 30.428, |
|
"eval_steps_per_second": 3.807, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5372, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.525071859359741, |
|
"eval_runtime": 205.8756, |
|
"eval_samples_per_second": 30.475, |
|
"eval_steps_per_second": 3.813, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4945, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.4999899864196777, |
|
"eval_runtime": 206.061, |
|
"eval_samples_per_second": 30.447, |
|
"eval_steps_per_second": 3.81, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4681, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.474062919616699, |
|
"eval_runtime": 206.3754, |
|
"eval_samples_per_second": 30.401, |
|
"eval_steps_per_second": 3.804, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 2.4511, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.4490628242492676, |
|
"eval_runtime": 205.613, |
|
"eval_samples_per_second": 30.514, |
|
"eval_steps_per_second": 3.818, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 2.428, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.4292285442352295, |
|
"eval_runtime": 206.1663, |
|
"eval_samples_per_second": 30.432, |
|
"eval_steps_per_second": 3.808, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3917, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.4080374240875244, |
|
"eval_runtime": 206.1066, |
|
"eval_samples_per_second": 30.441, |
|
"eval_steps_per_second": 3.809, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3881, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.3876078128814697, |
|
"eval_runtime": 205.7165, |
|
"eval_samples_per_second": 30.498, |
|
"eval_steps_per_second": 3.816, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3815, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.3660354614257812, |
|
"eval_runtime": 206.3111, |
|
"eval_samples_per_second": 30.41, |
|
"eval_steps_per_second": 3.805, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3383, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.3476908206939697, |
|
"eval_runtime": 206.414, |
|
"eval_samples_per_second": 30.395, |
|
"eval_steps_per_second": 3.803, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3498, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.3309385776519775, |
|
"eval_runtime": 206.425, |
|
"eval_samples_per_second": 30.394, |
|
"eval_steps_per_second": 3.803, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2994, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.3077964782714844, |
|
"eval_runtime": 206.5558, |
|
"eval_samples_per_second": 30.374, |
|
"eval_steps_per_second": 3.8, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2611, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.290616989135742, |
|
"eval_runtime": 206.2977, |
|
"eval_samples_per_second": 30.412, |
|
"eval_steps_per_second": 3.805, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2529, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.2742502689361572, |
|
"eval_runtime": 206.4691, |
|
"eval_samples_per_second": 30.387, |
|
"eval_steps_per_second": 3.802, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2533, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.2574470043182373, |
|
"eval_runtime": 205.8893, |
|
"eval_samples_per_second": 30.473, |
|
"eval_steps_per_second": 3.813, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 2.214, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.241929769515991, |
|
"eval_runtime": 205.7921, |
|
"eval_samples_per_second": 30.487, |
|
"eval_steps_per_second": 3.815, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 2.199, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.2262773513793945, |
|
"eval_runtime": 206.0779, |
|
"eval_samples_per_second": 30.445, |
|
"eval_steps_per_second": 3.809, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1938, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.2117421627044678, |
|
"eval_runtime": 206.4818, |
|
"eval_samples_per_second": 30.385, |
|
"eval_steps_per_second": 3.802, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1761, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.1993796825408936, |
|
"eval_runtime": 206.3625, |
|
"eval_samples_per_second": 30.403, |
|
"eval_steps_per_second": 3.804, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 2.159, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.1830873489379883, |
|
"eval_runtime": 206.5502, |
|
"eval_samples_per_second": 30.375, |
|
"eval_steps_per_second": 3.801, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1497, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.1709437370300293, |
|
"eval_runtime": 206.5171, |
|
"eval_samples_per_second": 30.38, |
|
"eval_steps_per_second": 3.801, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1548, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.1570258140563965, |
|
"eval_runtime": 206.4592, |
|
"eval_samples_per_second": 30.389, |
|
"eval_steps_per_second": 3.802, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1218, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.1442573070526123, |
|
"eval_runtime": 206.6982, |
|
"eval_samples_per_second": 30.353, |
|
"eval_steps_per_second": 3.798, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1246, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.1329336166381836, |
|
"eval_runtime": 206.0093, |
|
"eval_samples_per_second": 30.455, |
|
"eval_steps_per_second": 3.811, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1067, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.117797374725342, |
|
"eval_runtime": 206.8081, |
|
"eval_samples_per_second": 30.337, |
|
"eval_steps_per_second": 3.796, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0643, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 2.111072540283203, |
|
"eval_runtime": 312.0262, |
|
"eval_samples_per_second": 20.107, |
|
"eval_steps_per_second": 2.516, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0379, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.097459554672241, |
|
"eval_runtime": 206.2193, |
|
"eval_samples_per_second": 30.424, |
|
"eval_steps_per_second": 3.807, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0567, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.0867855548858643, |
|
"eval_runtime": 206.4732, |
|
"eval_samples_per_second": 30.387, |
|
"eval_steps_per_second": 3.802, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0243, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.0787196159362793, |
|
"eval_runtime": 206.3044, |
|
"eval_samples_per_second": 30.411, |
|
"eval_steps_per_second": 3.805, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0407, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.066490888595581, |
|
"eval_runtime": 206.5529, |
|
"eval_samples_per_second": 30.375, |
|
"eval_steps_per_second": 3.8, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 2.009, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.0566089153289795, |
|
"eval_runtime": 205.9821, |
|
"eval_samples_per_second": 30.459, |
|
"eval_steps_per_second": 3.811, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 2.002, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 2.0491139888763428, |
|
"eval_runtime": 206.3719, |
|
"eval_samples_per_second": 30.401, |
|
"eval_steps_per_second": 3.804, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0002, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 2.0374956130981445, |
|
"eval_runtime": 206.2009, |
|
"eval_samples_per_second": 30.427, |
|
"eval_steps_per_second": 3.807, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9945, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.027383804321289, |
|
"eval_runtime": 206.0279, |
|
"eval_samples_per_second": 30.452, |
|
"eval_steps_per_second": 3.81, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9759, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 2.0199601650238037, |
|
"eval_runtime": 206.7188, |
|
"eval_samples_per_second": 30.35, |
|
"eval_steps_per_second": 3.797, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9675, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 2.0125296115875244, |
|
"eval_runtime": 206.1247, |
|
"eval_samples_per_second": 30.438, |
|
"eval_steps_per_second": 3.808, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9542, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.9999363422393799, |
|
"eval_runtime": 206.6362, |
|
"eval_samples_per_second": 30.363, |
|
"eval_steps_per_second": 3.799, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9555, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 1.9915796518325806, |
|
"eval_runtime": 206.7673, |
|
"eval_samples_per_second": 30.343, |
|
"eval_steps_per_second": 3.797, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9575, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.9820196628570557, |
|
"eval_runtime": 207.4963, |
|
"eval_samples_per_second": 30.237, |
|
"eval_steps_per_second": 3.783, |
|
"step": 4100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.1959366338920448e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|