{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "global_step": 750000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1e-05,
      "loss": 3.5986,
      "step": 1
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.83344888888889e-06,
      "loss": 0.685,
      "step": 18750
    },
    {
      "epoch": 0.05,
      "eval_accuracy": 0.5626,
      "eval_loss": 0.676932692527771,
      "eval_runtime": 23.4325,
      "eval_samples_per_second": 426.757,
      "eval_steps_per_second": 6.7,
      "step": 18750
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.666853333333333e-06,
      "loss": 0.6788,
      "step": 37500
    },
    {
      "epoch": 0.1,
      "eval_accuracy": 0.5642,
      "eval_loss": 0.6758846640586853,
      "eval_runtime": 23.4496,
      "eval_samples_per_second": 426.447,
      "eval_steps_per_second": 6.695,
      "step": 37500
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.500266666666668e-06,
      "loss": 0.6774,
      "step": 56250
    },
    {
      "epoch": 0.15,
      "eval_accuracy": 0.5681,
      "eval_loss": 0.6747780442237854,
      "eval_runtime": 23.6291,
      "eval_samples_per_second": 423.207,
      "eval_steps_per_second": 6.644,
      "step": 56250
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.333671111111113e-06,
      "loss": 0.6771,
      "step": 75000
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.5748,
      "eval_loss": 0.6741061806678772,
      "eval_runtime": 23.3134,
      "eval_samples_per_second": 428.938,
      "eval_steps_per_second": 6.734,
      "step": 75000
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.167075555555556e-06,
      "loss": 0.6762,
      "step": 93750
    },
    {
      "epoch": 0.25,
      "eval_accuracy": 0.5738,
      "eval_loss": 0.6736128926277161,
      "eval_runtime": 23.6397,
      "eval_samples_per_second": 423.017,
      "eval_steps_per_second": 6.641,
      "step": 93750
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.00048e-06,
      "loss": 0.6755,
      "step": 112500
    },
    {
      "epoch": 0.3,
      "eval_accuracy": 0.5736,
      "eval_loss": 0.6737410426139832,
      "eval_runtime": 23.3251,
      "eval_samples_per_second": 428.723,
      "eval_steps_per_second": 6.731,
      "step": 112500
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.833866666666667e-06,
      "loss": 0.6751,
      "step": 131250
    },
    {
      "epoch": 0.35,
      "eval_accuracy": 0.5731,
      "eval_loss": 0.6734435558319092,
      "eval_runtime": 23.3345,
      "eval_samples_per_second": 428.549,
      "eval_steps_per_second": 6.728,
      "step": 131250
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.667271111111112e-06,
      "loss": 0.6749,
      "step": 150000
    },
    {
      "epoch": 0.4,
      "eval_accuracy": 0.5714,
      "eval_loss": 0.6720886826515198,
      "eval_runtime": 23.4003,
      "eval_samples_per_second": 427.345,
      "eval_steps_per_second": 6.709,
      "step": 150000
    },
    {
      "epoch": 0.45,
      "learning_rate": 8.500666666666666e-06,
      "loss": 0.6746,
      "step": 168750
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.5767,
      "eval_loss": 0.6722772717475891,
      "eval_runtime": 23.5061,
      "eval_samples_per_second": 425.422,
      "eval_steps_per_second": 6.679,
      "step": 168750
    },
    {
      "epoch": 0.5,
      "learning_rate": 8.334071111111112e-06,
      "loss": 0.6745,
      "step": 187500
    },
    {
      "epoch": 0.5,
      "eval_accuracy": 0.5756,
      "eval_loss": 0.6716436147689819,
      "eval_runtime": 23.4294,
      "eval_samples_per_second": 426.814,
      "eval_steps_per_second": 6.701,
      "step": 187500
    },
    {
      "epoch": 0.55,
      "learning_rate": 8.167475555555557e-06,
      "loss": 0.6745,
      "step": 206250
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.5785,
      "eval_loss": 0.6716175675392151,
      "eval_runtime": 23.3298,
      "eval_samples_per_second": 428.636,
      "eval_steps_per_second": 6.73,
      "step": 206250
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.00088e-06,
      "loss": 0.6744,
      "step": 225000
    },
    {
      "epoch": 0.6,
      "eval_accuracy": 0.5721,
      "eval_loss": 0.6720548272132874,
      "eval_runtime": 23.2772,
      "eval_samples_per_second": 429.606,
      "eval_steps_per_second": 6.745,
      "step": 225000
    },
    {
      "epoch": 0.65,
      "learning_rate": 7.834284444444445e-06,
      "loss": 0.674,
      "step": 243750
    },
    {
      "epoch": 0.65,
      "eval_accuracy": 0.5742,
      "eval_loss": 0.6714185476303101,
      "eval_runtime": 23.567,
      "eval_samples_per_second": 424.321,
      "eval_steps_per_second": 6.662,
      "step": 243750
    },
    {
      "epoch": 0.7,
      "learning_rate": 7.66768888888889e-06,
      "loss": 0.6743,
      "step": 262500
    },
    {
      "epoch": 0.7,
      "eval_accuracy": 0.5699,
      "eval_loss": 0.6723877191543579,
      "eval_runtime": 23.1824,
      "eval_samples_per_second": 431.361,
      "eval_steps_per_second": 6.772,
      "step": 262500
    },
    {
      "epoch": 0.75,
      "learning_rate": 7.501084444444445e-06,
      "loss": 0.6737,
      "step": 281250
    },
    {
      "epoch": 0.75,
      "eval_accuracy": 0.5775,
      "eval_loss": 0.6708235144615173,
      "eval_runtime": 23.3586,
      "eval_samples_per_second": 428.108,
      "eval_steps_per_second": 6.721,
      "step": 281250
    },
    {
      "epoch": 0.8,
      "learning_rate": 7.33448888888889e-06,
      "loss": 0.6736,
      "step": 300000
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 0.5749,
      "eval_loss": 0.6718671917915344,
      "eval_runtime": 23.412,
      "eval_samples_per_second": 427.132,
      "eval_steps_per_second": 6.706,
      "step": 300000
    },
    {
      "epoch": 0.85,
      "learning_rate": 7.167893333333334e-06,
      "loss": 0.6737,
      "step": 318750
    },
    {
      "epoch": 0.85,
      "eval_accuracy": 0.5756,
      "eval_loss": 0.6709334254264832,
      "eval_runtime": 23.5575,
      "eval_samples_per_second": 424.493,
      "eval_steps_per_second": 6.665,
      "step": 318750
    },
    {
      "epoch": 0.9,
      "learning_rate": 7.001297777777778e-06,
      "loss": 0.6736,
      "step": 337500
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.5725,
      "eval_loss": 0.6705034375190735,
      "eval_runtime": 23.4245,
      "eval_samples_per_second": 426.903,
      "eval_steps_per_second": 6.702,
      "step": 337500
    },
    {
      "epoch": 0.95,
      "learning_rate": 6.834711111111112e-06,
      "loss": 0.6736,
      "step": 356250
    },
    {
      "epoch": 0.95,
      "eval_accuracy": 0.5734,
      "eval_loss": 0.6707730293273926,
      "eval_runtime": 23.4532,
      "eval_samples_per_second": 426.381,
      "eval_steps_per_second": 6.694,
      "step": 356250
    },
    {
      "epoch": 1.0,
      "learning_rate": 6.6681155555555566e-06,
      "loss": 0.6731,
      "step": 375000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5765,
      "eval_loss": 0.6712862253189087,
      "eval_runtime": 23.3551,
      "eval_samples_per_second": 428.173,
      "eval_steps_per_second": 6.722,
      "step": 375000
    },
    {
      "epoch": 1.05,
      "learning_rate": 6.50152e-06,
      "loss": 0.6704,
      "step": 393750
    },
    {
      "epoch": 1.05,
      "eval_accuracy": 0.5772,
      "eval_loss": 0.6697613000869751,
      "eval_runtime": 23.5543,
      "eval_samples_per_second": 424.551,
      "eval_steps_per_second": 6.665,
      "step": 393750
    },
    {
      "epoch": 1.1,
      "learning_rate": 6.334924444444444e-06,
      "loss": 0.6703,
      "step": 412500
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.5747,
      "eval_loss": 0.6700881719589233,
      "eval_runtime": 23.557,
      "eval_samples_per_second": 424.502,
      "eval_steps_per_second": 6.665,
      "step": 412500
    },
    {
      "epoch": 1.15,
      "learning_rate": 6.1683288888888896e-06,
      "loss": 0.6703,
      "step": 431250
    },
    {
      "epoch": 1.15,
      "eval_accuracy": 0.578,
      "eval_loss": 0.669475257396698,
      "eval_runtime": 23.4497,
      "eval_samples_per_second": 426.445,
      "eval_steps_per_second": 6.695,
      "step": 431250
    },
    {
      "epoch": 1.2,
      "learning_rate": 6.001733333333334e-06,
      "loss": 0.6705,
      "step": 450000
    },
    {
      "epoch": 1.2,
      "eval_accuracy": 0.577,
      "eval_loss": 0.6688240170478821,
      "eval_runtime": 23.5829,
      "eval_samples_per_second": 424.035,
      "eval_steps_per_second": 6.657,
      "step": 450000
    },
    {
      "epoch": 1.25,
      "learning_rate": 5.8351466666666665e-06,
      "loss": 0.6703,
      "step": 468750
    },
    {
      "epoch": 1.25,
      "eval_accuracy": 0.5795,
      "eval_loss": 0.6692911982536316,
      "eval_runtime": 23.4227,
      "eval_samples_per_second": 426.936,
      "eval_steps_per_second": 6.703,
      "step": 468750
    },
    {
      "epoch": 1.3,
      "learning_rate": 5.668551111111112e-06,
      "loss": 0.6704,
      "step": 487500
    },
    {
      "epoch": 1.3,
      "eval_accuracy": 0.5783,
      "eval_loss": 0.6689985394477844,
      "eval_runtime": 23.436,
      "eval_samples_per_second": 426.694,
      "eval_steps_per_second": 6.699,
      "step": 487500
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.501955555555556e-06,
      "loss": 0.6703,
      "step": 506250
    },
    {
      "epoch": 1.35,
      "eval_accuracy": 0.5783,
      "eval_loss": 0.6690422296524048,
      "eval_runtime": 23.5352,
      "eval_samples_per_second": 424.896,
      "eval_steps_per_second": 6.671,
      "step": 506250
    },
    {
      "epoch": 1.4,
      "learning_rate": 5.33536e-06,
      "loss": 0.6705,
      "step": 525000
    },
    {
      "epoch": 1.4,
      "eval_accuracy": 0.5785,
      "eval_loss": 0.6693896055221558,
      "eval_runtime": 23.5799,
      "eval_samples_per_second": 424.09,
      "eval_steps_per_second": 6.658,
      "step": 525000
    },
    {
      "epoch": 1.45,
      "learning_rate": 5.168782222222223e-06,
      "loss": 0.6705,
      "step": 543750
    },
    {
      "epoch": 1.45,
      "eval_accuracy": 0.5806,
      "eval_loss": 0.6683958768844604,
      "eval_runtime": 23.4942,
      "eval_samples_per_second": 425.637,
      "eval_steps_per_second": 6.683,
      "step": 543750
    },
    {
      "epoch": 1.5,
      "learning_rate": 5.002186666666667e-06,
      "loss": 0.6704,
      "step": 562500
    },
    {
      "epoch": 1.5,
      "eval_accuracy": 0.5792,
      "eval_loss": 0.6692180633544922,
      "eval_runtime": 23.4522,
      "eval_samples_per_second": 426.4,
      "eval_steps_per_second": 6.694,
      "step": 562500
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.8356e-06,
      "loss": 0.6706,
      "step": 581250
    },
    {
      "epoch": 1.55,
      "eval_accuracy": 0.5787,
      "eval_loss": 0.6688229441642761,
      "eval_runtime": 23.3586,
      "eval_samples_per_second": 428.109,
      "eval_steps_per_second": 6.721,
      "step": 581250
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.669004444444445e-06,
      "loss": 0.6706,
      "step": 600000
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.5814,
      "eval_loss": 0.6699367165565491,
      "eval_runtime": 23.5906,
      "eval_samples_per_second": 423.898,
      "eval_steps_per_second": 6.655,
      "step": 600000
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.5024177777777786e-06,
      "loss": 0.6702,
      "step": 618750
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.5766,
      "eval_loss": 0.669127881526947,
      "eval_runtime": 23.4357,
      "eval_samples_per_second": 426.7,
      "eval_steps_per_second": 6.699,
      "step": 618750
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.335804444444445e-06,
      "loss": 0.6702,
      "step": 637500
    },
    {
      "epoch": 1.7,
      "eval_accuracy": 0.5782,
      "eval_loss": 0.6684728264808655,
      "eval_runtime": 23.4934,
      "eval_samples_per_second": 425.651,
      "eval_steps_per_second": 6.683,
      "step": 637500
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.169217777777778e-06,
      "loss": 0.67,
      "step": 656250
    },
    {
      "epoch": 1.75,
      "eval_accuracy": 0.5817,
      "eval_loss": 0.6683481931686401,
      "eval_runtime": 23.6396,
      "eval_samples_per_second": 423.02,
      "eval_steps_per_second": 6.641,
      "step": 656250
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.002613333333334e-06,
      "loss": 0.6697,
      "step": 675000
    },
    {
      "epoch": 1.8,
      "eval_accuracy": 0.582,
      "eval_loss": 0.667320966720581,
      "eval_runtime": 23.7429,
      "eval_samples_per_second": 421.178,
      "eval_steps_per_second": 6.612,
      "step": 675000
    },
    {
      "epoch": 1.85,
      "learning_rate": 3.836026666666667e-06,
      "loss": 0.6703,
      "step": 693750
    },
    {
      "epoch": 1.85,
      "eval_accuracy": 0.581,
      "eval_loss": 0.6678736209869385,
      "eval_runtime": 23.3426,
      "eval_samples_per_second": 428.401,
      "eval_steps_per_second": 6.726,
      "step": 693750
    },
    {
      "epoch": 1.9,
      "learning_rate": 3.6694400000000002e-06,
      "loss": 0.6697,
      "step": 712500
    },
    {
      "epoch": 1.9,
      "eval_accuracy": 0.5854,
      "eval_loss": 0.6670705080032349,
      "eval_runtime": 23.4267,
      "eval_samples_per_second": 426.862,
      "eval_steps_per_second": 6.702,
      "step": 712500
    },
    {
      "epoch": 1.95,
      "learning_rate": 3.502835555555556e-06,
      "loss": 0.6697,
      "step": 731250
    },
    {
      "epoch": 1.95,
      "eval_accuracy": 0.5837,
      "eval_loss": 0.6675453186035156,
      "eval_runtime": 23.4269,
      "eval_samples_per_second": 426.859,
      "eval_steps_per_second": 6.702,
      "step": 731250
    },
    {
      "epoch": 2.0,
      "learning_rate": 3.33624e-06,
      "loss": 0.6702,
      "step": 750000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5857,
      "eval_loss": 0.6678363084793091,
      "eval_runtime": 23.4622,
      "eval_samples_per_second": 426.218,
      "eval_steps_per_second": 6.692,
      "step": 750000
    }
  ],
  "max_steps": 1125000,
  "num_train_epochs": 3,
  "total_flos": 4.4577934226915066e+19,
  "trial_name": null,
  "trial_params": null
}