|
{ |
|
"best_metric": 0.4716978371143341, |
|
"best_model_checkpoint": "./vit-lr-0.0001/checkpoint-642", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 3852, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.450945854187012, |
|
"learning_rate": 9.549150281252611e-06, |
|
"loss": 0.682, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8099861303744799, |
|
"eval_f1": 0.798120450266025, |
|
"eval_loss": 0.5192127823829651, |
|
"eval_precision": 0.8034262669589041, |
|
"eval_recall": 0.8099861303744799, |
|
"eval_runtime": 37.3933, |
|
"eval_samples_per_second": 77.126, |
|
"eval_steps_per_second": 9.654, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.513205051422119, |
|
"learning_rate": 6.394955530196143e-05, |
|
"loss": 0.4386, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8283633841886269, |
|
"eval_f1": 0.8213379813460607, |
|
"eval_loss": 0.4716978371143341, |
|
"eval_precision": 0.8230999577196753, |
|
"eval_recall": 0.8283633841886269, |
|
"eval_runtime": 37.886, |
|
"eval_samples_per_second": 76.123, |
|
"eval_steps_per_second": 9.529, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.9458593726158142, |
|
"learning_rate": 6.840622763423387e-05, |
|
"loss": 0.2621, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8349514563106796, |
|
"eval_f1": 0.8201100816778963, |
|
"eval_loss": 0.5676096081733704, |
|
"eval_precision": 0.8262616496556073, |
|
"eval_recall": 0.8349514563106796, |
|
"eval_runtime": 37.2214, |
|
"eval_samples_per_second": 77.482, |
|
"eval_steps_per_second": 9.699, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 7.709182262420654, |
|
"learning_rate": 6.9628986498044885e-06, |
|
"loss": 0.1891, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8495145631067961, |
|
"eval_f1": 0.8436304900485414, |
|
"eval_loss": 0.5022268891334534, |
|
"eval_precision": 0.8555501567834226, |
|
"eval_recall": 0.8495145631067961, |
|
"eval_runtime": 37.4592, |
|
"eval_samples_per_second": 76.99, |
|
"eval_steps_per_second": 9.637, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.030552733689546585, |
|
"learning_rate": 9.960573506570156e-05, |
|
"loss": 0.1052, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8398058252427184, |
|
"eval_f1": 0.8357173097291434, |
|
"eval_loss": 0.6660671830177307, |
|
"eval_precision": 0.8482768361910064, |
|
"eval_recall": 0.8398058252427184, |
|
"eval_runtime": 39.2791, |
|
"eval_samples_per_second": 73.423, |
|
"eval_steps_per_second": 9.191, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1932728290557861, |
|
"learning_rate": 1.4644660940675861e-05, |
|
"loss": 0.0785, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8592233009708737, |
|
"eval_f1": 0.8574256259898136, |
|
"eval_loss": 0.5570405125617981, |
|
"eval_precision": 0.8573736727029027, |
|
"eval_recall": 0.8592233009708737, |
|
"eval_runtime": 37.1007, |
|
"eval_samples_per_second": 77.734, |
|
"eval_steps_per_second": 9.73, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 17.325477600097656, |
|
"learning_rate": 5.626666167820289e-05, |
|
"loss": 0.0481, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8547156726768377, |
|
"eval_f1": 0.8523115263952025, |
|
"eval_loss": 0.6496189832687378, |
|
"eval_precision": 0.8547876753873573, |
|
"eval_recall": 0.8547156726768377, |
|
"eval_runtime": 36.7758, |
|
"eval_samples_per_second": 78.421, |
|
"eval_steps_per_second": 9.816, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.6180940270423889, |
|
"learning_rate": 7.545207078756922e-05, |
|
"loss": 0.0281, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8544924977438809, |
|
"eval_loss": 0.7423234581947327, |
|
"eval_precision": 0.8569592454687236, |
|
"eval_recall": 0.8571428571428571, |
|
"eval_runtime": 37.383, |
|
"eval_samples_per_second": 77.147, |
|
"eval_steps_per_second": 9.657, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.014829314313828945, |
|
"learning_rate": 3.5111757055883184e-06, |
|
"loss": 0.0439, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8540221914008321, |
|
"eval_f1": 0.8518021160859013, |
|
"eval_loss": 0.7677786946296692, |
|
"eval_precision": 0.851139101545856, |
|
"eval_recall": 0.8540221914008321, |
|
"eval_runtime": 37.2162, |
|
"eval_samples_per_second": 77.493, |
|
"eval_steps_per_second": 9.7, |
|
"step": 2889 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.003476586891338229, |
|
"learning_rate": 9.801468428386933e-05, |
|
"loss": 0.0297, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8467406380027739, |
|
"eval_f1": 0.8464070946689463, |
|
"eval_loss": 0.8647756576538086, |
|
"eval_precision": 0.8478472152581428, |
|
"eval_recall": 0.8467406380027739, |
|
"eval_runtime": 37.1575, |
|
"eval_samples_per_second": 77.616, |
|
"eval_steps_per_second": 9.715, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.09397422522306442, |
|
"learning_rate": 2.0610737385380886e-05, |
|
"loss": 0.037, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8561026352288488, |
|
"eval_f1": 0.8548878398804619, |
|
"eval_loss": 0.7682856321334839, |
|
"eval_precision": 0.8552181984518148, |
|
"eval_recall": 0.8561026352288488, |
|
"eval_runtime": 37.5045, |
|
"eval_samples_per_second": 76.898, |
|
"eval_steps_per_second": 9.626, |
|
"step": 3531 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.002134101465344429, |
|
"learning_rate": 4.999999999998897e-05, |
|
"loss": 0.0322, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8512482662968099, |
|
"eval_f1": 0.8396847788052201, |
|
"eval_loss": 1.0125652551651, |
|
"eval_precision": 0.8561110421693547, |
|
"eval_recall": 0.8512482662968099, |
|
"eval_runtime": 37.3877, |
|
"eval_samples_per_second": 77.138, |
|
"eval_steps_per_second": 9.656, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 3852, |
|
"total_flos": 4.768760767819088e+18, |
|
"train_loss": 0.16453982563157443, |
|
"train_runtime": 1738.7619, |
|
"train_samples_per_second": 294.922, |
|
"train_steps_per_second": 18.461 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 4.768760767819088e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|