vit-lr-0.0001 / trainer_state.json
sharren's picture
🍻 cheers
a8be02c verified
raw
history blame
7.07 kB
{
"best_metric": 0.4716978371143341,
"best_model_checkpoint": "./vit-lr-0.0001/checkpoint-642",
"epoch": 12.0,
"eval_steps": 500,
"global_step": 3852,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.450945854187012,
"learning_rate": 9.549150281252611e-06,
"loss": 0.682,
"step": 321
},
{
"epoch": 1.0,
"eval_accuracy": 0.8099861303744799,
"eval_f1": 0.798120450266025,
"eval_loss": 0.5192127823829651,
"eval_precision": 0.8034262669589041,
"eval_recall": 0.8099861303744799,
"eval_runtime": 37.3933,
"eval_samples_per_second": 77.126,
"eval_steps_per_second": 9.654,
"step": 321
},
{
"epoch": 2.0,
"grad_norm": 5.513205051422119,
"learning_rate": 6.394955530196143e-05,
"loss": 0.4386,
"step": 642
},
{
"epoch": 2.0,
"eval_accuracy": 0.8283633841886269,
"eval_f1": 0.8213379813460607,
"eval_loss": 0.4716978371143341,
"eval_precision": 0.8230999577196753,
"eval_recall": 0.8283633841886269,
"eval_runtime": 37.886,
"eval_samples_per_second": 76.123,
"eval_steps_per_second": 9.529,
"step": 642
},
{
"epoch": 3.0,
"grad_norm": 0.9458593726158142,
"learning_rate": 6.840622763423387e-05,
"loss": 0.2621,
"step": 963
},
{
"epoch": 3.0,
"eval_accuracy": 0.8349514563106796,
"eval_f1": 0.8201100816778963,
"eval_loss": 0.5676096081733704,
"eval_precision": 0.8262616496556073,
"eval_recall": 0.8349514563106796,
"eval_runtime": 37.2214,
"eval_samples_per_second": 77.482,
"eval_steps_per_second": 9.699,
"step": 963
},
{
"epoch": 4.0,
"grad_norm": 7.709182262420654,
"learning_rate": 6.9628986498044885e-06,
"loss": 0.1891,
"step": 1284
},
{
"epoch": 4.0,
"eval_accuracy": 0.8495145631067961,
"eval_f1": 0.8436304900485414,
"eval_loss": 0.5022268891334534,
"eval_precision": 0.8555501567834226,
"eval_recall": 0.8495145631067961,
"eval_runtime": 37.4592,
"eval_samples_per_second": 76.99,
"eval_steps_per_second": 9.637,
"step": 1284
},
{
"epoch": 5.0,
"grad_norm": 0.030552733689546585,
"learning_rate": 9.960573506570156e-05,
"loss": 0.1052,
"step": 1605
},
{
"epoch": 5.0,
"eval_accuracy": 0.8398058252427184,
"eval_f1": 0.8357173097291434,
"eval_loss": 0.6660671830177307,
"eval_precision": 0.8482768361910064,
"eval_recall": 0.8398058252427184,
"eval_runtime": 39.2791,
"eval_samples_per_second": 73.423,
"eval_steps_per_second": 9.191,
"step": 1605
},
{
"epoch": 6.0,
"grad_norm": 1.1932728290557861,
"learning_rate": 1.4644660940675861e-05,
"loss": 0.0785,
"step": 1926
},
{
"epoch": 6.0,
"eval_accuracy": 0.8592233009708737,
"eval_f1": 0.8574256259898136,
"eval_loss": 0.5570405125617981,
"eval_precision": 0.8573736727029027,
"eval_recall": 0.8592233009708737,
"eval_runtime": 37.1007,
"eval_samples_per_second": 77.734,
"eval_steps_per_second": 9.73,
"step": 1926
},
{
"epoch": 7.0,
"grad_norm": 17.325477600097656,
"learning_rate": 5.626666167820289e-05,
"loss": 0.0481,
"step": 2247
},
{
"epoch": 7.0,
"eval_accuracy": 0.8547156726768377,
"eval_f1": 0.8523115263952025,
"eval_loss": 0.6496189832687378,
"eval_precision": 0.8547876753873573,
"eval_recall": 0.8547156726768377,
"eval_runtime": 36.7758,
"eval_samples_per_second": 78.421,
"eval_steps_per_second": 9.816,
"step": 2247
},
{
"epoch": 8.0,
"grad_norm": 0.6180940270423889,
"learning_rate": 7.545207078756922e-05,
"loss": 0.0281,
"step": 2568
},
{
"epoch": 8.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.8544924977438809,
"eval_loss": 0.7423234581947327,
"eval_precision": 0.8569592454687236,
"eval_recall": 0.8571428571428571,
"eval_runtime": 37.383,
"eval_samples_per_second": 77.147,
"eval_steps_per_second": 9.657,
"step": 2568
},
{
"epoch": 9.0,
"grad_norm": 0.014829314313828945,
"learning_rate": 3.5111757055883184e-06,
"loss": 0.0439,
"step": 2889
},
{
"epoch": 9.0,
"eval_accuracy": 0.8540221914008321,
"eval_f1": 0.8518021160859013,
"eval_loss": 0.7677786946296692,
"eval_precision": 0.851139101545856,
"eval_recall": 0.8540221914008321,
"eval_runtime": 37.2162,
"eval_samples_per_second": 77.493,
"eval_steps_per_second": 9.7,
"step": 2889
},
{
"epoch": 10.0,
"grad_norm": 0.003476586891338229,
"learning_rate": 9.801468428386933e-05,
"loss": 0.0297,
"step": 3210
},
{
"epoch": 10.0,
"eval_accuracy": 0.8467406380027739,
"eval_f1": 0.8464070946689463,
"eval_loss": 0.8647756576538086,
"eval_precision": 0.8478472152581428,
"eval_recall": 0.8467406380027739,
"eval_runtime": 37.1575,
"eval_samples_per_second": 77.616,
"eval_steps_per_second": 9.715,
"step": 3210
},
{
"epoch": 11.0,
"grad_norm": 0.09397422522306442,
"learning_rate": 2.0610737385380886e-05,
"loss": 0.037,
"step": 3531
},
{
"epoch": 11.0,
"eval_accuracy": 0.8561026352288488,
"eval_f1": 0.8548878398804619,
"eval_loss": 0.7682856321334839,
"eval_precision": 0.8552181984518148,
"eval_recall": 0.8561026352288488,
"eval_runtime": 37.5045,
"eval_samples_per_second": 76.898,
"eval_steps_per_second": 9.626,
"step": 3531
},
{
"epoch": 12.0,
"grad_norm": 0.002134101465344429,
"learning_rate": 4.999999999998897e-05,
"loss": 0.0322,
"step": 3852
},
{
"epoch": 12.0,
"eval_accuracy": 0.8512482662968099,
"eval_f1": 0.8396847788052201,
"eval_loss": 1.0125652551651,
"eval_precision": 0.8561110421693547,
"eval_recall": 0.8512482662968099,
"eval_runtime": 37.3877,
"eval_samples_per_second": 77.138,
"eval_steps_per_second": 9.656,
"step": 3852
},
{
"epoch": 12.0,
"step": 3852,
"total_flos": 4.768760767819088e+18,
"train_loss": 0.16453982563157443,
"train_runtime": 1738.7619,
"train_samples_per_second": 294.922,
"train_steps_per_second": 18.461
}
],
"logging_steps": 500,
"max_steps": 32100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 4.768760767819088e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}