gpt2_f_experiment_4_drug_data / trainer_state.json
mllm-dev's picture
Upload folder using huggingface_hub
e8213d6 verified
{
"best_metric": 1.410697102546692,
"best_model_checkpoint": "tam_test_out_drug_data/checkpoint-1011",
"epoch": 18.0,
"eval_steps": 500,
"global_step": 6066,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.46396354505719334,
"eval_loss": 1.46442449092865,
"eval_runtime": 15.0839,
"eval_samples_per_second": 712.879,
"eval_steps_per_second": 7.491,
"step": 337
},
{
"epoch": 1.48,
"grad_norm": 391296.28125,
"learning_rate": 5.554896142433234e-05,
"loss": 1.6493,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.47372826188040545,
"eval_loss": 1.4272058010101318,
"eval_runtime": 15.1004,
"eval_samples_per_second": 712.101,
"eval_steps_per_second": 7.483,
"step": 674
},
{
"epoch": 2.97,
"grad_norm": 266398.21875,
"learning_rate": 5.109792284866469e-05,
"loss": 1.3762,
"step": 1000
},
{
"epoch": 3.0,
"eval_accuracy": 0.47456523760810937,
"eval_loss": 1.410697102546692,
"eval_runtime": 15.0489,
"eval_samples_per_second": 714.539,
"eval_steps_per_second": 7.509,
"step": 1011
},
{
"epoch": 4.0,
"eval_accuracy": 0.4733562726680926,
"eval_loss": 1.4484425783157349,
"eval_runtime": 15.0431,
"eval_samples_per_second": 714.813,
"eval_steps_per_second": 7.512,
"step": 1348
},
{
"epoch": 4.45,
"grad_norm": 369044.0,
"learning_rate": 4.664688427299703e-05,
"loss": 1.2056,
"step": 1500
},
{
"epoch": 5.0,
"eval_accuracy": 0.47503022412350043,
"eval_loss": 1.4948519468307495,
"eval_runtime": 15.1534,
"eval_samples_per_second": 709.61,
"eval_steps_per_second": 7.457,
"step": 1685
},
{
"epoch": 5.93,
"grad_norm": 454632.0625,
"learning_rate": 4.219584569732938e-05,
"loss": 1.0582,
"step": 2000
},
{
"epoch": 6.0,
"eval_accuracy": 0.4732632753650144,
"eval_loss": 1.5683261156082153,
"eval_runtime": 15.1769,
"eval_samples_per_second": 708.512,
"eval_steps_per_second": 7.446,
"step": 2022
},
{
"epoch": 7.0,
"eval_accuracy": 0.4656374965126011,
"eval_loss": 1.6979163885116577,
"eval_runtime": 15.1051,
"eval_samples_per_second": 711.881,
"eval_steps_per_second": 7.481,
"step": 2359
},
{
"epoch": 7.42,
"grad_norm": 407773.9375,
"learning_rate": 3.774480712166172e-05,
"loss": 0.8549,
"step": 2500
},
{
"epoch": 8.0,
"eval_accuracy": 0.47707616479122106,
"eval_loss": 1.8021113872528076,
"eval_runtime": 15.07,
"eval_samples_per_second": 713.539,
"eval_steps_per_second": 7.498,
"step": 2696
},
{
"epoch": 8.9,
"grad_norm": 589767.5,
"learning_rate": 3.3293768545994065e-05,
"loss": 0.7149,
"step": 3000
},
{
"epoch": 9.0,
"eval_accuracy": 0.477913140518925,
"eval_loss": 1.9680180549621582,
"eval_runtime": 15.2684,
"eval_samples_per_second": 704.265,
"eval_steps_per_second": 7.401,
"step": 3033
},
{
"epoch": 10.0,
"eval_accuracy": 0.4806100623081931,
"eval_loss": 2.1097300052642822,
"eval_runtime": 15.2893,
"eval_samples_per_second": 703.301,
"eval_steps_per_second": 7.391,
"step": 3370
},
{
"epoch": 10.39,
"grad_norm": 584814.1875,
"learning_rate": 2.884272997032641e-05,
"loss": 0.5551,
"step": 3500
},
{
"epoch": 11.0,
"eval_accuracy": 0.46675346414953967,
"eval_loss": 2.3649089336395264,
"eval_runtime": 15.0959,
"eval_samples_per_second": 712.313,
"eval_steps_per_second": 7.485,
"step": 3707
},
{
"epoch": 11.87,
"grad_norm": 582511.875,
"learning_rate": 2.4391691394658753e-05,
"loss": 0.4522,
"step": 4000
},
{
"epoch": 12.0,
"eval_accuracy": 0.47447224030503116,
"eval_loss": 2.432915210723877,
"eval_runtime": 15.1278,
"eval_samples_per_second": 710.811,
"eval_steps_per_second": 7.47,
"step": 4044
},
{
"epoch": 13.0,
"eval_accuracy": 0.4858179112805729,
"eval_loss": 2.5537261962890625,
"eval_runtime": 15.2471,
"eval_samples_per_second": 705.247,
"eval_steps_per_second": 7.411,
"step": 4381
},
{
"epoch": 13.35,
"grad_norm": 696384.0625,
"learning_rate": 1.9940652818991097e-05,
"loss": 0.3587,
"step": 4500
},
{
"epoch": 14.0,
"eval_accuracy": 0.4863758950990421,
"eval_loss": 2.6619648933410645,
"eval_runtime": 15.473,
"eval_samples_per_second": 694.953,
"eval_steps_per_second": 7.303,
"step": 4718
},
{
"epoch": 14.84,
"grad_norm": 512484.6875,
"learning_rate": 1.548961424332344e-05,
"loss": 0.2918,
"step": 5000
},
{
"epoch": 15.0,
"eval_accuracy": 0.4929787036175951,
"eval_loss": 2.762254476547241,
"eval_runtime": 15.2999,
"eval_samples_per_second": 702.817,
"eval_steps_per_second": 7.386,
"step": 5055
},
{
"epoch": 16.0,
"eval_accuracy": 0.48079605691434946,
"eval_loss": 2.8322901725769043,
"eval_runtime": 15.0354,
"eval_samples_per_second": 715.178,
"eval_steps_per_second": 7.516,
"step": 5392
},
{
"epoch": 16.32,
"grad_norm": 442634.8125,
"learning_rate": 1.1038575667655787e-05,
"loss": 0.2289,
"step": 5500
},
{
"epoch": 17.0,
"eval_accuracy": 0.4767971728819864,
"eval_loss": 2.9443678855895996,
"eval_runtime": 15.2489,
"eval_samples_per_second": 705.165,
"eval_steps_per_second": 7.41,
"step": 5729
},
{
"epoch": 17.8,
"grad_norm": 365735.21875,
"learning_rate": 6.587537091988131e-06,
"loss": 0.1962,
"step": 6000
},
{
"epoch": 18.0,
"eval_accuracy": 0.47828512973123777,
"eval_loss": 3.0224764347076416,
"eval_runtime": 15.2048,
"eval_samples_per_second": 707.209,
"eval_steps_per_second": 7.432,
"step": 6066
}
],
"logging_steps": 500,
"max_steps": 6740,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7.642688743213056e+16,
"train_batch_size": 96,
"trial_name": null,
"trial_params": null
}