PiccoviralesGPT / trainer_state.json
avuhong's picture
Upload 16 files
63b1277
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 31.998864926220204,
"global_step": 7040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.8224893386183709,
"eval_loss": 1.1622651815414429,
"eval_runtime": 5.9816,
"eval_samples_per_second": 15.213,
"eval_steps_per_second": 7.69,
"step": 220
},
{
"epoch": 2.0,
"eval_accuracy": 0.8538665635439829,
"eval_loss": 0.9566460251808167,
"eval_runtime": 5.9768,
"eval_samples_per_second": 15.226,
"eval_steps_per_second": 7.696,
"step": 440
},
{
"epoch": 2.27,
"learning_rate": 4.6448863636363636e-05,
"loss": 1.1942,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8709140322043548,
"eval_loss": 0.8456286191940308,
"eval_runtime": 5.9871,
"eval_samples_per_second": 15.199,
"eval_steps_per_second": 7.683,
"step": 660
},
{
"epoch": 4.0,
"eval_accuracy": 0.8801198801198801,
"eval_loss": 0.7718582153320312,
"eval_runtime": 5.6728,
"eval_samples_per_second": 16.041,
"eval_steps_per_second": 8.109,
"step": 880
},
{
"epoch": 4.54,
"learning_rate": 4.289772727272727e-05,
"loss": 0.7805,
"step": 1000
},
{
"epoch": 5.0,
"eval_accuracy": 0.8871880807364678,
"eval_loss": 0.7224407196044922,
"eval_runtime": 5.988,
"eval_samples_per_second": 15.197,
"eval_steps_per_second": 7.682,
"step": 1100
},
{
"epoch": 6.0,
"eval_accuracy": 0.892816860558796,
"eval_loss": 0.6894707679748535,
"eval_runtime": 5.9958,
"eval_samples_per_second": 15.177,
"eval_steps_per_second": 7.672,
"step": 1320
},
{
"epoch": 6.82,
"learning_rate": 3.934659090909091e-05,
"loss": 0.6257,
"step": 1500
},
{
"epoch": 7.0,
"eval_accuracy": 0.8972103165651553,
"eval_loss": 0.6574080586433411,
"eval_runtime": 5.9909,
"eval_samples_per_second": 15.19,
"eval_steps_per_second": 7.678,
"step": 1540
},
{
"epoch": 8.0,
"eval_accuracy": 0.9014426433781273,
"eval_loss": 0.6289474368095398,
"eval_runtime": 5.6911,
"eval_samples_per_second": 15.99,
"eval_steps_per_second": 8.083,
"step": 1760
},
{
"epoch": 9.0,
"eval_accuracy": 0.9045470658373884,
"eval_loss": 0.6054205298423767,
"eval_runtime": 5.6786,
"eval_samples_per_second": 16.025,
"eval_steps_per_second": 8.101,
"step": 1980
},
{
"epoch": 9.09,
"learning_rate": 3.579545454545455e-05,
"loss": 0.5385,
"step": 2000
},
{
"epoch": 10.0,
"eval_accuracy": 0.9076622302428754,
"eval_loss": 0.5881273746490479,
"eval_runtime": 6.0014,
"eval_samples_per_second": 15.163,
"eval_steps_per_second": 7.665,
"step": 2200
},
{
"epoch": 11.0,
"eval_accuracy": 0.9101865876059424,
"eval_loss": 0.5709272623062134,
"eval_runtime": 5.9886,
"eval_samples_per_second": 15.195,
"eval_steps_per_second": 7.681,
"step": 2420
},
{
"epoch": 11.36,
"learning_rate": 3.2244318181818185e-05,
"loss": 0.4778,
"step": 2500
},
{
"epoch": 12.0,
"eval_accuracy": 0.9120986540341379,
"eval_loss": 0.5591339468955994,
"eval_runtime": 5.9874,
"eval_samples_per_second": 15.199,
"eval_steps_per_second": 7.683,
"step": 2640
},
{
"epoch": 13.0,
"eval_accuracy": 0.9142900110642046,
"eval_loss": 0.5496613383293152,
"eval_runtime": 5.9826,
"eval_samples_per_second": 15.211,
"eval_steps_per_second": 7.689,
"step": 2860
},
{
"epoch": 13.64,
"learning_rate": 2.870028409090909e-05,
"loss": 0.427,
"step": 3000
},
{
"epoch": 14.0,
"eval_accuracy": 0.9161053999763678,
"eval_loss": 0.5385328531265259,
"eval_runtime": 5.9885,
"eval_samples_per_second": 15.196,
"eval_steps_per_second": 7.681,
"step": 3080
},
{
"epoch": 15.0,
"eval_accuracy": 0.9179637566734341,
"eval_loss": 0.5258467793464661,
"eval_runtime": 5.9812,
"eval_samples_per_second": 15.214,
"eval_steps_per_second": 7.691,
"step": 3300
},
{
"epoch": 15.91,
"learning_rate": 2.5149147727272725e-05,
"loss": 0.394,
"step": 3500
},
{
"epoch": 16.0,
"eval_accuracy": 0.9195428227686292,
"eval_loss": 0.5170450806617737,
"eval_runtime": 5.6775,
"eval_samples_per_second": 16.028,
"eval_steps_per_second": 8.102,
"step": 3520
},
{
"epoch": 17.0,
"eval_accuracy": 0.9211648566487276,
"eval_loss": 0.5156892538070679,
"eval_runtime": 5.9945,
"eval_samples_per_second": 15.18,
"eval_steps_per_second": 7.674,
"step": 3740
},
{
"epoch": 18.0,
"eval_accuracy": 0.9220671801316963,
"eval_loss": 0.5037761926651001,
"eval_runtime": 5.687,
"eval_samples_per_second": 16.001,
"eval_steps_per_second": 8.089,
"step": 3960
},
{
"epoch": 18.18,
"learning_rate": 2.1598011363636363e-05,
"loss": 0.363,
"step": 4000
},
{
"epoch": 19.0,
"eval_accuracy": 0.9233884395174717,
"eval_loss": 0.49766021966934204,
"eval_runtime": 5.6849,
"eval_samples_per_second": 16.007,
"eval_steps_per_second": 8.092,
"step": 4180
},
{
"epoch": 20.0,
"eval_accuracy": 0.9236462462268914,
"eval_loss": 0.4975946843624115,
"eval_runtime": 5.9923,
"eval_samples_per_second": 15.186,
"eval_steps_per_second": 7.677,
"step": 4400
},
{
"epoch": 20.45,
"learning_rate": 1.8046875000000003e-05,
"loss": 0.3392,
"step": 4500
},
{
"epoch": 21.0,
"eval_accuracy": 0.9246882150107957,
"eval_loss": 0.49241966009140015,
"eval_runtime": 5.9923,
"eval_samples_per_second": 15.186,
"eval_steps_per_second": 7.677,
"step": 4620
},
{
"epoch": 22.0,
"eval_accuracy": 0.9255046029239578,
"eval_loss": 0.4887617826461792,
"eval_runtime": 5.69,
"eval_samples_per_second": 15.993,
"eval_steps_per_second": 8.084,
"step": 4840
},
{
"epoch": 22.73,
"learning_rate": 1.4495738636363637e-05,
"loss": 0.33,
"step": 5000
},
{
"epoch": 23.0,
"eval_accuracy": 0.9262028294286359,
"eval_loss": 0.4889785051345825,
"eval_runtime": 6.0024,
"eval_samples_per_second": 15.161,
"eval_steps_per_second": 7.664,
"step": 5060
},
{
"epoch": 24.0,
"eval_accuracy": 0.9267936364710558,
"eval_loss": 0.4856303334236145,
"eval_runtime": 5.9867,
"eval_samples_per_second": 15.2,
"eval_steps_per_second": 7.684,
"step": 5280
},
{
"epoch": 25.0,
"learning_rate": 1.0951704545454545e-05,
"loss": 0.3058,
"step": 5500
},
{
"epoch": 25.0,
"eval_accuracy": 0.9275348307606373,
"eval_loss": 0.4802783131599426,
"eval_runtime": 5.6869,
"eval_samples_per_second": 16.002,
"eval_steps_per_second": 8.089,
"step": 5500
},
{
"epoch": 26.0,
"eval_accuracy": 0.9277389277389277,
"eval_loss": 0.47845765948295593,
"eval_runtime": 5.9773,
"eval_samples_per_second": 15.224,
"eval_steps_per_second": 7.696,
"step": 5720
},
{
"epoch": 27.0,
"eval_accuracy": 0.9280611861257022,
"eval_loss": 0.4813348948955536,
"eval_runtime": 5.6869,
"eval_samples_per_second": 16.002,
"eval_steps_per_second": 8.089,
"step": 5940
},
{
"epoch": 27.27,
"learning_rate": 7.407670454545455e-06,
"loss": 0.2973,
"step": 6000
},
{
"epoch": 28.0,
"eval_accuracy": 0.9281793475341863,
"eval_loss": 0.4798637628555298,
"eval_runtime": 5.6812,
"eval_samples_per_second": 16.018,
"eval_steps_per_second": 8.097,
"step": 6160
},
{
"epoch": 29.0,
"eval_accuracy": 0.9285230898134124,
"eval_loss": 0.47730037569999695,
"eval_runtime": 5.9912,
"eval_samples_per_second": 15.189,
"eval_steps_per_second": 7.678,
"step": 6380
},
{
"epoch": 29.54,
"learning_rate": 3.856534090909091e-06,
"loss": 0.2931,
"step": 6500
},
{
"epoch": 30.0,
"eval_accuracy": 0.9285982834369931,
"eval_loss": 0.4778377413749695,
"eval_runtime": 5.9957,
"eval_samples_per_second": 15.177,
"eval_steps_per_second": 7.672,
"step": 6600
},
{
"epoch": 31.0,
"eval_accuracy": 0.9289527676624451,
"eval_loss": 0.4756244122982025,
"eval_runtime": 5.6812,
"eval_samples_per_second": 16.018,
"eval_steps_per_second": 8.097,
"step": 6820
},
{
"epoch": 31.82,
"learning_rate": 3.053977272727273e-07,
"loss": 0.2879,
"step": 7000
},
{
"epoch": 32.0,
"eval_accuracy": 0.9289527676624451,
"eval_loss": 0.47753414511680603,
"eval_runtime": 5.6774,
"eval_samples_per_second": 16.029,
"eval_steps_per_second": 8.102,
"step": 7040
},
{
"epoch": 32.0,
"step": 7040,
"total_flos": 2.4539422830415053e+17,
"train_loss": 0.47427067851478405,
"train_runtime": 10360.152,
"train_samples_per_second": 5.442,
"train_steps_per_second": 0.68
}
],
"max_steps": 7040,
"num_train_epochs": 32,
"total_flos": 2.4539422830415053e+17,
"trial_name": null,
"trial_params": null
}