gogpt2-7b-pretrain / trainer_state.json
Chriskuei's picture
nit
6a3a040
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9998971722365039,
"global_step": 4862,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0,
"loss": 11.981,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 3.9970117109141705e-05,
"loss": 12.0789,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 9.331893267009234e-05,
"loss": 10.7133,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.00011407670594843083,
"loss": 8.7339,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.00012721122651399258,
"loss": 8.301,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.00013684136855727938,
"loss": 8.1964,
"step": 50
},
{
"epoch": 0.01,
"eval_accuracy": 0.10310832025117739,
"eval_loss": 8.203125,
"eval_runtime": 6.5764,
"eval_samples_per_second": 5.322,
"eval_steps_per_second": 0.304,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 0.00014444862339428802,
"loss": 8.0553,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 0.00015073705430110066,
"loss": 7.9436,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 0.00015609707636042195,
"loss": 7.8368,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 0.00016076788727202945,
"loss": 7.7333,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 0.00016490670495758757,
"loss": 7.6139,
"step": 100
},
{
"epoch": 0.02,
"eval_accuracy": 0.12459794174079888,
"eval_loss": 7.81640625,
"eval_runtime": 6.6059,
"eval_samples_per_second": 5.298,
"eval_steps_per_second": 0.303,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 0.0001686224178807056,
"loss": 7.4892,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 0.000171993565594773,
"loss": 7.3256,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 0.00017507866443784335,
"loss": 7.1827,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 0.0001779224840062419,
"loss": 6.9698,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 0.00018056004207494319,
"loss": 6.8162,
"step": 150
},
{
"epoch": 0.03,
"eval_accuracy": 0.1567556253270539,
"eval_loss": 7.08203125,
"eval_runtime": 6.5908,
"eval_samples_per_second": 5.31,
"eval_steps_per_second": 0.303,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 0.00018301924610008189,
"loss": 6.6293,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 0.00018532269677939782,
"loss": 6.4114,
"step": 170
},
{
"epoch": 0.04,
"learning_rate": 0.00018748895370481112,
"loss": 6.2911,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 0.00018953344483335556,
"loss": 6.1047,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 0.00019146913367833817,
"loss": 5.9957,
"step": 200
},
{
"epoch": 0.04,
"eval_accuracy": 0.19811616954474098,
"eval_loss": 6.4296875,
"eval_runtime": 6.5897,
"eval_samples_per_second": 5.311,
"eval_steps_per_second": 0.304,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 0.00019330701776944063,
"loss": 5.8281,
"step": 210
},
{
"epoch": 0.05,
"learning_rate": 0.00019505650713185044,
"loss": 5.6927,
"step": 220
},
{
"epoch": 0.05,
"learning_rate": 0.00019672571585424665,
"loss": 5.5564,
"step": 230
},
{
"epoch": 0.05,
"learning_rate": 0.00019832168964685297,
"loss": 5.3813,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 0.0001998505855457085,
"loss": 5.2496,
"step": 250
},
{
"epoch": 0.05,
"eval_accuracy": 0.24055817198674342,
"eval_loss": 5.8203125,
"eval_runtime": 6.6013,
"eval_samples_per_second": 5.302,
"eval_steps_per_second": 0.303,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 0.0001996535296665223,
"loss": 5.13,
"step": 260
},
{
"epoch": 0.06,
"learning_rate": 0.0001992204417496752,
"loss": 5.0354,
"step": 270
},
{
"epoch": 0.06,
"learning_rate": 0.00019878735383282807,
"loss": 4.9021,
"step": 280
},
{
"epoch": 0.06,
"learning_rate": 0.00019835426591598097,
"loss": 4.8181,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 0.00019792117799913384,
"loss": 4.6993,
"step": 300
},
{
"epoch": 0.06,
"eval_accuracy": 0.2641758241758242,
"eval_loss": 5.41796875,
"eval_runtime": 6.6009,
"eval_samples_per_second": 5.302,
"eval_steps_per_second": 0.303,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 0.0001974880900822867,
"loss": 4.6761,
"step": 310
},
{
"epoch": 0.07,
"learning_rate": 0.00019705500216543958,
"loss": 4.5908,
"step": 320
},
{
"epoch": 0.07,
"learning_rate": 0.00019662191424859245,
"loss": 4.5301,
"step": 330
},
{
"epoch": 0.07,
"learning_rate": 0.00019618882633174535,
"loss": 4.4729,
"step": 340
},
{
"epoch": 0.07,
"learning_rate": 0.00019575573841489822,
"loss": 4.3928,
"step": 350
},
{
"epoch": 0.07,
"eval_accuracy": 0.27927437641723357,
"eval_loss": 5.14453125,
"eval_runtime": 6.6101,
"eval_samples_per_second": 5.295,
"eval_steps_per_second": 0.303,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 0.00019532265049805112,
"loss": 4.4083,
"step": 360
},
{
"epoch": 0.08,
"learning_rate": 0.000194889562581204,
"loss": 4.336,
"step": 370
},
{
"epoch": 0.08,
"learning_rate": 0.0001944564746643569,
"loss": 4.2714,
"step": 380
},
{
"epoch": 0.08,
"learning_rate": 0.00019402338674750976,
"loss": 4.2124,
"step": 390
},
{
"epoch": 0.08,
"learning_rate": 0.00019359029883066263,
"loss": 4.2395,
"step": 400
},
{
"epoch": 0.08,
"eval_accuracy": 0.2894470608756323,
"eval_loss": 4.96875,
"eval_runtime": 6.5945,
"eval_samples_per_second": 5.307,
"eval_steps_per_second": 0.303,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 0.0001931572109138155,
"loss": 4.1867,
"step": 410
},
{
"epoch": 0.09,
"learning_rate": 0.0001927241229969684,
"loss": 4.1687,
"step": 420
},
{
"epoch": 0.09,
"learning_rate": 0.00019229103508012127,
"loss": 4.1027,
"step": 430
},
{
"epoch": 0.09,
"learning_rate": 0.00019185794716327414,
"loss": 4.1233,
"step": 440
},
{
"epoch": 0.09,
"learning_rate": 0.00019142485924642704,
"loss": 4.0781,
"step": 450
},
{
"epoch": 0.09,
"eval_accuracy": 0.29638932496075354,
"eval_loss": 4.8515625,
"eval_runtime": 6.5996,
"eval_samples_per_second": 5.303,
"eval_steps_per_second": 0.303,
"step": 450
},
{
"epoch": 0.09,
"learning_rate": 0.0001909917713295799,
"loss": 4.0855,
"step": 460
},
{
"epoch": 0.1,
"learning_rate": 0.0001905586834127328,
"loss": 4.0859,
"step": 470
},
{
"epoch": 0.1,
"learning_rate": 0.00019012559549588568,
"loss": 4.0124,
"step": 480
},
{
"epoch": 0.1,
"learning_rate": 0.00018969250757903855,
"loss": 4.0151,
"step": 490
},
{
"epoch": 0.1,
"learning_rate": 0.00018925941966219142,
"loss": 4.0409,
"step": 500
},
{
"epoch": 0.1,
"eval_accuracy": 0.30181057038199893,
"eval_loss": 4.76953125,
"eval_runtime": 6.6117,
"eval_samples_per_second": 5.294,
"eval_steps_per_second": 0.302,
"step": 500
},
{
"epoch": 0.1,
"learning_rate": 0.00018882633174534431,
"loss": 3.9912,
"step": 510
},
{
"epoch": 0.11,
"learning_rate": 0.00018839324382849718,
"loss": 3.9383,
"step": 520
},
{
"epoch": 0.11,
"learning_rate": 0.00018796015591165008,
"loss": 3.9764,
"step": 530
},
{
"epoch": 0.11,
"learning_rate": 0.00018752706799480295,
"loss": 3.9809,
"step": 540
},
{
"epoch": 0.11,
"learning_rate": 0.00018709398007795585,
"loss": 3.9178,
"step": 550
},
{
"epoch": 0.11,
"eval_accuracy": 0.30575963718820864,
"eval_loss": 4.703125,
"eval_runtime": 6.6265,
"eval_samples_per_second": 5.282,
"eval_steps_per_second": 0.302,
"step": 550
},
{
"epoch": 0.12,
"learning_rate": 0.00018666089216110872,
"loss": 3.9073,
"step": 560
},
{
"epoch": 0.12,
"learning_rate": 0.0001862278042442616,
"loss": 3.9459,
"step": 570
},
{
"epoch": 0.12,
"learning_rate": 0.00018579471632741446,
"loss": 3.9535,
"step": 580
},
{
"epoch": 0.12,
"learning_rate": 0.00018536162841056733,
"loss": 3.8982,
"step": 590
},
{
"epoch": 0.12,
"learning_rate": 0.00018492854049372023,
"loss": 3.834,
"step": 600
},
{
"epoch": 0.12,
"eval_accuracy": 0.3082574568288854,
"eval_loss": 4.65625,
"eval_runtime": 6.6135,
"eval_samples_per_second": 5.292,
"eval_steps_per_second": 0.302,
"step": 600
},
{
"epoch": 0.13,
"learning_rate": 0.0001844954525768731,
"loss": 3.8759,
"step": 610
},
{
"epoch": 0.13,
"learning_rate": 0.000184062364660026,
"loss": 3.8835,
"step": 620
},
{
"epoch": 0.13,
"learning_rate": 0.00018362927674317887,
"loss": 3.9003,
"step": 630
},
{
"epoch": 0.13,
"learning_rate": 0.00018319618882633177,
"loss": 3.8538,
"step": 640
},
{
"epoch": 0.13,
"learning_rate": 0.00018276310090948464,
"loss": 3.8316,
"step": 650
},
{
"epoch": 0.13,
"eval_accuracy": 0.30981336124193265,
"eval_loss": 4.625,
"eval_runtime": 6.6039,
"eval_samples_per_second": 5.3,
"eval_steps_per_second": 0.303,
"step": 650
},
{
"epoch": 0.14,
"learning_rate": 0.00018233001299263754,
"loss": 3.9085,
"step": 660
},
{
"epoch": 0.14,
"learning_rate": 0.00018189692507579038,
"loss": 3.7825,
"step": 670
},
{
"epoch": 0.14,