dragon-v0.1 / trainer_state.json
Arotte's picture
First version
3cd1652
raw
history blame
No virus
19.5 kB
{
"best_metric": 3.029510498046875,
"best_model_checkpoint": "trained_models/microsoftDialoGPTmedium_crd3/checkpoint-9912",
"epoch": 2.957040572792363,
"eval_steps": 168,
"global_step": 9912,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 4.949880668257757e-05,
"loss": 3.805,
"step": 168
},
{
"epoch": 0.05,
"eval_loss": 3.3926753997802734,
"eval_runtime": 142.7844,
"eval_samples_per_second": 29.653,
"eval_steps_per_second": 3.712,
"step": 168
},
{
"epoch": 0.1,
"learning_rate": 4.899761336515513e-05,
"loss": 3.3486,
"step": 336
},
{
"epoch": 0.1,
"eval_loss": 3.2977957725524902,
"eval_runtime": 142.7066,
"eval_samples_per_second": 29.669,
"eval_steps_per_second": 3.714,
"step": 336
},
{
"epoch": 0.15,
"learning_rate": 4.84964200477327e-05,
"loss": 3.2539,
"step": 504
},
{
"epoch": 0.15,
"eval_loss": 3.2512104511260986,
"eval_runtime": 142.7113,
"eval_samples_per_second": 29.668,
"eval_steps_per_second": 3.714,
"step": 504
},
{
"epoch": 0.2,
"learning_rate": 4.7995226730310264e-05,
"loss": 3.2009,
"step": 672
},
{
"epoch": 0.2,
"eval_loss": 3.221975564956665,
"eval_runtime": 142.7296,
"eval_samples_per_second": 29.664,
"eval_steps_per_second": 3.713,
"step": 672
},
{
"epoch": 0.25,
"learning_rate": 4.749403341288783e-05,
"loss": 3.1685,
"step": 840
},
{
"epoch": 0.25,
"eval_loss": 3.197237014770508,
"eval_runtime": 142.735,
"eval_samples_per_second": 29.663,
"eval_steps_per_second": 3.713,
"step": 840
},
{
"epoch": 0.3,
"learning_rate": 4.6992840095465395e-05,
"loss": 3.1332,
"step": 1008
},
{
"epoch": 0.3,
"eval_loss": 3.1779494285583496,
"eval_runtime": 142.7443,
"eval_samples_per_second": 29.661,
"eval_steps_per_second": 3.713,
"step": 1008
},
{
"epoch": 0.35,
"learning_rate": 4.649164677804296e-05,
"loss": 3.1231,
"step": 1176
},
{
"epoch": 0.35,
"eval_loss": 3.163996696472168,
"eval_runtime": 142.7827,
"eval_samples_per_second": 29.653,
"eval_steps_per_second": 3.712,
"step": 1176
},
{
"epoch": 0.4,
"learning_rate": 4.5990453460620526e-05,
"loss": 3.1005,
"step": 1344
},
{
"epoch": 0.4,
"eval_loss": 3.1513357162475586,
"eval_runtime": 142.6836,
"eval_samples_per_second": 29.674,
"eval_steps_per_second": 3.715,
"step": 1344
},
{
"epoch": 0.45,
"learning_rate": 4.548926014319809e-05,
"loss": 3.0952,
"step": 1512
},
{
"epoch": 0.45,
"eval_loss": 3.1407930850982666,
"eval_runtime": 142.6804,
"eval_samples_per_second": 29.675,
"eval_steps_per_second": 3.715,
"step": 1512
},
{
"epoch": 0.5,
"learning_rate": 4.498806682577566e-05,
"loss": 3.0903,
"step": 1680
},
{
"epoch": 0.5,
"eval_loss": 3.1297414302825928,
"eval_runtime": 142.7084,
"eval_samples_per_second": 29.669,
"eval_steps_per_second": 3.714,
"step": 1680
},
{
"epoch": 0.55,
"learning_rate": 4.448687350835322e-05,
"loss": 3.0737,
"step": 1848
},
{
"epoch": 0.55,
"eval_loss": 3.120724678039551,
"eval_runtime": 142.7344,
"eval_samples_per_second": 29.663,
"eval_steps_per_second": 3.713,
"step": 1848
},
{
"epoch": 0.6,
"learning_rate": 4.398568019093079e-05,
"loss": 3.0681,
"step": 2016
},
{
"epoch": 0.6,
"eval_loss": 3.1170568466186523,
"eval_runtime": 142.7306,
"eval_samples_per_second": 29.664,
"eval_steps_per_second": 3.713,
"step": 2016
},
{
"epoch": 0.65,
"learning_rate": 4.348448687350836e-05,
"loss": 3.034,
"step": 2184
},
{
"epoch": 0.65,
"eval_loss": 3.1076815128326416,
"eval_runtime": 142.7258,
"eval_samples_per_second": 29.665,
"eval_steps_per_second": 3.713,
"step": 2184
},
{
"epoch": 0.7,
"learning_rate": 4.298329355608592e-05,
"loss": 3.0319,
"step": 2352
},
{
"epoch": 0.7,
"eval_loss": 3.1014297008514404,
"eval_runtime": 142.7172,
"eval_samples_per_second": 29.667,
"eval_steps_per_second": 3.714,
"step": 2352
},
{
"epoch": 0.75,
"learning_rate": 4.2482100238663484e-05,
"loss": 3.0075,
"step": 2520
},
{
"epoch": 0.75,
"eval_loss": 3.0964250564575195,
"eval_runtime": 142.7047,
"eval_samples_per_second": 29.67,
"eval_steps_per_second": 3.714,
"step": 2520
},
{
"epoch": 0.8,
"learning_rate": 4.1980906921241056e-05,
"loss": 3.0282,
"step": 2688
},
{
"epoch": 0.8,
"eval_loss": 3.0913186073303223,
"eval_runtime": 142.7245,
"eval_samples_per_second": 29.666,
"eval_steps_per_second": 3.713,
"step": 2688
},
{
"epoch": 0.85,
"learning_rate": 4.1479713603818615e-05,
"loss": 3.0055,
"step": 2856
},
{
"epoch": 0.85,
"eval_loss": 3.0834176540374756,
"eval_runtime": 142.7795,
"eval_samples_per_second": 29.654,
"eval_steps_per_second": 3.712,
"step": 2856
},
{
"epoch": 0.9,
"learning_rate": 4.097852028639618e-05,
"loss": 3.0101,
"step": 3024
},
{
"epoch": 0.9,
"eval_loss": 3.0793018341064453,
"eval_runtime": 142.7762,
"eval_samples_per_second": 29.655,
"eval_steps_per_second": 3.712,
"step": 3024
},
{
"epoch": 0.95,
"learning_rate": 4.047732696897375e-05,
"loss": 3.0017,
"step": 3192
},
{
"epoch": 0.95,
"eval_loss": 3.075371742248535,
"eval_runtime": 142.7297,
"eval_samples_per_second": 29.664,
"eval_steps_per_second": 3.713,
"step": 3192
},
{
"epoch": 1.0,
"learning_rate": 3.997613365155131e-05,
"loss": 2.977,
"step": 3360
},
{
"epoch": 1.0,
"eval_loss": 3.0743861198425293,
"eval_runtime": 142.7454,
"eval_samples_per_second": 29.661,
"eval_steps_per_second": 3.713,
"step": 3360
},
{
"epoch": 1.05,
"learning_rate": 3.9474940334128877e-05,
"loss": 2.8771,
"step": 3528
},
{
"epoch": 1.05,
"eval_loss": 3.0757055282592773,
"eval_runtime": 142.7589,
"eval_samples_per_second": 29.658,
"eval_steps_per_second": 3.713,
"step": 3528
},
{
"epoch": 1.1,
"learning_rate": 3.897374701670645e-05,
"loss": 2.8633,
"step": 3696
},
{
"epoch": 1.1,
"eval_loss": 3.0775437355041504,
"eval_runtime": 142.7329,
"eval_samples_per_second": 29.664,
"eval_steps_per_second": 3.713,
"step": 3696
},
{
"epoch": 1.15,
"learning_rate": 3.8472553699284014e-05,
"loss": 2.8591,
"step": 3864
},
{
"epoch": 1.15,
"eval_loss": 3.0708892345428467,
"eval_runtime": 142.7479,
"eval_samples_per_second": 29.661,
"eval_steps_per_second": 3.713,
"step": 3864
},
{
"epoch": 1.2,
"learning_rate": 3.797136038186157e-05,
"loss": 2.8625,
"step": 4032
},
{
"epoch": 1.2,
"eval_loss": 3.0683975219726562,
"eval_runtime": 142.757,
"eval_samples_per_second": 29.659,
"eval_steps_per_second": 3.713,
"step": 4032
},
{
"epoch": 1.25,
"learning_rate": 3.7470167064439145e-05,
"loss": 2.8605,
"step": 4200
},
{
"epoch": 1.25,
"eval_loss": 3.066983222961426,
"eval_runtime": 142.718,
"eval_samples_per_second": 29.667,
"eval_steps_per_second": 3.714,
"step": 4200
},
{
"epoch": 1.3,
"learning_rate": 3.696897374701671e-05,
"loss": 2.8466,
"step": 4368
},
{
"epoch": 1.3,
"eval_loss": 3.0646440982818604,
"eval_runtime": 142.755,
"eval_samples_per_second": 29.659,
"eval_steps_per_second": 3.713,
"step": 4368
},
{
"epoch": 1.35,
"learning_rate": 3.6467780429594276e-05,
"loss": 2.8398,
"step": 4536
},
{
"epoch": 1.35,
"eval_loss": 3.0618984699249268,
"eval_runtime": 142.7437,
"eval_samples_per_second": 29.662,
"eval_steps_per_second": 3.713,
"step": 4536
},
{
"epoch": 1.4,
"learning_rate": 3.596658711217184e-05,
"loss": 2.8502,
"step": 4704
},
{
"epoch": 1.4,
"eval_loss": 3.059511661529541,
"eval_runtime": 142.746,
"eval_samples_per_second": 29.661,
"eval_steps_per_second": 3.713,
"step": 4704
},
{
"epoch": 1.45,
"learning_rate": 3.546539379474941e-05,
"loss": 2.8523,
"step": 4872
},
{
"epoch": 1.45,
"eval_loss": 3.0564985275268555,
"eval_runtime": 142.7041,
"eval_samples_per_second": 29.67,
"eval_steps_per_second": 3.714,
"step": 4872
},
{
"epoch": 1.5,
"learning_rate": 3.496420047732697e-05,
"loss": 2.8545,
"step": 5040
},
{
"epoch": 1.5,
"eval_loss": 3.0538723468780518,
"eval_runtime": 142.7089,
"eval_samples_per_second": 29.669,
"eval_steps_per_second": 3.714,
"step": 5040
},
{
"epoch": 1.55,
"learning_rate": 3.446300715990454e-05,
"loss": 2.8431,
"step": 5208
},
{
"epoch": 1.55,
"eval_loss": 3.053504705429077,
"eval_runtime": 142.7279,
"eval_samples_per_second": 29.665,
"eval_steps_per_second": 3.713,
"step": 5208
},
{
"epoch": 1.6,
"learning_rate": 3.39618138424821e-05,
"loss": 2.865,
"step": 5376
},
{
"epoch": 1.6,
"eval_loss": 3.0489370822906494,
"eval_runtime": 142.7484,
"eval_samples_per_second": 29.661,
"eval_steps_per_second": 3.713,
"step": 5376
},
{
"epoch": 1.65,
"learning_rate": 3.346062052505967e-05,
"loss": 2.8412,
"step": 5544
},
{
"epoch": 1.65,
"eval_loss": 3.048715114593506,
"eval_runtime": 142.7028,
"eval_samples_per_second": 29.67,
"eval_steps_per_second": 3.714,
"step": 5544
},
{
"epoch": 1.7,
"learning_rate": 3.2959427207637234e-05,
"loss": 2.8382,
"step": 5712
},
{
"epoch": 1.7,
"eval_loss": 3.0453927516937256,
"eval_runtime": 142.6981,
"eval_samples_per_second": 29.671,
"eval_steps_per_second": 3.714,
"step": 5712
},
{
"epoch": 1.75,
"learning_rate": 3.24582338902148e-05,
"loss": 2.8356,
"step": 5880
},
{
"epoch": 1.75,
"eval_loss": 3.0460968017578125,
"eval_runtime": 142.6846,
"eval_samples_per_second": 29.674,
"eval_steps_per_second": 3.714,
"step": 5880
},
{
"epoch": 1.8,
"learning_rate": 3.1957040572792365e-05,
"loss": 2.8265,
"step": 6048
},
{
"epoch": 1.8,
"eval_loss": 3.042219400405884,
"eval_runtime": 142.7165,
"eval_samples_per_second": 29.667,
"eval_steps_per_second": 3.714,
"step": 6048
},
{
"epoch": 1.85,
"learning_rate": 3.145584725536993e-05,
"loss": 2.8301,
"step": 6216
},
{
"epoch": 1.85,
"eval_loss": 3.0397562980651855,
"eval_runtime": 142.7319,
"eval_samples_per_second": 29.664,
"eval_steps_per_second": 3.713,
"step": 6216
},
{
"epoch": 1.9,
"learning_rate": 3.0954653937947496e-05,
"loss": 2.8286,
"step": 6384
},
{
"epoch": 1.9,
"eval_loss": 3.038638114929199,
"eval_runtime": 142.7562,
"eval_samples_per_second": 29.659,
"eval_steps_per_second": 3.713,
"step": 6384
},
{
"epoch": 1.95,
"learning_rate": 3.045346062052506e-05,
"loss": 2.8416,
"step": 6552
},
{
"epoch": 1.95,
"eval_loss": 3.035384178161621,
"eval_runtime": 142.669,
"eval_samples_per_second": 29.677,
"eval_steps_per_second": 3.715,
"step": 6552
},
{
"epoch": 2.0,
"learning_rate": 2.9952267303102627e-05,
"loss": 2.8179,
"step": 6720
},
{
"epoch": 2.0,
"eval_loss": 3.040719509124756,
"eval_runtime": 142.723,
"eval_samples_per_second": 29.666,
"eval_steps_per_second": 3.713,
"step": 6720
},
{
"epoch": 2.05,
"learning_rate": 2.9451073985680195e-05,
"loss": 2.6965,
"step": 6888
},
{
"epoch": 2.05,
"eval_loss": 3.0478203296661377,
"eval_runtime": 142.7498,
"eval_samples_per_second": 29.66,
"eval_steps_per_second": 3.713,
"step": 6888
},
{
"epoch": 2.11,
"learning_rate": 2.8949880668257757e-05,
"loss": 2.7437,
"step": 7056
},
{
"epoch": 2.11,
"eval_loss": 3.048868179321289,
"eval_runtime": 142.7646,
"eval_samples_per_second": 29.657,
"eval_steps_per_second": 3.712,
"step": 7056
},
{
"epoch": 2.16,
"learning_rate": 2.8448687350835323e-05,
"loss": 2.7252,
"step": 7224
},
{
"epoch": 2.16,
"eval_loss": 3.045444965362549,
"eval_runtime": 142.7306,
"eval_samples_per_second": 29.664,
"eval_steps_per_second": 3.713,
"step": 7224
},
{
"epoch": 2.21,
"learning_rate": 2.794749403341289e-05,
"loss": 2.718,
"step": 7392
},
{
"epoch": 2.21,
"eval_loss": 3.04612398147583,
"eval_runtime": 142.7393,
"eval_samples_per_second": 29.662,
"eval_steps_per_second": 3.713,
"step": 7392
},
{
"epoch": 2.26,
"learning_rate": 2.7446300715990454e-05,
"loss": 2.7246,
"step": 7560
},
{
"epoch": 2.26,
"eval_loss": 3.0447964668273926,
"eval_runtime": 142.6751,
"eval_samples_per_second": 29.676,
"eval_steps_per_second": 3.715,
"step": 7560
},
{
"epoch": 2.31,
"learning_rate": 2.694510739856802e-05,
"loss": 2.7365,
"step": 7728
},
{
"epoch": 2.31,
"eval_loss": 3.0433876514434814,
"eval_runtime": 142.7111,
"eval_samples_per_second": 29.668,
"eval_steps_per_second": 3.714,
"step": 7728
},
{
"epoch": 2.36,
"learning_rate": 2.6443914081145588e-05,
"loss": 2.7242,
"step": 7896
},
{
"epoch": 2.36,
"eval_loss": 3.044072151184082,
"eval_runtime": 142.6648,
"eval_samples_per_second": 29.678,
"eval_steps_per_second": 3.715,
"step": 7896
},
{
"epoch": 2.41,
"learning_rate": 2.594272076372315e-05,
"loss": 2.7343,
"step": 8064
},
{
"epoch": 2.41,
"eval_loss": 3.0415401458740234,
"eval_runtime": 142.737,
"eval_samples_per_second": 29.663,
"eval_steps_per_second": 3.713,
"step": 8064
},
{
"epoch": 2.46,
"learning_rate": 2.5441527446300715e-05,
"loss": 2.7312,
"step": 8232
},
{
"epoch": 2.46,
"eval_loss": 3.0406272411346436,
"eval_runtime": 142.7448,
"eval_samples_per_second": 29.661,
"eval_steps_per_second": 3.713,
"step": 8232
},
{
"epoch": 2.51,
"learning_rate": 2.494033412887828e-05,
"loss": 2.7359,
"step": 8400
},
{
"epoch": 2.51,
"eval_loss": 3.0369086265563965,
"eval_runtime": 142.71,
"eval_samples_per_second": 29.669,
"eval_steps_per_second": 3.714,
"step": 8400
},
{
"epoch": 2.56,
"learning_rate": 2.443914081145585e-05,
"loss": 2.714,
"step": 8568
},
{
"epoch": 2.56,
"eval_loss": 3.039921522140503,
"eval_runtime": 142.7829,
"eval_samples_per_second": 29.653,
"eval_steps_per_second": 3.712,
"step": 8568
},
{
"epoch": 2.61,
"learning_rate": 2.3937947494033415e-05,
"loss": 2.7311,
"step": 8736
},
{
"epoch": 2.61,
"eval_loss": 3.0377085208892822,
"eval_runtime": 142.7212,
"eval_samples_per_second": 29.666,
"eval_steps_per_second": 3.714,
"step": 8736
},
{
"epoch": 2.66,
"learning_rate": 2.3436754176610977e-05,
"loss": 2.7191,
"step": 8904
},
{
"epoch": 2.66,
"eval_loss": 3.036844253540039,
"eval_runtime": 142.7078,
"eval_samples_per_second": 29.669,
"eval_steps_per_second": 3.714,
"step": 8904
},
{
"epoch": 2.71,
"learning_rate": 2.2935560859188546e-05,
"loss": 2.7246,
"step": 9072
},
{
"epoch": 2.71,
"eval_loss": 3.0361039638519287,
"eval_runtime": 142.7607,
"eval_samples_per_second": 29.658,
"eval_steps_per_second": 3.713,
"step": 9072
},
{
"epoch": 2.76,
"learning_rate": 2.243436754176611e-05,
"loss": 2.721,
"step": 9240
},
{
"epoch": 2.76,
"eval_loss": 3.0351145267486572,
"eval_runtime": 142.7366,
"eval_samples_per_second": 29.663,
"eval_steps_per_second": 3.713,
"step": 9240
},
{
"epoch": 2.81,
"learning_rate": 2.1933174224343677e-05,
"loss": 2.71,
"step": 9408
},
{
"epoch": 2.81,
"eval_loss": 3.032681941986084,
"eval_runtime": 142.729,
"eval_samples_per_second": 29.665,
"eval_steps_per_second": 3.713,
"step": 9408
},
{
"epoch": 2.86,
"learning_rate": 2.1431980906921242e-05,
"loss": 2.7252,
"step": 9576
},
{
"epoch": 2.86,
"eval_loss": 3.033348798751831,
"eval_runtime": 142.7408,
"eval_samples_per_second": 29.662,
"eval_steps_per_second": 3.713,
"step": 9576
},
{
"epoch": 2.91,
"learning_rate": 2.0930787589498808e-05,
"loss": 2.7181,
"step": 9744
},
{
"epoch": 2.91,
"eval_loss": 3.0330820083618164,
"eval_runtime": 142.7512,
"eval_samples_per_second": 29.66,
"eval_steps_per_second": 3.713,
"step": 9744
},
{
"epoch": 2.96,
"learning_rate": 2.0429594272076373e-05,
"loss": 2.7141,
"step": 9912
},
{
"epoch": 2.96,
"eval_loss": 3.029510498046875,
"eval_runtime": 142.7075,
"eval_samples_per_second": 29.669,
"eval_steps_per_second": 3.714,
"step": 9912
}
],
"logging_steps": 168,
"max_steps": 16760,
"num_train_epochs": 5,
"save_steps": 168,
"total_flos": 3.221291229039821e+16,
"trial_name": null,
"trial_params": null
}