{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 908,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04405286343612335,
"grad_norm": 10.519313963525699,
"learning_rate": 3.296703296703297e-07,
"loss": 1.4512,
"step": 10
},
{
"epoch": 0.0881057268722467,
"grad_norm": 5.275494720043015,
"learning_rate": 6.593406593406594e-07,
"loss": 1.4723,
"step": 20
},
{
"epoch": 0.13215859030837004,
"grad_norm": 1.8226315526905816,
"learning_rate": 9.890109890109891e-07,
"loss": 1.3932,
"step": 30
},
{
"epoch": 0.1762114537444934,
"grad_norm": 2.044759232085493,
"learning_rate": 1.3186813186813187e-06,
"loss": 1.3093,
"step": 40
},
{
"epoch": 0.22026431718061673,
"grad_norm": 1.7807644006153884,
"learning_rate": 1.6483516483516486e-06,
"loss": 1.2687,
"step": 50
},
{
"epoch": 0.2643171806167401,
"grad_norm": 1.5054649369651574,
"learning_rate": 1.9780219780219782e-06,
"loss": 1.2427,
"step": 60
},
{
"epoch": 0.30837004405286345,
"grad_norm": 1.645138466222819,
"learning_rate": 2.307692307692308e-06,
"loss": 1.2286,
"step": 70
},
{
"epoch": 0.3524229074889868,
"grad_norm": 1.4999909747158735,
"learning_rate": 2.6373626373626375e-06,
"loss": 1.1827,
"step": 80
},
{
"epoch": 0.3964757709251101,
"grad_norm": 1.7582872706972468,
"learning_rate": 2.9670329670329673e-06,
"loss": 1.1941,
"step": 90
},
{
"epoch": 0.44052863436123346,
"grad_norm": 1.693711129252621,
"learning_rate": 2.999101829950985e-06,
"loss": 1.1731,
"step": 100
},
{
"epoch": 0.4845814977973568,
"grad_norm": 1.4467996095205224,
"learning_rate": 2.9959984254953274e-06,
"loss": 1.1148,
"step": 110
},
{
"epoch": 0.5286343612334802,
"grad_norm": 1.5298410629642052,
"learning_rate": 2.9906832852013294e-06,
"loss": 1.191,
"step": 120
},
{
"epoch": 0.5726872246696035,
"grad_norm": 1.4536712154523523,
"learning_rate": 2.9831642671563205e-06,
"loss": 1.1818,
"step": 130
},
{
"epoch": 0.6167400881057269,
"grad_norm": 1.5305738885906355,
"learning_rate": 2.9734524877367604e-06,
"loss": 1.1969,
"step": 140
},
{
"epoch": 0.6607929515418502,
"grad_norm": 1.4595832724906421,
"learning_rate": 2.961562305173399e-06,
"loss": 1.1203,
"step": 150
},
{
"epoch": 0.7048458149779736,
"grad_norm": 1.7304480079500448,
"learning_rate": 2.9475112983235753e-06,
"loss": 1.2301,
"step": 160
},
{
"epoch": 0.748898678414097,
"grad_norm": 1.5486760284827386,
"learning_rate": 2.9313202406820232e-06,
"loss": 1.183,
"step": 170
},
{
"epoch": 0.7929515418502202,
"grad_norm": 1.700519163465852,
"learning_rate": 2.9130130696686196e-06,
"loss": 1.1304,
"step": 180
},
{
"epoch": 0.8370044052863436,
"grad_norm": 1.4763145358774594,
"learning_rate": 2.8926168512384744e-06,
"loss": 1.1618,
"step": 190
},
{
"epoch": 0.8810572687224669,
"grad_norm": 1.5267413192202046,
"learning_rate": 2.870161739866686e-06,
"loss": 1.1491,
"step": 200
},
{
"epoch": 0.9251101321585903,
"grad_norm": 1.733908977152635,
"learning_rate": 2.845680933966922e-06,
"loss": 1.145,
"step": 210
},
{
"epoch": 0.9691629955947136,
"grad_norm": 1.498098134685152,
"learning_rate": 2.8192106268097337e-06,
"loss": 1.2206,
"step": 220
},
{
"epoch": 1.013215859030837,
"grad_norm": 4.7125491501567724,
"learning_rate": 2.7907899530131763e-06,
"loss": 1.133,
"step": 230
},
{
"epoch": 1.0572687224669604,
"grad_norm": 2.038277716111568,
"learning_rate": 2.7604609306848312e-06,
"loss": 1.0399,
"step": 240
},
{
"epoch": 1.1013215859030836,
"grad_norm": 1.8451563548941678,
"learning_rate": 2.7282683993007865e-06,
"loss": 1.007,
"step": 250
},
{
"epoch": 1.145374449339207,
"grad_norm": 1.5438408367179703,
"learning_rate": 2.6942599534133987e-06,
"loss": 1.0139,
"step": 260
},
{
"epoch": 1.1894273127753303,
"grad_norm": 1.6126849019096945,
"learning_rate": 2.658485872285863e-06,
"loss": 1.0493,
"step": 270
},
{
"epoch": 1.2334801762114538,
"grad_norm": 1.9217991771681158,
"learning_rate": 2.6209990455576105e-06,
"loss": 0.9478,
"step": 280
},
{
"epoch": 1.277533039647577,
"grad_norm": 1.981595215749417,
"learning_rate": 2.5818548950504342e-06,
"loss": 1.0108,
"step": 290
},
{
"epoch": 1.3215859030837005,
"grad_norm": 1.6455591880816465,
"learning_rate": 2.541111292830951e-06,
"loss": 1.0294,
"step": 300
},
{
"epoch": 1.3656387665198237,
"grad_norm": 1.5108115682140577,
"learning_rate": 2.4988284756505333e-06,
"loss": 0.9748,
"step": 310
},
{
"epoch": 1.4096916299559472,
"grad_norm": 1.5787304886557472,
"learning_rate": 2.455068955889216e-06,
"loss": 1.0054,
"step": 320
},
{
"epoch": 1.4537444933920705,
"grad_norm": 1.6633458413004865,
"learning_rate": 2.4098974291352257e-06,
"loss": 1.0155,
"step": 330
},
{
"epoch": 1.497797356828194,
"grad_norm": 1.7167391655172333,
"learning_rate": 2.3633806785367873e-06,
"loss": 1.0677,
"step": 340
},
{
"epoch": 1.5418502202643172,
"grad_norm": 1.519612290214899,
"learning_rate": 2.315587476067607e-06,
"loss": 0.987,
"step": 350
},
{
"epoch": 1.5859030837004404,
"grad_norm": 1.6948233763887988,
"learning_rate": 2.2665884808520048e-06,
"loss": 1.0526,
"step": 360
},
{
"epoch": 1.6299559471365639,
"grad_norm": 1.6931931897778194,
"learning_rate": 2.2164561347000213e-06,
"loss": 1.0304,
"step": 370
},
{
"epoch": 1.6740088105726874,
"grad_norm": 1.5818868292083217,
"learning_rate": 2.1652645550069394e-06,
"loss": 0.9921,
"step": 380
},
{
"epoch": 1.7180616740088106,
"grad_norm": 1.670222373489549,
"learning_rate": 2.113089425175561e-06,
"loss": 1.0062,
"step": 390
},
{
"epoch": 1.7621145374449338,
"grad_norm": 1.5676805638818068,
"learning_rate": 2.0600078827232473e-06,
"loss": 0.993,
"step": 400
},
{
"epoch": 1.8061674008810573,
"grad_norm": 1.6518866030477966,
"learning_rate": 2.006098405239142e-06,
"loss": 0.989,
"step": 410
},
{
"epoch": 1.8502202643171806,
"grad_norm": 1.7025140153769915,
"learning_rate": 1.95144069436019e-06,
"loss": 1.0184,
"step": 420
},
{
"epoch": 1.894273127753304,
"grad_norm": 1.6535956905677116,
"learning_rate": 1.896115557937476e-06,
"loss": 1.0655,
"step": 430
},
{
"epoch": 1.9383259911894273,
"grad_norm": 1.6062899976218377,
"learning_rate": 1.8402047905671065e-06,
"loss": 1.0139,
"step": 440
},
{
"epoch": 1.9823788546255505,
"grad_norm": 1.6037621791723693,
"learning_rate": 1.7837910526622437e-06,
"loss": 1.0103,
"step": 450
},
{
"epoch": 2.026431718061674,
"grad_norm": 2.347205562826239,
"learning_rate": 1.726957748245093e-06,
"loss": 0.8977,
"step": 460
},
{
"epoch": 2.0704845814977975,
"grad_norm": 1.9061910666122825,
"learning_rate": 1.6697889016395088e-06,
"loss": 0.8438,
"step": 470
},
{
"epoch": 2.1145374449339207,
"grad_norm": 1.6451786150601784,
"learning_rate": 1.6123690332465296e-06,
"loss": 0.9008,
"step": 480
},
{
"epoch": 2.158590308370044,
"grad_norm": 1.8874201469690022,
"learning_rate": 1.5547830345864886e-06,
"loss": 0.8176,
"step": 490
},
{
"epoch": 2.202643171806167,
"grad_norm": 1.9254603832738357,
"learning_rate": 1.4971160427924554e-06,
"loss": 0.8901,
"step": 500
},
{
"epoch": 2.246696035242291,
"grad_norm": 1.835023323363367,
"learning_rate": 1.439453314740552e-06,
"loss": 0.8247,
"step": 510
},
{
"epoch": 2.290748898678414,
"grad_norm": 1.93123359826113,
"learning_rate": 1.381880101003235e-06,
"loss": 0.8294,
"step": 520
},
{
"epoch": 2.3348017621145374,
"grad_norm": 1.866122498094617,
"learning_rate": 1.3244815198119024e-06,
"loss": 0.9032,
"step": 530
},
{
"epoch": 2.3788546255506606,
"grad_norm": 1.7177415721533669,
"learning_rate": 1.2673424312151519e-06,
"loss": 0.8862,
"step": 540
},
{
"epoch": 2.4229074889867843,
"grad_norm": 2.1997603304322237,
"learning_rate": 1.2105473116187517e-06,
"loss": 0.894,
"step": 550
},
{
"epoch": 2.4669603524229076,
"grad_norm": 1.8715395183959846,
"learning_rate": 1.154180128892796e-06,
"loss": 0.8126,
"step": 560
},
{
"epoch": 2.511013215859031,
"grad_norm": 1.86690865903126,
"learning_rate": 1.0983242182307033e-06,
"loss": 0.8059,
"step": 570
},
{
"epoch": 2.555066079295154,
"grad_norm": 2.0917187570122517,
"learning_rate": 1.0430621589435803e-06,
"loss": 0.8959,
"step": 580
},
{
"epoch": 2.5991189427312777,
"grad_norm": 2.558938317516458,
"learning_rate": 9.884756523721117e-07,
"loss": 0.8393,
"step": 590
},
{
"epoch": 2.643171806167401,
"grad_norm": 1.5987062295416765,
"learning_rate": 9.346454010964724e-07,
"loss": 0.862,
"step": 600
},
{
"epoch": 2.6872246696035242,
"grad_norm": 1.6649451786987866,
"learning_rate": 8.816509896228376e-07,
"loss": 0.859,
"step": 610
},
{
"epoch": 2.7312775330396475,
"grad_norm": 1.6834312437789734,
"learning_rate": 8.295707667228987e-07,
"loss": 0.8348,
"step": 620
},
{
"epoch": 2.7753303964757707,
"grad_norm": 1.7102647759571665,
"learning_rate": 7.784817296003237e-07,
"loss": 0.8668,
"step": 630
},
{
"epoch": 2.8193832599118944,
"grad_norm": 1.7566168287161772,
"learning_rate": 7.284594100554311e-07,
"loss": 0.8627,
"step": 640
},
{
"epoch": 2.8634361233480177,
"grad_norm": 1.7731380983410612,
"learning_rate": 6.7957776281636e-07,
"loss": 0.855,
"step": 650
},
{
"epoch": 2.907488986784141,
"grad_norm": 3.413162667971741,
"learning_rate": 6.31909056201842e-07,
"loss": 0.8849,
"step": 660
},
{
"epoch": 2.951541850220264,
"grad_norm": 1.793806206724264,
"learning_rate": 5.855237652772183e-07,
"loss": 0.8629,
"step": 670
},
{
"epoch": 2.995594713656388,
"grad_norm": 1.7832177184392761,
"learning_rate": 5.404904676616635e-07,
"loss": 0.8796,
"step": 680
},
{
"epoch": 3.039647577092511,
"grad_norm": 13.295009142965908,
"learning_rate": 4.968757421406609e-07,
"loss": 0.779,
"step": 690
},
{
"epoch": 3.0837004405286343,
"grad_norm": 1.8559346739765339,
"learning_rate": 4.547440702336238e-07,
"loss": 0.7582,
"step": 700
},
{
"epoch": 3.1277533039647576,
"grad_norm": 1.7949928357857732,
"learning_rate": 4.1415774086218277e-07,
"loss": 0.7063,
"step": 710
},
{
"epoch": 3.171806167400881,
"grad_norm": 1.778323655929332,
"learning_rate": 3.7517675826009083e-07,
"loss": 0.752,
"step": 720
},
{
"epoch": 3.2158590308370045,
"grad_norm": 6.004368017727455,
"learning_rate": 3.378587532608872e-07,
"loss": 0.7829,
"step": 730
},
{
"epoch": 3.2599118942731278,
"grad_norm": 1.9250826926237672,
"learning_rate": 3.0225889809447925e-07,
"loss": 0.72,
"step": 740
},
{
"epoch": 3.303964757709251,
"grad_norm": 1.8286462728037371,
"learning_rate": 2.684298248186077e-07,
"loss": 0.7852,
"step": 750
},
{
"epoch": 3.3480176211453743,
"grad_norm": 1.8502492223085096,
"learning_rate": 2.3642154750579275e-07,
"loss": 0.7912,
"step": 760
},
{
"epoch": 3.392070484581498,
"grad_norm": 1.8690732598717457,
"learning_rate": 2.0628138830079696e-07,
"loss": 0.7456,
"step": 770
},
{
"epoch": 3.436123348017621,
"grad_norm": 2.841225832952903,
"learning_rate": 1.7805390745792993e-07,
"loss": 0.7654,
"step": 780
},
{
"epoch": 3.4801762114537445,
"grad_norm": 2.2582022273376845,
"learning_rate": 1.5178083746162667e-07,
"loss": 0.7437,
"step": 790
},
{
"epoch": 3.5242290748898677,
"grad_norm": 1.874522268468866,
"learning_rate": 1.2750102132769738e-07,
"loss": 0.7834,
"step": 800
},
{
"epoch": 3.568281938325991,
"grad_norm": 2.037336725579371,
"learning_rate": 1.0525035517647014e-07,
"loss": 0.7943,
"step": 810
},
{
"epoch": 3.6123348017621146,
"grad_norm": 1.7256797980612035,
"learning_rate": 8.50617351627232e-08,
"loss": 0.7826,
"step": 820
},
{
"epoch": 3.656387665198238,
"grad_norm": 1.8660504133848668,
"learning_rate": 6.696500884087259e-08,
"loss": 0.8379,
"step": 830
},
{
"epoch": 3.700440528634361,
"grad_norm": 1.6608110580956394,
"learning_rate": 5.0986931037314666e-08,
"loss": 0.7614,
"step": 840
},
{
"epoch": 3.744493392070485,
"grad_norm": 2.132591912569078,
"learning_rate": 3.7151124295163374e-08,
"loss": 0.8152,
"step": 850
},
{
"epoch": 3.788546255506608,
"grad_norm": 2.142924158927503,
"learning_rate": 2.5478043949868192e-08,
"loss": 0.7517,
"step": 860
},
{
"epoch": 3.8325991189427313,
"grad_norm": 2.1270227065723284,
"learning_rate": 1.5984947887334623e-08,
"loss": 0.7697,
"step": 870
},
{
"epoch": 3.8766519823788546,
"grad_norm": 1.8195791930606864,
"learning_rate": 8.685871029272318e-09,
"loss": 0.7506,
"step": 880
},
{
"epoch": 3.920704845814978,
"grad_norm": 2.2849986922595553,
"learning_rate": 3.5916045834781253e-09,
"loss": 0.7273,
"step": 890
},
{
"epoch": 3.964757709251101,
"grad_norm": 1.8303283767313243,
"learning_rate": 7.096800897425371e-10,
"loss": 0.8123,
"step": 900
},
{
"epoch": 4.0,
"step": 908,
"total_flos": 648773332107264.0,
"train_loss": 0.9660957064397535,
"train_runtime": 3882.5924,
"train_samples_per_second": 5.59,
"train_steps_per_second": 0.234
}
],
"logging_steps": 10,
"max_steps": 908,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 648773332107264.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}