{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999360204734485,
"eval_steps": 500,
"global_step": 781,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03198976327575176,
"grad_norm": 0.16216526925563812,
"learning_rate": 6.329113924050633e-05,
"loss": 0.9047,
"step": 25
},
{
"epoch": 0.06397952655150352,
"grad_norm": 0.22862769663333893,
"learning_rate": 0.00012658227848101267,
"loss": 0.7593,
"step": 50
},
{
"epoch": 0.09596928982725528,
"grad_norm": 0.2344927191734314,
"learning_rate": 0.00018987341772151899,
"loss": 0.6731,
"step": 75
},
{
"epoch": 0.12795905310300704,
"grad_norm": 0.2598811686038971,
"learning_rate": 0.00019401709401709402,
"loss": 0.6604,
"step": 100
},
{
"epoch": 0.1599488163787588,
"grad_norm": 0.2400255650281906,
"learning_rate": 0.0001868945868945869,
"loss": 0.6304,
"step": 125
},
{
"epoch": 0.19193857965451055,
"grad_norm": 0.24225808680057526,
"learning_rate": 0.00017977207977207978,
"loss": 0.6428,
"step": 150
},
{
"epoch": 0.22392834293026231,
"grad_norm": 0.22859220206737518,
"learning_rate": 0.00017264957264957268,
"loss": 0.6154,
"step": 175
},
{
"epoch": 0.2559181062060141,
"grad_norm": 0.23886021971702576,
"learning_rate": 0.00016552706552706555,
"loss": 0.6001,
"step": 200
},
{
"epoch": 0.28790786948176583,
"grad_norm": 0.22877049446105957,
"learning_rate": 0.00015840455840455842,
"loss": 0.5983,
"step": 225
},
{
"epoch": 0.3198976327575176,
"grad_norm": 0.24990104138851166,
"learning_rate": 0.00015128205128205128,
"loss": 0.5789,
"step": 250
},
{
"epoch": 0.35188739603326935,
"grad_norm": 0.2319009006023407,
"learning_rate": 0.00014415954415954415,
"loss": 0.5851,
"step": 275
},
{
"epoch": 0.3838771593090211,
"grad_norm": 0.22513625025749207,
"learning_rate": 0.00013703703703703705,
"loss": 0.5974,
"step": 300
},
{
"epoch": 0.41586692258477287,
"grad_norm": 0.2516462504863739,
"learning_rate": 0.00012991452991452992,
"loss": 0.5811,
"step": 325
},
{
"epoch": 0.44785668586052463,
"grad_norm": 0.23952844738960266,
"learning_rate": 0.00012279202279202279,
"loss": 0.5885,
"step": 350
},
{
"epoch": 0.4798464491362764,
"grad_norm": 0.26743239164352417,
"learning_rate": 0.00011566951566951567,
"loss": 0.5812,
"step": 375
},
{
"epoch": 0.5118362124120281,
"grad_norm": 0.24359311163425446,
"learning_rate": 0.00010854700854700855,
"loss": 0.5853,
"step": 400
},
{
"epoch": 0.5438259756877799,
"grad_norm": 0.26046106219291687,
"learning_rate": 0.00010142450142450144,
"loss": 0.5717,
"step": 425
},
{
"epoch": 0.5758157389635317,
"grad_norm": 0.2767123878002167,
"learning_rate": 9.430199430199431e-05,
"loss": 0.5711,
"step": 450
},
{
"epoch": 0.6078055022392834,
"grad_norm": 0.2743181884288788,
"learning_rate": 8.717948717948718e-05,
"loss": 0.5786,
"step": 475
},
{
"epoch": 0.6397952655150352,
"grad_norm": 0.2655166983604431,
"learning_rate": 8.005698005698006e-05,
"loss": 0.563,
"step": 500
},
{
"epoch": 0.6717850287907869,
"grad_norm": 0.2630331814289093,
"learning_rate": 7.293447293447295e-05,
"loss": 0.5688,
"step": 525
},
{
"epoch": 0.7037747920665387,
"grad_norm": 0.27685314416885376,
"learning_rate": 6.581196581196581e-05,
"loss": 0.5687,
"step": 550
},
{
"epoch": 0.7357645553422905,
"grad_norm": 0.2695849537849426,
"learning_rate": 5.868945868945869e-05,
"loss": 0.5592,
"step": 575
},
{
"epoch": 0.7677543186180422,
"grad_norm": 0.25096848607063293,
"learning_rate": 5.156695156695157e-05,
"loss": 0.5477,
"step": 600
},
{
"epoch": 0.799744081893794,
"grad_norm": 0.2821820378303528,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.5574,
"step": 625
},
{
"epoch": 0.8317338451695457,
"grad_norm": 0.26849839091300964,
"learning_rate": 3.732193732193732e-05,
"loss": 0.5662,
"step": 650
},
{
"epoch": 0.8637236084452975,
"grad_norm": 0.27688708901405334,
"learning_rate": 3.01994301994302e-05,
"loss": 0.5485,
"step": 675
},
{
"epoch": 0.8957133717210493,
"grad_norm": 0.2868192195892334,
"learning_rate": 2.307692307692308e-05,
"loss": 0.5493,
"step": 700
},
{
"epoch": 0.927703134996801,
"grad_norm": 0.28862541913986206,
"learning_rate": 1.5954415954415954e-05,
"loss": 0.5494,
"step": 725
},
{
"epoch": 0.9596928982725528,
"grad_norm": 0.2842100262641907,
"learning_rate": 8.831908831908831e-06,
"loss": 0.5582,
"step": 750
},
{
"epoch": 0.9916826615483045,
"grad_norm": 0.2615242302417755,
"learning_rate": 1.7094017094017097e-06,
"loss": 0.5641,
"step": 775
}
],
"logging_steps": 25,
"max_steps": 781,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 2.5396281704290714e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}