{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.058412621943876,
"eval_steps": 500,
"global_step": 21000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24087679152113695,
"grad_norm": 0.5082331299781799,
"learning_rate": 0.000991969806472336,
"loss": 1.4852,
"step": 1000
},
{
"epoch": 0.4817535830422739,
"grad_norm": 0.4949457347393036,
"learning_rate": 0.0009839396129446719,
"loss": 1.369,
"step": 2000
},
{
"epoch": 0.7226303745634108,
"grad_norm": 0.5630651116371155,
"learning_rate": 0.0009759094194170079,
"loss": 1.3189,
"step": 3000
},
{
"epoch": 0.9635071660845478,
"grad_norm": 0.5108479261398315,
"learning_rate": 0.000967879225889344,
"loss": 1.2817,
"step": 4000
},
{
"epoch": 1.2043839576056847,
"grad_norm": 0.47163671255111694,
"learning_rate": 0.00095984903236168,
"loss": 1.2511,
"step": 5000
},
{
"epoch": 1.4452607491268217,
"grad_norm": 0.531821072101593,
"learning_rate": 0.0009518188388340159,
"loss": 1.2309,
"step": 6000
},
{
"epoch": 1.6861375406479586,
"grad_norm": 0.4891831874847412,
"learning_rate": 0.0009437886453063518,
"loss": 1.2088,
"step": 7000
},
{
"epoch": 1.9270143321690956,
"grad_norm": 0.4778994619846344,
"learning_rate": 0.0009357584517786879,
"loss": 1.1936,
"step": 8000
},
{
"epoch": 2.1678911236902323,
"grad_norm": 0.5436965823173523,
"learning_rate": 0.0009277282582510239,
"loss": 1.1792,
"step": 9000
},
{
"epoch": 2.4087679152113695,
"grad_norm": 0.45789214968681335,
"learning_rate": 0.0009196980647233599,
"loss": 1.1609,
"step": 10000
},
{
"epoch": 2.649644706732506,
"grad_norm": 0.49292871356010437,
"learning_rate": 0.0009116678711956958,
"loss": 1.1465,
"step": 11000
},
{
"epoch": 2.8905214982536434,
"grad_norm": 0.4877796769142151,
"learning_rate": 0.0009036376776680318,
"loss": 1.1339,
"step": 12000
},
{
"epoch": 3.13139828977478,
"grad_norm": 0.45624956488609314,
"learning_rate": 0.0008956074841403679,
"loss": 1.1235,
"step": 13000
},
{
"epoch": 3.3722750812959172,
"grad_norm": 0.45705732703208923,
"learning_rate": 0.0008875772906127038,
"loss": 1.114,
"step": 14000
},
{
"epoch": 3.613151872817054,
"grad_norm": 0.4534723460674286,
"learning_rate": 0.0008795470970850398,
"loss": 1.1022,
"step": 15000
},
{
"epoch": 3.854028664338191,
"grad_norm": 0.5117238163948059,
"learning_rate": 0.0008715169035573758,
"loss": 1.0936,
"step": 16000
},
{
"epoch": 4.094905455859328,
"grad_norm": 0.4832773804664612,
"learning_rate": 0.0008634867100297117,
"loss": 1.0895,
"step": 17000
},
{
"epoch": 4.335782247380465,
"grad_norm": 0.5012445449829102,
"learning_rate": 0.0008554565165020477,
"loss": 1.0793,
"step": 18000
},
{
"epoch": 4.576659038901602,
"grad_norm": 0.5131984353065491,
"learning_rate": 0.0008474263229743838,
"loss": 1.0737,
"step": 19000
},
{
"epoch": 4.817535830422739,
"grad_norm": 0.5085521340370178,
"learning_rate": 0.0008393961294467197,
"loss": 1.068,
"step": 20000
},
{
"epoch": 5.058412621943876,
"grad_norm": 0.4861578345298767,
"learning_rate": 0.0008313659359190556,
"loss": 1.0596,
"step": 21000
}
],
"logging_steps": 1000,
"max_steps": 124530,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.1007632729269862e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}