Bibek1129's picture
Upload 11 files
f18db35 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"eval_steps": 500,
"global_step": 118100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.12,
"grad_norm": 1.353366494178772,
"learning_rate": 0.0002,
"loss": 3.6004,
"step": 5000
},
{
"epoch": 4.23,
"grad_norm": 1.4591169357299805,
"learning_rate": 0.0002,
"loss": 3.3767,
"step": 10000
},
{
"epoch": 6.35,
"grad_norm": 1.5558066368103027,
"learning_rate": 0.0002,
"loss": 3.3035,
"step": 15000
},
{
"epoch": 8.47,
"grad_norm": 1.54267418384552,
"learning_rate": 0.0002,
"loss": 3.2643,
"step": 20000
},
{
"epoch": 10.58,
"grad_norm": 1.6217669248580933,
"learning_rate": 0.0002,
"loss": 3.238,
"step": 25000
},
{
"epoch": 12.7,
"grad_norm": 1.5397529602050781,
"learning_rate": 0.0002,
"loss": 3.2201,
"step": 30000
},
{
"epoch": 14.82,
"grad_norm": 1.5048496723175049,
"learning_rate": 0.0002,
"loss": 3.2048,
"step": 35000
},
{
"epoch": 16.93,
"grad_norm": 1.5197534561157227,
"learning_rate": 0.0002,
"loss": 3.1947,
"step": 40000
},
{
"epoch": 19.05,
"grad_norm": 1.5024417638778687,
"learning_rate": 0.0002,
"loss": 3.1829,
"step": 45000
},
{
"epoch": 21.17,
"grad_norm": 1.5805625915527344,
"learning_rate": 0.0002,
"loss": 3.1729,
"step": 50000
},
{
"epoch": 23.29,
"grad_norm": 1.5682896375656128,
"learning_rate": 0.0002,
"loss": 3.1681,
"step": 55000
},
{
"epoch": 25.4,
"grad_norm": 1.5147111415863037,
"learning_rate": 0.0002,
"loss": 3.1619,
"step": 60000
},
{
"epoch": 27.52,
"grad_norm": 1.6233525276184082,
"learning_rate": 0.0002,
"loss": 3.1557,
"step": 65000
},
{
"epoch": 29.64,
"grad_norm": 1.5563185214996338,
"learning_rate": 0.0002,
"loss": 3.153,
"step": 70000
},
{
"epoch": 31.75,
"grad_norm": 1.5635435581207275,
"learning_rate": 0.0002,
"loss": 3.151,
"step": 75000
},
{
"epoch": 33.87,
"grad_norm": 1.471053957939148,
"learning_rate": 0.0002,
"loss": 3.1449,
"step": 80000
},
{
"epoch": 35.99,
"grad_norm": 1.5087348222732544,
"learning_rate": 0.0002,
"loss": 3.1414,
"step": 85000
},
{
"epoch": 38.1,
"grad_norm": 1.6342508792877197,
"learning_rate": 0.0002,
"loss": 3.1363,
"step": 90000
},
{
"epoch": 40.22,
"grad_norm": 1.7150408029556274,
"learning_rate": 0.0002,
"loss": 3.1327,
"step": 95000
},
{
"epoch": 42.34,
"grad_norm": 1.8055483102798462,
"learning_rate": 0.0002,
"loss": 3.1319,
"step": 100000
},
{
"epoch": 44.45,
"grad_norm": 1.509770154953003,
"learning_rate": 0.0002,
"loss": 3.13,
"step": 105000
},
{
"epoch": 46.57,
"grad_norm": 1.6583279371261597,
"learning_rate": 0.0002,
"loss": 3.1266,
"step": 110000
},
{
"epoch": 48.69,
"grad_norm": 1.7038261890411377,
"learning_rate": 0.0002,
"loss": 3.1273,
"step": 115000
}
],
"logging_steps": 5000,
"max_steps": 118100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 7.86717088860818e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}