jangmin's picture
upload model files
853ba7b
raw
history blame
5.79 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 8775,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.962631077216398e-06,
"loss": 0.1958,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 9.886367969494757e-06,
"loss": 0.1228,
"step": 400
},
{
"epoch": 0.07,
"learning_rate": 9.810104861773118e-06,
"loss": 0.1051,
"step": 600
},
{
"epoch": 0.09,
"learning_rate": 9.733841754051478e-06,
"loss": 0.0977,
"step": 800
},
{
"epoch": 0.11,
"learning_rate": 9.657578646329838e-06,
"loss": 0.0891,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 9.581315538608199e-06,
"loss": 0.0827,
"step": 1200
},
{
"epoch": 0.16,
"learning_rate": 9.50505243088656e-06,
"loss": 0.0822,
"step": 1400
},
{
"epoch": 0.18,
"learning_rate": 9.42878932316492e-06,
"loss": 0.0796,
"step": 1600
},
{
"epoch": 0.21,
"learning_rate": 9.352526215443281e-06,
"loss": 0.0777,
"step": 1800
},
{
"epoch": 0.23,
"learning_rate": 9.27626310772164e-06,
"loss": 0.0769,
"step": 2000
},
{
"epoch": 0.25,
"learning_rate": 9.200000000000002e-06,
"loss": 0.0736,
"step": 2200
},
{
"epoch": 0.27,
"learning_rate": 9.123736892278361e-06,
"loss": 0.0721,
"step": 2400
},
{
"epoch": 0.3,
"learning_rate": 9.047473784556723e-06,
"loss": 0.0748,
"step": 2600
},
{
"epoch": 0.32,
"learning_rate": 8.971210676835082e-06,
"loss": 0.0736,
"step": 2800
},
{
"epoch": 0.34,
"learning_rate": 8.894947569113442e-06,
"loss": 0.0714,
"step": 3000
},
{
"epoch": 0.36,
"learning_rate": 8.818684461391803e-06,
"loss": 0.0718,
"step": 3200
},
{
"epoch": 0.39,
"learning_rate": 8.742421353670163e-06,
"loss": 0.0729,
"step": 3400
},
{
"epoch": 0.41,
"learning_rate": 8.666158245948522e-06,
"loss": 0.0711,
"step": 3600
},
{
"epoch": 0.43,
"learning_rate": 8.589895138226883e-06,
"loss": 0.0663,
"step": 3800
},
{
"epoch": 0.46,
"learning_rate": 8.513632030505243e-06,
"loss": 0.0675,
"step": 4000
},
{
"epoch": 0.48,
"learning_rate": 8.437368922783604e-06,
"loss": 0.0689,
"step": 4200
},
{
"epoch": 0.5,
"learning_rate": 8.361105815061964e-06,
"loss": 0.0685,
"step": 4400
},
{
"epoch": 0.52,
"learning_rate": 8.284842707340325e-06,
"loss": 0.0663,
"step": 4600
},
{
"epoch": 0.55,
"learning_rate": 8.208579599618686e-06,
"loss": 0.0651,
"step": 4800
},
{
"epoch": 0.57,
"learning_rate": 8.132316491897046e-06,
"loss": 0.0652,
"step": 5000
},
{
"epoch": 0.59,
"learning_rate": 8.056053384175407e-06,
"loss": 0.0653,
"step": 5200
},
{
"epoch": 0.62,
"learning_rate": 7.979790276453767e-06,
"loss": 0.0656,
"step": 5400
},
{
"epoch": 0.64,
"learning_rate": 7.903527168732126e-06,
"loss": 0.0635,
"step": 5600
},
{
"epoch": 0.66,
"learning_rate": 7.827264061010486e-06,
"loss": 0.061,
"step": 5800
},
{
"epoch": 0.68,
"learning_rate": 7.751000953288847e-06,
"loss": 0.0637,
"step": 6000
},
{
"epoch": 0.71,
"learning_rate": 7.674737845567207e-06,
"loss": 0.0621,
"step": 6200
},
{
"epoch": 0.73,
"learning_rate": 7.598856053384176e-06,
"loss": 0.0657,
"step": 6400
},
{
"epoch": 0.75,
"learning_rate": 7.522592945662537e-06,
"loss": 0.0618,
"step": 6600
},
{
"epoch": 0.77,
"learning_rate": 7.446329837940897e-06,
"loss": 0.0617,
"step": 6800
},
{
"epoch": 0.8,
"learning_rate": 7.370066730219257e-06,
"loss": 0.0623,
"step": 7000
},
{
"epoch": 0.82,
"learning_rate": 7.293803622497617e-06,
"loss": 0.0581,
"step": 7200
},
{
"epoch": 0.84,
"learning_rate": 7.217540514775978e-06,
"loss": 0.0612,
"step": 7400
},
{
"epoch": 0.87,
"learning_rate": 7.141277407054338e-06,
"loss": 0.0584,
"step": 7600
},
{
"epoch": 0.89,
"learning_rate": 7.065014299332699e-06,
"loss": 0.0595,
"step": 7800
},
{
"epoch": 0.91,
"learning_rate": 6.9887511916110584e-06,
"loss": 0.0605,
"step": 8000
},
{
"epoch": 0.93,
"learning_rate": 6.912488083889419e-06,
"loss": 0.0584,
"step": 8200
},
{
"epoch": 0.96,
"learning_rate": 6.836606291706388e-06,
"loss": 0.06,
"step": 8400
},
{
"epoch": 0.98,
"learning_rate": 6.760343183984748e-06,
"loss": 0.0589,
"step": 8600
},
{
"epoch": 1.0,
"eval_loss": 0.12254729866981506,
"eval_runtime": 5547.0518,
"eval_samples_per_second": 6.063,
"eval_steps_per_second": 0.189,
"eval_wer": 0.06041116269525739,
"step": 8775
}
],
"max_steps": 26325,
"num_train_epochs": 3,
"total_flos": 2.1491591961083904e+20,
"trial_name": null,
"trial_params": null
}