med-alex's picture
End of training
a9460bc verified
raw
history blame contribute delete
No virus
7.11 kB
{
"best_metric": 41.842087703189854,
"best_model_checkpoint": "/root/turkic_qa/en_kaz_models/orig_kaz_roberta_base_model/checkpoint-6020",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 6020,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 602,
"train_exact_match": 4.595404595404595,
"train_f1": 11.363671882650538,
"train_runtime": 9.9586,
"train_samples_per_second": 112.967,
"train_steps_per_second": 4.117
},
{
"epoch": 1.0,
"grad_norm": 7.829487323760986,
"learning_rate": 5e-06,
"loss": 4.7559,
"step": 602
},
{
"epoch": 1.0,
"eval_exact_match": 4.0625,
"eval_f1": 10.65255335870508,
"eval_runtime": 31.5322,
"eval_samples_per_second": 113.281,
"eval_steps_per_second": 4.059,
"step": 602
},
{
"epoch": 2.0,
"step": 1204,
"train_exact_match": 16.083916083916083,
"train_f1": 24.66111862744578,
"train_runtime": 10.0503,
"train_samples_per_second": 113.131,
"train_steps_per_second": 4.079
},
{
"epoch": 2.0,
"grad_norm": 9.797904014587402,
"learning_rate": 1e-05,
"loss": 3.6465,
"step": 1204
},
{
"epoch": 2.0,
"eval_exact_match": 12.375,
"eval_f1": 19.998235228782818,
"eval_runtime": 31.6899,
"eval_samples_per_second": 112.717,
"eval_steps_per_second": 4.039,
"step": 1204
},
{
"epoch": 3.0,
"step": 1806,
"train_exact_match": 28.571428571428573,
"train_f1": 39.799779936991705,
"train_runtime": 9.9917,
"train_samples_per_second": 112.694,
"train_steps_per_second": 4.103
},
{
"epoch": 3.0,
"grad_norm": 15.731840133666992,
"learning_rate": 8.750000000000001e-06,
"loss": 3.0904,
"step": 1806
},
{
"epoch": 3.0,
"eval_exact_match": 20.8125,
"eval_f1": 31.83521011700413,
"eval_runtime": 31.6833,
"eval_samples_per_second": 112.741,
"eval_steps_per_second": 4.04,
"step": 1806
},
{
"epoch": 4.0,
"step": 2408,
"train_exact_match": 37.76223776223776,
"train_f1": 50.512835216096065,
"train_runtime": 10.2559,
"train_samples_per_second": 113.009,
"train_steps_per_second": 4.095
},
{
"epoch": 4.0,
"grad_norm": 15.223044395446777,
"learning_rate": 7.500000000000001e-06,
"loss": 2.5695,
"step": 2408
},
{
"epoch": 4.0,
"eval_exact_match": 26.34375,
"eval_f1": 37.68196751270749,
"eval_runtime": 31.6681,
"eval_samples_per_second": 112.795,
"eval_steps_per_second": 4.042,
"step": 2408
},
{
"epoch": 5.0,
"step": 3010,
"train_exact_match": 46.553446553446555,
"train_f1": 59.64402432927812,
"train_runtime": 9.9179,
"train_samples_per_second": 112.827,
"train_steps_per_second": 4.033
},
{
"epoch": 5.0,
"grad_norm": 19.780818939208984,
"learning_rate": 6.25e-06,
"loss": 2.2508,
"step": 3010
},
{
"epoch": 5.0,
"eval_exact_match": 27.5625,
"eval_f1": 39.44221624780652,
"eval_runtime": 31.714,
"eval_samples_per_second": 112.632,
"eval_steps_per_second": 4.036,
"step": 3010
},
{
"epoch": 6.0,
"step": 3612,
"train_exact_match": 50.94905094905095,
"train_f1": 63.29384394642245,
"train_runtime": 9.8492,
"train_samples_per_second": 111.278,
"train_steps_per_second": 4.061
},
{
"epoch": 6.0,
"grad_norm": 19.713895797729492,
"learning_rate": 5e-06,
"loss": 2.0297,
"step": 3612
},
{
"epoch": 6.0,
"eval_exact_match": 28.75,
"eval_f1": 41.0194110809582,
"eval_runtime": 31.7278,
"eval_samples_per_second": 112.583,
"eval_steps_per_second": 4.034,
"step": 3612
},
{
"epoch": 7.0,
"step": 4214,
"train_exact_match": 53.74625374625375,
"train_f1": 65.89175767102272,
"train_runtime": 9.8541,
"train_samples_per_second": 113.658,
"train_steps_per_second": 4.059
},
{
"epoch": 7.0,
"grad_norm": 18.399381637573242,
"learning_rate": 3.7500000000000005e-06,
"loss": 1.8692,
"step": 4214
},
{
"epoch": 7.0,
"eval_exact_match": 29.0625,
"eval_f1": 41.37431545961761,
"eval_runtime": 31.718,
"eval_samples_per_second": 112.617,
"eval_steps_per_second": 4.036,
"step": 4214
},
{
"epoch": 8.0,
"step": 4816,
"train_exact_match": 57.04295704295704,
"train_f1": 69.23809998725315,
"train_runtime": 9.8266,
"train_samples_per_second": 113.06,
"train_steps_per_second": 4.071
},
{
"epoch": 8.0,
"grad_norm": 19.948137283325195,
"learning_rate": 2.5e-06,
"loss": 1.7415,
"step": 4816
},
{
"epoch": 8.0,
"eval_exact_match": 29.09375,
"eval_f1": 41.58572921874224,
"eval_runtime": 31.6379,
"eval_samples_per_second": 112.903,
"eval_steps_per_second": 4.046,
"step": 4816
},
{
"epoch": 9.0,
"step": 5418,
"train_exact_match": 59.14085914085914,
"train_f1": 70.72972336346831,
"train_runtime": 10.1,
"train_samples_per_second": 111.485,
"train_steps_per_second": 4.059
},
{
"epoch": 9.0,
"grad_norm": 18.038774490356445,
"learning_rate": 1.25e-06,
"loss": 1.6377,
"step": 5418
},
{
"epoch": 9.0,
"eval_exact_match": 29.21875,
"eval_f1": 41.68346527288109,
"eval_runtime": 31.8369,
"eval_samples_per_second": 112.197,
"eval_steps_per_second": 4.02,
"step": 5418
},
{
"epoch": 10.0,
"step": 6020,
"train_exact_match": 59.94005994005994,
"train_f1": 72.84928684458019,
"train_runtime": 9.7454,
"train_samples_per_second": 112.976,
"train_steps_per_second": 4.104
},
{
"epoch": 10.0,
"grad_norm": 21.71363639831543,
"learning_rate": 0.0,
"loss": 1.583,
"step": 6020
},
{
"epoch": 10.0,
"eval_exact_match": 29.28125,
"eval_f1": 41.842087703189854,
"eval_runtime": 31.896,
"eval_samples_per_second": 111.989,
"eval_steps_per_second": 4.013,
"step": 6020
},
{
"epoch": 10.0,
"step": 6020,
"total_flos": 1.651520510954496e+16,
"train_loss": 2.5174192206804142,
"train_runtime": 2180.484,
"train_samples_per_second": 77.295,
"train_steps_per_second": 2.761
}
],
"logging_steps": 500,
"max_steps": 6020,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.651520510954496e+16,
"train_batch_size": 28,
"trial_name": null,
"trial_params": null
}