|
{ |
|
"best_metric": 41.842087703189854, |
|
"best_model_checkpoint": "/root/turkic_qa/en_kaz_models/orig_kaz_roberta_base_model/checkpoint-6020", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 6020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 602, |
|
"train_exact_match": 4.595404595404595, |
|
"train_f1": 11.363671882650538, |
|
"train_runtime": 9.9586, |
|
"train_samples_per_second": 112.967, |
|
"train_steps_per_second": 4.117 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.829487323760986, |
|
"learning_rate": 5e-06, |
|
"loss": 4.7559, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 4.0625, |
|
"eval_f1": 10.65255335870508, |
|
"eval_runtime": 31.5322, |
|
"eval_samples_per_second": 113.281, |
|
"eval_steps_per_second": 4.059, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1204, |
|
"train_exact_match": 16.083916083916083, |
|
"train_f1": 24.66111862744578, |
|
"train_runtime": 10.0503, |
|
"train_samples_per_second": 113.131, |
|
"train_steps_per_second": 4.079 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 9.797904014587402, |
|
"learning_rate": 1e-05, |
|
"loss": 3.6465, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 12.375, |
|
"eval_f1": 19.998235228782818, |
|
"eval_runtime": 31.6899, |
|
"eval_samples_per_second": 112.717, |
|
"eval_steps_per_second": 4.039, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1806, |
|
"train_exact_match": 28.571428571428573, |
|
"train_f1": 39.799779936991705, |
|
"train_runtime": 9.9917, |
|
"train_samples_per_second": 112.694, |
|
"train_steps_per_second": 4.103 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 15.731840133666992, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 3.0904, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 20.8125, |
|
"eval_f1": 31.83521011700413, |
|
"eval_runtime": 31.6833, |
|
"eval_samples_per_second": 112.741, |
|
"eval_steps_per_second": 4.04, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2408, |
|
"train_exact_match": 37.76223776223776, |
|
"train_f1": 50.512835216096065, |
|
"train_runtime": 10.2559, |
|
"train_samples_per_second": 113.009, |
|
"train_steps_per_second": 4.095 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 15.223044395446777, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 2.5695, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 26.34375, |
|
"eval_f1": 37.68196751270749, |
|
"eval_runtime": 31.6681, |
|
"eval_samples_per_second": 112.795, |
|
"eval_steps_per_second": 4.042, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3010, |
|
"train_exact_match": 46.553446553446555, |
|
"train_f1": 59.64402432927812, |
|
"train_runtime": 9.9179, |
|
"train_samples_per_second": 112.827, |
|
"train_steps_per_second": 4.033 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 19.780818939208984, |
|
"learning_rate": 6.25e-06, |
|
"loss": 2.2508, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 27.5625, |
|
"eval_f1": 39.44221624780652, |
|
"eval_runtime": 31.714, |
|
"eval_samples_per_second": 112.632, |
|
"eval_steps_per_second": 4.036, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 3612, |
|
"train_exact_match": 50.94905094905095, |
|
"train_f1": 63.29384394642245, |
|
"train_runtime": 9.8492, |
|
"train_samples_per_second": 111.278, |
|
"train_steps_per_second": 4.061 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 19.713895797729492, |
|
"learning_rate": 5e-06, |
|
"loss": 2.0297, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 28.75, |
|
"eval_f1": 41.0194110809582, |
|
"eval_runtime": 31.7278, |
|
"eval_samples_per_second": 112.583, |
|
"eval_steps_per_second": 4.034, |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 4214, |
|
"train_exact_match": 53.74625374625375, |
|
"train_f1": 65.89175767102272, |
|
"train_runtime": 9.8541, |
|
"train_samples_per_second": 113.658, |
|
"train_steps_per_second": 4.059 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 18.399381637573242, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 1.8692, |
|
"step": 4214 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 29.0625, |
|
"eval_f1": 41.37431545961761, |
|
"eval_runtime": 31.718, |
|
"eval_samples_per_second": 112.617, |
|
"eval_steps_per_second": 4.036, |
|
"step": 4214 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 4816, |
|
"train_exact_match": 57.04295704295704, |
|
"train_f1": 69.23809998725315, |
|
"train_runtime": 9.8266, |
|
"train_samples_per_second": 113.06, |
|
"train_steps_per_second": 4.071 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 19.948137283325195, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.7415, |
|
"step": 4816 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 29.09375, |
|
"eval_f1": 41.58572921874224, |
|
"eval_runtime": 31.6379, |
|
"eval_samples_per_second": 112.903, |
|
"eval_steps_per_second": 4.046, |
|
"step": 4816 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 5418, |
|
"train_exact_match": 59.14085914085914, |
|
"train_f1": 70.72972336346831, |
|
"train_runtime": 10.1, |
|
"train_samples_per_second": 111.485, |
|
"train_steps_per_second": 4.059 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 18.038774490356445, |
|
"learning_rate": 1.25e-06, |
|
"loss": 1.6377, |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 29.21875, |
|
"eval_f1": 41.68346527288109, |
|
"eval_runtime": 31.8369, |
|
"eval_samples_per_second": 112.197, |
|
"eval_steps_per_second": 4.02, |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6020, |
|
"train_exact_match": 59.94005994005994, |
|
"train_f1": 72.84928684458019, |
|
"train_runtime": 9.7454, |
|
"train_samples_per_second": 112.976, |
|
"train_steps_per_second": 4.104 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 21.71363639831543, |
|
"learning_rate": 0.0, |
|
"loss": 1.583, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 29.28125, |
|
"eval_f1": 41.842087703189854, |
|
"eval_runtime": 31.896, |
|
"eval_samples_per_second": 111.989, |
|
"eval_steps_per_second": 4.013, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6020, |
|
"total_flos": 1.651520510954496e+16, |
|
"train_loss": 2.5174192206804142, |
|
"train_runtime": 2180.484, |
|
"train_samples_per_second": 77.295, |
|
"train_steps_per_second": 2.761 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6020, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.651520510954496e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|