|
{ |
|
"best_metric": 16.6036, |
|
"best_model_checkpoint": "./zh-kr_mid/checkpoint-12000", |
|
"epoch": 11.976047904191617, |
|
"eval_steps": 1000, |
|
"global_step": 16000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.91e-05, |
|
"loss": 4.4024, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.95362674726105e-05, |
|
"loss": 2.7248, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_bleu": 3.2381, |
|
"eval_gen_len": 48.6095, |
|
"eval_loss": 1.9409534931182861, |
|
"eval_runtime": 1917.4714, |
|
"eval_samples_per_second": 1.393, |
|
"eval_steps_per_second": 0.087, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.9064979221760484e-05, |
|
"loss": 1.8483, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.85927465054779e-05, |
|
"loss": 1.5683, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_bleu": 10.2345, |
|
"eval_gen_len": 20.4433, |
|
"eval_loss": 1.6889078617095947, |
|
"eval_runtime": 497.5454, |
|
"eval_samples_per_second": 5.368, |
|
"eval_steps_per_second": 0.336, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.812051378919532e-05, |
|
"loss": 1.5205, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.764828107291273e-05, |
|
"loss": 1.1916, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_bleu": 13.4571, |
|
"eval_gen_len": 18.8854, |
|
"eval_loss": 1.6842907667160034, |
|
"eval_runtime": 538.3261, |
|
"eval_samples_per_second": 4.962, |
|
"eval_steps_per_second": 0.31, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.7176048356630146e-05, |
|
"loss": 1.0458, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.670381564034756e-05, |
|
"loss": 1.068, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_bleu": 15.6862, |
|
"eval_gen_len": 15.5054, |
|
"eval_loss": 1.6389988660812378, |
|
"eval_runtime": 200.5304, |
|
"eval_samples_per_second": 13.32, |
|
"eval_steps_per_second": 0.833, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 4.623158292406498e-05, |
|
"loss": 0.7097, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 4.57593502077824e-05, |
|
"loss": 0.7313, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_bleu": 15.2014, |
|
"eval_gen_len": 16.5938, |
|
"eval_loss": 1.7002748250961304, |
|
"eval_runtime": 222.4222, |
|
"eval_samples_per_second": 12.009, |
|
"eval_steps_per_second": 0.751, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 4.5287117491499814e-05, |
|
"loss": 0.6587, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 4.481488477521723e-05, |
|
"loss": 0.4832, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_bleu": 15.0381, |
|
"eval_gen_len": 16.9068, |
|
"eval_loss": 1.8982346057891846, |
|
"eval_runtime": 258.8867, |
|
"eval_samples_per_second": 10.317, |
|
"eval_steps_per_second": 0.645, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 4.434265205893465e-05, |
|
"loss": 0.5089, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 4.3870419342652066e-05, |
|
"loss": 0.3862, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_bleu": 15.5397, |
|
"eval_gen_len": 15.6451, |
|
"eval_loss": 2.1426022052764893, |
|
"eval_runtime": 170.9728, |
|
"eval_samples_per_second": 15.622, |
|
"eval_steps_per_second": 0.977, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 4.339913109180204e-05, |
|
"loss": 0.3495, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 4.292689837551946e-05, |
|
"loss": 0.3675, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 15.8847, |
|
"eval_gen_len": 15.6926, |
|
"eval_loss": 2.1168086528778076, |
|
"eval_runtime": 169.7878, |
|
"eval_samples_per_second": 15.731, |
|
"eval_steps_per_second": 0.984, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 4.2454665659236876e-05, |
|
"loss": 0.2443, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 4.198243294295429e-05, |
|
"loss": 0.2627, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_bleu": 16.3603, |
|
"eval_gen_len": 15.9671, |
|
"eval_loss": 2.260291814804077, |
|
"eval_runtime": 176.4048, |
|
"eval_samples_per_second": 15.141, |
|
"eval_steps_per_second": 0.947, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 4.151020022667171e-05, |
|
"loss": 0.2471, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 4.103796751038912e-05, |
|
"loss": 0.1955, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"eval_bleu": 15.7447, |
|
"eval_gen_len": 15.979, |
|
"eval_loss": 2.4113738536834717, |
|
"eval_runtime": 200.4127, |
|
"eval_samples_per_second": 13.327, |
|
"eval_steps_per_second": 0.833, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 4.056573479410654e-05, |
|
"loss": 0.2088, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 4.0093502077823954e-05, |
|
"loss": 0.171, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"eval_bleu": 15.7852, |
|
"eval_gen_len": 15.9244, |
|
"eval_loss": 2.5141146183013916, |
|
"eval_runtime": 180.5467, |
|
"eval_samples_per_second": 14.794, |
|
"eval_steps_per_second": 0.925, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 3.962126936154137e-05, |
|
"loss": 0.157, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 3.9149981110691354e-05, |
|
"loss": 0.1702, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_bleu": 16.6036, |
|
"eval_gen_len": 15.4901, |
|
"eval_loss": 2.5557188987731934, |
|
"eval_runtime": 168.9011, |
|
"eval_samples_per_second": 15.814, |
|
"eval_steps_per_second": 0.989, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.8677748394408764e-05, |
|
"loss": 0.1206, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 3.820551567812618e-05, |
|
"loss": 0.1298, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_bleu": 16.1319, |
|
"eval_gen_len": 15.5492, |
|
"eval_loss": 2.653599739074707, |
|
"eval_runtime": 168.2024, |
|
"eval_samples_per_second": 15.88, |
|
"eval_steps_per_second": 0.993, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 3.77332829618436e-05, |
|
"loss": 0.1263, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 3.7261050245561016e-05, |
|
"loss": 0.1052, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"eval_bleu": 16.1807, |
|
"eval_gen_len": 15.8884, |
|
"eval_loss": 2.7585606575012207, |
|
"eval_runtime": 171.7593, |
|
"eval_samples_per_second": 15.551, |
|
"eval_steps_per_second": 0.972, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 3.678881752927843e-05, |
|
"loss": 0.1137, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 3.6320362674726105e-05, |
|
"loss": 0.2268, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"eval_bleu": 15.1752, |
|
"eval_gen_len": 15.5346, |
|
"eval_loss": 2.7257988452911377, |
|
"eval_runtime": 171.5928, |
|
"eval_samples_per_second": 15.566, |
|
"eval_steps_per_second": 0.973, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 3.584812995844352e-05, |
|
"loss": 0.1133, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 3.5376841707593505e-05, |
|
"loss": 0.1327, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_bleu": 15.8563, |
|
"eval_gen_len": 15.7971, |
|
"eval_loss": 2.719254493713379, |
|
"eval_runtime": 171.7089, |
|
"eval_samples_per_second": 15.555, |
|
"eval_steps_per_second": 0.973, |
|
"step": 16000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 53440, |
|
"num_train_epochs": 40, |
|
"save_steps": 1000, |
|
"total_flos": 5.54785220591616e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|