|
{ |
|
"best_metric": 0.7621653079986572, |
|
"best_model_checkpoint": "/home2/s5431786/jb-internship/results/Salesforce-codet5-small-CodeXGLUE-CONCODE-adamw/checkpoint-9000", |
|
"epoch": 3.2, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.166986564299425e-05, |
|
"loss": 2.3935, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 0.11268710945019392, |
|
"eval_exact_match": 0.129, |
|
"eval_loss": 0.9724392294883728, |
|
"eval_rouge1": 0.5286362208212969, |
|
"eval_rouge2": 0.3465696747006132, |
|
"eval_rougeL": 0.5098110686689669, |
|
"eval_rougeLsum": 0.5153482514973529, |
|
"eval_runtime": 27.4689, |
|
"eval_samples_per_second": 72.81, |
|
"eval_steps_per_second": 2.294, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.3659628918746e-05, |
|
"loss": 0.8984, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_bleu": 0.11998407865663271, |
|
"eval_exact_match": 0.138, |
|
"eval_loss": 0.8919487595558167, |
|
"eval_rouge1": 0.546341326360675, |
|
"eval_rouge2": 0.3714077684410343, |
|
"eval_rougeL": 0.5285236062967356, |
|
"eval_rougeLsum": 0.535261365301597, |
|
"eval_runtime": 26.8866, |
|
"eval_samples_per_second": 74.386, |
|
"eval_steps_per_second": 2.343, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.564939219449776e-05, |
|
"loss": 0.8121, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bleu": 0.11580804646751057, |
|
"eval_exact_match": 0.1455, |
|
"eval_loss": 0.8583337664604187, |
|
"eval_rouge1": 0.5528659122019346, |
|
"eval_rouge2": 0.3786855675064588, |
|
"eval_rougeL": 0.535002514875875, |
|
"eval_rougeLsum": 0.5425756750433103, |
|
"eval_runtime": 27.5302, |
|
"eval_samples_per_second": 72.647, |
|
"eval_steps_per_second": 2.288, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.854481759692796e-05, |
|
"loss": 0.7598, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bleu": 0.11973654523564387, |
|
"eval_exact_match": 0.1485, |
|
"eval_loss": 0.8436598181724548, |
|
"eval_rouge1": 0.5541164072038078, |
|
"eval_rouge2": 0.3813328094993438, |
|
"eval_rougeL": 0.5354722356320805, |
|
"eval_rougeLsum": 0.5432424855167899, |
|
"eval_runtime": 26.6962, |
|
"eval_samples_per_second": 74.917, |
|
"eval_steps_per_second": 2.36, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.686057870448345e-05, |
|
"loss": 0.7289, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bleu": 0.12215125938884584, |
|
"eval_exact_match": 0.158, |
|
"eval_loss": 0.818922758102417, |
|
"eval_rouge1": 0.5596647043092989, |
|
"eval_rouge2": 0.390646741130832, |
|
"eval_rougeL": 0.5415781749603358, |
|
"eval_rougeLsum": 0.5501483448643409, |
|
"eval_runtime": 26.4885, |
|
"eval_samples_per_second": 75.504, |
|
"eval_steps_per_second": 2.378, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.517633981203894e-05, |
|
"loss": 0.7053, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bleu": 0.12223620743911237, |
|
"eval_exact_match": 0.161, |
|
"eval_loss": 0.8145434856414795, |
|
"eval_rouge1": 0.5571603682618643, |
|
"eval_rouge2": 0.3887993919896527, |
|
"eval_rougeL": 0.5392381596924516, |
|
"eval_rougeLsum": 0.5469429464366221, |
|
"eval_runtime": 27.992, |
|
"eval_samples_per_second": 71.449, |
|
"eval_steps_per_second": 2.251, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.349210091959444e-05, |
|
"loss": 0.6544, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_bleu": 0.1260404864365832, |
|
"eval_exact_match": 0.1565, |
|
"eval_loss": 0.7981612682342529, |
|
"eval_rouge1": 0.5606467979991658, |
|
"eval_rouge2": 0.3920123242296646, |
|
"eval_rougeL": 0.5435919709288143, |
|
"eval_rougeLsum": 0.5516785996846396, |
|
"eval_runtime": 29.5652, |
|
"eval_samples_per_second": 67.647, |
|
"eval_steps_per_second": 2.131, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 9.180786202714994e-05, |
|
"loss": 0.6334, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_bleu": 0.12839687243556736, |
|
"eval_exact_match": 0.1585, |
|
"eval_loss": 0.7973963618278503, |
|
"eval_rouge1": 0.5633046028358244, |
|
"eval_rouge2": 0.3905965243856279, |
|
"eval_rougeL": 0.5447842726270915, |
|
"eval_rougeLsum": 0.5529333478079217, |
|
"eval_runtime": 26.3748, |
|
"eval_samples_per_second": 75.83, |
|
"eval_steps_per_second": 2.389, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.012362313470542e-05, |
|
"loss": 0.6236, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_bleu": 0.12748409364928656, |
|
"eval_exact_match": 0.163, |
|
"eval_loss": 0.7942736744880676, |
|
"eval_rouge1": 0.563881072938117, |
|
"eval_rouge2": 0.3930844670404538, |
|
"eval_rougeL": 0.5455192493710137, |
|
"eval_rougeLsum": 0.5541560433164279, |
|
"eval_runtime": 26.3891, |
|
"eval_samples_per_second": 75.789, |
|
"eval_steps_per_second": 2.387, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.843938424226093e-05, |
|
"loss": 0.6221, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_bleu": 0.13095132849637034, |
|
"eval_exact_match": 0.1655, |
|
"eval_loss": 0.782397449016571, |
|
"eval_rouge1": 0.571774543234057, |
|
"eval_rouge2": 0.40112070812038864, |
|
"eval_rougeL": 0.5536602781518731, |
|
"eval_rougeLsum": 0.5621325917699274, |
|
"eval_runtime": 27.1882, |
|
"eval_samples_per_second": 73.561, |
|
"eval_steps_per_second": 2.317, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.675514534981643e-05, |
|
"loss": 0.608, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_bleu": 0.13144704649097816, |
|
"eval_exact_match": 0.163, |
|
"eval_loss": 0.7792329788208008, |
|
"eval_rouge1": 0.5663801985181786, |
|
"eval_rouge2": 0.3996756504619762, |
|
"eval_rougeL": 0.5490225477397204, |
|
"eval_rougeLsum": 0.556696752293008, |
|
"eval_runtime": 26.3826, |
|
"eval_samples_per_second": 75.808, |
|
"eval_steps_per_second": 2.388, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.507090645737192e-05, |
|
"loss": 0.5956, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_bleu": 0.129401969984806, |
|
"eval_exact_match": 0.1605, |
|
"eval_loss": 0.7785006165504456, |
|
"eval_rouge1": 0.5640650180352025, |
|
"eval_rouge2": 0.39806305672301234, |
|
"eval_rougeL": 0.5469935550994057, |
|
"eval_rougeLsum": 0.554556710156975, |
|
"eval_runtime": 27.3051, |
|
"eval_samples_per_second": 73.246, |
|
"eval_steps_per_second": 2.307, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 8.338666756492741e-05, |
|
"loss": 0.5701, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_bleu": 0.13363680452673993, |
|
"eval_exact_match": 0.157, |
|
"eval_loss": 0.7799766063690186, |
|
"eval_rouge1": 0.5672994124585793, |
|
"eval_rouge2": 0.39553178231486497, |
|
"eval_rougeL": 0.548936720126607, |
|
"eval_rougeLsum": 0.5568480771592814, |
|
"eval_runtime": 26.8025, |
|
"eval_samples_per_second": 74.62, |
|
"eval_steps_per_second": 2.351, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.170242867248291e-05, |
|
"loss": 0.5378, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_bleu": 0.13079743156246348, |
|
"eval_exact_match": 0.1655, |
|
"eval_loss": 0.7720439434051514, |
|
"eval_rouge1": 0.5685604278995763, |
|
"eval_rouge2": 0.3999533201987808, |
|
"eval_rougeL": 0.5504327783527796, |
|
"eval_rougeLsum": 0.5582442604826536, |
|
"eval_runtime": 26.7858, |
|
"eval_samples_per_second": 74.666, |
|
"eval_steps_per_second": 2.352, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 8.00181897800384e-05, |
|
"loss": 0.541, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_bleu": 0.1312928889201776, |
|
"eval_exact_match": 0.1625, |
|
"eval_loss": 0.7708559632301331, |
|
"eval_rouge1": 0.5698993331552187, |
|
"eval_rouge2": 0.39836218536255497, |
|
"eval_rougeL": 0.5510518000592355, |
|
"eval_rougeLsum": 0.5590054429199334, |
|
"eval_runtime": 26.5491, |
|
"eval_samples_per_second": 75.332, |
|
"eval_steps_per_second": 2.373, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.83339508875939e-05, |
|
"loss": 0.5359, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_bleu": 0.133238582586381, |
|
"eval_exact_match": 0.164, |
|
"eval_loss": 0.767312228679657, |
|
"eval_rouge1": 0.5696523576197161, |
|
"eval_rouge2": 0.40229167377006236, |
|
"eval_rougeL": 0.552121263102242, |
|
"eval_rougeLsum": 0.5600576350734027, |
|
"eval_runtime": 26.7037, |
|
"eval_samples_per_second": 74.896, |
|
"eval_steps_per_second": 2.359, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 7.664971199514939e-05, |
|
"loss": 0.5322, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_bleu": 0.13498312180179964, |
|
"eval_exact_match": 0.1665, |
|
"eval_loss": 0.7641937732696533, |
|
"eval_rouge1": 0.5707696930577069, |
|
"eval_rouge2": 0.403346138073659, |
|
"eval_rougeL": 0.5527280072901636, |
|
"eval_rougeLsum": 0.5605579795104842, |
|
"eval_runtime": 26.4882, |
|
"eval_samples_per_second": 75.505, |
|
"eval_steps_per_second": 2.378, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.496547310270489e-05, |
|
"loss": 0.5387, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_bleu": 0.13416837075728422, |
|
"eval_exact_match": 0.159, |
|
"eval_loss": 0.7621653079986572, |
|
"eval_rouge1": 0.5671758743776842, |
|
"eval_rouge2": 0.3987572074969532, |
|
"eval_rougeL": 0.5499787838213916, |
|
"eval_rougeLsum": 0.55728689293535, |
|
"eval_runtime": 26.7851, |
|
"eval_samples_per_second": 74.668, |
|
"eval_steps_per_second": 2.352, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.328123421026039e-05, |
|
"loss": 0.514, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_bleu": 0.13519274318497243, |
|
"eval_exact_match": 0.166, |
|
"eval_loss": 0.7700380682945251, |
|
"eval_rouge1": 0.572246331088752, |
|
"eval_rouge2": 0.40519052168186803, |
|
"eval_rougeL": 0.5545858464982438, |
|
"eval_rougeLsum": 0.5618015934184992, |
|
"eval_runtime": 26.4898, |
|
"eval_samples_per_second": 75.501, |
|
"eval_steps_per_second": 2.378, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.159699531781589e-05, |
|
"loss": 0.4895, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_bleu": 0.13587785975036965, |
|
"eval_exact_match": 0.1615, |
|
"eval_loss": 0.7675954699516296, |
|
"eval_rouge1": 0.5695980848985347, |
|
"eval_rouge2": 0.40156815801776047, |
|
"eval_rougeL": 0.551645493171788, |
|
"eval_rougeLsum": 0.5591021610099983, |
|
"eval_runtime": 28.9264, |
|
"eval_samples_per_second": 69.141, |
|
"eval_steps_per_second": 2.178, |
|
"step": 10000 |
|
} |
|
], |
|
"max_steps": 31250, |
|
"num_train_epochs": 10, |
|
"total_flos": 4.252071441727488e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|