k4black's picture
Training in progress, step 10000
2ea4b70
raw
history blame
11.7 kB
{
"best_metric": 0.7621653079986572,
"best_model_checkpoint": "/home2/s5431786/jb-internship/results/Salesforce-codet5-small-CodeXGLUE-CONCODE-adamw/checkpoint-9000",
"epoch": 3.2,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 3.166986564299425e-05,
"loss": 2.3935,
"step": 500
},
{
"epoch": 0.16,
"eval_bleu": 0.11268710945019392,
"eval_exact_match": 0.129,
"eval_loss": 0.9724392294883728,
"eval_rouge1": 0.5286362208212969,
"eval_rouge2": 0.3465696747006132,
"eval_rougeL": 0.5098110686689669,
"eval_rougeLsum": 0.5153482514973529,
"eval_runtime": 27.4689,
"eval_samples_per_second": 72.81,
"eval_steps_per_second": 2.294,
"step": 500
},
{
"epoch": 0.32,
"learning_rate": 6.3659628918746e-05,
"loss": 0.8984,
"step": 1000
},
{
"epoch": 0.32,
"eval_bleu": 0.11998407865663271,
"eval_exact_match": 0.138,
"eval_loss": 0.8919487595558167,
"eval_rouge1": 0.546341326360675,
"eval_rouge2": 0.3714077684410343,
"eval_rougeL": 0.5285236062967356,
"eval_rougeLsum": 0.535261365301597,
"eval_runtime": 26.8866,
"eval_samples_per_second": 74.386,
"eval_steps_per_second": 2.343,
"step": 1000
},
{
"epoch": 0.48,
"learning_rate": 9.564939219449776e-05,
"loss": 0.8121,
"step": 1500
},
{
"epoch": 0.48,
"eval_bleu": 0.11580804646751057,
"eval_exact_match": 0.1455,
"eval_loss": 0.8583337664604187,
"eval_rouge1": 0.5528659122019346,
"eval_rouge2": 0.3786855675064588,
"eval_rougeL": 0.535002514875875,
"eval_rougeLsum": 0.5425756750433103,
"eval_runtime": 27.5302,
"eval_samples_per_second": 72.647,
"eval_steps_per_second": 2.288,
"step": 1500
},
{
"epoch": 0.64,
"learning_rate": 9.854481759692796e-05,
"loss": 0.7598,
"step": 2000
},
{
"epoch": 0.64,
"eval_bleu": 0.11973654523564387,
"eval_exact_match": 0.1485,
"eval_loss": 0.8436598181724548,
"eval_rouge1": 0.5541164072038078,
"eval_rouge2": 0.3813328094993438,
"eval_rougeL": 0.5354722356320805,
"eval_rougeLsum": 0.5432424855167899,
"eval_runtime": 26.6962,
"eval_samples_per_second": 74.917,
"eval_steps_per_second": 2.36,
"step": 2000
},
{
"epoch": 0.8,
"learning_rate": 9.686057870448345e-05,
"loss": 0.7289,
"step": 2500
},
{
"epoch": 0.8,
"eval_bleu": 0.12215125938884584,
"eval_exact_match": 0.158,
"eval_loss": 0.818922758102417,
"eval_rouge1": 0.5596647043092989,
"eval_rouge2": 0.390646741130832,
"eval_rougeL": 0.5415781749603358,
"eval_rougeLsum": 0.5501483448643409,
"eval_runtime": 26.4885,
"eval_samples_per_second": 75.504,
"eval_steps_per_second": 2.378,
"step": 2500
},
{
"epoch": 0.96,
"learning_rate": 9.517633981203894e-05,
"loss": 0.7053,
"step": 3000
},
{
"epoch": 0.96,
"eval_bleu": 0.12223620743911237,
"eval_exact_match": 0.161,
"eval_loss": 0.8145434856414795,
"eval_rouge1": 0.5571603682618643,
"eval_rouge2": 0.3887993919896527,
"eval_rougeL": 0.5392381596924516,
"eval_rougeLsum": 0.5469429464366221,
"eval_runtime": 27.992,
"eval_samples_per_second": 71.449,
"eval_steps_per_second": 2.251,
"step": 3000
},
{
"epoch": 1.12,
"learning_rate": 9.349210091959444e-05,
"loss": 0.6544,
"step": 3500
},
{
"epoch": 1.12,
"eval_bleu": 0.1260404864365832,
"eval_exact_match": 0.1565,
"eval_loss": 0.7981612682342529,
"eval_rouge1": 0.5606467979991658,
"eval_rouge2": 0.3920123242296646,
"eval_rougeL": 0.5435919709288143,
"eval_rougeLsum": 0.5516785996846396,
"eval_runtime": 29.5652,
"eval_samples_per_second": 67.647,
"eval_steps_per_second": 2.131,
"step": 3500
},
{
"epoch": 1.28,
"learning_rate": 9.180786202714994e-05,
"loss": 0.6334,
"step": 4000
},
{
"epoch": 1.28,
"eval_bleu": 0.12839687243556736,
"eval_exact_match": 0.1585,
"eval_loss": 0.7973963618278503,
"eval_rouge1": 0.5633046028358244,
"eval_rouge2": 0.3905965243856279,
"eval_rougeL": 0.5447842726270915,
"eval_rougeLsum": 0.5529333478079217,
"eval_runtime": 26.3748,
"eval_samples_per_second": 75.83,
"eval_steps_per_second": 2.389,
"step": 4000
},
{
"epoch": 1.44,
"learning_rate": 9.012362313470542e-05,
"loss": 0.6236,
"step": 4500
},
{
"epoch": 1.44,
"eval_bleu": 0.12748409364928656,
"eval_exact_match": 0.163,
"eval_loss": 0.7942736744880676,
"eval_rouge1": 0.563881072938117,
"eval_rouge2": 0.3930844670404538,
"eval_rougeL": 0.5455192493710137,
"eval_rougeLsum": 0.5541560433164279,
"eval_runtime": 26.3891,
"eval_samples_per_second": 75.789,
"eval_steps_per_second": 2.387,
"step": 4500
},
{
"epoch": 1.6,
"learning_rate": 8.843938424226093e-05,
"loss": 0.6221,
"step": 5000
},
{
"epoch": 1.6,
"eval_bleu": 0.13095132849637034,
"eval_exact_match": 0.1655,
"eval_loss": 0.782397449016571,
"eval_rouge1": 0.571774543234057,
"eval_rouge2": 0.40112070812038864,
"eval_rougeL": 0.5536602781518731,
"eval_rougeLsum": 0.5621325917699274,
"eval_runtime": 27.1882,
"eval_samples_per_second": 73.561,
"eval_steps_per_second": 2.317,
"step": 5000
},
{
"epoch": 1.76,
"learning_rate": 8.675514534981643e-05,
"loss": 0.608,
"step": 5500
},
{
"epoch": 1.76,
"eval_bleu": 0.13144704649097816,
"eval_exact_match": 0.163,
"eval_loss": 0.7792329788208008,
"eval_rouge1": 0.5663801985181786,
"eval_rouge2": 0.3996756504619762,
"eval_rougeL": 0.5490225477397204,
"eval_rougeLsum": 0.556696752293008,
"eval_runtime": 26.3826,
"eval_samples_per_second": 75.808,
"eval_steps_per_second": 2.388,
"step": 5500
},
{
"epoch": 1.92,
"learning_rate": 8.507090645737192e-05,
"loss": 0.5956,
"step": 6000
},
{
"epoch": 1.92,
"eval_bleu": 0.129401969984806,
"eval_exact_match": 0.1605,
"eval_loss": 0.7785006165504456,
"eval_rouge1": 0.5640650180352025,
"eval_rouge2": 0.39806305672301234,
"eval_rougeL": 0.5469935550994057,
"eval_rougeLsum": 0.554556710156975,
"eval_runtime": 27.3051,
"eval_samples_per_second": 73.246,
"eval_steps_per_second": 2.307,
"step": 6000
},
{
"epoch": 2.08,
"learning_rate": 8.338666756492741e-05,
"loss": 0.5701,
"step": 6500
},
{
"epoch": 2.08,
"eval_bleu": 0.13363680452673993,
"eval_exact_match": 0.157,
"eval_loss": 0.7799766063690186,
"eval_rouge1": 0.5672994124585793,
"eval_rouge2": 0.39553178231486497,
"eval_rougeL": 0.548936720126607,
"eval_rougeLsum": 0.5568480771592814,
"eval_runtime": 26.8025,
"eval_samples_per_second": 74.62,
"eval_steps_per_second": 2.351,
"step": 6500
},
{
"epoch": 2.24,
"learning_rate": 8.170242867248291e-05,
"loss": 0.5378,
"step": 7000
},
{
"epoch": 2.24,
"eval_bleu": 0.13079743156246348,
"eval_exact_match": 0.1655,
"eval_loss": 0.7720439434051514,
"eval_rouge1": 0.5685604278995763,
"eval_rouge2": 0.3999533201987808,
"eval_rougeL": 0.5504327783527796,
"eval_rougeLsum": 0.5582442604826536,
"eval_runtime": 26.7858,
"eval_samples_per_second": 74.666,
"eval_steps_per_second": 2.352,
"step": 7000
},
{
"epoch": 2.4,
"learning_rate": 8.00181897800384e-05,
"loss": 0.541,
"step": 7500
},
{
"epoch": 2.4,
"eval_bleu": 0.1312928889201776,
"eval_exact_match": 0.1625,
"eval_loss": 0.7708559632301331,
"eval_rouge1": 0.5698993331552187,
"eval_rouge2": 0.39836218536255497,
"eval_rougeL": 0.5510518000592355,
"eval_rougeLsum": 0.5590054429199334,
"eval_runtime": 26.5491,
"eval_samples_per_second": 75.332,
"eval_steps_per_second": 2.373,
"step": 7500
},
{
"epoch": 2.56,
"learning_rate": 7.83339508875939e-05,
"loss": 0.5359,
"step": 8000
},
{
"epoch": 2.56,
"eval_bleu": 0.133238582586381,
"eval_exact_match": 0.164,
"eval_loss": 0.767312228679657,
"eval_rouge1": 0.5696523576197161,
"eval_rouge2": 0.40229167377006236,
"eval_rougeL": 0.552121263102242,
"eval_rougeLsum": 0.5600576350734027,
"eval_runtime": 26.7037,
"eval_samples_per_second": 74.896,
"eval_steps_per_second": 2.359,
"step": 8000
},
{
"epoch": 2.72,
"learning_rate": 7.664971199514939e-05,
"loss": 0.5322,
"step": 8500
},
{
"epoch": 2.72,
"eval_bleu": 0.13498312180179964,
"eval_exact_match": 0.1665,
"eval_loss": 0.7641937732696533,
"eval_rouge1": 0.5707696930577069,
"eval_rouge2": 0.403346138073659,
"eval_rougeL": 0.5527280072901636,
"eval_rougeLsum": 0.5605579795104842,
"eval_runtime": 26.4882,
"eval_samples_per_second": 75.505,
"eval_steps_per_second": 2.378,
"step": 8500
},
{
"epoch": 2.88,
"learning_rate": 7.496547310270489e-05,
"loss": 0.5387,
"step": 9000
},
{
"epoch": 2.88,
"eval_bleu": 0.13416837075728422,
"eval_exact_match": 0.159,
"eval_loss": 0.7621653079986572,
"eval_rouge1": 0.5671758743776842,
"eval_rouge2": 0.3987572074969532,
"eval_rougeL": 0.5499787838213916,
"eval_rougeLsum": 0.55728689293535,
"eval_runtime": 26.7851,
"eval_samples_per_second": 74.668,
"eval_steps_per_second": 2.352,
"step": 9000
},
{
"epoch": 3.04,
"learning_rate": 7.328123421026039e-05,
"loss": 0.514,
"step": 9500
},
{
"epoch": 3.04,
"eval_bleu": 0.13519274318497243,
"eval_exact_match": 0.166,
"eval_loss": 0.7700380682945251,
"eval_rouge1": 0.572246331088752,
"eval_rouge2": 0.40519052168186803,
"eval_rougeL": 0.5545858464982438,
"eval_rougeLsum": 0.5618015934184992,
"eval_runtime": 26.4898,
"eval_samples_per_second": 75.501,
"eval_steps_per_second": 2.378,
"step": 9500
},
{
"epoch": 3.2,
"learning_rate": 7.159699531781589e-05,
"loss": 0.4895,
"step": 10000
},
{
"epoch": 3.2,
"eval_bleu": 0.13587785975036965,
"eval_exact_match": 0.1615,
"eval_loss": 0.7675954699516296,
"eval_rouge1": 0.5695980848985347,
"eval_rouge2": 0.40156815801776047,
"eval_rougeL": 0.551645493171788,
"eval_rougeLsum": 0.5591021610099983,
"eval_runtime": 28.9264,
"eval_samples_per_second": 69.141,
"eval_steps_per_second": 2.178,
"step": 10000
}
],
"max_steps": 31250,
"num_train_epochs": 10,
"total_flos": 4.252071441727488e+16,
"trial_name": null,
"trial_params": null
}