|
{ |
|
"best_metric": 2.5907859802246094, |
|
"best_model_checkpoint": "/content/drive/MyDrive/W210 Capstone - Lyric Generation with Melody/loaf/models/lyrlen/bart/bart-finetuned-lyrlen-256-tokens/checkpoint-9500", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.546516180038452, |
|
"learning_rate": 4.791666666666667e-05, |
|
"loss": 2.9847, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.741720676422119, |
|
"eval_runtime": 260.4273, |
|
"eval_samples_per_second": 11.52, |
|
"eval_steps_per_second": 1.44, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.4709670543670654, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 2.7603, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.6936144828796387, |
|
"eval_runtime": 262.3249, |
|
"eval_samples_per_second": 11.436, |
|
"eval_steps_per_second": 1.43, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.5319254398345947, |
|
"learning_rate": 4.375e-05, |
|
"loss": 2.6777, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 2.703195571899414, |
|
"eval_runtime": 257.9552, |
|
"eval_samples_per_second": 11.63, |
|
"eval_steps_per_second": 1.454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.216550588607788, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 2.6679, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.67903995513916, |
|
"eval_runtime": 258.1985, |
|
"eval_samples_per_second": 11.619, |
|
"eval_steps_per_second": 1.452, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.104050874710083, |
|
"learning_rate": 3.958333333333333e-05, |
|
"loss": 2.631, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 2.6584043502807617, |
|
"eval_runtime": 258.3509, |
|
"eval_samples_per_second": 11.612, |
|
"eval_steps_per_second": 1.452, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.068540334701538, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 2.612, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.6528127193450928, |
|
"eval_runtime": 258.2606, |
|
"eval_samples_per_second": 11.616, |
|
"eval_steps_per_second": 1.452, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 2.086799144744873, |
|
"learning_rate": 3.541666666666667e-05, |
|
"loss": 2.5602, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.7752346992492676, |
|
"eval_runtime": 257.6956, |
|
"eval_samples_per_second": 11.642, |
|
"eval_steps_per_second": 1.455, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.9567131996154785, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.5321, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 2.644227981567383, |
|
"eval_runtime": 258.3013, |
|
"eval_samples_per_second": 11.614, |
|
"eval_steps_per_second": 1.452, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.2101922035217285, |
|
"learning_rate": 3.125e-05, |
|
"loss": 2.523, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 2.616760492324829, |
|
"eval_runtime": 257.5303, |
|
"eval_samples_per_second": 11.649, |
|
"eval_steps_per_second": 1.456, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 2.022505044937134, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 2.5031, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 2.635617971420288, |
|
"eval_runtime": 258.5677, |
|
"eval_samples_per_second": 11.602, |
|
"eval_steps_per_second": 1.45, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 2.627197742462158, |
|
"learning_rate": 2.7083333333333332e-05, |
|
"loss": 2.4454, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 2.6986706256866455, |
|
"eval_runtime": 253.7072, |
|
"eval_samples_per_second": 11.825, |
|
"eval_steps_per_second": 1.478, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.1193881034851074, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.5141, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.627661943435669, |
|
"eval_runtime": 253.4776, |
|
"eval_samples_per_second": 11.835, |
|
"eval_steps_per_second": 1.479, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 2.328862428665161, |
|
"learning_rate": 2.2916666666666667e-05, |
|
"loss": 2.434, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 2.6612467765808105, |
|
"eval_runtime": 258.1952, |
|
"eval_samples_per_second": 11.619, |
|
"eval_steps_per_second": 1.452, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 2.1137049198150635, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 2.4381, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.6339426040649414, |
|
"eval_runtime": 258.4481, |
|
"eval_samples_per_second": 11.608, |
|
"eval_steps_per_second": 1.451, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.8148552179336548, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 2.4182, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 2.629108190536499, |
|
"eval_runtime": 258.5172, |
|
"eval_samples_per_second": 11.605, |
|
"eval_steps_per_second": 1.451, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 2.4865896701812744, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 2.3564, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 2.6308934688568115, |
|
"eval_runtime": 258.6512, |
|
"eval_samples_per_second": 11.599, |
|
"eval_steps_per_second": 1.45, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 2.048546314239502, |
|
"learning_rate": 1.4583333333333335e-05, |
|
"loss": 2.4002, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.595694065093994, |
|
"eval_runtime": 258.545, |
|
"eval_samples_per_second": 11.603, |
|
"eval_steps_per_second": 1.45, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.7174828052520752, |
|
"learning_rate": 1.25e-05, |
|
"loss": 2.3692, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.6149754524230957, |
|
"eval_runtime": 258.5814, |
|
"eval_samples_per_second": 11.602, |
|
"eval_steps_per_second": 1.45, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 1.8168933391571045, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 2.4242, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.5907859802246094, |
|
"eval_runtime": 255.9648, |
|
"eval_samples_per_second": 11.72, |
|
"eval_steps_per_second": 1.465, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"grad_norm": 2.5351202487945557, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 2.3854, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 2.60528564453125, |
|
"eval_runtime": 259.6978, |
|
"eval_samples_per_second": 11.552, |
|
"eval_steps_per_second": 1.444, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 2.13393497467041, |
|
"learning_rate": 6.25e-06, |
|
"loss": 2.3493, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.6874961853027344, |
|
"eval_runtime": 259.0478, |
|
"eval_samples_per_second": 11.581, |
|
"eval_steps_per_second": 1.448, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"grad_norm": 2.1561803817749023, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 2.3231, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 2.6197221279144287, |
|
"eval_runtime": 259.0325, |
|
"eval_samples_per_second": 11.582, |
|
"eval_steps_per_second": 1.448, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"grad_norm": 2.055306911468506, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 2.3609, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_loss": 2.601628303527832, |
|
"eval_runtime": 258.7708, |
|
"eval_samples_per_second": 11.593, |
|
"eval_steps_per_second": 1.449, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.0090279579162598, |
|
"learning_rate": 0.0, |
|
"loss": 2.3651, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.6246001720428467, |
|
"eval_runtime": 259.2649, |
|
"eval_samples_per_second": 11.571, |
|
"eval_steps_per_second": 1.446, |
|
"step": 12000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 5.2010510450688e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|