|
{ |
|
"best_metric": 1.2616792917251587, |
|
"best_model_checkpoint": "/content/drive/My Drive/Colab Notebooks/aai520-project/checkpoints/distilbert-finetuned-uncased/checkpoint-3900", |
|
"epoch": 7.257416033341506, |
|
"eval_steps": 100, |
|
"global_step": 7400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.607843137254902e-05, |
|
"loss": 3.6437, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.1780340671539307, |
|
"eval_runtime": 8.4412, |
|
"eval_samples_per_second": 1417.927, |
|
"eval_steps_per_second": 11.136, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.215686274509804e-05, |
|
"loss": 2.1596, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.6557185649871826, |
|
"eval_runtime": 8.3581, |
|
"eval_samples_per_second": 1432.022, |
|
"eval_steps_per_second": 11.247, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 1.8138, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 1.5682721138000488, |
|
"eval_runtime": 8.4191, |
|
"eval_samples_per_second": 1421.643, |
|
"eval_steps_per_second": 11.165, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.313725490196079e-06, |
|
"loss": 1.6987, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 1.5075817108154297, |
|
"eval_runtime": 8.3595, |
|
"eval_samples_per_second": 1431.779, |
|
"eval_steps_per_second": 11.245, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 1.6586, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 1.535025715827942, |
|
"eval_runtime": 8.4289, |
|
"eval_samples_per_second": 1419.991, |
|
"eval_steps_per_second": 11.152, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 1.5957, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 1.4431041479110718, |
|
"eval_runtime": 8.5594, |
|
"eval_samples_per_second": 1398.342, |
|
"eval_steps_per_second": 21.964, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.274509803921569e-06, |
|
"loss": 1.5825, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 1.495514154434204, |
|
"eval_runtime": 8.5285, |
|
"eval_samples_per_second": 1403.419, |
|
"eval_steps_per_second": 22.044, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.313725490196079e-06, |
|
"loss": 1.5523, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 1.4444433450698853, |
|
"eval_runtime": 8.5112, |
|
"eval_samples_per_second": 1406.272, |
|
"eval_steps_per_second": 22.089, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 1.5346, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 1.393009066581726, |
|
"eval_runtime": 8.5265, |
|
"eval_samples_per_second": 1403.737, |
|
"eval_steps_per_second": 22.049, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 1.5098, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 1.4284701347351074, |
|
"eval_runtime": 8.5994, |
|
"eval_samples_per_second": 1391.84, |
|
"eval_steps_per_second": 21.862, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.215686274509804e-06, |
|
"loss": 1.4632, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 1.3630493879318237, |
|
"eval_runtime": 8.4807, |
|
"eval_samples_per_second": 1411.328, |
|
"eval_steps_per_second": 22.168, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 1.4468, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 1.370953917503357, |
|
"eval_runtime": 8.5147, |
|
"eval_samples_per_second": 1405.685, |
|
"eval_steps_per_second": 22.079, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.2549019607843145e-06, |
|
"loss": 1.4343, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.3422259092330933, |
|
"eval_runtime": 8.4859, |
|
"eval_samples_per_second": 1410.461, |
|
"eval_steps_per_second": 22.154, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 6.274509803921569e-06, |
|
"loss": 1.4225, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 1.397080659866333, |
|
"eval_runtime": 8.4725, |
|
"eval_samples_per_second": 1412.689, |
|
"eval_steps_per_second": 22.189, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.294117647058824e-06, |
|
"loss": 1.408, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 1.435463547706604, |
|
"eval_runtime": 8.4775, |
|
"eval_samples_per_second": 1411.85, |
|
"eval_steps_per_second": 22.176, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 4.313725490196079e-06, |
|
"loss": 1.3609, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 1.3331981897354126, |
|
"eval_runtime": 8.4786, |
|
"eval_samples_per_second": 1411.679, |
|
"eval_steps_per_second": 22.174, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.3398, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 1.3791619539260864, |
|
"eval_runtime": 8.4678, |
|
"eval_samples_per_second": 1413.466, |
|
"eval_steps_per_second": 22.202, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 1.3224, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_loss": 1.41716730594635, |
|
"eval_runtime": 8.4259, |
|
"eval_samples_per_second": 1420.506, |
|
"eval_steps_per_second": 22.312, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.3725490196078434e-06, |
|
"loss": 1.3152, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 1.3955893516540527, |
|
"eval_runtime": 8.444, |
|
"eval_samples_per_second": 1417.453, |
|
"eval_steps_per_second": 22.264, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 1.3141, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 1.3748189210891724, |
|
"eval_runtime": 8.4509, |
|
"eval_samples_per_second": 1416.303, |
|
"eval_steps_per_second": 22.246, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.587831207065751e-05, |
|
"loss": 1.3085, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 1.3948506116867065, |
|
"eval_runtime": 61.1737, |
|
"eval_samples_per_second": 195.656, |
|
"eval_steps_per_second": 6.13, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5682041216879295e-05, |
|
"loss": 1.3325, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 1.4869881868362427, |
|
"eval_runtime": 61.232, |
|
"eval_samples_per_second": 195.47, |
|
"eval_steps_per_second": 6.124, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.548577036310108e-05, |
|
"loss": 1.3162, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.4565335512161255, |
|
"eval_runtime": 61.2209, |
|
"eval_samples_per_second": 195.505, |
|
"eval_steps_per_second": 6.125, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.5289499509322867e-05, |
|
"loss": 1.2936, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 1.449613332748413, |
|
"eval_runtime": 61.2023, |
|
"eval_samples_per_second": 195.565, |
|
"eval_steps_per_second": 6.127, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.5093228655544654e-05, |
|
"loss": 1.2648, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 1.2867687940597534, |
|
"eval_runtime": 61.2092, |
|
"eval_samples_per_second": 195.542, |
|
"eval_steps_per_second": 6.127, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.4896957801766438e-05, |
|
"loss": 1.2531, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.5093618631362915, |
|
"eval_runtime": 60.9769, |
|
"eval_samples_per_second": 196.287, |
|
"eval_steps_per_second": 6.15, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.4700686947988226e-05, |
|
"loss": 1.2599, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 1.3450872898101807, |
|
"eval_runtime": 61.0675, |
|
"eval_samples_per_second": 195.996, |
|
"eval_steps_per_second": 6.141, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.4504416094210011e-05, |
|
"loss": 1.2545, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 1.407065749168396, |
|
"eval_runtime": 61.1333, |
|
"eval_samples_per_second": 195.785, |
|
"eval_steps_per_second": 6.134, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.4308145240431797e-05, |
|
"loss": 1.2461, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.3378450870513916, |
|
"eval_runtime": 61.1961, |
|
"eval_samples_per_second": 195.584, |
|
"eval_steps_per_second": 6.128, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.4111874386653583e-05, |
|
"loss": 1.2038, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 1.294646143913269, |
|
"eval_runtime": 61.1636, |
|
"eval_samples_per_second": 195.688, |
|
"eval_steps_per_second": 6.131, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.391560353287537e-05, |
|
"loss": 1.1677, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 1.480231761932373, |
|
"eval_runtime": 61.0678, |
|
"eval_samples_per_second": 195.995, |
|
"eval_steps_per_second": 6.141, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.3719332679097154e-05, |
|
"loss": 1.103, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 1.3580176830291748, |
|
"eval_runtime": 61.18, |
|
"eval_samples_per_second": 195.636, |
|
"eval_steps_per_second": 6.129, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.3523061825318942e-05, |
|
"loss": 1.1205, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 1.3818870782852173, |
|
"eval_runtime": 60.9958, |
|
"eval_samples_per_second": 196.227, |
|
"eval_steps_per_second": 6.148, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.3326790971540726e-05, |
|
"loss": 1.095, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 1.4335613250732422, |
|
"eval_runtime": 61.1187, |
|
"eval_samples_per_second": 195.832, |
|
"eval_steps_per_second": 6.136, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.3130520117762513e-05, |
|
"loss": 1.0896, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_loss": 1.4962539672851562, |
|
"eval_runtime": 61.0543, |
|
"eval_samples_per_second": 196.039, |
|
"eval_steps_per_second": 6.142, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.2934249263984299e-05, |
|
"loss": 1.0856, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_loss": 1.3384228944778442, |
|
"eval_runtime": 61.1027, |
|
"eval_samples_per_second": 195.883, |
|
"eval_steps_per_second": 6.137, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.2737978410206085e-05, |
|
"loss": 1.0652, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_loss": 1.3583240509033203, |
|
"eval_runtime": 61.0826, |
|
"eval_samples_per_second": 195.948, |
|
"eval_steps_per_second": 6.139, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.254170755642787e-05, |
|
"loss": 1.0859, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 1.414008378982544, |
|
"eval_runtime": 61.0681, |
|
"eval_samples_per_second": 195.994, |
|
"eval_steps_per_second": 6.141, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.2345436702649658e-05, |
|
"loss": 1.058, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_loss": 1.2616792917251587, |
|
"eval_runtime": 61.1143, |
|
"eval_samples_per_second": 195.846, |
|
"eval_steps_per_second": 6.136, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.2149165848871442e-05, |
|
"loss": 1.0724, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 1.3551816940307617, |
|
"eval_runtime": 61.0974, |
|
"eval_samples_per_second": 195.9, |
|
"eval_steps_per_second": 6.138, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.1954857703631012e-05, |
|
"loss": 1.0509, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 1.2970877885818481, |
|
"eval_runtime": 61.0416, |
|
"eval_samples_per_second": 196.079, |
|
"eval_steps_per_second": 6.143, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.1758586849852797e-05, |
|
"loss": 0.97, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_loss": 1.3268026113510132, |
|
"eval_runtime": 61.0438, |
|
"eval_samples_per_second": 196.072, |
|
"eval_steps_per_second": 6.143, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.1562315996074585e-05, |
|
"loss": 0.95, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 1.3753584623336792, |
|
"eval_runtime": 61.1527, |
|
"eval_samples_per_second": 195.723, |
|
"eval_steps_per_second": 6.132, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.1366045142296369e-05, |
|
"loss": 0.9337, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_loss": 1.3687292337417603, |
|
"eval_runtime": 61.1591, |
|
"eval_samples_per_second": 195.703, |
|
"eval_steps_per_second": 6.132, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.1169774288518156e-05, |
|
"loss": 0.977, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 1.3613475561141968, |
|
"eval_runtime": 61.1865, |
|
"eval_samples_per_second": 195.615, |
|
"eval_steps_per_second": 6.129, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.0973503434739942e-05, |
|
"loss": 0.9484, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 1.513939380645752, |
|
"eval_runtime": 61.2246, |
|
"eval_samples_per_second": 195.493, |
|
"eval_steps_per_second": 6.125, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.0777232580961728e-05, |
|
"loss": 0.9739, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_loss": 1.2861210107803345, |
|
"eval_runtime": 61.1721, |
|
"eval_samples_per_second": 195.661, |
|
"eval_steps_per_second": 6.13, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.0580961727183514e-05, |
|
"loss": 0.955, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_loss": 1.3666507005691528, |
|
"eval_runtime": 61.1291, |
|
"eval_samples_per_second": 195.799, |
|
"eval_steps_per_second": 6.135, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.0384690873405301e-05, |
|
"loss": 0.9536, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 1.3179610967636108, |
|
"eval_runtime": 61.1785, |
|
"eval_samples_per_second": 195.641, |
|
"eval_steps_per_second": 6.13, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.0188420019627085e-05, |
|
"loss": 0.9541, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_loss": 1.4610702991485596, |
|
"eval_runtime": 61.0871, |
|
"eval_samples_per_second": 195.933, |
|
"eval_steps_per_second": 6.139, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.992149165848873e-06, |
|
"loss": 0.9462, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.4066604375839233, |
|
"eval_runtime": 61.1331, |
|
"eval_samples_per_second": 195.786, |
|
"eval_steps_per_second": 6.134, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 9.795878312070658e-06, |
|
"loss": 0.8728, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_loss": 1.3490474224090576, |
|
"eval_runtime": 61.0973, |
|
"eval_samples_per_second": 195.901, |
|
"eval_steps_per_second": 6.138, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 9.599607458292444e-06, |
|
"loss": 0.8646, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 1.4630706310272217, |
|
"eval_runtime": 61.0445, |
|
"eval_samples_per_second": 196.07, |
|
"eval_steps_per_second": 6.143, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 9.40333660451423e-06, |
|
"loss": 0.8683, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_loss": 1.4977810382843018, |
|
"eval_runtime": 61.0754, |
|
"eval_samples_per_second": 195.971, |
|
"eval_steps_per_second": 6.14, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 9.207065750736016e-06, |
|
"loss": 0.8571, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 1.5814284086227417, |
|
"eval_runtime": 61.0641, |
|
"eval_samples_per_second": 196.007, |
|
"eval_steps_per_second": 6.141, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 9.010794896957803e-06, |
|
"loss": 0.8475, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 1.5535171031951904, |
|
"eval_runtime": 61.1062, |
|
"eval_samples_per_second": 195.872, |
|
"eval_steps_per_second": 6.137, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 8.814524043179589e-06, |
|
"loss": 0.8653, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 1.4938398599624634, |
|
"eval_runtime": 61.1584, |
|
"eval_samples_per_second": 195.705, |
|
"eval_steps_per_second": 6.132, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 8.618253189401375e-06, |
|
"loss": 0.8664, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 1.414058804512024, |
|
"eval_runtime": 61.1886, |
|
"eval_samples_per_second": 195.608, |
|
"eval_steps_per_second": 6.129, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 8.42198233562316e-06, |
|
"loss": 0.889, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 1.4486888647079468, |
|
"eval_runtime": 61.1387, |
|
"eval_samples_per_second": 195.768, |
|
"eval_steps_per_second": 6.134, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 8.225711481844946e-06, |
|
"loss": 0.8601, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 1.47215735912323, |
|
"eval_runtime": 61.1249, |
|
"eval_samples_per_second": 195.812, |
|
"eval_steps_per_second": 6.135, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 8.031403336604516e-06, |
|
"loss": 0.8645, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_loss": 1.5842609405517578, |
|
"eval_runtime": 61.0783, |
|
"eval_samples_per_second": 195.962, |
|
"eval_steps_per_second": 6.14, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 7.835132482826301e-06, |
|
"loss": 0.785, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_loss": 1.6027640104293823, |
|
"eval_runtime": 61.1172, |
|
"eval_samples_per_second": 195.837, |
|
"eval_steps_per_second": 6.136, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 7.638861629048087e-06, |
|
"loss": 0.7711, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_loss": 1.6270956993103027, |
|
"eval_runtime": 61.0612, |
|
"eval_samples_per_second": 196.016, |
|
"eval_steps_per_second": 6.141, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 7.442590775269874e-06, |
|
"loss": 0.8056, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"eval_loss": 1.5399292707443237, |
|
"eval_runtime": 61.1714, |
|
"eval_samples_per_second": 195.663, |
|
"eval_steps_per_second": 6.13, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 7.246319921491659e-06, |
|
"loss": 0.8087, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 1.492693305015564, |
|
"eval_runtime": 61.0261, |
|
"eval_samples_per_second": 196.129, |
|
"eval_steps_per_second": 6.145, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 7.0500490677134445e-06, |
|
"loss": 0.7859, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 1.4677467346191406, |
|
"eval_runtime": 61.0807, |
|
"eval_samples_per_second": 195.954, |
|
"eval_steps_per_second": 6.139, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 6.853778213935232e-06, |
|
"loss": 0.7896, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 1.4780325889587402, |
|
"eval_runtime": 61.0973, |
|
"eval_samples_per_second": 195.901, |
|
"eval_steps_per_second": 6.138, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 6.657507360157017e-06, |
|
"loss": 0.7971, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 1.5110238790512085, |
|
"eval_runtime": 61.1306, |
|
"eval_samples_per_second": 195.794, |
|
"eval_steps_per_second": 6.134, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 6.461236506378803e-06, |
|
"loss": 0.7952, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 1.545872688293457, |
|
"eval_runtime": 61.0099, |
|
"eval_samples_per_second": 196.181, |
|
"eval_steps_per_second": 6.147, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 6.26496565260059e-06, |
|
"loss": 0.7971, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 1.5281697511672974, |
|
"eval_runtime": 61.0816, |
|
"eval_samples_per_second": 195.951, |
|
"eval_steps_per_second": 6.139, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 6.068694798822376e-06, |
|
"loss": 0.7908, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_loss": 1.4799116849899292, |
|
"eval_runtime": 61.1305, |
|
"eval_samples_per_second": 195.794, |
|
"eval_steps_per_second": 6.134, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 5.872423945044161e-06, |
|
"loss": 0.7456, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_loss": 1.6487486362457275, |
|
"eval_runtime": 61.0413, |
|
"eval_samples_per_second": 196.08, |
|
"eval_steps_per_second": 6.143, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 5.676153091265948e-06, |
|
"loss": 0.7236, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"eval_loss": 1.654253602027893, |
|
"eval_runtime": 61.1832, |
|
"eval_samples_per_second": 195.626, |
|
"eval_steps_per_second": 6.129, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 5.479882237487734e-06, |
|
"loss": 0.7484, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"eval_loss": 1.6202023029327393, |
|
"eval_runtime": 61.1291, |
|
"eval_samples_per_second": 195.799, |
|
"eval_steps_per_second": 6.135, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"step": 7400, |
|
"total_flos": 1.738708177538776e+17, |
|
"train_loss": 0.0, |
|
"train_runtime": 4.5618, |
|
"train_samples_per_second": 286080.391, |
|
"train_steps_per_second": 1117.99 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"eval_loss": 1.261675477027893, |
|
"eval_runtime": 64.7008, |
|
"eval_samples_per_second": 184.99, |
|
"eval_steps_per_second": 2.906, |
|
"step": 7400 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5100, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 1.738708177538776e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|