{ "best_metric": null, "best_model_checkpoint": null, "epoch": 79.61504811898513, "global_step": 91000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44, "learning_rate": 4.9726596675415576e-05, "loss": 3.1357, "step": 500 }, { "epoch": 0.87, "learning_rate": 4.945319335083115e-05, "loss": 2.8425, "step": 1000 }, { "epoch": 1.0, "eval_loss": 2.7537224292755127, "eval_runtime": 26.7077, "eval_samples_per_second": 168.341, "eval_steps_per_second": 7.039, "step": 1143 }, { "epoch": 1.31, "learning_rate": 4.917979002624672e-05, "loss": 2.6802, "step": 1500 }, { "epoch": 1.75, "learning_rate": 4.890638670166229e-05, "loss": 2.5984, "step": 2000 }, { "epoch": 2.0, "eval_loss": 2.565607786178589, "eval_runtime": 26.4707, "eval_samples_per_second": 169.848, "eval_steps_per_second": 7.102, "step": 2286 }, { "epoch": 2.19, "learning_rate": 4.8632983377077865e-05, "loss": 2.5251, "step": 2500 }, { "epoch": 2.62, "learning_rate": 4.835958005249344e-05, "loss": 2.4559, "step": 3000 }, { "epoch": 3.0, "eval_loss": 2.4342074394226074, "eval_runtime": 26.7553, "eval_samples_per_second": 168.042, "eval_steps_per_second": 7.027, "step": 3429 }, { "epoch": 3.06, "learning_rate": 4.808617672790901e-05, "loss": 2.401, "step": 3500 }, { "epoch": 3.5, "learning_rate": 4.781277340332459e-05, "loss": 2.3488, "step": 4000 }, { "epoch": 3.94, "learning_rate": 4.753937007874016e-05, "loss": 2.3162, "step": 4500 }, { "epoch": 4.0, "eval_loss": 2.3351871967315674, "eval_runtime": 26.8296, "eval_samples_per_second": 167.576, "eval_steps_per_second": 7.007, "step": 4572 }, { "epoch": 4.37, "learning_rate": 4.7265966754155735e-05, "loss": 2.2725, "step": 5000 }, { "epoch": 4.81, "learning_rate": 4.699256342957131e-05, "loss": 2.2522, "step": 5500 }, { "epoch": 5.0, "eval_loss": 2.2760047912597656, "eval_runtime": 26.7861, "eval_samples_per_second": 167.848, "eval_steps_per_second": 7.019, "step": 5715 }, { "epoch": 5.25, "learning_rate": 4.671916010498688e-05, "loss": 2.2133, "step": 6000 }, { "epoch": 5.69, "learning_rate": 4.644575678040245e-05, "loss": 2.1975, "step": 6500 }, { "epoch": 6.0, "eval_loss": 2.2312655448913574, "eval_runtime": 26.7663, "eval_samples_per_second": 167.972, "eval_steps_per_second": 7.024, "step": 6858 }, { "epoch": 6.12, "learning_rate": 4.6172353455818024e-05, "loss": 2.1634, "step": 7000 }, { "epoch": 6.56, "learning_rate": 4.58989501312336e-05, "loss": 2.1379, "step": 7500 }, { "epoch": 7.0, "learning_rate": 4.562554680664917e-05, "loss": 2.122, "step": 8000 }, { "epoch": 7.0, "eval_loss": 2.1581709384918213, "eval_runtime": 26.6421, "eval_samples_per_second": 168.755, "eval_steps_per_second": 7.056, "step": 8001 }, { "epoch": 7.44, "learning_rate": 4.5352143482064746e-05, "loss": 2.086, "step": 8500 }, { "epoch": 7.87, "learning_rate": 4.507874015748031e-05, "loss": 2.0819, "step": 9000 }, { "epoch": 8.0, "eval_loss": 2.1259472370147705, "eval_runtime": 26.7263, "eval_samples_per_second": 168.224, "eval_steps_per_second": 7.034, "step": 9144 }, { "epoch": 8.31, "learning_rate": 4.480533683289589e-05, "loss": 2.0559, "step": 9500 }, { "epoch": 8.75, "learning_rate": 4.453193350831146e-05, "loss": 2.0446, "step": 10000 }, { "epoch": 9.0, "eval_loss": 2.088864803314209, "eval_runtime": 26.6021, "eval_samples_per_second": 169.009, "eval_steps_per_second": 7.067, "step": 10287 }, { "epoch": 9.19, "learning_rate": 4.4258530183727035e-05, "loss": 2.0174, "step": 10500 }, { "epoch": 9.62, "learning_rate": 4.398512685914261e-05, "loss": 2.0024, "step": 11000 }, { "epoch": 10.0, "eval_loss": 2.051894426345825, "eval_runtime": 26.7181, "eval_samples_per_second": 168.275, "eval_steps_per_second": 7.036, "step": 11430 }, { "epoch": 10.06, "learning_rate": 4.371172353455818e-05, "loss": 1.9808, "step": 11500 }, { "epoch": 10.5, "learning_rate": 4.343832020997376e-05, "loss": 1.9612, "step": 12000 }, { "epoch": 10.94, "learning_rate": 4.316491688538933e-05, "loss": 1.9605, "step": 12500 }, { "epoch": 11.0, "eval_loss": 2.0255160331726074, "eval_runtime": 26.7012, "eval_samples_per_second": 168.382, "eval_steps_per_second": 7.041, "step": 12573 }, { "epoch": 11.37, "learning_rate": 4.2891513560804905e-05, "loss": 1.9339, "step": 13000 }, { "epoch": 11.81, "learning_rate": 4.261811023622048e-05, "loss": 1.9306, "step": 13500 }, { "epoch": 12.0, "eval_loss": 2.002980947494507, "eval_runtime": 26.7201, "eval_samples_per_second": 168.263, "eval_steps_per_second": 7.036, "step": 13716 }, { "epoch": 12.25, "learning_rate": 4.2344706911636046e-05, "loss": 1.9146, "step": 14000 }, { "epoch": 12.69, "learning_rate": 4.207130358705162e-05, "loss": 1.8966, "step": 14500 }, { "epoch": 13.0, "eval_loss": 1.9790760278701782, "eval_runtime": 26.794, "eval_samples_per_second": 167.799, "eval_steps_per_second": 7.016, "step": 14859 }, { "epoch": 13.12, "learning_rate": 4.1797900262467194e-05, "loss": 1.8971, "step": 15000 }, { "epoch": 13.56, "learning_rate": 4.152449693788277e-05, "loss": 1.8737, "step": 15500 }, { "epoch": 14.0, "learning_rate": 4.125109361329834e-05, "loss": 1.8727, "step": 16000 }, { "epoch": 14.0, "eval_loss": 1.953186273574829, "eval_runtime": 26.7868, "eval_samples_per_second": 167.844, "eval_steps_per_second": 7.018, "step": 16002 }, { "epoch": 14.44, "learning_rate": 4.097769028871391e-05, "loss": 1.8443, "step": 16500 }, { "epoch": 14.87, "learning_rate": 4.070428696412948e-05, "loss": 1.8494, "step": 17000 }, { "epoch": 15.0, "eval_loss": 1.9439117908477783, "eval_runtime": 26.6625, "eval_samples_per_second": 168.626, "eval_steps_per_second": 7.051, "step": 17145 }, { "epoch": 15.31, "learning_rate": 4.043088363954506e-05, "loss": 1.8469, "step": 17500 }, { "epoch": 15.75, "learning_rate": 4.015748031496063e-05, "loss": 1.832, "step": 18000 }, { "epoch": 16.0, "eval_loss": 1.9127792119979858, "eval_runtime": 26.757, "eval_samples_per_second": 168.031, "eval_steps_per_second": 7.026, "step": 18288 }, { "epoch": 16.19, "learning_rate": 3.9884076990376205e-05, "loss": 1.8145, "step": 18500 }, { "epoch": 16.62, "learning_rate": 3.961067366579177e-05, "loss": 1.8106, "step": 19000 }, { "epoch": 17.0, "eval_loss": 1.9018843173980713, "eval_runtime": 26.8662, "eval_samples_per_second": 167.348, "eval_steps_per_second": 6.998, "step": 19431 }, { "epoch": 17.06, "learning_rate": 3.9337270341207346e-05, "loss": 1.7961, "step": 19500 }, { "epoch": 17.5, "learning_rate": 3.906386701662293e-05, "loss": 1.7879, "step": 20000 }, { "epoch": 17.94, "learning_rate": 3.87904636920385e-05, "loss": 1.7897, "step": 20500 }, { "epoch": 18.0, "eval_loss": 1.8865631818771362, "eval_runtime": 26.5843, "eval_samples_per_second": 169.122, "eval_steps_per_second": 7.072, "step": 20574 }, { "epoch": 18.37, "learning_rate": 3.8517060367454075e-05, "loss": 1.7704, "step": 21000 }, { "epoch": 18.81, "learning_rate": 3.824365704286964e-05, "loss": 1.7672, "step": 21500 }, { "epoch": 19.0, "eval_loss": 1.8684182167053223, "eval_runtime": 26.6963, "eval_samples_per_second": 168.413, "eval_steps_per_second": 7.042, "step": 21717 }, { "epoch": 19.25, "learning_rate": 3.7970253718285216e-05, "loss": 1.756, "step": 22000 }, { "epoch": 19.69, "learning_rate": 3.769685039370079e-05, "loss": 1.7536, "step": 22500 }, { "epoch": 20.0, "eval_loss": 1.8641901016235352, "eval_runtime": 26.774, "eval_samples_per_second": 167.924, "eval_steps_per_second": 7.022, "step": 22860 }, { "epoch": 20.12, "learning_rate": 3.7423447069116364e-05, "loss": 1.7425, "step": 23000 }, { "epoch": 20.56, "learning_rate": 3.715004374453194e-05, "loss": 1.7371, "step": 23500 }, { "epoch": 21.0, "learning_rate": 3.6876640419947505e-05, "loss": 1.7359, "step": 24000 }, { "epoch": 21.0, "eval_loss": 1.8315424919128418, "eval_runtime": 26.6323, "eval_samples_per_second": 168.817, "eval_steps_per_second": 7.059, "step": 24003 }, { "epoch": 21.43, "learning_rate": 3.660323709536308e-05, "loss": 1.7111, "step": 24500 }, { "epoch": 21.87, "learning_rate": 3.632983377077865e-05, "loss": 1.72, "step": 25000 }, { "epoch": 22.0, "eval_loss": 1.8326971530914307, "eval_runtime": 26.5909, "eval_samples_per_second": 169.081, "eval_steps_per_second": 7.07, "step": 25146 }, { "epoch": 22.31, "learning_rate": 3.605643044619423e-05, "loss": 1.7062, "step": 25500 }, { "epoch": 22.75, "learning_rate": 3.57830271216098e-05, "loss": 1.703, "step": 26000 }, { "epoch": 23.0, "eval_loss": 1.8233990669250488, "eval_runtime": 26.5859, "eval_samples_per_second": 169.112, "eval_steps_per_second": 7.071, "step": 26289 }, { "epoch": 23.18, "learning_rate": 3.550962379702537e-05, "loss": 1.699, "step": 26500 }, { "epoch": 23.62, "learning_rate": 3.523622047244094e-05, "loss": 1.6834, "step": 27000 }, { "epoch": 24.0, "eval_loss": 1.8098665475845337, "eval_runtime": 26.717, "eval_samples_per_second": 168.282, "eval_steps_per_second": 7.037, "step": 27432 }, { "epoch": 24.06, "learning_rate": 3.4962817147856516e-05, "loss": 1.6929, "step": 27500 }, { "epoch": 24.5, "learning_rate": 3.468941382327209e-05, "loss": 1.6699, "step": 28000 }, { "epoch": 24.93, "learning_rate": 3.441601049868767e-05, "loss": 1.678, "step": 28500 }, { "epoch": 25.0, "eval_loss": 1.8052376508712769, "eval_runtime": 26.5734, "eval_samples_per_second": 169.192, "eval_steps_per_second": 7.075, "step": 28575 }, { "epoch": 25.37, "learning_rate": 3.414260717410324e-05, "loss": 1.6644, "step": 29000 }, { "epoch": 25.81, "learning_rate": 3.386920384951881e-05, "loss": 1.6611, "step": 29500 }, { "epoch": 26.0, "eval_loss": 1.7889142036437988, "eval_runtime": 26.6293, "eval_samples_per_second": 168.836, "eval_steps_per_second": 7.06, "step": 29718 }, { "epoch": 26.25, "learning_rate": 3.3595800524934386e-05, "loss": 1.6471, "step": 30000 }, { "epoch": 26.68, "learning_rate": 3.332239720034996e-05, "loss": 1.6501, "step": 30500 }, { "epoch": 27.0, "eval_loss": 1.7786972522735596, "eval_runtime": 26.6312, "eval_samples_per_second": 168.824, "eval_steps_per_second": 7.059, "step": 30861 }, { "epoch": 27.12, "learning_rate": 3.3048993875765534e-05, "loss": 1.6411, "step": 31000 }, { "epoch": 27.56, "learning_rate": 3.27755905511811e-05, "loss": 1.6381, "step": 31500 }, { "epoch": 28.0, "learning_rate": 3.2502187226596675e-05, "loss": 1.6356, "step": 32000 }, { "epoch": 28.0, "eval_loss": 1.7748563289642334, "eval_runtime": 26.5521, "eval_samples_per_second": 169.328, "eval_steps_per_second": 7.08, "step": 32004 }, { "epoch": 28.43, "learning_rate": 3.222878390201225e-05, "loss": 1.6218, "step": 32500 }, { "epoch": 28.87, "learning_rate": 3.195538057742782e-05, "loss": 1.6201, "step": 33000 }, { "epoch": 29.0, "eval_loss": 1.7615448236465454, "eval_runtime": 26.6981, "eval_samples_per_second": 168.401, "eval_steps_per_second": 7.042, "step": 33147 }, { "epoch": 29.31, "learning_rate": 3.16819772528434e-05, "loss": 1.6206, "step": 33500 }, { "epoch": 29.75, "learning_rate": 3.1408573928258964e-05, "loss": 1.608, "step": 34000 }, { "epoch": 30.0, "eval_loss": 1.7431529760360718, "eval_runtime": 26.6194, "eval_samples_per_second": 168.9, "eval_steps_per_second": 7.063, "step": 34290 }, { "epoch": 30.18, "learning_rate": 3.113517060367454e-05, "loss": 1.6002, "step": 34500 }, { "epoch": 30.62, "learning_rate": 3.086176727909011e-05, "loss": 1.5964, "step": 35000 }, { "epoch": 31.0, "eval_loss": 1.7418159246444702, "eval_runtime": 26.4725, "eval_samples_per_second": 169.836, "eval_steps_per_second": 7.102, "step": 35433 }, { "epoch": 31.06, "learning_rate": 3.0588363954505686e-05, "loss": 1.6037, "step": 35500 }, { "epoch": 31.5, "learning_rate": 3.0314960629921263e-05, "loss": 1.5951, "step": 36000 }, { "epoch": 31.93, "learning_rate": 3.0041557305336837e-05, "loss": 1.5895, "step": 36500 }, { "epoch": 32.0, "eval_loss": 1.7344874143600464, "eval_runtime": 26.6199, "eval_samples_per_second": 168.896, "eval_steps_per_second": 7.062, "step": 36576 }, { "epoch": 32.37, "learning_rate": 2.9768153980752404e-05, "loss": 1.5793, "step": 37000 }, { "epoch": 32.81, "learning_rate": 2.9494750656167978e-05, "loss": 1.5796, "step": 37500 }, { "epoch": 33.0, "eval_loss": 1.724800705909729, "eval_runtime": 26.6386, "eval_samples_per_second": 168.778, "eval_steps_per_second": 7.057, "step": 37719 }, { "epoch": 33.25, "learning_rate": 2.9221347331583556e-05, "loss": 1.567, "step": 38000 }, { "epoch": 33.68, "learning_rate": 2.894794400699913e-05, "loss": 1.5707, "step": 38500 }, { "epoch": 34.0, "eval_loss": 1.7171574831008911, "eval_runtime": 26.5659, "eval_samples_per_second": 169.239, "eval_steps_per_second": 7.077, "step": 38862 }, { "epoch": 34.12, "learning_rate": 2.8674540682414704e-05, "loss": 1.5722, "step": 39000 }, { "epoch": 34.56, "learning_rate": 2.840113735783027e-05, "loss": 1.5531, "step": 39500 }, { "epoch": 35.0, "learning_rate": 2.8127734033245845e-05, "loss": 1.5621, "step": 40000 }, { "epoch": 35.0, "eval_loss": 1.7109158039093018, "eval_runtime": 26.806, "eval_samples_per_second": 167.724, "eval_steps_per_second": 7.013, "step": 40005 }, { "epoch": 35.43, "learning_rate": 2.785433070866142e-05, "loss": 1.5494, "step": 40500 }, { "epoch": 35.87, "learning_rate": 2.7580927384076993e-05, "loss": 1.5527, "step": 41000 }, { "epoch": 36.0, "eval_loss": 1.7070965766906738, "eval_runtime": 26.6485, "eval_samples_per_second": 168.715, "eval_steps_per_second": 7.055, "step": 41148 }, { "epoch": 36.31, "learning_rate": 2.7307524059492567e-05, "loss": 1.5436, "step": 41500 }, { "epoch": 36.75, "learning_rate": 2.7034120734908137e-05, "loss": 1.5412, "step": 42000 }, { "epoch": 37.0, "eval_loss": 1.6965086460113525, "eval_runtime": 26.6507, "eval_samples_per_second": 168.701, "eval_steps_per_second": 7.054, "step": 42291 }, { "epoch": 37.18, "learning_rate": 2.676071741032371e-05, "loss": 1.5347, "step": 42500 }, { "epoch": 37.62, "learning_rate": 2.6487314085739285e-05, "loss": 1.5364, "step": 43000 }, { "epoch": 38.0, "eval_loss": 1.6897602081298828, "eval_runtime": 26.7708, "eval_samples_per_second": 167.944, "eval_steps_per_second": 7.023, "step": 43434 }, { "epoch": 38.06, "learning_rate": 2.621391076115486e-05, "loss": 1.536, "step": 43500 }, { "epoch": 38.5, "learning_rate": 2.5940507436570433e-05, "loss": 1.5233, "step": 44000 }, { "epoch": 38.93, "learning_rate": 2.5667104111986e-05, "loss": 1.5242, "step": 44500 }, { "epoch": 39.0, "eval_loss": 1.6954392194747925, "eval_runtime": 26.5185, "eval_samples_per_second": 169.542, "eval_steps_per_second": 7.089, "step": 44577 }, { "epoch": 39.37, "learning_rate": 2.5393700787401574e-05, "loss": 1.5179, "step": 45000 }, { "epoch": 39.81, "learning_rate": 2.5120297462817148e-05, "loss": 1.5186, "step": 45500 }, { "epoch": 40.0, "eval_loss": 1.6827205419540405, "eval_runtime": 26.5585, "eval_samples_per_second": 169.287, "eval_steps_per_second": 7.079, "step": 45720 }, { "epoch": 40.24, "learning_rate": 2.4846894138232722e-05, "loss": 1.5092, "step": 46000 }, { "epoch": 40.68, "learning_rate": 2.4573490813648296e-05, "loss": 1.5051, "step": 46500 }, { "epoch": 41.0, "eval_loss": 1.6745613813400269, "eval_runtime": 26.674, "eval_samples_per_second": 168.554, "eval_steps_per_second": 7.048, "step": 46863 }, { "epoch": 41.12, "learning_rate": 2.430008748906387e-05, "loss": 1.5061, "step": 47000 }, { "epoch": 41.56, "learning_rate": 2.402668416447944e-05, "loss": 1.5006, "step": 47500 }, { "epoch": 41.99, "learning_rate": 2.3753280839895015e-05, "loss": 1.5021, "step": 48000 }, { "epoch": 42.0, "eval_loss": 1.6715837717056274, "eval_runtime": 26.6407, "eval_samples_per_second": 168.765, "eval_steps_per_second": 7.057, "step": 48006 }, { "epoch": 42.43, "learning_rate": 2.3479877515310585e-05, "loss": 1.4944, "step": 48500 }, { "epoch": 42.87, "learning_rate": 2.320647419072616e-05, "loss": 1.4947, "step": 49000 }, { "epoch": 43.0, "eval_loss": 1.6718155145645142, "eval_runtime": 26.5453, "eval_samples_per_second": 169.371, "eval_steps_per_second": 7.082, "step": 49149 }, { "epoch": 43.31, "learning_rate": 2.2933070866141733e-05, "loss": 1.4708, "step": 49500 }, { "epoch": 43.74, "learning_rate": 2.2659667541557307e-05, "loss": 1.487, "step": 50000 }, { "epoch": 44.0, "eval_loss": 1.655574917793274, "eval_runtime": 26.6883, "eval_samples_per_second": 168.463, "eval_steps_per_second": 7.044, "step": 50292 }, { "epoch": 44.18, "learning_rate": 2.238626421697288e-05, "loss": 1.4856, "step": 50500 }, { "epoch": 44.62, "learning_rate": 2.211286089238845e-05, "loss": 1.4878, "step": 51000 }, { "epoch": 45.0, "eval_loss": 1.6630425453186035, "eval_runtime": 26.6239, "eval_samples_per_second": 168.871, "eval_steps_per_second": 7.061, "step": 51435 }, { "epoch": 45.06, "learning_rate": 2.1839457567804025e-05, "loss": 1.4726, "step": 51500 }, { "epoch": 45.49, "learning_rate": 2.15660542432196e-05, "loss": 1.4628, "step": 52000 }, { "epoch": 45.93, "learning_rate": 2.129265091863517e-05, "loss": 1.47, "step": 52500 }, { "epoch": 46.0, "eval_loss": 1.6468228101730347, "eval_runtime": 26.5809, "eval_samples_per_second": 169.144, "eval_steps_per_second": 7.073, "step": 52578 }, { "epoch": 46.37, "learning_rate": 2.1019247594050744e-05, "loss": 1.4704, "step": 53000 }, { "epoch": 46.81, "learning_rate": 2.0745844269466318e-05, "loss": 1.4648, "step": 53500 }, { "epoch": 47.0, "eval_loss": 1.6624916791915894, "eval_runtime": 26.637, "eval_samples_per_second": 168.788, "eval_steps_per_second": 7.058, "step": 53721 }, { "epoch": 47.24, "learning_rate": 2.0472440944881892e-05, "loss": 1.4506, "step": 54000 }, { "epoch": 47.68, "learning_rate": 2.0199037620297466e-05, "loss": 1.4613, "step": 54500 }, { "epoch": 48.0, "eval_loss": 1.6419734954833984, "eval_runtime": 26.7063, "eval_samples_per_second": 168.35, "eval_steps_per_second": 7.04, "step": 54864 }, { "epoch": 48.12, "learning_rate": 1.9925634295713036e-05, "loss": 1.453, "step": 55000 }, { "epoch": 48.56, "learning_rate": 1.965223097112861e-05, "loss": 1.4535, "step": 55500 }, { "epoch": 48.99, "learning_rate": 1.9378827646544184e-05, "loss": 1.4483, "step": 56000 }, { "epoch": 49.0, "eval_loss": 1.6399292945861816, "eval_runtime": 26.5852, "eval_samples_per_second": 169.117, "eval_steps_per_second": 7.072, "step": 56007 }, { "epoch": 49.43, "learning_rate": 1.9105424321959755e-05, "loss": 1.4387, "step": 56500 }, { "epoch": 49.87, "learning_rate": 1.883202099737533e-05, "loss": 1.4511, "step": 57000 }, { "epoch": 50.0, "eval_loss": 1.6433073282241821, "eval_runtime": 26.7101, "eval_samples_per_second": 168.326, "eval_steps_per_second": 7.039, "step": 57150 }, { "epoch": 50.31, "learning_rate": 1.85586176727909e-05, "loss": 1.4369, "step": 57500 }, { "epoch": 50.74, "learning_rate": 1.8285214348206477e-05, "loss": 1.4416, "step": 58000 }, { "epoch": 51.0, "eval_loss": 1.632462739944458, "eval_runtime": 26.7312, "eval_samples_per_second": 168.193, "eval_steps_per_second": 7.033, "step": 58293 }, { "epoch": 51.18, "learning_rate": 1.801181102362205e-05, "loss": 1.4299, "step": 58500 }, { "epoch": 51.62, "learning_rate": 1.773840769903762e-05, "loss": 1.4323, "step": 59000 }, { "epoch": 52.0, "eval_loss": 1.6295057535171509, "eval_runtime": 26.6499, "eval_samples_per_second": 168.706, "eval_steps_per_second": 7.054, "step": 59436 }, { "epoch": 52.06, "learning_rate": 1.7465004374453195e-05, "loss": 1.435, "step": 59500 }, { "epoch": 52.49, "learning_rate": 1.7191601049868766e-05, "loss": 1.4286, "step": 60000 }, { "epoch": 52.93, "learning_rate": 1.691819772528434e-05, "loss": 1.4251, "step": 60500 }, { "epoch": 53.0, "eval_loss": 1.617984414100647, "eval_runtime": 26.827, "eval_samples_per_second": 167.592, "eval_steps_per_second": 7.008, "step": 60579 }, { "epoch": 53.37, "learning_rate": 1.6644794400699914e-05, "loss": 1.4184, "step": 61000 }, { "epoch": 53.81, "learning_rate": 1.6371391076115484e-05, "loss": 1.4232, "step": 61500 }, { "epoch": 54.0, "eval_loss": 1.629773736000061, "eval_runtime": 26.6119, "eval_samples_per_second": 168.947, "eval_steps_per_second": 7.065, "step": 61722 }, { "epoch": 54.24, "learning_rate": 1.6097987751531062e-05, "loss": 1.4191, "step": 62000 }, { "epoch": 54.68, "learning_rate": 1.5824584426946632e-05, "loss": 1.4149, "step": 62500 }, { "epoch": 55.0, "eval_loss": 1.6210088729858398, "eval_runtime": 26.736, "eval_samples_per_second": 168.163, "eval_steps_per_second": 7.032, "step": 62865 }, { "epoch": 55.12, "learning_rate": 1.5551181102362206e-05, "loss": 1.416, "step": 63000 }, { "epoch": 55.56, "learning_rate": 1.527777777777778e-05, "loss": 1.4053, "step": 63500 }, { "epoch": 55.99, "learning_rate": 1.500437445319335e-05, "loss": 1.4053, "step": 64000 }, { "epoch": 56.0, "eval_loss": 1.6154247522354126, "eval_runtime": 26.7899, "eval_samples_per_second": 167.824, "eval_steps_per_second": 7.018, "step": 64008 }, { "epoch": 56.43, "learning_rate": 1.4730971128608925e-05, "loss": 1.4046, "step": 64500 }, { "epoch": 56.87, "learning_rate": 1.4457567804024497e-05, "loss": 1.4048, "step": 65000 }, { "epoch": 57.0, "eval_loss": 1.6118608713150024, "eval_runtime": 26.8108, "eval_samples_per_second": 167.694, "eval_steps_per_second": 7.012, "step": 65151 }, { "epoch": 57.31, "learning_rate": 1.4184164479440071e-05, "loss": 1.3955, "step": 65500 }, { "epoch": 57.74, "learning_rate": 1.3910761154855645e-05, "loss": 1.3897, "step": 66000 }, { "epoch": 58.0, "eval_loss": 1.615386724472046, "eval_runtime": 26.7926, "eval_samples_per_second": 167.808, "eval_steps_per_second": 7.017, "step": 66294 }, { "epoch": 58.18, "learning_rate": 1.3637357830271215e-05, "loss": 1.4013, "step": 66500 }, { "epoch": 58.62, "learning_rate": 1.3363954505686791e-05, "loss": 1.3894, "step": 67000 }, { "epoch": 59.0, "eval_loss": 1.615539312362671, "eval_runtime": 26.7037, "eval_samples_per_second": 168.366, "eval_steps_per_second": 7.04, "step": 67437 }, { "epoch": 59.06, "learning_rate": 1.3090551181102362e-05, "loss": 1.3925, "step": 67500 }, { "epoch": 59.49, "learning_rate": 1.2817147856517936e-05, "loss": 1.3901, "step": 68000 }, { "epoch": 59.93, "learning_rate": 1.254374453193351e-05, "loss": 1.3874, "step": 68500 }, { "epoch": 60.0, "eval_loss": 1.5988539457321167, "eval_runtime": 26.8328, "eval_samples_per_second": 167.556, "eval_steps_per_second": 7.006, "step": 68580 }, { "epoch": 60.37, "learning_rate": 1.2270341207349082e-05, "loss": 1.3905, "step": 69000 }, { "epoch": 60.8, "learning_rate": 1.1996937882764656e-05, "loss": 1.3828, "step": 69500 }, { "epoch": 61.0, "eval_loss": 1.606756567955017, "eval_runtime": 26.7077, "eval_samples_per_second": 168.341, "eval_steps_per_second": 7.039, "step": 69723 }, { "epoch": 61.24, "learning_rate": 1.1723534558180228e-05, "loss": 1.3915, "step": 70000 }, { "epoch": 61.68, "learning_rate": 1.14501312335958e-05, "loss": 1.3732, "step": 70500 }, { "epoch": 62.0, "eval_loss": 1.6009830236434937, "eval_runtime": 26.6568, "eval_samples_per_second": 168.663, "eval_steps_per_second": 7.053, "step": 70866 }, { "epoch": 62.12, "learning_rate": 1.1176727909011374e-05, "loss": 1.378, "step": 71000 }, { "epoch": 62.55, "learning_rate": 1.0903324584426947e-05, "loss": 1.3793, "step": 71500 }, { "epoch": 62.99, "learning_rate": 1.062992125984252e-05, "loss": 1.3762, "step": 72000 }, { "epoch": 63.0, "eval_loss": 1.588931679725647, "eval_runtime": 26.5912, "eval_samples_per_second": 169.078, "eval_steps_per_second": 7.07, "step": 72009 }, { "epoch": 63.43, "learning_rate": 1.0356517935258093e-05, "loss": 1.3722, "step": 72500 }, { "epoch": 63.87, "learning_rate": 1.0083114610673667e-05, "loss": 1.3793, "step": 73000 }, { "epoch": 64.0, "eval_loss": 1.5957828760147095, "eval_runtime": 26.789, "eval_samples_per_second": 167.83, "eval_steps_per_second": 7.018, "step": 73152 }, { "epoch": 64.3, "learning_rate": 9.809711286089239e-06, "loss": 1.3676, "step": 73500 }, { "epoch": 64.74, "learning_rate": 9.536307961504811e-06, "loss": 1.369, "step": 74000 }, { "epoch": 65.0, "eval_loss": 1.585767388343811, "eval_runtime": 26.8836, "eval_samples_per_second": 167.239, "eval_steps_per_second": 6.993, "step": 74295 }, { "epoch": 65.18, "learning_rate": 9.262904636920385e-06, "loss": 1.3716, "step": 74500 }, { "epoch": 65.62, "learning_rate": 8.98950131233596e-06, "loss": 1.3649, "step": 75000 }, { "epoch": 66.0, "eval_loss": 1.5860302448272705, "eval_runtime": 26.6649, "eval_samples_per_second": 168.611, "eval_steps_per_second": 7.05, "step": 75438 }, { "epoch": 66.05, "learning_rate": 8.716097987751532e-06, "loss": 1.3685, "step": 75500 }, { "epoch": 66.49, "learning_rate": 8.442694663167104e-06, "loss": 1.3634, "step": 76000 }, { "epoch": 66.93, "learning_rate": 8.169291338582676e-06, "loss": 1.3657, "step": 76500 }, { "epoch": 67.0, "eval_loss": 1.5799576044082642, "eval_runtime": 26.723, "eval_samples_per_second": 168.245, "eval_steps_per_second": 7.035, "step": 76581 }, { "epoch": 67.37, "learning_rate": 7.895888013998252e-06, "loss": 1.3567, "step": 77000 }, { "epoch": 67.8, "learning_rate": 7.622484689413824e-06, "loss": 1.3542, "step": 77500 }, { "epoch": 68.0, "eval_loss": 1.5821391344070435, "eval_runtime": 26.708, "eval_samples_per_second": 168.339, "eval_steps_per_second": 7.039, "step": 77724 }, { "epoch": 68.24, "learning_rate": 7.349081364829396e-06, "loss": 1.3553, "step": 78000 }, { "epoch": 68.68, "learning_rate": 7.075678040244969e-06, "loss": 1.3499, "step": 78500 }, { "epoch": 69.0, "eval_loss": 1.566676139831543, "eval_runtime": 26.7673, "eval_samples_per_second": 167.966, "eval_steps_per_second": 7.023, "step": 78867 }, { "epoch": 69.12, "learning_rate": 6.8022747156605425e-06, "loss": 1.3625, "step": 79000 }, { "epoch": 69.55, "learning_rate": 6.5288713910761165e-06, "loss": 1.344, "step": 79500 }, { "epoch": 69.99, "learning_rate": 6.255468066491689e-06, "loss": 1.3556, "step": 80000 }, { "epoch": 70.0, "eval_loss": 1.5794533491134644, "eval_runtime": 26.5526, "eval_samples_per_second": 169.324, "eval_steps_per_second": 7.08, "step": 80010 }, { "epoch": 70.43, "learning_rate": 5.982064741907262e-06, "loss": 1.3489, "step": 80500 }, { "epoch": 70.87, "learning_rate": 5.708661417322835e-06, "loss": 1.3459, "step": 81000 }, { "epoch": 71.0, "eval_loss": 1.5812574625015259, "eval_runtime": 26.6663, "eval_samples_per_second": 168.602, "eval_steps_per_second": 7.05, "step": 81153 }, { "epoch": 71.3, "learning_rate": 5.435258092738408e-06, "loss": 1.3402, "step": 81500 }, { "epoch": 71.74, "learning_rate": 5.16185476815398e-06, "loss": 1.349, "step": 82000 }, { "epoch": 72.0, "eval_loss": 1.5749701261520386, "eval_runtime": 26.5968, "eval_samples_per_second": 169.043, "eval_steps_per_second": 7.069, "step": 82296 }, { "epoch": 72.18, "learning_rate": 4.888451443569554e-06, "loss": 1.3431, "step": 82500 }, { "epoch": 72.62, "learning_rate": 4.6150481189851266e-06, "loss": 1.3462, "step": 83000 }, { "epoch": 73.0, "eval_loss": 1.5782840251922607, "eval_runtime": 26.7153, "eval_samples_per_second": 168.293, "eval_steps_per_second": 7.037, "step": 83439 }, { "epoch": 73.05, "learning_rate": 4.3416447944007005e-06, "loss": 1.3358, "step": 83500 }, { "epoch": 73.49, "learning_rate": 4.068241469816273e-06, "loss": 1.3365, "step": 84000 }, { "epoch": 73.93, "learning_rate": 3.794838145231846e-06, "loss": 1.3467, "step": 84500 }, { "epoch": 74.0, "eval_loss": 1.5689624547958374, "eval_runtime": 26.6651, "eval_samples_per_second": 168.61, "eval_steps_per_second": 7.05, "step": 84582 }, { "epoch": 74.37, "learning_rate": 3.5214348206474194e-06, "loss": 1.3447, "step": 85000 }, { "epoch": 74.8, "learning_rate": 3.248031496062992e-06, "loss": 1.3375, "step": 85500 }, { "epoch": 75.0, "eval_loss": 1.571561336517334, "eval_runtime": 26.7826, "eval_samples_per_second": 167.87, "eval_steps_per_second": 7.019, "step": 85725 }, { "epoch": 75.24, "learning_rate": 2.9746281714785652e-06, "loss": 1.3416, "step": 86000 }, { "epoch": 75.68, "learning_rate": 2.7012248468941383e-06, "loss": 1.3304, "step": 86500 }, { "epoch": 76.0, "eval_loss": 1.5706144571304321, "eval_runtime": 26.6581, "eval_samples_per_second": 168.654, "eval_steps_per_second": 7.052, "step": 86868 }, { "epoch": 76.12, "learning_rate": 2.4278215223097115e-06, "loss": 1.3315, "step": 87000 }, { "epoch": 76.55, "learning_rate": 2.1544181977252846e-06, "loss": 1.3385, "step": 87500 }, { "epoch": 76.99, "learning_rate": 1.8810148731408575e-06, "loss": 1.3341, "step": 88000 }, { "epoch": 77.0, "eval_loss": 1.5661262273788452, "eval_runtime": 26.6726, "eval_samples_per_second": 168.562, "eval_steps_per_second": 7.048, "step": 88011 }, { "epoch": 77.43, "learning_rate": 1.6076115485564306e-06, "loss": 1.3315, "step": 88500 }, { "epoch": 77.87, "learning_rate": 1.3342082239720037e-06, "loss": 1.3326, "step": 89000 }, { "epoch": 78.0, "eval_loss": 1.5639817714691162, "eval_runtime": 26.5292, "eval_samples_per_second": 169.474, "eval_steps_per_second": 7.087, "step": 89154 }, { "epoch": 78.3, "learning_rate": 1.0608048993875766e-06, "loss": 1.3367, "step": 89500 }, { "epoch": 78.74, "learning_rate": 7.874015748031496e-07, "loss": 1.3325, "step": 90000 }, { "epoch": 79.0, "eval_loss": 1.5637917518615723, "eval_runtime": 26.8397, "eval_samples_per_second": 167.513, "eval_steps_per_second": 7.005, "step": 90297 }, { "epoch": 79.18, "learning_rate": 5.139982502187226e-07, "loss": 1.3292, "step": 90500 }, { "epoch": 79.62, "learning_rate": 2.405949256342957e-07, "loss": 1.3252, "step": 91000 } ], "max_steps": 91440, "num_train_epochs": 80, "total_flos": 7.57423814122537e+17, "trial_name": null, "trial_params": null }