|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.968985378821444, |
|
"global_step": 157500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9936704854737643e-05, |
|
"loss": 2.1885, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9873409709475284e-05, |
|
"loss": 2.1503, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9810114564212926e-05, |
|
"loss": 2.1207, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.974681941895057e-05, |
|
"loss": 2.093, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.968352427368821e-05, |
|
"loss": 2.0938, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9620229128425853e-05, |
|
"loss": 2.0842, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9556933983163494e-05, |
|
"loss": 2.0991, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9493638837901136e-05, |
|
"loss": 2.0715, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.9430343692638777e-05, |
|
"loss": 2.073, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.936704854737642e-05, |
|
"loss": 2.08, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.930375340211406e-05, |
|
"loss": 2.0494, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.92404582568517e-05, |
|
"loss": 2.0616, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9177163111589342e-05, |
|
"loss": 2.0571, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9113867966326984e-05, |
|
"loss": 2.0561, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.9050572821064625e-05, |
|
"loss": 2.0658, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.8987277675802266e-05, |
|
"loss": 2.0275, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8923982530539908e-05, |
|
"loss": 2.0335, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.8860687385277552e-05, |
|
"loss": 2.0312, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.8797392240015194e-05, |
|
"loss": 2.0138, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8734097094752835e-05, |
|
"loss": 2.0005, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8670801949490476e-05, |
|
"loss": 2.0248, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.8607506804228118e-05, |
|
"loss": 2.0152, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.854421165896576e-05, |
|
"loss": 2.0168, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.84809165137034e-05, |
|
"loss": 2.0196, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.841762136844104e-05, |
|
"loss": 2.0003, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8354326223178683e-05, |
|
"loss": 2.0267, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.8291031077916324e-05, |
|
"loss": 2.0022, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8227735932653966e-05, |
|
"loss": 2.0102, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.8164440787391607e-05, |
|
"loss": 2.0, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.810114564212925e-05, |
|
"loss": 1.9751, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.803785049686689e-05, |
|
"loss": 1.9912, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.708198070526123, |
|
"eval_runtime": 629.4341, |
|
"eval_samples_per_second": 401.594, |
|
"eval_steps_per_second": 25.1, |
|
"step": 15799 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.797455535160453e-05, |
|
"loss": 1.9373, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.7911260206342176e-05, |
|
"loss": 1.8308, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.7847965061079817e-05, |
|
"loss": 1.8442, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.778466991581746e-05, |
|
"loss": 1.8558, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.77213747705551e-05, |
|
"loss": 1.8564, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.765807962529274e-05, |
|
"loss": 1.8578, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.7594784480030382e-05, |
|
"loss": 1.8312, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.7531489334768024e-05, |
|
"loss": 1.8784, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.746819418950567e-05, |
|
"loss": 1.8497, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.740489904424331e-05, |
|
"loss": 1.8528, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.734160389898095e-05, |
|
"loss": 1.8645, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.7278308753718592e-05, |
|
"loss": 1.8563, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.7215013608456234e-05, |
|
"loss": 1.8616, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.7151718463193875e-05, |
|
"loss": 1.8699, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.7088423317931516e-05, |
|
"loss": 1.8583, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.7025128172669158e-05, |
|
"loss": 1.868, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.69618330274068e-05, |
|
"loss": 1.8534, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.689853788214444e-05, |
|
"loss": 1.8557, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.683524273688208e-05, |
|
"loss": 1.8709, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.6771947591619723e-05, |
|
"loss": 1.8544, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.6708652446357364e-05, |
|
"loss": 1.8803, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.6645357301095006e-05, |
|
"loss": 1.8573, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.658206215583265e-05, |
|
"loss": 1.8668, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.6518767010570292e-05, |
|
"loss": 1.8592, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.6455471865307933e-05, |
|
"loss": 1.8551, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.6392176720045574e-05, |
|
"loss": 1.8504, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.6328881574783216e-05, |
|
"loss": 1.8578, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.6265586429520857e-05, |
|
"loss": 1.8614, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.62022912842585e-05, |
|
"loss": 1.8592, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.613899613899614e-05, |
|
"loss": 1.854, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.607570099373378e-05, |
|
"loss": 1.8536, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6012405848471422e-05, |
|
"loss": 1.8687, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.5787432193756104, |
|
"eval_runtime": 629.6856, |
|
"eval_samples_per_second": 401.434, |
|
"eval_steps_per_second": 25.09, |
|
"step": 31598 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.5949110703209064e-05, |
|
"loss": 1.7515, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5885815557946705e-05, |
|
"loss": 1.7233, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5822520412684346e-05, |
|
"loss": 1.754, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.5759225267421988e-05, |
|
"loss": 1.7302, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5695930122159632e-05, |
|
"loss": 1.7369, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5632634976897274e-05, |
|
"loss": 1.7294, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.5569339831634915e-05, |
|
"loss": 1.7317, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.5506044686372556e-05, |
|
"loss": 1.7457, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.5442749541110198e-05, |
|
"loss": 1.758, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.537945439584784e-05, |
|
"loss": 1.7442, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.531615925058548e-05, |
|
"loss": 1.7449, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.5252864105323122e-05, |
|
"loss": 1.7502, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.5189568960060765e-05, |
|
"loss": 1.7529, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.5126273814798406e-05, |
|
"loss": 1.7675, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.5062978669536047e-05, |
|
"loss": 1.7537, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.4999683524273689e-05, |
|
"loss": 1.7546, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.493638837901133e-05, |
|
"loss": 1.7409, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.4873093233748971e-05, |
|
"loss": 1.7599, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.4809798088486613e-05, |
|
"loss": 1.7467, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.4746502943224257e-05, |
|
"loss": 1.7426, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.4683207797961899e-05, |
|
"loss": 1.7421, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.461991265269954e-05, |
|
"loss": 1.7572, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.4556617507437181e-05, |
|
"loss": 1.7489, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.4493322362174823e-05, |
|
"loss": 1.7482, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.4430027216912464e-05, |
|
"loss": 1.7578, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.4366732071650105e-05, |
|
"loss": 1.7608, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4303436926387748e-05, |
|
"loss": 1.7623, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.424014178112539e-05, |
|
"loss": 1.7534, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4176846635863031e-05, |
|
"loss": 1.7513, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.4113551490600672e-05, |
|
"loss": 1.7539, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4050256345338314e-05, |
|
"loss": 1.7529, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.4882566928863525, |
|
"eval_runtime": 671.3515, |
|
"eval_samples_per_second": 376.52, |
|
"eval_steps_per_second": 23.533, |
|
"step": 47397 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3986961200075955e-05, |
|
"loss": 1.7233, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.3923666054813596e-05, |
|
"loss": 1.6255, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.386037090955124e-05, |
|
"loss": 1.6566, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.379707576428888e-05, |
|
"loss": 1.6442, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.3733780619026522e-05, |
|
"loss": 1.6439, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.3670485473764163e-05, |
|
"loss": 1.6438, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.3607190328501805e-05, |
|
"loss": 1.6527, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.3543895183239446e-05, |
|
"loss": 1.6426, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.3480600037977087e-05, |
|
"loss": 1.6802, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.341730489271473e-05, |
|
"loss": 1.6568, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.3354009747452372e-05, |
|
"loss": 1.6657, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.3290714602190013e-05, |
|
"loss": 1.6734, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.3227419456927654e-05, |
|
"loss": 1.655, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.3164124311665296e-05, |
|
"loss": 1.6831, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.3100829166402937e-05, |
|
"loss": 1.6532, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.3037534021140578e-05, |
|
"loss": 1.6649, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.2974238875878221e-05, |
|
"loss": 1.6643, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.2910943730615863e-05, |
|
"loss": 1.6749, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.2847648585353504e-05, |
|
"loss": 1.6802, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.2784353440091145e-05, |
|
"loss": 1.6753, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.2721058294828787e-05, |
|
"loss": 1.6759, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.2657763149566428e-05, |
|
"loss": 1.6756, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.259446800430407e-05, |
|
"loss": 1.6733, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.253117285904171e-05, |
|
"loss": 1.671, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.2467877713779355e-05, |
|
"loss": 1.6697, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.2404582568516997e-05, |
|
"loss": 1.668, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.2341287423254638e-05, |
|
"loss": 1.6689, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.227799227799228e-05, |
|
"loss": 1.6874, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.221469713272992e-05, |
|
"loss": 1.6926, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.2151401987467562e-05, |
|
"loss": 1.6819, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.2088106842205203e-05, |
|
"loss": 1.6599, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 1.2024811696942846e-05, |
|
"loss": 1.6886, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.417983055114746, |
|
"eval_runtime": 634.8433, |
|
"eval_samples_per_second": 398.172, |
|
"eval_steps_per_second": 24.886, |
|
"step": 63196 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.1961516551680488e-05, |
|
"loss": 1.6122, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.1898221406418129e-05, |
|
"loss": 1.578, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.183492626115577e-05, |
|
"loss": 1.5662, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.1771631115893412e-05, |
|
"loss": 1.5732, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.1708335970631053e-05, |
|
"loss": 1.5726, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.1645040825368694e-05, |
|
"loss": 1.5868, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.1581745680106337e-05, |
|
"loss": 1.5781, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.1518450534843979e-05, |
|
"loss": 1.5965, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.145515538958162e-05, |
|
"loss": 1.5934, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.1391860244319261e-05, |
|
"loss": 1.5791, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.1328565099056903e-05, |
|
"loss": 1.6037, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.1265269953794544e-05, |
|
"loss": 1.6046, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.1201974808532185e-05, |
|
"loss": 1.5903, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.1138679663269828e-05, |
|
"loss": 1.5837, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.107538451800747e-05, |
|
"loss": 1.6162, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.1012089372745111e-05, |
|
"loss": 1.5988, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.0948794227482752e-05, |
|
"loss": 1.6082, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.0885499082220394e-05, |
|
"loss": 1.5832, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.0822203936958035e-05, |
|
"loss": 1.6153, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 1.0758908791695676e-05, |
|
"loss": 1.6178, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.0695613646433321e-05, |
|
"loss": 1.5981, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 1.0632318501170963e-05, |
|
"loss": 1.6135, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.0569023355908604e-05, |
|
"loss": 1.6122, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 1.0505728210646245e-05, |
|
"loss": 1.5929, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.0442433065383886e-05, |
|
"loss": 1.6069, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.0379137920121528e-05, |
|
"loss": 1.6025, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.0315842774859167e-05, |
|
"loss": 1.6284, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 1.0252547629596812e-05, |
|
"loss": 1.6134, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.0189252484334454e-05, |
|
"loss": 1.6092, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.0125957339072095e-05, |
|
"loss": 1.6194, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.0062662193809736e-05, |
|
"loss": 1.6227, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.3593807220458984, |
|
"eval_runtime": 634.5713, |
|
"eval_samples_per_second": 398.343, |
|
"eval_steps_per_second": 24.897, |
|
"step": 78995 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.999367048547378e-06, |
|
"loss": 1.6451, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 9.936071903285019e-06, |
|
"loss": 1.5186, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 9.87277675802266e-06, |
|
"loss": 1.5124, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 9.809481612760301e-06, |
|
"loss": 1.5223, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 9.746186467497943e-06, |
|
"loss": 1.5234, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 9.682891322235586e-06, |
|
"loss": 1.5298, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 9.619596176973227e-06, |
|
"loss": 1.5259, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 9.556301031710869e-06, |
|
"loss": 1.5463, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 9.49300588644851e-06, |
|
"loss": 1.5367, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 9.429710741186153e-06, |
|
"loss": 1.543, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 9.366415595923794e-06, |
|
"loss": 1.5379, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 9.303120450661436e-06, |
|
"loss": 1.5215, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 9.239825305399077e-06, |
|
"loss": 1.5339, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 9.176530160136718e-06, |
|
"loss": 1.5588, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 9.11323501487436e-06, |
|
"loss": 1.5522, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 9.049939869612e-06, |
|
"loss": 1.5516, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 8.986644724349644e-06, |
|
"loss": 1.5503, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 8.923349579087285e-06, |
|
"loss": 1.5459, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 8.860054433824927e-06, |
|
"loss": 1.5437, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8.796759288562568e-06, |
|
"loss": 1.5452, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 8.73346414330021e-06, |
|
"loss": 1.5434, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 8.67016899803785e-06, |
|
"loss": 1.5633, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 8.606873852775492e-06, |
|
"loss": 1.5535, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 8.543578707513135e-06, |
|
"loss": 1.5692, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 8.480283562250776e-06, |
|
"loss": 1.5609, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 8.416988416988418e-06, |
|
"loss": 1.5529, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 8.353693271726059e-06, |
|
"loss": 1.5602, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 8.290398126463702e-06, |
|
"loss": 1.5547, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 8.227102981201343e-06, |
|
"loss": 1.5557, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 8.163807835938985e-06, |
|
"loss": 1.5488, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 8.100512690676626e-06, |
|
"loss": 1.5736, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 8.037217545414267e-06, |
|
"loss": 1.559, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.3149573802947998, |
|
"eval_runtime": 678.6783, |
|
"eval_samples_per_second": 372.455, |
|
"eval_steps_per_second": 23.279, |
|
"step": 94794 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 7.973922400151909e-06, |
|
"loss": 1.5248, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 7.91062725488955e-06, |
|
"loss": 1.4873, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 7.847332109627193e-06, |
|
"loss": 1.4885, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 7.784036964364834e-06, |
|
"loss": 1.4882, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 7.720741819102476e-06, |
|
"loss": 1.499, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 7.657446673840117e-06, |
|
"loss": 1.493, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.594151528577759e-06, |
|
"loss": 1.4864, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 7.5308563833154e-06, |
|
"loss": 1.4889, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 7.467561238053042e-06, |
|
"loss": 1.5047, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.404266092790684e-06, |
|
"loss": 1.4828, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 7.340970947528325e-06, |
|
"loss": 1.4884, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 7.2776758022659665e-06, |
|
"loss": 1.4981, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 7.214380657003608e-06, |
|
"loss": 1.494, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 7.15108551174125e-06, |
|
"loss": 1.4798, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 7.087790366478891e-06, |
|
"loss": 1.498, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 7.024495221216533e-06, |
|
"loss": 1.496, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.961200075954176e-06, |
|
"loss": 1.5097, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 6.897904930691817e-06, |
|
"loss": 1.5032, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 6.8346097854294576e-06, |
|
"loss": 1.5001, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 6.771314640167099e-06, |
|
"loss": 1.5097, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 6.708019494904742e-06, |
|
"loss": 1.5065, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 6.644724349642383e-06, |
|
"loss": 1.4961, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 6.5814292043800246e-06, |
|
"loss": 1.5092, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 6.518134059117667e-06, |
|
"loss": 1.5079, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 6.454838913855308e-06, |
|
"loss": 1.513, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 6.391543768592949e-06, |
|
"loss": 1.5076, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 6.328248623330591e-06, |
|
"loss": 1.5123, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 6.264953478068233e-06, |
|
"loss": 1.5117, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 6.201658332805874e-06, |
|
"loss": 1.5056, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 6.1383631875435156e-06, |
|
"loss": 1.517, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 6.075068042281157e-06, |
|
"loss": 1.515, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 6.011772897018799e-06, |
|
"loss": 1.5193, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.2794440984725952, |
|
"eval_runtime": 637.2277, |
|
"eval_samples_per_second": 396.682, |
|
"eval_steps_per_second": 24.793, |
|
"step": 110593 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 5.94847775175644e-06, |
|
"loss": 1.4557, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 5.885182606494082e-06, |
|
"loss": 1.4395, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 5.821887461231725e-06, |
|
"loss": 1.4518, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 5.758592315969366e-06, |
|
"loss": 1.4513, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 5.695297170707007e-06, |
|
"loss": 1.454, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 5.632002025444649e-06, |
|
"loss": 1.4597, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 5.568706880182291e-06, |
|
"loss": 1.4383, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5.505411734919932e-06, |
|
"loss": 1.4529, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 5.442116589657574e-06, |
|
"loss": 1.4706, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 5.378821444395216e-06, |
|
"loss": 1.4576, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 5.315526299132857e-06, |
|
"loss": 1.4681, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 5.252231153870498e-06, |
|
"loss": 1.4537, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 5.18893600860814e-06, |
|
"loss": 1.4583, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 5.125640863345782e-06, |
|
"loss": 1.4645, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 5.062345718083423e-06, |
|
"loss": 1.455, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 4.999050572821065e-06, |
|
"loss": 1.4821, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 4.935755427558707e-06, |
|
"loss": 1.4605, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 4.872460282296348e-06, |
|
"loss": 1.4621, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.8091651370339894e-06, |
|
"loss": 1.4601, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 4.745869991771632e-06, |
|
"loss": 1.4648, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 4.682574846509273e-06, |
|
"loss": 1.4723, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 4.619279701246915e-06, |
|
"loss": 1.4733, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 4.5559845559845564e-06, |
|
"loss": 1.4723, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 4.492689410722198e-06, |
|
"loss": 1.4788, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 4.42939426545984e-06, |
|
"loss": 1.4665, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 4.366099120197481e-06, |
|
"loss": 1.4699, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 4.3028039749351235e-06, |
|
"loss": 1.4908, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 4.239508829672764e-06, |
|
"loss": 1.4712, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 4.176213684410406e-06, |
|
"loss": 1.4722, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 4.1129185391480474e-06, |
|
"loss": 1.4856, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 4.04962339388569e-06, |
|
"loss": 1.4793, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.2516653537750244, |
|
"eval_runtime": 654.6089, |
|
"eval_samples_per_second": 386.15, |
|
"eval_steps_per_second": 24.135, |
|
"step": 126392 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.986328248623331e-06, |
|
"loss": 1.4563, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 3.923033103360972e-06, |
|
"loss": 1.4263, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 3.8597379580986145e-06, |
|
"loss": 1.4301, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 3.7964428128362558e-06, |
|
"loss": 1.43, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 3.7331476675738975e-06, |
|
"loss": 1.4355, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 3.669852522311539e-06, |
|
"loss": 1.4384, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 3.6065573770491806e-06, |
|
"loss": 1.4398, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 3.543262231786822e-06, |
|
"loss": 1.425, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 3.4799670865244637e-06, |
|
"loss": 1.423, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 3.416671941262105e-06, |
|
"loss": 1.4278, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 3.3533767959997472e-06, |
|
"loss": 1.4368, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 3.290081650737389e-06, |
|
"loss": 1.4351, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 3.2267865054750303e-06, |
|
"loss": 1.4351, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 3.163491360212672e-06, |
|
"loss": 1.4299, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 3.1001962149503134e-06, |
|
"loss": 1.4265, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 3.036901069687955e-06, |
|
"loss": 1.4468, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 2.9736059244255965e-06, |
|
"loss": 1.4389, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 2.9103107791632386e-06, |
|
"loss": 1.4199, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 2.84701563390088e-06, |
|
"loss": 1.4361, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 2.7837204886385217e-06, |
|
"loss": 1.4401, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 2.7204253433761635e-06, |
|
"loss": 1.4423, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 2.657130198113805e-06, |
|
"loss": 1.4266, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.5938350528514466e-06, |
|
"loss": 1.4406, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 2.530539907589088e-06, |
|
"loss": 1.441, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 2.4672447623267296e-06, |
|
"loss": 1.4551, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 2.4039496170643714e-06, |
|
"loss": 1.4452, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 2.340654471802013e-06, |
|
"loss": 1.4392, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.2773593265396545e-06, |
|
"loss": 1.4361, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 2.2140641812772962e-06, |
|
"loss": 1.4313, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 2.1507690360149376e-06, |
|
"loss": 1.4323, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 2.0874738907525793e-06, |
|
"loss": 1.4266, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 2.024178745490221e-06, |
|
"loss": 1.4354, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.2341375350952148, |
|
"eval_runtime": 642.6304, |
|
"eval_samples_per_second": 393.347, |
|
"eval_steps_per_second": 24.585, |
|
"step": 142191 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 1.960883600227863e-06, |
|
"loss": 1.4034, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 1.8975884549655044e-06, |
|
"loss": 1.3966, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.834293309703146e-06, |
|
"loss": 1.3921, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 1.7709981644407874e-06, |
|
"loss": 1.396, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 1.7077030191784292e-06, |
|
"loss": 1.411, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 1.6444078739160707e-06, |
|
"loss": 1.406, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 1.5811127286537123e-06, |
|
"loss": 1.407, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 1.5178175833913538e-06, |
|
"loss": 1.4182, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 1.4545224381289958e-06, |
|
"loss": 1.4116, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 1.3912272928666373e-06, |
|
"loss": 1.4166, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 1.3279321476042789e-06, |
|
"loss": 1.4063, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 1.2646370023419204e-06, |
|
"loss": 1.4025, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 1.2013418570795622e-06, |
|
"loss": 1.4061, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 1.1380467118172037e-06, |
|
"loss": 1.4066, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 1.0747515665548455e-06, |
|
"loss": 1.4152, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 1.011456421292487e-06, |
|
"loss": 1.417, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 9.481612760301285e-07, |
|
"loss": 1.411, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 8.848661307677701e-07, |
|
"loss": 1.4162, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 8.215709855054118e-07, |
|
"loss": 1.4195, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 7.582758402430535e-07, |
|
"loss": 1.4226, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 6.94980694980695e-07, |
|
"loss": 1.4239, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 6.316855497183366e-07, |
|
"loss": 1.4078, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 5.683904044559782e-07, |
|
"loss": 1.4101, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 5.050952591936199e-07, |
|
"loss": 1.416, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 4.418001139312615e-07, |
|
"loss": 1.4182, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 3.785049686689031e-07, |
|
"loss": 1.4196, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 3.1520982340654476e-07, |
|
"loss": 1.4132, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 2.5191467814418635e-07, |
|
"loss": 1.4138, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 1.88619532881828e-07, |
|
"loss": 1.4333, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 1.253243876194696e-07, |
|
"loss": 1.413, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 6.202924235711122e-08, |
|
"loss": 1.4116, |
|
"step": 157500 |
|
} |
|
], |
|
"max_steps": 157990, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.820454731815322e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|