{ "best_metric": 1.4437962770462036, "best_model_checkpoint": "output/egor-letov/checkpoint-1113", "epoch": 7.0, "global_step": 1113, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00013686550750800417, "loss": 1.7651, "step": 5 }, { "epoch": 0.06, "learning_rate": 0.00013586529199199337, "loss": 1.744, "step": 10 }, { "epoch": 0.09, "learning_rate": 0.00013420910752136943, "loss": 1.6316, "step": 15 }, { "epoch": 0.13, "learning_rate": 0.0001319131051535897, "loss": 1.6707, "step": 20 }, { "epoch": 0.16, "learning_rate": 0.0001289996754295761, "loss": 1.6026, "step": 25 }, { "epoch": 0.19, "learning_rate": 0.00012549723002188386, "loss": 1.6338, "step": 30 }, { "epoch": 0.22, "learning_rate": 0.00012143992466498807, "loss": 1.592, "step": 35 }, { "epoch": 0.25, "learning_rate": 0.00011686732606966479, "loss": 1.6485, "step": 40 }, { "epoch": 0.28, "learning_rate": 0.00011182402606970537, "loss": 1.5618, "step": 45 }, { "epoch": 0.31, "learning_rate": 0.00010635920676379414, "loss": 1.639, "step": 50 }, { "epoch": 0.35, "learning_rate": 0.00010052616089327038, "loss": 1.616, "step": 55 }, { "epoch": 0.38, "learning_rate": 9.438177213303676e-05, "loss": 1.6779, "step": 60 }, { "epoch": 0.41, "learning_rate": 8.798596036379876e-05, "loss": 1.6006, "step": 65 }, { "epoch": 0.44, "learning_rate": 8.140109733532407e-05, "loss": 1.6413, "step": 70 }, { "epoch": 0.47, "learning_rate": 7.469139841915332e-05, "loss": 1.6066, "step": 75 }, { "epoch": 0.5, "learning_rate": 6.792229638237148e-05, "loss": 1.5749, "step": 80 }, { "epoch": 0.53, "learning_rate": 6.11598032893766e-05, "loss": 1.5668, "step": 85 }, { "epoch": 0.57, "learning_rate": 5.446986675436074e-05, "loss": 1.6693, "step": 90 }, { "epoch": 0.6, "learning_rate": 4.7917726822305834e-05, "loss": 1.5177, "step": 95 }, { "epoch": 0.63, "learning_rate": 4.156727975017365e-05, "loss": 1.5749, "step": 100 }, { "epoch": 0.66, "learning_rate": 3.5480454892670955e-05, "loss": 1.7175, "step": 105 }, { "epoch": 0.69, "learning_rate": 2.9716610769183376e-05, "loss": 1.5363, "step": 110 }, { "epoch": 0.72, "learning_rate": 2.433195620141175e-05, "loss": 1.5101, "step": 115 }, { "epoch": 0.75, "learning_rate": 1.9379002166758904e-05, "loss": 1.454, "step": 120 }, { "epoch": 0.79, "learning_rate": 1.4906049712973773e-05, "loss": 1.5586, "step": 125 }, { "epoch": 0.82, "learning_rate": 1.0956718927894408e-05, "loss": 1.5785, "step": 130 }, { "epoch": 0.85, "learning_rate": 7.569523557760334e-06, "loss": 1.542, "step": 135 }, { "epoch": 0.88, "learning_rate": 4.777495422403531e-06, "loss": 1.5961, "step": 140 }, { "epoch": 0.91, "learning_rate": 2.607862290008129e-06, "loss": 1.5725, "step": 145 }, { "epoch": 0.94, "learning_rate": 1.0817823527948728e-06, "loss": 1.5678, "step": 150 }, { "epoch": 0.97, "learning_rate": 2.1413789301400542e-07, "loss": 1.5592, "step": 155 }, { "epoch": 1.0, "eval_loss": 1.5221866369247437, "eval_runtime": 5.3885, "eval_samples_per_second": 44.725, "eval_steps_per_second": 5.753, "step": 159 }, { "epoch": 1.01, "learning_rate": 1.339015140952895e-08, "loss": 1.5752, "step": 160 }, { "epoch": 1.04, "learning_rate": 4.814968134727319e-07, "loss": 1.6004, "step": 165 }, { "epoch": 1.07, "learning_rate": 1.6138929181565955e-06, "loss": 1.6208, "step": 170 }, { "epoch": 1.1, "learning_rate": 3.399535375228382e-06, "loss": 1.5048, "step": 175 }, { "epoch": 1.13, "learning_rate": 5.821010657128918e-06, "loss": 1.5368, "step": 180 }, { "epoch": 1.16, "learning_rate": 8.854704615130833e-06, "loss": 1.4625, "step": 185 }, { "epoch": 1.19, "learning_rate": 1.247103276375417e-05, "loss": 1.5028, "step": 190 }, { "epoch": 1.23, "learning_rate": 1.663472878771286e-05, "loss": 1.5655, "step": 195 }, { "epoch": 1.26, "learning_rate": 2.130518845788256e-05, "loss": 1.6462, "step": 200 }, { "epoch": 1.29, "learning_rate": 2.6436865602431155e-05, "loss": 1.5009, "step": 205 }, { "epoch": 1.32, "learning_rate": 3.197971627161538e-05, "loss": 1.4222, "step": 210 }, { "epoch": 1.35, "learning_rate": 3.7879686764761665e-05, "loss": 1.4581, "step": 215 }, { "epoch": 1.38, "learning_rate": 4.407924076020964e-05, "loss": 1.5815, "step": 220 }, { "epoch": 1.42, "learning_rate": 5.0517920407661895e-05, "loss": 1.4586, "step": 225 }, { "epoch": 1.45, "learning_rate": 5.713293591118741e-05, "loss": 1.6639, "step": 230 }, { "epoch": 1.48, "learning_rate": 6.385977785328257e-05, "loss": 1.4835, "step": 235 }, { "epoch": 1.51, "learning_rate": 7.063284628862755e-05, "loss": 1.4511, "step": 240 }, { "epoch": 1.54, "learning_rate": 7.738609047263356e-05, "loss": 1.5574, "step": 245 }, { "epoch": 1.57, "learning_rate": 8.405365298617052e-05, "loss": 1.5211, "step": 250 }, { "epoch": 1.6, "learning_rate": 9.057051197499065e-05, "loss": 1.5923, "step": 255 }, { "epoch": 1.64, "learning_rate": 9.687311524075133e-05, "loss": 1.4926, "step": 260 }, { "epoch": 1.67, "learning_rate": 0.00010289999999999994, "loss": 1.5, "step": 265 }, { "epoch": 1.7, "learning_rate": 0.00010859239226725281, "loss": 1.6531, "step": 270 }, { "epoch": 1.73, "learning_rate": 0.00011389478001700268, "loss": 1.5839, "step": 275 }, { "epoch": 1.76, "learning_rate": 0.0001187554545351989, "loss": 1.5527, "step": 280 }, { "epoch": 1.79, "learning_rate": 0.00012312701468095597, "loss": 1.4811, "step": 285 }, { "epoch": 1.82, "learning_rate": 0.0001269668291409485, "loss": 1.6154, "step": 290 }, { "epoch": 1.86, "learning_rate": 0.0001302374521686013, "loss": 1.611, "step": 295 }, { "epoch": 1.89, "learning_rate": 0.00013290698875380067, "loss": 1.5892, "step": 300 }, { "epoch": 1.92, "learning_rate": 0.0001349494056620028, "loss": 1.4601, "step": 305 }, { "epoch": 1.95, "learning_rate": 0.0001363447853094957, "loss": 1.4863, "step": 310 }, { "epoch": 1.98, "learning_rate": 0.00013707951999903243, "loss": 1.5381, "step": 315 }, { "epoch": 2.0, "eval_loss": 1.5293655395507812, "eval_runtime": 5.7302, "eval_samples_per_second": 42.058, "eval_steps_per_second": 5.41, "step": 318 }, { "epoch": 2.01, "learning_rate": 0.00013714644462165502, "loss": 1.4756, "step": 320 }, { "epoch": 2.04, "learning_rate": 0.00013654490653060552, "loss": 1.538, "step": 325 }, { "epoch": 2.08, "learning_rate": 0.00013528077190591619, "loss": 1.4616, "step": 330 }, { "epoch": 2.11, "learning_rate": 0.00013336636854761124, "loss": 1.4735, "step": 335 }, { "epoch": 2.14, "learning_rate": 0.00013082036565539916, "loss": 1.3918, "step": 340 }, { "epoch": 2.17, "learning_rate": 0.00012766759176724058, "loss": 1.4803, "step": 345 }, { "epoch": 2.2, "learning_rate": 0.00012393879263224768, "loss": 1.6293, "step": 350 }, { "epoch": 2.23, "learning_rate": 0.00011967033137913236, "loss": 1.5226, "step": 355 }, { "epoch": 2.26, "learning_rate": 0.00011490383390414918, "loss": 1.5126, "step": 360 }, { "epoch": 2.3, "learning_rate": 0.00010968578293670085, "loss": 1.5304, "step": 365 }, { "epoch": 2.33, "learning_rate": 0.00010406706474126147, "loss": 1.4686, "step": 370 }, { "epoch": 2.36, "learning_rate": 9.810247287616943e-05, "loss": 1.4261, "step": 375 }, { "epoch": 2.39, "learning_rate": 9.185017384861683e-05, "loss": 1.4695, "step": 380 }, { "epoch": 2.42, "learning_rate": 8.537113987675073e-05, "loss": 1.4489, "step": 385 }, { "epoch": 2.45, "learning_rate": 7.872855429057032e-05, "loss": 1.5209, "step": 390 }, { "epoch": 2.48, "learning_rate": 7.198719537013416e-05, "loss": 1.4409, "step": 395 }, { "epoch": 2.52, "learning_rate": 6.521280462986591e-05, "loss": 1.4272, "step": 400 }, { "epoch": 2.55, "learning_rate": 5.847144570942975e-05, "loss": 1.4187, "step": 405 }, { "epoch": 2.58, "learning_rate": 5.1828860123249337e-05, "loss": 1.4412, "step": 410 }, { "epoch": 2.61, "learning_rate": 4.5349826151383235e-05, "loss": 1.536, "step": 415 }, { "epoch": 2.64, "learning_rate": 3.909752712383064e-05, "loss": 1.468, "step": 420 }, { "epoch": 2.67, "learning_rate": 3.31329352587386e-05, "loss": 1.4604, "step": 425 }, { "epoch": 2.7, "learning_rate": 2.7514217063299194e-05, "loss": 1.4515, "step": 430 }, { "epoch": 2.74, "learning_rate": 2.229616609585087e-05, "loss": 1.4215, "step": 435 }, { "epoch": 2.77, "learning_rate": 1.7529668620867685e-05, "loss": 1.4177, "step": 440 }, { "epoch": 2.8, "learning_rate": 1.3261207367752373e-05, "loss": 1.4707, "step": 445 }, { "epoch": 2.83, "learning_rate": 9.53240823275947e-06, "loss": 1.4591, "step": 450 }, { "epoch": 2.86, "learning_rate": 6.379634344600892e-06, "loss": 1.5216, "step": 455 }, { "epoch": 2.89, "learning_rate": 3.833631452388783e-06, "loss": 1.4972, "step": 460 }, { "epoch": 2.92, "learning_rate": 1.919228094083838e-06, "loss": 1.5209, "step": 465 }, { "epoch": 2.96, "learning_rate": 6.550934693944858e-07, "loss": 1.45, "step": 470 }, { "epoch": 2.99, "learning_rate": 5.35553783449795e-08, "loss": 1.4454, "step": 475 }, { "epoch": 3.0, "eval_loss": 1.4749490022659302, "eval_runtime": 5.7804, "eval_samples_per_second": 41.693, "eval_steps_per_second": 5.363, "step": 477 }, { "epoch": 3.02, "learning_rate": 1.2048000096755528e-07, "loss": 1.5049, "step": 480 }, { "epoch": 3.05, "learning_rate": 8.552146905042678e-07, "loss": 1.4016, "step": 485 }, { "epoch": 3.08, "learning_rate": 2.2505943379971545e-06, "loss": 1.4147, "step": 490 }, { "epoch": 3.11, "learning_rate": 4.293011246199337e-06, "loss": 1.3828, "step": 495 }, { "epoch": 3.14, "learning_rate": 6.9625478313987545e-06, "loss": 1.431, "step": 500 }, { "epoch": 3.18, "learning_rate": 1.023317085905152e-05, "loss": 1.4227, "step": 505 }, { "epoch": 3.21, "learning_rate": 1.4072985319043966e-05, "loss": 1.4611, "step": 510 }, { "epoch": 3.24, "learning_rate": 1.8444545464801038e-05, "loss": 1.3548, "step": 515 }, { "epoch": 3.27, "learning_rate": 2.330521998299726e-05, "loss": 1.4269, "step": 520 }, { "epoch": 3.3, "learning_rate": 2.8607607732747138e-05, "loss": 1.4122, "step": 525 }, { "epoch": 3.33, "learning_rate": 3.429999999999989e-05, "loss": 1.42, "step": 530 }, { "epoch": 3.36, "learning_rate": 4.032688475924871e-05, "loss": 1.388, "step": 535 }, { "epoch": 3.4, "learning_rate": 4.66294880250094e-05, "loss": 1.369, "step": 540 }, { "epoch": 3.43, "learning_rate": 5.3146347013829526e-05, "loss": 1.3512, "step": 545 }, { "epoch": 3.46, "learning_rate": 5.981390952736637e-05, "loss": 1.3807, "step": 550 }, { "epoch": 3.49, "learning_rate": 6.656715371137237e-05, "loss": 1.466, "step": 555 }, { "epoch": 3.52, "learning_rate": 7.334022214671736e-05, "loss": 1.3711, "step": 560 }, { "epoch": 3.55, "learning_rate": 8.006706408881251e-05, "loss": 1.3797, "step": 565 }, { "epoch": 3.58, "learning_rate": 8.668207959233792e-05, "loss": 1.413, "step": 570 }, { "epoch": 3.62, "learning_rate": 9.312075923979041e-05, "loss": 1.3859, "step": 575 }, { "epoch": 3.65, "learning_rate": 9.932031323523837e-05, "loss": 1.4853, "step": 580 }, { "epoch": 3.68, "learning_rate": 0.00010522028372838466, "loss": 1.3925, "step": 585 }, { "epoch": 3.71, "learning_rate": 0.0001107631343975688, "loss": 1.4442, "step": 590 }, { "epoch": 3.74, "learning_rate": 0.00011589481154211739, "loss": 1.5125, "step": 595 }, { "epoch": 3.77, "learning_rate": 0.00012056527121228708, "loss": 1.3964, "step": 600 }, { "epoch": 3.81, "learning_rate": 0.0001247289672362458, "loss": 1.4655, "step": 605 }, { "epoch": 3.84, "learning_rate": 0.00012834529538486908, "loss": 1.3898, "step": 610 }, { "epoch": 3.87, "learning_rate": 0.00013137898934287108, "loss": 1.4001, "step": 615 }, { "epoch": 3.9, "learning_rate": 0.00013380046462477163, "loss": 1.4248, "step": 620 }, { "epoch": 3.93, "learning_rate": 0.0001355861070818434, "loss": 1.3956, "step": 625 }, { "epoch": 3.96, "learning_rate": 0.00013671850318652725, "loss": 1.4704, "step": 630 }, { "epoch": 3.99, "learning_rate": 0.0001371866098485905, "loss": 1.4419, "step": 635 }, { "epoch": 4.0, "eval_loss": 1.4980032444000244, "eval_runtime": 5.8381, "eval_samples_per_second": 41.28, "eval_steps_per_second": 5.31, "step": 636 }, { "epoch": 4.03, "learning_rate": 0.000136985862106986, "loss": 1.3728, "step": 640 }, { "epoch": 4.06, "learning_rate": 0.00013611821764720515, "loss": 1.3237, "step": 645 }, { "epoch": 4.09, "learning_rate": 0.00013459213770999185, "loss": 1.3533, "step": 650 }, { "epoch": 4.12, "learning_rate": 0.00013242250457759646, "loss": 1.4508, "step": 655 }, { "epoch": 4.15, "learning_rate": 0.00012963047644223966, "loss": 1.4551, "step": 660 }, { "epoch": 4.18, "learning_rate": 0.00012624328107210575, "loss": 1.3913, "step": 665 }, { "epoch": 4.21, "learning_rate": 0.00012229395028702628, "loss": 1.3365, "step": 670 }, { "epoch": 4.25, "learning_rate": 0.00011782099783324114, "loss": 1.4044, "step": 675 }, { "epoch": 4.28, "learning_rate": 0.00011286804379858831, "loss": 1.3806, "step": 680 }, { "epoch": 4.31, "learning_rate": 0.00010748338923081678, "loss": 1.3692, "step": 685 }, { "epoch": 4.34, "learning_rate": 0.00010171954510732902, "loss": 1.3377, "step": 690 }, { "epoch": 4.37, "learning_rate": 9.56327202498263e-05, "loss": 1.3899, "step": 695 }, { "epoch": 4.4, "learning_rate": 8.928227317769412e-05, "loss": 1.3824, "step": 700 }, { "epoch": 4.43, "learning_rate": 8.273013324563958e-05, "loss": 1.3517, "step": 705 }, { "epoch": 4.47, "learning_rate": 7.604019671062341e-05, "loss": 1.3471, "step": 710 }, { "epoch": 4.5, "learning_rate": 6.927770361762859e-05, "loss": 1.363, "step": 715 }, { "epoch": 4.53, "learning_rate": 6.250860158084675e-05, "loss": 1.4238, "step": 720 }, { "epoch": 4.56, "learning_rate": 5.5798902664676055e-05, "loss": 1.3159, "step": 725 }, { "epoch": 4.59, "learning_rate": 4.921403963620114e-05, "loss": 1.3496, "step": 730 }, { "epoch": 4.62, "learning_rate": 4.2818227866963195e-05, "loss": 1.3717, "step": 735 }, { "epoch": 4.65, "learning_rate": 3.667383910672979e-05, "loss": 1.4263, "step": 740 }, { "epoch": 4.69, "learning_rate": 3.084079323620607e-05, "loss": 1.4238, "step": 745 }, { "epoch": 4.72, "learning_rate": 2.537597393029464e-05, "loss": 1.3789, "step": 750 }, { "epoch": 4.75, "learning_rate": 2.0332673930335262e-05, "loss": 1.2943, "step": 755 }, { "epoch": 4.78, "learning_rate": 1.576007533501198e-05, "loss": 1.4457, "step": 760 }, { "epoch": 4.81, "learning_rate": 1.170276997811622e-05, "loss": 1.3882, "step": 765 }, { "epoch": 4.84, "learning_rate": 8.200324570423858e-06, "loss": 1.3435, "step": 770 }, { "epoch": 4.87, "learning_rate": 5.286894846410311e-06, "loss": 1.3625, "step": 775 }, { "epoch": 4.91, "learning_rate": 2.9908924786306224e-06, "loss": 1.3336, "step": 780 }, { "epoch": 4.94, "learning_rate": 1.3347080080066949e-06, "loss": 1.3698, "step": 785 }, { "epoch": 4.97, "learning_rate": 3.34492491995841e-07, "loss": 1.3109, "step": 790 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.3807, "step": 795 }, { "epoch": 5.0, "eval_loss": 1.4533822536468506, "eval_runtime": 5.8855, "eval_samples_per_second": 40.948, "eval_steps_per_second": 5.267, "step": 795 }, { "epoch": 5.03, "learning_rate": 3.3449249199583335e-07, "loss": 1.2678, "step": 800 }, { "epoch": 5.06, "learning_rate": 1.334708008006672e-06, "loss": 1.2155, "step": 805 }, { "epoch": 5.09, "learning_rate": 2.990892478630592e-06, "loss": 1.265, "step": 810 }, { "epoch": 5.13, "learning_rate": 5.286894846410265e-06, "loss": 1.2533, "step": 815 }, { "epoch": 5.16, "learning_rate": 8.200324570423804e-06, "loss": 1.2628, "step": 820 }, { "epoch": 5.19, "learning_rate": 1.1702769978116159e-05, "loss": 1.321, "step": 825 }, { "epoch": 5.22, "learning_rate": 1.5760075335011905e-05, "loss": 1.3229, "step": 830 }, { "epoch": 5.25, "learning_rate": 2.0332673930335177e-05, "loss": 1.3257, "step": 835 }, { "epoch": 5.28, "learning_rate": 2.537597393029455e-05, "loss": 1.3373, "step": 840 }, { "epoch": 5.31, "learning_rate": 3.0840793236205985e-05, "loss": 1.2971, "step": 845 }, { "epoch": 5.35, "learning_rate": 3.667383910672968e-05, "loss": 1.1974, "step": 850 }, { "epoch": 5.38, "learning_rate": 4.281822786696309e-05, "loss": 1.3887, "step": 855 }, { "epoch": 5.41, "learning_rate": 4.921403963620103e-05, "loss": 1.3505, "step": 860 }, { "epoch": 5.44, "learning_rate": 5.579890266467594e-05, "loss": 1.319, "step": 865 }, { "epoch": 5.47, "learning_rate": 6.250860158084664e-05, "loss": 1.3255, "step": 870 }, { "epoch": 5.5, "learning_rate": 6.927770361762848e-05, "loss": 1.2311, "step": 875 }, { "epoch": 5.53, "learning_rate": 7.604019671062329e-05, "loss": 1.3552, "step": 880 }, { "epoch": 5.57, "learning_rate": 8.273013324563947e-05, "loss": 1.3885, "step": 885 }, { "epoch": 5.6, "learning_rate": 8.928227317769401e-05, "loss": 1.3314, "step": 890 }, { "epoch": 5.63, "learning_rate": 9.563272024982619e-05, "loss": 1.3202, "step": 895 }, { "epoch": 5.66, "learning_rate": 0.00010171954510732891, "loss": 1.3272, "step": 900 }, { "epoch": 5.69, "learning_rate": 0.00010748338923081667, "loss": 1.2248, "step": 905 }, { "epoch": 5.72, "learning_rate": 0.00011286804379858823, "loss": 1.3777, "step": 910 }, { "epoch": 5.75, "learning_rate": 0.00011782099783324106, "loss": 1.3051, "step": 915 }, { "epoch": 5.79, "learning_rate": 0.0001222939502870262, "loss": 1.3295, "step": 920 }, { "epoch": 5.82, "learning_rate": 0.0001262432810721057, "loss": 1.3102, "step": 925 }, { "epoch": 5.85, "learning_rate": 0.00012963047644223957, "loss": 1.409, "step": 930 }, { "epoch": 5.88, "learning_rate": 0.0001324225045775964, "loss": 1.3435, "step": 935 }, { "epoch": 5.91, "learning_rate": 0.00013459213770999182, "loss": 1.3775, "step": 940 }, { "epoch": 5.94, "learning_rate": 0.00013611821764720515, "loss": 1.3406, "step": 945 }, { "epoch": 5.97, "learning_rate": 0.000136985862106986, "loss": 1.4464, "step": 950 }, { "epoch": 6.0, "eval_loss": 1.4797894954681396, "eval_runtime": 5.9157, "eval_samples_per_second": 40.739, "eval_steps_per_second": 5.24, "step": 954 }, { "epoch": 6.01, "learning_rate": 0.0001371866098485905, "loss": 1.2626, "step": 955 }, { "epoch": 6.04, "learning_rate": 0.00013671850318652725, "loss": 1.2796, "step": 960 }, { "epoch": 6.07, "learning_rate": 0.00013558610708184343, "loss": 1.1514, "step": 965 }, { "epoch": 6.1, "learning_rate": 0.00013380046462477168, "loss": 1.3043, "step": 970 }, { "epoch": 6.13, "learning_rate": 0.00013137898934287106, "loss": 1.327, "step": 975 }, { "epoch": 6.16, "learning_rate": 0.00012834529538486924, "loss": 1.214, "step": 980 }, { "epoch": 6.19, "learning_rate": 0.00012472896723624585, "loss": 1.1638, "step": 985 }, { "epoch": 6.23, "learning_rate": 0.00012056527121228703, "loss": 1.3165, "step": 990 }, { "epoch": 6.26, "learning_rate": 0.00011589481154211748, "loss": 1.3327, "step": 995 }, { "epoch": 6.29, "learning_rate": 0.00011076313439756889, "loss": 1.3318, "step": 1000 }, { "epoch": 6.32, "learning_rate": 0.00010522028372838475, "loss": 1.3088, "step": 1005 }, { "epoch": 6.35, "learning_rate": 9.932031323523849e-05, "loss": 1.4023, "step": 1010 }, { "epoch": 6.38, "learning_rate": 9.312075923979029e-05, "loss": 1.2767, "step": 1015 }, { "epoch": 6.42, "learning_rate": 8.668207959233826e-05, "loss": 1.3357, "step": 1020 }, { "epoch": 6.45, "learning_rate": 8.006706408881263e-05, "loss": 1.2732, "step": 1025 }, { "epoch": 6.48, "learning_rate": 7.334022214671723e-05, "loss": 1.2055, "step": 1030 }, { "epoch": 6.51, "learning_rate": 6.656715371137249e-05, "loss": 1.3083, "step": 1035 }, { "epoch": 6.54, "learning_rate": 5.981390952736648e-05, "loss": 1.3757, "step": 1040 }, { "epoch": 6.57, "learning_rate": 5.314634701382964e-05, "loss": 1.2944, "step": 1045 }, { "epoch": 6.6, "learning_rate": 4.66294880250095e-05, "loss": 1.3276, "step": 1050 }, { "epoch": 6.64, "learning_rate": 4.032688475924859e-05, "loss": 1.2886, "step": 1055 }, { "epoch": 6.67, "learning_rate": 3.4300000000000204e-05, "loss": 1.2137, "step": 1060 }, { "epoch": 6.7, "learning_rate": 2.8607607732747236e-05, "loss": 1.2814, "step": 1065 }, { "epoch": 6.73, "learning_rate": 2.3305219982997162e-05, "loss": 1.2878, "step": 1070 }, { "epoch": 6.76, "learning_rate": 1.844454546480112e-05, "loss": 1.2616, "step": 1075 }, { "epoch": 6.79, "learning_rate": 1.4072985319044034e-05, "loss": 1.2864, "step": 1080 }, { "epoch": 6.82, "learning_rate": 1.0233170859051581e-05, "loss": 1.3714, "step": 1085 }, { "epoch": 6.86, "learning_rate": 6.962547831398808e-06, "loss": 1.2827, "step": 1090 }, { "epoch": 6.89, "learning_rate": 4.293011246199291e-06, "loss": 1.3028, "step": 1095 }, { "epoch": 6.92, "learning_rate": 2.250594337997246e-06, "loss": 1.2778, "step": 1100 }, { "epoch": 6.95, "learning_rate": 8.552146905042907e-07, "loss": 1.2184, "step": 1105 }, { "epoch": 6.98, "learning_rate": 1.2048000096754006e-07, "loss": 1.241, "step": 1110 }, { "epoch": 7.0, "eval_loss": 1.4437962770462036, "eval_runtime": 5.9385, "eval_samples_per_second": 40.583, "eval_steps_per_second": 5.22, "step": 1113 } ], "max_steps": 1908, "num_train_epochs": 12, "total_flos": 1163272126464000.0, "trial_name": null, "trial_params": null }