diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,9 +1,9 @@ { - "best_metric": 13.5764, - "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-58037", - "epoch": 15.0, + "best_metric": 12.1716, + "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-42562", + "epoch": 11.0, "eval_steps": 500, - "global_step": 58037, + "global_step": 42562, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -12891,4684 +12891,6 @@ "eval_samples_per_second": 2.165, "eval_steps_per_second": 1.083, "step": 42562 - }, - { - "epoch": 11.0054270577594, - "learning_rate": 1.3131725657980034e-07, - "loss": 2.4441, - "step": 42580 - }, - { - "epoch": 11.010595684196925, - "learning_rate": 1.31284843770258e-07, - "loss": 2.5027, - "step": 42600 - }, - { - "epoch": 11.015764310634449, - "learning_rate": 1.3125243096071566e-07, - "loss": 2.4709, - "step": 42620 - }, - { - "epoch": 11.020932937071974, - "learning_rate": 1.3122001815117335e-07, - "loss": 2.4576, - "step": 42640 - }, - { - "epoch": 11.026101563509497, - "learning_rate": 1.31187605341631e-07, - "loss": 2.4395, - "step": 42660 - }, - { - "epoch": 11.031270189947021, - "learning_rate": 1.3115519253208867e-07, - "loss": 2.504, - "step": 42680 - }, - { - "epoch": 11.036438816384546, - "learning_rate": 1.3112277972254634e-07, - "loss": 2.5521, - "step": 42700 - }, - { - "epoch": 11.04160744282207, - "learning_rate": 1.31090366913004e-07, - "loss": 2.5074, - "step": 42720 - }, - { - "epoch": 11.046776069259595, - "learning_rate": 1.3105795410346169e-07, - "loss": 2.5079, - "step": 42740 - }, - { - "epoch": 11.051944695697118, - "learning_rate": 1.3102554129391935e-07, - "loss": 2.5337, - "step": 42760 - }, - { - "epoch": 11.057113322134644, - "learning_rate": 1.30993128484377e-07, - "loss": 2.4815, - "step": 42780 - }, - { - "epoch": 11.062281948572167, - "learning_rate": 1.309607156748347e-07, - "loss": 2.4897, - "step": 42800 - }, - { - "epoch": 11.06745057500969, - "learning_rate": 1.3092830286529236e-07, - "loss": 2.5184, - "step": 42820 - }, - { - "epoch": 11.072619201447216, - "learning_rate": 1.3089589005575002e-07, - "loss": 2.459, - "step": 42840 - }, - { - "epoch": 11.07778782788474, - "learning_rate": 1.308634772462077e-07, - "loss": 2.4623, - "step": 42860 - }, - { - "epoch": 11.082956454322265, - "learning_rate": 1.3083106443666535e-07, - "loss": 2.4422, - "step": 42880 - }, - { - "epoch": 11.088125080759788, - "learning_rate": 1.3079865162712303e-07, - "loss": 2.5212, - "step": 42900 - }, - { - "epoch": 11.093293707197311, - "learning_rate": 1.307662388175807e-07, - "loss": 2.4923, - "step": 42920 - }, - { - "epoch": 11.098462333634837, - "learning_rate": 1.3073382600803836e-07, - "loss": 2.5076, - "step": 42940 - }, - { - "epoch": 11.10363096007236, - "learning_rate": 1.3070141319849605e-07, - "loss": 2.5291, - "step": 42960 - }, - { - "epoch": 11.108799586509885, - "learning_rate": 1.306690003889537e-07, - "loss": 2.4459, - "step": 42980 - }, - { - "epoch": 11.113968212947409, - "learning_rate": 1.3063658757941137e-07, - "loss": 2.4522, - "step": 43000 - }, - { - "epoch": 11.119136839384934, - "learning_rate": 1.3060417476986906e-07, - "loss": 2.4752, - "step": 43020 - }, - { - "epoch": 11.124305465822458, - "learning_rate": 1.3057176196032672e-07, - "loss": 2.4877, - "step": 43040 - }, - { - "epoch": 11.129474092259981, - "learning_rate": 1.3053934915078438e-07, - "loss": 2.4969, - "step": 43060 - }, - { - "epoch": 11.134642718697506, - "learning_rate": 1.3050693634124207e-07, - "loss": 2.5331, - "step": 43080 - }, - { - "epoch": 11.13981134513503, - "learning_rate": 1.304745235316997e-07, - "loss": 2.4214, - "step": 43100 - }, - { - "epoch": 11.144979971572555, - "learning_rate": 1.304421107221574e-07, - "loss": 2.4795, - "step": 43120 - }, - { - "epoch": 11.150148598010079, - "learning_rate": 1.3040969791261506e-07, - "loss": 2.491, - "step": 43140 - }, - { - "epoch": 11.155317224447604, - "learning_rate": 1.3037728510307272e-07, - "loss": 2.4823, - "step": 43160 - }, - { - "epoch": 11.160485850885127, - "learning_rate": 1.303448722935304e-07, - "loss": 2.5129, - "step": 43180 - }, - { - "epoch": 11.16565447732265, - "learning_rate": 1.3031245948398807e-07, - "loss": 2.4543, - "step": 43200 - }, - { - "epoch": 11.170823103760176, - "learning_rate": 1.3028004667444573e-07, - "loss": 2.4735, - "step": 43220 - }, - { - "epoch": 11.1759917301977, - "learning_rate": 1.3024763386490342e-07, - "loss": 2.5127, - "step": 43240 - }, - { - "epoch": 11.181160356635225, - "learning_rate": 1.3021522105536105e-07, - "loss": 2.4045, - "step": 43260 - }, - { - "epoch": 11.186328983072748, - "learning_rate": 1.3018280824581874e-07, - "loss": 2.4599, - "step": 43280 - }, - { - "epoch": 11.191497609510273, - "learning_rate": 1.301503954362764e-07, - "loss": 2.4875, - "step": 43300 - }, - { - "epoch": 11.196666235947797, - "learning_rate": 1.3011798262673407e-07, - "loss": 2.4523, - "step": 43320 - }, - { - "epoch": 11.20183486238532, - "learning_rate": 1.3008556981719175e-07, - "loss": 2.47, - "step": 43340 - }, - { - "epoch": 11.207003488822846, - "learning_rate": 1.3005315700764942e-07, - "loss": 2.464, - "step": 43360 - }, - { - "epoch": 11.212172115260369, - "learning_rate": 1.3002074419810708e-07, - "loss": 2.5223, - "step": 43380 - }, - { - "epoch": 11.217340741697894, - "learning_rate": 1.2998833138856477e-07, - "loss": 2.4749, - "step": 43400 - }, - { - "epoch": 11.222509368135418, - "learning_rate": 1.2995591857902243e-07, - "loss": 2.5268, - "step": 43420 - }, - { - "epoch": 11.227677994572943, - "learning_rate": 1.299235057694801e-07, - "loss": 2.4837, - "step": 43440 - }, - { - "epoch": 11.232846621010466, - "learning_rate": 1.2989109295993778e-07, - "loss": 2.4285, - "step": 43460 - }, - { - "epoch": 11.23801524744799, - "learning_rate": 1.2985868015039541e-07, - "loss": 2.4918, - "step": 43480 - }, - { - "epoch": 11.243183873885515, - "learning_rate": 1.298262673408531e-07, - "loss": 2.4585, - "step": 43500 - }, - { - "epoch": 11.248352500323039, - "learning_rate": 1.2979385453131076e-07, - "loss": 2.5156, - "step": 43520 - }, - { - "epoch": 11.253521126760564, - "learning_rate": 1.2976144172176843e-07, - "loss": 2.5031, - "step": 43540 - }, - { - "epoch": 11.258689753198087, - "learning_rate": 1.2972902891222611e-07, - "loss": 2.456, - "step": 43560 - }, - { - "epoch": 11.263858379635613, - "learning_rate": 1.2969661610268378e-07, - "loss": 2.5083, - "step": 43580 - }, - { - "epoch": 11.269027006073136, - "learning_rate": 1.2966420329314144e-07, - "loss": 2.5241, - "step": 43600 - }, - { - "epoch": 11.27419563251066, - "learning_rate": 1.2963179048359913e-07, - "loss": 2.4751, - "step": 43620 - }, - { - "epoch": 11.279364258948185, - "learning_rate": 1.295993776740568e-07, - "loss": 2.4695, - "step": 43640 - }, - { - "epoch": 11.284532885385708, - "learning_rate": 1.2956696486451445e-07, - "loss": 2.5578, - "step": 43660 - }, - { - "epoch": 11.289701511823234, - "learning_rate": 1.2953455205497214e-07, - "loss": 2.4649, - "step": 43680 - }, - { - "epoch": 11.294870138260757, - "learning_rate": 1.2950213924542977e-07, - "loss": 2.4747, - "step": 43700 - }, - { - "epoch": 11.300038764698282, - "learning_rate": 1.2946972643588746e-07, - "loss": 2.4679, - "step": 43720 - }, - { - "epoch": 11.305207391135806, - "learning_rate": 1.2943731362634512e-07, - "loss": 2.4683, - "step": 43740 - }, - { - "epoch": 11.31037601757333, - "learning_rate": 1.2940490081680279e-07, - "loss": 2.4857, - "step": 43760 - }, - { - "epoch": 11.315544644010854, - "learning_rate": 1.2937248800726047e-07, - "loss": 2.426, - "step": 43780 - }, - { - "epoch": 11.320713270448378, - "learning_rate": 1.2934007519771814e-07, - "loss": 2.4733, - "step": 43800 - }, - { - "epoch": 11.325881896885903, - "learning_rate": 1.293076623881758e-07, - "loss": 2.4812, - "step": 43820 - }, - { - "epoch": 11.331050523323427, - "learning_rate": 1.2927524957863349e-07, - "loss": 2.4528, - "step": 43840 - }, - { - "epoch": 11.33621914976095, - "learning_rate": 1.2924283676909112e-07, - "loss": 2.494, - "step": 43860 - }, - { - "epoch": 11.341387776198475, - "learning_rate": 1.292104239595488e-07, - "loss": 2.4795, - "step": 43880 - }, - { - "epoch": 11.346556402635999, - "learning_rate": 1.2917801115000647e-07, - "loss": 2.4816, - "step": 43900 - }, - { - "epoch": 11.351725029073524, - "learning_rate": 1.2914559834046413e-07, - "loss": 2.5282, - "step": 43920 - }, - { - "epoch": 11.356893655511048, - "learning_rate": 1.2911318553092182e-07, - "loss": 2.4849, - "step": 43940 - }, - { - "epoch": 11.362062281948573, - "learning_rate": 1.2908077272137948e-07, - "loss": 2.4246, - "step": 43960 - }, - { - "epoch": 11.367230908386096, - "learning_rate": 1.2904835991183715e-07, - "loss": 2.5491, - "step": 43980 - }, - { - "epoch": 11.372399534823622, - "learning_rate": 1.2901594710229483e-07, - "loss": 2.4584, - "step": 44000 - }, - { - "epoch": 11.377568161261145, - "learning_rate": 1.289835342927525e-07, - "loss": 2.5351, - "step": 44020 - }, - { - "epoch": 11.382736787698668, - "learning_rate": 1.2895112148321016e-07, - "loss": 2.5108, - "step": 44040 - }, - { - "epoch": 11.387905414136194, - "learning_rate": 1.2891870867366782e-07, - "loss": 2.5328, - "step": 44060 - }, - { - "epoch": 11.393074040573717, - "learning_rate": 1.2888629586412548e-07, - "loss": 2.4501, - "step": 44080 - }, - { - "epoch": 11.398242667011242, - "learning_rate": 1.2885388305458317e-07, - "loss": 2.434, - "step": 44100 - }, - { - "epoch": 11.403411293448766, - "learning_rate": 1.2882147024504083e-07, - "loss": 2.4713, - "step": 44120 - }, - { - "epoch": 11.40857991988629, - "learning_rate": 1.287890574354985e-07, - "loss": 2.5093, - "step": 44140 - }, - { - "epoch": 11.413748546323815, - "learning_rate": 1.2875664462595618e-07, - "loss": 2.5031, - "step": 44160 - }, - { - "epoch": 11.418917172761338, - "learning_rate": 1.2872423181641384e-07, - "loss": 2.4712, - "step": 44180 - }, - { - "epoch": 11.424085799198863, - "learning_rate": 1.286918190068715e-07, - "loss": 2.4963, - "step": 44200 - }, - { - "epoch": 11.429254425636387, - "learning_rate": 1.2865940619732917e-07, - "loss": 2.4866, - "step": 44220 - }, - { - "epoch": 11.434423052073912, - "learning_rate": 1.2862699338778686e-07, - "loss": 2.4665, - "step": 44240 - }, - { - "epoch": 11.439591678511436, - "learning_rate": 1.2859458057824452e-07, - "loss": 2.4525, - "step": 44260 - }, - { - "epoch": 11.444760304948959, - "learning_rate": 1.2856216776870218e-07, - "loss": 2.5271, - "step": 44280 - }, - { - "epoch": 11.449928931386484, - "learning_rate": 1.2852975495915984e-07, - "loss": 2.4676, - "step": 44300 - }, - { - "epoch": 11.455097557824008, - "learning_rate": 1.2849734214961753e-07, - "loss": 2.4996, - "step": 44320 - }, - { - "epoch": 11.460266184261533, - "learning_rate": 1.284649293400752e-07, - "loss": 2.4901, - "step": 44340 - }, - { - "epoch": 11.465434810699056, - "learning_rate": 1.2843251653053285e-07, - "loss": 2.411, - "step": 44360 - }, - { - "epoch": 11.470603437136582, - "learning_rate": 1.2840010372099054e-07, - "loss": 2.4688, - "step": 44380 - }, - { - "epoch": 11.475772063574105, - "learning_rate": 1.283676909114482e-07, - "loss": 2.4458, - "step": 44400 - }, - { - "epoch": 11.480940690011629, - "learning_rate": 1.2833527810190587e-07, - "loss": 2.4871, - "step": 44420 - }, - { - "epoch": 11.486109316449154, - "learning_rate": 1.2830286529236353e-07, - "loss": 2.4476, - "step": 44440 - }, - { - "epoch": 11.491277942886677, - "learning_rate": 1.282704524828212e-07, - "loss": 2.4647, - "step": 44460 - }, - { - "epoch": 11.496446569324203, - "learning_rate": 1.2823803967327888e-07, - "loss": 2.4887, - "step": 44480 - }, - { - "epoch": 11.501615195761726, - "learning_rate": 1.2820562686373654e-07, - "loss": 2.4835, - "step": 44500 - }, - { - "epoch": 11.506783822199251, - "learning_rate": 1.281732140541942e-07, - "loss": 2.4435, - "step": 44520 - }, - { - "epoch": 11.511952448636775, - "learning_rate": 1.281408012446519e-07, - "loss": 2.4716, - "step": 44540 - }, - { - "epoch": 11.517121075074298, - "learning_rate": 1.2810838843510955e-07, - "loss": 2.457, - "step": 44560 - }, - { - "epoch": 11.522289701511824, - "learning_rate": 1.2807597562556721e-07, - "loss": 2.4465, - "step": 44580 - }, - { - "epoch": 11.527458327949347, - "learning_rate": 1.2804356281602488e-07, - "loss": 2.4804, - "step": 44600 - }, - { - "epoch": 11.532626954386872, - "learning_rate": 1.2801115000648256e-07, - "loss": 2.5168, - "step": 44620 - }, - { - "epoch": 11.537795580824396, - "learning_rate": 1.2797873719694023e-07, - "loss": 2.4759, - "step": 44640 - }, - { - "epoch": 11.542964207261921, - "learning_rate": 1.279463243873979e-07, - "loss": 2.4929, - "step": 44660 - }, - { - "epoch": 11.548132833699444, - "learning_rate": 1.2791391157785555e-07, - "loss": 2.4136, - "step": 44680 - }, - { - "epoch": 11.553301460136968, - "learning_rate": 1.2788149876831324e-07, - "loss": 2.4497, - "step": 44700 - }, - { - "epoch": 11.558470086574493, - "learning_rate": 1.278490859587709e-07, - "loss": 2.4912, - "step": 44720 - }, - { - "epoch": 11.563638713012017, - "learning_rate": 1.2781667314922856e-07, - "loss": 2.5089, - "step": 44740 - }, - { - "epoch": 11.568807339449542, - "learning_rate": 1.2778426033968622e-07, - "loss": 2.4516, - "step": 44760 - }, - { - "epoch": 11.573975965887065, - "learning_rate": 1.277518475301439e-07, - "loss": 2.5023, - "step": 44780 - }, - { - "epoch": 11.579144592324589, - "learning_rate": 1.2771943472060157e-07, - "loss": 2.4883, - "step": 44800 - }, - { - "epoch": 11.584313218762114, - "learning_rate": 1.2768702191105924e-07, - "loss": 2.4885, - "step": 44820 - }, - { - "epoch": 11.589481845199638, - "learning_rate": 1.2765460910151692e-07, - "loss": 2.5421, - "step": 44840 - }, - { - "epoch": 11.594650471637163, - "learning_rate": 1.2762219629197459e-07, - "loss": 2.4938, - "step": 44860 - }, - { - "epoch": 11.599819098074686, - "learning_rate": 1.2758978348243225e-07, - "loss": 2.4712, - "step": 44880 - }, - { - "epoch": 11.604987724512211, - "learning_rate": 1.275573706728899e-07, - "loss": 2.5227, - "step": 44900 - }, - { - "epoch": 11.610156350949735, - "learning_rate": 1.275249578633476e-07, - "loss": 2.4463, - "step": 44920 - }, - { - "epoch": 11.61532497738726, - "learning_rate": 1.2749254505380526e-07, - "loss": 2.4526, - "step": 44940 - }, - { - "epoch": 11.620493603824784, - "learning_rate": 1.2746013224426292e-07, - "loss": 2.4763, - "step": 44960 - }, - { - "epoch": 11.625662230262307, - "learning_rate": 1.2742771943472058e-07, - "loss": 2.5575, - "step": 44980 - }, - { - "epoch": 11.630830856699832, - "learning_rate": 1.2739530662517827e-07, - "loss": 2.5079, - "step": 45000 - }, - { - "epoch": 11.635999483137356, - "learning_rate": 1.2736289381563593e-07, - "loss": 2.4868, - "step": 45020 - }, - { - "epoch": 11.641168109574881, - "learning_rate": 1.273304810060936e-07, - "loss": 2.4547, - "step": 45040 - }, - { - "epoch": 11.646336736012405, - "learning_rate": 1.2729806819655126e-07, - "loss": 2.4594, - "step": 45060 - }, - { - "epoch": 11.651505362449928, - "learning_rate": 1.2726565538700895e-07, - "loss": 2.495, - "step": 45080 - }, - { - "epoch": 11.656673988887453, - "learning_rate": 1.272332425774666e-07, - "loss": 2.5174, - "step": 45100 - }, - { - "epoch": 11.661842615324977, - "learning_rate": 1.2720082976792427e-07, - "loss": 2.4223, - "step": 45120 - }, - { - "epoch": 11.667011241762502, - "learning_rate": 1.2716841695838196e-07, - "loss": 2.4523, - "step": 45140 - }, - { - "epoch": 11.672179868200026, - "learning_rate": 1.2713600414883962e-07, - "loss": 2.4271, - "step": 45160 - }, - { - "epoch": 11.67734849463755, - "learning_rate": 1.2710359133929728e-07, - "loss": 2.5235, - "step": 45180 - }, - { - "epoch": 11.682517121075074, - "learning_rate": 1.2707117852975494e-07, - "loss": 2.4666, - "step": 45200 - }, - { - "epoch": 11.687685747512598, - "learning_rate": 1.2703876572021263e-07, - "loss": 2.4413, - "step": 45220 - }, - { - "epoch": 11.692854373950123, - "learning_rate": 1.270063529106703e-07, - "loss": 2.422, - "step": 45240 - }, - { - "epoch": 11.698023000387646, - "learning_rate": 1.2697394010112796e-07, - "loss": 2.5057, - "step": 45260 - }, - { - "epoch": 11.703191626825172, - "learning_rate": 1.2694152729158562e-07, - "loss": 2.5153, - "step": 45280 - }, - { - "epoch": 11.708360253262695, - "learning_rate": 1.269091144820433e-07, - "loss": 2.4894, - "step": 45300 - }, - { - "epoch": 11.71352887970022, - "learning_rate": 1.2687670167250097e-07, - "loss": 2.5211, - "step": 45320 - }, - { - "epoch": 11.718697506137744, - "learning_rate": 1.2684428886295863e-07, - "loss": 2.4449, - "step": 45340 - }, - { - "epoch": 11.723866132575267, - "learning_rate": 1.268118760534163e-07, - "loss": 2.4859, - "step": 45360 - }, - { - "epoch": 11.729034759012793, - "learning_rate": 1.2677946324387398e-07, - "loss": 2.4578, - "step": 45380 - }, - { - "epoch": 11.734203385450316, - "learning_rate": 1.2674705043433164e-07, - "loss": 2.4715, - "step": 45400 - }, - { - "epoch": 11.739372011887841, - "learning_rate": 1.267146376247893e-07, - "loss": 2.4258, - "step": 45420 - }, - { - "epoch": 11.744540638325365, - "learning_rate": 1.26682224815247e-07, - "loss": 2.4635, - "step": 45440 - }, - { - "epoch": 11.74970926476289, - "learning_rate": 1.2664981200570465e-07, - "loss": 2.4803, - "step": 45460 - }, - { - "epoch": 11.754877891200413, - "learning_rate": 1.2661739919616232e-07, - "loss": 2.4333, - "step": 45480 - }, - { - "epoch": 11.760046517637937, - "learning_rate": 1.2658498638661998e-07, - "loss": 2.5486, - "step": 45500 - }, - { - "epoch": 11.765215144075462, - "learning_rate": 1.2655257357707767e-07, - "loss": 2.4803, - "step": 45520 - }, - { - "epoch": 11.770383770512986, - "learning_rate": 1.2652016076753533e-07, - "loss": 2.4958, - "step": 45540 - }, - { - "epoch": 11.775552396950511, - "learning_rate": 1.26487747957993e-07, - "loss": 2.4918, - "step": 45560 - }, - { - "epoch": 11.780721023388034, - "learning_rate": 1.2645533514845065e-07, - "loss": 2.513, - "step": 45580 - }, - { - "epoch": 11.78588964982556, - "learning_rate": 1.2642292233890834e-07, - "loss": 2.4897, - "step": 45600 - }, - { - "epoch": 11.791058276263083, - "learning_rate": 1.26390509529366e-07, - "loss": 2.4468, - "step": 45620 - }, - { - "epoch": 11.796226902700607, - "learning_rate": 1.2635809671982366e-07, - "loss": 2.4928, - "step": 45640 - }, - { - "epoch": 11.801395529138132, - "learning_rate": 1.2632568391028132e-07, - "loss": 2.4793, - "step": 45660 - }, - { - "epoch": 11.806564155575655, - "learning_rate": 1.26293271100739e-07, - "loss": 2.4788, - "step": 45680 - }, - { - "epoch": 11.81173278201318, - "learning_rate": 1.2626085829119668e-07, - "loss": 2.4913, - "step": 45700 - }, - { - "epoch": 11.816901408450704, - "learning_rate": 1.2622844548165434e-07, - "loss": 2.4966, - "step": 45720 - }, - { - "epoch": 11.82207003488823, - "learning_rate": 1.2619603267211203e-07, - "loss": 2.4954, - "step": 45740 - }, - { - "epoch": 11.827238661325753, - "learning_rate": 1.261636198625697e-07, - "loss": 2.4608, - "step": 45760 - }, - { - "epoch": 11.832407287763276, - "learning_rate": 1.2613120705302735e-07, - "loss": 2.4901, - "step": 45780 - }, - { - "epoch": 11.837575914200801, - "learning_rate": 1.26098794243485e-07, - "loss": 2.5462, - "step": 45800 - }, - { - "epoch": 11.842744540638325, - "learning_rate": 1.260663814339427e-07, - "loss": 2.5217, - "step": 45820 - }, - { - "epoch": 11.84791316707585, - "learning_rate": 1.2603396862440036e-07, - "loss": 2.4889, - "step": 45840 - }, - { - "epoch": 11.853081793513374, - "learning_rate": 1.2600155581485802e-07, - "loss": 2.5021, - "step": 45860 - }, - { - "epoch": 11.858250419950899, - "learning_rate": 1.2596914300531568e-07, - "loss": 2.5027, - "step": 45880 - }, - { - "epoch": 11.863419046388422, - "learning_rate": 1.2593673019577337e-07, - "loss": 2.4681, - "step": 45900 - }, - { - "epoch": 11.868587672825946, - "learning_rate": 1.2590431738623104e-07, - "loss": 2.409, - "step": 45920 - }, - { - "epoch": 11.873756299263471, - "learning_rate": 1.258719045766887e-07, - "loss": 2.4426, - "step": 45940 - }, - { - "epoch": 11.878924925700995, - "learning_rate": 1.2583949176714636e-07, - "loss": 2.4285, - "step": 45960 - }, - { - "epoch": 11.88409355213852, - "learning_rate": 1.2580707895760405e-07, - "loss": 2.4452, - "step": 45980 - }, - { - "epoch": 11.889262178576043, - "learning_rate": 1.257746661480617e-07, - "loss": 2.4816, - "step": 46000 - }, - { - "epoch": 11.894430805013567, - "learning_rate": 1.2574225333851937e-07, - "loss": 2.4585, - "step": 46020 - }, - { - "epoch": 11.899599431451092, - "learning_rate": 1.2570984052897706e-07, - "loss": 2.4171, - "step": 46040 - }, - { - "epoch": 11.904768057888615, - "learning_rate": 1.2567742771943472e-07, - "loss": 2.4597, - "step": 46060 - }, - { - "epoch": 11.90993668432614, - "learning_rate": 1.2564501490989238e-07, - "loss": 2.4195, - "step": 46080 - }, - { - "epoch": 11.915105310763664, - "learning_rate": 1.2561260210035004e-07, - "loss": 2.5034, - "step": 46100 - }, - { - "epoch": 11.92027393720119, - "learning_rate": 1.2558018929080773e-07, - "loss": 2.4407, - "step": 46120 - }, - { - "epoch": 11.925442563638713, - "learning_rate": 1.255477764812654e-07, - "loss": 2.4813, - "step": 46140 - }, - { - "epoch": 11.930611190076238, - "learning_rate": 1.2551536367172306e-07, - "loss": 2.4599, - "step": 46160 - }, - { - "epoch": 11.935779816513762, - "learning_rate": 1.2548295086218072e-07, - "loss": 2.4641, - "step": 46180 - }, - { - "epoch": 11.940948442951285, - "learning_rate": 1.254505380526384e-07, - "loss": 2.5262, - "step": 46200 - }, - { - "epoch": 11.94611706938881, - "learning_rate": 1.2541812524309607e-07, - "loss": 2.4518, - "step": 46220 - }, - { - "epoch": 11.951285695826334, - "learning_rate": 1.2538571243355373e-07, - "loss": 2.5018, - "step": 46240 - }, - { - "epoch": 11.956454322263859, - "learning_rate": 1.253532996240114e-07, - "loss": 2.4634, - "step": 46260 - }, - { - "epoch": 11.961622948701383, - "learning_rate": 1.2532088681446908e-07, - "loss": 2.4889, - "step": 46280 - }, - { - "epoch": 11.966791575138906, - "learning_rate": 1.2528847400492674e-07, - "loss": 2.4577, - "step": 46300 - }, - { - "epoch": 11.971960201576431, - "learning_rate": 1.252560611953844e-07, - "loss": 2.4812, - "step": 46320 - }, - { - "epoch": 11.977128828013955, - "learning_rate": 1.252236483858421e-07, - "loss": 2.4448, - "step": 46340 - }, - { - "epoch": 11.98229745445148, - "learning_rate": 1.2519123557629975e-07, - "loss": 2.4594, - "step": 46360 - }, - { - "epoch": 11.987466080889003, - "learning_rate": 1.2515882276675742e-07, - "loss": 2.4473, - "step": 46380 - }, - { - "epoch": 11.992634707326529, - "learning_rate": 1.2512640995721508e-07, - "loss": 2.4831, - "step": 46400 - }, - { - "epoch": 11.997803333764052, - "learning_rate": 1.2509399714767277e-07, - "loss": 2.4706, - "step": 46420 - }, - { - "epoch": 11.999870784339063, - "eval_bleu": 12.51, - "eval_gen_len": 37.4146, - "eval_loss": 2.4478776454925537, - "eval_runtime": 807.7318, - "eval_samples_per_second": 2.132, - "eval_steps_per_second": 1.066, - "step": 46428 - }, - { - "epoch": 12.002971960201576, - "learning_rate": 1.2506158433813043e-07, - "loss": 2.4527, - "step": 46440 - }, - { - "epoch": 12.008140586639101, - "learning_rate": 1.250291715285881e-07, - "loss": 2.436, - "step": 46460 - }, - { - "epoch": 12.013309213076624, - "learning_rate": 1.2499675871904575e-07, - "loss": 2.4514, - "step": 46480 - }, - { - "epoch": 12.01847783951415, - "learning_rate": 1.2496434590950344e-07, - "loss": 2.4921, - "step": 46500 - }, - { - "epoch": 12.023646465951673, - "learning_rate": 1.249319330999611e-07, - "loss": 2.4637, - "step": 46520 - }, - { - "epoch": 12.028815092389198, - "learning_rate": 1.2489952029041876e-07, - "loss": 2.4924, - "step": 46540 - }, - { - "epoch": 12.033983718826722, - "learning_rate": 1.2486710748087643e-07, - "loss": 2.4788, - "step": 46560 - }, - { - "epoch": 12.039152345264245, - "learning_rate": 1.2483469467133411e-07, - "loss": 2.5291, - "step": 46580 - }, - { - "epoch": 12.04432097170177, - "learning_rate": 1.2480228186179178e-07, - "loss": 2.4419, - "step": 46600 - }, - { - "epoch": 12.049489598139294, - "learning_rate": 1.2476986905224944e-07, - "loss": 2.4768, - "step": 46620 - }, - { - "epoch": 12.05465822457682, - "learning_rate": 1.2473745624270713e-07, - "loss": 2.444, - "step": 46640 - }, - { - "epoch": 12.059826851014343, - "learning_rate": 1.247050434331648e-07, - "loss": 2.5193, - "step": 46660 - }, - { - "epoch": 12.064995477451868, - "learning_rate": 1.2467263062362245e-07, - "loss": 2.5019, - "step": 46680 - }, - { - "epoch": 12.070164103889391, - "learning_rate": 1.246402178140801e-07, - "loss": 2.465, - "step": 46700 - }, - { - "epoch": 12.075332730326915, - "learning_rate": 1.246078050045378e-07, - "loss": 2.4508, - "step": 46720 - }, - { - "epoch": 12.08050135676444, - "learning_rate": 1.2457539219499546e-07, - "loss": 2.4784, - "step": 46740 - }, - { - "epoch": 12.085669983201964, - "learning_rate": 1.2454297938545312e-07, - "loss": 2.5013, - "step": 46760 - }, - { - "epoch": 12.090838609639489, - "learning_rate": 1.2451056657591079e-07, - "loss": 2.3997, - "step": 46780 - }, - { - "epoch": 12.096007236077012, - "learning_rate": 1.2447815376636847e-07, - "loss": 2.4475, - "step": 46800 - }, - { - "epoch": 12.101175862514538, - "learning_rate": 1.2444574095682614e-07, - "loss": 2.4564, - "step": 46820 - }, - { - "epoch": 12.106344488952061, - "learning_rate": 1.244133281472838e-07, - "loss": 2.5028, - "step": 46840 - }, - { - "epoch": 12.111513115389585, - "learning_rate": 1.2438091533774146e-07, - "loss": 2.4969, - "step": 46860 - }, - { - "epoch": 12.11668174182711, - "learning_rate": 1.2434850252819915e-07, - "loss": 2.4488, - "step": 46880 - }, - { - "epoch": 12.121850368264633, - "learning_rate": 1.243160897186568e-07, - "loss": 2.4779, - "step": 46900 - }, - { - "epoch": 12.127018994702158, - "learning_rate": 1.2428367690911447e-07, - "loss": 2.4605, - "step": 46920 - }, - { - "epoch": 12.132187621139682, - "learning_rate": 1.2425126409957216e-07, - "loss": 2.4592, - "step": 46940 - }, - { - "epoch": 12.137356247577207, - "learning_rate": 1.2421885129002982e-07, - "loss": 2.5115, - "step": 46960 - }, - { - "epoch": 12.14252487401473, - "learning_rate": 1.2418643848048748e-07, - "loss": 2.494, - "step": 46980 - }, - { - "epoch": 12.147693500452254, - "learning_rate": 1.2415402567094515e-07, - "loss": 2.4545, - "step": 47000 - }, - { - "epoch": 12.15286212688978, - "learning_rate": 1.2412161286140283e-07, - "loss": 2.5211, - "step": 47020 - }, - { - "epoch": 12.158030753327303, - "learning_rate": 1.240892000518605e-07, - "loss": 2.4661, - "step": 47040 - }, - { - "epoch": 12.163199379764828, - "learning_rate": 1.2405678724231816e-07, - "loss": 2.4593, - "step": 47060 - }, - { - "epoch": 12.168368006202352, - "learning_rate": 1.2402437443277582e-07, - "loss": 2.555, - "step": 47080 - }, - { - "epoch": 12.173536632639877, - "learning_rate": 1.239919616232335e-07, - "loss": 2.5006, - "step": 47100 - }, - { - "epoch": 12.1787052590774, - "learning_rate": 1.2395954881369117e-07, - "loss": 2.4607, - "step": 47120 - }, - { - "epoch": 12.183873885514924, - "learning_rate": 1.2392713600414883e-07, - "loss": 2.4595, - "step": 47140 - }, - { - "epoch": 12.189042511952449, - "learning_rate": 1.238947231946065e-07, - "loss": 2.444, - "step": 47160 - }, - { - "epoch": 12.194211138389973, - "learning_rate": 1.2386231038506418e-07, - "loss": 2.4657, - "step": 47180 - }, - { - "epoch": 12.199379764827498, - "learning_rate": 1.2382989757552184e-07, - "loss": 2.454, - "step": 47200 - }, - { - "epoch": 12.204548391265021, - "learning_rate": 1.237974847659795e-07, - "loss": 2.4939, - "step": 47220 - }, - { - "epoch": 12.209717017702545, - "learning_rate": 1.237650719564372e-07, - "loss": 2.4465, - "step": 47240 - }, - { - "epoch": 12.21488564414007, - "learning_rate": 1.2373265914689486e-07, - "loss": 2.5464, - "step": 47260 - }, - { - "epoch": 12.220054270577593, - "learning_rate": 1.2370024633735252e-07, - "loss": 2.4521, - "step": 47280 - }, - { - "epoch": 12.225222897015119, - "learning_rate": 1.2366783352781018e-07, - "loss": 2.5132, - "step": 47300 - }, - { - "epoch": 12.230391523452642, - "learning_rate": 1.2363542071826787e-07, - "loss": 2.43, - "step": 47320 - }, - { - "epoch": 12.235560149890167, - "learning_rate": 1.2360300790872553e-07, - "loss": 2.4295, - "step": 47340 - }, - { - "epoch": 12.24072877632769, - "learning_rate": 1.235705950991832e-07, - "loss": 2.5134, - "step": 47360 - }, - { - "epoch": 12.245897402765214, - "learning_rate": 1.2353818228964085e-07, - "loss": 2.461, - "step": 47380 - }, - { - "epoch": 12.25106602920274, - "learning_rate": 1.2350576948009854e-07, - "loss": 2.4942, - "step": 47400 - }, - { - "epoch": 12.256234655640263, - "learning_rate": 1.234733566705562e-07, - "loss": 2.4607, - "step": 47420 - }, - { - "epoch": 12.261403282077788, - "learning_rate": 1.2344094386101387e-07, - "loss": 2.4634, - "step": 47440 - }, - { - "epoch": 12.266571908515312, - "learning_rate": 1.2340853105147153e-07, - "loss": 2.4448, - "step": 47460 - }, - { - "epoch": 12.271740534952837, - "learning_rate": 1.2337611824192922e-07, - "loss": 2.4803, - "step": 47480 - }, - { - "epoch": 12.27690916139036, - "learning_rate": 1.2334370543238688e-07, - "loss": 2.4154, - "step": 47500 - }, - { - "epoch": 12.282077787827884, - "learning_rate": 1.2331129262284454e-07, - "loss": 2.5181, - "step": 47520 - }, - { - "epoch": 12.28724641426541, - "learning_rate": 1.2327887981330223e-07, - "loss": 2.5086, - "step": 47540 - }, - { - "epoch": 12.292415040702933, - "learning_rate": 1.232464670037599e-07, - "loss": 2.4487, - "step": 47560 - }, - { - "epoch": 12.297583667140458, - "learning_rate": 1.2321405419421755e-07, - "loss": 2.4109, - "step": 47580 - }, - { - "epoch": 12.302752293577981, - "learning_rate": 1.2318164138467521e-07, - "loss": 2.4675, - "step": 47600 - }, - { - "epoch": 12.307920920015507, - "learning_rate": 1.2314922857513288e-07, - "loss": 2.4326, - "step": 47620 - }, - { - "epoch": 12.31308954645303, - "learning_rate": 1.2311681576559056e-07, - "loss": 2.441, - "step": 47640 - }, - { - "epoch": 12.318258172890554, - "learning_rate": 1.2308440295604823e-07, - "loss": 2.4154, - "step": 47660 - }, - { - "epoch": 12.323426799328079, - "learning_rate": 1.230519901465059e-07, - "loss": 2.4491, - "step": 47680 - }, - { - "epoch": 12.328595425765602, - "learning_rate": 1.2301957733696358e-07, - "loss": 2.4764, - "step": 47700 - }, - { - "epoch": 12.333764052203128, - "learning_rate": 1.2298716452742124e-07, - "loss": 2.4786, - "step": 47720 - }, - { - "epoch": 12.338932678640651, - "learning_rate": 1.229547517178789e-07, - "loss": 2.4033, - "step": 47740 - }, - { - "epoch": 12.344101305078176, - "learning_rate": 1.2292233890833656e-07, - "loss": 2.4742, - "step": 47760 - }, - { - "epoch": 12.3492699315157, - "learning_rate": 1.2288992609879422e-07, - "loss": 2.5316, - "step": 47780 - }, - { - "epoch": 12.354438557953223, - "learning_rate": 1.228575132892519e-07, - "loss": 2.4717, - "step": 47800 - }, - { - "epoch": 12.359607184390748, - "learning_rate": 1.2282510047970957e-07, - "loss": 2.4518, - "step": 47820 - }, - { - "epoch": 12.364775810828272, - "learning_rate": 1.2279268767016724e-07, - "loss": 2.4747, - "step": 47840 - }, - { - "epoch": 12.369944437265797, - "learning_rate": 1.2276027486062492e-07, - "loss": 2.4391, - "step": 47860 - }, - { - "epoch": 12.37511306370332, - "learning_rate": 1.2272786205108259e-07, - "loss": 2.4076, - "step": 47880 - }, - { - "epoch": 12.380281690140846, - "learning_rate": 1.2269544924154025e-07, - "loss": 2.4275, - "step": 47900 - }, - { - "epoch": 12.38545031657837, - "learning_rate": 1.2266303643199794e-07, - "loss": 2.4593, - "step": 47920 - }, - { - "epoch": 12.390618943015893, - "learning_rate": 1.226306236224556e-07, - "loss": 2.4666, - "step": 47940 - }, - { - "epoch": 12.395787569453418, - "learning_rate": 1.2259821081291326e-07, - "loss": 2.4939, - "step": 47960 - }, - { - "epoch": 12.400956195890942, - "learning_rate": 1.2256579800337092e-07, - "loss": 2.4933, - "step": 47980 - }, - { - "epoch": 12.406124822328467, - "learning_rate": 1.2253338519382858e-07, - "loss": 2.4298, - "step": 48000 - }, - { - "epoch": 12.41129344876599, - "learning_rate": 1.2250097238428627e-07, - "loss": 2.4923, - "step": 48020 - }, - { - "epoch": 12.416462075203516, - "learning_rate": 1.2246855957474393e-07, - "loss": 2.4368, - "step": 48040 - }, - { - "epoch": 12.421630701641039, - "learning_rate": 1.224361467652016e-07, - "loss": 2.4882, - "step": 48060 - }, - { - "epoch": 12.426799328078562, - "learning_rate": 1.2240373395565928e-07, - "loss": 2.4417, - "step": 48080 - }, - { - "epoch": 12.431967954516088, - "learning_rate": 1.2237132114611695e-07, - "loss": 2.4881, - "step": 48100 - }, - { - "epoch": 12.437136580953611, - "learning_rate": 1.223389083365746e-07, - "loss": 2.4633, - "step": 48120 - }, - { - "epoch": 12.442305207391136, - "learning_rate": 1.223064955270323e-07, - "loss": 2.4914, - "step": 48140 - }, - { - "epoch": 12.44747383382866, - "learning_rate": 1.2227408271748993e-07, - "loss": 2.4236, - "step": 48160 - }, - { - "epoch": 12.452642460266183, - "learning_rate": 1.2224166990794762e-07, - "loss": 2.4603, - "step": 48180 - }, - { - "epoch": 12.457811086703709, - "learning_rate": 1.2220925709840528e-07, - "loss": 2.5019, - "step": 48200 - }, - { - "epoch": 12.462979713141232, - "learning_rate": 1.2217684428886294e-07, - "loss": 2.4368, - "step": 48220 - }, - { - "epoch": 12.468148339578757, - "learning_rate": 1.2214443147932063e-07, - "loss": 2.5019, - "step": 48240 - }, - { - "epoch": 12.47331696601628, - "learning_rate": 1.221120186697783e-07, - "loss": 2.4857, - "step": 48260 - }, - { - "epoch": 12.478485592453806, - "learning_rate": 1.2207960586023596e-07, - "loss": 2.4496, - "step": 48280 - }, - { - "epoch": 12.48365421889133, - "learning_rate": 1.2204719305069364e-07, - "loss": 2.5045, - "step": 48300 - }, - { - "epoch": 12.488822845328853, - "learning_rate": 1.2201478024115128e-07, - "loss": 2.4589, - "step": 48320 - }, - { - "epoch": 12.493991471766378, - "learning_rate": 1.2198236743160897e-07, - "loss": 2.4778, - "step": 48340 - }, - { - "epoch": 12.499160098203902, - "learning_rate": 1.2194995462206663e-07, - "loss": 2.4297, - "step": 48360 - }, - { - "epoch": 12.504328724641427, - "learning_rate": 1.219175418125243e-07, - "loss": 2.395, - "step": 48380 - }, - { - "epoch": 12.50949735107895, - "learning_rate": 1.2188512900298198e-07, - "loss": 2.4515, - "step": 48400 - }, - { - "epoch": 12.514665977516476, - "learning_rate": 1.2185271619343964e-07, - "loss": 2.4912, - "step": 48420 - }, - { - "epoch": 12.519834603954, - "learning_rate": 1.218203033838973e-07, - "loss": 2.4681, - "step": 48440 - }, - { - "epoch": 12.525003230391523, - "learning_rate": 1.21787890574355e-07, - "loss": 2.4598, - "step": 48460 - }, - { - "epoch": 12.530171856829048, - "learning_rate": 1.2175547776481265e-07, - "loss": 2.4792, - "step": 48480 - }, - { - "epoch": 12.535340483266571, - "learning_rate": 1.2172306495527032e-07, - "loss": 2.4331, - "step": 48500 - }, - { - "epoch": 12.540509109704097, - "learning_rate": 1.21690652145728e-07, - "loss": 2.4537, - "step": 48520 - }, - { - "epoch": 12.54567773614162, - "learning_rate": 1.2165823933618564e-07, - "loss": 2.5384, - "step": 48540 - }, - { - "epoch": 12.550846362579145, - "learning_rate": 1.2162582652664333e-07, - "loss": 2.5251, - "step": 48560 - }, - { - "epoch": 12.556014989016669, - "learning_rate": 1.21593413717101e-07, - "loss": 2.4403, - "step": 48580 - }, - { - "epoch": 12.561183615454192, - "learning_rate": 1.2156100090755865e-07, - "loss": 2.4652, - "step": 48600 - }, - { - "epoch": 12.566352241891718, - "learning_rate": 1.2152858809801634e-07, - "loss": 2.5051, - "step": 48620 - }, - { - "epoch": 12.571520868329241, - "learning_rate": 1.21496175288474e-07, - "loss": 2.4572, - "step": 48640 - }, - { - "epoch": 12.576689494766766, - "learning_rate": 1.2146376247893166e-07, - "loss": 2.4891, - "step": 48660 - }, - { - "epoch": 12.58185812120429, - "learning_rate": 1.2143134966938935e-07, - "loss": 2.5186, - "step": 48680 - }, - { - "epoch": 12.587026747641815, - "learning_rate": 1.21398936859847e-07, - "loss": 2.4751, - "step": 48700 - }, - { - "epoch": 12.592195374079338, - "learning_rate": 1.2136652405030468e-07, - "loss": 2.4716, - "step": 48720 - }, - { - "epoch": 12.597364000516862, - "learning_rate": 1.2133411124076234e-07, - "loss": 2.4849, - "step": 48740 - }, - { - "epoch": 12.602532626954387, - "learning_rate": 1.2130169843122e-07, - "loss": 2.4891, - "step": 48760 - }, - { - "epoch": 12.60770125339191, - "learning_rate": 1.212692856216777e-07, - "loss": 2.4776, - "step": 48780 - }, - { - "epoch": 12.612869879829436, - "learning_rate": 1.2123687281213535e-07, - "loss": 2.5003, - "step": 48800 - }, - { - "epoch": 12.61803850626696, - "learning_rate": 1.21204460002593e-07, - "loss": 2.4541, - "step": 48820 - }, - { - "epoch": 12.623207132704485, - "learning_rate": 1.211720471930507e-07, - "loss": 2.3995, - "step": 48840 - }, - { - "epoch": 12.628375759142008, - "learning_rate": 1.2113963438350836e-07, - "loss": 2.4991, - "step": 48860 - }, - { - "epoch": 12.633544385579532, - "learning_rate": 1.2110722157396602e-07, - "loss": 2.4393, - "step": 48880 - }, - { - "epoch": 12.638713012017057, - "learning_rate": 1.2107480876442369e-07, - "loss": 2.4266, - "step": 48900 - }, - { - "epoch": 12.64388163845458, - "learning_rate": 1.2104239595488135e-07, - "loss": 2.4504, - "step": 48920 - }, - { - "epoch": 12.649050264892105, - "learning_rate": 1.2100998314533904e-07, - "loss": 2.4426, - "step": 48940 - }, - { - "epoch": 12.654218891329629, - "learning_rate": 1.209775703357967e-07, - "loss": 2.4279, - "step": 48960 - }, - { - "epoch": 12.659387517767154, - "learning_rate": 1.2094515752625436e-07, - "loss": 2.4973, - "step": 48980 - }, - { - "epoch": 12.664556144204678, - "learning_rate": 1.2091274471671205e-07, - "loss": 2.4447, - "step": 49000 - }, - { - "epoch": 12.669724770642201, - "learning_rate": 1.208803319071697e-07, - "loss": 2.497, - "step": 49020 - }, - { - "epoch": 12.674893397079726, - "learning_rate": 1.2084791909762737e-07, - "loss": 2.4223, - "step": 49040 - }, - { - "epoch": 12.68006202351725, - "learning_rate": 1.2081550628808503e-07, - "loss": 2.447, - "step": 49060 - }, - { - "epoch": 12.685230649954775, - "learning_rate": 1.2078309347854272e-07, - "loss": 2.4652, - "step": 49080 - }, - { - "epoch": 12.690399276392299, - "learning_rate": 1.2075068066900038e-07, - "loss": 2.4167, - "step": 49100 - }, - { - "epoch": 12.695567902829822, - "learning_rate": 1.2071826785945805e-07, - "loss": 2.4103, - "step": 49120 - }, - { - "epoch": 12.700736529267347, - "learning_rate": 1.206858550499157e-07, - "loss": 2.4225, - "step": 49140 - }, - { - "epoch": 12.70590515570487, - "learning_rate": 1.206534422403734e-07, - "loss": 2.4668, - "step": 49160 - }, - { - "epoch": 12.711073782142396, - "learning_rate": 1.2062102943083106e-07, - "loss": 2.4861, - "step": 49180 - }, - { - "epoch": 12.71624240857992, - "learning_rate": 1.2058861662128872e-07, - "loss": 2.433, - "step": 49200 - }, - { - "epoch": 12.721411035017445, - "learning_rate": 1.205562038117464e-07, - "loss": 2.4395, - "step": 49220 - }, - { - "epoch": 12.726579661454968, - "learning_rate": 1.2052379100220407e-07, - "loss": 2.4702, - "step": 49240 - }, - { - "epoch": 12.731748287892493, - "learning_rate": 1.2049137819266173e-07, - "loss": 2.4585, - "step": 49260 - }, - { - "epoch": 12.736916914330017, - "learning_rate": 1.204589653831194e-07, - "loss": 2.414, - "step": 49280 - }, - { - "epoch": 12.74208554076754, - "learning_rate": 1.2042655257357706e-07, - "loss": 2.4743, - "step": 49300 - }, - { - "epoch": 12.747254167205066, - "learning_rate": 1.2039413976403474e-07, - "loss": 2.4963, - "step": 49320 - }, - { - "epoch": 12.75242279364259, - "learning_rate": 1.203617269544924e-07, - "loss": 2.4157, - "step": 49340 - }, - { - "epoch": 12.757591420080114, - "learning_rate": 1.2032931414495007e-07, - "loss": 2.4355, - "step": 49360 - }, - { - "epoch": 12.762760046517638, - "learning_rate": 1.2029690133540776e-07, - "loss": 2.4174, - "step": 49380 - }, - { - "epoch": 12.767928672955161, - "learning_rate": 1.2026448852586542e-07, - "loss": 2.4903, - "step": 49400 - }, - { - "epoch": 12.773097299392687, - "learning_rate": 1.2023207571632308e-07, - "loss": 2.4285, - "step": 49420 - }, - { - "epoch": 12.77826592583021, - "learning_rate": 1.2019966290678074e-07, - "loss": 2.5139, - "step": 49440 - }, - { - "epoch": 12.783434552267735, - "learning_rate": 1.2016725009723843e-07, - "loss": 2.4306, - "step": 49460 - }, - { - "epoch": 12.788603178705259, - "learning_rate": 1.201348372876961e-07, - "loss": 2.4208, - "step": 49480 - }, - { - "epoch": 12.793771805142784, - "learning_rate": 1.2010242447815375e-07, - "loss": 2.4154, - "step": 49500 - }, - { - "epoch": 12.798940431580307, - "learning_rate": 1.2007001166861142e-07, - "loss": 2.4862, - "step": 49520 - }, - { - "epoch": 12.804109058017831, - "learning_rate": 1.200375988590691e-07, - "loss": 2.4184, - "step": 49540 - }, - { - "epoch": 12.809277684455356, - "learning_rate": 1.2000518604952677e-07, - "loss": 2.374, - "step": 49560 - }, - { - "epoch": 12.81444631089288, - "learning_rate": 1.1997277323998443e-07, - "loss": 2.4885, - "step": 49580 - }, - { - "epoch": 12.819614937330405, - "learning_rate": 1.199403604304421e-07, - "loss": 2.4429, - "step": 49600 - }, - { - "epoch": 12.824783563767928, - "learning_rate": 1.1990794762089978e-07, - "loss": 2.3834, - "step": 49620 - }, - { - "epoch": 12.829952190205454, - "learning_rate": 1.1987553481135744e-07, - "loss": 2.415, - "step": 49640 - }, - { - "epoch": 12.835120816642977, - "learning_rate": 1.198431220018151e-07, - "loss": 2.5206, - "step": 49660 - }, - { - "epoch": 12.8402894430805, - "learning_rate": 1.198107091922728e-07, - "loss": 2.4883, - "step": 49680 - }, - { - "epoch": 12.845458069518026, - "learning_rate": 1.1977829638273045e-07, - "loss": 2.4729, - "step": 49700 - }, - { - "epoch": 12.85062669595555, - "learning_rate": 1.1974588357318811e-07, - "loss": 2.3815, - "step": 49720 - }, - { - "epoch": 12.855795322393075, - "learning_rate": 1.1971347076364578e-07, - "loss": 2.453, - "step": 49740 - }, - { - "epoch": 12.860963948830598, - "learning_rate": 1.1968105795410346e-07, - "loss": 2.4414, - "step": 49760 - }, - { - "epoch": 12.866132575268123, - "learning_rate": 1.1964864514456113e-07, - "loss": 2.5048, - "step": 49780 - }, - { - "epoch": 12.871301201705647, - "learning_rate": 1.196162323350188e-07, - "loss": 2.4836, - "step": 49800 - }, - { - "epoch": 12.87646982814317, - "learning_rate": 1.1958381952547645e-07, - "loss": 2.4018, - "step": 49820 - }, - { - "epoch": 12.881638454580695, - "learning_rate": 1.1955140671593414e-07, - "loss": 2.4562, - "step": 49840 - }, - { - "epoch": 12.886807081018219, - "learning_rate": 1.195189939063918e-07, - "loss": 2.4431, - "step": 49860 - }, - { - "epoch": 12.891975707455744, - "learning_rate": 1.1948658109684946e-07, - "loss": 2.4554, - "step": 49880 - }, - { - "epoch": 12.897144333893268, - "learning_rate": 1.1945416828730712e-07, - "loss": 2.4341, - "step": 49900 - }, - { - "epoch": 12.902312960330793, - "learning_rate": 1.194217554777648e-07, - "loss": 2.4189, - "step": 49920 - }, - { - "epoch": 12.907481586768316, - "learning_rate": 1.1938934266822247e-07, - "loss": 2.4739, - "step": 49940 - }, - { - "epoch": 12.91265021320584, - "learning_rate": 1.1935692985868014e-07, - "loss": 2.3941, - "step": 49960 - }, - { - "epoch": 12.917818839643365, - "learning_rate": 1.1932451704913782e-07, - "loss": 2.4357, - "step": 49980 - }, - { - "epoch": 12.922987466080889, - "learning_rate": 1.1929210423959549e-07, - "loss": 2.4676, - "step": 50000 - }, - { - "epoch": 12.928156092518414, - "learning_rate": 1.1925969143005315e-07, - "loss": 2.4551, - "step": 50020 - }, - { - "epoch": 12.933324718955937, - "learning_rate": 1.192272786205108e-07, - "loss": 2.4811, - "step": 50040 - }, - { - "epoch": 12.938493345393463, - "learning_rate": 1.191948658109685e-07, - "loss": 2.4513, - "step": 50060 - }, - { - "epoch": 12.943661971830986, - "learning_rate": 1.1916245300142616e-07, - "loss": 2.4607, - "step": 50080 - }, - { - "epoch": 12.94883059826851, - "learning_rate": 1.1913004019188382e-07, - "loss": 2.482, - "step": 50100 - }, - { - "epoch": 12.953999224706035, - "learning_rate": 1.1909762738234151e-07, - "loss": 2.4465, - "step": 50120 - }, - { - "epoch": 12.959167851143558, - "learning_rate": 1.1906521457279917e-07, - "loss": 2.4672, - "step": 50140 - }, - { - "epoch": 12.964336477581083, - "learning_rate": 1.1903280176325683e-07, - "loss": 2.421, - "step": 50160 - }, - { - "epoch": 12.969505104018607, - "learning_rate": 1.190003889537145e-07, - "loss": 2.4271, - "step": 50180 - }, - { - "epoch": 12.974673730456132, - "learning_rate": 1.1896797614417218e-07, - "loss": 2.4226, - "step": 50200 - }, - { - "epoch": 12.979842356893656, - "learning_rate": 1.1893556333462985e-07, - "loss": 2.4162, - "step": 50220 - }, - { - "epoch": 12.985010983331179, - "learning_rate": 1.1890315052508751e-07, - "loss": 2.509, - "step": 50240 - }, - { - "epoch": 12.990179609768704, - "learning_rate": 1.1887073771554518e-07, - "loss": 2.4553, - "step": 50260 - }, - { - "epoch": 12.995348236206228, - "learning_rate": 1.1883832490600286e-07, - "loss": 2.4558, - "step": 50280 - }, - { - "epoch": 13.0, - "eval_bleu": 12.8144, - "eval_gen_len": 37.2979, - "eval_loss": 2.433004140853882, - "eval_runtime": 806.1399, - "eval_samples_per_second": 2.136, - "eval_steps_per_second": 1.068, - "step": 50298 - }, - { - "epoch": 13.000516862643753, - "learning_rate": 1.1880591209646052e-07, - "loss": 2.4458, - "step": 50300 - }, - { - "epoch": 13.005685489081277, - "learning_rate": 1.1877349928691818e-07, - "loss": 2.4137, - "step": 50320 - }, - { - "epoch": 13.0108541155188, - "learning_rate": 1.1874108647737586e-07, - "loss": 2.4151, - "step": 50340 - }, - { - "epoch": 13.016022741956325, - "learning_rate": 1.1870867366783353e-07, - "loss": 2.4124, - "step": 50360 - }, - { - "epoch": 13.021191368393849, - "learning_rate": 1.1867626085829119e-07, - "loss": 2.4259, - "step": 50380 - }, - { - "epoch": 13.026359994831374, - "learning_rate": 1.1864384804874886e-07, - "loss": 2.4933, - "step": 50400 - }, - { - "epoch": 13.031528621268897, - "learning_rate": 1.1861143523920653e-07, - "loss": 2.4579, - "step": 50420 - }, - { - "epoch": 13.036697247706423, - "learning_rate": 1.185790224296642e-07, - "loss": 2.4766, - "step": 50440 - }, - { - "epoch": 13.041865874143946, - "learning_rate": 1.1854660962012187e-07, - "loss": 2.4438, - "step": 50460 - }, - { - "epoch": 13.04703450058147, - "learning_rate": 1.1851419681057953e-07, - "loss": 2.4372, - "step": 50480 - }, - { - "epoch": 13.052203127018995, - "learning_rate": 1.184817840010372e-07, - "loss": 2.4097, - "step": 50500 - }, - { - "epoch": 13.057371753456518, - "learning_rate": 1.1844937119149488e-07, - "loss": 2.4199, - "step": 50520 - }, - { - "epoch": 13.062540379894044, - "learning_rate": 1.1841695838195254e-07, - "loss": 2.477, - "step": 50540 - }, - { - "epoch": 13.067709006331567, - "learning_rate": 1.183845455724102e-07, - "loss": 2.3863, - "step": 50560 - }, - { - "epoch": 13.072877632769092, - "learning_rate": 1.1835213276286788e-07, - "loss": 2.4797, - "step": 50580 - }, - { - "epoch": 13.078046259206616, - "learning_rate": 1.1831971995332555e-07, - "loss": 2.4302, - "step": 50600 - }, - { - "epoch": 13.08321488564414, - "learning_rate": 1.1828730714378322e-07, - "loss": 2.4693, - "step": 50620 - }, - { - "epoch": 13.088383512081665, - "learning_rate": 1.1825489433424088e-07, - "loss": 2.5014, - "step": 50640 - }, - { - "epoch": 13.093552138519188, - "learning_rate": 1.1822248152469855e-07, - "loss": 2.4204, - "step": 50660 - }, - { - "epoch": 13.098720764956713, - "learning_rate": 1.1819006871515623e-07, - "loss": 2.4063, - "step": 50680 - }, - { - "epoch": 13.103889391394237, - "learning_rate": 1.1815765590561389e-07, - "loss": 2.4893, - "step": 50700 - }, - { - "epoch": 13.109058017831762, - "learning_rate": 1.1812524309607155e-07, - "loss": 2.4443, - "step": 50720 - }, - { - "epoch": 13.114226644269285, - "learning_rate": 1.1809283028652923e-07, - "loss": 2.4188, - "step": 50740 - }, - { - "epoch": 13.119395270706809, - "learning_rate": 1.180604174769869e-07, - "loss": 2.4566, - "step": 50760 - }, - { - "epoch": 13.124563897144334, - "learning_rate": 1.1802800466744456e-07, - "loss": 2.4187, - "step": 50780 - }, - { - "epoch": 13.129732523581858, - "learning_rate": 1.1799559185790222e-07, - "loss": 2.4418, - "step": 50800 - }, - { - "epoch": 13.134901150019383, - "learning_rate": 1.179631790483599e-07, - "loss": 2.4705, - "step": 50820 - }, - { - "epoch": 13.140069776456906, - "learning_rate": 1.1793076623881758e-07, - "loss": 2.4966, - "step": 50840 - }, - { - "epoch": 13.145238402894432, - "learning_rate": 1.1789835342927524e-07, - "loss": 2.4615, - "step": 50860 - }, - { - "epoch": 13.150407029331955, - "learning_rate": 1.1786594061973291e-07, - "loss": 2.4513, - "step": 50880 - }, - { - "epoch": 13.155575655769479, - "learning_rate": 1.1783352781019059e-07, - "loss": 2.4163, - "step": 50900 - }, - { - "epoch": 13.160744282207004, - "learning_rate": 1.1780111500064825e-07, - "loss": 2.457, - "step": 50920 - }, - { - "epoch": 13.165912908644527, - "learning_rate": 1.1776870219110591e-07, - "loss": 2.4637, - "step": 50940 - }, - { - "epoch": 13.171081535082052, - "learning_rate": 1.1773628938156359e-07, - "loss": 2.4972, - "step": 50960 - }, - { - "epoch": 13.176250161519576, - "learning_rate": 1.1770387657202126e-07, - "loss": 2.4628, - "step": 50980 - }, - { - "epoch": 13.181418787957101, - "learning_rate": 1.1767146376247892e-07, - "loss": 2.404, - "step": 51000 - }, - { - "epoch": 13.186587414394625, - "learning_rate": 1.1763905095293658e-07, - "loss": 2.4146, - "step": 51020 - }, - { - "epoch": 13.191756040832148, - "learning_rate": 1.1760663814339426e-07, - "loss": 2.4528, - "step": 51040 - }, - { - "epoch": 13.196924667269673, - "learning_rate": 1.1757422533385193e-07, - "loss": 2.4843, - "step": 51060 - }, - { - "epoch": 13.202093293707197, - "learning_rate": 1.175418125243096e-07, - "loss": 2.4556, - "step": 51080 - }, - { - "epoch": 13.207261920144722, - "learning_rate": 1.1750939971476726e-07, - "loss": 2.4307, - "step": 51100 - }, - { - "epoch": 13.212430546582246, - "learning_rate": 1.1747698690522493e-07, - "loss": 2.4733, - "step": 51120 - }, - { - "epoch": 13.21759917301977, - "learning_rate": 1.1744457409568261e-07, - "loss": 2.4003, - "step": 51140 - }, - { - "epoch": 13.222767799457294, - "learning_rate": 1.1741216128614027e-07, - "loss": 2.4786, - "step": 51160 - }, - { - "epoch": 13.227936425894818, - "learning_rate": 1.1737974847659795e-07, - "loss": 2.4754, - "step": 51180 - }, - { - "epoch": 13.233105052332343, - "learning_rate": 1.1734733566705561e-07, - "loss": 2.4504, - "step": 51200 - }, - { - "epoch": 13.238273678769866, - "learning_rate": 1.1731492285751328e-07, - "loss": 2.3622, - "step": 51220 - }, - { - "epoch": 13.243442305207392, - "learning_rate": 1.1728251004797094e-07, - "loss": 2.4765, - "step": 51240 - }, - { - "epoch": 13.248610931644915, - "learning_rate": 1.1725009723842862e-07, - "loss": 2.5202, - "step": 51260 - }, - { - "epoch": 13.253779558082439, - "learning_rate": 1.1721768442888628e-07, - "loss": 2.4383, - "step": 51280 - }, - { - "epoch": 13.258948184519964, - "learning_rate": 1.1718527161934396e-07, - "loss": 2.4267, - "step": 51300 - }, - { - "epoch": 13.264116810957487, - "learning_rate": 1.1715285880980162e-07, - "loss": 2.4442, - "step": 51320 - }, - { - "epoch": 13.269285437395013, - "learning_rate": 1.171204460002593e-07, - "loss": 2.4723, - "step": 51340 - }, - { - "epoch": 13.274454063832536, - "learning_rate": 1.1708803319071696e-07, - "loss": 2.4552, - "step": 51360 - }, - { - "epoch": 13.279622690270061, - "learning_rate": 1.1705562038117463e-07, - "loss": 2.4261, - "step": 51380 - }, - { - "epoch": 13.284791316707585, - "learning_rate": 1.1702320757163229e-07, - "loss": 2.3728, - "step": 51400 - }, - { - "epoch": 13.28995994314511, - "learning_rate": 1.1699079476208997e-07, - "loss": 2.4434, - "step": 51420 - }, - { - "epoch": 13.295128569582634, - "learning_rate": 1.1695838195254763e-07, - "loss": 2.433, - "step": 51440 - }, - { - "epoch": 13.300297196020157, - "learning_rate": 1.169259691430053e-07, - "loss": 2.4091, - "step": 51460 - }, - { - "epoch": 13.305465822457682, - "learning_rate": 1.1689355633346298e-07, - "loss": 2.418, - "step": 51480 - }, - { - "epoch": 13.310634448895206, - "learning_rate": 1.1686114352392064e-07, - "loss": 2.4226, - "step": 51500 - }, - { - "epoch": 13.315803075332731, - "learning_rate": 1.168287307143783e-07, - "loss": 2.4603, - "step": 51520 - }, - { - "epoch": 13.320971701770254, - "learning_rate": 1.1679631790483598e-07, - "loss": 2.4939, - "step": 51540 - }, - { - "epoch": 13.326140328207778, - "learning_rate": 1.1676390509529365e-07, - "loss": 2.444, - "step": 51560 - }, - { - "epoch": 13.331308954645303, - "learning_rate": 1.1673149228575132e-07, - "loss": 2.5078, - "step": 51580 - }, - { - "epoch": 13.336477581082827, - "learning_rate": 1.1669907947620898e-07, - "loss": 2.5158, - "step": 51600 - }, - { - "epoch": 13.341646207520352, - "learning_rate": 1.1666666666666665e-07, - "loss": 2.5183, - "step": 51620 - }, - { - "epoch": 13.346814833957875, - "learning_rate": 1.1663425385712433e-07, - "loss": 2.3932, - "step": 51640 - }, - { - "epoch": 13.3519834603954, - "learning_rate": 1.1660184104758199e-07, - "loss": 2.413, - "step": 51660 - }, - { - "epoch": 13.357152086832924, - "learning_rate": 1.1656942823803965e-07, - "loss": 2.4563, - "step": 51680 - }, - { - "epoch": 13.362320713270448, - "learning_rate": 1.1653701542849733e-07, - "loss": 2.4141, - "step": 51700 - }, - { - "epoch": 13.367489339707973, - "learning_rate": 1.16504602618955e-07, - "loss": 2.486, - "step": 51720 - }, - { - "epoch": 13.372657966145496, - "learning_rate": 1.1647218980941266e-07, - "loss": 2.444, - "step": 51740 - }, - { - "epoch": 13.377826592583022, - "learning_rate": 1.1643977699987033e-07, - "loss": 2.4608, - "step": 51760 - }, - { - "epoch": 13.382995219020545, - "learning_rate": 1.1640736419032801e-07, - "loss": 2.4173, - "step": 51780 - }, - { - "epoch": 13.38816384545807, - "learning_rate": 1.1637495138078568e-07, - "loss": 2.4247, - "step": 51800 - }, - { - "epoch": 13.393332471895594, - "learning_rate": 1.1634253857124334e-07, - "loss": 2.4383, - "step": 51820 - }, - { - "epoch": 13.398501098333117, - "learning_rate": 1.16310125761701e-07, - "loss": 2.4455, - "step": 51840 - }, - { - "epoch": 13.403669724770642, - "learning_rate": 1.1627771295215869e-07, - "loss": 2.4771, - "step": 51860 - }, - { - "epoch": 13.408838351208166, - "learning_rate": 1.1624530014261635e-07, - "loss": 2.4686, - "step": 51880 - }, - { - "epoch": 13.414006977645691, - "learning_rate": 1.1621288733307401e-07, - "loss": 2.4417, - "step": 51900 - }, - { - "epoch": 13.419175604083215, - "learning_rate": 1.1618047452353167e-07, - "loss": 2.4765, - "step": 51920 - }, - { - "epoch": 13.42434423052074, - "learning_rate": 1.1614806171398936e-07, - "loss": 2.4501, - "step": 51940 - }, - { - "epoch": 13.429512856958263, - "learning_rate": 1.1611564890444702e-07, - "loss": 2.467, - "step": 51960 - }, - { - "epoch": 13.434681483395787, - "learning_rate": 1.1608323609490469e-07, - "loss": 2.4404, - "step": 51980 - }, - { - "epoch": 13.439850109833312, - "learning_rate": 1.1605082328536235e-07, - "loss": 2.4413, - "step": 52000 - }, - { - "epoch": 13.445018736270836, - "learning_rate": 1.1601841047582004e-07, - "loss": 2.4529, - "step": 52020 - }, - { - "epoch": 13.45018736270836, - "learning_rate": 1.159859976662777e-07, - "loss": 2.4719, - "step": 52040 - }, - { - "epoch": 13.455355989145884, - "learning_rate": 1.1595358485673536e-07, - "loss": 2.3729, - "step": 52060 - }, - { - "epoch": 13.46052461558341, - "learning_rate": 1.1592117204719305e-07, - "loss": 2.4296, - "step": 52080 - }, - { - "epoch": 13.465693242020933, - "learning_rate": 1.1588875923765071e-07, - "loss": 2.4939, - "step": 52100 - }, - { - "epoch": 13.470861868458456, - "learning_rate": 1.1585634642810837e-07, - "loss": 2.4214, - "step": 52120 - }, - { - "epoch": 13.476030494895982, - "learning_rate": 1.1582393361856603e-07, - "loss": 2.4435, - "step": 52140 - }, - { - "epoch": 13.481199121333505, - "learning_rate": 1.1579152080902372e-07, - "loss": 2.4724, - "step": 52160 - }, - { - "epoch": 13.48636774777103, - "learning_rate": 1.1575910799948138e-07, - "loss": 2.4219, - "step": 52180 - }, - { - "epoch": 13.491536374208554, - "learning_rate": 1.1572669518993905e-07, - "loss": 2.4166, - "step": 52200 - }, - { - "epoch": 13.49670500064608, - "learning_rate": 1.1569428238039671e-07, - "loss": 2.456, - "step": 52220 - }, - { - "epoch": 13.501873627083603, - "learning_rate": 1.156618695708544e-07, - "loss": 2.4335, - "step": 52240 - }, - { - "epoch": 13.507042253521126, - "learning_rate": 1.1562945676131206e-07, - "loss": 2.4549, - "step": 52260 - }, - { - "epoch": 13.512210879958651, - "learning_rate": 1.1559704395176972e-07, - "loss": 2.4274, - "step": 52280 - }, - { - "epoch": 13.517379506396175, - "learning_rate": 1.1556463114222738e-07, - "loss": 2.4509, - "step": 52300 - }, - { - "epoch": 13.5225481328337, - "learning_rate": 1.1553221833268507e-07, - "loss": 2.4466, - "step": 52320 - }, - { - "epoch": 13.527716759271224, - "learning_rate": 1.1549980552314273e-07, - "loss": 2.486, - "step": 52340 - }, - { - "epoch": 13.532885385708749, - "learning_rate": 1.1546739271360039e-07, - "loss": 2.4423, - "step": 52360 - }, - { - "epoch": 13.538054012146272, - "learning_rate": 1.1543497990405808e-07, - "loss": 2.3907, - "step": 52380 - }, - { - "epoch": 13.543222638583796, - "learning_rate": 1.1540256709451574e-07, - "loss": 2.4323, - "step": 52400 - }, - { - "epoch": 13.548391265021321, - "learning_rate": 1.153701542849734e-07, - "loss": 2.4375, - "step": 52420 - }, - { - "epoch": 13.553559891458844, - "learning_rate": 1.1533774147543107e-07, - "loss": 2.4197, - "step": 52440 - }, - { - "epoch": 13.55872851789637, - "learning_rate": 1.1530532866588876e-07, - "loss": 2.4902, - "step": 52460 - }, - { - "epoch": 13.563897144333893, - "learning_rate": 1.1527291585634642e-07, - "loss": 2.4065, - "step": 52480 - }, - { - "epoch": 13.569065770771417, - "learning_rate": 1.1524050304680408e-07, - "loss": 2.4606, - "step": 52500 - }, - { - "epoch": 13.574234397208942, - "learning_rate": 1.1520809023726174e-07, - "loss": 2.4576, - "step": 52520 - }, - { - "epoch": 13.579403023646465, - "learning_rate": 1.1517567742771943e-07, - "loss": 2.4596, - "step": 52540 - }, - { - "epoch": 13.58457165008399, - "learning_rate": 1.1514326461817709e-07, - "loss": 2.4678, - "step": 52560 - }, - { - "epoch": 13.589740276521514, - "learning_rate": 1.1511085180863475e-07, - "loss": 2.4333, - "step": 52580 - }, - { - "epoch": 13.59490890295904, - "learning_rate": 1.1507843899909241e-07, - "loss": 2.4255, - "step": 52600 - }, - { - "epoch": 13.600077529396563, - "learning_rate": 1.150460261895501e-07, - "loss": 2.414, - "step": 52620 - }, - { - "epoch": 13.605246155834088, - "learning_rate": 1.1501361338000777e-07, - "loss": 2.4667, - "step": 52640 - }, - { - "epoch": 13.610414782271612, - "learning_rate": 1.1498120057046543e-07, - "loss": 2.4574, - "step": 52660 - }, - { - "epoch": 13.615583408709135, - "learning_rate": 1.1494878776092312e-07, - "loss": 2.4472, - "step": 52680 - }, - { - "epoch": 13.62075203514666, - "learning_rate": 1.1491637495138078e-07, - "loss": 2.4838, - "step": 52700 - }, - { - "epoch": 13.625920661584184, - "learning_rate": 1.1488396214183844e-07, - "loss": 2.4244, - "step": 52720 - }, - { - "epoch": 13.631089288021709, - "learning_rate": 1.148515493322961e-07, - "loss": 2.4486, - "step": 52740 - }, - { - "epoch": 13.636257914459232, - "learning_rate": 1.1481913652275379e-07, - "loss": 2.4595, - "step": 52760 - }, - { - "epoch": 13.641426540896756, - "learning_rate": 1.1478672371321145e-07, - "loss": 2.4267, - "step": 52780 - }, - { - "epoch": 13.646595167334281, - "learning_rate": 1.1475431090366911e-07, - "loss": 2.4348, - "step": 52800 - }, - { - "epoch": 13.651763793771805, - "learning_rate": 1.1472189809412677e-07, - "loss": 2.4963, - "step": 52820 - }, - { - "epoch": 13.65693242020933, - "learning_rate": 1.1468948528458446e-07, - "loss": 2.3726, - "step": 52840 - }, - { - "epoch": 13.662101046646853, - "learning_rate": 1.1465707247504213e-07, - "loss": 2.3944, - "step": 52860 - }, - { - "epoch": 13.667269673084379, - "learning_rate": 1.1462465966549979e-07, - "loss": 2.5007, - "step": 52880 - }, - { - "epoch": 13.672438299521902, - "learning_rate": 1.1459224685595745e-07, - "loss": 2.4834, - "step": 52900 - }, - { - "epoch": 13.677606925959426, - "learning_rate": 1.1455983404641514e-07, - "loss": 2.3963, - "step": 52920 - }, - { - "epoch": 13.68277555239695, - "learning_rate": 1.1452742123687283e-07, - "loss": 2.4304, - "step": 52940 - }, - { - "epoch": 13.687944178834474, - "learning_rate": 1.1449500842733046e-07, - "loss": 2.4219, - "step": 52960 - }, - { - "epoch": 13.693112805272, - "learning_rate": 1.1446259561778815e-07, - "loss": 2.5008, - "step": 52980 - }, - { - "epoch": 13.698281431709523, - "learning_rate": 1.1443018280824581e-07, - "loss": 2.4243, - "step": 53000 - }, - { - "epoch": 13.703450058147048, - "learning_rate": 1.143977699987035e-07, - "loss": 2.462, - "step": 53020 - }, - { - "epoch": 13.708618684584572, - "learning_rate": 1.1436535718916113e-07, - "loss": 2.4512, - "step": 53040 - }, - { - "epoch": 13.713787311022095, - "learning_rate": 1.1433294437961882e-07, - "loss": 2.508, - "step": 53060 - }, - { - "epoch": 13.71895593745962, - "learning_rate": 1.1430053157007649e-07, - "loss": 2.4558, - "step": 53080 - }, - { - "epoch": 13.724124563897144, - "learning_rate": 1.1426811876053417e-07, - "loss": 2.4658, - "step": 53100 - }, - { - "epoch": 13.729293190334669, - "learning_rate": 1.1423570595099181e-07, - "loss": 2.3885, - "step": 53120 - }, - { - "epoch": 13.734461816772193, - "learning_rate": 1.142032931414495e-07, - "loss": 2.44, - "step": 53140 - }, - { - "epoch": 13.739630443209718, - "learning_rate": 1.1417088033190716e-07, - "loss": 2.4517, - "step": 53160 - }, - { - "epoch": 13.744799069647241, - "learning_rate": 1.1413846752236485e-07, - "loss": 2.4766, - "step": 53180 - }, - { - "epoch": 13.749967696084765, - "learning_rate": 1.1410605471282248e-07, - "loss": 2.447, - "step": 53200 - }, - { - "epoch": 13.75513632252229, - "learning_rate": 1.1407364190328017e-07, - "loss": 2.4432, - "step": 53220 - }, - { - "epoch": 13.760304948959813, - "learning_rate": 1.1404122909373786e-07, - "loss": 2.4773, - "step": 53240 - }, - { - "epoch": 13.765473575397339, - "learning_rate": 1.140088162841955e-07, - "loss": 2.455, - "step": 53260 - }, - { - "epoch": 13.770642201834862, - "learning_rate": 1.1397640347465318e-07, - "loss": 2.4651, - "step": 53280 - }, - { - "epoch": 13.775810828272387, - "learning_rate": 1.1394399066511085e-07, - "loss": 2.4309, - "step": 53300 - }, - { - "epoch": 13.780979454709911, - "learning_rate": 1.1391157785556853e-07, - "loss": 2.4539, - "step": 53320 - }, - { - "epoch": 13.786148081147434, - "learning_rate": 1.1387916504602617e-07, - "loss": 2.4259, - "step": 53340 - }, - { - "epoch": 13.79131670758496, - "learning_rate": 1.1384675223648386e-07, - "loss": 2.357, - "step": 53360 - }, - { - "epoch": 13.796485334022483, - "learning_rate": 1.1381433942694152e-07, - "loss": 2.4122, - "step": 53380 - }, - { - "epoch": 13.801653960460008, - "learning_rate": 1.1378192661739921e-07, - "loss": 2.435, - "step": 53400 - }, - { - "epoch": 13.806822586897532, - "learning_rate": 1.1374951380785684e-07, - "loss": 2.4501, - "step": 53420 - }, - { - "epoch": 13.811991213335055, - "learning_rate": 1.1371710099831453e-07, - "loss": 2.4191, - "step": 53440 - }, - { - "epoch": 13.81715983977258, - "learning_rate": 1.1368468818877219e-07, - "loss": 2.4168, - "step": 53460 - }, - { - "epoch": 13.822328466210104, - "learning_rate": 1.1365227537922988e-07, - "loss": 2.4114, - "step": 53480 - }, - { - "epoch": 13.82749709264763, - "learning_rate": 1.1361986256968752e-07, - "loss": 2.4437, - "step": 53500 - }, - { - "epoch": 13.832665719085153, - "learning_rate": 1.135874497601452e-07, - "loss": 2.5263, - "step": 53520 - }, - { - "epoch": 13.837834345522678, - "learning_rate": 1.1355503695060289e-07, - "loss": 2.3909, - "step": 53540 - }, - { - "epoch": 13.843002971960201, - "learning_rate": 1.1352262414106056e-07, - "loss": 2.4661, - "step": 53560 - }, - { - "epoch": 13.848171598397727, - "learning_rate": 1.1349021133151822e-07, - "loss": 2.3978, - "step": 53580 - }, - { - "epoch": 13.85334022483525, - "learning_rate": 1.1345779852197588e-07, - "loss": 2.4506, - "step": 53600 - }, - { - "epoch": 13.858508851272774, - "learning_rate": 1.1342538571243357e-07, - "loss": 2.4424, - "step": 53620 - }, - { - "epoch": 13.863677477710299, - "learning_rate": 1.1339297290289123e-07, - "loss": 2.4247, - "step": 53640 - }, - { - "epoch": 13.868846104147822, - "learning_rate": 1.1336056009334889e-07, - "loss": 2.4441, - "step": 53660 - }, - { - "epoch": 13.874014730585348, - "learning_rate": 1.1332814728380655e-07, - "loss": 2.4742, - "step": 53680 - }, - { - "epoch": 13.879183357022871, - "learning_rate": 1.1329573447426424e-07, - "loss": 2.4507, - "step": 53700 - }, - { - "epoch": 13.884351983460395, - "learning_rate": 1.132633216647219e-07, - "loss": 2.4422, - "step": 53720 - }, - { - "epoch": 13.88952060989792, - "learning_rate": 1.1323090885517956e-07, - "loss": 2.5032, - "step": 53740 - }, - { - "epoch": 13.894689236335443, - "learning_rate": 1.1319849604563723e-07, - "loss": 2.4601, - "step": 53760 - }, - { - "epoch": 13.899857862772969, - "learning_rate": 1.1316608323609492e-07, - "loss": 2.3983, - "step": 53780 - }, - { - "epoch": 13.905026489210492, - "learning_rate": 1.1313367042655258e-07, - "loss": 2.3913, - "step": 53800 - }, - { - "epoch": 13.910195115648017, - "learning_rate": 1.1310125761701024e-07, - "loss": 2.4417, - "step": 53820 - }, - { - "epoch": 13.91536374208554, - "learning_rate": 1.1306884480746793e-07, - "loss": 2.422, - "step": 53840 - }, - { - "epoch": 13.920532368523064, - "learning_rate": 1.1303643199792559e-07, - "loss": 2.4629, - "step": 53860 - }, - { - "epoch": 13.92570099496059, - "learning_rate": 1.1300401918838325e-07, - "loss": 2.4489, - "step": 53880 - }, - { - "epoch": 13.930869621398113, - "learning_rate": 1.1297160637884091e-07, - "loss": 2.4243, - "step": 53900 - }, - { - "epoch": 13.936038247835638, - "learning_rate": 1.129391935692986e-07, - "loss": 2.4241, - "step": 53920 - }, - { - "epoch": 13.941206874273162, - "learning_rate": 1.1290678075975626e-07, - "loss": 2.4386, - "step": 53940 - }, - { - "epoch": 13.946375500710687, - "learning_rate": 1.1287436795021392e-07, - "loss": 2.4117, - "step": 53960 - }, - { - "epoch": 13.95154412714821, - "learning_rate": 1.1284195514067159e-07, - "loss": 2.4356, - "step": 53980 - }, - { - "epoch": 13.956712753585734, - "learning_rate": 1.1280954233112928e-07, - "loss": 2.4823, - "step": 54000 - }, - { - "epoch": 13.961881380023259, - "learning_rate": 1.1277712952158694e-07, - "loss": 2.4368, - "step": 54020 - }, - { - "epoch": 13.967050006460783, - "learning_rate": 1.127447167120446e-07, - "loss": 2.4806, - "step": 54040 - }, - { - "epoch": 13.972218632898308, - "learning_rate": 1.1271230390250226e-07, - "loss": 2.3614, - "step": 54060 - }, - { - "epoch": 13.977387259335831, - "learning_rate": 1.1267989109295995e-07, - "loss": 2.4549, - "step": 54080 - }, - { - "epoch": 13.982555885773357, - "learning_rate": 1.1264747828341761e-07, - "loss": 2.4648, - "step": 54100 - }, - { - "epoch": 13.98772451221088, - "learning_rate": 1.1261506547387527e-07, - "loss": 2.482, - "step": 54120 - }, - { - "epoch": 13.992893138648403, - "learning_rate": 1.1258265266433296e-07, - "loss": 2.4405, - "step": 54140 - }, - { - "epoch": 13.998061765085929, - "learning_rate": 1.1255023985479062e-07, - "loss": 2.4125, - "step": 54160 - }, - { - "epoch": 13.999870784339063, - "eval_bleu": 13.0772, - "eval_gen_len": 37.0436, - "eval_loss": 2.4198501110076904, - "eval_runtime": 802.9059, - "eval_samples_per_second": 2.145, - "eval_steps_per_second": 1.072, - "step": 54167 - }, - { - "epoch": 14.003230391523452, - "learning_rate": 1.1251782704524828e-07, - "loss": 2.497, - "step": 54180 - }, - { - "epoch": 14.008399017960977, - "learning_rate": 1.1248541423570595e-07, - "loss": 2.4448, - "step": 54200 - }, - { - "epoch": 14.013567644398501, - "learning_rate": 1.1245300142616364e-07, - "loss": 2.4748, - "step": 54220 - }, - { - "epoch": 14.018736270836026, - "learning_rate": 1.124205886166213e-07, - "loss": 2.5235, - "step": 54240 - }, - { - "epoch": 14.02390489727355, - "learning_rate": 1.1238817580707896e-07, - "loss": 2.4309, - "step": 54260 - }, - { - "epoch": 14.029073523711073, - "learning_rate": 1.1235576299753662e-07, - "loss": 2.4434, - "step": 54280 - }, - { - "epoch": 14.034242150148598, - "learning_rate": 1.1232335018799431e-07, - "loss": 2.4613, - "step": 54300 - }, - { - "epoch": 14.039410776586122, - "learning_rate": 1.1229093737845197e-07, - "loss": 2.4494, - "step": 54320 - }, - { - "epoch": 14.044579403023647, - "learning_rate": 1.1225852456890963e-07, - "loss": 2.4625, - "step": 54340 - }, - { - "epoch": 14.04974802946117, - "learning_rate": 1.122261117593673e-07, - "loss": 2.4266, - "step": 54360 - }, - { - "epoch": 14.054916655898696, - "learning_rate": 1.1219369894982498e-07, - "loss": 2.4216, - "step": 54380 - }, - { - "epoch": 14.06008528233622, - "learning_rate": 1.1216128614028264e-07, - "loss": 2.4619, - "step": 54400 - }, - { - "epoch": 14.065253908773743, - "learning_rate": 1.1212887333074031e-07, - "loss": 2.4161, - "step": 54420 - }, - { - "epoch": 14.070422535211268, - "learning_rate": 1.12096460521198e-07, - "loss": 2.4824, - "step": 54440 - }, - { - "epoch": 14.075591161648791, - "learning_rate": 1.1206404771165566e-07, - "loss": 2.3989, - "step": 54460 - }, - { - "epoch": 14.080759788086317, - "learning_rate": 1.1203163490211332e-07, - "loss": 2.4347, - "step": 54480 - }, - { - "epoch": 14.08592841452384, - "learning_rate": 1.1199922209257098e-07, - "loss": 2.4447, - "step": 54500 - }, - { - "epoch": 14.091097040961365, - "learning_rate": 1.1196680928302867e-07, - "loss": 2.4, - "step": 54520 - }, - { - "epoch": 14.096265667398889, - "learning_rate": 1.1193439647348633e-07, - "loss": 2.421, - "step": 54540 - }, - { - "epoch": 14.101434293836412, - "learning_rate": 1.1190198366394399e-07, - "loss": 2.4029, - "step": 54560 - }, - { - "epoch": 14.106602920273938, - "learning_rate": 1.1186957085440165e-07, - "loss": 2.4102, - "step": 54580 - }, - { - "epoch": 14.111771546711461, - "learning_rate": 1.1183715804485934e-07, - "loss": 2.391, - "step": 54600 - }, - { - "epoch": 14.116940173148986, - "learning_rate": 1.11804745235317e-07, - "loss": 2.4074, - "step": 54620 - }, - { - "epoch": 14.12210879958651, - "learning_rate": 1.1177233242577467e-07, - "loss": 2.4133, - "step": 54640 - }, - { - "epoch": 14.127277426024033, - "learning_rate": 1.1173991961623233e-07, - "loss": 2.4479, - "step": 54660 - }, - { - "epoch": 14.132446052461558, - "learning_rate": 1.1170750680669002e-07, - "loss": 2.4476, - "step": 54680 - }, - { - "epoch": 14.137614678899082, - "learning_rate": 1.1167509399714768e-07, - "loss": 2.4295, - "step": 54700 - }, - { - "epoch": 14.142783305336607, - "learning_rate": 1.1164268118760534e-07, - "loss": 2.4715, - "step": 54720 - }, - { - "epoch": 14.14795193177413, - "learning_rate": 1.1161026837806302e-07, - "loss": 2.4425, - "step": 54740 - }, - { - "epoch": 14.153120558211656, - "learning_rate": 1.1157785556852069e-07, - "loss": 2.4664, - "step": 54760 - }, - { - "epoch": 14.15828918464918, - "learning_rate": 1.1154544275897835e-07, - "loss": 2.424, - "step": 54780 - }, - { - "epoch": 14.163457811086703, - "learning_rate": 1.1151302994943601e-07, - "loss": 2.4141, - "step": 54800 - }, - { - "epoch": 14.168626437524228, - "learning_rate": 1.1148061713989369e-07, - "loss": 2.4566, - "step": 54820 - }, - { - "epoch": 14.173795063961752, - "learning_rate": 1.1144820433035136e-07, - "loss": 2.4483, - "step": 54840 - }, - { - "epoch": 14.178963690399277, - "learning_rate": 1.1141579152080903e-07, - "loss": 2.4681, - "step": 54860 - }, - { - "epoch": 14.1841323168368, - "learning_rate": 1.1138337871126669e-07, - "loss": 2.4122, - "step": 54880 - }, - { - "epoch": 14.189300943274326, - "learning_rate": 1.1135096590172436e-07, - "loss": 2.4219, - "step": 54900 - }, - { - "epoch": 14.194469569711849, - "learning_rate": 1.1131855309218204e-07, - "loss": 2.398, - "step": 54920 - }, - { - "epoch": 14.199638196149373, - "learning_rate": 1.112861402826397e-07, - "loss": 2.4346, - "step": 54940 - }, - { - "epoch": 14.204806822586898, - "learning_rate": 1.1125372747309736e-07, - "loss": 2.477, - "step": 54960 - }, - { - "epoch": 14.209975449024421, - "learning_rate": 1.1122131466355504e-07, - "loss": 2.412, - "step": 54980 - }, - { - "epoch": 14.215144075461946, - "learning_rate": 1.1118890185401271e-07, - "loss": 2.4304, - "step": 55000 - }, - { - "epoch": 14.22031270189947, - "learning_rate": 1.1115648904447037e-07, - "loss": 2.4396, - "step": 55020 - }, - { - "epoch": 14.225481328336995, - "learning_rate": 1.1112407623492805e-07, - "loss": 2.4407, - "step": 55040 - }, - { - "epoch": 14.230649954774519, - "learning_rate": 1.1109166342538571e-07, - "loss": 2.3734, - "step": 55060 - }, - { - "epoch": 14.235818581212042, - "learning_rate": 1.1105925061584339e-07, - "loss": 2.4121, - "step": 55080 - }, - { - "epoch": 14.240987207649567, - "learning_rate": 1.1102683780630105e-07, - "loss": 2.4242, - "step": 55100 - }, - { - "epoch": 14.24615583408709, - "learning_rate": 1.1099442499675872e-07, - "loss": 2.446, - "step": 55120 - }, - { - "epoch": 14.251324460524616, - "learning_rate": 1.1096201218721639e-07, - "loss": 2.3918, - "step": 55140 - }, - { - "epoch": 14.25649308696214, - "learning_rate": 1.1092959937767406e-07, - "loss": 2.4142, - "step": 55160 - }, - { - "epoch": 14.261661713399665, - "learning_rate": 1.1089718656813172e-07, - "loss": 2.4401, - "step": 55180 - }, - { - "epoch": 14.266830339837188, - "learning_rate": 1.108647737585894e-07, - "loss": 2.4477, - "step": 55200 - }, - { - "epoch": 14.271998966274712, - "learning_rate": 1.1083236094904706e-07, - "loss": 2.434, - "step": 55220 - }, - { - "epoch": 14.277167592712237, - "learning_rate": 1.1079994813950473e-07, - "loss": 2.4021, - "step": 55240 - }, - { - "epoch": 14.28233621914976, - "learning_rate": 1.107675353299624e-07, - "loss": 2.4173, - "step": 55260 - }, - { - "epoch": 14.287504845587286, - "learning_rate": 1.1073512252042007e-07, - "loss": 2.4248, - "step": 55280 - }, - { - "epoch": 14.29267347202481, - "learning_rate": 1.1070270971087773e-07, - "loss": 2.449, - "step": 55300 - }, - { - "epoch": 14.297842098462334, - "learning_rate": 1.1067029690133541e-07, - "loss": 2.5167, - "step": 55320 - }, - { - "epoch": 14.303010724899858, - "learning_rate": 1.1063788409179308e-07, - "loss": 2.439, - "step": 55340 - }, - { - "epoch": 14.308179351337381, - "learning_rate": 1.1060547128225075e-07, - "loss": 2.4282, - "step": 55360 - }, - { - "epoch": 14.313347977774907, - "learning_rate": 1.1057305847270841e-07, - "loss": 2.4502, - "step": 55380 - }, - { - "epoch": 14.31851660421243, - "learning_rate": 1.1054064566316608e-07, - "loss": 2.4245, - "step": 55400 - }, - { - "epoch": 14.323685230649955, - "learning_rate": 1.1050823285362376e-07, - "loss": 2.4192, - "step": 55420 - }, - { - "epoch": 14.328853857087479, - "learning_rate": 1.1047582004408142e-07, - "loss": 2.4461, - "step": 55440 - }, - { - "epoch": 14.334022483525004, - "learning_rate": 1.1044340723453908e-07, - "loss": 2.3875, - "step": 55460 - }, - { - "epoch": 14.339191109962528, - "learning_rate": 1.1041099442499676e-07, - "loss": 2.3687, - "step": 55480 - }, - { - "epoch": 14.344359736400051, - "learning_rate": 1.1037858161545443e-07, - "loss": 2.5006, - "step": 55500 - }, - { - "epoch": 14.349528362837576, - "learning_rate": 1.1034616880591209e-07, - "loss": 2.4207, - "step": 55520 - }, - { - "epoch": 14.3546969892751, - "learning_rate": 1.1031375599636977e-07, - "loss": 2.4137, - "step": 55540 - }, - { - "epoch": 14.359865615712625, - "learning_rate": 1.1028134318682743e-07, - "loss": 2.4323, - "step": 55560 - }, - { - "epoch": 14.365034242150148, - "learning_rate": 1.102489303772851e-07, - "loss": 2.3947, - "step": 55580 - }, - { - "epoch": 14.370202868587672, - "learning_rate": 1.1021651756774277e-07, - "loss": 2.4286, - "step": 55600 - }, - { - "epoch": 14.375371495025197, - "learning_rate": 1.1018410475820044e-07, - "loss": 2.3842, - "step": 55620 - }, - { - "epoch": 14.38054012146272, - "learning_rate": 1.101516919486581e-07, - "loss": 2.4287, - "step": 55640 - }, - { - "epoch": 14.385708747900246, - "learning_rate": 1.1011927913911578e-07, - "loss": 2.441, - "step": 55660 - }, - { - "epoch": 14.39087737433777, - "learning_rate": 1.1008686632957344e-07, - "loss": 2.467, - "step": 55680 - }, - { - "epoch": 14.396046000775295, - "learning_rate": 1.1005445352003112e-07, - "loss": 2.4228, - "step": 55700 - }, - { - "epoch": 14.401214627212818, - "learning_rate": 1.1002204071048878e-07, - "loss": 2.4338, - "step": 55720 - }, - { - "epoch": 14.406383253650343, - "learning_rate": 1.0998962790094645e-07, - "loss": 2.4264, - "step": 55740 - }, - { - "epoch": 14.411551880087867, - "learning_rate": 1.0995721509140412e-07, - "loss": 2.3694, - "step": 55760 - }, - { - "epoch": 14.41672050652539, - "learning_rate": 1.0992480228186179e-07, - "loss": 2.4507, - "step": 55780 - }, - { - "epoch": 14.421889132962916, - "learning_rate": 1.0989238947231945e-07, - "loss": 2.4463, - "step": 55800 - }, - { - "epoch": 14.427057759400439, - "learning_rate": 1.0985997666277713e-07, - "loss": 2.4241, - "step": 55820 - }, - { - "epoch": 14.432226385837964, - "learning_rate": 1.0982756385323479e-07, - "loss": 2.4437, - "step": 55840 - }, - { - "epoch": 14.437395012275488, - "learning_rate": 1.0979515104369246e-07, - "loss": 2.4293, - "step": 55860 - }, - { - "epoch": 14.442563638713011, - "learning_rate": 1.0976273823415014e-07, - "loss": 2.4539, - "step": 55880 - }, - { - "epoch": 14.447732265150536, - "learning_rate": 1.097303254246078e-07, - "loss": 2.444, - "step": 55900 - }, - { - "epoch": 14.45290089158806, - "learning_rate": 1.0969791261506546e-07, - "loss": 2.4366, - "step": 55920 - }, - { - "epoch": 14.458069518025585, - "learning_rate": 1.0966549980552314e-07, - "loss": 2.3765, - "step": 55940 - }, - { - "epoch": 14.463238144463109, - "learning_rate": 1.0963308699598081e-07, - "loss": 2.4172, - "step": 55960 - }, - { - "epoch": 14.468406770900634, - "learning_rate": 1.0960067418643847e-07, - "loss": 2.4072, - "step": 55980 - }, - { - "epoch": 14.473575397338157, - "learning_rate": 1.0956826137689614e-07, - "loss": 2.4183, - "step": 56000 - }, - { - "epoch": 14.47874402377568, - "learning_rate": 1.0953584856735381e-07, - "loss": 2.4149, - "step": 56020 - }, - { - "epoch": 14.483912650213206, - "learning_rate": 1.0950343575781149e-07, - "loss": 2.4087, - "step": 56040 - }, - { - "epoch": 14.48908127665073, - "learning_rate": 1.0947102294826915e-07, - "loss": 2.4551, - "step": 56060 - }, - { - "epoch": 14.494249903088255, - "learning_rate": 1.0943861013872681e-07, - "loss": 2.431, - "step": 56080 - }, - { - "epoch": 14.499418529525778, - "learning_rate": 1.0940619732918449e-07, - "loss": 2.4769, - "step": 56100 - }, - { - "epoch": 14.504587155963304, - "learning_rate": 1.0937378451964216e-07, - "loss": 2.4141, - "step": 56120 - }, - { - "epoch": 14.509755782400827, - "learning_rate": 1.0934137171009982e-07, - "loss": 2.4434, - "step": 56140 - }, - { - "epoch": 14.51492440883835, - "learning_rate": 1.0930895890055748e-07, - "loss": 2.4858, - "step": 56160 - }, - { - "epoch": 14.520093035275876, - "learning_rate": 1.0927654609101516e-07, - "loss": 2.4497, - "step": 56180 - }, - { - "epoch": 14.5252616617134, - "learning_rate": 1.0924413328147283e-07, - "loss": 2.3597, - "step": 56200 - }, - { - "epoch": 14.530430288150924, - "learning_rate": 1.092117204719305e-07, - "loss": 2.4058, - "step": 56220 - }, - { - "epoch": 14.535598914588448, - "learning_rate": 1.0917930766238816e-07, - "loss": 2.4425, - "step": 56240 - }, - { - "epoch": 14.540767541025973, - "learning_rate": 1.0914689485284583e-07, - "loss": 2.4314, - "step": 56260 - }, - { - "epoch": 14.545936167463497, - "learning_rate": 1.0911448204330351e-07, - "loss": 2.4842, - "step": 56280 - }, - { - "epoch": 14.55110479390102, - "learning_rate": 1.0908206923376117e-07, - "loss": 2.4116, - "step": 56300 - }, - { - "epoch": 14.556273420338545, - "learning_rate": 1.0904965642421883e-07, - "loss": 2.4123, - "step": 56320 - }, - { - "epoch": 14.561442046776069, - "learning_rate": 1.0901724361467651e-07, - "loss": 2.4266, - "step": 56340 - }, - { - "epoch": 14.566610673213594, - "learning_rate": 1.0898483080513418e-07, - "loss": 2.4135, - "step": 56360 - }, - { - "epoch": 14.571779299651118, - "learning_rate": 1.0895241799559184e-07, - "loss": 2.4508, - "step": 56380 - }, - { - "epoch": 14.576947926088643, - "learning_rate": 1.089200051860495e-07, - "loss": 2.4001, - "step": 56400 - }, - { - "epoch": 14.582116552526166, - "learning_rate": 1.0888759237650718e-07, - "loss": 2.3891, - "step": 56420 - }, - { - "epoch": 14.58728517896369, - "learning_rate": 1.0885517956696486e-07, - "loss": 2.4768, - "step": 56440 - }, - { - "epoch": 14.592453805401215, - "learning_rate": 1.0882276675742252e-07, - "loss": 2.4198, - "step": 56460 - }, - { - "epoch": 14.597622431838738, - "learning_rate": 1.0879035394788018e-07, - "loss": 2.404, - "step": 56480 - }, - { - "epoch": 14.602791058276264, - "learning_rate": 1.0875794113833786e-07, - "loss": 2.4938, - "step": 56500 - }, - { - "epoch": 14.607959684713787, - "learning_rate": 1.0872552832879553e-07, - "loss": 2.4509, - "step": 56520 - }, - { - "epoch": 14.61312831115131, - "learning_rate": 1.0869311551925319e-07, - "loss": 2.4481, - "step": 56540 - }, - { - "epoch": 14.618296937588836, - "learning_rate": 1.0866070270971085e-07, - "loss": 2.4265, - "step": 56560 - }, - { - "epoch": 14.62346556402636, - "learning_rate": 1.0862828990016854e-07, - "loss": 2.4469, - "step": 56580 - }, - { - "epoch": 14.628634190463885, - "learning_rate": 1.085958770906262e-07, - "loss": 2.4133, - "step": 56600 - }, - { - "epoch": 14.633802816901408, - "learning_rate": 1.0856346428108387e-07, - "loss": 2.4522, - "step": 56620 - }, - { - "epoch": 14.638971443338933, - "learning_rate": 1.0853105147154153e-07, - "loss": 2.4041, - "step": 56640 - }, - { - "epoch": 14.644140069776457, - "learning_rate": 1.0849863866199922e-07, - "loss": 2.4375, - "step": 56660 - }, - { - "epoch": 14.649308696213982, - "learning_rate": 1.0846622585245688e-07, - "loss": 2.4063, - "step": 56680 - }, - { - "epoch": 14.654477322651505, - "learning_rate": 1.0843381304291454e-07, - "loss": 2.4281, - "step": 56700 - }, - { - "epoch": 14.659645949089029, - "learning_rate": 1.084014002333722e-07, - "loss": 2.3715, - "step": 56720 - }, - { - "epoch": 14.664814575526554, - "learning_rate": 1.0836898742382989e-07, - "loss": 2.4447, - "step": 56740 - }, - { - "epoch": 14.669983201964078, - "learning_rate": 1.0833657461428755e-07, - "loss": 2.4509, - "step": 56760 - }, - { - "epoch": 14.675151828401603, - "learning_rate": 1.0830416180474521e-07, - "loss": 2.4565, - "step": 56780 - }, - { - "epoch": 14.680320454839126, - "learning_rate": 1.0827174899520288e-07, - "loss": 2.4184, - "step": 56800 - }, - { - "epoch": 14.68548908127665, - "learning_rate": 1.0823933618566056e-07, - "loss": 2.3964, - "step": 56820 - }, - { - "epoch": 14.690657707714175, - "learning_rate": 1.0820692337611823e-07, - "loss": 2.3597, - "step": 56840 - }, - { - "epoch": 14.695826334151699, - "learning_rate": 1.0817451056657589e-07, - "loss": 2.4697, - "step": 56860 - }, - { - "epoch": 14.700994960589224, - "learning_rate": 1.0814209775703358e-07, - "loss": 2.4048, - "step": 56880 - }, - { - "epoch": 14.706163587026747, - "learning_rate": 1.0810968494749124e-07, - "loss": 2.3896, - "step": 56900 - }, - { - "epoch": 14.711332213464273, - "learning_rate": 1.080772721379489e-07, - "loss": 2.4108, - "step": 56920 - }, - { - "epoch": 14.716500839901796, - "learning_rate": 1.0804485932840656e-07, - "loss": 2.3934, - "step": 56940 - }, - { - "epoch": 14.72166946633932, - "learning_rate": 1.0801244651886425e-07, - "loss": 2.3797, - "step": 56960 - }, - { - "epoch": 14.726838092776845, - "learning_rate": 1.0798003370932191e-07, - "loss": 2.3676, - "step": 56980 - }, - { - "epoch": 14.732006719214368, - "learning_rate": 1.0794762089977957e-07, - "loss": 2.426, - "step": 57000 - }, - { - "epoch": 14.737175345651893, - "learning_rate": 1.0791520809023724e-07, - "loss": 2.4503, - "step": 57020 - }, - { - "epoch": 14.742343972089417, - "learning_rate": 1.0788279528069492e-07, - "loss": 2.449, - "step": 57040 - }, - { - "epoch": 14.747512598526942, - "learning_rate": 1.0785038247115259e-07, - "loss": 2.4417, - "step": 57060 - }, - { - "epoch": 14.752681224964466, - "learning_rate": 1.0781796966161025e-07, - "loss": 2.4536, - "step": 57080 - }, - { - "epoch": 14.75784985140199, - "learning_rate": 1.0778555685206791e-07, - "loss": 2.4133, - "step": 57100 - }, - { - "epoch": 14.763018477839514, - "learning_rate": 1.077531440425256e-07, - "loss": 2.4921, - "step": 57120 - }, - { - "epoch": 14.768187104277038, - "learning_rate": 1.0772073123298326e-07, - "loss": 2.4226, - "step": 57140 - }, - { - "epoch": 14.773355730714563, - "learning_rate": 1.0768831842344092e-07, - "loss": 2.4188, - "step": 57160 - }, - { - "epoch": 14.778524357152087, - "learning_rate": 1.0765590561389861e-07, - "loss": 2.4282, - "step": 57180 - }, - { - "epoch": 14.783692983589612, - "learning_rate": 1.0762349280435627e-07, - "loss": 2.4692, - "step": 57200 - }, - { - "epoch": 14.788861610027135, - "learning_rate": 1.0759107999481393e-07, - "loss": 2.394, - "step": 57220 - }, - { - "epoch": 14.794030236464659, - "learning_rate": 1.075586671852716e-07, - "loss": 2.4628, - "step": 57240 - }, - { - "epoch": 14.799198862902184, - "learning_rate": 1.0752625437572928e-07, - "loss": 2.4294, - "step": 57260 - }, - { - "epoch": 14.804367489339707, - "learning_rate": 1.0749384156618695e-07, - "loss": 2.4667, - "step": 57280 - }, - { - "epoch": 14.809536115777233, - "learning_rate": 1.0746142875664461e-07, - "loss": 2.4368, - "step": 57300 - }, - { - "epoch": 14.814704742214756, - "learning_rate": 1.0742901594710227e-07, - "loss": 2.3656, - "step": 57320 - }, - { - "epoch": 14.819873368652281, - "learning_rate": 1.0739660313755996e-07, - "loss": 2.4152, - "step": 57340 - }, - { - "epoch": 14.825041995089805, - "learning_rate": 1.0736419032801762e-07, - "loss": 2.4619, - "step": 57360 - }, - { - "epoch": 14.830210621527328, - "learning_rate": 1.0733177751847528e-07, - "loss": 2.4696, - "step": 57380 - }, - { - "epoch": 14.835379247964854, - "learning_rate": 1.0729936470893294e-07, - "loss": 2.4359, - "step": 57400 - }, - { - "epoch": 14.840547874402377, - "learning_rate": 1.0726695189939063e-07, - "loss": 2.4534, - "step": 57420 - }, - { - "epoch": 14.845716500839902, - "learning_rate": 1.072345390898483e-07, - "loss": 2.4296, - "step": 57440 - }, - { - "epoch": 14.850885127277426, - "learning_rate": 1.0720212628030596e-07, - "loss": 2.4417, - "step": 57460 - }, - { - "epoch": 14.856053753714951, - "learning_rate": 1.0716971347076364e-07, - "loss": 2.4688, - "step": 57480 - }, - { - "epoch": 14.861222380152475, - "learning_rate": 1.071373006612213e-07, - "loss": 2.4566, - "step": 57500 - }, - { - "epoch": 14.866391006589998, - "learning_rate": 1.0710488785167897e-07, - "loss": 2.4179, - "step": 57520 - }, - { - "epoch": 14.871559633027523, - "learning_rate": 1.0707247504213663e-07, - "loss": 2.4653, - "step": 57540 - }, - { - "epoch": 14.876728259465047, - "learning_rate": 1.0704006223259432e-07, - "loss": 2.4089, - "step": 57560 - }, - { - "epoch": 14.881896885902572, - "learning_rate": 1.0700764942305198e-07, - "loss": 2.4813, - "step": 57580 - }, - { - "epoch": 14.887065512340095, - "learning_rate": 1.0697523661350964e-07, - "loss": 2.3872, - "step": 57600 - }, - { - "epoch": 14.89223413877762, - "learning_rate": 1.069428238039673e-07, - "loss": 2.4628, - "step": 57620 - }, - { - "epoch": 14.897402765215144, - "learning_rate": 1.0691041099442499e-07, - "loss": 2.3651, - "step": 57640 - }, - { - "epoch": 14.902571391652668, - "learning_rate": 1.0687799818488265e-07, - "loss": 2.4236, - "step": 57660 - }, - { - "epoch": 14.907740018090193, - "learning_rate": 1.0684558537534032e-07, - "loss": 2.4099, - "step": 57680 - }, - { - "epoch": 14.912908644527716, - "learning_rate": 1.0681317256579798e-07, - "loss": 2.4265, - "step": 57700 - }, - { - "epoch": 14.918077270965242, - "learning_rate": 1.0678075975625567e-07, - "loss": 2.4424, - "step": 57720 - }, - { - "epoch": 14.923245897402765, - "learning_rate": 1.0674834694671335e-07, - "loss": 2.3981, - "step": 57740 - }, - { - "epoch": 14.928414523840289, - "learning_rate": 1.0671593413717099e-07, - "loss": 2.4498, - "step": 57760 - }, - { - "epoch": 14.933583150277814, - "learning_rate": 1.0668352132762868e-07, - "loss": 2.4153, - "step": 57780 - }, - { - "epoch": 14.938751776715337, - "learning_rate": 1.0665110851808634e-07, - "loss": 2.4706, - "step": 57800 - }, - { - "epoch": 14.943920403152863, - "learning_rate": 1.0661869570854403e-07, - "loss": 2.4511, - "step": 57820 - }, - { - "epoch": 14.949089029590386, - "learning_rate": 1.0658628289900166e-07, - "loss": 2.4389, - "step": 57840 - }, - { - "epoch": 14.954257656027911, - "learning_rate": 1.0655387008945935e-07, - "loss": 2.4287, - "step": 57860 - }, - { - "epoch": 14.959426282465435, - "learning_rate": 1.0652145727991701e-07, - "loss": 2.4765, - "step": 57880 - }, - { - "epoch": 14.96459490890296, - "learning_rate": 1.0648904447037468e-07, - "loss": 2.4132, - "step": 57900 - }, - { - "epoch": 14.969763535340483, - "learning_rate": 1.0645663166083234e-07, - "loss": 2.4539, - "step": 57920 - }, - { - "epoch": 14.974932161778007, - "learning_rate": 1.0642421885129003e-07, - "loss": 2.4428, - "step": 57940 - }, - { - "epoch": 14.980100788215532, - "learning_rate": 1.0639180604174769e-07, - "loss": 2.3815, - "step": 57960 - }, - { - "epoch": 14.985269414653056, - "learning_rate": 1.0635939323220535e-07, - "loss": 2.4527, - "step": 57980 - }, - { - "epoch": 14.99043804109058, - "learning_rate": 1.0632698042266301e-07, - "loss": 2.4376, - "step": 58000 - }, - { - "epoch": 14.995606667528104, - "learning_rate": 1.062945676131207e-07, - "loss": 2.4053, - "step": 58020 - }, - { - "epoch": 15.0, - "eval_bleu": 13.5764, - "eval_gen_len": 36.1492, - "eval_loss": 2.4080686569213867, - "eval_runtime": 787.1528, - "eval_samples_per_second": 2.188, - "eval_steps_per_second": 1.094, - "step": 58037 } ], "logging_steps": 20, @@ -17588,7 +12910,7 @@ "attributes": {} } }, - "total_flos": 3.80960626534613e+17, + "total_flos": 2.793935715793797e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null