{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.99995686494414, "global_step": 115910, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.586206896551724e-07, "loss": 1.9686, "step": 1 }, { "epoch": 0.02, "learning_rate": 6.465517241379309e-05, "loss": 1.4946, "step": 250 }, { "epoch": 0.04, "learning_rate": 0.00012931034482758618, "loss": 1.2379, "step": 500 }, { "epoch": 0.06, "learning_rate": 0.0001939655172413793, "loss": 1.1796, "step": 750 }, { "epoch": 0.09, "learning_rate": 0.00025862068965517237, "loss": 1.1506, "step": 1000 }, { "epoch": 0.11, "learning_rate": 0.00029999954465515996, "loss": 1.1289, "step": 1250 }, { "epoch": 0.13, "learning_rate": 0.0002999935015419672, "loss": 1.1127, "step": 1500 }, { "epoch": 0.15, "learning_rate": 0.0002999804318305346, "loss": 1.0975, "step": 1750 }, { "epoch": 0.17, "learning_rate": 0.00029996033613312583, "loss": 1.0853, "step": 2000 }, { "epoch": 0.19, "learning_rate": 0.00029993321539114454, "loss": 1.0772, "step": 2250 }, { "epoch": 0.22, "learning_rate": 0.00029989907087508934, "loss": 1.0658, "step": 2500 }, { "epoch": 0.24, "learning_rate": 0.00029985790418449485, "loss": 1.0585, "step": 2750 }, { "epoch": 0.26, "learning_rate": 0.00029980971724785665, "loss": 1.0505, "step": 3000 }, { "epoch": 0.28, "learning_rate": 0.00029975451232254073, "loss": 1.0458, "step": 3250 }, { "epoch": 0.3, "learning_rate": 0.0002996922919946781, "loss": 1.0388, "step": 3500 }, { "epoch": 0.32, "learning_rate": 0.0002996230591790433, "loss": 1.0336, "step": 3750 }, { "epoch": 0.35, "learning_rate": 0.0002995468171189181, "loss": 1.0278, "step": 4000 }, { "epoch": 0.37, "learning_rate": 0.0002994635693859393, "loss": 1.0248, "step": 4250 }, { "epoch": 0.39, "learning_rate": 0.00029937331987993176, "loss": 1.0181, "step": 4500 }, { "epoch": 0.41, "learning_rate": 0.00029927607282872543, "loss": 1.0141, "step": 4750 }, { "epoch": 0.43, "learning_rate": 0.00029917183278795734, "loss": 1.0099, "step": 5000 }, { "epoch": 0.45, "learning_rate": 0.0002990606046408584, "loss": 1.0064, "step": 5250 }, { "epoch": 0.47, "learning_rate": 0.0002989423935980243, "loss": 1.0016, "step": 5500 }, { "epoch": 0.5, "learning_rate": 0.0002988172051971717, "loss": 0.9979, "step": 5750 }, { "epoch": 0.52, "learning_rate": 0.00029868504530287856, "loss": 0.9947, "step": 6000 }, { "epoch": 0.54, "learning_rate": 0.00029854592010630966, "loss": 0.9902, "step": 6250 }, { "epoch": 0.56, "learning_rate": 0.0002983998361249265, "loss": 0.9873, "step": 6500 }, { "epoch": 0.58, "learning_rate": 0.0002982468002021818, "loss": 0.9837, "step": 6750 }, { "epoch": 0.6, "learning_rate": 0.00029808681950719917, "loss": 0.9816, "step": 7000 }, { "epoch": 0.63, "learning_rate": 0.0002979199015344371, "loss": 0.9764, "step": 7250 }, { "epoch": 0.65, "learning_rate": 0.0002977460541033379, "loss": 0.9762, "step": 7500 }, { "epoch": 0.67, "learning_rate": 0.0002975652853579614, "loss": 0.9732, "step": 7750 }, { "epoch": 0.69, "learning_rate": 0.0002973776037666034, "loss": 0.968, "step": 8000 }, { "epoch": 0.71, "learning_rate": 0.00029718301812139915, "loss": 0.9665, "step": 8250 }, { "epoch": 0.73, "learning_rate": 0.00029698153753791124, "loss": 0.962, "step": 8500 }, { "epoch": 0.75, "learning_rate": 0.00029677317145470266, "loss": 0.9613, "step": 8750 }, { "epoch": 0.78, "learning_rate": 0.0002965579296328946, "loss": 0.9581, "step": 9000 }, { "epoch": 0.8, "learning_rate": 0.00029633582215570934, "loss": 0.955, "step": 9250 }, { "epoch": 0.82, "learning_rate": 0.0002961068594279976, "loss": 0.9544, "step": 9500 }, { "epoch": 0.84, "learning_rate": 0.0002958710521757516, "loss": 0.9511, "step": 9750 }, { "epoch": 0.86, "learning_rate": 0.00029562841144560175, "loss": 0.9486, "step": 10000 }, { "epoch": 0.88, "learning_rate": 0.0002953789486043003, "loss": 0.9458, "step": 10250 }, { "epoch": 0.91, "learning_rate": 0.00029512267533818786, "loss": 0.944, "step": 10500 }, { "epoch": 0.93, "learning_rate": 0.0002948596036526464, "loss": 0.9405, "step": 10750 }, { "epoch": 0.95, "learning_rate": 0.0002945897458715367, "loss": 0.9397, "step": 11000 }, { "epoch": 0.97, "learning_rate": 0.0002943131146366213, "loss": 0.9386, "step": 11250 }, { "epoch": 0.99, "learning_rate": 0.0002940297229069719, "loss": 0.9351, "step": 11500 }, { "epoch": 1.0, "eval_alliteration_score": 0.4007220216606498, "eval_harmonic_meter_score": 0.21012850306426992, "eval_harmonic_rhyme_score": 0.2964593011664462, "eval_meter_score": 0.4084058997732567, "eval_rhyme_score": 0.7458416438967879, "eval_runtime": 2271.5688, "eval_samples_per_second": 1.189, "eval_steps_per_second": 0.074, "step": 11591 }, { "epoch": 1.01, "learning_rate": 0.00029373958395836246, "loss": 0.9233, "step": 11750 }, { "epoch": 1.04, "learning_rate": 0.0002934427113826474, "loss": 0.9132, "step": 12000 }, { "epoch": 1.06, "learning_rate": 0.00029313911908712464, "loss": 0.9124, "step": 12250 }, { "epoch": 1.08, "learning_rate": 0.00029282882129388416, "loss": 0.9113, "step": 12500 }, { "epoch": 1.1, "learning_rate": 0.0002925118325391421, "loss": 0.9109, "step": 12750 }, { "epoch": 1.12, "learning_rate": 0.0002921881676725592, "loss": 0.9089, "step": 13000 }, { "epoch": 1.14, "learning_rate": 0.00029185784185654554, "loss": 0.9066, "step": 13250 }, { "epoch": 1.16, "learning_rate": 0.0002915208705655501, "loss": 0.9053, "step": 13500 }, { "epoch": 1.19, "learning_rate": 0.0002911772695853361, "loss": 0.9045, "step": 13750 }, { "epoch": 1.21, "learning_rate": 0.00029082705501224114, "loss": 0.9016, "step": 14000 }, { "epoch": 1.23, "learning_rate": 0.00029047024325242333, "loss": 0.9015, "step": 14250 }, { "epoch": 1.25, "learning_rate": 0.00029010685102109267, "loss": 0.8978, "step": 14500 }, { "epoch": 1.27, "learning_rate": 0.0002897368953417281, "loss": 0.8968, "step": 14750 }, { "epoch": 1.29, "learning_rate": 0.00028936039354528003, "loss": 0.8975, "step": 15000 }, { "epoch": 1.32, "learning_rate": 0.0002889773632693582, "loss": 0.8942, "step": 15250 }, { "epoch": 1.34, "learning_rate": 0.0002885878224574058, "loss": 0.8928, "step": 15500 }, { "epoch": 1.36, "learning_rate": 0.00028819178935785857, "loss": 0.89, "step": 15750 }, { "epoch": 1.38, "learning_rate": 0.00028778928252329024, "loss": 0.8891, "step": 16000 }, { "epoch": 1.4, "learning_rate": 0.00028738032080954305, "loss": 0.8869, "step": 16250 }, { "epoch": 1.42, "learning_rate": 0.0002869649233748447, "loss": 0.8853, "step": 16500 }, { "epoch": 1.45, "learning_rate": 0.0002865431096789108, "loss": 0.8846, "step": 16750 }, { "epoch": 1.47, "learning_rate": 0.00028611489948203336, "loss": 0.883, "step": 17000 }, { "epoch": 1.49, "learning_rate": 0.0002856803128441548, "loss": 0.8817, "step": 17250 }, { "epoch": 1.51, "learning_rate": 0.0002852393701239284, "loss": 0.8803, "step": 17500 }, { "epoch": 1.53, "learning_rate": 0.0002847920919777649, "loss": 0.8777, "step": 17750 }, { "epoch": 1.55, "learning_rate": 0.000284338499358864, "loss": 0.8774, "step": 18000 }, { "epoch": 1.57, "learning_rate": 0.0002838786135162338, "loss": 0.8758, "step": 18250 }, { "epoch": 1.6, "learning_rate": 0.0002834124559936946, "loss": 0.8737, "step": 18500 }, { "epoch": 1.62, "learning_rate": 0.00028294004862887, "loss": 0.8726, "step": 18750 }, { "epoch": 1.64, "learning_rate": 0.0002824614135521637, "loss": 0.8702, "step": 19000 }, { "epoch": 1.66, "learning_rate": 0.0002819765731857232, "loss": 0.8699, "step": 19250 }, { "epoch": 1.68, "learning_rate": 0.0002814855502423888, "loss": 0.8684, "step": 19500 }, { "epoch": 1.7, "learning_rate": 0.00028098836772463007, "loss": 0.8667, "step": 19750 }, { "epoch": 1.73, "learning_rate": 0.00028048504892346806, "loss": 0.8651, "step": 20000 }, { "epoch": 1.75, "learning_rate": 0.0002799756174173843, "loss": 0.8638, "step": 20250 }, { "epoch": 1.77, "learning_rate": 0.0002794600970712162, "loss": 0.8625, "step": 20500 }, { "epoch": 1.79, "learning_rate": 0.000278938512035039, "loss": 0.8591, "step": 20750 }, { "epoch": 1.81, "learning_rate": 0.00027841088674303456, "loss": 0.8587, "step": 21000 }, { "epoch": 1.83, "learning_rate": 0.0002778772459123468, "loss": 0.8579, "step": 21250 }, { "epoch": 1.85, "learning_rate": 0.00027733761454192354, "loss": 0.8572, "step": 21500 }, { "epoch": 1.88, "learning_rate": 0.00027679201791134546, "loss": 0.8538, "step": 21750 }, { "epoch": 1.9, "learning_rate": 0.0002762404815796421, "loss": 0.8535, "step": 22000 }, { "epoch": 1.92, "learning_rate": 0.0002756830313840942, "loss": 0.8511, "step": 22250 }, { "epoch": 1.94, "learning_rate": 0.0002751196934390236, "loss": 0.8516, "step": 22500 }, { "epoch": 1.96, "learning_rate": 0.0002745504941345696, "loss": 0.8492, "step": 22750 }, { "epoch": 1.98, "learning_rate": 0.000273975460135453, "loss": 0.8469, "step": 23000 }, { "epoch": 2.0, "eval_alliteration_score": 0.43247815054976035, "eval_harmonic_meter_score": 0.20177755686034296, "eval_harmonic_rhyme_score": 0.4188003728656778, "eval_meter_score": 0.4238294535271108, "eval_rhyme_score": 0.8014050476978813, "eval_runtime": 2299.0707, "eval_samples_per_second": 1.174, "eval_steps_per_second": 0.074, "step": 23182 }, { "epoch": 2.01, "learning_rate": 0.0002733946183797268, "loss": 0.8197, "step": 23250 }, { "epoch": 2.03, "learning_rate": 0.0002728079960775141, "loss": 0.8178, "step": 23500 }, { "epoch": 2.05, "learning_rate": 0.0002722156207097338, "loss": 0.8178, "step": 23750 }, { "epoch": 2.07, "learning_rate": 0.00027161752002681283, "loss": 0.8182, "step": 24000 }, { "epoch": 2.09, "learning_rate": 0.0002710137220473864, "loss": 0.8183, "step": 24250 }, { "epoch": 2.11, "learning_rate": 0.00027040425505698554, "loss": 0.8173, "step": 24500 }, { "epoch": 2.14, "learning_rate": 0.00026978914760671156, "loss": 0.8155, "step": 24750 }, { "epoch": 2.16, "learning_rate": 0.00026916842851189906, "loss": 0.8167, "step": 25000 }, { "epoch": 2.18, "learning_rate": 0.0002685421268507658, "loss": 0.8137, "step": 25250 }, { "epoch": 2.2, "learning_rate": 0.00026791027196305055, "loss": 0.8124, "step": 25500 }, { "epoch": 2.22, "learning_rate": 0.00026727289344863857, "loss": 0.8132, "step": 25750 }, { "epoch": 2.24, "learning_rate": 0.0002666300211661751, "loss": 0.8129, "step": 26000 }, { "epoch": 2.26, "learning_rate": 0.00026598168523166653, "loss": 0.8095, "step": 26250 }, { "epoch": 2.29, "learning_rate": 0.0002653279160170696, "loss": 0.8095, "step": 26500 }, { "epoch": 2.31, "learning_rate": 0.00026466874414886847, "loss": 0.8077, "step": 26750 }, { "epoch": 2.33, "learning_rate": 0.00026400420050664024, "loss": 0.8086, "step": 27000 }, { "epoch": 2.35, "learning_rate": 0.0002633343162216082, "loss": 0.8057, "step": 27250 }, { "epoch": 2.37, "learning_rate": 0.0002626591226751835, "loss": 0.8071, "step": 27500 }, { "epoch": 2.39, "learning_rate": 0.00026197865149749495, "loss": 0.8034, "step": 27750 }, { "epoch": 2.42, "learning_rate": 0.00026129293456590756, "loss": 0.8029, "step": 28000 }, { "epoch": 2.44, "learning_rate": 0.00026060200400352876, "loss": 0.8041, "step": 28250 }, { "epoch": 2.46, "learning_rate": 0.00025990589217770406, "loss": 0.8028, "step": 28500 }, { "epoch": 2.48, "learning_rate": 0.00025920463169850033, "loss": 0.8009, "step": 28750 }, { "epoch": 2.5, "learning_rate": 0.00025849825541717853, "loss": 0.798, "step": 29000 }, { "epoch": 2.52, "learning_rate": 0.0002577867964246545, "loss": 0.7971, "step": 29250 }, { "epoch": 2.55, "learning_rate": 0.0002570702880499488, "loss": 0.7977, "step": 29500 }, { "epoch": 2.57, "learning_rate": 0.00025634876385862554, "loss": 0.7952, "step": 29750 }, { "epoch": 2.59, "learning_rate": 0.00025562225765121975, "loss": 0.7944, "step": 30000 }, { "epoch": 2.61, "learning_rate": 0.00025489080346165416, "loss": 0.7941, "step": 30250 }, { "epoch": 2.63, "learning_rate": 0.0002541544355556448, "loss": 0.7902, "step": 30500 }, { "epoch": 2.65, "learning_rate": 0.0002534131884290956, "loss": 0.79, "step": 30750 }, { "epoch": 2.67, "learning_rate": 0.0002526670968064827, "loss": 0.7896, "step": 31000 }, { "epoch": 2.7, "learning_rate": 0.00025191619563922745, "loss": 0.7877, "step": 31250 }, { "epoch": 2.72, "learning_rate": 0.0002511605201040593, "loss": 0.7864, "step": 31500 }, { "epoch": 2.74, "learning_rate": 0.0002504001056013679, "loss": 0.7874, "step": 31750 }, { "epoch": 2.76, "learning_rate": 0.0002496349877535445, "loss": 0.7848, "step": 32000 }, { "epoch": 2.78, "learning_rate": 0.0002488652024033133, "loss": 0.7828, "step": 32250 }, { "epoch": 2.8, "learning_rate": 0.00024809078561205264, "loss": 0.7825, "step": 32500 }, { "epoch": 2.83, "learning_rate": 0.00024731177365810525, "loss": 0.7805, "step": 32750 }, { "epoch": 2.85, "learning_rate": 0.00024652820303507897, "loss": 0.7791, "step": 33000 }, { "epoch": 2.87, "learning_rate": 0.0002457401104501371, "loss": 0.7783, "step": 33250 }, { "epoch": 2.89, "learning_rate": 0.00024494753282227894, "loss": 0.7774, "step": 33500 }, { "epoch": 2.91, "learning_rate": 0.0002441505072806101, "loss": 0.7764, "step": 33750 }, { "epoch": 2.93, "learning_rate": 0.00024334907116260328, "loss": 0.7753, "step": 34000 }, { "epoch": 2.95, "learning_rate": 0.0002425432620123492, "loss": 0.7734, "step": 34250 }, { "epoch": 2.98, "learning_rate": 0.0002417331175787975, "loss": 0.7729, "step": 34500 }, { "epoch": 3.0, "learning_rate": 0.00024091867581398877, "loss": 0.7719, "step": 34750 }, { "epoch": 3.0, "eval_alliteration_score": 0.43172298240714885, "eval_harmonic_meter_score": 0.208647467127026, "eval_harmonic_rhyme_score": 0.4413259788497785, "eval_meter_score": 0.457874360800918, "eval_rhyme_score": 0.8102502944648488, "eval_runtime": 2323.5118, "eval_samples_per_second": 1.162, "eval_steps_per_second": 0.145, "step": 34773 }, { "epoch": 3.02, "learning_rate": 0.00024009997487127647, "loss": 0.7391, "step": 35000 }, { "epoch": 3.04, "learning_rate": 0.00023927705310353945, "loss": 0.7344, "step": 35250 }, { "epoch": 3.06, "learning_rate": 0.00023844994906138548, "loss": 0.7352, "step": 35500 }, { "epoch": 3.08, "learning_rate": 0.00023761870149134522, "loss": 0.7349, "step": 35750 }, { "epoch": 3.11, "learning_rate": 0.0002367833493340571, "loss": 0.7381, "step": 36000 }, { "epoch": 3.13, "learning_rate": 0.00023594393172244322, "loss": 0.7372, "step": 36250 }, { "epoch": 3.15, "learning_rate": 0.00023510048797987585, "loss": 0.7338, "step": 36500 }, { "epoch": 3.17, "learning_rate": 0.00023425305761833555, "loss": 0.7338, "step": 36750 }, { "epoch": 3.19, "learning_rate": 0.00023340168033656017, "loss": 0.732, "step": 37000 }, { "epoch": 3.21, "learning_rate": 0.0002325463960181849, "loss": 0.7339, "step": 37250 }, { "epoch": 3.24, "learning_rate": 0.00023168724472987423, "loss": 0.7313, "step": 37500 }, { "epoch": 3.26, "learning_rate": 0.00023082426671944457, "loss": 0.7327, "step": 37750 }, { "epoch": 3.28, "learning_rate": 0.00022995750241397924, "loss": 0.7299, "step": 38000 }, { "epoch": 3.3, "learning_rate": 0.0002290869924179342, "loss": 0.7306, "step": 38250 }, { "epoch": 3.32, "learning_rate": 0.00022821277751123635, "loss": 0.7289, "step": 38500 }, { "epoch": 3.34, "learning_rate": 0.0002273348986473726, "loss": 0.7285, "step": 38750 }, { "epoch": 3.36, "learning_rate": 0.00022645339695147193, "loss": 0.7276, "step": 39000 }, { "epoch": 3.39, "learning_rate": 0.0002255683137183783, "loss": 0.7284, "step": 39250 }, { "epoch": 3.41, "learning_rate": 0.0002246796904107168, "loss": 0.7275, "step": 39500 }, { "epoch": 3.43, "learning_rate": 0.00022378756865695055, "loss": 0.7254, "step": 39750 }, { "epoch": 3.45, "learning_rate": 0.00022289199024943116, "loss": 0.7252, "step": 40000 }, { "epoch": 3.47, "learning_rate": 0.00022199299714244057, "loss": 0.7245, "step": 40250 }, { "epoch": 3.49, "learning_rate": 0.00022109063145022591, "loss": 0.7212, "step": 40500 }, { "epoch": 3.52, "learning_rate": 0.00022018493544502638, "loss": 0.7225, "step": 40750 }, { "epoch": 3.54, "learning_rate": 0.00021927595155509316, "loss": 0.7184, "step": 41000 }, { "epoch": 3.56, "learning_rate": 0.0002183637223627018, "loss": 0.72, "step": 41250 }, { "epoch": 3.58, "learning_rate": 0.00021744829060215716, "loss": 0.7186, "step": 41500 }, { "epoch": 3.6, "learning_rate": 0.00021652969915779183, "loss": 0.7181, "step": 41750 }, { "epoch": 3.62, "learning_rate": 0.0002156079910619569, "loss": 0.716, "step": 42000 }, { "epoch": 3.65, "learning_rate": 0.0002146832094930062, "loss": 0.7166, "step": 42250 }, { "epoch": 3.67, "learning_rate": 0.00021375539777327365, "loss": 0.7153, "step": 42500 }, { "epoch": 3.69, "learning_rate": 0.0002128245993670435, "loss": 0.7134, "step": 42750 }, { "epoch": 3.71, "learning_rate": 0.00021189085787851446, "loss": 0.713, "step": 43000 }, { "epoch": 3.73, "learning_rate": 0.00021095421704975687, "loss": 0.7101, "step": 43250 }, { "epoch": 3.75, "learning_rate": 0.00021001472075866362, "loss": 0.7097, "step": 43500 }, { "epoch": 3.77, "learning_rate": 0.00020907241301689473, "loss": 0.7093, "step": 43750 }, { "epoch": 3.8, "learning_rate": 0.00020812733796781542, "loss": 0.7072, "step": 44000 }, { "epoch": 3.82, "learning_rate": 0.00020717953988442822, "loss": 0.7073, "step": 44250 }, { "epoch": 3.84, "learning_rate": 0.000206229063167299, "loss": 0.7061, "step": 44500 }, { "epoch": 3.86, "learning_rate": 0.000205275952342477, "loss": 0.7021, "step": 44750 }, { "epoch": 3.88, "learning_rate": 0.00020432025205940874, "loss": 0.7021, "step": 45000 }, { "epoch": 3.9, "learning_rate": 0.00020336200708884674, "loss": 0.7005, "step": 45250 }, { "epoch": 3.93, "learning_rate": 0.00020240126232075196, "loss": 0.7011, "step": 45500 }, { "epoch": 3.95, "learning_rate": 0.0002014380627621909, "loss": 0.699, "step": 45750 }, { "epoch": 3.97, "learning_rate": 0.00020047245353522713, "loss": 0.6975, "step": 46000 }, { "epoch": 3.99, "learning_rate": 0.0001995044798748077, "loss": 0.6964, "step": 46250 }, { "epoch": 4.0, "eval_alliteration_score": 0.45886250690226393, "eval_harmonic_meter_score": 0.20919003027562127, "eval_harmonic_rhyme_score": 0.4443731034149568, "eval_meter_score": 0.4682188641643268, "eval_rhyme_score": 0.8157541677970918, "eval_runtime": 2407.9845, "eval_samples_per_second": 1.121, "eval_steps_per_second": 0.14, "step": 46364 }, { "epoch": 4.01, "learning_rate": 0.0001985341871266439, "loss": 0.6746, "step": 46500 }, { "epoch": 4.03, "learning_rate": 0.00019756162074508707, "loss": 0.6539, "step": 46750 }, { "epoch": 4.05, "learning_rate": 0.00019658682629099928, "loss": 0.6592, "step": 47000 }, { "epoch": 4.08, "learning_rate": 0.00019560984942961876, "loss": 0.6597, "step": 47250 }, { "epoch": 4.1, "learning_rate": 0.00019463073592842096, "loss": 0.6588, "step": 47500 }, { "epoch": 4.12, "learning_rate": 0.0001936495316549744, "loss": 0.6581, "step": 47750 }, { "epoch": 4.14, "learning_rate": 0.00019266628257479183, "loss": 0.6601, "step": 48000 }, { "epoch": 4.16, "learning_rate": 0.0001916810347491772, "loss": 0.6575, "step": 48250 }, { "epoch": 4.18, "learning_rate": 0.0001906938343330677, "loss": 0.6583, "step": 48500 }, { "epoch": 4.21, "learning_rate": 0.0001897047275728715, "loss": 0.6563, "step": 48750 }, { "epoch": 4.23, "learning_rate": 0.00018871376080430155, "loss": 0.6566, "step": 49000 }, { "epoch": 4.25, "learning_rate": 0.00018772098045020464, "loss": 0.6549, "step": 49250 }, { "epoch": 4.27, "learning_rate": 0.00018672643301838695, "loss": 0.6557, "step": 49500 }, { "epoch": 4.29, "learning_rate": 0.0001857301650994351, "loss": 0.656, "step": 49750 }, { "epoch": 4.31, "learning_rate": 0.00018473222336453377, "loss": 0.6535, "step": 50000 }, { "epoch": 4.34, "learning_rate": 0.00018373265456327918, "loss": 0.6536, "step": 50250 }, { "epoch": 4.36, "learning_rate": 0.00018273150552148925, "loss": 0.6525, "step": 50500 }, { "epoch": 4.38, "learning_rate": 0.00018172882313900965, "loss": 0.6541, "step": 50750 }, { "epoch": 4.4, "learning_rate": 0.0001807246543875172, "loss": 0.6521, "step": 51000 }, { "epoch": 4.42, "learning_rate": 0.00017971904630831908, "loss": 0.651, "step": 51250 }, { "epoch": 4.44, "learning_rate": 0.0001787120460101492, "loss": 0.6511, "step": 51500 }, { "epoch": 4.46, "learning_rate": 0.0001777037006669615, "loss": 0.6472, "step": 51750 }, { "epoch": 4.49, "learning_rate": 0.00017669405751571975, "loss": 0.646, "step": 52000 }, { "epoch": 4.51, "learning_rate": 0.00017568316385418507, "loss": 0.6476, "step": 52250 }, { "epoch": 4.53, "learning_rate": 0.0001746710670386998, "loss": 0.6465, "step": 52500 }, { "epoch": 4.55, "learning_rate": 0.0001736578144819695, "loss": 0.6447, "step": 52750 }, { "epoch": 4.57, "learning_rate": 0.00017264345365084128, "loss": 0.6471, "step": 53000 }, { "epoch": 4.59, "learning_rate": 0.0001716280320640808, "loss": 0.6429, "step": 53250 }, { "epoch": 4.62, "learning_rate": 0.00017061159729014568, "loss": 0.6396, "step": 53500 }, { "epoch": 4.64, "learning_rate": 0.00016959419694495754, "loss": 0.6434, "step": 53750 }, { "epoch": 4.66, "learning_rate": 0.00016857587868967087, "loss": 0.6406, "step": 54000 }, { "epoch": 4.68, "learning_rate": 0.00016755669022844086, "loss": 0.6382, "step": 54250 }, { "epoch": 4.7, "learning_rate": 0.00016653667930618827, "loss": 0.639, "step": 54500 }, { "epoch": 4.72, "learning_rate": 0.00016551589370636293, "loss": 0.6376, "step": 54750 }, { "epoch": 4.75, "learning_rate": 0.00016449438124870516, "loss": 0.637, "step": 55000 }, { "epoch": 4.77, "learning_rate": 0.0001634721897870057, "loss": 0.6345, "step": 55250 }, { "epoch": 4.79, "learning_rate": 0.00016244936720686393, "loss": 0.6334, "step": 55500 }, { "epoch": 4.81, "learning_rate": 0.00016142596142344466, "loss": 0.6325, "step": 55750 }, { "epoch": 4.83, "learning_rate": 0.00016040202037923343, "loss": 0.6303, "step": 56000 }, { "epoch": 4.85, "learning_rate": 0.00015937759204179055, "loss": 0.6301, "step": 56250 }, { "epoch": 4.87, "learning_rate": 0.0001583527244015042, "loss": 0.6291, "step": 56500 }, { "epoch": 4.9, "learning_rate": 0.000157327465469342, "loss": 0.6279, "step": 56750 }, { "epoch": 4.92, "learning_rate": 0.00015630186327460213, "loss": 0.626, "step": 57000 }, { "epoch": 4.94, "learning_rate": 0.00015527596586266328, "loss": 0.6246, "step": 57250 }, { "epoch": 4.96, "learning_rate": 0.00015424982129273382, "loss": 0.6256, "step": 57500 }, { "epoch": 4.98, "learning_rate": 0.00015322347763560055, "loss": 0.6247, "step": 57750 }, { "epoch": 5.0, "eval_alliteration_score": 0.4378994165045846, "eval_harmonic_meter_score": 0.20452236335398252, "eval_harmonic_rhyme_score": 0.4254891583797342, "eval_meter_score": 0.46303572443803803, "eval_rhyme_score": 0.8150200876525034, "eval_runtime": 2319.352, "eval_samples_per_second": 1.164, "eval_steps_per_second": 0.146, "step": 57955 }, { "epoch": 5.0, "learning_rate": 0.00015219698297137664, "loss": 0.6162, "step": 58000 }, { "epoch": 5.03, "learning_rate": 0.00015117038538724938, "loss": 0.5811, "step": 58250 }, { "epoch": 5.05, "learning_rate": 0.00015014373297522746, "loss": 0.5826, "step": 58500 }, { "epoch": 5.07, "learning_rate": 0.00014911707382988802, "loss": 0.5835, "step": 58750 }, { "epoch": 5.09, "learning_rate": 0.00014809045604612363, "loss": 0.585, "step": 59000 }, { "epoch": 5.11, "learning_rate": 0.00014706392771688935, "loss": 0.5839, "step": 59250 }, { "epoch": 5.13, "learning_rate": 0.00014603753693094937, "loss": 0.5838, "step": 59500 }, { "epoch": 5.15, "learning_rate": 0.00014501133177062474, "loss": 0.5836, "step": 59750 }, { "epoch": 5.18, "learning_rate": 0.00014398536030954076, "loss": 0.5828, "step": 60000 }, { "epoch": 5.2, "learning_rate": 0.0001429596706103746, "loss": 0.5842, "step": 60250 }, { "epoch": 5.22, "learning_rate": 0.0001419343107226042, "loss": 0.5839, "step": 60500 }, { "epoch": 5.24, "learning_rate": 0.00014090932868025707, "loss": 0.5819, "step": 60750 }, { "epoch": 5.26, "learning_rate": 0.00013988477249966025, "loss": 0.5839, "step": 61000 }, { "epoch": 5.28, "learning_rate": 0.0001388606901771907, "loss": 0.5818, "step": 61250 }, { "epoch": 5.31, "learning_rate": 0.00013783712968702718, "loss": 0.5816, "step": 61500 }, { "epoch": 5.33, "learning_rate": 0.00013681413897890257, "loss": 0.5811, "step": 61750 }, { "epoch": 5.35, "learning_rate": 0.00013579176597585784, "loss": 0.5789, "step": 62000 }, { "epoch": 5.37, "learning_rate": 0.0001347700585719968, "loss": 0.5798, "step": 62250 }, { "epoch": 5.39, "learning_rate": 0.0001337490646302426, "loss": 0.577, "step": 62500 }, { "epoch": 5.41, "learning_rate": 0.00013272883198009572, "loss": 0.5776, "step": 62750 }, { "epoch": 5.44, "learning_rate": 0.00013170940841539296, "loss": 0.5753, "step": 63000 }, { "epoch": 5.46, "learning_rate": 0.00013069084169206873, "loss": 0.5761, "step": 63250 }, { "epoch": 5.48, "learning_rate": 0.00012967317952591788, "loss": 0.577, "step": 63500 }, { "epoch": 5.5, "learning_rate": 0.00012865646959036038, "loss": 0.5739, "step": 63750 }, { "epoch": 5.52, "learning_rate": 0.00012764075951420792, "loss": 0.5737, "step": 64000 }, { "epoch": 5.54, "learning_rate": 0.00012662609687943277, "loss": 0.572, "step": 64250 }, { "epoch": 5.56, "learning_rate": 0.00012561252921893876, "loss": 0.5706, "step": 64500 }, { "epoch": 5.59, "learning_rate": 0.00012460010401433463, "loss": 0.5703, "step": 64750 }, { "epoch": 5.61, "learning_rate": 0.00012358886869370933, "loss": 0.5699, "step": 65000 }, { "epoch": 5.63, "learning_rate": 0.0001225788706294108, "loss": 0.5689, "step": 65250 }, { "epoch": 5.65, "learning_rate": 0.00012157015713582626, "loss": 0.568, "step": 65500 }, { "epoch": 5.67, "learning_rate": 0.00012056277546716603, "loss": 0.5662, "step": 65750 }, { "epoch": 5.69, "learning_rate": 0.0001195567728152496, "loss": 0.5642, "step": 66000 }, { "epoch": 5.72, "learning_rate": 0.0001185521963072951, "loss": 0.5638, "step": 66250 }, { "epoch": 5.74, "learning_rate": 0.00011754909300371153, "loss": 0.5639, "step": 66500 }, { "epoch": 5.76, "learning_rate": 0.0001165475098958941, "loss": 0.5617, "step": 66750 }, { "epoch": 5.78, "learning_rate": 0.00011554749390402273, "loss": 0.5592, "step": 67000 }, { "epoch": 5.8, "learning_rate": 0.0001145490918748645, "loss": 0.5616, "step": 67250 }, { "epoch": 5.82, "learning_rate": 0.00011355235057957859, "loss": 0.5614, "step": 67500 }, { "epoch": 5.85, "learning_rate": 0.00011255731671152548, "loss": 0.5582, "step": 67750 }, { "epoch": 5.87, "learning_rate": 0.00011156403688407935, "loss": 0.5568, "step": 68000 }, { "epoch": 5.89, "learning_rate": 0.0001105725576284447, "loss": 0.556, "step": 68250 }, { "epoch": 5.91, "learning_rate": 0.00010958292539147633, "loss": 0.5538, "step": 68500 }, { "epoch": 5.93, "learning_rate": 0.00010859518653350373, "loss": 0.5535, "step": 68750 }, { "epoch": 5.95, "learning_rate": 0.00010760938732615886, "loss": 0.5536, "step": 69000 }, { "epoch": 5.97, "learning_rate": 0.00010662557395020896, "loss": 0.5536, "step": 69250 }, { "epoch": 6.0, "learning_rate": 0.00010564379249339304, "loss": 0.551, "step": 69500 }, { "epoch": 6.0, "eval_alliteration_score": 0.4355458036204059, "eval_harmonic_meter_score": 0.184149411235037, "eval_harmonic_rhyme_score": 0.44674866827401805, "eval_meter_score": 0.44256677255617083, "eval_rhyme_score": 0.8221214981988054, "eval_runtime": 2288.5532, "eval_samples_per_second": 1.18, "eval_steps_per_second": 0.148, "step": 69546 }, { "epoch": 6.02, "learning_rate": 0.00010466408894826249, "loss": 0.5183, "step": 69750 }, { "epoch": 6.04, "learning_rate": 0.0001036865092100271, "loss": 0.5118, "step": 70000 }, { "epoch": 6.06, "learning_rate": 0.00010271109907440465, "loss": 0.5148, "step": 70250 }, { "epoch": 6.08, "learning_rate": 0.00010173790423547578, "loss": 0.512, "step": 70500 }, { "epoch": 6.1, "learning_rate": 0.00010076697028354309, "loss": 0.5148, "step": 70750 }, { "epoch": 6.13, "learning_rate": 9.97983427029958e-05, "loss": 0.5154, "step": 71000 }, { "epoch": 6.15, "learning_rate": 9.88320668701788e-05, "loss": 0.5114, "step": 71250 }, { "epoch": 6.17, "learning_rate": 9.786818805126695e-05, "loss": 0.5137, "step": 71500 }, { "epoch": 6.19, "learning_rate": 9.690675140014445e-05, "loss": 0.5132, "step": 71750 }, { "epoch": 6.21, "learning_rate": 9.594780195628981e-05, "loss": 0.5121, "step": 72000 }, { "epoch": 6.23, "learning_rate": 9.499138464266571e-05, "loss": 0.5125, "step": 72250 }, { "epoch": 6.25, "learning_rate": 9.403754426361472e-05, "loss": 0.5128, "step": 72500 }, { "epoch": 6.28, "learning_rate": 9.308632550276002e-05, "loss": 0.5132, "step": 72750 }, { "epoch": 6.3, "learning_rate": 9.213777292091273e-05, "loss": 0.513, "step": 73000 }, { "epoch": 6.32, "learning_rate": 9.119193095398385e-05, "loss": 0.5114, "step": 73250 }, { "epoch": 6.34, "learning_rate": 9.024884391090313e-05, "loss": 0.5095, "step": 73500 }, { "epoch": 6.36, "learning_rate": 8.930855597154284e-05, "loss": 0.5069, "step": 73750 }, { "epoch": 6.38, "learning_rate": 8.837111118464855e-05, "loss": 0.5066, "step": 74000 }, { "epoch": 6.41, "learning_rate": 8.743655346577544e-05, "loss": 0.5082, "step": 74250 }, { "epoch": 6.43, "learning_rate": 8.650492659523093e-05, "loss": 0.5065, "step": 74500 }, { "epoch": 6.45, "learning_rate": 8.557627421602401e-05, "loss": 0.5043, "step": 74750 }, { "epoch": 6.47, "learning_rate": 8.465063983182055e-05, "loss": 0.5071, "step": 75000 }, { "epoch": 6.49, "learning_rate": 8.37280668049054e-05, "loss": 0.5069, "step": 75250 }, { "epoch": 6.51, "learning_rate": 8.280859835415095e-05, "loss": 0.5025, "step": 75500 }, { "epoch": 6.54, "learning_rate": 8.189227755299259e-05, "loss": 0.5021, "step": 75750 }, { "epoch": 6.56, "learning_rate": 8.097914732741086e-05, "loss": 0.5031, "step": 76000 }, { "epoch": 6.58, "learning_rate": 8.006925045392063e-05, "loss": 0.5002, "step": 76250 }, { "epoch": 6.6, "learning_rate": 7.916262955756686e-05, "loss": 0.5015, "step": 76500 }, { "epoch": 6.62, "learning_rate": 7.825932710992824e-05, "loss": 0.4984, "step": 76750 }, { "epoch": 6.64, "learning_rate": 7.735938542712732e-05, "loss": 0.5004, "step": 77000 }, { "epoch": 6.66, "learning_rate": 7.64628466678482e-05, "loss": 0.4982, "step": 77250 }, { "epoch": 6.69, "learning_rate": 7.556975283136148e-05, "loss": 0.4965, "step": 77500 }, { "epoch": 6.71, "learning_rate": 7.468014575555687e-05, "loss": 0.4987, "step": 77750 }, { "epoch": 6.73, "learning_rate": 7.379406711498332e-05, "loss": 0.4947, "step": 78000 }, { "epoch": 6.75, "learning_rate": 7.291155841889657e-05, "loss": 0.4918, "step": 78250 }, { "epoch": 6.77, "learning_rate": 7.203266100931468e-05, "loss": 0.4957, "step": 78500 }, { "epoch": 6.79, "learning_rate": 7.115741605908132e-05, "loss": 0.4927, "step": 78750 }, { "epoch": 6.82, "learning_rate": 7.028586456993707e-05, "loss": 0.4912, "step": 79000 }, { "epoch": 6.84, "learning_rate": 6.941804737059833e-05, "loss": 0.4907, "step": 79250 }, { "epoch": 6.86, "learning_rate": 6.855400511484516e-05, "loss": 0.4908, "step": 79500 }, { "epoch": 6.88, "learning_rate": 6.769377827961639e-05, "loss": 0.4881, "step": 79750 }, { "epoch": 6.9, "learning_rate": 6.683740716311371e-05, "loss": 0.4878, "step": 80000 }, { "epoch": 6.92, "learning_rate": 6.598493188291358e-05, "loss": 0.4861, "step": 80250 }, { "epoch": 6.95, "learning_rate": 6.513639237408814e-05, "loss": 0.4843, "step": 80500 }, { "epoch": 6.97, "learning_rate": 6.429182838733434e-05, "loss": 0.484, "step": 80750 }, { "epoch": 6.99, "learning_rate": 6.345127948711172e-05, "loss": 0.4821, "step": 81000 }, { "epoch": 7.0, "eval_alliteration_score": 0.4303659202621518, "eval_harmonic_meter_score": 0.1753914687780149, "eval_harmonic_rhyme_score": 0.43386940648753536, "eval_meter_score": 0.4408589702287603, "eval_rhyme_score": 0.815514426880565, "eval_runtime": 2426.169, "eval_samples_per_second": 1.113, "eval_steps_per_second": 0.139, "step": 81137 }, { "epoch": 7.01, "learning_rate": 6.261478504978898e-05, "loss": 0.4684, "step": 81250 }, { "epoch": 7.03, "learning_rate": 6.178238426179942e-05, "loss": 0.4529, "step": 81500 }, { "epoch": 7.05, "learning_rate": 6.0954116117805176e-05, "loss": 0.4513, "step": 81750 }, { "epoch": 7.07, "learning_rate": 6.0130019418870444e-05, "loss": 0.452, "step": 82000 }, { "epoch": 7.1, "learning_rate": 5.931013277064377e-05, "loss": 0.4517, "step": 82250 }, { "epoch": 7.12, "learning_rate": 5.8494494581549705e-05, "loss": 0.4501, "step": 82500 }, { "epoch": 7.14, "learning_rate": 5.7683143060989395e-05, "loss": 0.4495, "step": 82750 }, { "epoch": 7.16, "learning_rate": 5.6876116217550485e-05, "loss": 0.451, "step": 83000 }, { "epoch": 7.18, "learning_rate": 5.6073451857226904e-05, "loss": 0.4504, "step": 83250 }, { "epoch": 7.2, "learning_rate": 5.527518758164754e-05, "loss": 0.4509, "step": 83500 }, { "epoch": 7.23, "learning_rate": 5.4481360786314834e-05, "loss": 0.4507, "step": 83750 }, { "epoch": 7.25, "learning_rate": 5.369200865885296e-05, "loss": 0.4511, "step": 84000 }, { "epoch": 7.27, "learning_rate": 5.2907168177265706e-05, "loss": 0.4489, "step": 84250 }, { "epoch": 7.29, "learning_rate": 5.212687610820424e-05, "loss": 0.448, "step": 84500 }, { "epoch": 7.31, "learning_rate": 5.135116900524474e-05, "loss": 0.4468, "step": 84750 }, { "epoch": 7.33, "learning_rate": 5.058008320717585e-05, "loss": 0.4472, "step": 85000 }, { "epoch": 7.35, "learning_rate": 4.981365483629661e-05, "loss": 0.4503, "step": 85250 }, { "epoch": 7.38, "learning_rate": 4.905191979672413e-05, "loss": 0.4455, "step": 85500 }, { "epoch": 7.4, "learning_rate": 4.829491377271165e-05, "loss": 0.447, "step": 85750 }, { "epoch": 7.42, "learning_rate": 4.7542672226976754e-05, "loss": 0.4447, "step": 86000 }, { "epoch": 7.44, "learning_rate": 4.679523039904028e-05, "loss": 0.4465, "step": 86250 }, { "epoch": 7.46, "learning_rate": 4.605262330357542e-05, "loss": 0.4443, "step": 86500 }, { "epoch": 7.48, "learning_rate": 4.531488572876734e-05, "loss": 0.4434, "step": 86750 }, { "epoch": 7.51, "learning_rate": 4.458205223468363e-05, "loss": 0.4461, "step": 87000 }, { "epoch": 7.53, "learning_rate": 4.3854157151655144e-05, "loss": 0.4438, "step": 87250 }, { "epoch": 7.55, "learning_rate": 4.313123457866796e-05, "loss": 0.4409, "step": 87500 }, { "epoch": 7.57, "learning_rate": 4.241331838176564e-05, "loss": 0.4428, "step": 87750 }, { "epoch": 7.59, "learning_rate": 4.170044219246323e-05, "loss": 0.441, "step": 88000 }, { "epoch": 7.61, "learning_rate": 4.0992639406171303e-05, "loss": 0.4405, "step": 88250 }, { "epoch": 7.64, "learning_rate": 4.028994318063184e-05, "loss": 0.4412, "step": 88500 }, { "epoch": 7.66, "learning_rate": 3.9592386434364634e-05, "loss": 0.4378, "step": 88750 }, { "epoch": 7.68, "learning_rate": 3.890000184512546e-05, "loss": 0.4388, "step": 89000 }, { "epoch": 7.7, "learning_rate": 3.8212821848375086e-05, "loss": 0.4364, "step": 89250 }, { "epoch": 7.72, "learning_rate": 3.753087863575993e-05, "loss": 0.4395, "step": 89500 }, { "epoch": 7.74, "learning_rate": 3.6854204153603714e-05, "loss": 0.4353, "step": 89750 }, { "epoch": 7.76, "learning_rate": 3.6182830101411315e-05, "loss": 0.435, "step": 90000 }, { "epoch": 7.79, "learning_rate": 3.551678793038358e-05, "loss": 0.435, "step": 90250 }, { "epoch": 7.81, "learning_rate": 3.485610884194389e-05, "loss": 0.4359, "step": 90500 }, { "epoch": 7.83, "learning_rate": 3.420082378627649e-05, "loss": 0.4338, "step": 90750 }, { "epoch": 7.85, "learning_rate": 3.3550963460876776e-05, "loss": 0.4353, "step": 91000 }, { "epoch": 7.87, "learning_rate": 3.2906558309113134e-05, "loss": 0.4316, "step": 91250 }, { "epoch": 7.89, "learning_rate": 3.226763851880079e-05, "loss": 0.4325, "step": 91500 }, { "epoch": 7.92, "learning_rate": 3.163423402078752e-05, "loss": 0.4318, "step": 91750 }, { "epoch": 7.94, "learning_rate": 3.100637448755172e-05, "loss": 0.4313, "step": 92000 }, { "epoch": 7.96, "learning_rate": 3.0384089331812305e-05, "loss": 0.4301, "step": 92250 }, { "epoch": 7.98, "learning_rate": 2.976740770515067e-05, "loss": 0.4322, "step": 92500 }, { "epoch": 8.0, "eval_alliteration_score": 0.42516268980477223, "eval_harmonic_meter_score": 0.19115865795897424, "eval_harmonic_rhyme_score": 0.4073988829309175, "eval_meter_score": 0.4526465021452277, "eval_rhyme_score": 0.8084745294411801, "eval_runtime": 2422.0648, "eval_samples_per_second": 1.115, "eval_steps_per_second": 0.14, "step": 92728 }, { "epoch": 8.0, "learning_rate": 2.9156358496645276e-05, "loss": 0.4282, "step": 92750 }, { "epoch": 8.02, "learning_rate": 2.8550970331518202e-05, "loss": 0.4058, "step": 93000 }, { "epoch": 8.05, "learning_rate": 2.795127156979429e-05, "loss": 0.4054, "step": 93250 }, { "epoch": 8.07, "learning_rate": 2.735729030497229e-05, "loss": 0.406, "step": 93500 }, { "epoch": 8.09, "learning_rate": 2.676905436270911e-05, "loss": 0.4057, "step": 93750 }, { "epoch": 8.11, "learning_rate": 2.6186591299516186e-05, "loss": 0.4054, "step": 94000 }, { "epoch": 8.13, "learning_rate": 2.5609928401468555e-05, "loss": 0.4061, "step": 94250 }, { "epoch": 8.15, "learning_rate": 2.503909268292653e-05, "loss": 0.4045, "step": 94500 }, { "epoch": 8.17, "learning_rate": 2.4474110885270343e-05, "loss": 0.403, "step": 94750 }, { "epoch": 8.2, "learning_rate": 2.3915009475647362e-05, "loss": 0.4041, "step": 95000 }, { "epoch": 8.22, "learning_rate": 2.3361814645732168e-05, "loss": 0.4047, "step": 95250 }, { "epoch": 8.24, "learning_rate": 2.2814552310499563e-05, "loss": 0.4044, "step": 95500 }, { "epoch": 8.26, "learning_rate": 2.2273248107010633e-05, "loss": 0.4035, "step": 95750 }, { "epoch": 8.28, "learning_rate": 2.173792739321174e-05, "loss": 0.4022, "step": 96000 }, { "epoch": 8.3, "learning_rate": 2.1208615246746545e-05, "loss": 0.4034, "step": 96250 }, { "epoch": 8.33, "learning_rate": 2.0685336463781288e-05, "loss": 0.4029, "step": 96500 }, { "epoch": 8.35, "learning_rate": 2.016811555784314e-05, "loss": 0.4009, "step": 96750 }, { "epoch": 8.37, "learning_rate": 1.9656976758671865e-05, "loss": 0.402, "step": 97000 }, { "epoch": 8.39, "learning_rate": 1.9151944011084696e-05, "loss": 0.4025, "step": 97250 }, { "epoch": 8.41, "learning_rate": 1.865304097385472e-05, "loss": 0.4032, "step": 97500 }, { "epoch": 8.43, "learning_rate": 1.8160291018602512e-05, "loss": 0.4006, "step": 97750 }, { "epoch": 8.45, "learning_rate": 1.767371722870125e-05, "loss": 0.4006, "step": 98000 }, { "epoch": 8.48, "learning_rate": 1.7193342398195286e-05, "loss": 0.4017, "step": 98250 }, { "epoch": 8.5, "learning_rate": 1.67191890307325e-05, "loss": 0.4001, "step": 98500 }, { "epoch": 8.52, "learning_rate": 1.625127933850997e-05, "loss": 0.3977, "step": 98750 }, { "epoch": 8.54, "learning_rate": 1.5789635241233484e-05, "loss": 0.4004, "step": 99000 }, { "epoch": 8.56, "learning_rate": 1.5334278365090597e-05, "loss": 0.4011, "step": 99250 }, { "epoch": 8.58, "learning_rate": 1.488523004173765e-05, "loss": 0.3995, "step": 99500 }, { "epoch": 8.61, "learning_rate": 1.4442511307300353e-05, "loss": 0.4012, "step": 99750 }, { "epoch": 8.63, "learning_rate": 1.4006142901388467e-05, "loss": 0.3993, "step": 100000 }, { "epoch": 8.65, "learning_rate": 1.3576145266124039e-05, "loss": 0.3996, "step": 100250 }, { "epoch": 8.67, "learning_rate": 1.3152538545183928e-05, "loss": 0.3973, "step": 100500 }, { "epoch": 8.69, "learning_rate": 1.2735342582856161e-05, "loss": 0.397, "step": 100750 }, { "epoch": 8.71, "learning_rate": 1.2324576923110202e-05, "loss": 0.3989, "step": 101000 }, { "epoch": 8.74, "learning_rate": 1.1920260808681403e-05, "loss": 0.3979, "step": 101250 }, { "epoch": 8.76, "learning_rate": 1.152241318016967e-05, "loss": 0.3945, "step": 101500 }, { "epoch": 8.78, "learning_rate": 1.1131052675152103e-05, "loss": 0.3966, "step": 101750 }, { "epoch": 8.8, "learning_rate": 1.0746197627309855e-05, "loss": 0.3985, "step": 102000 }, { "epoch": 8.82, "learning_rate": 1.0367866065569347e-05, "loss": 0.3964, "step": 102250 }, { "epoch": 8.84, "learning_rate": 9.996075713257652e-06, "loss": 0.3948, "step": 102500 }, { "epoch": 8.86, "learning_rate": 9.630843987272257e-06, "loss": 0.3952, "step": 102750 }, { "epoch": 8.89, "learning_rate": 9.272187997265013e-06, "loss": 0.3948, "step": 103000 }, { "epoch": 8.91, "learning_rate": 8.920124544840857e-06, "loss": 0.3965, "step": 103250 }, { "epoch": 8.93, "learning_rate": 8.574670122770521e-06, "loss": 0.3968, "step": 103500 }, { "epoch": 8.95, "learning_rate": 8.23584091421805e-06, "loss": 0.3945, "step": 103750 }, { "epoch": 8.97, "learning_rate": 7.903652791982501e-06, "loss": 0.3941, "step": 104000 }, { "epoch": 8.99, "learning_rate": 7.578121317754593e-06, "loss": 0.3928, "step": 104250 }, { "epoch": 9.0, "eval_alliteration_score": 0.4286099865047234, "eval_harmonic_meter_score": 0.1830439988800133, "eval_harmonic_rhyme_score": 0.3932058401427829, "eval_meter_score": 0.4515833277501449, "eval_rhyme_score": 0.8040101942334149, "eval_runtime": 2368.5368, "eval_samples_per_second": 1.14, "eval_steps_per_second": 0.143, "step": 104319 }, { "epoch": 9.02, "learning_rate": 7.25926174138754e-06, "loss": 0.3855, "step": 104500 }, { "epoch": 9.04, "learning_rate": 6.947089000182754e-06, "loss": 0.3792, "step": 104750 }, { "epoch": 9.06, "learning_rate": 6.641617718189974e-06, "loss": 0.3828, "step": 105000 }, { "epoch": 9.08, "learning_rate": 6.342862205522359e-06, "loss": 0.3822, "step": 105250 }, { "epoch": 9.1, "learning_rate": 6.050836457685948e-06, "loss": 0.383, "step": 105500 }, { "epoch": 9.12, "learning_rate": 5.765554154924096e-06, "loss": 0.3809, "step": 105750 }, { "epoch": 9.15, "learning_rate": 5.487028661576637e-06, "loss": 0.3791, "step": 106000 }, { "epoch": 9.17, "learning_rate": 5.2152730254538036e-06, "loss": 0.382, "step": 106250 }, { "epoch": 9.19, "learning_rate": 4.9502999772249475e-06, "loss": 0.3824, "step": 106500 }, { "epoch": 9.21, "learning_rate": 4.692121929822185e-06, "loss": 0.3797, "step": 106750 }, { "epoch": 9.23, "learning_rate": 4.4407509778589256e-06, "loss": 0.3819, "step": 107000 }, { "epoch": 9.25, "learning_rate": 4.1961988970632465e-06, "loss": 0.3797, "step": 107250 }, { "epoch": 9.27, "learning_rate": 3.95847714372628e-06, "loss": 0.3805, "step": 107500 }, { "epoch": 9.3, "learning_rate": 3.7275968541655097e-06, "loss": 0.3804, "step": 107750 }, { "epoch": 9.32, "learning_rate": 3.5035688442030917e-06, "loss": 0.3802, "step": 108000 }, { "epoch": 9.34, "learning_rate": 3.286403608659155e-06, "loss": 0.3803, "step": 108250 }, { "epoch": 9.36, "learning_rate": 3.0761113208601807e-06, "loss": 0.3783, "step": 108500 }, { "epoch": 9.38, "learning_rate": 2.872701832162433e-06, "loss": 0.3813, "step": 108750 }, { "epoch": 9.4, "learning_rate": 2.6761846714904277e-06, "loss": 0.3806, "step": 109000 }, { "epoch": 9.43, "learning_rate": 2.4865690448905573e-06, "loss": 0.3812, "step": 109250 }, { "epoch": 9.45, "learning_rate": 2.303863835099867e-06, "loss": 0.381, "step": 109500 }, { "epoch": 9.47, "learning_rate": 2.128077601129841e-06, "loss": 0.3803, "step": 109750 }, { "epoch": 9.49, "learning_rate": 1.959218577865518e-06, "loss": 0.3793, "step": 110000 }, { "epoch": 9.51, "learning_rate": 1.7972946756797058e-06, "loss": 0.3799, "step": 110250 }, { "epoch": 9.53, "learning_rate": 1.6423134800624237e-06, "loss": 0.3811, "step": 110500 }, { "epoch": 9.55, "learning_rate": 1.4942822512654884e-06, "loss": 0.3794, "step": 110750 }, { "epoch": 9.58, "learning_rate": 1.3532079239625016e-06, "loss": 0.3798, "step": 111000 }, { "epoch": 9.6, "learning_rate": 1.2190971069238942e-06, "loss": 0.38, "step": 111250 }, { "epoch": 9.62, "learning_rate": 1.0919560827073903e-06, "loss": 0.3784, "step": 111500 }, { "epoch": 9.64, "learning_rate": 9.717908073636594e-07, "loss": 0.3785, "step": 111750 }, { "epoch": 9.66, "learning_rate": 8.586069101573556e-07, "loss": 0.3785, "step": 112000 }, { "epoch": 9.68, "learning_rate": 7.524096933033131e-07, "loss": 0.3785, "step": 112250 }, { "epoch": 9.71, "learning_rate": 6.532041317182435e-07, "loss": 0.3811, "step": 112500 }, { "epoch": 9.73, "learning_rate": 5.609948727876567e-07, "loss": 0.3803, "step": 112750 }, { "epoch": 9.75, "learning_rate": 4.757862361481346e-07, "loss": 0.3803, "step": 113000 }, { "epoch": 9.77, "learning_rate": 3.97582213484976e-07, "loss": 0.3797, "step": 113250 }, { "epoch": 9.79, "learning_rate": 3.2638646834523e-07, "loss": 0.3795, "step": 113500 }, { "epoch": 9.81, "learning_rate": 2.6220233596603303e-07, "loss": 0.3783, "step": 113750 }, { "epoch": 9.84, "learning_rate": 2.0503282311838397e-07, "loss": 0.3783, "step": 114000 }, { "epoch": 9.86, "learning_rate": 1.5488060796630675e-07, "loss": 0.3764, "step": 114250 }, { "epoch": 9.88, "learning_rate": 1.1174803994136727e-07, "loss": 0.3796, "step": 114500 }, { "epoch": 9.9, "learning_rate": 7.563713963261164e-08, "loss": 0.3794, "step": 114750 }, { "epoch": 9.92, "learning_rate": 4.654959869190844e-08, "loss": 0.3776, "step": 115000 }, { "epoch": 9.94, "learning_rate": 2.448677975472879e-08, "loss": 0.3787, "step": 115250 }, { "epoch": 9.96, "learning_rate": 9.449716376264128e-09, "loss": 0.3794, "step": 115500 }, { "epoch": 9.99, "learning_rate": 1.4391129830815251e-09, "loss": 0.3801, "step": 115750 }, { "epoch": 10.0, "eval_alliteration_score": 0.42713157177880684, "eval_harmonic_meter_score": 0.18179456186285856, "eval_harmonic_rhyme_score": 0.3997335158862321, "eval_meter_score": 0.4478478237036721, "eval_rhyme_score": 0.8034619629050801, "eval_runtime": 2381.8504, "eval_samples_per_second": 1.134, "eval_steps_per_second": 0.142, "step": 115910 }, { "epoch": 10.0, "step": 115910, "total_flos": 5.348698838905389e+18, "train_loss": 0.4458793824766618, "train_runtime": 86475.3469, "train_samples_per_second": 171.576, "train_steps_per_second": 1.34 } ], "max_steps": 115910, "num_train_epochs": 10, "total_flos": 5.348698838905389e+18, "trial_name": null, "trial_params": null }