{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.999903428295509,
"global_step": 51770,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.1583011583011583e-06,
"loss": 2.3104,
"step": 1
},
{
"epoch": 0.05,
"learning_rate": 0.00028957528957528956,
"loss": 1.4338,
"step": 250
},
{
"epoch": 0.1,
"learning_rate": 0.0005791505791505791,
"loss": 1.2416,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 0.0005999696654176312,
"loss": 1.2139,
"step": 750
},
{
"epoch": 0.19,
"learning_rate": 0.0005998690722049872,
"loss": 1.1936,
"step": 1000
},
{
"epoch": 0.24,
"learning_rate": 0.0005996980613784548,
"loss": 1.1791,
"step": 1250
},
{
"epoch": 0.29,
"learning_rate": 0.0005994566730961414,
"loss": 1.1672,
"step": 1500
},
{
"epoch": 0.34,
"learning_rate": 0.0005991449640427416,
"loss": 1.1574,
"step": 1750
},
{
"epoch": 0.39,
"learning_rate": 0.0005987630074162269,
"loss": 1.1486,
"step": 2000
},
{
"epoch": 0.43,
"learning_rate": 0.0005983108929106564,
"loss": 1.1427,
"step": 2250
},
{
"epoch": 0.48,
"learning_rate": 0.0005977887266951138,
"loss": 1.1356,
"step": 2500
},
{
"epoch": 0.53,
"learning_rate": 0.0005971966313887766,
"loss": 1.1272,
"step": 2750
},
{
"epoch": 0.58,
"learning_rate": 0.0005965347460321212,
"loss": 1.1254,
"step": 3000
},
{
"epoch": 0.63,
"learning_rate": 0.0005958032260542726,
"loss": 1.118,
"step": 3250
},
{
"epoch": 0.68,
"learning_rate": 0.0005950022432365049,
"loss": 1.1146,
"step": 3500
},
{
"epoch": 0.72,
"learning_rate": 0.0005941319856719031,
"loss": 1.1097,
"step": 3750
},
{
"epoch": 0.77,
"learning_rate": 0.0005931926577211924,
"loss": 1.1063,
"step": 4000
},
{
"epoch": 0.82,
"learning_rate": 0.0005921844799647499,
"loss": 1.1027,
"step": 4250
},
{
"epoch": 0.87,
"learning_rate": 0.0005911076891508052,
"loss": 1.0999,
"step": 4500
},
{
"epoch": 0.92,
"learning_rate": 0.0005899625381398457,
"loss": 1.0966,
"step": 4750
},
{
"epoch": 0.97,
"learning_rate": 0.0005887492958452381,
"loss": 1.0931,
"step": 5000
},
{
"epoch": 1.0,
"eval_alliteration_score": 0.3998726520216492,
"eval_harmonic_meter_score": 0.1010308935212264,
"eval_harmonic_rhyme_score": 0.35292231999777424,
"eval_meter_score": 0.3077031352625674,
"eval_rhyme_score": 0.7291149689204202,
"eval_runtime": 3230.3537,
"eval_samples_per_second": 0.836,
"eval_steps_per_second": 0.026,
"step": 5177
},
{
"epoch": 1.01,
"learning_rate": 0.0005874682471700796,
"loss": 1.0865,
"step": 5250
},
{
"epoch": 1.06,
"learning_rate": 0.0005861196929402952,
"loss": 1.0684,
"step": 5500
},
{
"epoch": 1.11,
"learning_rate": 0.0005847039498339947,
"loss": 1.0705,
"step": 5750
},
{
"epoch": 1.16,
"learning_rate": 0.0005832213503071088,
"loss": 1.0694,
"step": 6000
},
{
"epoch": 1.21,
"learning_rate": 0.0005816722425153186,
"loss": 1.0678,
"step": 6250
},
{
"epoch": 1.26,
"learning_rate": 0.0005800569902322985,
"loss": 1.0669,
"step": 6500
},
{
"epoch": 1.3,
"learning_rate": 0.0005783759727642932,
"loss": 1.0634,
"step": 6750
},
{
"epoch": 1.35,
"learning_rate": 0.0005766295848610451,
"loss": 1.062,
"step": 7000
},
{
"epoch": 1.4,
"learning_rate": 0.0005748182366230962,
"loss": 1.0602,
"step": 7250
},
{
"epoch": 1.45,
"learning_rate": 0.0005729423534054853,
"loss": 1.0587,
"step": 7500
},
{
"epoch": 1.5,
"learning_rate": 0.0005710023757178627,
"loss": 1.0564,
"step": 7750
},
{
"epoch": 1.55,
"learning_rate": 0.000568998759121046,
"loss": 1.0547,
"step": 8000
},
{
"epoch": 1.59,
"learning_rate": 0.0005669319741200425,
"loss": 1.0536,
"step": 8250
},
{
"epoch": 1.64,
"learning_rate": 0.0005648025060535602,
"loss": 1.0517,
"step": 8500
},
{
"epoch": 1.69,
"learning_rate": 0.0005626108549800381,
"loss": 1.0498,
"step": 8750
},
{
"epoch": 1.74,
"learning_rate": 0.0005603575355602176,
"loss": 1.0482,
"step": 9000
},
{
"epoch": 1.79,
"learning_rate": 0.0005580430769362867,
"loss": 1.0479,
"step": 9250
},
{
"epoch": 1.83,
"learning_rate": 0.0005556680226076214,
"loss": 1.0446,
"step": 9500
},
{
"epoch": 1.88,
"learning_rate": 0.0005532329303031583,
"loss": 1.0444,
"step": 9750
},
{
"epoch": 1.93,
"learning_rate": 0.0005507383718504232,
"loss": 1.0416,
"step": 10000
},
{
"epoch": 1.98,
"learning_rate": 0.0005481849330412508,
"loss": 1.0392,
"step": 10250
},
{
"epoch": 2.0,
"eval_alliteration_score": 0.43183984747378457,
"eval_harmonic_meter_score": 0.09296526825958593,
"eval_harmonic_rhyme_score": 0.5441505816703797,
"eval_meter_score": 0.30327833996094755,
"eval_rhyme_score": 0.8429996906126559,
"eval_runtime": 2339.9147,
"eval_samples_per_second": 1.154,
"eval_steps_per_second": 0.036,
"step": 10354
},
{
"epoch": 2.03,
"learning_rate": 0.000545573213494224,
"loss": 1.0256,
"step": 10500
},
{
"epoch": 2.08,
"learning_rate": 0.0005429038265138671,
"loss": 1.0149,
"step": 10750
},
{
"epoch": 2.12,
"learning_rate": 0.0005401773989466244,
"loss": 1.0166,
"step": 11000
},
{
"epoch": 2.17,
"learning_rate": 0.0005373945710336596,
"loss": 1.0157,
"step": 11250
},
{
"epoch": 2.22,
"learning_rate": 0.0005345559962605089,
"loss": 1.0155,
"step": 11500
},
{
"epoch": 2.27,
"learning_rate": 0.0005316623412036252,
"loss": 1.0135,
"step": 11750
},
{
"epoch": 2.32,
"learning_rate": 0.000528714285373846,
"loss": 1.0134,
"step": 12000
},
{
"epoch": 2.37,
"learning_rate": 0.0005257125210568268,
"loss": 1.0118,
"step": 12250
},
{
"epoch": 2.41,
"learning_rate": 0.0005226577531504722,
"loss": 1.012,
"step": 12500
},
{
"epoch": 2.46,
"learning_rate": 0.0005195506989994064,
"loss": 1.0101,
"step": 12750
},
{
"epoch": 2.51,
"learning_rate": 0.0005163920882265211,
"loss": 1.0097,
"step": 13000
},
{
"epoch": 2.56,
"learning_rate": 0.0005131826625616392,
"loss": 1.0085,
"step": 13250
},
{
"epoch": 2.61,
"learning_rate": 0.0005099231756673361,
"loss": 1.0052,
"step": 13500
},
{
"epoch": 2.66,
"learning_rate": 0.0005066143929619589,
"loss": 1.0064,
"step": 13750
},
{
"epoch": 2.7,
"learning_rate": 0.000503257091439885,
"loss": 1.0044,
"step": 14000
},
{
"epoch": 2.75,
"learning_rate": 0.0004998520594890613,
"loss": 1.0015,
"step": 14250
},
{
"epoch": 2.8,
"learning_rate": 0.00049640009670587,
"loss": 1.0014,
"step": 14500
},
{
"epoch": 2.85,
"learning_rate": 0.0004929020137073603,
"loss": 1.0006,
"step": 14750
},
{
"epoch": 2.9,
"learning_rate": 0.0004893586319408926,
"loss": 0.9997,
"step": 15000
},
{
"epoch": 2.95,
"learning_rate": 0.0004857707834912409,
"loss": 0.9991,
"step": 15250
},
{
"epoch": 2.99,
"learning_rate": 0.0004821393108851951,
"loss": 0.9969,
"step": 15500
},
{
"epoch": 3.0,
"eval_alliteration_score": 0.40482822655524603,
"eval_harmonic_meter_score": 0.10445053009647749,
"eval_harmonic_rhyme_score": 0.49816467657522634,
"eval_meter_score": 0.3098783806475492,
"eval_rhyme_score": 0.8195044525640993,
"eval_runtime": 2332.5068,
"eval_samples_per_second": 1.158,
"eval_steps_per_second": 0.036,
"step": 15531
},
{
"epoch": 3.04,
"learning_rate": 0.0004784650668937127,
"loss": 0.9695,
"step": 15750
},
{
"epoch": 3.09,
"learning_rate": 0.0004747489143316642,
"loss": 0.9666,
"step": 16000
},
{
"epoch": 3.14,
"learning_rate": 0.0004709917258552203,
"loss": 0.9684,
"step": 16250
},
{
"epoch": 3.19,
"learning_rate": 0.00046719438375692797,
"loss": 0.9701,
"step": 16500
},
{
"epoch": 3.24,
"learning_rate": 0.0004633577797585233,
"loss": 0.9693,
"step": 16750
},
{
"epoch": 3.28,
"learning_rate": 0.0004594828148015305,
"loss": 0.9689,
"step": 17000
},
{
"epoch": 3.33,
"learning_rate": 0.00045557039883569595,
"loss": 0.9675,
"step": 17250
},
{
"epoch": 3.38,
"learning_rate": 0.0004516214506053063,
"loss": 0.966,
"step": 17500
},
{
"epoch": 3.43,
"learning_rate": 0.000447636897433442,
"loss": 0.9664,
"step": 17750
},
{
"epoch": 3.48,
"learning_rate": 0.000443617675004216,
"loss": 0.9645,
"step": 18000
},
{
"epoch": 3.53,
"learning_rate": 0.00043956472714304834,
"loss": 0.9635,
"step": 18250
},
{
"epoch": 3.57,
"learning_rate": 0.0004354790055950309,
"loss": 0.9621,
"step": 18500
},
{
"epoch": 3.62,
"learning_rate": 0.0004313614698014302,
"loss": 0.962,
"step": 18750
},
{
"epoch": 3.67,
"learning_rate": 0.00042721308667438394,
"loss": 0.9609,
"step": 19000
},
{
"epoch": 3.72,
"learning_rate": 0.00042303483036984366,
"loss": 0.9596,
"step": 19250
},
{
"epoch": 3.77,
"learning_rate": 0.00041882768205881495,
"loss": 0.9578,
"step": 19500
},
{
"epoch": 3.81,
"learning_rate": 0.00041459262969695184,
"loss": 0.9568,
"step": 19750
},
{
"epoch": 3.86,
"learning_rate": 0.0004103306677925571,
"loss": 0.9552,
"step": 20000
},
{
"epoch": 3.91,
"learning_rate": 0.00040604279717304357,
"loss": 0.9534,
"step": 20250
},
{
"epoch": 3.96,
"learning_rate": 0.0004017300247499127,
"loss": 0.9541,
"step": 20500
},
{
"epoch": 4.0,
"eval_alliteration_score": 0.43839452395768513,
"eval_harmonic_meter_score": 0.12086540507848588,
"eval_harmonic_rhyme_score": 0.4986143276050798,
"eval_meter_score": 0.338880767934961,
"eval_rhyme_score": 0.8207791218734892,
"eval_runtime": 2519.5282,
"eval_samples_per_second": 1.072,
"eval_steps_per_second": 0.034,
"step": 20708
},
{
"epoch": 4.01,
"learning_rate": 0.00039739336328230323,
"loss": 0.9477,
"step": 20750
},
{
"epoch": 4.06,
"learning_rate": 0.00039303383113916687,
"loss": 0.9123,
"step": 21000
},
{
"epoch": 4.1,
"learning_rate": 0.00038865245206012774,
"loss": 0.9168,
"step": 21250
},
{
"epoch": 4.15,
"learning_rate": 0.00038425025491507883,
"loss": 0.9193,
"step": 21500
},
{
"epoch": 4.2,
"learning_rate": 0.0003798282734625755,
"loss": 0.9201,
"step": 21750
},
{
"epoch": 4.25,
"learning_rate": 0.0003753875461070794,
"loss": 0.9188,
"step": 22000
},
{
"epoch": 4.3,
"learning_rate": 0.0003709291156551129,
"loss": 0.9192,
"step": 22250
},
{
"epoch": 4.35,
"learning_rate": 0.0003664540290703784,
"loss": 0.9173,
"step": 22500
},
{
"epoch": 4.39,
"learning_rate": 0.00036196333722790264,
"loss": 0.9166,
"step": 22750
},
{
"epoch": 4.44,
"learning_rate": 0.00035745809466726145,
"loss": 0.9145,
"step": 23000
},
{
"epoch": 4.49,
"learning_rate": 0.0003529393593449451,
"loss": 0.9144,
"step": 23250
},
{
"epoch": 4.54,
"learning_rate": 0.00034840819238591994,
"loss": 0.9139,
"step": 23500
},
{
"epoch": 4.59,
"learning_rate": 0.0003438656578344473,
"loss": 0.9126,
"step": 23750
},
{
"epoch": 4.64,
"learning_rate": 0.0003393128224042155,
"loss": 0.9119,
"step": 24000
},
{
"epoch": 4.68,
"learning_rate": 0.0003347507552278469,
"loss": 0.9099,
"step": 24250
},
{
"epoch": 4.73,
"learning_rate": 0.00033018052760583447,
"loss": 0.9072,
"step": 24500
},
{
"epoch": 4.78,
"learning_rate": 0.0003256032127549717,
"loss": 0.908,
"step": 24750
},
{
"epoch": 4.83,
"learning_rate": 0.0003210198855563304,
"loss": 0.9063,
"step": 25000
},
{
"epoch": 4.88,
"learning_rate": 0.00031643162230284954,
"loss": 0.9036,
"step": 25250
},
{
"epoch": 4.93,
"learning_rate": 0.00031183950044659135,
"loss": 0.9039,
"step": 25500
},
{
"epoch": 4.97,
"learning_rate": 0.0003072445983457252,
"loss": 0.9023,
"step": 25750
},
{
"epoch": 5.0,
"eval_alliteration_score": 0.432449105490438,
"eval_harmonic_meter_score": 0.11469247564220626,
"eval_harmonic_rhyme_score": 0.5565104008090618,
"eval_meter_score": 0.3233335011377191,
"eval_rhyme_score": 0.8467553072173599,
"eval_runtime": 2231.152,
"eval_samples_per_second": 1.21,
"eval_steps_per_second": 0.038,
"step": 25885
},
{
"epoch": 5.02,
"learning_rate": 0.0003026479950112996,
"loss": 0.8805,
"step": 26000
},
{
"epoch": 5.07,
"learning_rate": 0.00029805076985386,
"loss": 0.8592,
"step": 26250
},
{
"epoch": 5.12,
"learning_rate": 0.00029345400242997323,
"loss": 0.8613,
"step": 26500
},
{
"epoch": 5.17,
"learning_rate": 0.0002888587721887175,
"loss": 0.8621,
"step": 26750
},
{
"epoch": 5.22,
"learning_rate": 0.0002842661582181979,
"loss": 0.8628,
"step": 27000
},
{
"epoch": 5.26,
"learning_rate": 0.000279677238992146,
"loss": 0.8614,
"step": 27250
},
{
"epoch": 5.31,
"learning_rate": 0.00027509309211666463,
"loss": 0.8617,
"step": 27500
},
{
"epoch": 5.36,
"learning_rate": 0.0002705147940771754,
"loss": 0.8606,
"step": 27750
},
{
"epoch": 5.41,
"learning_rate": 0.0002659434199856307,
"loss": 0.8597,
"step": 28000
},
{
"epoch": 5.46,
"learning_rate": 0.0002613800433280466,
"loss": 0.8589,
"step": 28250
},
{
"epoch": 5.51,
"learning_rate": 0.0002568257357124192,
"loss": 0.856,
"step": 28500
},
{
"epoch": 5.55,
"learning_rate": 0.0002522815666170804,
"loss": 0.8542,
"step": 28750
},
{
"epoch": 5.6,
"learning_rate": 0.00024774860313955555,
"loss": 0.8544,
"step": 29000
},
{
"epoch": 5.65,
"learning_rate": 0.00024322790974597822,
"loss": 0.8517,
"step": 29250
},
{
"epoch": 5.7,
"learning_rate": 0.00023872054802112475,
"loss": 0.8522,
"step": 29500
},
{
"epoch": 5.75,
"learning_rate": 0.00023422757641912385,
"loss": 0.8512,
"step": 29750
},
{
"epoch": 5.79,
"learning_rate": 0.0002297500500149027,
"loss": 0.8495,
"step": 30000
},
{
"epoch": 5.84,
"learning_rate": 0.00022528902025642543,
"loss": 0.8473,
"step": 30250
},
{
"epoch": 5.89,
"learning_rate": 0.00022084553471778432,
"loss": 0.8451,
"step": 30500
},
{
"epoch": 5.94,
"learning_rate": 0.00021642063685319983,
"loss": 0.8414,
"step": 30750
},
{
"epoch": 5.99,
"learning_rate": 0.00021201536575198834,
"loss": 0.8411,
"step": 31000
},
{
"epoch": 6.0,
"eval_alliteration_score": 0.43164362519201227,
"eval_harmonic_meter_score": 0.10449656298407087,
"eval_harmonic_rhyme_score": 0.5587655117502281,
"eval_meter_score": 0.3140831187981907,
"eval_rhyme_score": 0.8511610758760736,
"eval_runtime": 2100.1558,
"eval_samples_per_second": 1.286,
"eval_steps_per_second": 0.04,
"step": 31062
},
{
"epoch": 6.04,
"learning_rate": 0.00020763075589455592,
"loss": 0.8075,
"step": 31250
},
{
"epoch": 6.08,
"learning_rate": 0.00020326783690947226,
"loss": 0.796,
"step": 31500
},
{
"epoch": 6.13,
"learning_rate": 0.00019892763333168628,
"loss": 0.7985,
"step": 31750
},
{
"epoch": 6.18,
"learning_rate": 0.000194611164361936,
"loss": 0.7995,
"step": 32000
},
{
"epoch": 6.23,
"learning_rate": 0.0001903194436274124,
"loss": 0.7987,
"step": 32250
},
{
"epoch": 6.28,
"learning_rate": 0.0001860534789437309,
"loss": 0.7977,
"step": 32500
},
{
"epoch": 6.33,
"learning_rate": 0.00018181427207826875,
"loss": 0.799,
"step": 32750
},
{
"epoch": 6.37,
"learning_rate": 0.0001776028185149218,
"loss": 0.797,
"step": 33000
},
{
"epoch": 6.42,
"learning_rate": 0.00017342010722033724,
"loss": 0.799,
"step": 33250
},
{
"epoch": 6.47,
"learning_rate": 0.00016926712041167666,
"loss": 0.7938,
"step": 33500
},
{
"epoch": 6.52,
"learning_rate": 0.00016514483332596397,
"loss": 0.7928,
"step": 33750
},
{
"epoch": 6.57,
"learning_rate": 0.000161054213991073,
"loss": 0.7948,
"step": 34000
},
{
"epoch": 6.62,
"learning_rate": 0.00015699622299840705,
"loss": 0.7913,
"step": 34250
},
{
"epoch": 6.66,
"learning_rate": 0.00015297181327732549,
"loss": 0.7911,
"step": 34500
},
{
"epoch": 6.71,
"learning_rate": 0.00014898192987136932,
"loss": 0.7883,
"step": 34750
},
{
"epoch": 6.76,
"learning_rate": 0.000145027509716339,
"loss": 0.7865,
"step": 35000
},
{
"epoch": 6.81,
"learning_rate": 0.0001411094814202753,
"loss": 0.7863,
"step": 35250
},
{
"epoch": 6.86,
"learning_rate": 0.00013722876504539635,
"loss": 0.7826,
"step": 35500
},
{
"epoch": 6.91,
"learning_rate": 0.00013338627189204153,
"loss": 0.7829,
"step": 35750
},
{
"epoch": 6.95,
"learning_rate": 0.0001295829042846731,
"loss": 0.7816,
"step": 36000
},
{
"epoch": 7.0,
"eval_alliteration_score": 0.44621513944223107,
"eval_harmonic_meter_score": 0.11710967524898745,
"eval_harmonic_rhyme_score": 0.5515777130298052,
"eval_meter_score": 0.33484803934701896,
"eval_rhyme_score": 0.8477574870320731,
"eval_runtime": 2122.0667,
"eval_samples_per_second": 1.272,
"eval_steps_per_second": 0.04,
"step": 36239
},
{
"epoch": 7.0,
"learning_rate": 0.00012581955535998448,
"loss": 0.7792,
"step": 36250
},
{
"epoch": 7.05,
"learning_rate": 0.0001220971088571674,
"loss": 0.734,
"step": 36500
},
{
"epoch": 7.1,
"learning_rate": 0.00011841643891038518,
"loss": 0.7362,
"step": 36750
},
{
"epoch": 7.15,
"learning_rate": 0.00011477840984350193,
"loss": 0.7386,
"step": 37000
},
{
"epoch": 7.2,
"learning_rate": 0.00011118387596711477,
"loss": 0.7384,
"step": 37250
},
{
"epoch": 7.24,
"learning_rate": 0.00010763368137793809,
"loss": 0.7364,
"step": 37500
},
{
"epoch": 7.29,
"learning_rate": 0.00010412865976058613,
"loss": 0.7367,
"step": 37750
},
{
"epoch": 7.34,
"learning_rate": 0.00010066963419180093,
"loss": 0.7365,
"step": 38000
},
{
"epoch": 7.39,
"learning_rate": 9.725741694717035e-05,
"loss": 0.7343,
"step": 38250
},
{
"epoch": 7.44,
"learning_rate": 9.389280931038336e-05,
"loss": 0.737,
"step": 38500
},
{
"epoch": 7.48,
"learning_rate": 9.057660138506682e-05,
"loss": 0.7345,
"step": 38750
},
{
"epoch": 7.53,
"learning_rate": 8.730957190924632e-05,
"loss": 0.7332,
"step": 39000
},
{
"epoch": 7.58,
"learning_rate": 8.409248807247727e-05,
"loss": 0.7335,
"step": 39250
},
{
"epoch": 7.63,
"learning_rate": 8.092610533568725e-05,
"loss": 0.7302,
"step": 39500
},
{
"epoch": 7.68,
"learning_rate": 7.781116725377309e-05,
"loss": 0.7291,
"step": 39750
},
{
"epoch": 7.73,
"learning_rate": 7.474840530099277e-05,
"loss": 0.7303,
"step": 40000
},
{
"epoch": 7.77,
"learning_rate": 7.173853869919559e-05,
"loss": 0.7288,
"step": 40250
},
{
"epoch": 7.82,
"learning_rate": 6.878227424892822e-05,
"loss": 0.7283,
"step": 40500
},
{
"epoch": 7.87,
"learning_rate": 6.588030616345898e-05,
"loss": 0.726,
"step": 40750
},
{
"epoch": 7.92,
"learning_rate": 6.303331590575642e-05,
"loss": 0.7246,
"step": 41000
},
{
"epoch": 7.97,
"learning_rate": 6.0241972028463316e-05,
"loss": 0.7243,
"step": 41250
},
{
"epoch": 8.0,
"eval_alliteration_score": 0.43950617283950616,
"eval_harmonic_meter_score": 0.11078968787875117,
"eval_harmonic_rhyme_score": 0.5599690603568903,
"eval_meter_score": 0.3230506264598415,
"eval_rhyme_score": 0.850570153117891,
"eval_runtime": 2165.4131,
"eval_samples_per_second": 1.247,
"eval_steps_per_second": 0.039,
"step": 41416
},
{
"epoch": 8.02,
"learning_rate": 5.7506930016901755e-05,
"loss": 0.7133,
"step": 41500
},
{
"epoch": 8.06,
"learning_rate": 5.4828832135146994e-05,
"loss": 0.6873,
"step": 41750
},
{
"epoch": 8.11,
"learning_rate": 5.2208307275205774e-05,
"loss": 0.6887,
"step": 42000
},
{
"epoch": 8.16,
"learning_rate": 4.9645970809335146e-05,
"loss": 0.6883,
"step": 42250
},
{
"epoch": 8.21,
"learning_rate": 4.7142424445535695e-05,
"loss": 0.6887,
"step": 42500
},
{
"epoch": 8.26,
"learning_rate": 4.4698256086254156e-05,
"loss": 0.688,
"step": 42750
},
{
"epoch": 8.31,
"learning_rate": 4.231403969032698e-05,
"loss": 0.6895,
"step": 43000
},
{
"epoch": 8.35,
"learning_rate": 3.999033513819922e-05,
"loss": 0.6901,
"step": 43250
},
{
"epoch": 8.4,
"learning_rate": 3.772768810044874e-05,
"loss": 0.6893,
"step": 43500
},
{
"epoch": 8.45,
"learning_rate": 3.552662990964793e-05,
"loss": 0.6873,
"step": 43750
},
{
"epoch": 8.5,
"learning_rate": 3.338767743559162e-05,
"loss": 0.6895,
"step": 44000
},
{
"epoch": 8.55,
"learning_rate": 3.131133296392159e-05,
"loss": 0.6868,
"step": 44250
},
{
"epoch": 8.6,
"learning_rate": 2.929808407817651e-05,
"loss": 0.6875,
"step": 44500
},
{
"epoch": 8.64,
"learning_rate": 2.734840354529305e-05,
"loss": 0.687,
"step": 44750
},
{
"epoch": 8.69,
"learning_rate": 2.5462749204587507e-05,
"loss": 0.6864,
"step": 45000
},
{
"epoch": 8.74,
"learning_rate": 2.3641563860241965e-05,
"loss": 0.6841,
"step": 45250
},
{
"epoch": 8.79,
"learning_rate": 2.1885275177322048e-05,
"loss": 0.6865,
"step": 45500
},
{
"epoch": 8.84,
"learning_rate": 2.019429558134873e-05,
"loss": 0.685,
"step": 45750
},
{
"epoch": 8.89,
"learning_rate": 1.856902216144962e-05,
"loss": 0.6834,
"step": 46000
},
{
"epoch": 8.93,
"learning_rate": 1.7009836577111302e-05,
"loss": 0.6827,
"step": 46250
},
{
"epoch": 8.98,
"learning_rate": 1.551710496855515e-05,
"loss": 0.6828,
"step": 46500
},
{
"epoch": 9.0,
"eval_alliteration_score": 0.44789762340036565,
"eval_harmonic_meter_score": 0.11082478811479961,
"eval_harmonic_rhyme_score": 0.5453593958718314,
"eval_meter_score": 0.328679747890422,
"eval_rhyme_score": 0.8485504968592449,
"eval_runtime": 2181.3379,
"eval_samples_per_second": 1.238,
"eval_steps_per_second": 0.039,
"step": 46593
},
{
"epoch": 9.03,
"learning_rate": 1.4091177870757209e-05,
"loss": 0.673,
"step": 46750
},
{
"epoch": 9.08,
"learning_rate": 1.2732390131132907e-05,
"loss": 0.6644,
"step": 47000
},
{
"epoch": 9.13,
"learning_rate": 1.1441060830905591e-05,
"loss": 0.6632,
"step": 47250
},
{
"epoch": 9.18,
"learning_rate": 1.0217493210177418e-05,
"loss": 0.6641,
"step": 47500
},
{
"epoch": 9.22,
"learning_rate": 9.061974596719934e-06,
"loss": 0.6654,
"step": 47750
},
{
"epoch": 9.27,
"learning_rate": 7.974776338501631e-06,
"loss": 0.6633,
"step": 48000
},
{
"epoch": 9.32,
"learning_rate": 6.956153739967863e-06,
"loss": 0.6656,
"step": 48250
},
{
"epoch": 9.37,
"learning_rate": 6.0063460020883915e-06,
"loss": 0.664,
"step": 48500
},
{
"epoch": 9.42,
"learning_rate": 5.125576166185996e-06,
"loss": 0.6638,
"step": 48750
},
{
"epoch": 9.46,
"learning_rate": 4.314051061560497e-06,
"loss": 0.6645,
"step": 49000
},
{
"epoch": 9.51,
"learning_rate": 3.571961256919276e-06,
"loss": 0.6624,
"step": 49250
},
{
"epoch": 9.56,
"learning_rate": 2.8994810156265035e-06,
"loss": 0.663,
"step": 49500
},
{
"epoch": 9.61,
"learning_rate": 2.2967682547812782e-06,
"loss": 0.6631,
"step": 49750
},
{
"epoch": 9.66,
"learning_rate": 1.7639645081341524e-06,
"loss": 0.663,
"step": 50000
},
{
"epoch": 9.71,
"learning_rate": 1.3011948928511873e-06,
"loss": 0.6641,
"step": 50250
},
{
"epoch": 9.75,
"learning_rate": 9.085680801330208e-07,
"loss": 0.6634,
"step": 50500
},
{
"epoch": 9.8,
"learning_rate": 5.861762696956151e-07,
"loss": 0.6629,
"step": 50750
},
{
"epoch": 9.85,
"learning_rate": 3.340951681194082e-07,
"loss": 0.6652,
"step": 51000
},
{
"epoch": 9.9,
"learning_rate": 1.523839710711683e-07,
"loss": 0.663,
"step": 51250
},
{
"epoch": 9.95,
"learning_rate": 4.108534940331365e-08,
"loss": 0.6623,
"step": 51500
},
{
"epoch": 10.0,
"learning_rate": 2.2543913346106945e-10,
"loss": 0.6634,
"step": 51750
},
{
"epoch": 10.0,
"eval_alliteration_score": 0.4406211936662607,
"eval_harmonic_meter_score": 0.11891243404089967,
"eval_harmonic_rhyme_score": 0.5297086635780841,
"eval_meter_score": 0.33801080805418304,
"eval_rhyme_score": 0.8432321768961265,
"eval_runtime": 2127.802,
"eval_samples_per_second": 1.269,
"eval_steps_per_second": 0.04,
"step": 51770
},
{
"epoch": 10.0,
"step": 51770,
"total_flos": 1.310953664443056e+18,
"train_loss": 0.8821613311353097,
"train_runtime": 68769.7958,
"train_samples_per_second": 96.366,
"train_steps_per_second": 0.753
}
],
"max_steps": 51770,
"num_train_epochs": 10,
"total_flos": 1.310953664443056e+18,
"trial_name": null,
"trial_params": null
}