ReactionT5v1-forward / trainer_state.json
sagawa's picture
Upload 7 files
75d39b9
raw
history blame
25.8 kB
{
"best_metric": 0.05791134759783745,
"best_model_checkpoint": "t5/checkpoint-2670878",
"epoch": 71.0,
"global_step": 2670878,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.00099,
"loss": 0.1435,
"step": 37618
},
{
"epoch": 1.0,
"eval_bleu": 16.08236782537881,
"eval_loss": 0.10417859256267548,
"eval_runtime": 2661.8884,
"eval_samples_per_second": 56.528,
"eval_steps_per_second": 1.767,
"step": 37618
},
{
"epoch": 2.0,
"learning_rate": 0.00098,
"loss": 0.1038,
"step": 75236
},
{
"epoch": 2.0,
"eval_bleu": 17.50001369399556,
"eval_loss": 0.0945146307349205,
"eval_runtime": 2678.123,
"eval_samples_per_second": 56.185,
"eval_steps_per_second": 1.756,
"step": 75236
},
{
"epoch": 3.0,
"learning_rate": 0.0009699999999999999,
"loss": 0.0955,
"step": 112854
},
{
"epoch": 3.0,
"eval_bleu": 17.593643245674098,
"eval_loss": 0.08871327340602875,
"eval_runtime": 2676.0187,
"eval_samples_per_second": 56.229,
"eval_steps_per_second": 1.757,
"step": 112854
},
{
"epoch": 4.0,
"learning_rate": 0.00096,
"loss": 0.0904,
"step": 150472
},
{
"epoch": 4.0,
"eval_bleu": 20.652846575163345,
"eval_loss": 0.08475670218467712,
"eval_runtime": 2670.173,
"eval_samples_per_second": 56.353,
"eval_steps_per_second": 1.761,
"step": 150472
},
{
"epoch": 5.0,
"learning_rate": 0.00095,
"loss": 0.0867,
"step": 188090
},
{
"epoch": 5.0,
"eval_bleu": 21.205818203220545,
"eval_loss": 0.08222997933626175,
"eval_runtime": 2666.3822,
"eval_samples_per_second": 56.433,
"eval_steps_per_second": 1.764,
"step": 188090
},
{
"epoch": 6.0,
"learning_rate": 0.00094,
"loss": 0.0842,
"step": 225708
},
{
"epoch": 6.0,
"eval_bleu": 20.91967058885492,
"eval_loss": 0.07997100055217743,
"eval_runtime": 2671.5123,
"eval_samples_per_second": 56.324,
"eval_steps_per_second": 1.76,
"step": 225708
},
{
"epoch": 7.0,
"learning_rate": 0.00093,
"loss": 0.0822,
"step": 263326
},
{
"epoch": 7.0,
"eval_bleu": 20.488227117972233,
"eval_loss": 0.07861300557851791,
"eval_runtime": 2670.7974,
"eval_samples_per_second": 56.339,
"eval_steps_per_second": 1.761,
"step": 263326
},
{
"epoch": 8.0,
"learning_rate": 0.00092,
"loss": 0.0805,
"step": 300944
},
{
"epoch": 8.0,
"eval_bleu": 20.975971960603545,
"eval_loss": 0.07657758891582489,
"eval_runtime": 2669.7857,
"eval_samples_per_second": 56.361,
"eval_steps_per_second": 1.762,
"step": 300944
},
{
"epoch": 9.0,
"learning_rate": 0.00091,
"loss": 0.08,
"step": 338562
},
{
"epoch": 9.0,
"eval_bleu": 22.305641242381824,
"eval_loss": 0.07586149871349335,
"eval_runtime": 2673.6038,
"eval_samples_per_second": 56.28,
"eval_steps_per_second": 1.759,
"step": 338562
},
{
"epoch": 10.0,
"learning_rate": 0.0009000000000000001,
"loss": 0.0786,
"step": 376180
},
{
"epoch": 10.0,
"eval_bleu": 22.107305825110537,
"eval_loss": 0.07636888325214386,
"eval_runtime": 2672.8776,
"eval_samples_per_second": 56.296,
"eval_steps_per_second": 1.76,
"step": 376180
},
{
"epoch": 11.0,
"learning_rate": 0.0008900000000000001,
"loss": 0.0778,
"step": 413798
},
{
"epoch": 11.0,
"eval_bleu": 22.038073053427944,
"eval_loss": 0.0757184773683548,
"eval_runtime": 2667.8522,
"eval_samples_per_second": 56.402,
"eval_steps_per_second": 1.763,
"step": 413798
},
{
"epoch": 12.0,
"learning_rate": 0.00088,
"loss": 0.077,
"step": 451416
},
{
"epoch": 12.0,
"eval_bleu": 21.406024838885678,
"eval_loss": 0.07439033687114716,
"eval_runtime": 2670.3202,
"eval_samples_per_second": 56.349,
"eval_steps_per_second": 1.761,
"step": 451416
},
{
"epoch": 13.0,
"learning_rate": 0.00087,
"loss": 0.0765,
"step": 489034
},
{
"epoch": 13.0,
"eval_bleu": 21.05042019059627,
"eval_loss": 0.07327836006879807,
"eval_runtime": 2668.8769,
"eval_samples_per_second": 56.38,
"eval_steps_per_second": 1.762,
"step": 489034
},
{
"epoch": 14.0,
"learning_rate": 0.00086,
"loss": 0.0761,
"step": 526652
},
{
"epoch": 14.0,
"eval_bleu": 21.625812853616345,
"eval_loss": 0.07405734062194824,
"eval_runtime": 2666.7447,
"eval_samples_per_second": 56.425,
"eval_steps_per_second": 1.764,
"step": 526652
},
{
"epoch": 15.0,
"learning_rate": 0.00085,
"loss": 0.076,
"step": 564270
},
{
"epoch": 15.0,
"eval_bleu": 23.036110403909763,
"eval_loss": 0.07310452312231064,
"eval_runtime": 2669.8214,
"eval_samples_per_second": 56.36,
"eval_steps_per_second": 1.762,
"step": 564270
},
{
"epoch": 16.0,
"learning_rate": 0.00084,
"loss": 0.0755,
"step": 601888
},
{
"epoch": 16.0,
"eval_bleu": 22.36117696412647,
"eval_loss": 0.07309335470199585,
"eval_runtime": 2681.265,
"eval_samples_per_second": 56.119,
"eval_steps_per_second": 1.754,
"step": 601888
},
{
"epoch": 17.0,
"learning_rate": 0.00083,
"loss": 0.0749,
"step": 639506
},
{
"epoch": 17.0,
"eval_bleu": 22.33041493191003,
"eval_loss": 0.07233680039644241,
"eval_runtime": 2675.1848,
"eval_samples_per_second": 56.247,
"eval_steps_per_second": 1.758,
"step": 639506
},
{
"epoch": 18.0,
"learning_rate": 0.00082,
"loss": 0.0742,
"step": 677124
},
{
"epoch": 18.0,
"eval_bleu": 22.339757553385123,
"eval_loss": 0.07237352430820465,
"eval_runtime": 2673.3113,
"eval_samples_per_second": 56.286,
"eval_steps_per_second": 1.759,
"step": 677124
},
{
"epoch": 19.0,
"learning_rate": 0.0008100000000000001,
"loss": 0.0737,
"step": 714742
},
{
"epoch": 19.0,
"eval_bleu": 22.925070747333475,
"eval_loss": 0.07049386203289032,
"eval_runtime": 2688.2356,
"eval_samples_per_second": 55.974,
"eval_steps_per_second": 1.749,
"step": 714742
},
{
"epoch": 20.0,
"learning_rate": 0.0008,
"loss": 0.073,
"step": 752360
},
{
"epoch": 20.0,
"eval_bleu": 22.979078336400935,
"eval_loss": 0.07081238925457001,
"eval_runtime": 2669.2634,
"eval_samples_per_second": 56.372,
"eval_steps_per_second": 1.762,
"step": 752360
},
{
"epoch": 21.0,
"learning_rate": 0.00079,
"loss": 0.0722,
"step": 789978
},
{
"epoch": 21.0,
"eval_bleu": 23.570886258164016,
"eval_loss": 0.06986574083566666,
"eval_runtime": 2672.5918,
"eval_samples_per_second": 56.302,
"eval_steps_per_second": 1.76,
"step": 789978
},
{
"epoch": 22.0,
"learning_rate": 0.0007800000000000001,
"loss": 0.072,
"step": 827596
},
{
"epoch": 22.0,
"eval_bleu": 23.048014836268056,
"eval_loss": 0.06995159387588501,
"eval_runtime": 2689.3174,
"eval_samples_per_second": 55.951,
"eval_steps_per_second": 1.749,
"step": 827596
},
{
"epoch": 23.0,
"learning_rate": 0.0007700000000000001,
"loss": 0.0716,
"step": 865214
},
{
"epoch": 23.0,
"eval_bleu": 23.188184731833086,
"eval_loss": 0.0698036402463913,
"eval_runtime": 2675.6412,
"eval_samples_per_second": 56.237,
"eval_steps_per_second": 1.758,
"step": 865214
},
{
"epoch": 24.0,
"learning_rate": 0.00076,
"loss": 0.0715,
"step": 902832
},
{
"epoch": 24.0,
"eval_bleu": 22.720403529413186,
"eval_loss": 0.06952499598264694,
"eval_runtime": 2667.7287,
"eval_samples_per_second": 56.404,
"eval_steps_per_second": 1.763,
"step": 902832
},
{
"epoch": 25.0,
"learning_rate": 0.00075,
"loss": 0.0711,
"step": 940450
},
{
"epoch": 25.0,
"eval_bleu": 23.946075678664805,
"eval_loss": 0.06875280290842056,
"eval_runtime": 2666.8035,
"eval_samples_per_second": 56.424,
"eval_steps_per_second": 1.764,
"step": 940450
},
{
"epoch": 26.0,
"learning_rate": 0.00074,
"loss": 0.0704,
"step": 978068
},
{
"epoch": 26.0,
"eval_bleu": 23.070285144081506,
"eval_loss": 0.06891094893217087,
"eval_runtime": 2661.6833,
"eval_samples_per_second": 56.532,
"eval_steps_per_second": 1.767,
"step": 978068
},
{
"epoch": 27.0,
"learning_rate": 0.00073,
"loss": 0.0698,
"step": 1015686
},
{
"epoch": 27.0,
"eval_bleu": 22.89659619862932,
"eval_loss": 0.06900988519191742,
"eval_runtime": 2677.8327,
"eval_samples_per_second": 56.191,
"eval_steps_per_second": 1.756,
"step": 1015686
},
{
"epoch": 28.0,
"learning_rate": 0.0007199999999999999,
"loss": 0.0692,
"step": 1053304
},
{
"epoch": 28.0,
"eval_bleu": 23.078849102736655,
"eval_loss": 0.06756620109081268,
"eval_runtime": 2681.6903,
"eval_samples_per_second": 56.111,
"eval_steps_per_second": 1.754,
"step": 1053304
},
{
"epoch": 29.0,
"learning_rate": 0.00071,
"loss": 0.0686,
"step": 1090922
},
{
"epoch": 29.0,
"eval_bleu": 23.263284227228446,
"eval_loss": 0.06696003675460815,
"eval_runtime": 2672.9451,
"eval_samples_per_second": 56.294,
"eval_steps_per_second": 1.759,
"step": 1090922
},
{
"epoch": 30.0,
"learning_rate": 0.0007,
"loss": 0.0683,
"step": 1128540
},
{
"epoch": 30.0,
"eval_bleu": 23.242985003734645,
"eval_loss": 0.0681118369102478,
"eval_runtime": 2670.4013,
"eval_samples_per_second": 56.348,
"eval_steps_per_second": 1.761,
"step": 1128540
},
{
"epoch": 31.0,
"learning_rate": 0.00069,
"loss": 0.0683,
"step": 1166158
},
{
"epoch": 31.0,
"eval_bleu": 23.22280583522071,
"eval_loss": 0.06741450726985931,
"eval_runtime": 2675.4265,
"eval_samples_per_second": 56.242,
"eval_steps_per_second": 1.758,
"step": 1166158
},
{
"epoch": 32.0,
"learning_rate": 0.00068,
"loss": 0.0677,
"step": 1203776
},
{
"epoch": 32.0,
"eval_bleu": 22.770991743362966,
"eval_loss": 0.06660505384206772,
"eval_runtime": 2669.114,
"eval_samples_per_second": 56.375,
"eval_steps_per_second": 1.762,
"step": 1203776
},
{
"epoch": 33.0,
"learning_rate": 0.00067,
"loss": 0.0666,
"step": 1241394
},
{
"epoch": 33.0,
"eval_bleu": 22.99469621069633,
"eval_loss": 0.06576640158891678,
"eval_runtime": 2674.8682,
"eval_samples_per_second": 56.254,
"eval_steps_per_second": 1.758,
"step": 1241394
},
{
"epoch": 34.0,
"learning_rate": 0.00066,
"loss": 0.0665,
"step": 1279012
},
{
"epoch": 34.0,
"eval_bleu": 22.281704582533195,
"eval_loss": 0.06800168007612228,
"eval_runtime": 2674.1235,
"eval_samples_per_second": 56.269,
"eval_steps_per_second": 1.759,
"step": 1279012
},
{
"epoch": 35.0,
"learning_rate": 0.0006500000000000001,
"loss": 0.066,
"step": 1316630
},
{
"epoch": 35.0,
"eval_bleu": 23.683396099034734,
"eval_loss": 0.06574786454439163,
"eval_runtime": 2672.7263,
"eval_samples_per_second": 56.299,
"eval_steps_per_second": 1.76,
"step": 1316630
},
{
"epoch": 36.0,
"learning_rate": 0.00064,
"loss": 0.0651,
"step": 1354248
},
{
"epoch": 36.0,
"eval_bleu": 23.63507591450045,
"eval_loss": 0.0649677962064743,
"eval_runtime": 2675.0211,
"eval_samples_per_second": 56.25,
"eval_steps_per_second": 1.758,
"step": 1354248
},
{
"epoch": 37.0,
"learning_rate": 0.00063,
"loss": 0.0648,
"step": 1391866
},
{
"epoch": 37.0,
"eval_bleu": 23.437600575634786,
"eval_loss": 0.06616898626089096,
"eval_runtime": 2673.6171,
"eval_samples_per_second": 56.28,
"eval_steps_per_second": 1.759,
"step": 1391866
},
{
"epoch": 38.0,
"learning_rate": 0.00062,
"loss": 0.0646,
"step": 1429484
},
{
"epoch": 38.0,
"eval_bleu": 23.221957446439767,
"eval_loss": 0.06528770178556442,
"eval_runtime": 2674.7944,
"eval_samples_per_second": 56.255,
"eval_steps_per_second": 1.758,
"step": 1429484
},
{
"epoch": 39.0,
"learning_rate": 0.00061,
"loss": 0.0644,
"step": 1467102
},
{
"epoch": 39.0,
"eval_bleu": 24.240861058705942,
"eval_loss": 0.0646873190999031,
"eval_runtime": 2673.7778,
"eval_samples_per_second": 56.277,
"eval_steps_per_second": 1.759,
"step": 1467102
},
{
"epoch": 40.0,
"learning_rate": 0.0006,
"loss": 0.064,
"step": 1504720
},
{
"epoch": 40.0,
"eval_bleu": 23.729064293901235,
"eval_loss": 0.06512880325317383,
"eval_runtime": 2675.4683,
"eval_samples_per_second": 56.241,
"eval_steps_per_second": 1.758,
"step": 1504720
},
{
"epoch": 41.0,
"learning_rate": 0.00059,
"loss": 0.0634,
"step": 1542338
},
{
"epoch": 41.0,
"eval_bleu": 24.1213425444633,
"eval_loss": 0.06399146467447281,
"eval_runtime": 2671.9713,
"eval_samples_per_second": 56.315,
"eval_steps_per_second": 1.76,
"step": 1542338
},
{
"epoch": 42.0,
"learning_rate": 0.00058,
"loss": 0.0627,
"step": 1579956
},
{
"epoch": 42.0,
"eval_bleu": 24.27204496697034,
"eval_loss": 0.0644916221499443,
"eval_runtime": 2672.4679,
"eval_samples_per_second": 56.304,
"eval_steps_per_second": 1.76,
"step": 1579956
},
{
"epoch": 43.0,
"learning_rate": 0.00057,
"loss": 0.0623,
"step": 1617574
},
{
"epoch": 43.0,
"eval_bleu": 23.36660065236038,
"eval_loss": 0.06378339231014252,
"eval_runtime": 2690.9183,
"eval_samples_per_second": 55.918,
"eval_steps_per_second": 1.748,
"step": 1617574
},
{
"epoch": 44.0,
"learning_rate": 0.0005600000000000001,
"loss": 0.0618,
"step": 1655192
},
{
"epoch": 44.0,
"eval_bleu": 24.679877688454404,
"eval_loss": 0.06299971789121628,
"eval_runtime": 2678.1362,
"eval_samples_per_second": 56.185,
"eval_steps_per_second": 1.756,
"step": 1655192
},
{
"epoch": 45.0,
"learning_rate": 0.00055,
"loss": 0.0612,
"step": 1692810
},
{
"epoch": 45.0,
"eval_bleu": 24.308227062427868,
"eval_loss": 0.06324643641710281,
"eval_runtime": 2670.1411,
"eval_samples_per_second": 56.353,
"eval_steps_per_second": 1.761,
"step": 1692810
},
{
"epoch": 46.0,
"learning_rate": 0.00054,
"loss": 0.0609,
"step": 1730428
},
{
"epoch": 46.0,
"eval_bleu": 24.338912136507822,
"eval_loss": 0.06320393830537796,
"eval_runtime": 2668.5724,
"eval_samples_per_second": 56.386,
"eval_steps_per_second": 1.762,
"step": 1730428
},
{
"epoch": 47.0,
"learning_rate": 0.0005300000000000001,
"loss": 0.0598,
"step": 1768046
},
{
"epoch": 47.0,
"eval_bleu": 24.534028212416924,
"eval_loss": 0.06217643991112709,
"eval_runtime": 2662.7925,
"eval_samples_per_second": 56.509,
"eval_steps_per_second": 1.766,
"step": 1768046
},
{
"epoch": 48.0,
"learning_rate": 0.0005200000000000001,
"loss": 0.059,
"step": 1805664
},
{
"epoch": 48.0,
"eval_bleu": 24.32405654969722,
"eval_loss": 0.06318702548742294,
"eval_runtime": 2669.7792,
"eval_samples_per_second": 56.361,
"eval_steps_per_second": 1.762,
"step": 1805664
},
{
"epoch": 49.0,
"learning_rate": 0.00051,
"loss": 0.0582,
"step": 1843282
},
{
"epoch": 49.0,
"eval_bleu": 24.558637559920374,
"eval_loss": 0.061768002808094025,
"eval_runtime": 2665.131,
"eval_samples_per_second": 56.459,
"eval_steps_per_second": 1.765,
"step": 1843282
},
{
"epoch": 50.0,
"learning_rate": 0.0005,
"loss": 0.0578,
"step": 1880900
},
{
"epoch": 50.0,
"eval_bleu": 24.992937128657733,
"eval_loss": 0.06192772835493088,
"eval_runtime": 2664.7257,
"eval_samples_per_second": 56.468,
"eval_steps_per_second": 1.765,
"step": 1880900
},
{
"epoch": 51.0,
"learning_rate": 0.00049,
"loss": 0.0572,
"step": 1918518
},
{
"epoch": 51.0,
"eval_bleu": 24.988204377041534,
"eval_loss": 0.06095375493168831,
"eval_runtime": 2667.4075,
"eval_samples_per_second": 56.411,
"eval_steps_per_second": 1.763,
"step": 1918518
},
{
"epoch": 52.0,
"learning_rate": 0.00048,
"loss": 0.0565,
"step": 1956136
},
{
"epoch": 52.0,
"eval_bleu": 24.4857260697586,
"eval_loss": 0.06117871776223183,
"eval_runtime": 2667.5375,
"eval_samples_per_second": 56.408,
"eval_steps_per_second": 1.763,
"step": 1956136
},
{
"epoch": 53.0,
"learning_rate": 0.00047,
"loss": 0.0558,
"step": 1993754
},
{
"epoch": 53.0,
"eval_bleu": 24.159661135658013,
"eval_loss": 0.06100020557641983,
"eval_runtime": 2664.962,
"eval_samples_per_second": 56.463,
"eval_steps_per_second": 1.765,
"step": 1993754
},
{
"epoch": 54.0,
"learning_rate": 0.00046,
"loss": 0.0549,
"step": 2031372
},
{
"epoch": 54.0,
"eval_bleu": 24.95693051141393,
"eval_loss": 0.0610371008515358,
"eval_runtime": 2672.4766,
"eval_samples_per_second": 56.304,
"eval_steps_per_second": 1.76,
"step": 2031372
},
{
"epoch": 55.0,
"learning_rate": 0.00045000000000000004,
"loss": 0.0543,
"step": 2068990
},
{
"epoch": 55.0,
"eval_bleu": 24.248582749421658,
"eval_loss": 0.0601356141269207,
"eval_runtime": 2680.6436,
"eval_samples_per_second": 56.132,
"eval_steps_per_second": 1.754,
"step": 2068990
},
{
"epoch": 56.0,
"learning_rate": 0.00044,
"loss": 0.0535,
"step": 2106608
},
{
"epoch": 56.0,
"eval_bleu": 24.81909657225387,
"eval_loss": 0.05975135788321495,
"eval_runtime": 2664.766,
"eval_samples_per_second": 56.467,
"eval_steps_per_second": 1.765,
"step": 2106608
},
{
"epoch": 57.0,
"learning_rate": 0.00043,
"loss": 0.0527,
"step": 2144226
},
{
"epoch": 57.0,
"eval_bleu": 25.123109135558508,
"eval_loss": 0.059335704892873764,
"eval_runtime": 2682.9755,
"eval_samples_per_second": 56.084,
"eval_steps_per_second": 1.753,
"step": 2144226
},
{
"epoch": 58.0,
"learning_rate": 0.00042,
"loss": 0.0519,
"step": 2181844
},
{
"epoch": 58.0,
"eval_bleu": 25.139286695396503,
"eval_loss": 0.059507254511117935,
"eval_runtime": 2671.3295,
"eval_samples_per_second": 56.328,
"eval_steps_per_second": 1.761,
"step": 2181844
},
{
"epoch": 59.0,
"learning_rate": 0.00041,
"loss": 0.0511,
"step": 2219462
},
{
"epoch": 59.0,
"eval_bleu": 24.181172367468452,
"eval_loss": 0.0596897155046463,
"eval_runtime": 2676.0935,
"eval_samples_per_second": 56.228,
"eval_steps_per_second": 1.757,
"step": 2219462
},
{
"epoch": 60.0,
"learning_rate": 0.0004,
"loss": 0.0503,
"step": 2257080
},
{
"epoch": 60.0,
"eval_bleu": 24.947492978876518,
"eval_loss": 0.059094786643981934,
"eval_runtime": 2675.1744,
"eval_samples_per_second": 56.247,
"eval_steps_per_second": 1.758,
"step": 2257080
},
{
"epoch": 61.0,
"learning_rate": 0.00039000000000000005,
"loss": 0.0495,
"step": 2294698
},
{
"epoch": 61.0,
"eval_bleu": 24.610208642695124,
"eval_loss": 0.05875646695494652,
"eval_runtime": 2666.1228,
"eval_samples_per_second": 56.438,
"eval_steps_per_second": 1.764,
"step": 2294698
},
{
"epoch": 62.0,
"learning_rate": 0.00038,
"loss": 0.0488,
"step": 2332316
},
{
"epoch": 62.0,
"eval_bleu": 25.548056034179105,
"eval_loss": 0.05863146856427193,
"eval_runtime": 2675.9582,
"eval_samples_per_second": 56.231,
"eval_steps_per_second": 1.758,
"step": 2332316
},
{
"epoch": 63.0,
"learning_rate": 0.00037,
"loss": 0.0478,
"step": 2369934
},
{
"epoch": 63.0,
"eval_bleu": 24.99392195466826,
"eval_loss": 0.059139810502529144,
"eval_runtime": 2664.7531,
"eval_samples_per_second": 56.467,
"eval_steps_per_second": 1.765,
"step": 2369934
},
{
"epoch": 64.0,
"learning_rate": 0.00035999999999999997,
"loss": 0.047,
"step": 2407552
},
{
"epoch": 64.0,
"eval_bleu": 25.64900731292891,
"eval_loss": 0.058654047548770905,
"eval_runtime": 2664.5443,
"eval_samples_per_second": 56.472,
"eval_steps_per_second": 1.765,
"step": 2407552
},
{
"epoch": 65.0,
"learning_rate": 0.00035,
"loss": 0.0461,
"step": 2445170
},
{
"epoch": 65.0,
"eval_bleu": 25.08417201795405,
"eval_loss": 0.058612920343875885,
"eval_runtime": 2684.7255,
"eval_samples_per_second": 56.047,
"eval_steps_per_second": 1.752,
"step": 2445170
},
{
"epoch": 66.0,
"learning_rate": 0.00034,
"loss": 0.045,
"step": 2482788
},
{
"epoch": 66.0,
"eval_bleu": 25.376435853039357,
"eval_loss": 0.05852239951491356,
"eval_runtime": 2662.5018,
"eval_samples_per_second": 56.515,
"eval_steps_per_second": 1.766,
"step": 2482788
},
{
"epoch": 67.0,
"learning_rate": 0.00033,
"loss": 0.0438,
"step": 2520406
},
{
"epoch": 67.0,
"eval_bleu": 25.438339513363914,
"eval_loss": 0.05865955725312233,
"eval_runtime": 2683.1857,
"eval_samples_per_second": 56.079,
"eval_steps_per_second": 1.753,
"step": 2520406
},
{
"epoch": 68.0,
"learning_rate": 0.00032,
"loss": 0.0424,
"step": 2558024
},
{
"epoch": 68.0,
"eval_bleu": 25.663631525748368,
"eval_loss": 0.05910369008779526,
"eval_runtime": 2680.255,
"eval_samples_per_second": 56.141,
"eval_steps_per_second": 1.755,
"step": 2558024
},
{
"epoch": 69.0,
"learning_rate": 0.00031,
"loss": 0.041,
"step": 2595642
},
{
"epoch": 69.0,
"eval_bleu": 25.7205207969251,
"eval_loss": 0.05821070075035095,
"eval_runtime": 2662.8391,
"eval_samples_per_second": 56.508,
"eval_steps_per_second": 1.766,
"step": 2595642
},
{
"epoch": 70.0,
"learning_rate": 0.0003,
"loss": 0.0396,
"step": 2633260
},
{
"epoch": 70.0,
"eval_bleu": 25.672438428551807,
"eval_loss": 0.058603301644325256,
"eval_runtime": 2660.5072,
"eval_samples_per_second": 56.557,
"eval_steps_per_second": 1.768,
"step": 2633260
},
{
"epoch": 71.0,
"learning_rate": 0.00029,
"loss": 0.0383,
"step": 2670878
},
{
"epoch": 71.0,
"eval_bleu": 25.561067707464947,
"eval_loss": 0.05791134759783745,
"eval_runtime": 2663.3342,
"eval_samples_per_second": 56.497,
"eval_steps_per_second": 1.766,
"step": 2670878
}
],
"max_steps": 3761800,
"num_train_epochs": 100,
"total_flos": 1.775364940369281e+19,
"trial_name": null,
"trial_params": null
}