{ "best_metric": 92.6157, "best_model_checkpoint": "qa2claim-base/checkpoint-12000", "epoch": 1.5512736773350753, "global_step": 38000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 3.2255, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.8800000000000005e-06, "loss": 3.2465, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.182e-05, "loss": 2.6401, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.782e-05, "loss": 2.2893, "step": 300 }, { "epoch": 0.02, "learning_rate": 2.3820000000000002e-05, "loss": 2.1163, "step": 400 }, { "epoch": 0.02, "learning_rate": 2.982e-05, "loss": 2.0047, "step": 500 }, { "epoch": 0.02, "learning_rate": 2.997105527638191e-05, "loss": 1.9368, "step": 600 }, { "epoch": 0.03, "learning_rate": 2.9940904522613068e-05, "loss": 1.9029, "step": 700 }, { "epoch": 0.03, "learning_rate": 2.9910753768844223e-05, "loss": 1.8627, "step": 800 }, { "epoch": 0.04, "learning_rate": 2.988090452261307e-05, "loss": 1.8403, "step": 900 }, { "epoch": 0.04, "learning_rate": 2.9850753768844224e-05, "loss": 1.835, "step": 1000 }, { "epoch": 0.04, "learning_rate": 2.9820603015075376e-05, "loss": 1.8027, "step": 1100 }, { "epoch": 0.05, "learning_rate": 2.979045226130653e-05, "loss": 1.8176, "step": 1200 }, { "epoch": 0.05, "learning_rate": 2.976030150753769e-05, "loss": 1.7983, "step": 1300 }, { "epoch": 0.06, "learning_rate": 2.9730150753768845e-05, "loss": 1.778, "step": 1400 }, { "epoch": 0.06, "learning_rate": 2.97e-05, "loss": 1.7764, "step": 1500 }, { "epoch": 0.07, "learning_rate": 2.9669849246231156e-05, "loss": 1.7599, "step": 1600 }, { "epoch": 0.07, "learning_rate": 2.9639698492462314e-05, "loss": 1.7572, "step": 1700 }, { "epoch": 0.07, "learning_rate": 2.960954773869347e-05, "loss": 1.7773, "step": 1800 }, { "epoch": 0.08, "learning_rate": 2.9579396984924625e-05, "loss": 1.7301, "step": 1900 }, { "epoch": 0.08, "learning_rate": 2.954924623115578e-05, "loss": 1.7536, "step": 2000 }, { "epoch": 0.08, "eval_bleu": 75.15293608723576, "eval_gen_len": 16.288, "eval_loss": 1.6770071983337402, "eval_meteor": 0.8848959776023978, "eval_rouge1": 92.6205, "eval_rouge2": 86.7136, "eval_rougeL": 89.2742, "eval_rougeLsum": 89.2914, "eval_runtime": 29.1055, "eval_samples_per_second": 17.179, "eval_steps_per_second": 2.165, "step": 2000 }, { "epoch": 0.09, "learning_rate": 2.951909547738694e-05, "loss": 1.7313, "step": 2100 }, { "epoch": 0.09, "learning_rate": 2.948894472361809e-05, "loss": 1.7308, "step": 2200 }, { "epoch": 0.09, "learning_rate": 2.9458793969849246e-05, "loss": 1.7186, "step": 2300 }, { "epoch": 0.1, "learning_rate": 2.94286432160804e-05, "loss": 1.7262, "step": 2400 }, { "epoch": 0.1, "learning_rate": 2.9398492462311556e-05, "loss": 1.702, "step": 2500 }, { "epoch": 0.11, "learning_rate": 2.9368341708542715e-05, "loss": 1.7107, "step": 2600 }, { "epoch": 0.11, "learning_rate": 2.933819095477387e-05, "loss": 1.7187, "step": 2700 }, { "epoch": 0.11, "learning_rate": 2.9308040201005025e-05, "loss": 1.707, "step": 2800 }, { "epoch": 0.12, "learning_rate": 2.927788944723618e-05, "loss": 1.7019, "step": 2900 }, { "epoch": 0.12, "learning_rate": 2.924773869346734e-05, "loss": 1.6977, "step": 3000 }, { "epoch": 0.13, "learning_rate": 2.9217587939698495e-05, "loss": 1.7047, "step": 3100 }, { "epoch": 0.13, "learning_rate": 2.918743718592965e-05, "loss": 1.6943, "step": 3200 }, { "epoch": 0.13, "learning_rate": 2.9157286432160802e-05, "loss": 1.6795, "step": 3300 }, { "epoch": 0.14, "learning_rate": 2.912713567839196e-05, "loss": 1.6781, "step": 3400 }, { "epoch": 0.14, "learning_rate": 2.9096984924623116e-05, "loss": 1.7098, "step": 3500 }, { "epoch": 0.15, "learning_rate": 2.906683417085427e-05, "loss": 1.6774, "step": 3600 }, { "epoch": 0.15, "learning_rate": 2.9036683417085426e-05, "loss": 1.6911, "step": 3700 }, { "epoch": 0.16, "learning_rate": 2.9006532663316585e-05, "loss": 1.6933, "step": 3800 }, { "epoch": 0.16, "learning_rate": 2.8976683417085427e-05, "loss": 1.6742, "step": 3900 }, { "epoch": 0.16, "learning_rate": 2.8946532663316586e-05, "loss": 1.6804, "step": 4000 }, { "epoch": 0.16, "eval_bleu": 77.38072312145013, "eval_gen_len": 16.306, "eval_loss": 1.6234748363494873, "eval_meteor": 0.892630886417938, "eval_rouge1": 93.2399, "eval_rouge2": 88.3086, "eval_rougeL": 90.3444, "eval_rougeLsum": 90.3735, "eval_runtime": 25.4106, "eval_samples_per_second": 19.677, "eval_steps_per_second": 2.479, "step": 4000 }, { "epoch": 0.17, "learning_rate": 2.891638190954774e-05, "loss": 1.673, "step": 4100 }, { "epoch": 0.17, "learning_rate": 2.8886231155778896e-05, "loss": 1.6754, "step": 4200 }, { "epoch": 0.18, "learning_rate": 2.885608040201005e-05, "loss": 1.6668, "step": 4300 }, { "epoch": 0.18, "learning_rate": 2.8825929648241207e-05, "loss": 1.6759, "step": 4400 }, { "epoch": 0.18, "learning_rate": 2.8795778894472362e-05, "loss": 1.6526, "step": 4500 }, { "epoch": 0.19, "learning_rate": 2.8765628140703517e-05, "loss": 1.6654, "step": 4600 }, { "epoch": 0.19, "learning_rate": 2.8735477386934672e-05, "loss": 1.6466, "step": 4700 }, { "epoch": 0.2, "learning_rate": 2.870532663316583e-05, "loss": 1.6789, "step": 4800 }, { "epoch": 0.2, "learning_rate": 2.8675175879396986e-05, "loss": 1.6583, "step": 4900 }, { "epoch": 0.2, "learning_rate": 2.864502512562814e-05, "loss": 1.6639, "step": 5000 }, { "epoch": 0.21, "learning_rate": 2.8614874371859297e-05, "loss": 1.6464, "step": 5100 }, { "epoch": 0.21, "learning_rate": 2.8584723618090452e-05, "loss": 1.6654, "step": 5200 }, { "epoch": 0.22, "learning_rate": 2.8554874371859297e-05, "loss": 1.6364, "step": 5300 }, { "epoch": 0.22, "learning_rate": 2.8525025125628143e-05, "loss": 1.6325, "step": 5400 }, { "epoch": 0.22, "learning_rate": 2.8494874371859298e-05, "loss": 1.6496, "step": 5500 }, { "epoch": 0.23, "learning_rate": 2.846502512562814e-05, "loss": 1.6542, "step": 5600 }, { "epoch": 0.23, "learning_rate": 2.8434874371859295e-05, "loss": 1.6418, "step": 5700 }, { "epoch": 0.24, "learning_rate": 2.8404723618090454e-05, "loss": 1.6421, "step": 5800 }, { "epoch": 0.24, "learning_rate": 2.8374874371859296e-05, "loss": 1.6427, "step": 5900 }, { "epoch": 0.24, "learning_rate": 2.8344723618090454e-05, "loss": 1.6423, "step": 6000 }, { "epoch": 0.24, "eval_bleu": 78.69642597526826, "eval_gen_len": 16.314, "eval_loss": 1.593881607055664, "eval_meteor": 0.8979075563574866, "eval_rouge1": 93.7314, "eval_rouge2": 89.4698, "eval_rougeL": 91.3989, "eval_rougeLsum": 91.4076, "eval_runtime": 25.6849, "eval_samples_per_second": 19.467, "eval_steps_per_second": 2.453, "step": 6000 }, { "epoch": 0.25, "learning_rate": 2.831457286432161e-05, "loss": 1.6491, "step": 6100 }, { "epoch": 0.25, "learning_rate": 2.8284422110552765e-05, "loss": 1.6533, "step": 6200 }, { "epoch": 0.26, "learning_rate": 2.825427135678392e-05, "loss": 1.6501, "step": 6300 }, { "epoch": 0.26, "learning_rate": 2.8224120603015075e-05, "loss": 1.6463, "step": 6400 }, { "epoch": 0.27, "learning_rate": 2.8193969849246234e-05, "loss": 1.6341, "step": 6500 }, { "epoch": 0.27, "learning_rate": 2.816381909547739e-05, "loss": 1.6424, "step": 6600 }, { "epoch": 0.27, "learning_rate": 2.813366834170854e-05, "loss": 1.6399, "step": 6700 }, { "epoch": 0.28, "learning_rate": 2.8103517587939697e-05, "loss": 1.6362, "step": 6800 }, { "epoch": 0.28, "learning_rate": 2.8073366834170855e-05, "loss": 1.6357, "step": 6900 }, { "epoch": 0.29, "learning_rate": 2.804321608040201e-05, "loss": 1.6284, "step": 7000 }, { "epoch": 0.29, "learning_rate": 2.8013065326633166e-05, "loss": 1.6333, "step": 7100 }, { "epoch": 0.29, "learning_rate": 2.798291457286432e-05, "loss": 1.6414, "step": 7200 }, { "epoch": 0.3, "learning_rate": 2.795276381909548e-05, "loss": 1.6166, "step": 7300 }, { "epoch": 0.3, "learning_rate": 2.7922613065326635e-05, "loss": 1.6368, "step": 7400 }, { "epoch": 0.31, "learning_rate": 2.789246231155779e-05, "loss": 1.6313, "step": 7500 }, { "epoch": 0.31, "learning_rate": 2.7862311557788945e-05, "loss": 1.6241, "step": 7600 }, { "epoch": 0.31, "learning_rate": 2.7832160804020104e-05, "loss": 1.619, "step": 7700 }, { "epoch": 0.32, "learning_rate": 2.7802010050251256e-05, "loss": 1.6339, "step": 7800 }, { "epoch": 0.32, "learning_rate": 2.777185929648241e-05, "loss": 1.6152, "step": 7900 }, { "epoch": 0.33, "learning_rate": 2.7741708542713567e-05, "loss": 1.6253, "step": 8000 }, { "epoch": 0.33, "eval_bleu": 80.00978050192599, "eval_gen_len": 16.226, "eval_loss": 1.5748662948608398, "eval_meteor": 0.9021016869942787, "eval_rouge1": 94.2752, "eval_rouge2": 90.7004, "eval_rougeL": 92.2246, "eval_rougeLsum": 92.2489, "eval_runtime": 25.2021, "eval_samples_per_second": 19.84, "eval_steps_per_second": 2.5, "step": 8000 }, { "epoch": 0.33, "learning_rate": 2.7711557788944725e-05, "loss": 1.6291, "step": 8100 }, { "epoch": 0.33, "learning_rate": 2.768140703517588e-05, "loss": 1.6288, "step": 8200 }, { "epoch": 0.34, "learning_rate": 2.7651557788944726e-05, "loss": 1.6158, "step": 8300 }, { "epoch": 0.34, "learning_rate": 2.762140703517588e-05, "loss": 1.6285, "step": 8400 }, { "epoch": 0.35, "learning_rate": 2.7591256281407036e-05, "loss": 1.6231, "step": 8500 }, { "epoch": 0.35, "learning_rate": 2.756110552763819e-05, "loss": 1.6237, "step": 8600 }, { "epoch": 0.36, "learning_rate": 2.753095477386935e-05, "loss": 1.6059, "step": 8700 }, { "epoch": 0.36, "learning_rate": 2.7500804020100505e-05, "loss": 1.6094, "step": 8800 }, { "epoch": 0.36, "learning_rate": 2.7470954773869347e-05, "loss": 1.6092, "step": 8900 }, { "epoch": 0.37, "learning_rate": 2.744110552763819e-05, "loss": 1.6162, "step": 9000 }, { "epoch": 0.37, "learning_rate": 2.7410954773869348e-05, "loss": 1.6059, "step": 9100 }, { "epoch": 0.38, "learning_rate": 2.7380804020100503e-05, "loss": 1.6135, "step": 9200 }, { "epoch": 0.38, "learning_rate": 2.735065326633166e-05, "loss": 1.6144, "step": 9300 }, { "epoch": 0.38, "learning_rate": 2.7320804020100504e-05, "loss": 1.6288, "step": 9400 }, { "epoch": 0.39, "learning_rate": 2.729065326633166e-05, "loss": 1.6098, "step": 9500 }, { "epoch": 0.39, "learning_rate": 2.7260804020100504e-05, "loss": 1.617, "step": 9600 }, { "epoch": 0.4, "learning_rate": 2.723065326633166e-05, "loss": 1.6079, "step": 9700 }, { "epoch": 0.4, "learning_rate": 2.7200502512562815e-05, "loss": 1.611, "step": 9800 }, { "epoch": 0.4, "learning_rate": 2.7170351758793974e-05, "loss": 1.5935, "step": 9900 }, { "epoch": 0.41, "learning_rate": 2.7140201005025125e-05, "loss": 1.5965, "step": 10000 }, { "epoch": 0.41, "eval_bleu": 80.29588431741519, "eval_gen_len": 16.244, "eval_loss": 1.5651723146438599, "eval_meteor": 0.9028810347439424, "eval_rouge1": 94.3213, "eval_rouge2": 90.869, "eval_rougeL": 92.4221, "eval_rougeLsum": 92.4429, "eval_runtime": 25.551, "eval_samples_per_second": 19.569, "eval_steps_per_second": 2.466, "step": 10000 }, { "epoch": 0.41, "learning_rate": 2.711035175879397e-05, "loss": 1.611, "step": 10100 }, { "epoch": 0.42, "learning_rate": 2.7080201005025126e-05, "loss": 1.6066, "step": 10200 }, { "epoch": 0.42, "learning_rate": 2.705005025125628e-05, "loss": 1.6028, "step": 10300 }, { "epoch": 0.42, "learning_rate": 2.7019899497487437e-05, "loss": 1.6192, "step": 10400 }, { "epoch": 0.43, "learning_rate": 2.6989748743718595e-05, "loss": 1.6114, "step": 10500 }, { "epoch": 0.43, "learning_rate": 2.695959798994975e-05, "loss": 1.6043, "step": 10600 }, { "epoch": 0.44, "learning_rate": 2.6929447236180906e-05, "loss": 1.5949, "step": 10700 }, { "epoch": 0.44, "learning_rate": 2.689929648241206e-05, "loss": 1.6017, "step": 10800 }, { "epoch": 0.44, "learning_rate": 2.6869145728643216e-05, "loss": 1.6001, "step": 10900 }, { "epoch": 0.45, "learning_rate": 2.683929648241206e-05, "loss": 1.5983, "step": 11000 }, { "epoch": 0.45, "learning_rate": 2.6809145728643213e-05, "loss": 1.5914, "step": 11100 }, { "epoch": 0.46, "learning_rate": 2.6778994974874372e-05, "loss": 1.5997, "step": 11200 }, { "epoch": 0.46, "learning_rate": 2.6748844221105527e-05, "loss": 1.5962, "step": 11300 }, { "epoch": 0.47, "learning_rate": 2.6718693467336683e-05, "loss": 1.5946, "step": 11400 }, { "epoch": 0.47, "learning_rate": 2.6688542713567838e-05, "loss": 1.5969, "step": 11500 }, { "epoch": 0.47, "learning_rate": 2.6658391959798997e-05, "loss": 1.6132, "step": 11600 }, { "epoch": 0.48, "learning_rate": 2.6628241206030152e-05, "loss": 1.5893, "step": 11700 }, { "epoch": 0.48, "learning_rate": 2.6598090452261307e-05, "loss": 1.6123, "step": 11800 }, { "epoch": 0.49, "learning_rate": 2.6567939698492462e-05, "loss": 1.5975, "step": 11900 }, { "epoch": 0.49, "learning_rate": 2.653778894472362e-05, "loss": 1.5908, "step": 12000 }, { "epoch": 0.49, "eval_bleu": 80.65979156040467, "eval_gen_len": 16.216, "eval_loss": 1.561901330947876, "eval_meteor": 0.9045236466427484, "eval_rouge1": 94.5279, "eval_rouge2": 91.2374, "eval_rougeL": 92.5949, "eval_rougeLsum": 92.6157, "eval_runtime": 25.5485, "eval_samples_per_second": 19.571, "eval_steps_per_second": 2.466, "step": 12000 }, { "epoch": 0.49, "learning_rate": 2.6508241206030153e-05, "loss": 1.6137, "step": 12100 }, { "epoch": 0.5, "learning_rate": 2.6478090452261305e-05, "loss": 2.1155, "step": 12200 }, { "epoch": 0.5, "learning_rate": 2.644793969849246e-05, "loss": 2.6991, "step": 12300 }, { "epoch": 0.51, "learning_rate": 2.641778894472362e-05, "loss": 2.8329, "step": 12400 }, { "epoch": 0.51, "learning_rate": 2.6387638190954774e-05, "loss": 2.8749, "step": 12500 }, { "epoch": 0.51, "learning_rate": 2.635748743718593e-05, "loss": 2.8538, "step": 12600 }, { "epoch": 0.52, "learning_rate": 2.6327336683417085e-05, "loss": 2.8509, "step": 12700 }, { "epoch": 0.52, "learning_rate": 2.6297185929648243e-05, "loss": 2.8037, "step": 12800 }, { "epoch": 0.53, "learning_rate": 2.62670351758794e-05, "loss": 2.8414, "step": 12900 }, { "epoch": 0.53, "learning_rate": 2.6236884422110554e-05, "loss": 2.8386, "step": 13000 }, { "epoch": 0.53, "learning_rate": 2.620673366834171e-05, "loss": 2.8152, "step": 13100 }, { "epoch": 0.54, "learning_rate": 2.6176582914572868e-05, "loss": 2.7805, "step": 13200 }, { "epoch": 0.54, "learning_rate": 2.6146432160804023e-05, "loss": 2.7591, "step": 13300 }, { "epoch": 0.55, "learning_rate": 2.6116281407035175e-05, "loss": 2.7748, "step": 13400 }, { "epoch": 0.55, "learning_rate": 2.608613065326633e-05, "loss": 2.773, "step": 13500 }, { "epoch": 0.56, "learning_rate": 2.605597989949749e-05, "loss": 2.7776, "step": 13600 }, { "epoch": 0.56, "learning_rate": 2.602613065326633e-05, "loss": 2.752, "step": 13700 }, { "epoch": 0.56, "learning_rate": 2.599597989949749e-05, "loss": 2.7383, "step": 13800 }, { "epoch": 0.57, "learning_rate": 2.5965829145728645e-05, "loss": 2.7375, "step": 13900 }, { "epoch": 0.57, "learning_rate": 2.59356783919598e-05, "loss": 2.7502, "step": 14000 }, { "epoch": 0.57, "eval_bleu": 76.79295767834411, "eval_gen_len": 16.252, "eval_loss": 2.6775336265563965, "eval_meteor": 0.890923129283697, "eval_rouge1": 92.9876, "eval_rouge2": 88.6491, "eval_rougeL": 91.2913, "eval_rougeLsum": 91.2855, "eval_runtime": 25.3646, "eval_samples_per_second": 19.712, "eval_steps_per_second": 2.484, "step": 14000 }, { "epoch": 0.58, "learning_rate": 2.5905527638190955e-05, "loss": 2.7346, "step": 14100 }, { "epoch": 0.58, "learning_rate": 2.5875376884422114e-05, "loss": 2.7492, "step": 14200 }, { "epoch": 0.58, "learning_rate": 2.584522613065327e-05, "loss": 2.7273, "step": 14300 }, { "epoch": 0.59, "learning_rate": 2.5815075376884424e-05, "loss": 2.7279, "step": 14400 }, { "epoch": 0.59, "learning_rate": 2.5784924623115576e-05, "loss": 2.7241, "step": 14500 }, { "epoch": 0.6, "learning_rate": 2.575477386934673e-05, "loss": 2.7457, "step": 14600 }, { "epoch": 0.6, "learning_rate": 2.572462311557789e-05, "loss": 2.7347, "step": 14700 }, { "epoch": 0.6, "learning_rate": 2.5694472361809045e-05, "loss": 2.7168, "step": 14800 }, { "epoch": 0.61, "learning_rate": 2.56643216080402e-05, "loss": 2.7086, "step": 14900 }, { "epoch": 0.61, "learning_rate": 2.5634170854271356e-05, "loss": 2.7265, "step": 15000 }, { "epoch": 0.62, "learning_rate": 2.5604020100502515e-05, "loss": 2.7228, "step": 15100 }, { "epoch": 0.62, "learning_rate": 2.557386934673367e-05, "loss": 2.7089, "step": 15200 }, { "epoch": 0.62, "learning_rate": 2.5543718592964825e-05, "loss": 2.6962, "step": 15300 }, { "epoch": 0.63, "learning_rate": 2.551356783919598e-05, "loss": 2.7067, "step": 15400 }, { "epoch": 0.63, "learning_rate": 2.548341708542714e-05, "loss": 2.7016, "step": 15500 }, { "epoch": 0.64, "learning_rate": 2.545326633165829e-05, "loss": 2.6746, "step": 15600 }, { "epoch": 0.64, "learning_rate": 2.5423115577889446e-05, "loss": 2.6954, "step": 15700 }, { "epoch": 0.65, "learning_rate": 2.53929648241206e-05, "loss": 2.6972, "step": 15800 }, { "epoch": 0.65, "learning_rate": 2.536281407035176e-05, "loss": 2.6871, "step": 15900 }, { "epoch": 0.65, "learning_rate": 2.5332663316582915e-05, "loss": 2.7134, "step": 16000 }, { "epoch": 0.65, "eval_bleu": 78.02775295125716, "eval_gen_len": 16.242, "eval_loss": 2.631150484085083, "eval_meteor": 0.8935096956153317, "eval_rouge1": 93.4121, "eval_rouge2": 89.2356, "eval_rougeL": 91.593, "eval_rougeLsum": 91.6198, "eval_runtime": 25.3474, "eval_samples_per_second": 19.726, "eval_steps_per_second": 2.485, "step": 16000 }, { "epoch": 0.66, "learning_rate": 2.530251256281407e-05, "loss": 2.7203, "step": 16100 }, { "epoch": 0.66, "learning_rate": 2.5272361809045226e-05, "loss": 2.7245, "step": 16200 }, { "epoch": 0.67, "learning_rate": 2.5242211055276385e-05, "loss": 2.7023, "step": 16300 }, { "epoch": 0.67, "learning_rate": 2.521206030150754e-05, "loss": 2.7224, "step": 16400 }, { "epoch": 0.67, "learning_rate": 2.5181909547738695e-05, "loss": 2.6802, "step": 16500 }, { "epoch": 0.68, "learning_rate": 2.515175879396985e-05, "loss": 2.6996, "step": 16600 }, { "epoch": 0.68, "learning_rate": 2.5121608040201006e-05, "loss": 2.681, "step": 16700 }, { "epoch": 0.69, "learning_rate": 2.509145728643216e-05, "loss": 2.6895, "step": 16800 }, { "epoch": 0.69, "learning_rate": 2.5061306532663316e-05, "loss": 2.698, "step": 16900 }, { "epoch": 0.69, "learning_rate": 2.503115577889447e-05, "loss": 2.6802, "step": 17000 }, { "epoch": 0.7, "learning_rate": 2.5001005025125627e-05, "loss": 2.6914, "step": 17100 }, { "epoch": 0.7, "learning_rate": 2.4970854271356785e-05, "loss": 2.7011, "step": 17200 }, { "epoch": 0.71, "learning_rate": 2.494070351758794e-05, "loss": 2.6659, "step": 17300 }, { "epoch": 0.71, "learning_rate": 2.4910552763819096e-05, "loss": 2.6905, "step": 17400 }, { "epoch": 0.71, "learning_rate": 2.488040201005025e-05, "loss": 2.6557, "step": 17500 }, { "epoch": 0.72, "learning_rate": 2.485025125628141e-05, "loss": 2.6648, "step": 17600 }, { "epoch": 0.72, "learning_rate": 2.4820100502512565e-05, "loss": 2.6954, "step": 17700 }, { "epoch": 0.73, "learning_rate": 2.478994974874372e-05, "loss": 2.682, "step": 17800 }, { "epoch": 0.73, "learning_rate": 2.4759798994974872e-05, "loss": 2.6767, "step": 17900 }, { "epoch": 0.73, "learning_rate": 2.472964824120603e-05, "loss": 2.6865, "step": 18000 }, { "epoch": 0.73, "eval_bleu": 78.46041201212769, "eval_gen_len": 16.228, "eval_loss": 2.584897756576538, "eval_meteor": 0.8947853545083831, "eval_rouge1": 93.4999, "eval_rouge2": 89.2925, "eval_rougeL": 91.7008, "eval_rougeLsum": 91.7289, "eval_runtime": 24.8047, "eval_samples_per_second": 20.157, "eval_steps_per_second": 2.54, "step": 18000 }, { "epoch": 0.74, "learning_rate": 2.4699497487437186e-05, "loss": 2.6607, "step": 18100 }, { "epoch": 0.74, "learning_rate": 2.466934673366834e-05, "loss": 2.6692, "step": 18200 }, { "epoch": 0.75, "learning_rate": 2.4639195979899497e-05, "loss": 2.6633, "step": 18300 }, { "epoch": 0.75, "learning_rate": 2.4609045226130655e-05, "loss": 2.6655, "step": 18400 }, { "epoch": 0.76, "learning_rate": 2.457889447236181e-05, "loss": 2.6623, "step": 18500 }, { "epoch": 0.76, "learning_rate": 2.4548743718592966e-05, "loss": 2.6679, "step": 18600 }, { "epoch": 0.76, "learning_rate": 2.451859296482412e-05, "loss": 2.674, "step": 18700 }, { "epoch": 0.77, "learning_rate": 2.448844221105528e-05, "loss": 2.6719, "step": 18800 }, { "epoch": 0.77, "learning_rate": 2.4458291457286435e-05, "loss": 2.6832, "step": 18900 }, { "epoch": 0.78, "learning_rate": 2.4428140703517587e-05, "loss": 2.6776, "step": 19000 }, { "epoch": 0.78, "learning_rate": 2.4397989949748742e-05, "loss": 2.6682, "step": 19100 }, { "epoch": 0.78, "learning_rate": 2.4367839195979898e-05, "loss": 2.6956, "step": 19200 }, { "epoch": 0.79, "learning_rate": 2.4337688442211056e-05, "loss": 2.6586, "step": 19300 }, { "epoch": 0.79, "learning_rate": 2.430753768844221e-05, "loss": 2.6666, "step": 19400 }, { "epoch": 0.8, "learning_rate": 2.4277386934673367e-05, "loss": 2.6663, "step": 19500 }, { "epoch": 0.8, "learning_rate": 2.4247236180904522e-05, "loss": 2.6638, "step": 19600 }, { "epoch": 0.8, "learning_rate": 2.421708542713568e-05, "loss": 2.6112, "step": 19700 }, { "epoch": 0.81, "learning_rate": 2.4186934673366836e-05, "loss": 1.9824, "step": 19800 }, { "epoch": 0.81, "learning_rate": 2.415708542713568e-05, "loss": 1.7259, "step": 19900 }, { "epoch": 0.82, "learning_rate": 2.4126934673366836e-05, "loss": 2.2838, "step": 20000 }, { "epoch": 0.82, "eval_bleu": 77.92153227866292, "eval_gen_len": 16.232, "eval_loss": 2.501798629760742, "eval_meteor": 0.8944690987739373, "eval_rouge1": 93.4893, "eval_rouge2": 89.2497, "eval_rougeL": 91.4623, "eval_rougeLsum": 91.51, "eval_runtime": 25.1905, "eval_samples_per_second": 19.849, "eval_steps_per_second": 2.501, "step": 20000 }, { "epoch": 0.82, "learning_rate": 2.409678391959799e-05, "loss": 2.6248, "step": 20100 }, { "epoch": 0.82, "learning_rate": 2.4066633165829144e-05, "loss": 2.6643, "step": 20200 }, { "epoch": 0.83, "learning_rate": 2.4036482412060302e-05, "loss": 2.642, "step": 20300 }, { "epoch": 0.83, "learning_rate": 2.4006331658291458e-05, "loss": 2.67, "step": 20400 }, { "epoch": 0.84, "learning_rate": 2.3976180904522613e-05, "loss": 2.6563, "step": 20500 }, { "epoch": 0.84, "learning_rate": 2.3946030150753768e-05, "loss": 2.6873, "step": 20600 }, { "epoch": 0.85, "learning_rate": 2.3915879396984927e-05, "loss": 2.65, "step": 20700 }, { "epoch": 0.85, "learning_rate": 2.3885728643216082e-05, "loss": 2.6635, "step": 20800 }, { "epoch": 0.85, "learning_rate": 2.3855577889447237e-05, "loss": 2.6443, "step": 20900 }, { "epoch": 0.86, "learning_rate": 2.3825427135678393e-05, "loss": 2.6504, "step": 21000 }, { "epoch": 0.86, "learning_rate": 2.379527638190955e-05, "loss": 2.6425, "step": 21100 }, { "epoch": 0.87, "learning_rate": 2.3765125628140703e-05, "loss": 2.6774, "step": 21200 }, { "epoch": 0.87, "learning_rate": 2.373497487437186e-05, "loss": 2.6575, "step": 21300 }, { "epoch": 0.87, "learning_rate": 2.3704824120603014e-05, "loss": 2.6542, "step": 21400 }, { "epoch": 0.88, "learning_rate": 2.3674673366834172e-05, "loss": 2.6508, "step": 21500 }, { "epoch": 0.88, "learning_rate": 2.3644522613065328e-05, "loss": 2.6648, "step": 21600 }, { "epoch": 0.89, "learning_rate": 2.3614673366834173e-05, "loss": 2.6623, "step": 21700 }, { "epoch": 0.89, "learning_rate": 2.3584522613065328e-05, "loss": 2.66, "step": 21800 }, { "epoch": 0.89, "learning_rate": 2.3554371859296483e-05, "loss": 2.6568, "step": 21900 }, { "epoch": 0.9, "learning_rate": 2.352422110552764e-05, "loss": 2.6591, "step": 22000 }, { "epoch": 0.9, "eval_bleu": 79.49958192973479, "eval_gen_len": 16.234, "eval_loss": 2.5640199184417725, "eval_meteor": 0.9025279208048376, "eval_rouge1": 94.1823, "eval_rouge2": 90.5552, "eval_rougeL": 92.428, "eval_rougeLsum": 92.4351, "eval_runtime": 25.4062, "eval_samples_per_second": 19.68, "eval_steps_per_second": 2.48, "step": 22000 }, { "epoch": 0.9, "learning_rate": 2.3494070351758794e-05, "loss": 2.6622, "step": 22100 }, { "epoch": 0.91, "learning_rate": 2.3463919597989953e-05, "loss": 2.6638, "step": 22200 }, { "epoch": 0.91, "learning_rate": 2.3433768844221108e-05, "loss": 2.6521, "step": 22300 }, { "epoch": 0.91, "learning_rate": 2.340361809045226e-05, "loss": 2.657, "step": 22400 }, { "epoch": 0.92, "learning_rate": 2.3373467336683415e-05, "loss": 2.632, "step": 22500 }, { "epoch": 0.92, "learning_rate": 2.3343316582914574e-05, "loss": 2.6544, "step": 22600 }, { "epoch": 0.93, "learning_rate": 2.331316582914573e-05, "loss": 2.6429, "step": 22700 }, { "epoch": 0.93, "learning_rate": 2.3283015075376884e-05, "loss": 2.6534, "step": 22800 }, { "epoch": 0.93, "learning_rate": 2.325286432160804e-05, "loss": 2.6827, "step": 22900 }, { "epoch": 0.94, "learning_rate": 2.3222713567839198e-05, "loss": 2.6506, "step": 23000 }, { "epoch": 0.94, "learning_rate": 2.3192562814070353e-05, "loss": 2.6396, "step": 23100 }, { "epoch": 0.95, "learning_rate": 2.316241206030151e-05, "loss": 2.6777, "step": 23200 }, { "epoch": 0.95, "learning_rate": 2.3132261306532664e-05, "loss": 2.6548, "step": 23300 }, { "epoch": 0.96, "learning_rate": 2.3102110552763823e-05, "loss": 2.6735, "step": 23400 }, { "epoch": 0.96, "learning_rate": 2.3071959798994974e-05, "loss": 2.6713, "step": 23500 }, { "epoch": 0.96, "learning_rate": 2.304180904522613e-05, "loss": 2.6752, "step": 23600 }, { "epoch": 0.97, "learning_rate": 2.3011658291457285e-05, "loss": 2.6533, "step": 23700 }, { "epoch": 0.97, "learning_rate": 2.2981507537688444e-05, "loss": 2.6623, "step": 23800 }, { "epoch": 0.98, "learning_rate": 2.29513567839196e-05, "loss": 2.6596, "step": 23900 }, { "epoch": 0.98, "learning_rate": 2.2921206030150754e-05, "loss": 2.662, "step": 24000 }, { "epoch": 0.98, "eval_bleu": 79.32255832423239, "eval_gen_len": 16.256, "eval_loss": 2.555393695831299, "eval_meteor": 0.9010940538998614, "eval_rouge1": 94.0079, "eval_rouge2": 90.2547, "eval_rougeL": 92.1916, "eval_rougeLsum": 92.2075, "eval_runtime": 25.4585, "eval_samples_per_second": 19.64, "eval_steps_per_second": 2.475, "step": 24000 }, { "epoch": 0.98, "learning_rate": 2.289105527638191e-05, "loss": 2.6431, "step": 24100 }, { "epoch": 0.99, "learning_rate": 2.2860904522613068e-05, "loss": 2.6577, "step": 24200 }, { "epoch": 0.99, "learning_rate": 2.2830753768844223e-05, "loss": 2.6452, "step": 24300 }, { "epoch": 1.0, "learning_rate": 2.280060301507538e-05, "loss": 2.6465, "step": 24400 }, { "epoch": 1.0, "learning_rate": 2.2770452261306534e-05, "loss": 2.6566, "step": 24500 }, { "epoch": 1.0, "learning_rate": 2.2740301507537686e-05, "loss": 2.6632, "step": 24600 }, { "epoch": 1.01, "learning_rate": 2.2710150753768844e-05, "loss": 2.6179, "step": 24700 }, { "epoch": 1.01, "learning_rate": 2.268e-05, "loss": 2.6355, "step": 24800 }, { "epoch": 1.02, "learning_rate": 2.2649849246231155e-05, "loss": 2.656, "step": 24900 }, { "epoch": 1.02, "learning_rate": 2.261969849246231e-05, "loss": 2.6485, "step": 25000 }, { "epoch": 1.02, "learning_rate": 2.258954773869347e-05, "loss": 2.6834, "step": 25100 }, { "epoch": 1.03, "learning_rate": 2.2559396984924624e-05, "loss": 2.6586, "step": 25200 }, { "epoch": 1.03, "learning_rate": 2.252924623115578e-05, "loss": 2.6245, "step": 25300 }, { "epoch": 1.04, "learning_rate": 2.2499095477386935e-05, "loss": 2.6439, "step": 25400 }, { "epoch": 1.04, "learning_rate": 2.2468944723618093e-05, "loss": 2.657, "step": 25500 }, { "epoch": 1.05, "learning_rate": 2.243879396984925e-05, "loss": 2.651, "step": 25600 }, { "epoch": 1.05, "learning_rate": 2.2408643216080404e-05, "loss": 2.5159, "step": 25700 }, { "epoch": 1.05, "learning_rate": 2.2378492462311556e-05, "loss": 1.8454, "step": 25800 }, { "epoch": 1.06, "learning_rate": 2.2348341708542714e-05, "loss": 1.7432, "step": 25900 }, { "epoch": 1.06, "learning_rate": 2.2318793969849246e-05, "loss": 1.8874, "step": 26000 }, { "epoch": 1.06, "eval_bleu": 79.38237465310864, "eval_gen_len": 16.272, "eval_loss": 2.2466025352478027, "eval_meteor": 0.9008152345684776, "eval_rouge1": 94.0369, "eval_rouge2": 90.1224, "eval_rougeL": 91.9066, "eval_rougeLsum": 91.944, "eval_runtime": 24.8009, "eval_samples_per_second": 20.161, "eval_steps_per_second": 2.54, "step": 26000 }, { "epoch": 1.07, "learning_rate": 2.22886432160804e-05, "loss": 2.5847, "step": 26100 }, { "epoch": 1.07, "learning_rate": 2.2258492462311557e-05, "loss": 2.623, "step": 26200 }, { "epoch": 1.07, "learning_rate": 2.2228341708542716e-05, "loss": 2.665, "step": 26300 }, { "epoch": 1.08, "learning_rate": 2.219819095477387e-05, "loss": 2.647, "step": 26400 }, { "epoch": 1.08, "learning_rate": 2.2168040201005026e-05, "loss": 2.6653, "step": 26500 }, { "epoch": 1.09, "learning_rate": 2.213788944723618e-05, "loss": 2.6546, "step": 26600 }, { "epoch": 1.09, "learning_rate": 2.210773869346734e-05, "loss": 2.6471, "step": 26700 }, { "epoch": 1.09, "learning_rate": 2.2077587939698495e-05, "loss": 2.6871, "step": 26800 }, { "epoch": 1.1, "learning_rate": 2.2047437185929647e-05, "loss": 2.6601, "step": 26900 }, { "epoch": 1.1, "learning_rate": 2.2017286432160802e-05, "loss": 2.6541, "step": 27000 }, { "epoch": 1.11, "learning_rate": 2.198713567839196e-05, "loss": 2.6255, "step": 27100 }, { "epoch": 1.11, "learning_rate": 2.1956984924623116e-05, "loss": 2.6765, "step": 27200 }, { "epoch": 1.11, "learning_rate": 2.192683417085427e-05, "loss": 2.6448, "step": 27300 }, { "epoch": 1.12, "learning_rate": 2.1896683417085427e-05, "loss": 2.6667, "step": 27400 }, { "epoch": 1.12, "learning_rate": 2.1866532663316582e-05, "loss": 2.6544, "step": 27500 }, { "epoch": 1.13, "learning_rate": 2.183638190954774e-05, "loss": 2.6492, "step": 27600 }, { "epoch": 1.13, "learning_rate": 2.1806231155778896e-05, "loss": 2.6541, "step": 27700 }, { "epoch": 1.13, "learning_rate": 2.177608040201005e-05, "loss": 2.6601, "step": 27800 }, { "epoch": 1.14, "learning_rate": 2.1745929648241207e-05, "loss": 2.653, "step": 27900 }, { "epoch": 1.14, "learning_rate": 2.1715778894472362e-05, "loss": 2.6527, "step": 28000 }, { "epoch": 1.14, "eval_bleu": 80.15532470386316, "eval_gen_len": 16.238, "eval_loss": 2.573094129562378, "eval_meteor": 0.9042327142167804, "eval_rouge1": 94.281, "eval_rouge2": 90.8096, "eval_rougeL": 92.559, "eval_rougeLsum": 92.5681, "eval_runtime": 25.5646, "eval_samples_per_second": 19.558, "eval_steps_per_second": 2.464, "step": 28000 }, { "epoch": 1.15, "learning_rate": 2.1685628140703517e-05, "loss": 2.6359, "step": 28100 }, { "epoch": 1.15, "learning_rate": 2.1655477386934672e-05, "loss": 2.6562, "step": 28200 }, { "epoch": 1.16, "learning_rate": 2.1625326633165828e-05, "loss": 2.6518, "step": 28300 }, { "epoch": 1.16, "learning_rate": 2.1595175879396986e-05, "loss": 2.6403, "step": 28400 }, { "epoch": 1.16, "learning_rate": 2.156502512562814e-05, "loss": 2.658, "step": 28500 }, { "epoch": 1.17, "learning_rate": 2.1534874371859297e-05, "loss": 2.6583, "step": 28600 }, { "epoch": 1.17, "learning_rate": 2.1504723618090452e-05, "loss": 2.6719, "step": 28700 }, { "epoch": 1.18, "learning_rate": 2.147457286432161e-05, "loss": 2.6451, "step": 28800 }, { "epoch": 1.18, "learning_rate": 2.1444422110552766e-05, "loss": 2.6701, "step": 28900 }, { "epoch": 1.18, "learning_rate": 2.141427135678392e-05, "loss": 2.6666, "step": 29000 }, { "epoch": 1.19, "learning_rate": 2.1384120603015073e-05, "loss": 2.6536, "step": 29100 }, { "epoch": 1.19, "learning_rate": 2.1353969849246232e-05, "loss": 2.6423, "step": 29200 }, { "epoch": 1.2, "learning_rate": 2.1323819095477387e-05, "loss": 2.6586, "step": 29300 }, { "epoch": 1.2, "learning_rate": 2.1293668341708542e-05, "loss": 2.6554, "step": 29400 }, { "epoch": 1.2, "learning_rate": 2.1263517587939698e-05, "loss": 2.6623, "step": 29500 }, { "epoch": 1.21, "learning_rate": 2.1233366834170856e-05, "loss": 2.6749, "step": 29600 }, { "epoch": 1.21, "learning_rate": 2.120321608040201e-05, "loss": 2.6446, "step": 29700 }, { "epoch": 1.22, "learning_rate": 2.1173065326633167e-05, "loss": 2.6764, "step": 29800 }, { "epoch": 1.22, "learning_rate": 2.1142914572864322e-05, "loss": 2.6522, "step": 29900 }, { "epoch": 1.22, "learning_rate": 2.1112763819095477e-05, "loss": 2.6618, "step": 30000 }, { "epoch": 1.22, "eval_bleu": 79.74215483171986, "eval_gen_len": 16.236, "eval_loss": 2.4326839447021484, "eval_meteor": 0.9044505502074225, "eval_rouge1": 94.3369, "eval_rouge2": 90.6586, "eval_rougeL": 92.1744, "eval_rougeLsum": 92.1895, "eval_runtime": 25.4582, "eval_samples_per_second": 19.64, "eval_steps_per_second": 2.475, "step": 30000 }, { "epoch": 1.23, "learning_rate": 2.1082613065326636e-05, "loss": 1.977, "step": 30100 }, { "epoch": 1.23, "learning_rate": 2.105246231155779e-05, "loss": 1.731, "step": 30200 }, { "epoch": 1.24, "learning_rate": 2.1022311557788943e-05, "loss": 1.7164, "step": 30300 }, { "epoch": 1.24, "learning_rate": 2.09921608040201e-05, "loss": 1.6936, "step": 30400 }, { "epoch": 1.25, "learning_rate": 2.0962010050251257e-05, "loss": 1.6888, "step": 30500 }, { "epoch": 1.25, "learning_rate": 2.0931859296482412e-05, "loss": 1.7005, "step": 30600 }, { "epoch": 1.25, "learning_rate": 2.0901708542713568e-05, "loss": 1.6815, "step": 30700 }, { "epoch": 1.26, "learning_rate": 2.0871557788944723e-05, "loss": 1.6894, "step": 30800 }, { "epoch": 1.26, "learning_rate": 2.0841708542713568e-05, "loss": 1.6858, "step": 30900 }, { "epoch": 1.27, "learning_rate": 2.0811557788944723e-05, "loss": 2.3491, "step": 31000 }, { "epoch": 1.27, "learning_rate": 2.0781407035175882e-05, "loss": 2.6593, "step": 31100 }, { "epoch": 1.27, "learning_rate": 2.0751256281407037e-05, "loss": 2.6468, "step": 31200 }, { "epoch": 1.28, "learning_rate": 2.0721407035175883e-05, "loss": 2.6608, "step": 31300 }, { "epoch": 1.28, "learning_rate": 2.069155778894472e-05, "loss": 2.6587, "step": 31400 }, { "epoch": 1.29, "learning_rate": 2.066140703517588e-05, "loss": 2.6772, "step": 31500 }, { "epoch": 1.29, "learning_rate": 2.0631256281407035e-05, "loss": 2.6861, "step": 31600 }, { "epoch": 1.29, "learning_rate": 2.060140703517588e-05, "loss": 2.6909, "step": 31700 }, { "epoch": 1.3, "learning_rate": 2.0571256281407036e-05, "loss": 2.7178, "step": 31800 }, { "epoch": 1.3, "learning_rate": 2.054110552763819e-05, "loss": 2.7448, "step": 31900 }, { "epoch": 1.31, "learning_rate": 2.0510954773869346e-05, "loss": 2.7368, "step": 32000 }, { "epoch": 1.31, "eval_bleu": 79.88380574425997, "eval_gen_len": 16.228, "eval_loss": 2.729801893234253, "eval_meteor": 0.9013053514191987, "eval_rouge1": 94.1293, "eval_rouge2": 90.4052, "eval_rougeL": 92.4025, "eval_rougeLsum": 92.4078, "eval_runtime": 25.6909, "eval_samples_per_second": 19.462, "eval_steps_per_second": 2.452, "step": 32000 }, { "epoch": 1.31, "learning_rate": 2.048110552763819e-05, "loss": 2.7485, "step": 32100 }, { "epoch": 1.31, "learning_rate": 2.0450954773869347e-05, "loss": 2.7678, "step": 32200 }, { "epoch": 1.32, "learning_rate": 2.0420804020100506e-05, "loss": 2.7518, "step": 32300 }, { "epoch": 1.32, "learning_rate": 2.039065326633166e-05, "loss": 2.7449, "step": 32400 }, { "epoch": 1.33, "learning_rate": 2.0360502512562813e-05, "loss": 2.7359, "step": 32500 }, { "epoch": 1.33, "learning_rate": 2.0330351758793968e-05, "loss": 2.7379, "step": 32600 }, { "epoch": 1.33, "learning_rate": 2.0300201005025127e-05, "loss": 2.743, "step": 32700 }, { "epoch": 1.34, "learning_rate": 2.0270050251256282e-05, "loss": 2.73, "step": 32800 }, { "epoch": 1.34, "learning_rate": 2.0240201005025127e-05, "loss": 2.7259, "step": 32900 }, { "epoch": 1.35, "learning_rate": 2.0210050251256282e-05, "loss": 2.6952, "step": 33000 }, { "epoch": 1.35, "learning_rate": 2.0179899497487438e-05, "loss": 2.6952, "step": 33100 }, { "epoch": 1.36, "learning_rate": 2.0149748743718593e-05, "loss": 2.6855, "step": 33200 }, { "epoch": 1.36, "learning_rate": 2.011959798994975e-05, "loss": 2.7112, "step": 33300 }, { "epoch": 1.36, "learning_rate": 2.0089447236180907e-05, "loss": 2.6988, "step": 33400 }, { "epoch": 1.37, "learning_rate": 2.0059296482412062e-05, "loss": 2.6994, "step": 33500 }, { "epoch": 1.37, "learning_rate": 2.0029145728643214e-05, "loss": 2.6774, "step": 33600 }, { "epoch": 1.38, "learning_rate": 1.9998994974874373e-05, "loss": 2.6856, "step": 33700 }, { "epoch": 1.38, "learning_rate": 1.9968844221105528e-05, "loss": 2.6875, "step": 33800 }, { "epoch": 1.38, "learning_rate": 1.9938693467336683e-05, "loss": 2.6905, "step": 33900 }, { "epoch": 1.39, "learning_rate": 1.990854271356784e-05, "loss": 2.6858, "step": 34000 }, { "epoch": 1.39, "eval_bleu": 80.43692634867155, "eval_gen_len": 16.234, "eval_loss": 2.6981565952301025, "eval_meteor": 0.9051906006959359, "eval_rouge1": 94.4532, "eval_rouge2": 91.0106, "eval_rougeL": 92.3314, "eval_rougeLsum": 92.3438, "eval_runtime": 24.9557, "eval_samples_per_second": 20.035, "eval_steps_per_second": 2.524, "step": 34000 }, { "epoch": 1.39, "learning_rate": 1.9878391959798994e-05, "loss": 2.6896, "step": 34100 }, { "epoch": 1.4, "learning_rate": 1.9848241206030152e-05, "loss": 2.688, "step": 34200 }, { "epoch": 1.4, "learning_rate": 1.9818090452261308e-05, "loss": 2.6817, "step": 34300 }, { "epoch": 1.4, "learning_rate": 1.9787939698492463e-05, "loss": 2.6833, "step": 34400 }, { "epoch": 1.41, "learning_rate": 1.9758090452261308e-05, "loss": 2.6626, "step": 34500 }, { "epoch": 1.41, "learning_rate": 1.9727939698492464e-05, "loss": 2.6504, "step": 34600 }, { "epoch": 1.42, "learning_rate": 1.9697788944723615e-05, "loss": 2.6603, "step": 34700 }, { "epoch": 1.42, "learning_rate": 1.9667638190954774e-05, "loss": 2.6381, "step": 34800 }, { "epoch": 1.42, "learning_rate": 1.963748743718593e-05, "loss": 2.6358, "step": 34900 }, { "epoch": 1.43, "learning_rate": 1.9607336683417085e-05, "loss": 2.6053, "step": 35000 }, { "epoch": 1.43, "learning_rate": 1.957718592964824e-05, "loss": 2.6376, "step": 35100 }, { "epoch": 1.44, "learning_rate": 1.95470351758794e-05, "loss": 2.619, "step": 35200 }, { "epoch": 1.44, "learning_rate": 1.9516884422110554e-05, "loss": 2.6132, "step": 35300 }, { "epoch": 1.45, "learning_rate": 1.948673366834171e-05, "loss": 2.6241, "step": 35400 }, { "epoch": 1.45, "learning_rate": 1.9456582914572864e-05, "loss": 2.6128, "step": 35500 }, { "epoch": 1.45, "learning_rate": 1.9426432160804023e-05, "loss": 2.6169, "step": 35600 }, { "epoch": 1.46, "learning_rate": 1.9396281407035178e-05, "loss": 2.6206, "step": 35700 }, { "epoch": 1.46, "learning_rate": 1.936613065326633e-05, "loss": 2.5968, "step": 35800 }, { "epoch": 1.47, "learning_rate": 1.9335979899497485e-05, "loss": 2.6079, "step": 35900 }, { "epoch": 1.47, "learning_rate": 1.9305829145728644e-05, "loss": 2.6236, "step": 36000 }, { "epoch": 1.47, "eval_bleu": 80.05686051541119, "eval_gen_len": 16.246, "eval_loss": 2.579113245010376, "eval_meteor": 0.9032701849367213, "eval_rouge1": 94.3329, "eval_rouge2": 90.4972, "eval_rougeL": 92.1838, "eval_rougeLsum": 92.1803, "eval_runtime": 25.6081, "eval_samples_per_second": 19.525, "eval_steps_per_second": 2.46, "step": 36000 }, { "epoch": 1.47, "learning_rate": 1.92756783919598e-05, "loss": 2.6189, "step": 36100 }, { "epoch": 1.48, "learning_rate": 1.9245527638190955e-05, "loss": 2.6045, "step": 36200 }, { "epoch": 1.48, "learning_rate": 1.921537688442211e-05, "loss": 2.6162, "step": 36300 }, { "epoch": 1.49, "learning_rate": 1.918522613065327e-05, "loss": 2.6158, "step": 36400 }, { "epoch": 1.49, "learning_rate": 1.9155075376884424e-05, "loss": 2.6142, "step": 36500 }, { "epoch": 1.49, "learning_rate": 1.912492462311558e-05, "loss": 2.6383, "step": 36600 }, { "epoch": 1.5, "learning_rate": 1.9095075376884424e-05, "loss": 2.6594, "step": 36700 }, { "epoch": 1.5, "learning_rate": 1.906522613065327e-05, "loss": 2.6318, "step": 36800 }, { "epoch": 1.51, "learning_rate": 1.9035075376884425e-05, "loss": 2.6514, "step": 36900 }, { "epoch": 1.51, "learning_rate": 1.9004924623115577e-05, "loss": 2.6613, "step": 37000 }, { "epoch": 1.51, "learning_rate": 1.8974773869346732e-05, "loss": 2.6357, "step": 37100 }, { "epoch": 1.52, "learning_rate": 1.894462311557789e-05, "loss": 2.6453, "step": 37200 }, { "epoch": 1.52, "learning_rate": 1.8914773869346733e-05, "loss": 2.6371, "step": 37300 }, { "epoch": 1.53, "learning_rate": 1.888462311557789e-05, "loss": 2.6155, "step": 37400 }, { "epoch": 1.53, "learning_rate": 1.8854773869346733e-05, "loss": 2.6097, "step": 37500 }, { "epoch": 1.53, "learning_rate": 1.882522613065327e-05, "loss": 2.6346, "step": 37600 }, { "epoch": 1.54, "learning_rate": 1.8795075376884424e-05, "loss": 2.6374, "step": 37700 }, { "epoch": 1.54, "learning_rate": 1.876492462311558e-05, "loss": 2.6266, "step": 37800 }, { "epoch": 1.55, "learning_rate": 1.8734773869346734e-05, "loss": 2.6392, "step": 37900 }, { "epoch": 1.55, "learning_rate": 1.870462311557789e-05, "loss": 2.6279, "step": 38000 }, { "epoch": 1.55, "eval_bleu": 80.26404550464542, "eval_gen_len": 16.212, "eval_loss": 2.613689422607422, "eval_meteor": 0.9019056042294404, "eval_rouge1": 94.1522, "eval_rouge2": 90.7421, "eval_rougeL": 92.4197, "eval_rougeLsum": 92.443, "eval_runtime": 25.3778, "eval_samples_per_second": 19.702, "eval_steps_per_second": 2.482, "step": 38000 } ], "max_steps": 100000, "num_train_epochs": 5, "total_flos": 1.8512320510138778e+17, "trial_name": null, "trial_params": null }