|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.13174555541730404, |
|
"global_step": 2250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.666666666666667e-06, |
|
"loss": 2.1425, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.1683833261066357e-06, |
|
"loss": 2.0497, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.4618687578661045e-06, |
|
"loss": 1.8724, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.6700999855466042e-06, |
|
"loss": 1.8389, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.8316166738933647e-06, |
|
"loss": 1.774, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.963585417306073e-06, |
|
"loss": 1.7948, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.075163400023762e-06, |
|
"loss": 1.7367, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.171816644986573e-06, |
|
"loss": 1.7413, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.257070849065542e-06, |
|
"loss": 1.7729, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.333333333333334e-06, |
|
"loss": 1.7465, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.402321141930376e-06, |
|
"loss": 1.7541, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.4653020767460416e-06, |
|
"loss": 1.7026, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.523238920511395e-06, |
|
"loss": 1.7366, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5768800594637304e-06, |
|
"loss": 1.6725, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.626818765092802e-06, |
|
"loss": 1.6984, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.6735333044265414e-06, |
|
"loss": 1.6792, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.717414868963791e-06, |
|
"loss": 1.6731, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.7587875085055104e-06, |
|
"loss": 1.7055, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.7979226682547152e-06, |
|
"loss": 1.6812, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.835049992773302e-06, |
|
"loss": 1.624, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.870365491223199e-06, |
|
"loss": 1.6899, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.9040378013703444e-06, |
|
"loss": 1.7062, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.936213060029322e-06, |
|
"loss": 1.6366, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.96701873618601e-06, |
|
"loss": 1.6621, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.996566681120062e-06, |
|
"loss": 1.7629, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_gsm8k_hard_accuracy": 0.8147876715117462, |
|
"eval_gsm8k_hard_loss": 0.76171875, |
|
"eval_gsm8k_hard_runtime": 2.971, |
|
"eval_gsm8k_hard_samples_per_second": 88.859, |
|
"eval_gsm8k_hard_steps_per_second": 2.02, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_webgpt_accuracy": 0.48539789112821163, |
|
"eval_webgpt_loss": 2.314453125, |
|
"eval_webgpt_runtime": 16.0854, |
|
"eval_webgpt_samples_per_second": 243.451, |
|
"eval_webgpt_steps_per_second": 5.098, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_squad_v2_accuracy": 0.8767175411152919, |
|
"eval_squad_v2_loss": 0.428466796875, |
|
"eval_squad_v2_runtime": 89.9878, |
|
"eval_squad_v2_samples_per_second": 289.639, |
|
"eval_squad_v2_steps_per_second": 6.034, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_adversarial_qa_accuracy": 0.7944240470433719, |
|
"eval_adversarial_qa_loss": 0.986328125, |
|
"eval_adversarial_qa_runtime": 22.0494, |
|
"eval_adversarial_qa_samples_per_second": 272.116, |
|
"eval_adversarial_qa_steps_per_second": 5.669, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_private_tuning_accuracy": 0.6531239810829162, |
|
"eval_private_tuning_loss": 1.337890625, |
|
"eval_private_tuning_runtime": 61.5659, |
|
"eval_private_tuning_samples_per_second": 343.989, |
|
"eval_private_tuning_steps_per_second": 7.179, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_oa_translated_accuracy": 0.6721688366960878, |
|
"eval_oa_translated_loss": 1.3828125, |
|
"eval_oa_translated_runtime": 712.3024, |
|
"eval_oa_translated_samples_per_second": 196.356, |
|
"eval_oa_translated_steps_per_second": 4.091, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_prosocial_dialogue_accuracy": 0.4919427530416725, |
|
"eval_prosocial_dialogue_loss": 1.9140625, |
|
"eval_prosocial_dialogue_runtime": 91.8198, |
|
"eval_prosocial_dialogue_samples_per_second": 293.869, |
|
"eval_prosocial_dialogue_steps_per_second": 6.132, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_math_qa_accuracy": 0.540921279895434, |
|
"eval_math_qa_loss": 2.076171875, |
|
"eval_math_qa_runtime": 19.0288, |
|
"eval_math_qa_samples_per_second": 313.63, |
|
"eval_math_qa_steps_per_second": 6.569, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wikihow_accuracy": 0.5926709194286507, |
|
"eval_wikihow_loss": 2.01171875, |
|
"eval_wikihow_runtime": 7.434, |
|
"eval_wikihow_samples_per_second": 308.448, |
|
"eval_wikihow_steps_per_second": 6.457, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_joke_accuracy": 0.4775398028809704, |
|
"eval_joke_loss": 2.380859375, |
|
"eval_joke_runtime": 0.5994, |
|
"eval_joke_samples_per_second": 126.794, |
|
"eval_joke_steps_per_second": 3.337, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_gsm8k_accuracy": 0.7411105358167392, |
|
"eval_gsm8k_loss": 1.0078125, |
|
"eval_gsm8k_runtime": 6.2732, |
|
"eval_gsm8k_samples_per_second": 238.315, |
|
"eval_gsm8k_steps_per_second": 5.101, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-hi_accuracy": 0.6262273619899629, |
|
"eval_ted_trans_en-hi_loss": 1.46875, |
|
"eval_ted_trans_en-hi_runtime": 1.1364, |
|
"eval_ted_trans_en-hi_samples_per_second": 90.639, |
|
"eval_ted_trans_en-hi_steps_per_second": 2.64, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_de-ja_accuracy": 0.6054150683400991, |
|
"eval_ted_trans_de-ja_loss": 1.76953125, |
|
"eval_ted_trans_de-ja_runtime": 3.475, |
|
"eval_ted_trans_de-ja_samples_per_second": 206.617, |
|
"eval_ted_trans_de-ja_steps_per_second": 4.317, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_nl-en_accuracy": 0.7055143773327025, |
|
"eval_ted_trans_nl-en_loss": 1.3232421875, |
|
"eval_ted_trans_nl-en_runtime": 4.3836, |
|
"eval_ted_trans_nl-en_samples_per_second": 175.884, |
|
"eval_ted_trans_nl-en_steps_per_second": 3.878, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-ja_accuracy": 0.6165139119558755, |
|
"eval_ted_trans_en-ja_loss": 1.685546875, |
|
"eval_ted_trans_en-ja_runtime": 3.9868, |
|
"eval_ted_trans_en-ja_samples_per_second": 200.912, |
|
"eval_ted_trans_en-ja_steps_per_second": 4.264, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-es_accuracy": 0.7508869040130834, |
|
"eval_ted_trans_en-es_loss": 1.0810546875, |
|
"eval_ted_trans_en-es_runtime": 4.6601, |
|
"eval_ted_trans_en-es_samples_per_second": 177.248, |
|
"eval_ted_trans_en-es_steps_per_second": 3.863, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_ted_trans_en-ms_accuracy": 0.6461456102783726, |
|
"eval_ted_trans_en-ms_loss": 1.6650390625, |
|
"eval_ted_trans_en-ms_runtime": 0.4654, |
|
"eval_ted_trans_en-ms_samples_per_second": 90.238, |
|
"eval_ted_trans_en-ms_steps_per_second": 2.149, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_xsum_accuracy": 0.5982287223480477, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 192.75, |
|
"eval_xsum_samples_per_second": 211.72, |
|
"eval_xsum_steps_per_second": 4.415, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_cnn_dailymail_accuracy": 0.6676366328844463, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 274.4306, |
|
"eval_cnn_dailymail_samples_per_second": 209.244, |
|
"eval_cnn_dailymail_steps_per_second": 4.362, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_multi_news_accuracy": 0.5282928997840721, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 46.6919, |
|
"eval_multi_news_samples_per_second": 192.646, |
|
"eval_multi_news_steps_per_second": 4.026, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_tldr_news_accuracy": 0.5294836828740713, |
|
"eval_tldr_news_loss": 2.2265625, |
|
"eval_tldr_news_runtime": 3.3342, |
|
"eval_tldr_news_samples_per_second": 428.294, |
|
"eval_tldr_news_steps_per_second": 8.998, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_scitldr_accuracy": 0.49432739059967584, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.6633, |
|
"eval_scitldr_samples_per_second": 149.816, |
|
"eval_scitldr_steps_per_second": 3.379, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_samsum_accuracy": 0.6079303492003407, |
|
"eval_samsum_loss": 1.4892578125, |
|
"eval_samsum_runtime": 14.0655, |
|
"eval_samsum_samples_per_second": 209.52, |
|
"eval_samsum_steps_per_second": 4.408, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_debate_sum_accuracy": 0.9296994127333449, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 250.1193, |
|
"eval_debate_sum_samples_per_second": 192.364, |
|
"eval_debate_sum_steps_per_second": 4.01, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_billsum_accuracy": 0.6637741667488066, |
|
"eval_billsum_loss": 1.4560546875, |
|
"eval_billsum_runtime": 20.9773, |
|
"eval_billsum_samples_per_second": 180.672, |
|
"eval_billsum_steps_per_second": 3.766, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_zh-en_accuracy": 0.6277540662129427, |
|
"eval_wmt2019_zh-en_loss": 1.6943359375, |
|
"eval_wmt2019_zh-en_runtime": 10.9759, |
|
"eval_wmt2019_zh-en_samples_per_second": 362.702, |
|
"eval_wmt2019_zh-en_steps_per_second": 7.562, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_ru-en_accuracy": 0.721860857670872, |
|
"eval_wmt2019_ru-en_loss": 1.11328125, |
|
"eval_wmt2019_ru-en_runtime": 10.5639, |
|
"eval_wmt2019_ru-en_samples_per_second": 283.986, |
|
"eval_wmt2019_ru-en_steps_per_second": 5.964, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_de-en_accuracy": 0.7348780028355294, |
|
"eval_wmt2019_de-en_loss": 1.083984375, |
|
"eval_wmt2019_de-en_runtime": 7.6263, |
|
"eval_wmt2019_de-en_samples_per_second": 393.113, |
|
"eval_wmt2019_de-en_steps_per_second": 8.261, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_wmt2019_fr-de_accuracy": 0.7198844756374025, |
|
"eval_wmt2019_fr-de_loss": 1.1572265625, |
|
"eval_wmt2019_fr-de_runtime": 5.8183, |
|
"eval_wmt2019_fr-de_samples_per_second": 259.868, |
|
"eval_wmt2019_fr-de_steps_per_second": 5.5, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_essay_instruction_accuracy": 0.5889370453088031, |
|
"eval_essay_instruction_loss": 2.01171875, |
|
"eval_essay_instruction_runtime": 4.9645, |
|
"eval_essay_instruction_samples_per_second": 83.191, |
|
"eval_essay_instruction_steps_per_second": 1.813, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_eli5_accuracy": 0.44436461029042645, |
|
"eval_reddit_eli5_loss": 2.56640625, |
|
"eval_reddit_eli5_runtime": 290.2795, |
|
"eval_reddit_eli5_samples_per_second": 187.843, |
|
"eval_reddit_eli5_steps_per_second": 3.913, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_askh_accuracy": 0.44699574235962536, |
|
"eval_reddit_askh_loss": 2.666015625, |
|
"eval_reddit_askh_runtime": 127.4754, |
|
"eval_reddit_askh_samples_per_second": 154.579, |
|
"eval_reddit_askh_steps_per_second": 3.224, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_reddit_asks_accuracy": 0.4559295270939454, |
|
"eval_reddit_asks_loss": 2.515625, |
|
"eval_reddit_asks_runtime": 148.9443, |
|
"eval_reddit_asks_samples_per_second": 176.952, |
|
"eval_reddit_asks_steps_per_second": 3.693, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.024955579951363e-06, |
|
"loss": 1.6457, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.05227294026498e-06, |
|
"loss": 1.6765, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.078596718903699e-06, |
|
"loss": 1.7331, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.103996663164927e-06, |
|
"loss": 1.7341, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.128535424532771e-06, |
|
"loss": 1.6712, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.152269489723789e-06, |
|
"loss": 1.7221, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.17524996386651e-06, |
|
"loss": 1.6535, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.197523233129813e-06, |
|
"loss": 1.7102, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.219131528403759e-06, |
|
"loss": 1.7204, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2401134072504595e-06, |
|
"loss": 1.6645, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.26050416794548e-06, |
|
"loss": 1.6375, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.280336206778326e-06, |
|
"loss": 1.6983, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.299639327694684e-06, |
|
"loss": 1.7058, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.318441011710832e-06, |
|
"loss": 1.6323, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.336766652213271e-06, |
|
"loss": 1.6884, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.35463976119956e-06, |
|
"loss": 1.72, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.372082150663167e-06, |
|
"loss": 1.6697, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.389114092632645e-06, |
|
"loss": 1.6286, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.405754460810312e-06, |
|
"loss": 1.7086, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.42202085629224e-06, |
|
"loss": 1.6386, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.437929719469291e-06, |
|
"loss": 1.6138, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.453496429892863e-06, |
|
"loss": 1.6216, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.468735395625979e-06, |
|
"loss": 1.7481, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.4836601333808566e-06, |
|
"loss": 1.6641, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.498283340560032e-06, |
|
"loss": 1.6732, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_gsm8k_hard_accuracy": 0.8998615691543503, |
|
"eval_gsm8k_hard_loss": 0.46142578125, |
|
"eval_gsm8k_hard_runtime": 2.1347, |
|
"eval_gsm8k_hard_samples_per_second": 123.669, |
|
"eval_gsm8k_hard_steps_per_second": 2.811, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_webgpt_accuracy": 0.4871765174992017, |
|
"eval_webgpt_loss": 2.30078125, |
|
"eval_webgpt_runtime": 17.9636, |
|
"eval_webgpt_samples_per_second": 217.996, |
|
"eval_webgpt_steps_per_second": 4.565, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_squad_v2_accuracy": 0.8868008286475288, |
|
"eval_squad_v2_loss": 0.397216796875, |
|
"eval_squad_v2_runtime": 88.1593, |
|
"eval_squad_v2_samples_per_second": 295.647, |
|
"eval_squad_v2_steps_per_second": 6.159, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_adversarial_qa_accuracy": 0.8145726993224083, |
|
"eval_adversarial_qa_loss": 0.9189453125, |
|
"eval_adversarial_qa_runtime": 21.7596, |
|
"eval_adversarial_qa_samples_per_second": 275.74, |
|
"eval_adversarial_qa_steps_per_second": 5.745, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_private_tuning_accuracy": 0.6609470207986216, |
|
"eval_private_tuning_loss": 1.2919921875, |
|
"eval_private_tuning_runtime": 64.7973, |
|
"eval_private_tuning_samples_per_second": 326.834, |
|
"eval_private_tuning_steps_per_second": 6.821, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_oa_translated_accuracy": 0.6798870038090434, |
|
"eval_oa_translated_loss": 1.341796875, |
|
"eval_oa_translated_runtime": 733.6761, |
|
"eval_oa_translated_samples_per_second": 190.636, |
|
"eval_oa_translated_steps_per_second": 3.972, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_prosocial_dialogue_accuracy": 0.5295571842042209, |
|
"eval_prosocial_dialogue_loss": 1.8427734375, |
|
"eval_prosocial_dialogue_runtime": 58.5686, |
|
"eval_prosocial_dialogue_samples_per_second": 460.707, |
|
"eval_prosocial_dialogue_steps_per_second": 9.613, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_math_qa_accuracy": 0.5540361105203919, |
|
"eval_math_qa_loss": 1.9853515625, |
|
"eval_math_qa_runtime": 19.0275, |
|
"eval_math_qa_samples_per_second": 313.651, |
|
"eval_math_qa_steps_per_second": 6.569, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wikihow_accuracy": 0.6014838441270282, |
|
"eval_wikihow_loss": 1.9541015625, |
|
"eval_wikihow_runtime": 7.3976, |
|
"eval_wikihow_samples_per_second": 309.967, |
|
"eval_wikihow_steps_per_second": 6.489, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_joke_accuracy": 0.4797194844579227, |
|
"eval_joke_loss": 2.34375, |
|
"eval_joke_runtime": 0.5281, |
|
"eval_joke_samples_per_second": 143.924, |
|
"eval_joke_steps_per_second": 3.787, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_gsm8k_accuracy": 0.7496816445333818, |
|
"eval_gsm8k_loss": 0.9697265625, |
|
"eval_gsm8k_runtime": 6.3519, |
|
"eval_gsm8k_samples_per_second": 235.363, |
|
"eval_gsm8k_steps_per_second": 5.038, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-hi_accuracy": 0.6267826086956522, |
|
"eval_ted_trans_en-hi_loss": 1.455078125, |
|
"eval_ted_trans_en-hi_runtime": 1.042, |
|
"eval_ted_trans_en-hi_samples_per_second": 98.852, |
|
"eval_ted_trans_en-hi_steps_per_second": 2.879, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_de-ja_accuracy": 0.6095081533548741, |
|
"eval_ted_trans_de-ja_loss": 1.736328125, |
|
"eval_ted_trans_de-ja_runtime": 3.4864, |
|
"eval_ted_trans_de-ja_samples_per_second": 205.945, |
|
"eval_ted_trans_de-ja_steps_per_second": 4.302, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_nl-en_accuracy": 0.7156337065657362, |
|
"eval_ted_trans_nl-en_loss": 1.2724609375, |
|
"eval_ted_trans_nl-en_runtime": 4.8663, |
|
"eval_ted_trans_nl-en_samples_per_second": 158.438, |
|
"eval_ted_trans_nl-en_steps_per_second": 3.493, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-ja_accuracy": 0.624158725585033, |
|
"eval_ted_trans_en-ja_loss": 1.63671875, |
|
"eval_ted_trans_en-ja_runtime": 4.5137, |
|
"eval_ted_trans_en-ja_samples_per_second": 177.461, |
|
"eval_ted_trans_en-ja_steps_per_second": 3.766, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-es_accuracy": 0.7583100576231097, |
|
"eval_ted_trans_en-es_loss": 1.048828125, |
|
"eval_ted_trans_en-es_runtime": 3.4017, |
|
"eval_ted_trans_en-es_samples_per_second": 242.822, |
|
"eval_ted_trans_en-es_steps_per_second": 5.292, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_ted_trans_en-ms_accuracy": 0.7228320526893524, |
|
"eval_ted_trans_en-ms_loss": 1.3466796875, |
|
"eval_ted_trans_en-ms_runtime": 0.9049, |
|
"eval_ted_trans_en-ms_samples_per_second": 46.416, |
|
"eval_ted_trans_en-ms_steps_per_second": 1.105, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_xsum_accuracy": 0.6011633358116925, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 194.0576, |
|
"eval_xsum_samples_per_second": 210.293, |
|
"eval_xsum_steps_per_second": 4.385, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_cnn_dailymail_accuracy": 0.6701096765236707, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 276.9166, |
|
"eval_cnn_dailymail_samples_per_second": 207.366, |
|
"eval_cnn_dailymail_steps_per_second": 4.323, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_multi_news_accuracy": 0.5313963642137016, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 45.9972, |
|
"eval_multi_news_samples_per_second": 195.555, |
|
"eval_multi_news_steps_per_second": 4.087, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_tldr_news_accuracy": 0.5344681651462428, |
|
"eval_tldr_news_loss": 2.201171875, |
|
"eval_tldr_news_runtime": 3.1785, |
|
"eval_tldr_news_samples_per_second": 449.262, |
|
"eval_tldr_news_steps_per_second": 9.438, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_scitldr_accuracy": 0.49756888168557534, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.9905, |
|
"eval_scitldr_samples_per_second": 133.421, |
|
"eval_scitldr_steps_per_second": 3.009, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_samsum_accuracy": 0.6183671538076762, |
|
"eval_samsum_loss": 1.4326171875, |
|
"eval_samsum_runtime": 13.5218, |
|
"eval_samsum_samples_per_second": 217.944, |
|
"eval_samsum_steps_per_second": 4.585, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_debate_sum_accuracy": 0.9346053252084863, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 244.8422, |
|
"eval_debate_sum_samples_per_second": 196.51, |
|
"eval_debate_sum_steps_per_second": 4.097, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_billsum_accuracy": 0.6686931696172409, |
|
"eval_billsum_loss": 1.427734375, |
|
"eval_billsum_runtime": 27.101, |
|
"eval_billsum_samples_per_second": 139.847, |
|
"eval_billsum_steps_per_second": 2.915, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_zh-en_accuracy": 0.6249207026609304, |
|
"eval_wmt2019_zh-en_loss": 1.7001953125, |
|
"eval_wmt2019_zh-en_runtime": 12.5289, |
|
"eval_wmt2019_zh-en_samples_per_second": 317.745, |
|
"eval_wmt2019_zh-en_steps_per_second": 6.625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_ru-en_accuracy": 0.7221410449334468, |
|
"eval_wmt2019_ru-en_loss": 1.1083984375, |
|
"eval_wmt2019_ru-en_runtime": 10.0702, |
|
"eval_wmt2019_ru-en_samples_per_second": 297.91, |
|
"eval_wmt2019_ru-en_steps_per_second": 6.256, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_de-en_accuracy": 0.7360578210047292, |
|
"eval_wmt2019_de-en_loss": 1.072265625, |
|
"eval_wmt2019_de-en_runtime": 7.6459, |
|
"eval_wmt2019_de-en_samples_per_second": 392.106, |
|
"eval_wmt2019_de-en_steps_per_second": 8.24, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_wmt2019_fr-de_accuracy": 0.7223688705319711, |
|
"eval_wmt2019_fr-de_loss": 1.15234375, |
|
"eval_wmt2019_fr-de_runtime": 5.1746, |
|
"eval_wmt2019_fr-de_samples_per_second": 292.195, |
|
"eval_wmt2019_fr-de_steps_per_second": 6.184, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_essay_instruction_accuracy": 0.5920659841231232, |
|
"eval_essay_instruction_loss": 2.001953125, |
|
"eval_essay_instruction_runtime": 4.5291, |
|
"eval_essay_instruction_samples_per_second": 91.188, |
|
"eval_essay_instruction_steps_per_second": 1.987, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_reddit_eli5_accuracy": 0.4464491832743919, |
|
"eval_reddit_eli5_loss": 2.560546875, |
|
"eval_reddit_eli5_runtime": 282.6054, |
|
"eval_reddit_eli5_samples_per_second": 192.944, |
|
"eval_reddit_eli5_steps_per_second": 4.02, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_reddit_askh_accuracy": 0.4485475495544, |
|
"eval_reddit_askh_loss": 2.65625, |
|
"eval_reddit_askh_runtime": 129.6151, |
|
"eval_reddit_askh_samples_per_second": 152.027, |
|
"eval_reddit_askh_steps_per_second": 3.171, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_reddit_asks_accuracy": 0.4581163067460401, |
|
"eval_reddit_asks_loss": 2.5078125, |
|
"eval_reddit_asks_runtime": 148.6855, |
|
"eval_reddit_asks_samples_per_second": 177.26, |
|
"eval_reddit_asks_steps_per_second": 3.699, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.512616960163228e-06, |
|
"loss": 1.6485, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.526672239391333e-06, |
|
"loss": 1.6097, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.540459782667983e-06, |
|
"loss": 1.6802, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.553989599704948e-06, |
|
"loss": 1.624, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.5672711491570735e-06, |
|
"loss": 1.6027, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.5803133783436676e-06, |
|
"loss": 1.6412, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.5931247594541535e-06, |
|
"loss": 1.6312, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.605713322604896e-06, |
|
"loss": 1.6101, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.61808668607024e-06, |
|
"loss": 1.6172, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.63025208397274e-06, |
|
"loss": 1.6527, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.642216391684613e-06, |
|
"loss": 1.6598, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.653986149163757e-06, |
|
"loss": 1.6289, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.6655675824226375e-06, |
|
"loss": 1.6069, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.676966623306479e-06, |
|
"loss": 1.5908, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.688188927738093e-06, |
|
"loss": 1.6184, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.699239892569782e-06, |
|
"loss": 1.6511, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.710124671168044e-06, |
|
"loss": 1.6089, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.720848187843727e-06, |
|
"loss": 1.74, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.73141515122876e-06, |
|
"loss": 1.5827, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.741830066690428e-06, |
|
"loss": 1.6384, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.752097247865126e-06, |
|
"loss": 1.6331, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7622208273854484e-06, |
|
"loss": 1.6444, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.772204766867427e-06, |
|
"loss": 1.597, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.782052866218294e-06, |
|
"loss": 1.6041, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7917687723195e-06, |
|
"loss": 1.584, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_gsm8k_hard_accuracy": 0.9101217377142624, |
|
"eval_gsm8k_hard_loss": 0.4091796875, |
|
"eval_gsm8k_hard_runtime": 2.6619, |
|
"eval_gsm8k_hard_samples_per_second": 99.177, |
|
"eval_gsm8k_hard_steps_per_second": 2.254, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_webgpt_accuracy": 0.4874197343145836, |
|
"eval_webgpt_loss": 2.296875, |
|
"eval_webgpt_runtime": 16.2391, |
|
"eval_webgpt_samples_per_second": 241.147, |
|
"eval_webgpt_steps_per_second": 5.05, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_squad_v2_accuracy": 0.8846798855677223, |
|
"eval_squad_v2_loss": 0.363037109375, |
|
"eval_squad_v2_runtime": 89.6157, |
|
"eval_squad_v2_samples_per_second": 290.842, |
|
"eval_squad_v2_steps_per_second": 6.059, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_adversarial_qa_accuracy": 0.8026626070863556, |
|
"eval_adversarial_qa_loss": 0.8447265625, |
|
"eval_adversarial_qa_runtime": 21.271, |
|
"eval_adversarial_qa_samples_per_second": 282.075, |
|
"eval_adversarial_qa_steps_per_second": 5.877, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_private_tuning_accuracy": 0.6632682821768406, |
|
"eval_private_tuning_loss": 1.2705078125, |
|
"eval_private_tuning_runtime": 65.8398, |
|
"eval_private_tuning_samples_per_second": 321.659, |
|
"eval_private_tuning_steps_per_second": 6.713, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_oa_translated_accuracy": 0.6840566265427955, |
|
"eval_oa_translated_loss": 1.31640625, |
|
"eval_oa_translated_runtime": 739.9323, |
|
"eval_oa_translated_samples_per_second": 189.024, |
|
"eval_oa_translated_steps_per_second": 3.938, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_prosocial_dialogue_accuracy": 0.5235860358801359, |
|
"eval_prosocial_dialogue_loss": 1.8115234375, |
|
"eval_prosocial_dialogue_runtime": 61.6452, |
|
"eval_prosocial_dialogue_samples_per_second": 437.715, |
|
"eval_prosocial_dialogue_steps_per_second": 9.133, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_math_qa_accuracy": 0.5631207954480619, |
|
"eval_math_qa_loss": 1.921875, |
|
"eval_math_qa_runtime": 17.923, |
|
"eval_math_qa_samples_per_second": 332.98, |
|
"eval_math_qa_steps_per_second": 6.974, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_wikihow_accuracy": 0.6049785050617112, |
|
"eval_wikihow_loss": 1.923828125, |
|
"eval_wikihow_runtime": 7.5032, |
|
"eval_wikihow_samples_per_second": 305.602, |
|
"eval_wikihow_steps_per_second": 6.397, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_joke_accuracy": 0.4799090219863533, |
|
"eval_joke_loss": 2.30078125, |
|
"eval_joke_runtime": 1.3898, |
|
"eval_joke_samples_per_second": 54.685, |
|
"eval_joke_steps_per_second": 1.439, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_gsm8k_accuracy": 0.7521515232084633, |
|
"eval_gsm8k_loss": 0.94091796875, |
|
"eval_gsm8k_runtime": 5.0267, |
|
"eval_gsm8k_samples_per_second": 297.41, |
|
"eval_gsm8k_steps_per_second": 6.366, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_ted_trans_en-hi_accuracy": 0.6381951731374607, |
|
"eval_ted_trans_en-hi_loss": 1.3837890625, |
|
"eval_ted_trans_en-hi_runtime": 3.1926, |
|
"eval_ted_trans_en-hi_samples_per_second": 32.262, |
|
"eval_ted_trans_en-hi_steps_per_second": 0.94, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_ted_trans_de-ja_accuracy": 0.6046756766931446, |
|
"eval_ted_trans_de-ja_loss": 1.75390625, |
|
"eval_ted_trans_de-ja_runtime": 4.4495, |
|
"eval_ted_trans_de-ja_samples_per_second": 161.365, |
|
"eval_ted_trans_de-ja_steps_per_second": 3.371, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_ted_trans_nl-en_accuracy": 0.7248159831756046, |
|
"eval_ted_trans_nl-en_loss": 1.2216796875, |
|
"eval_ted_trans_nl-en_runtime": 3.3794, |
|
"eval_ted_trans_nl-en_samples_per_second": 228.144, |
|
"eval_ted_trans_nl-en_steps_per_second": 5.03, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_ted_trans_en-ja_accuracy": 0.62236684020825, |
|
"eval_ted_trans_en-ja_loss": 1.6015625, |
|
"eval_ted_trans_en-ja_runtime": 4.3536, |
|
"eval_ted_trans_en-ja_samples_per_second": 183.987, |
|
"eval_ted_trans_en-ja_steps_per_second": 3.905, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_ted_trans_en-es_accuracy": 0.7690527730088826, |
|
"eval_ted_trans_en-es_loss": 0.99658203125, |
|
"eval_ted_trans_en-es_runtime": 4.0371, |
|
"eval_ted_trans_en-es_samples_per_second": 204.604, |
|
"eval_ted_trans_en-es_steps_per_second": 4.459, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_ted_trans_en-ms_accuracy": 0.6281984334203655, |
|
"eval_ted_trans_en-ms_loss": 1.7548828125, |
|
"eval_ted_trans_en-ms_runtime": 1.3143, |
|
"eval_ted_trans_en-ms_samples_per_second": 31.955, |
|
"eval_ted_trans_en-ms_steps_per_second": 0.761, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_xsum_accuracy": 0.6022120068856478, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 190.7745, |
|
"eval_xsum_samples_per_second": 213.912, |
|
"eval_xsum_steps_per_second": 4.461, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_cnn_dailymail_accuracy": 0.6730714054329214, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 278.1455, |
|
"eval_cnn_dailymail_samples_per_second": 206.45, |
|
"eval_cnn_dailymail_steps_per_second": 4.304, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_multi_news_accuracy": 0.5342626698844151, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 44.9541, |
|
"eval_multi_news_samples_per_second": 200.093, |
|
"eval_multi_news_steps_per_second": 4.182, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_tldr_news_accuracy": 0.5577447568889307, |
|
"eval_tldr_news_loss": 2.03125, |
|
"eval_tldr_news_runtime": 4.8998, |
|
"eval_tldr_news_samples_per_second": 291.441, |
|
"eval_tldr_news_steps_per_second": 6.123, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_scitldr_accuracy": 0.5008103727714749, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.5302, |
|
"eval_scitldr_samples_per_second": 157.698, |
|
"eval_scitldr_steps_per_second": 3.557, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_samsum_accuracy": 0.6208006056591274, |
|
"eval_samsum_loss": 1.40625, |
|
"eval_samsum_runtime": 14.6527, |
|
"eval_samsum_samples_per_second": 201.123, |
|
"eval_samsum_steps_per_second": 4.231, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_debate_sum_accuracy": 0.9359010655534944, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 244.5165, |
|
"eval_debate_sum_samples_per_second": 196.772, |
|
"eval_debate_sum_steps_per_second": 4.102, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_billsum_accuracy": 0.6674552130307286, |
|
"eval_billsum_loss": 1.412109375, |
|
"eval_billsum_runtime": 27.8571, |
|
"eval_billsum_samples_per_second": 136.052, |
|
"eval_billsum_steps_per_second": 2.836, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_wmt2019_zh-en_accuracy": 0.6281871623861053, |
|
"eval_wmt2019_zh-en_loss": 1.689453125, |
|
"eval_wmt2019_zh-en_runtime": 11.4111, |
|
"eval_wmt2019_zh-en_samples_per_second": 348.872, |
|
"eval_wmt2019_zh-en_steps_per_second": 7.274, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_wmt2019_ru-en_accuracy": 0.7208443499252788, |
|
"eval_wmt2019_ru-en_loss": 1.1123046875, |
|
"eval_wmt2019_ru-en_runtime": 10.8964, |
|
"eval_wmt2019_ru-en_samples_per_second": 275.321, |
|
"eval_wmt2019_ru-en_steps_per_second": 5.782, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_wmt2019_de-en_accuracy": 0.7330685618729097, |
|
"eval_wmt2019_de-en_loss": 1.0859375, |
|
"eval_wmt2019_de-en_runtime": 7.6477, |
|
"eval_wmt2019_de-en_samples_per_second": 392.011, |
|
"eval_wmt2019_de-en_steps_per_second": 8.238, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_wmt2019_fr-de_accuracy": 0.7199981458877335, |
|
"eval_wmt2019_fr-de_loss": 1.1474609375, |
|
"eval_wmt2019_fr-de_runtime": 5.4342, |
|
"eval_wmt2019_fr-de_samples_per_second": 278.24, |
|
"eval_wmt2019_fr-de_steps_per_second": 5.889, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_essay_instruction_accuracy": 0.5924197863918802, |
|
"eval_essay_instruction_loss": 1.9921875, |
|
"eval_essay_instruction_runtime": 4.79, |
|
"eval_essay_instruction_samples_per_second": 86.221, |
|
"eval_essay_instruction_steps_per_second": 1.879, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_reddit_eli5_accuracy": 0.4455537602670511, |
|
"eval_reddit_eli5_loss": 2.5546875, |
|
"eval_reddit_eli5_runtime": 268.3918, |
|
"eval_reddit_eli5_samples_per_second": 203.162, |
|
"eval_reddit_eli5_steps_per_second": 4.233, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_reddit_askh_accuracy": 0.44842410632057694, |
|
"eval_reddit_askh_loss": 2.65234375, |
|
"eval_reddit_askh_runtime": 150.7797, |
|
"eval_reddit_askh_samples_per_second": 130.687, |
|
"eval_reddit_askh_steps_per_second": 2.726, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_reddit_asks_accuracy": 0.4572903640086479, |
|
"eval_reddit_asks_loss": 2.501953125, |
|
"eval_reddit_asks_runtime": 135.6226, |
|
"eval_reddit_asks_samples_per_second": 194.333, |
|
"eval_reddit_asks_steps_per_second": 4.055, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.801355987134653e-06, |
|
"loss": 1.5707, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.81081787528747e-06, |
|
"loss": 1.6039, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.820157671150801e-06, |
|
"loss": 1.5763, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.82937848548407e-06, |
|
"loss": 1.6415, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.83848331165324e-06, |
|
"loss": 1.6192, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.847475031464417e-06, |
|
"loss": 1.8104, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.856356420639528e-06, |
|
"loss": 1.6151, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8651301539601235e-06, |
|
"loss": 1.6213, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.873798810103137e-06, |
|
"loss": 1.5999, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.882364876190489e-06, |
|
"loss": 1.5919, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.890830752072613e-06, |
|
"loss": 1.6093, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.899198754364365e-06, |
|
"loss": 1.6407, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.907471120250281e-06, |
|
"loss": 1.6171, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.915650011074855e-06, |
|
"loss": 1.6894, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.923737515732209e-06, |
|
"loss": 1.6495, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.931735653868489e-06, |
|
"loss": 1.6688, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.93964637890926e-06, |
|
"loss": 1.6085, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9474715809232256e-06, |
|
"loss": 1.6499, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.955213089332832e-06, |
|
"loss": 1.6319, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.962872675481414e-06, |
|
"loss": 1.5965, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.970452055065948e-06, |
|
"loss": 1.5977, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.977952890443742e-06, |
|
"loss": 1.6161, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.985376792820825e-06, |
|
"loss": 1.5886, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.992725324329251e-06, |
|
"loss": 1.5945, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-06, |
|
"loss": 1.6211, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_gsm8k_hard_accuracy": 0.9133382191278857, |
|
"eval_gsm8k_hard_loss": 0.38720703125, |
|
"eval_gsm8k_hard_runtime": 2.5023, |
|
"eval_gsm8k_hard_samples_per_second": 105.503, |
|
"eval_gsm8k_hard_steps_per_second": 2.398, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_webgpt_accuracy": 0.4873552074043802, |
|
"eval_webgpt_loss": 2.29296875, |
|
"eval_webgpt_runtime": 18.3874, |
|
"eval_webgpt_samples_per_second": 212.972, |
|
"eval_webgpt_steps_per_second": 4.46, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_squad_v2_accuracy": 0.8973280344987951, |
|
"eval_squad_v2_loss": 0.33642578125, |
|
"eval_squad_v2_runtime": 87.2934, |
|
"eval_squad_v2_samples_per_second": 298.579, |
|
"eval_squad_v2_steps_per_second": 6.22, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_adversarial_qa_accuracy": 0.810095221038178, |
|
"eval_adversarial_qa_loss": 0.85498046875, |
|
"eval_adversarial_qa_runtime": 21.5732, |
|
"eval_adversarial_qa_samples_per_second": 278.123, |
|
"eval_adversarial_qa_steps_per_second": 5.794, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_private_tuning_accuracy": 0.6643376777215743, |
|
"eval_private_tuning_loss": 1.2626953125, |
|
"eval_private_tuning_runtime": 61.2475, |
|
"eval_private_tuning_samples_per_second": 345.777, |
|
"eval_private_tuning_steps_per_second": 7.217, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_oa_translated_accuracy": 0.6876637305407464, |
|
"eval_oa_translated_loss": 1.298828125, |
|
"eval_oa_translated_runtime": 714.9582, |
|
"eval_oa_translated_samples_per_second": 195.627, |
|
"eval_oa_translated_steps_per_second": 4.076, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_prosocial_dialogue_accuracy": 0.533683742122458, |
|
"eval_prosocial_dialogue_loss": 1.8115234375, |
|
"eval_prosocial_dialogue_runtime": 77.2682, |
|
"eval_prosocial_dialogue_samples_per_second": 349.212, |
|
"eval_prosocial_dialogue_steps_per_second": 7.286, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_math_qa_accuracy": 0.5697754633111511, |
|
"eval_math_qa_loss": 1.884765625, |
|
"eval_math_qa_runtime": 19.141, |
|
"eval_math_qa_samples_per_second": 311.791, |
|
"eval_math_qa_steps_per_second": 6.53, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_wikihow_accuracy": 0.6076965746775759, |
|
"eval_wikihow_loss": 1.91015625, |
|
"eval_wikihow_runtime": 7.4493, |
|
"eval_wikihow_samples_per_second": 307.815, |
|
"eval_wikihow_steps_per_second": 6.444, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_joke_accuracy": 0.4844579226686884, |
|
"eval_joke_loss": 2.287109375, |
|
"eval_joke_runtime": 0.5532, |
|
"eval_joke_samples_per_second": 137.392, |
|
"eval_joke_steps_per_second": 3.616, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_gsm8k_accuracy": 0.7594911909992863, |
|
"eval_gsm8k_loss": 0.9140625, |
|
"eval_gsm8k_runtime": 5.3345, |
|
"eval_gsm8k_samples_per_second": 280.251, |
|
"eval_gsm8k_steps_per_second": 5.999, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_ted_trans_en-hi_accuracy": 0.6449365772509877, |
|
"eval_ted_trans_en-hi_loss": 1.306640625, |
|
"eval_ted_trans_en-hi_runtime": 1.7092, |
|
"eval_ted_trans_en-hi_samples_per_second": 60.262, |
|
"eval_ted_trans_en-hi_steps_per_second": 1.755, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_ted_trans_de-ja_accuracy": 0.6195650127106676, |
|
"eval_ted_trans_de-ja_loss": 1.6767578125, |
|
"eval_ted_trans_de-ja_runtime": 4.3842, |
|
"eval_ted_trans_de-ja_samples_per_second": 163.768, |
|
"eval_ted_trans_de-ja_steps_per_second": 3.421, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_ted_trans_nl-en_accuracy": 0.7198614136853986, |
|
"eval_ted_trans_nl-en_loss": 1.236328125, |
|
"eval_ted_trans_nl-en_runtime": 3.7037, |
|
"eval_ted_trans_nl-en_samples_per_second": 208.171, |
|
"eval_ted_trans_nl-en_steps_per_second": 4.59, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_ted_trans_en-ja_accuracy": 0.6259939079868666, |
|
"eval_ted_trans_en-ja_loss": 1.5947265625, |
|
"eval_ted_trans_en-ja_runtime": 4.2274, |
|
"eval_ted_trans_en-ja_samples_per_second": 189.48, |
|
"eval_ted_trans_en-ja_steps_per_second": 4.021, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_ted_trans_en-es_accuracy": 0.7666360545061928, |
|
"eval_ted_trans_en-es_loss": 1.01171875, |
|
"eval_ted_trans_en-es_runtime": 4.7495, |
|
"eval_ted_trans_en-es_samples_per_second": 173.914, |
|
"eval_ted_trans_en-es_steps_per_second": 3.79, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_ted_trans_en-ms_accuracy": 0.6276762402088772, |
|
"eval_ted_trans_en-ms_loss": 1.7060546875, |
|
"eval_ted_trans_en-ms_runtime": 0.3249, |
|
"eval_ted_trans_en-ms_samples_per_second": 129.281, |
|
"eval_ted_trans_en-ms_steps_per_second": 3.078, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_xsum_accuracy": 0.6038378677782439, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 194.7148, |
|
"eval_xsum_samples_per_second": 209.583, |
|
"eval_xsum_steps_per_second": 4.37, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_cnn_dailymail_accuracy": 0.6724346337174325, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 276.6313, |
|
"eval_cnn_dailymail_samples_per_second": 207.58, |
|
"eval_cnn_dailymail_steps_per_second": 4.327, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_multi_news_accuracy": 0.5417725136542614, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 44.7388, |
|
"eval_multi_news_samples_per_second": 201.056, |
|
"eval_multi_news_steps_per_second": 4.202, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_tldr_news_accuracy": 0.5576977334712687, |
|
"eval_tldr_news_loss": 2.015625, |
|
"eval_tldr_news_runtime": 4.3825, |
|
"eval_tldr_news_samples_per_second": 325.843, |
|
"eval_tldr_news_steps_per_second": 6.845, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_scitldr_accuracy": 0.5008103727714749, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.9018, |
|
"eval_scitldr_samples_per_second": 137.5, |
|
"eval_scitldr_steps_per_second": 3.102, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_samsum_accuracy": 0.6229095972637186, |
|
"eval_samsum_loss": 1.4013671875, |
|
"eval_samsum_runtime": 14.884, |
|
"eval_samsum_samples_per_second": 197.998, |
|
"eval_samsum_steps_per_second": 4.166, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_debate_sum_accuracy": 0.9370327058673479, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 243.4339, |
|
"eval_debate_sum_samples_per_second": 197.647, |
|
"eval_debate_sum_steps_per_second": 4.12, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_billsum_accuracy": 0.6730090224558118, |
|
"eval_billsum_loss": 1.3984375, |
|
"eval_billsum_runtime": 26.3919, |
|
"eval_billsum_samples_per_second": 143.605, |
|
"eval_billsum_steps_per_second": 2.993, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_wmt2019_zh-en_accuracy": 0.6340022173592529, |
|
"eval_wmt2019_zh-en_loss": 1.66015625, |
|
"eval_wmt2019_zh-en_runtime": 12.9276, |
|
"eval_wmt2019_zh-en_samples_per_second": 307.945, |
|
"eval_wmt2019_zh-en_steps_per_second": 6.42, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_wmt2019_ru-en_accuracy": 0.7246060048314736, |
|
"eval_wmt2019_ru-en_loss": 1.09375, |
|
"eval_wmt2019_ru-en_runtime": 10.2691, |
|
"eval_wmt2019_ru-en_samples_per_second": 292.138, |
|
"eval_wmt2019_ru-en_steps_per_second": 6.135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_wmt2019_de-en_accuracy": 0.7373287943940118, |
|
"eval_wmt2019_de-en_loss": 1.076171875, |
|
"eval_wmt2019_de-en_runtime": 8.2087, |
|
"eval_wmt2019_de-en_samples_per_second": 365.221, |
|
"eval_wmt2019_de-en_steps_per_second": 7.675, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_wmt2019_fr-de_accuracy": 0.7239072325829329, |
|
"eval_wmt2019_fr-de_loss": 1.1376953125, |
|
"eval_wmt2019_fr-de_runtime": 5.7413, |
|
"eval_wmt2019_fr-de_samples_per_second": 263.356, |
|
"eval_wmt2019_fr-de_steps_per_second": 5.574, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_essay_instruction_accuracy": 0.5944486212767839, |
|
"eval_essay_instruction_loss": 1.982421875, |
|
"eval_essay_instruction_runtime": 4.9211, |
|
"eval_essay_instruction_samples_per_second": 83.925, |
|
"eval_essay_instruction_steps_per_second": 1.829, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_reddit_eli5_accuracy": 0.44636294670867693, |
|
"eval_reddit_eli5_loss": 2.552734375, |
|
"eval_reddit_eli5_runtime": 266.8136, |
|
"eval_reddit_eli5_samples_per_second": 204.364, |
|
"eval_reddit_eli5_steps_per_second": 4.258, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_reddit_askh_accuracy": 0.4487002153615098, |
|
"eval_reddit_askh_loss": 2.65234375, |
|
"eval_reddit_askh_runtime": 136.1822, |
|
"eval_reddit_askh_samples_per_second": 144.696, |
|
"eval_reddit_askh_steps_per_second": 3.018, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_reddit_asks_accuracy": 0.45779545724968684, |
|
"eval_reddit_asks_loss": 2.501953125, |
|
"eval_reddit_asks_runtime": 151.9509, |
|
"eval_reddit_asks_samples_per_second": 173.451, |
|
"eval_reddit_asks_steps_per_second": 3.62, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.998642779587406e-06, |
|
"loss": 1.5937, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.997134756906744e-06, |
|
"loss": 1.6393, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.995626734226083e-06, |
|
"loss": 1.64, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.994118711545422e-06, |
|
"loss": 1.6529, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.992610688864761e-06, |
|
"loss": 1.6339, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9911026661841e-06, |
|
"loss": 1.7531, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.989594643503439e-06, |
|
"loss": 1.622, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.988086620822777e-06, |
|
"loss": 1.7041, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.986578598142116e-06, |
|
"loss": 1.5922, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9850705754614555e-06, |
|
"loss": 1.5937, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.983562552780794e-06, |
|
"loss": 1.6301, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.982054530100133e-06, |
|
"loss": 1.6054, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.980546507419472e-06, |
|
"loss": 1.745, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9790384847388105e-06, |
|
"loss": 1.6002, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.97753046205815e-06, |
|
"loss": 1.623, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.976022439377489e-06, |
|
"loss": 1.6432, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.974514416696827e-06, |
|
"loss": 1.5977, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.973006394016166e-06, |
|
"loss": 1.5784, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.971498371335505e-06, |
|
"loss": 1.7948, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.969990348654844e-06, |
|
"loss": 1.5515, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.968482325974183e-06, |
|
"loss": 1.6368, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.966974303293522e-06, |
|
"loss": 1.6672, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.96546628061286e-06, |
|
"loss": 1.6363, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9639582579321995e-06, |
|
"loss": 1.6082, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.962450235251539e-06, |
|
"loss": 1.5335, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_gsm8k_hard_accuracy": 0.9144782378567647, |
|
"eval_gsm8k_hard_loss": 0.38037109375, |
|
"eval_gsm8k_hard_runtime": 2.4991, |
|
"eval_gsm8k_hard_samples_per_second": 105.638, |
|
"eval_gsm8k_hard_steps_per_second": 2.401, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_webgpt_accuracy": 0.48714508131217954, |
|
"eval_webgpt_loss": 2.287109375, |
|
"eval_webgpt_runtime": 17.8711, |
|
"eval_webgpt_samples_per_second": 219.124, |
|
"eval_webgpt_steps_per_second": 4.588, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_squad_v2_accuracy": 0.8998647106075339, |
|
"eval_squad_v2_loss": 0.327392578125, |
|
"eval_squad_v2_runtime": 89.3681, |
|
"eval_squad_v2_samples_per_second": 291.648, |
|
"eval_squad_v2_steps_per_second": 6.076, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_adversarial_qa_accuracy": 0.8150204471508313, |
|
"eval_adversarial_qa_loss": 0.82080078125, |
|
"eval_adversarial_qa_runtime": 21.2941, |
|
"eval_adversarial_qa_samples_per_second": 281.769, |
|
"eval_adversarial_qa_steps_per_second": 5.87, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_private_tuning_accuracy": 0.6658206227322084, |
|
"eval_private_tuning_loss": 1.251953125, |
|
"eval_private_tuning_runtime": 65.5451, |
|
"eval_private_tuning_samples_per_second": 323.106, |
|
"eval_private_tuning_steps_per_second": 6.743, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_oa_translated_accuracy": 0.692265258615827, |
|
"eval_oa_translated_loss": 1.2744140625, |
|
"eval_oa_translated_runtime": 743.3603, |
|
"eval_oa_translated_samples_per_second": 188.499, |
|
"eval_oa_translated_steps_per_second": 3.928, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_prosocial_dialogue_accuracy": 0.5311811844172045, |
|
"eval_prosocial_dialogue_loss": 1.7724609375, |
|
"eval_prosocial_dialogue_runtime": 51.2881, |
|
"eval_prosocial_dialogue_samples_per_second": 526.106, |
|
"eval_prosocial_dialogue_steps_per_second": 10.977, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_math_qa_accuracy": 0.5769073170225678, |
|
"eval_math_qa_loss": 1.8466796875, |
|
"eval_math_qa_runtime": 19.0785, |
|
"eval_math_qa_samples_per_second": 312.813, |
|
"eval_math_qa_steps_per_second": 6.552, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_wikihow_accuracy": 0.6035709332963528, |
|
"eval_wikihow_loss": 1.896484375, |
|
"eval_wikihow_runtime": 7.4264, |
|
"eval_wikihow_samples_per_second": 308.762, |
|
"eval_wikihow_steps_per_second": 6.463, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_joke_accuracy": 0.48474222896133434, |
|
"eval_joke_loss": 2.275390625, |
|
"eval_joke_runtime": 0.6166, |
|
"eval_joke_samples_per_second": 123.256, |
|
"eval_joke_steps_per_second": 3.244, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_gsm8k_accuracy": 0.7641650690586473, |
|
"eval_gsm8k_loss": 0.8916015625, |
|
"eval_gsm8k_runtime": 5.7365, |
|
"eval_gsm8k_samples_per_second": 260.61, |
|
"eval_gsm8k_steps_per_second": 5.578, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_ted_trans_en-hi_accuracy": 0.6325462370594394, |
|
"eval_ted_trans_en-hi_loss": 1.4697265625, |
|
"eval_ted_trans_en-hi_runtime": 1.283, |
|
"eval_ted_trans_en-hi_samples_per_second": 80.282, |
|
"eval_ted_trans_en-hi_steps_per_second": 2.338, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_ted_trans_de-ja_accuracy": 0.6211990075587098, |
|
"eval_ted_trans_de-ja_loss": 1.662109375, |
|
"eval_ted_trans_de-ja_runtime": 3.8274, |
|
"eval_ted_trans_de-ja_samples_per_second": 187.596, |
|
"eval_ted_trans_de-ja_steps_per_second": 3.919, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_ted_trans_nl-en_accuracy": 0.7194199077125907, |
|
"eval_ted_trans_nl-en_loss": 1.2353515625, |
|
"eval_ted_trans_nl-en_runtime": 4.0007, |
|
"eval_ted_trans_nl-en_samples_per_second": 192.717, |
|
"eval_ted_trans_nl-en_steps_per_second": 4.249, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_ted_trans_en-ja_accuracy": 0.6241126045950204, |
|
"eval_ted_trans_en-ja_loss": 1.603515625, |
|
"eval_ted_trans_en-ja_runtime": 4.7838, |
|
"eval_ted_trans_en-ja_samples_per_second": 167.442, |
|
"eval_ted_trans_en-ja_steps_per_second": 3.554, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_ted_trans_en-es_accuracy": 0.7645402663284718, |
|
"eval_ted_trans_en-es_loss": 1.001953125, |
|
"eval_ted_trans_en-es_runtime": 4.101, |
|
"eval_ted_trans_en-es_samples_per_second": 201.413, |
|
"eval_ted_trans_en-es_steps_per_second": 4.389, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_ted_trans_en-ms_accuracy": 0.6673585884795018, |
|
"eval_ted_trans_en-ms_loss": 1.525390625, |
|
"eval_ted_trans_en-ms_runtime": 1.2996, |
|
"eval_ted_trans_en-ms_samples_per_second": 32.318, |
|
"eval_ted_trans_en-ms_steps_per_second": 0.769, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_xsum_accuracy": 0.6053592197062103, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 191.7698, |
|
"eval_xsum_samples_per_second": 212.802, |
|
"eval_xsum_steps_per_second": 4.438, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_cnn_dailymail_accuracy": 0.6746041001434587, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 278.9634, |
|
"eval_cnn_dailymail_samples_per_second": 205.844, |
|
"eval_cnn_dailymail_steps_per_second": 4.291, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_multi_news_accuracy": 0.5434468524251806, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 45.3002, |
|
"eval_multi_news_samples_per_second": 198.564, |
|
"eval_multi_news_steps_per_second": 4.15, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_tldr_news_accuracy": 0.5667732530800339, |
|
"eval_tldr_news_loss": 1.9794921875, |
|
"eval_tldr_news_runtime": 3.9274, |
|
"eval_tldr_news_samples_per_second": 363.601, |
|
"eval_tldr_news_steps_per_second": 7.639, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_scitldr_accuracy": 0.49108589951377635, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.8246, |
|
"eval_scitldr_samples_per_second": 141.259, |
|
"eval_scitldr_steps_per_second": 3.186, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_samsum_accuracy": 0.6251537806378348, |
|
"eval_samsum_loss": 1.3818359375, |
|
"eval_samsum_runtime": 13.864, |
|
"eval_samsum_samples_per_second": 212.565, |
|
"eval_samsum_steps_per_second": 4.472, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_debate_sum_accuracy": 0.9387001084116295, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 243.8994, |
|
"eval_debate_sum_samples_per_second": 197.27, |
|
"eval_debate_sum_steps_per_second": 4.112, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_billsum_accuracy": 0.6722638786214665, |
|
"eval_billsum_loss": 1.392578125, |
|
"eval_billsum_runtime": 27.2836, |
|
"eval_billsum_samples_per_second": 138.911, |
|
"eval_billsum_steps_per_second": 2.896, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_wmt2019_zh-en_accuracy": 0.6307350879022818, |
|
"eval_wmt2019_zh-en_loss": 1.67578125, |
|
"eval_wmt2019_zh-en_runtime": 11.4742, |
|
"eval_wmt2019_zh-en_samples_per_second": 346.954, |
|
"eval_wmt2019_zh-en_steps_per_second": 7.234, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_wmt2019_ru-en_accuracy": 0.7234031766265182, |
|
"eval_wmt2019_ru-en_loss": 1.1044921875, |
|
"eval_wmt2019_ru-en_runtime": 11.1292, |
|
"eval_wmt2019_ru-en_samples_per_second": 269.561, |
|
"eval_wmt2019_ru-en_steps_per_second": 5.661, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_wmt2019_de-en_accuracy": 0.7347881745809497, |
|
"eval_wmt2019_de-en_loss": 1.0703125, |
|
"eval_wmt2019_de-en_runtime": 6.7145, |
|
"eval_wmt2019_de-en_samples_per_second": 446.495, |
|
"eval_wmt2019_de-en_steps_per_second": 9.383, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_wmt2019_fr-de_accuracy": 0.7223718008231981, |
|
"eval_wmt2019_fr-de_loss": 1.150390625, |
|
"eval_wmt2019_fr-de_runtime": 5.999, |
|
"eval_wmt2019_fr-de_samples_per_second": 252.041, |
|
"eval_wmt2019_fr-de_steps_per_second": 5.334, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_essay_instruction_accuracy": 0.5959412245981027, |
|
"eval_essay_instruction_loss": 1.9736328125, |
|
"eval_essay_instruction_runtime": 4.9416, |
|
"eval_essay_instruction_samples_per_second": 83.576, |
|
"eval_essay_instruction_steps_per_second": 1.821, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_reddit_eli5_accuracy": 0.44527728061091026, |
|
"eval_reddit_eli5_loss": 2.548828125, |
|
"eval_reddit_eli5_runtime": 290.9524, |
|
"eval_reddit_eli5_samples_per_second": 187.409, |
|
"eval_reddit_eli5_steps_per_second": 3.904, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_reddit_askh_accuracy": 0.44824522875076156, |
|
"eval_reddit_askh_loss": 2.6484375, |
|
"eval_reddit_askh_runtime": 112.213, |
|
"eval_reddit_askh_samples_per_second": 175.604, |
|
"eval_reddit_askh_steps_per_second": 3.663, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_reddit_asks_accuracy": 0.45689164053284076, |
|
"eval_reddit_asks_loss": 2.498046875, |
|
"eval_reddit_asks_runtime": 165.2712, |
|
"eval_reddit_asks_samples_per_second": 159.471, |
|
"eval_reddit_asks_steps_per_second": 3.328, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.960942212570877e-06, |
|
"loss": 1.6061, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.959434189890216e-06, |
|
"loss": 1.6272, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.957926167209555e-06, |
|
"loss": 1.58, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9564181445288936e-06, |
|
"loss": 1.5576, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9549101218482336e-06, |
|
"loss": 1.5794, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.953402099167572e-06, |
|
"loss": 1.5694, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.95189407648691e-06, |
|
"loss": 1.6599, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.95038605380625e-06, |
|
"loss": 1.5774, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9488780311255885e-06, |
|
"loss": 1.5876, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.947370008444927e-06, |
|
"loss": 1.6382, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.945861985764267e-06, |
|
"loss": 1.6248, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.944353963083605e-06, |
|
"loss": 1.6227, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9428459404029434e-06, |
|
"loss": 1.592, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9413379177222834e-06, |
|
"loss": 1.6044, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.939829895041622e-06, |
|
"loss": 1.6386, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.938321872360961e-06, |
|
"loss": 1.5863, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9368138496803e-06, |
|
"loss": 1.6269, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.935305826999638e-06, |
|
"loss": 1.6145, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9337978043189775e-06, |
|
"loss": 1.5194, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.932289781638317e-06, |
|
"loss": 1.5841, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.930781758957655e-06, |
|
"loss": 1.5677, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.929273736276994e-06, |
|
"loss": 1.6513, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.927765713596333e-06, |
|
"loss": 1.5514, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.926257690915672e-06, |
|
"loss": 1.6071, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.924749668235011e-06, |
|
"loss": 1.5484, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_gsm8k_hard_accuracy": 0.9160661210862749, |
|
"eval_gsm8k_hard_loss": 0.36669921875, |
|
"eval_gsm8k_hard_runtime": 2.426, |
|
"eval_gsm8k_hard_samples_per_second": 108.822, |
|
"eval_gsm8k_hard_steps_per_second": 2.473, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_webgpt_accuracy": 0.4889121259300561, |
|
"eval_webgpt_loss": 2.275390625, |
|
"eval_webgpt_runtime": 16.7849, |
|
"eval_webgpt_samples_per_second": 233.305, |
|
"eval_webgpt_steps_per_second": 4.885, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_squad_v2_accuracy": 0.8985118166828732, |
|
"eval_squad_v2_loss": 0.3203125, |
|
"eval_squad_v2_runtime": 89.0026, |
|
"eval_squad_v2_samples_per_second": 292.845, |
|
"eval_squad_v2_steps_per_second": 6.101, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_adversarial_qa_accuracy": 0.8153786454135697, |
|
"eval_adversarial_qa_loss": 0.80224609375, |
|
"eval_adversarial_qa_runtime": 21.5777, |
|
"eval_adversarial_qa_samples_per_second": 278.064, |
|
"eval_adversarial_qa_steps_per_second": 5.793, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_private_tuning_accuracy": 0.6673820897933299, |
|
"eval_private_tuning_loss": 1.2412109375, |
|
"eval_private_tuning_runtime": 61.4345, |
|
"eval_private_tuning_samples_per_second": 344.725, |
|
"eval_private_tuning_steps_per_second": 7.195, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_oa_translated_accuracy": 0.694931804897923, |
|
"eval_oa_translated_loss": 1.259765625, |
|
"eval_oa_translated_runtime": 747.634, |
|
"eval_oa_translated_samples_per_second": 187.422, |
|
"eval_oa_translated_steps_per_second": 3.906, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_prosocial_dialogue_accuracy": 0.5319418402546676, |
|
"eval_prosocial_dialogue_loss": 1.7685546875, |
|
"eval_prosocial_dialogue_runtime": 40.2134, |
|
"eval_prosocial_dialogue_samples_per_second": 670.995, |
|
"eval_prosocial_dialogue_steps_per_second": 14.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_math_qa_accuracy": 0.5807196170064887, |
|
"eval_math_qa_loss": 1.8212890625, |
|
"eval_math_qa_runtime": 19.1438, |
|
"eval_math_qa_samples_per_second": 311.745, |
|
"eval_math_qa_steps_per_second": 6.53, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_wikihow_accuracy": 0.6014214394674803, |
|
"eval_wikihow_loss": 1.896484375, |
|
"eval_wikihow_runtime": 7.4397, |
|
"eval_wikihow_samples_per_second": 308.21, |
|
"eval_wikihow_steps_per_second": 6.452, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_joke_accuracy": 0.4866376042456406, |
|
"eval_joke_loss": 2.24609375, |
|
"eval_joke_runtime": 0.5334, |
|
"eval_joke_samples_per_second": 142.487, |
|
"eval_joke_steps_per_second": 3.75, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_gsm8k_accuracy": 0.7673556205482711, |
|
"eval_gsm8k_loss": 0.87646484375, |
|
"eval_gsm8k_runtime": 6.0291, |
|
"eval_gsm8k_samples_per_second": 247.963, |
|
"eval_gsm8k_steps_per_second": 5.308, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_ted_trans_en-hi_accuracy": 0.6494347770862441, |
|
"eval_ted_trans_en-hi_loss": 1.390625, |
|
"eval_ted_trans_en-hi_runtime": 1.0351, |
|
"eval_ted_trans_en-hi_samples_per_second": 99.508, |
|
"eval_ted_trans_en-hi_steps_per_second": 2.898, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_ted_trans_de-ja_accuracy": 0.616848081514485, |
|
"eval_ted_trans_de-ja_loss": 1.677734375, |
|
"eval_ted_trans_de-ja_runtime": 3.7491, |
|
"eval_ted_trans_de-ja_samples_per_second": 191.511, |
|
"eval_ted_trans_de-ja_steps_per_second": 4.001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_ted_trans_nl-en_accuracy": 0.727337334175616, |
|
"eval_ted_trans_nl-en_loss": 1.2138671875, |
|
"eval_ted_trans_nl-en_runtime": 3.6692, |
|
"eval_ted_trans_nl-en_samples_per_second": 210.129, |
|
"eval_ted_trans_nl-en_steps_per_second": 4.633, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_ted_trans_en-ja_accuracy": 0.626860854480171, |
|
"eval_ted_trans_en-ja_loss": 1.591796875, |
|
"eval_ted_trans_en-ja_runtime": 4.8535, |
|
"eval_ted_trans_en-ja_samples_per_second": 165.036, |
|
"eval_ted_trans_en-ja_steps_per_second": 3.503, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_ted_trans_en-es_accuracy": 0.7669356066372355, |
|
"eval_ted_trans_en-es_loss": 0.98388671875, |
|
"eval_ted_trans_en-es_runtime": 4.1425, |
|
"eval_ted_trans_en-es_samples_per_second": 199.395, |
|
"eval_ted_trans_en-es_steps_per_second": 4.345, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_ted_trans_en-ms_accuracy": 0.6616502335236119, |
|
"eval_ted_trans_en-ms_loss": 1.5517578125, |
|
"eval_ted_trans_en-ms_runtime": 0.9865, |
|
"eval_ted_trans_en-ms_samples_per_second": 42.573, |
|
"eval_ted_trans_en-ms_steps_per_second": 1.014, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_xsum_accuracy": 0.6076375151351375, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 192.6396, |
|
"eval_xsum_samples_per_second": 211.841, |
|
"eval_xsum_steps_per_second": 4.418, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_cnn_dailymail_accuracy": 0.6761071775649035, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 278.6917, |
|
"eval_cnn_dailymail_samples_per_second": 206.045, |
|
"eval_cnn_dailymail_steps_per_second": 4.295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_multi_news_accuracy": 0.5442969643083958, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 45.8208, |
|
"eval_multi_news_samples_per_second": 196.308, |
|
"eval_multi_news_steps_per_second": 4.103, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_tldr_news_accuracy": 0.5842189410326343, |
|
"eval_tldr_news_loss": 1.8740234375, |
|
"eval_tldr_news_runtime": 3.8029, |
|
"eval_tldr_news_samples_per_second": 375.502, |
|
"eval_tldr_news_steps_per_second": 7.889, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_scitldr_accuracy": 0.49108589951377635, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.7953, |
|
"eval_scitldr_samples_per_second": 142.738, |
|
"eval_scitldr_steps_per_second": 3.22, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_samsum_accuracy": 0.6296691857399721, |
|
"eval_samsum_loss": 1.359375, |
|
"eval_samsum_runtime": 14.6559, |
|
"eval_samsum_samples_per_second": 201.08, |
|
"eval_samsum_steps_per_second": 4.23, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_debate_sum_accuracy": 0.9376602666300555, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 243.7385, |
|
"eval_debate_sum_samples_per_second": 197.4, |
|
"eval_debate_sum_steps_per_second": 4.115, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_billsum_accuracy": 0.674664779787867, |
|
"eval_billsum_loss": 1.3837890625, |
|
"eval_billsum_runtime": 28.345, |
|
"eval_billsum_samples_per_second": 133.71, |
|
"eval_billsum_steps_per_second": 2.787, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_wmt2019_zh-en_accuracy": 0.6330300731414937, |
|
"eval_wmt2019_zh-en_loss": 1.6611328125, |
|
"eval_wmt2019_zh-en_runtime": 11.8726, |
|
"eval_wmt2019_zh-en_samples_per_second": 335.309, |
|
"eval_wmt2019_zh-en_steps_per_second": 6.991, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_wmt2019_ru-en_accuracy": 0.7280151437129657, |
|
"eval_wmt2019_ru-en_loss": 1.083984375, |
|
"eval_wmt2019_ru-en_runtime": 10.0609, |
|
"eval_wmt2019_ru-en_samples_per_second": 298.183, |
|
"eval_wmt2019_ru-en_steps_per_second": 6.262, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_wmt2019_de-en_accuracy": 0.7374484938192584, |
|
"eval_wmt2019_de-en_loss": 1.05859375, |
|
"eval_wmt2019_de-en_runtime": 7.6833, |
|
"eval_wmt2019_de-en_samples_per_second": 390.199, |
|
"eval_wmt2019_de-en_steps_per_second": 8.2, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_wmt2019_fr-de_accuracy": 0.7249404264537492, |
|
"eval_wmt2019_fr-de_loss": 1.1298828125, |
|
"eval_wmt2019_fr-de_runtime": 4.7455, |
|
"eval_wmt2019_fr-de_samples_per_second": 318.618, |
|
"eval_wmt2019_fr-de_steps_per_second": 6.743, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_essay_instruction_accuracy": 0.5962563297437145, |
|
"eval_essay_instruction_loss": 1.96484375, |
|
"eval_essay_instruction_runtime": 4.8055, |
|
"eval_essay_instruction_samples_per_second": 85.943, |
|
"eval_essay_instruction_steps_per_second": 1.873, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_reddit_eli5_accuracy": 0.44550306361932773, |
|
"eval_reddit_eli5_loss": 2.546875, |
|
"eval_reddit_eli5_runtime": 268.1996, |
|
"eval_reddit_eli5_samples_per_second": 203.308, |
|
"eval_reddit_eli5_steps_per_second": 4.236, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_reddit_askh_accuracy": 0.44892602615508864, |
|
"eval_reddit_askh_loss": 2.642578125, |
|
"eval_reddit_askh_runtime": 135.1366, |
|
"eval_reddit_askh_samples_per_second": 145.815, |
|
"eval_reddit_askh_steps_per_second": 3.041, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_reddit_asks_accuracy": 0.4570882832072159, |
|
"eval_reddit_asks_loss": 2.49609375, |
|
"eval_reddit_asks_runtime": 151.4006, |
|
"eval_reddit_asks_samples_per_second": 174.081, |
|
"eval_reddit_asks_steps_per_second": 3.633, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.92324164555435e-06, |
|
"loss": 1.5053, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.921733622873688e-06, |
|
"loss": 1.6214, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.920225600193027e-06, |
|
"loss": 1.5547, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9187175775123666e-06, |
|
"loss": 1.5706, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.917209554831705e-06, |
|
"loss": 1.6485, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.915701532151044e-06, |
|
"loss": 1.6188, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.914193509470383e-06, |
|
"loss": 1.5883, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9126854867897215e-06, |
|
"loss": 1.5876, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.911177464109061e-06, |
|
"loss": 1.5883, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9096694414284e-06, |
|
"loss": 1.584, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.908161418747738e-06, |
|
"loss": 1.6226, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.906653396067077e-06, |
|
"loss": 1.5241, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9051453733864164e-06, |
|
"loss": 1.552, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.903637350705755e-06, |
|
"loss": 1.6007, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.902129328025094e-06, |
|
"loss": 1.6413, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.900621305344433e-06, |
|
"loss": 1.5901, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.899113282663771e-06, |
|
"loss": 1.618, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8976052599831105e-06, |
|
"loss": 1.6042, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.89609723730245e-06, |
|
"loss": 1.6138, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.894589214621788e-06, |
|
"loss": 1.5651, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.893081191941127e-06, |
|
"loss": 1.557, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.891573169260466e-06, |
|
"loss": 1.589, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.890065146579805e-06, |
|
"loss": 1.6019, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.888557123899144e-06, |
|
"loss": 1.5311, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.887049101218483e-06, |
|
"loss": 1.5996, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_gsm8k_hard_accuracy": 0.917939008998005, |
|
"eval_gsm8k_hard_loss": 0.353759765625, |
|
"eval_gsm8k_hard_runtime": 2.0742, |
|
"eval_gsm8k_hard_samples_per_second": 127.281, |
|
"eval_gsm8k_hard_steps_per_second": 2.893, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_webgpt_accuracy": 0.48935223254836624, |
|
"eval_webgpt_loss": 2.275390625, |
|
"eval_webgpt_runtime": 18.9851, |
|
"eval_webgpt_samples_per_second": 206.267, |
|
"eval_webgpt_steps_per_second": 4.319, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_squad_v2_accuracy": 0.8966656801815133, |
|
"eval_squad_v2_loss": 0.312255859375, |
|
"eval_squad_v2_runtime": 87.2749, |
|
"eval_squad_v2_samples_per_second": 298.642, |
|
"eval_squad_v2_steps_per_second": 6.222, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_adversarial_qa_accuracy": 0.7900062684695979, |
|
"eval_adversarial_qa_loss": 0.8447265625, |
|
"eval_adversarial_qa_runtime": 21.2806, |
|
"eval_adversarial_qa_samples_per_second": 281.947, |
|
"eval_adversarial_qa_steps_per_second": 5.874, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_private_tuning_accuracy": 0.6689839396232736, |
|
"eval_private_tuning_loss": 1.2333984375, |
|
"eval_private_tuning_runtime": 65.9372, |
|
"eval_private_tuning_samples_per_second": 321.185, |
|
"eval_private_tuning_steps_per_second": 6.703, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_oa_translated_accuracy": 0.697770983402211, |
|
"eval_oa_translated_loss": 1.24609375, |
|
"eval_oa_translated_runtime": 707.9067, |
|
"eval_oa_translated_samples_per_second": 197.94, |
|
"eval_oa_translated_steps_per_second": 4.125, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_prosocial_dialogue_accuracy": 0.5329268895641822, |
|
"eval_prosocial_dialogue_loss": 1.771484375, |
|
"eval_prosocial_dialogue_runtime": 94.875, |
|
"eval_prosocial_dialogue_samples_per_second": 284.406, |
|
"eval_prosocial_dialogue_steps_per_second": 5.934, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_math_qa_accuracy": 0.5841662474001151, |
|
"eval_math_qa_loss": 1.80078125, |
|
"eval_math_qa_runtime": 18.717, |
|
"eval_math_qa_samples_per_second": 318.854, |
|
"eval_math_qa_steps_per_second": 6.678, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_wikihow_accuracy": 0.6094855082512828, |
|
"eval_wikihow_loss": 1.8798828125, |
|
"eval_wikihow_runtime": 8.1158, |
|
"eval_wikihow_samples_per_second": 282.535, |
|
"eval_wikihow_steps_per_second": 5.914, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_joke_accuracy": 0.4837945413191812, |
|
"eval_joke_loss": 2.259765625, |
|
"eval_joke_runtime": 0.8936, |
|
"eval_joke_samples_per_second": 85.053, |
|
"eval_joke_steps_per_second": 2.238, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_gsm8k_accuracy": 0.7703712514518408, |
|
"eval_gsm8k_loss": 0.86376953125, |
|
"eval_gsm8k_runtime": 5.5568, |
|
"eval_gsm8k_samples_per_second": 269.039, |
|
"eval_gsm8k_steps_per_second": 5.759, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_ted_trans_en-hi_accuracy": 0.6408450704225352, |
|
"eval_ted_trans_en-hi_loss": 1.4541015625, |
|
"eval_ted_trans_en-hi_runtime": 1.7176, |
|
"eval_ted_trans_en-hi_samples_per_second": 59.968, |
|
"eval_ted_trans_en-hi_steps_per_second": 1.747, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_ted_trans_de-ja_accuracy": 0.6207718785454686, |
|
"eval_ted_trans_de-ja_loss": 1.6513671875, |
|
"eval_ted_trans_de-ja_runtime": 4.1297, |
|
"eval_ted_trans_de-ja_samples_per_second": 173.862, |
|
"eval_ted_trans_de-ja_steps_per_second": 3.632, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_ted_trans_nl-en_accuracy": 0.7211525867714473, |
|
"eval_ted_trans_nl-en_loss": 1.2236328125, |
|
"eval_ted_trans_nl-en_runtime": 4.0953, |
|
"eval_ted_trans_nl-en_samples_per_second": 188.265, |
|
"eval_ted_trans_nl-en_steps_per_second": 4.151, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_ted_trans_en-ja_accuracy": 0.6330966258927838, |
|
"eval_ted_trans_en-ja_loss": 1.5634765625, |
|
"eval_ted_trans_en-ja_runtime": 4.1542, |
|
"eval_ted_trans_en-ja_samples_per_second": 192.817, |
|
"eval_ted_trans_en-ja_steps_per_second": 4.092, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_ted_trans_en-es_accuracy": 0.7685958860355181, |
|
"eval_ted_trans_en-es_loss": 0.984375, |
|
"eval_ted_trans_en-es_runtime": 4.9915, |
|
"eval_ted_trans_en-es_samples_per_second": 165.482, |
|
"eval_ted_trans_en-es_steps_per_second": 3.606, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_ted_trans_en-ms_accuracy": 0.6491956408925791, |
|
"eval_ted_trans_en-ms_loss": 1.5751953125, |
|
"eval_ted_trans_en-ms_runtime": 0.4795, |
|
"eval_ted_trans_en-ms_samples_per_second": 87.587, |
|
"eval_ted_trans_en-ms_steps_per_second": 2.085, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_xsum_accuracy": 0.6072009770109794, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 194.4193, |
|
"eval_xsum_samples_per_second": 209.902, |
|
"eval_xsum_steps_per_second": 4.377, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_cnn_dailymail_accuracy": 0.6753130640011107, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 277.0946, |
|
"eval_cnn_dailymail_samples_per_second": 207.232, |
|
"eval_cnn_dailymail_steps_per_second": 4.32, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_multi_news_accuracy": 0.5451066937634955, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 46.0554, |
|
"eval_multi_news_samples_per_second": 195.308, |
|
"eval_multi_news_steps_per_second": 4.082, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_tldr_news_accuracy": 0.5815386062259005, |
|
"eval_tldr_news_loss": 1.869140625, |
|
"eval_tldr_news_runtime": 3.242, |
|
"eval_tldr_news_samples_per_second": 440.47, |
|
"eval_tldr_news_steps_per_second": 9.254, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_scitldr_accuracy": 0.49108589951377635, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.7516, |
|
"eval_scitldr_samples_per_second": 145.006, |
|
"eval_scitldr_steps_per_second": 3.271, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_samsum_accuracy": 0.6312103719125579, |
|
"eval_samsum_loss": 1.3564453125, |
|
"eval_samsum_runtime": 13.9794, |
|
"eval_samsum_samples_per_second": 210.81, |
|
"eval_samsum_steps_per_second": 4.435, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_debate_sum_accuracy": 0.9370392048784827, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 244.8227, |
|
"eval_debate_sum_samples_per_second": 196.526, |
|
"eval_debate_sum_steps_per_second": 4.097, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_billsum_accuracy": 0.6767193885992487, |
|
"eval_billsum_loss": 1.3740234375, |
|
"eval_billsum_runtime": 27.455, |
|
"eval_billsum_samples_per_second": 138.044, |
|
"eval_billsum_steps_per_second": 2.877, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_wmt2019_zh-en_accuracy": 0.6325426044271557, |
|
"eval_wmt2019_zh-en_loss": 1.658203125, |
|
"eval_wmt2019_zh-en_runtime": 12.0617, |
|
"eval_wmt2019_zh-en_samples_per_second": 330.053, |
|
"eval_wmt2019_zh-en_steps_per_second": 6.881, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_wmt2019_ru-en_accuracy": 0.7264274688562131, |
|
"eval_wmt2019_ru-en_loss": 1.0927734375, |
|
"eval_wmt2019_ru-en_runtime": 10.931, |
|
"eval_wmt2019_ru-en_samples_per_second": 274.449, |
|
"eval_wmt2019_ru-en_steps_per_second": 5.763, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_wmt2019_de-en_accuracy": 0.7404288514621754, |
|
"eval_wmt2019_de-en_loss": 1.0498046875, |
|
"eval_wmt2019_de-en_runtime": 7.1074, |
|
"eval_wmt2019_de-en_samples_per_second": 421.813, |
|
"eval_wmt2019_de-en_steps_per_second": 8.864, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_wmt2019_fr-de_accuracy": 0.7267868761037272, |
|
"eval_wmt2019_fr-de_loss": 1.1259765625, |
|
"eval_wmt2019_fr-de_runtime": 6.3509, |
|
"eval_wmt2019_fr-de_samples_per_second": 238.077, |
|
"eval_wmt2019_fr-de_steps_per_second": 5.039, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_essay_instruction_accuracy": 0.5968423147513433, |
|
"eval_essay_instruction_loss": 1.9580078125, |
|
"eval_essay_instruction_runtime": 4.8143, |
|
"eval_essay_instruction_samples_per_second": 85.786, |
|
"eval_essay_instruction_steps_per_second": 1.869, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_reddit_eli5_accuracy": 0.4457660851447585, |
|
"eval_reddit_eli5_loss": 2.54296875, |
|
"eval_reddit_eli5_runtime": 291.6459, |
|
"eval_reddit_eli5_samples_per_second": 186.963, |
|
"eval_reddit_eli5_steps_per_second": 3.895, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_reddit_askh_accuracy": 0.4493032630102438, |
|
"eval_reddit_askh_loss": 2.640625, |
|
"eval_reddit_askh_runtime": 111.9151, |
|
"eval_reddit_askh_samples_per_second": 176.071, |
|
"eval_reddit_askh_steps_per_second": 3.672, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_reddit_asks_accuracy": 0.4576012073512167, |
|
"eval_reddit_asks_loss": 2.4921875, |
|
"eval_reddit_asks_runtime": 151.9908, |
|
"eval_reddit_asks_samples_per_second": 173.405, |
|
"eval_reddit_asks_steps_per_second": 3.619, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.885541078537821e-06, |
|
"loss": 1.5723, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.88403305585716e-06, |
|
"loss": 1.5749, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8825250331764996e-06, |
|
"loss": 1.6074, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.881167812763905e-06, |
|
"loss": 1.611, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.879659790083243e-06, |
|
"loss": 1.5503, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.878151767402582e-06, |
|
"loss": 1.5197, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.876643744721921e-06, |
|
"loss": 1.6165, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.87513572204126e-06, |
|
"loss": 1.5622, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.873627699360599e-06, |
|
"loss": 1.6142, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.872119676679938e-06, |
|
"loss": 1.5699, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.870611653999276e-06, |
|
"loss": 1.5571, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8691036313186155e-06, |
|
"loss": 1.7093, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.867595608637955e-06, |
|
"loss": 1.5411, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.866087585957293e-06, |
|
"loss": 1.6657, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.864579563276632e-06, |
|
"loss": 1.6215, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.863071540595971e-06, |
|
"loss": 1.6317, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.86156351791531e-06, |
|
"loss": 1.573, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.860055495234649e-06, |
|
"loss": 1.5732, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.858547472553988e-06, |
|
"loss": 1.5991, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.857039449873326e-06, |
|
"loss": 1.6105, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.855531427192665e-06, |
|
"loss": 1.627, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8540234045120045e-06, |
|
"loss": 1.59, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.852515381831343e-06, |
|
"loss": 1.5923, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.851007359150682e-06, |
|
"loss": 1.5461, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.849499336470021e-06, |
|
"loss": 1.5792, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_gsm8k_hard_accuracy": 0.9194658197956109, |
|
"eval_gsm8k_hard_loss": 0.34375, |
|
"eval_gsm8k_hard_runtime": 2.5861, |
|
"eval_gsm8k_hard_samples_per_second": 102.084, |
|
"eval_gsm8k_hard_steps_per_second": 2.32, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_webgpt_accuracy": 0.48958552214679374, |
|
"eval_webgpt_loss": 2.26953125, |
|
"eval_webgpt_runtime": 18.454, |
|
"eval_webgpt_samples_per_second": 212.203, |
|
"eval_webgpt_steps_per_second": 4.443, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_squad_v2_accuracy": 0.902612776392001, |
|
"eval_squad_v2_loss": 0.312744140625, |
|
"eval_squad_v2_runtime": 87.2414, |
|
"eval_squad_v2_samples_per_second": 298.757, |
|
"eval_squad_v2_steps_per_second": 6.224, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_adversarial_qa_accuracy": 0.8242739022715739, |
|
"eval_adversarial_qa_loss": 0.765625, |
|
"eval_adversarial_qa_runtime": 21.8127, |
|
"eval_adversarial_qa_samples_per_second": 275.069, |
|
"eval_adversarial_qa_steps_per_second": 5.731, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_private_tuning_accuracy": 0.6703786408057409, |
|
"eval_private_tuning_loss": 1.2275390625, |
|
"eval_private_tuning_runtime": 61.3818, |
|
"eval_private_tuning_samples_per_second": 345.021, |
|
"eval_private_tuning_steps_per_second": 7.201, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_oa_translated_accuracy": 0.6998150442485734, |
|
"eval_oa_translated_loss": 1.2353515625, |
|
"eval_oa_translated_runtime": 717.5723, |
|
"eval_oa_translated_samples_per_second": 195.274, |
|
"eval_oa_translated_steps_per_second": 4.069, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_prosocial_dialogue_accuracy": 0.5407654479692391, |
|
"eval_prosocial_dialogue_loss": 1.75390625, |
|
"eval_prosocial_dialogue_runtime": 73.7989, |
|
"eval_prosocial_dialogue_samples_per_second": 365.629, |
|
"eval_prosocial_dialogue_steps_per_second": 7.629, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_math_qa_accuracy": 0.5880693164312722, |
|
"eval_math_qa_loss": 1.77734375, |
|
"eval_math_qa_runtime": 18.7243, |
|
"eval_math_qa_samples_per_second": 318.731, |
|
"eval_math_qa_steps_per_second": 6.676, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_wikihow_accuracy": 0.6112120371654417, |
|
"eval_wikihow_loss": 1.8701171875, |
|
"eval_wikihow_runtime": 8.0237, |
|
"eval_wikihow_samples_per_second": 285.777, |
|
"eval_wikihow_steps_per_second": 5.982, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_joke_accuracy": 0.4860689916603487, |
|
"eval_joke_loss": 2.2421875, |
|
"eval_joke_runtime": 0.9634, |
|
"eval_joke_samples_per_second": 78.886, |
|
"eval_joke_steps_per_second": 2.076, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_gsm8k_accuracy": 0.7713298162634165, |
|
"eval_gsm8k_loss": 0.857421875, |
|
"eval_gsm8k_runtime": 6.4806, |
|
"eval_gsm8k_samples_per_second": 230.688, |
|
"eval_gsm8k_steps_per_second": 4.938, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_ted_trans_en-hi_accuracy": 0.6369557351344832, |
|
"eval_ted_trans_en-hi_loss": 1.482421875, |
|
"eval_ted_trans_en-hi_runtime": 1.0804, |
|
"eval_ted_trans_en-hi_samples_per_second": 95.332, |
|
"eval_ted_trans_en-hi_steps_per_second": 2.777, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_ted_trans_de-ja_accuracy": 0.6203946747238221, |
|
"eval_ted_trans_de-ja_loss": 1.6611328125, |
|
"eval_ted_trans_de-ja_runtime": 4.4636, |
|
"eval_ted_trans_de-ja_samples_per_second": 160.856, |
|
"eval_ted_trans_de-ja_steps_per_second": 3.36, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_ted_trans_nl-en_accuracy": 0.7250118302749882, |
|
"eval_ted_trans_nl-en_loss": 1.2099609375, |
|
"eval_ted_trans_nl-en_runtime": 3.3901, |
|
"eval_ted_trans_nl-en_samples_per_second": 227.426, |
|
"eval_ted_trans_nl-en_steps_per_second": 5.015, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_ted_trans_en-ja_accuracy": 0.6299969453212504, |
|
"eval_ted_trans_en-ja_loss": 1.5703125, |
|
"eval_ted_trans_en-ja_runtime": 3.888, |
|
"eval_ted_trans_en-ja_samples_per_second": 206.017, |
|
"eval_ted_trans_en-ja_steps_per_second": 4.372, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_ted_trans_en-es_accuracy": 0.7667636252296387, |
|
"eval_ted_trans_en-es_loss": 0.9814453125, |
|
"eval_ted_trans_en-es_runtime": 4.4116, |
|
"eval_ted_trans_en-es_samples_per_second": 187.233, |
|
"eval_ted_trans_en-es_steps_per_second": 4.08, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_ted_trans_en-ms_accuracy": 0.6590555267254801, |
|
"eval_ted_trans_en-ms_loss": 1.509765625, |
|
"eval_ted_trans_en-ms_runtime": 1.256, |
|
"eval_ted_trans_en-ms_samples_per_second": 33.438, |
|
"eval_ted_trans_en-ms_steps_per_second": 0.796, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_xsum_accuracy": 0.6091027172290235, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 192.154, |
|
"eval_xsum_samples_per_second": 212.377, |
|
"eval_xsum_steps_per_second": 4.429, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_cnn_dailymail_accuracy": 0.6791855245499561, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 278.657, |
|
"eval_cnn_dailymail_samples_per_second": 206.071, |
|
"eval_cnn_dailymail_steps_per_second": 4.296, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_multi_news_accuracy": 0.547242906816125, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 46.0885, |
|
"eval_multi_news_samples_per_second": 195.168, |
|
"eval_multi_news_steps_per_second": 4.079, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_tldr_news_accuracy": 0.5899087745697358, |
|
"eval_tldr_news_loss": 1.82421875, |
|
"eval_tldr_news_runtime": 3.2945, |
|
"eval_tldr_news_samples_per_second": 433.455, |
|
"eval_tldr_news_steps_per_second": 9.106, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_scitldr_accuracy": 0.48946515397082657, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.999, |
|
"eval_scitldr_samples_per_second": 133.043, |
|
"eval_scitldr_steps_per_second": 3.001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_samsum_accuracy": 0.6357663345455529, |
|
"eval_samsum_loss": 1.3447265625, |
|
"eval_samsum_runtime": 13.9699, |
|
"eval_samsum_samples_per_second": 210.954, |
|
"eval_samsum_steps_per_second": 4.438, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_debate_sum_accuracy": 0.937797152052083, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 251.6686, |
|
"eval_debate_sum_samples_per_second": 191.18, |
|
"eval_debate_sum_steps_per_second": 3.985, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_billsum_accuracy": 0.6771982567431513, |
|
"eval_billsum_loss": 1.3671875, |
|
"eval_billsum_runtime": 20.1117, |
|
"eval_billsum_samples_per_second": 188.448, |
|
"eval_billsum_steps_per_second": 3.928, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_wmt2019_zh-en_accuracy": 0.6312172508742782, |
|
"eval_wmt2019_zh-en_loss": 1.6611328125, |
|
"eval_wmt2019_zh-en_runtime": 12.8125, |
|
"eval_wmt2019_zh-en_samples_per_second": 310.712, |
|
"eval_wmt2019_zh-en_steps_per_second": 6.478, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_wmt2019_ru-en_accuracy": 0.7286970006594347, |
|
"eval_wmt2019_ru-en_loss": 1.0859375, |
|
"eval_wmt2019_ru-en_runtime": 10.0956, |
|
"eval_wmt2019_ru-en_samples_per_second": 297.16, |
|
"eval_wmt2019_ru-en_steps_per_second": 6.24, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_wmt2019_de-en_accuracy": 0.7417590110813298, |
|
"eval_wmt2019_de-en_loss": 1.044921875, |
|
"eval_wmt2019_de-en_runtime": 7.9617, |
|
"eval_wmt2019_de-en_samples_per_second": 376.551, |
|
"eval_wmt2019_de-en_steps_per_second": 7.913, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_wmt2019_fr-de_accuracy": 0.7250758560901603, |
|
"eval_wmt2019_fr-de_loss": 1.130859375, |
|
"eval_wmt2019_fr-de_runtime": 4.4498, |
|
"eval_wmt2019_fr-de_samples_per_second": 339.792, |
|
"eval_wmt2019_fr-de_steps_per_second": 7.191, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_essay_instruction_accuracy": 0.5974504124007696, |
|
"eval_essay_instruction_loss": 1.953125, |
|
"eval_essay_instruction_runtime": 5.4069, |
|
"eval_essay_instruction_samples_per_second": 76.383, |
|
"eval_essay_instruction_steps_per_second": 1.665, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_reddit_eli5_accuracy": 0.44636360001599296, |
|
"eval_reddit_eli5_loss": 2.54296875, |
|
"eval_reddit_eli5_runtime": 271.6541, |
|
"eval_reddit_eli5_samples_per_second": 200.722, |
|
"eval_reddit_eli5_steps_per_second": 4.182, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_reddit_askh_accuracy": 0.449544836282747, |
|
"eval_reddit_askh_loss": 2.638671875, |
|
"eval_reddit_askh_runtime": 133.9706, |
|
"eval_reddit_askh_samples_per_second": 147.084, |
|
"eval_reddit_askh_steps_per_second": 3.068, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_reddit_asks_accuracy": 0.4580279915626371, |
|
"eval_reddit_asks_loss": 2.490234375, |
|
"eval_reddit_asks_runtime": 148.6473, |
|
"eval_reddit_asks_samples_per_second": 177.306, |
|
"eval_reddit_asks_steps_per_second": 3.7, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8479913137893595e-06, |
|
"loss": 1.5467, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.846483291108699e-06, |
|
"loss": 1.6584, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.844975268428038e-06, |
|
"loss": 1.5414, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.843467245747376e-06, |
|
"loss": 1.5565, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.841959223066715e-06, |
|
"loss": 1.5377, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.840451200386054e-06, |
|
"loss": 1.5815, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.838943177705393e-06, |
|
"loss": 1.5763, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.837435155024732e-06, |
|
"loss": 1.5399, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.835927132344071e-06, |
|
"loss": 1.5486, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.834419109663409e-06, |
|
"loss": 1.5657, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8329110869827485e-06, |
|
"loss": 1.5551, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.831403064302088e-06, |
|
"loss": 1.5926, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.829895041621426e-06, |
|
"loss": 1.5123, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.828387018940765e-06, |
|
"loss": 1.6461, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.826878996260104e-06, |
|
"loss": 1.5276, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.825370973579443e-06, |
|
"loss": 1.6597, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.823862950898782e-06, |
|
"loss": 1.5458, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.822354928218121e-06, |
|
"loss": 1.565, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.820846905537459e-06, |
|
"loss": 1.5876, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.819338882856798e-06, |
|
"loss": 1.5786, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8178308601761375e-06, |
|
"loss": 1.5501, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.816322837495476e-06, |
|
"loss": 1.5303, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.814814814814815e-06, |
|
"loss": 1.6207, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.813306792134154e-06, |
|
"loss": 1.5776, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.811798769453493e-06, |
|
"loss": 1.6652, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_gsm8k_hard_accuracy": 0.9206872684336957, |
|
"eval_gsm8k_hard_loss": 0.338134765625, |
|
"eval_gsm8k_hard_runtime": 3.5208, |
|
"eval_gsm8k_hard_samples_per_second": 74.983, |
|
"eval_gsm8k_hard_steps_per_second": 1.704, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_webgpt_accuracy": 0.48976586658813137, |
|
"eval_webgpt_loss": 2.265625, |
|
"eval_webgpt_runtime": 18.5149, |
|
"eval_webgpt_samples_per_second": 211.505, |
|
"eval_webgpt_steps_per_second": 4.429, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_squad_v2_accuracy": 0.906819430938993, |
|
"eval_squad_v2_loss": 0.292724609375, |
|
"eval_squad_v2_runtime": 87.4932, |
|
"eval_squad_v2_samples_per_second": 297.897, |
|
"eval_squad_v2_steps_per_second": 6.206, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_adversarial_qa_accuracy": 0.8013193636010866, |
|
"eval_adversarial_qa_loss": 0.8310546875, |
|
"eval_adversarial_qa_runtime": 21.1349, |
|
"eval_adversarial_qa_samples_per_second": 283.891, |
|
"eval_adversarial_qa_steps_per_second": 5.914, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_private_tuning_accuracy": 0.6708063990236344, |
|
"eval_private_tuning_loss": 1.2236328125, |
|
"eval_private_tuning_runtime": 65.2825, |
|
"eval_private_tuning_samples_per_second": 324.405, |
|
"eval_private_tuning_steps_per_second": 6.771, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_oa_translated_accuracy": 0.7019998653150882, |
|
"eval_oa_translated_loss": 1.2255859375, |
|
"eval_oa_translated_runtime": 721.6165, |
|
"eval_oa_translated_samples_per_second": 194.179, |
|
"eval_oa_translated_steps_per_second": 4.046, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_prosocial_dialogue_accuracy": 0.5347828898075921, |
|
"eval_prosocial_dialogue_loss": 1.7451171875, |
|
"eval_prosocial_dialogue_runtime": 75.1624, |
|
"eval_prosocial_dialogue_samples_per_second": 358.996, |
|
"eval_prosocial_dialogue_steps_per_second": 7.49, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_math_qa_accuracy": 0.5915055732195003, |
|
"eval_math_qa_loss": 1.7578125, |
|
"eval_math_qa_runtime": 18.6859, |
|
"eval_math_qa_samples_per_second": 319.384, |
|
"eval_math_qa_steps_per_second": 6.69, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_wikihow_accuracy": 0.6088822632089863, |
|
"eval_wikihow_loss": 1.8623046875, |
|
"eval_wikihow_runtime": 7.0536, |
|
"eval_wikihow_samples_per_second": 325.084, |
|
"eval_wikihow_steps_per_second": 6.805, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_joke_accuracy": 0.4927975739196361, |
|
"eval_joke_loss": 2.203125, |
|
"eval_joke_runtime": 1.8797, |
|
"eval_joke_samples_per_second": 40.431, |
|
"eval_joke_steps_per_second": 1.064, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_gsm8k_accuracy": 0.7729670729488812, |
|
"eval_gsm8k_loss": 0.84765625, |
|
"eval_gsm8k_runtime": 5.4471, |
|
"eval_gsm8k_samples_per_second": 274.458, |
|
"eval_gsm8k_steps_per_second": 5.875, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_ted_trans_en-hi_accuracy": 0.6606576107655199, |
|
"eval_ted_trans_en-hi_loss": 1.3681640625, |
|
"eval_ted_trans_en-hi_runtime": 1.9419, |
|
"eval_ted_trans_en-hi_samples_per_second": 53.04, |
|
"eval_ted_trans_en-hi_steps_per_second": 1.545, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_ted_trans_de-ja_accuracy": 0.6181494047961354, |
|
"eval_ted_trans_de-ja_loss": 1.669921875, |
|
"eval_ted_trans_de-ja_runtime": 4.4657, |
|
"eval_ted_trans_de-ja_samples_per_second": 160.781, |
|
"eval_ted_trans_de-ja_steps_per_second": 3.359, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_ted_trans_nl-en_accuracy": 0.7275129478913435, |
|
"eval_ted_trans_nl-en_loss": 1.193359375, |
|
"eval_ted_trans_nl-en_runtime": 3.5649, |
|
"eval_ted_trans_nl-en_samples_per_second": 216.275, |
|
"eval_ted_trans_nl-en_steps_per_second": 4.769, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_ted_trans_en-ja_accuracy": 0.6332216902623955, |
|
"eval_ted_trans_en-ja_loss": 1.5615234375, |
|
"eval_ted_trans_en-ja_runtime": 4.3033, |
|
"eval_ted_trans_en-ja_samples_per_second": 186.138, |
|
"eval_ted_trans_en-ja_steps_per_second": 3.95, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_ted_trans_en-es_accuracy": 0.7696410515672396, |
|
"eval_ted_trans_en-es_loss": 0.96875, |
|
"eval_ted_trans_en-es_runtime": 5.0224, |
|
"eval_ted_trans_en-es_samples_per_second": 164.462, |
|
"eval_ted_trans_en-es_steps_per_second": 3.584, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_ted_trans_en-ms_accuracy": 0.6727053140096618, |
|
"eval_ted_trans_en-ms_loss": 1.4013671875, |
|
"eval_ted_trans_en-ms_runtime": 0.4904, |
|
"eval_ted_trans_en-ms_samples_per_second": 85.65, |
|
"eval_ted_trans_en-ms_steps_per_second": 2.039, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_xsum_accuracy": 0.6095022536611729, |
|
"eval_xsum_loss": NaN, |
|
"eval_xsum_runtime": 191.8464, |
|
"eval_xsum_samples_per_second": 212.717, |
|
"eval_xsum_steps_per_second": 4.436, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_cnn_dailymail_accuracy": 0.6799426165023833, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 276.5156, |
|
"eval_cnn_dailymail_samples_per_second": 207.666, |
|
"eval_cnn_dailymail_steps_per_second": 4.329, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_multi_news_accuracy": 0.547789914899022, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 46.7011, |
|
"eval_multi_news_samples_per_second": 192.608, |
|
"eval_multi_news_steps_per_second": 4.026, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_tldr_news_accuracy": 0.586805229004044, |
|
"eval_tldr_news_loss": 1.861328125, |
|
"eval_tldr_news_runtime": 4.1288, |
|
"eval_tldr_news_samples_per_second": 345.86, |
|
"eval_tldr_news_steps_per_second": 7.266, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_scitldr_accuracy": 0.49270664505672607, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 2.9031, |
|
"eval_scitldr_samples_per_second": 137.437, |
|
"eval_scitldr_steps_per_second": 3.1, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_samsum_accuracy": 0.6352255674674526, |
|
"eval_samsum_loss": 1.3408203125, |
|
"eval_samsum_runtime": 13.3091, |
|
"eval_samsum_samples_per_second": 221.427, |
|
"eval_samsum_steps_per_second": 4.658, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_debate_sum_accuracy": 0.9398914583902843, |
|
"eval_debate_sum_loss": NaN, |
|
"eval_debate_sum_runtime": 244.3103, |
|
"eval_debate_sum_samples_per_second": 196.938, |
|
"eval_debate_sum_steps_per_second": 4.105, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_billsum_accuracy": 0.6780196221313985, |
|
"eval_billsum_loss": 1.3662109375, |
|
"eval_billsum_runtime": 26.3691, |
|
"eval_billsum_samples_per_second": 143.729, |
|
"eval_billsum_steps_per_second": 2.996, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_wmt2019_zh-en_accuracy": 0.6364605543710021, |
|
"eval_wmt2019_zh-en_loss": 1.6376953125, |
|
"eval_wmt2019_zh-en_runtime": 14.2747, |
|
"eval_wmt2019_zh-en_samples_per_second": 278.886, |
|
"eval_wmt2019_zh-en_steps_per_second": 5.815, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_wmt2019_ru-en_accuracy": 0.7199560058060365, |
|
"eval_wmt2019_ru-en_loss": 1.107421875, |
|
"eval_wmt2019_ru-en_runtime": 8.7473, |
|
"eval_wmt2019_ru-en_samples_per_second": 342.965, |
|
"eval_wmt2019_ru-en_steps_per_second": 7.202, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_wmt2019_de-en_accuracy": 0.7420960891168457, |
|
"eval_wmt2019_de-en_loss": 1.0400390625, |
|
"eval_wmt2019_de-en_runtime": 8.7795, |
|
"eval_wmt2019_de-en_samples_per_second": 341.476, |
|
"eval_wmt2019_de-en_steps_per_second": 7.176, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_wmt2019_fr-de_accuracy": 0.7250604501209003, |
|
"eval_wmt2019_fr-de_loss": 1.12890625, |
|
"eval_wmt2019_fr-de_runtime": 4.8598, |
|
"eval_wmt2019_fr-de_samples_per_second": 311.121, |
|
"eval_wmt2019_fr-de_steps_per_second": 6.585, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_essay_instruction_accuracy": 0.5982741083077195, |
|
"eval_essay_instruction_loss": 1.94921875, |
|
"eval_essay_instruction_runtime": 5.5402, |
|
"eval_essay_instruction_samples_per_second": 74.546, |
|
"eval_essay_instruction_steps_per_second": 1.624, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_reddit_eli5_accuracy": 0.44749512828734445, |
|
"eval_reddit_eli5_loss": 2.541015625, |
|
"eval_reddit_eli5_runtime": 289.5058, |
|
"eval_reddit_eli5_samples_per_second": 188.345, |
|
"eval_reddit_eli5_steps_per_second": 3.924, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_reddit_askh_accuracy": 0.45043337961716656, |
|
"eval_reddit_askh_loss": 2.63671875, |
|
"eval_reddit_askh_runtime": 112.2629, |
|
"eval_reddit_askh_samples_per_second": 175.525, |
|
"eval_reddit_askh_steps_per_second": 3.661, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_reddit_asks_accuracy": 0.4590568852017904, |
|
"eval_reddit_asks_loss": 2.490234375, |
|
"eval_reddit_asks_runtime": 152.7985, |
|
"eval_reddit_asks_samples_per_second": 172.489, |
|
"eval_reddit_asks_steps_per_second": 3.6, |
|
"step": 2250 |
|
} |
|
], |
|
"max_steps": 34156, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.6587065990940983e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|