galactica-1.3b-v2 / trainer_state.json
theblackcat102's picture
Upload 8 files
bdf5de5
raw
history blame
40.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.217287974468663,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.666666666666667e-06,
"loss": 2.5689,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 3.410009602540877e-06,
"loss": 2.2872,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 3.899727994397217e-06,
"loss": 2.2045,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 4.242838952070665e-06,
"loss": 2.1665,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 4.507189546742703e-06,
"loss": 2.0691,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 4.722272031045718e-06,
"loss": 2.0554,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 4.9035975752993475e-06,
"loss": 2.0278,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 5.060338910107844e-06,
"loss": 2.0495,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 5.1983733510531e-06,
"loss": 2.0476,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 5.3216938522601335e-06,
"loss": 2.0547,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 5.433137327841662e-06,
"loss": 1.992,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 5.534791897046749e-06,
"loss": 2.0016,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 5.6282392274646635e-06,
"loss": 2.0251,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 5.71470613401092e-06,
"loss": 1.9642,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 5.795163382432731e-06,
"loss": 1.9489,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 5.870392331521204e-06,
"loss": 1.9492,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 5.941031212303129e-06,
"loss": 1.9794,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 6.007608082613048e-06,
"loss": 1.9443,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 6.070564811128651e-06,
"loss": 1.9561,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 6.130274870425884e-06,
"loss": 1.9445,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 6.187056762962811e-06,
"loss": 1.9922,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 6.241184306240316e-06,
"loss": 1.9092,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 6.292894619573035e-06,
"loss": 1.9877,
"step": 230
},
{
"epoch": 0.03,
"learning_rate": 6.3423944025283676e-06,
"loss": 1.9013,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 6.385204482203244e-06,
"loss": 1.9351,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 6.430985882568613e-06,
"loss": 1.9364,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 6.475026117410103e-06,
"loss": 1.9298,
"step": 270
},
{
"epoch": 0.03,
"learning_rate": 6.51745278911487e-06,
"loss": 1.9262,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 6.55837996735795e-06,
"loss": 1.9245,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 6.597910037536681e-06,
"loss": 1.9264,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 6.6361352440011835e-06,
"loss": 1.9318,
"step": 310
},
{
"epoch": 0.03,
"learning_rate": 6.673138986625154e-06,
"loss": 1.8992,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 6.708996916564478e-06,
"loss": 1.8958,
"step": 330
},
{
"epoch": 0.04,
"learning_rate": 6.74377786740708e-06,
"loss": 1.9317,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 6.777544650524216e-06,
"loss": 1.8891,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 6.810354737716999e-06,
"loss": 1.8944,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 6.842260849796047e-06,
"loss": 1.8904,
"step": 370
},
{
"epoch": 0.04,
"learning_rate": 6.873311466232601e-06,
"loss": 1.9326,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 6.903551268251219e-06,
"loss": 1.8795,
"step": 390
},
{
"epoch": 0.04,
"learning_rate": 6.933021525529835e-06,
"loss": 1.8729,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 6.961760434906346e-06,
"loss": 1.861,
"step": 410
},
{
"epoch": 0.05,
"learning_rate": 6.9898034180667605e-06,
"loss": 1.9003,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 7.017183384035125e-06,
"loss": 1.9061,
"step": 430
},
{
"epoch": 0.05,
"learning_rate": 7.043930961344266e-06,
"loss": 1.9146,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 7.07007470399505e-06,
"loss": 1.8741,
"step": 450
},
{
"epoch": 0.05,
"learning_rate": 7.095641274676984e-06,
"loss": 1.8402,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 7.120655608197665e-06,
"loss": 1.9109,
"step": 470
},
{
"epoch": 0.05,
"learning_rate": 7.145141057632318e-06,
"loss": 1.8829,
"step": 480
},
{
"epoch": 0.05,
"learning_rate": 7.169119525340562e-06,
"loss": 1.8557,
"step": 490
},
{
"epoch": 0.05,
"learning_rate": 7.19261158069258e-06,
"loss": 1.8577,
"step": 500
},
{
"epoch": 0.05,
"eval_webgpt_accuracy": 0.4847224518236187,
"eval_webgpt_loss": 2.392578125,
"eval_webgpt_runtime": 337.627,
"eval_webgpt_samples_per_second": 11.599,
"eval_webgpt_steps_per_second": 0.726,
"step": 500
},
{
"epoch": 0.05,
"eval_prompt_dialogue_accuracy": 0.5612746203747981,
"eval_prompt_dialogue_loss": 1.828125,
"eval_prompt_dialogue_runtime": 811.1162,
"eval_prompt_dialogue_samples_per_second": 12.71,
"eval_prompt_dialogue_steps_per_second": 0.795,
"step": 500
},
{
"epoch": 0.05,
"eval_squad_v2_accuracy": 0.9115060111379335,
"eval_squad_v2_loss": NaN,
"eval_squad_v2_runtime": 941.3453,
"eval_squad_v2_samples_per_second": 12.613,
"eval_squad_v2_steps_per_second": 0.789,
"step": 500
},
{
"epoch": 0.05,
"eval_adversarial_qa_accuracy": 0.8083160800552105,
"eval_adversarial_qa_loss": 0.86083984375,
"eval_adversarial_qa_runtime": 238.8648,
"eval_adversarial_qa_samples_per_second": 12.559,
"eval_adversarial_qa_steps_per_second": 0.787,
"step": 500
},
{
"epoch": 0.05,
"eval_trivia_qa_nocontext_accuracy": 0.45998188422713815,
"eval_trivia_qa_nocontext_loss": 3.12890625,
"eval_trivia_qa_nocontext_runtime": 1249.2124,
"eval_trivia_qa_nocontext_samples_per_second": 14.364,
"eval_trivia_qa_nocontext_steps_per_second": 0.898,
"step": 500
},
{
"epoch": 0.05,
"eval_m2m_translation_accuracy": 0.5733300105250808,
"eval_m2m_translation_loss": 1.8857421875,
"eval_m2m_translation_runtime": 3725.0595,
"eval_m2m_translation_samples_per_second": 14.208,
"eval_m2m_translation_steps_per_second": 0.888,
"step": 500
},
{
"epoch": 0.05,
"eval_xsum_accuracy": 0.5671291686677834,
"eval_xsum_loss": 1.9658203125,
"eval_xsum_runtime": 1011.0399,
"eval_xsum_samples_per_second": 11.208,
"eval_xsum_steps_per_second": 0.701,
"step": 500
},
{
"epoch": 0.05,
"eval_cnn_dailymail_accuracy": 0.6565405592058271,
"eval_cnn_dailymail_loss": 1.5810546875,
"eval_cnn_dailymail_runtime": 1213.6108,
"eval_cnn_dailymail_samples_per_second": 11.015,
"eval_cnn_dailymail_steps_per_second": 0.689,
"step": 500
},
{
"epoch": 0.05,
"eval_multi_news_accuracy": 0.5175576159426271,
"eval_multi_news_loss": 2.33203125,
"eval_multi_news_runtime": 512.3687,
"eval_multi_news_samples_per_second": 10.973,
"eval_multi_news_steps_per_second": 0.687,
"step": 500
},
{
"epoch": 0.05,
"eval_scitldr_accuracy": 0.46241087965899574,
"eval_scitldr_loss": 2.5234375,
"eval_scitldr_runtime": 57.2333,
"eval_scitldr_samples_per_second": 10.815,
"eval_scitldr_steps_per_second": 0.681,
"step": 500
},
{
"epoch": 0.05,
"eval_soda_accuracy": 0.7121364530942489,
"eval_soda_loss": 1.21484375,
"eval_soda_runtime": 82.3256,
"eval_soda_samples_per_second": 12.159,
"eval_soda_steps_per_second": 0.765,
"step": 500
},
{
"epoch": 0.05,
"eval_joke_accuracy": 0.454450170320552,
"eval_joke_loss": 2.62109375,
"eval_joke_runtime": 6.9296,
"eval_joke_samples_per_second": 10.967,
"eval_joke_steps_per_second": 0.722,
"step": 500
},
{
"epoch": 0.05,
"eval_gsm8k_accuracy": 0.8305793220560381,
"eval_gsm8k_loss": 0.61328125,
"eval_gsm8k_runtime": 113.5097,
"eval_gsm8k_samples_per_second": 11.62,
"eval_gsm8k_steps_per_second": 0.731,
"step": 500
},
{
"epoch": 0.05,
"eval_samsum_accuracy": 0.6179719084733047,
"eval_samsum_loss": 1.5537109375,
"eval_samsum_runtime": 73.3198,
"eval_samsum_samples_per_second": 11.157,
"eval_samsum_steps_per_second": 0.709,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 7.215636566090451e-06,
"loss": 1.8655,
"step": 510
},
{
"epoch": 0.06,
"learning_rate": 7.238212692653954e-06,
"loss": 1.9284,
"step": 520
},
{
"epoch": 0.06,
"learning_rate": 7.260357126756832e-06,
"loss": 1.8522,
"step": 530
},
{
"epoch": 0.06,
"learning_rate": 7.282086068443704e-06,
"loss": 1.8869,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 7.303414822624985e-06,
"loss": 1.905,
"step": 550
},
{
"epoch": 0.06,
"learning_rate": 7.324357863833543e-06,
"loss": 1.8483,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 7.344928895229384e-06,
"loss": 1.8628,
"step": 570
},
{
"epoch": 0.06,
"learning_rate": 7.365140902454744e-06,
"loss": 1.8982,
"step": 580
},
{
"epoch": 0.06,
"learning_rate": 7.385006202869703e-06,
"loss": 1.834,
"step": 590
},
{
"epoch": 0.07,
"learning_rate": 7.404536490635762e-06,
"loss": 1.8488,
"step": 600
},
{
"epoch": 0.07,
"learning_rate": 7.423742878060626e-06,
"loss": 1.8585,
"step": 610
},
{
"epoch": 0.07,
"learning_rate": 7.442635933570175e-06,
"loss": 1.8292,
"step": 620
},
{
"epoch": 0.07,
"learning_rate": 7.461225716632522e-06,
"loss": 1.8399,
"step": 630
},
{
"epoch": 0.07,
"learning_rate": 7.479521809923099e-06,
"loss": 1.8775,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 7.497533348988249e-06,
"loss": 1.8593,
"step": 650
},
{
"epoch": 0.07,
"learning_rate": 7.515269049637215e-06,
"loss": 1.8557,
"step": 660
},
{
"epoch": 0.07,
"learning_rate": 7.532737233268121e-06,
"loss": 1.8427,
"step": 670
},
{
"epoch": 0.07,
"learning_rate": 7.5499458503121685e-06,
"loss": 1.859,
"step": 680
},
{
"epoch": 0.07,
"learning_rate": 7.566902501961364e-06,
"loss": 1.8111,
"step": 690
},
{
"epoch": 0.08,
"learning_rate": 7.5836144603284295e-06,
"loss": 1.8246,
"step": 700
},
{
"epoch": 0.08,
"learning_rate": 7.600088687172717e-06,
"loss": 1.8613,
"step": 710
},
{
"epoch": 0.08,
"learning_rate": 7.616331851312801e-06,
"loss": 1.8478,
"step": 720
},
{
"epoch": 0.08,
"learning_rate": 7.632350344834765e-06,
"loss": 1.8839,
"step": 730
},
{
"epoch": 0.08,
"learning_rate": 7.648150298194777e-06,
"loss": 1.8625,
"step": 740
},
{
"epoch": 0.08,
"learning_rate": 7.663737594305231e-06,
"loss": 1.8291,
"step": 750
},
{
"epoch": 0.08,
"learning_rate": 7.679117881685476e-06,
"loss": 1.836,
"step": 760
},
{
"epoch": 0.08,
"learning_rate": 7.6942965867507e-06,
"loss": 1.8102,
"step": 770
},
{
"epoch": 0.08,
"learning_rate": 7.709278925305836e-06,
"loss": 1.8353,
"step": 780
},
{
"epoch": 0.09,
"learning_rate": 7.72406991330548e-06,
"loss": 1.8713,
"step": 790
},
{
"epoch": 0.09,
"learning_rate": 7.738674376935279e-06,
"loss": 1.8314,
"step": 800
},
{
"epoch": 0.09,
"learning_rate": 7.753096962065562e-06,
"loss": 1.8553,
"step": 810
},
{
"epoch": 0.09,
"learning_rate": 7.767342143123527e-06,
"loss": 1.8318,
"step": 820
},
{
"epoch": 0.09,
"learning_rate": 7.781414231426347e-06,
"loss": 1.8401,
"step": 830
},
{
"epoch": 0.09,
"learning_rate": 7.79531738301407e-06,
"loss": 1.8108,
"step": 840
},
{
"epoch": 0.09,
"learning_rate": 7.809055606017904e-06,
"loss": 1.8563,
"step": 850
},
{
"epoch": 0.09,
"learning_rate": 7.822632767596549e-06,
"loss": 1.8408,
"step": 860
},
{
"epoch": 0.09,
"learning_rate": 7.836052600470646e-06,
"loss": 1.8096,
"step": 870
},
{
"epoch": 0.1,
"learning_rate": 7.849318709082941e-06,
"loss": 1.8264,
"step": 880
},
{
"epoch": 0.1,
"learning_rate": 7.862434575409602e-06,
"loss": 1.8902,
"step": 890
},
{
"epoch": 0.1,
"learning_rate": 7.875403564446145e-06,
"loss": 1.7953,
"step": 900
},
{
"epoch": 0.1,
"learning_rate": 7.888228929389561e-06,
"loss": 1.837,
"step": 910
},
{
"epoch": 0.1,
"learning_rate": 7.900913816536647e-06,
"loss": 1.7704,
"step": 920
},
{
"epoch": 0.1,
"learning_rate": 7.913461269916965e-06,
"loss": 1.8251,
"step": 930
},
{
"epoch": 0.1,
"learning_rate": 7.925874235677506e-06,
"loss": 1.8294,
"step": 940
},
{
"epoch": 0.1,
"learning_rate": 7.938155566234842e-06,
"loss": 1.8306,
"step": 950
},
{
"epoch": 0.1,
"learning_rate": 7.950308024209451e-06,
"loss": 1.7889,
"step": 960
},
{
"epoch": 0.11,
"learning_rate": 7.962334286155715e-06,
"loss": 1.7922,
"step": 970
},
{
"epoch": 0.11,
"learning_rate": 7.974236946100272e-06,
"loss": 1.7679,
"step": 980
},
{
"epoch": 0.11,
"learning_rate": 7.986018518900343e-06,
"loss": 1.8068,
"step": 990
},
{
"epoch": 0.11,
"learning_rate": 7.997681443432989e-06,
"loss": 1.8467,
"step": 1000
},
{
"epoch": 0.11,
"eval_webgpt_accuracy": 0.4853852488269729,
"eval_webgpt_loss": 2.388671875,
"eval_webgpt_runtime": 337.66,
"eval_webgpt_samples_per_second": 11.597,
"eval_webgpt_steps_per_second": 0.726,
"step": 1000
},
{
"epoch": 0.11,
"eval_prompt_dialogue_accuracy": 0.5656945584798752,
"eval_prompt_dialogue_loss": 1.7978515625,
"eval_prompt_dialogue_runtime": 810.7166,
"eval_prompt_dialogue_samples_per_second": 12.716,
"eval_prompt_dialogue_steps_per_second": 0.796,
"step": 1000
},
{
"epoch": 0.11,
"eval_squad_v2_accuracy": 0.929180907765482,
"eval_squad_v2_loss": NaN,
"eval_squad_v2_runtime": 942.6897,
"eval_squad_v2_samples_per_second": 12.595,
"eval_squad_v2_steps_per_second": 0.788,
"step": 1000
},
{
"epoch": 0.11,
"eval_adversarial_qa_accuracy": 0.8106740280653324,
"eval_adversarial_qa_loss": 0.8291015625,
"eval_adversarial_qa_runtime": 239.9039,
"eval_adversarial_qa_samples_per_second": 12.505,
"eval_adversarial_qa_steps_per_second": 0.784,
"step": 1000
},
{
"epoch": 0.11,
"eval_trivia_qa_nocontext_accuracy": 0.46837345154352583,
"eval_trivia_qa_nocontext_loss": 3.052734375,
"eval_trivia_qa_nocontext_runtime": 1248.9886,
"eval_trivia_qa_nocontext_samples_per_second": 14.367,
"eval_trivia_qa_nocontext_steps_per_second": 0.898,
"step": 1000
},
{
"epoch": 0.11,
"eval_m2m_translation_accuracy": 0.6128791362753888,
"eval_m2m_translation_loss": 1.6904296875,
"eval_m2m_translation_runtime": 3724.3046,
"eval_m2m_translation_samples_per_second": 14.21,
"eval_m2m_translation_steps_per_second": 0.888,
"step": 1000
},
{
"epoch": 0.11,
"eval_xsum_accuracy": 0.576201200101912,
"eval_xsum_loss": 1.90625,
"eval_xsum_runtime": 1013.837,
"eval_xsum_samples_per_second": 11.177,
"eval_xsum_steps_per_second": 0.699,
"step": 1000
},
{
"epoch": 0.11,
"eval_cnn_dailymail_accuracy": 0.6590085257947359,
"eval_cnn_dailymail_loss": 1.5537109375,
"eval_cnn_dailymail_runtime": 1212.8269,
"eval_cnn_dailymail_samples_per_second": 11.022,
"eval_cnn_dailymail_steps_per_second": 0.689,
"step": 1000
},
{
"epoch": 0.11,
"eval_multi_news_accuracy": 0.5213214574389622,
"eval_multi_news_loss": 2.302734375,
"eval_multi_news_runtime": 514.4889,
"eval_multi_news_samples_per_second": 10.927,
"eval_multi_news_steps_per_second": 0.684,
"step": 1000
},
{
"epoch": 0.11,
"eval_scitldr_accuracy": 0.4667123357182727,
"eval_scitldr_loss": 2.50390625,
"eval_scitldr_runtime": 55.8523,
"eval_scitldr_samples_per_second": 11.083,
"eval_scitldr_steps_per_second": 0.698,
"step": 1000
},
{
"epoch": 0.11,
"eval_soda_accuracy": 0.7221954952984911,
"eval_soda_loss": 1.1513671875,
"eval_soda_runtime": 82.1743,
"eval_soda_samples_per_second": 12.181,
"eval_soda_steps_per_second": 0.767,
"step": 1000
},
{
"epoch": 0.11,
"eval_joke_accuracy": 0.45549829679447984,
"eval_joke_loss": 2.5625,
"eval_joke_runtime": 7.1872,
"eval_joke_samples_per_second": 10.574,
"eval_joke_steps_per_second": 0.696,
"step": 1000
},
{
"epoch": 0.11,
"eval_gsm8k_accuracy": 0.836947292796029,
"eval_gsm8k_loss": 0.591796875,
"eval_gsm8k_runtime": 112.7859,
"eval_gsm8k_samples_per_second": 11.695,
"eval_gsm8k_steps_per_second": 0.736,
"step": 1000
},
{
"epoch": 0.11,
"eval_samsum_accuracy": 0.6263182026593306,
"eval_samsum_loss": 1.5126953125,
"eval_samsum_runtime": 73.6542,
"eval_samsum_samples_per_second": 11.106,
"eval_samsum_steps_per_second": 0.706,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 7.997895686156621e-06,
"loss": 1.8114,
"step": 1010
},
{
"epoch": 0.11,
"learning_rate": 7.994889523523223e-06,
"loss": 1.857,
"step": 1020
},
{
"epoch": 0.11,
"learning_rate": 7.991883360889824e-06,
"loss": 1.8195,
"step": 1030
},
{
"epoch": 0.11,
"learning_rate": 7.988877198256426e-06,
"loss": 1.8101,
"step": 1040
},
{
"epoch": 0.11,
"learning_rate": 7.985871035623028e-06,
"loss": 1.8169,
"step": 1050
},
{
"epoch": 0.12,
"learning_rate": 7.982864872989627e-06,
"loss": 1.8414,
"step": 1060
},
{
"epoch": 0.12,
"learning_rate": 7.979858710356229e-06,
"loss": 1.825,
"step": 1070
},
{
"epoch": 0.12,
"learning_rate": 7.97685254772283e-06,
"loss": 1.7882,
"step": 1080
},
{
"epoch": 0.12,
"learning_rate": 7.973846385089432e-06,
"loss": 1.7857,
"step": 1090
},
{
"epoch": 0.12,
"learning_rate": 7.970840222456035e-06,
"loss": 1.8229,
"step": 1100
},
{
"epoch": 0.12,
"learning_rate": 7.967834059822637e-06,
"loss": 1.8152,
"step": 1110
},
{
"epoch": 0.12,
"learning_rate": 7.964827897189238e-06,
"loss": 1.7865,
"step": 1120
},
{
"epoch": 0.12,
"learning_rate": 7.96182173455584e-06,
"loss": 1.8038,
"step": 1130
},
{
"epoch": 0.12,
"learning_rate": 7.958815571922442e-06,
"loss": 1.796,
"step": 1140
},
{
"epoch": 0.12,
"learning_rate": 7.955809409289041e-06,
"loss": 1.7737,
"step": 1150
},
{
"epoch": 0.13,
"learning_rate": 7.952803246655643e-06,
"loss": 1.7958,
"step": 1160
},
{
"epoch": 0.13,
"learning_rate": 7.949797084022244e-06,
"loss": 1.7981,
"step": 1170
},
{
"epoch": 0.13,
"learning_rate": 7.946790921388846e-06,
"loss": 1.8061,
"step": 1180
},
{
"epoch": 0.13,
"learning_rate": 7.943784758755448e-06,
"loss": 1.8395,
"step": 1190
},
{
"epoch": 0.13,
"learning_rate": 7.940778596122049e-06,
"loss": 1.7803,
"step": 1200
},
{
"epoch": 0.13,
"learning_rate": 7.937772433488652e-06,
"loss": 1.7906,
"step": 1210
},
{
"epoch": 0.13,
"learning_rate": 7.934766270855254e-06,
"loss": 1.8076,
"step": 1220
},
{
"epoch": 0.13,
"learning_rate": 7.931760108221855e-06,
"loss": 1.7971,
"step": 1230
},
{
"epoch": 0.13,
"learning_rate": 7.928753945588455e-06,
"loss": 1.8292,
"step": 1240
},
{
"epoch": 0.14,
"learning_rate": 7.925747782955057e-06,
"loss": 1.7876,
"step": 1250
},
{
"epoch": 0.14,
"learning_rate": 7.922741620321658e-06,
"loss": 1.7499,
"step": 1260
},
{
"epoch": 0.14,
"learning_rate": 7.91973545768826e-06,
"loss": 1.8209,
"step": 1270
},
{
"epoch": 0.14,
"learning_rate": 7.916729295054861e-06,
"loss": 1.7971,
"step": 1280
},
{
"epoch": 0.14,
"learning_rate": 7.913723132421463e-06,
"loss": 1.8168,
"step": 1290
},
{
"epoch": 0.14,
"learning_rate": 7.910716969788065e-06,
"loss": 1.7771,
"step": 1300
},
{
"epoch": 0.14,
"learning_rate": 7.907710807154668e-06,
"loss": 1.7611,
"step": 1310
},
{
"epoch": 0.14,
"learning_rate": 7.90470464452127e-06,
"loss": 1.7673,
"step": 1320
},
{
"epoch": 0.14,
"learning_rate": 7.90169848188787e-06,
"loss": 1.8694,
"step": 1330
},
{
"epoch": 0.15,
"learning_rate": 7.89869231925447e-06,
"loss": 1.7543,
"step": 1340
},
{
"epoch": 0.15,
"learning_rate": 7.895686156621072e-06,
"loss": 1.759,
"step": 1350
},
{
"epoch": 0.15,
"learning_rate": 7.892679993987674e-06,
"loss": 1.7929,
"step": 1360
},
{
"epoch": 0.15,
"learning_rate": 7.889673831354275e-06,
"loss": 1.8223,
"step": 1370
},
{
"epoch": 0.15,
"learning_rate": 7.886667668720877e-06,
"loss": 1.7829,
"step": 1380
},
{
"epoch": 0.15,
"learning_rate": 7.883661506087479e-06,
"loss": 1.8088,
"step": 1390
},
{
"epoch": 0.15,
"learning_rate": 7.88065534345408e-06,
"loss": 1.8012,
"step": 1400
},
{
"epoch": 0.15,
"learning_rate": 7.877649180820682e-06,
"loss": 1.7337,
"step": 1410
},
{
"epoch": 0.15,
"learning_rate": 7.874643018187283e-06,
"loss": 1.7848,
"step": 1420
},
{
"epoch": 0.16,
"learning_rate": 7.871636855553885e-06,
"loss": 1.7741,
"step": 1430
},
{
"epoch": 0.16,
"learning_rate": 7.868630692920486e-06,
"loss": 1.8111,
"step": 1440
},
{
"epoch": 0.16,
"learning_rate": 7.865624530287088e-06,
"loss": 1.7663,
"step": 1450
},
{
"epoch": 0.16,
"learning_rate": 7.86261836765369e-06,
"loss": 1.7635,
"step": 1460
},
{
"epoch": 0.16,
"learning_rate": 7.859612205020291e-06,
"loss": 1.7613,
"step": 1470
},
{
"epoch": 0.16,
"learning_rate": 7.856606042386892e-06,
"loss": 1.7537,
"step": 1480
},
{
"epoch": 0.16,
"learning_rate": 7.853599879753494e-06,
"loss": 1.8094,
"step": 1490
},
{
"epoch": 0.16,
"learning_rate": 7.850593717120096e-06,
"loss": 1.7487,
"step": 1500
},
{
"epoch": 0.16,
"eval_webgpt_accuracy": 0.486666038374247,
"eval_webgpt_loss": 2.384765625,
"eval_webgpt_runtime": 336.8175,
"eval_webgpt_samples_per_second": 11.626,
"eval_webgpt_steps_per_second": 0.727,
"step": 1500
},
{
"epoch": 0.16,
"eval_prompt_dialogue_accuracy": 0.5696171266680501,
"eval_prompt_dialogue_loss": 1.7705078125,
"eval_prompt_dialogue_runtime": 809.6613,
"eval_prompt_dialogue_samples_per_second": 12.732,
"eval_prompt_dialogue_steps_per_second": 0.797,
"step": 1500
},
{
"epoch": 0.16,
"eval_squad_v2_accuracy": 0.9273763713281866,
"eval_squad_v2_loss": NaN,
"eval_squad_v2_runtime": 941.2422,
"eval_squad_v2_samples_per_second": 12.614,
"eval_squad_v2_steps_per_second": 0.789,
"step": 1500
},
{
"epoch": 0.16,
"eval_adversarial_qa_accuracy": 0.8228088336783989,
"eval_adversarial_qa_loss": 0.76416015625,
"eval_adversarial_qa_runtime": 238.4374,
"eval_adversarial_qa_samples_per_second": 12.582,
"eval_adversarial_qa_steps_per_second": 0.788,
"step": 1500
},
{
"epoch": 0.16,
"eval_trivia_qa_nocontext_accuracy": 0.47413712205101255,
"eval_trivia_qa_nocontext_loss": 2.9921875,
"eval_trivia_qa_nocontext_runtime": 1248.2736,
"eval_trivia_qa_nocontext_samples_per_second": 14.375,
"eval_trivia_qa_nocontext_steps_per_second": 0.899,
"step": 1500
},
{
"epoch": 0.16,
"eval_m2m_translation_accuracy": 0.6384202273383193,
"eval_m2m_translation_loss": 1.5703125,
"eval_m2m_translation_runtime": 3725.8719,
"eval_m2m_translation_samples_per_second": 14.204,
"eval_m2m_translation_steps_per_second": 0.888,
"step": 1500
},
{
"epoch": 0.16,
"eval_xsum_accuracy": 0.5815857676257413,
"eval_xsum_loss": 1.87109375,
"eval_xsum_runtime": 1009.1324,
"eval_xsum_samples_per_second": 11.229,
"eval_xsum_steps_per_second": 0.703,
"step": 1500
},
{
"epoch": 0.16,
"eval_cnn_dailymail_accuracy": 0.6627513117154793,
"eval_cnn_dailymail_loss": 1.537109375,
"eval_cnn_dailymail_runtime": 1211.2046,
"eval_cnn_dailymail_samples_per_second": 11.037,
"eval_cnn_dailymail_steps_per_second": 0.69,
"step": 1500
},
{
"epoch": 0.16,
"eval_multi_news_accuracy": 0.5242124214780086,
"eval_multi_news_loss": 2.287109375,
"eval_multi_news_runtime": 513.1245,
"eval_multi_news_samples_per_second": 10.956,
"eval_multi_news_steps_per_second": 0.686,
"step": 1500
},
{
"epoch": 0.16,
"eval_scitldr_accuracy": 0.46804191723122984,
"eval_scitldr_loss": 2.4921875,
"eval_scitldr_runtime": 56.2815,
"eval_scitldr_samples_per_second": 10.998,
"eval_scitldr_steps_per_second": 0.693,
"step": 1500
},
{
"epoch": 0.16,
"eval_soda_accuracy": 0.7266564618412421,
"eval_soda_loss": 1.12890625,
"eval_soda_runtime": 82.0556,
"eval_soda_samples_per_second": 12.199,
"eval_soda_steps_per_second": 0.768,
"step": 1500
},
{
"epoch": 0.16,
"eval_joke_accuracy": 0.45995283430867323,
"eval_joke_loss": 2.5,
"eval_joke_runtime": 7.0053,
"eval_joke_samples_per_second": 10.849,
"eval_joke_steps_per_second": 0.714,
"step": 1500
},
{
"epoch": 0.16,
"eval_gsm8k_accuracy": 0.8421233100385344,
"eval_gsm8k_loss": 0.57177734375,
"eval_gsm8k_runtime": 113.8436,
"eval_gsm8k_samples_per_second": 11.586,
"eval_gsm8k_steps_per_second": 0.729,
"step": 1500
},
{
"epoch": 0.16,
"eval_samsum_accuracy": 0.6305018339446482,
"eval_samsum_loss": 1.490234375,
"eval_samsum_runtime": 72.6647,
"eval_samsum_samples_per_second": 11.257,
"eval_samsum_steps_per_second": 0.716,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 7.847587554486697e-06,
"loss": 1.7693,
"step": 1510
},
{
"epoch": 0.17,
"learning_rate": 7.844882008116638e-06,
"loss": 1.7719,
"step": 1520
},
{
"epoch": 0.17,
"learning_rate": 7.84187584548324e-06,
"loss": 1.7777,
"step": 1530
},
{
"epoch": 0.17,
"learning_rate": 7.838869682849841e-06,
"loss": 1.7922,
"step": 1540
},
{
"epoch": 0.17,
"learning_rate": 7.835863520216443e-06,
"loss": 1.7688,
"step": 1550
},
{
"epoch": 0.17,
"learning_rate": 7.832857357583046e-06,
"loss": 1.7848,
"step": 1560
},
{
"epoch": 0.17,
"learning_rate": 7.829851194949648e-06,
"loss": 1.7795,
"step": 1570
},
{
"epoch": 0.17,
"learning_rate": 7.826845032316247e-06,
"loss": 1.7826,
"step": 1580
},
{
"epoch": 0.17,
"learning_rate": 7.823838869682849e-06,
"loss": 1.7511,
"step": 1590
},
{
"epoch": 0.17,
"learning_rate": 7.82083270704945e-06,
"loss": 1.7502,
"step": 1600
},
{
"epoch": 0.17,
"learning_rate": 7.817826544416052e-06,
"loss": 1.7384,
"step": 1610
},
{
"epoch": 0.18,
"learning_rate": 7.814820381782654e-06,
"loss": 1.773,
"step": 1620
},
{
"epoch": 0.18,
"learning_rate": 7.811814219149255e-06,
"loss": 1.785,
"step": 1630
},
{
"epoch": 0.18,
"learning_rate": 7.808808056515857e-06,
"loss": 1.7407,
"step": 1640
},
{
"epoch": 0.18,
"learning_rate": 7.805801893882458e-06,
"loss": 1.8115,
"step": 1650
},
{
"epoch": 0.18,
"learning_rate": 7.80279573124906e-06,
"loss": 1.8207,
"step": 1660
},
{
"epoch": 0.18,
"learning_rate": 7.799789568615661e-06,
"loss": 1.7618,
"step": 1670
},
{
"epoch": 0.18,
"learning_rate": 7.796783405982263e-06,
"loss": 1.7725,
"step": 1680
},
{
"epoch": 0.18,
"learning_rate": 7.793777243348864e-06,
"loss": 1.7618,
"step": 1690
},
{
"epoch": 0.18,
"learning_rate": 7.790771080715466e-06,
"loss": 1.7567,
"step": 1700
},
{
"epoch": 0.19,
"learning_rate": 7.787764918082068e-06,
"loss": 1.7704,
"step": 1710
},
{
"epoch": 0.19,
"learning_rate": 7.784758755448669e-06,
"loss": 1.7426,
"step": 1720
},
{
"epoch": 0.19,
"learning_rate": 7.78175259281527e-06,
"loss": 1.7373,
"step": 1730
},
{
"epoch": 0.19,
"learning_rate": 7.778746430181872e-06,
"loss": 1.7966,
"step": 1740
},
{
"epoch": 0.19,
"learning_rate": 7.775740267548474e-06,
"loss": 1.773,
"step": 1750
},
{
"epoch": 0.19,
"learning_rate": 7.772734104915075e-06,
"loss": 1.7664,
"step": 1760
},
{
"epoch": 0.19,
"learning_rate": 7.769727942281677e-06,
"loss": 1.7518,
"step": 1770
},
{
"epoch": 0.19,
"learning_rate": 7.766721779648278e-06,
"loss": 1.7743,
"step": 1780
},
{
"epoch": 0.19,
"learning_rate": 7.76371561701488e-06,
"loss": 1.7882,
"step": 1790
},
{
"epoch": 0.2,
"learning_rate": 7.760709454381481e-06,
"loss": 1.7534,
"step": 1800
},
{
"epoch": 0.2,
"learning_rate": 7.757703291748083e-06,
"loss": 1.7761,
"step": 1810
},
{
"epoch": 0.2,
"learning_rate": 7.754697129114685e-06,
"loss": 1.7951,
"step": 1820
},
{
"epoch": 0.2,
"learning_rate": 7.751690966481286e-06,
"loss": 1.7287,
"step": 1830
},
{
"epoch": 0.2,
"learning_rate": 7.748684803847888e-06,
"loss": 1.7674,
"step": 1840
},
{
"epoch": 0.2,
"learning_rate": 7.74567864121449e-06,
"loss": 1.7664,
"step": 1850
},
{
"epoch": 0.2,
"learning_rate": 7.74267247858109e-06,
"loss": 1.7569,
"step": 1860
},
{
"epoch": 0.2,
"learning_rate": 7.739666315947692e-06,
"loss": 1.8026,
"step": 1870
},
{
"epoch": 0.2,
"learning_rate": 7.736960769577633e-06,
"loss": 1.7262,
"step": 1880
},
{
"epoch": 0.21,
"learning_rate": 7.733954606944235e-06,
"loss": 1.7266,
"step": 1890
},
{
"epoch": 0.21,
"learning_rate": 7.730948444310836e-06,
"loss": 1.7476,
"step": 1900
},
{
"epoch": 0.21,
"learning_rate": 7.727942281677438e-06,
"loss": 1.7544,
"step": 1910
},
{
"epoch": 0.21,
"learning_rate": 7.72493611904404e-06,
"loss": 1.7859,
"step": 1920
},
{
"epoch": 0.21,
"learning_rate": 7.721929956410641e-06,
"loss": 1.7386,
"step": 1930
},
{
"epoch": 0.21,
"learning_rate": 7.718923793777243e-06,
"loss": 1.7012,
"step": 1940
},
{
"epoch": 0.21,
"learning_rate": 7.715917631143844e-06,
"loss": 1.7736,
"step": 1950
},
{
"epoch": 0.21,
"learning_rate": 7.712911468510446e-06,
"loss": 1.7398,
"step": 1960
},
{
"epoch": 0.21,
"learning_rate": 7.709905305877047e-06,
"loss": 1.8026,
"step": 1970
},
{
"epoch": 0.22,
"learning_rate": 7.706899143243649e-06,
"loss": 1.798,
"step": 1980
},
{
"epoch": 0.22,
"learning_rate": 7.70389298061025e-06,
"loss": 1.802,
"step": 1990
},
{
"epoch": 0.22,
"learning_rate": 7.700886817976852e-06,
"loss": 1.7447,
"step": 2000
},
{
"epoch": 0.22,
"eval_webgpt_accuracy": 0.48722841158921415,
"eval_webgpt_loss": 2.37890625,
"eval_webgpt_runtime": 335.1847,
"eval_webgpt_samples_per_second": 11.683,
"eval_webgpt_steps_per_second": 0.731,
"step": 2000
},
{
"epoch": 0.22,
"eval_prompt_dialogue_accuracy": 0.5729272869993594,
"eval_prompt_dialogue_loss": 1.7509765625,
"eval_prompt_dialogue_runtime": 808.5089,
"eval_prompt_dialogue_samples_per_second": 12.751,
"eval_prompt_dialogue_steps_per_second": 0.798,
"step": 2000
},
{
"epoch": 0.22,
"eval_squad_v2_accuracy": 0.9350127871125027,
"eval_squad_v2_loss": NaN,
"eval_squad_v2_runtime": 937.9399,
"eval_squad_v2_samples_per_second": 12.659,
"eval_squad_v2_steps_per_second": 0.792,
"step": 2000
},
{
"epoch": 0.22,
"eval_adversarial_qa_accuracy": 0.8308028525419829,
"eval_adversarial_qa_loss": 0.7265625,
"eval_adversarial_qa_runtime": 238.5652,
"eval_adversarial_qa_samples_per_second": 12.575,
"eval_adversarial_qa_steps_per_second": 0.788,
"step": 2000
},
{
"epoch": 0.22,
"eval_trivia_qa_nocontext_accuracy": 0.47783426824006153,
"eval_trivia_qa_nocontext_loss": 2.9609375,
"eval_trivia_qa_nocontext_runtime": 1249.121,
"eval_trivia_qa_nocontext_samples_per_second": 14.365,
"eval_trivia_qa_nocontext_steps_per_second": 0.898,
"step": 2000
},
{
"epoch": 0.22,
"eval_m2m_translation_accuracy": 0.6576231472988909,
"eval_m2m_translation_loss": 1.4755859375,
"eval_m2m_translation_runtime": 3725.504,
"eval_m2m_translation_samples_per_second": 14.206,
"eval_m2m_translation_steps_per_second": 0.888,
"step": 2000
},
{
"epoch": 0.22,
"eval_xsum_accuracy": 0.5862788967827711,
"eval_xsum_loss": 1.8447265625,
"eval_xsum_runtime": 1007.4241,
"eval_xsum_samples_per_second": 11.248,
"eval_xsum_steps_per_second": 0.704,
"step": 2000
},
{
"epoch": 0.22,
"eval_cnn_dailymail_accuracy": 0.6635118400214548,
"eval_cnn_dailymail_loss": 1.5224609375,
"eval_cnn_dailymail_runtime": 1207.7216,
"eval_cnn_dailymail_samples_per_second": 11.069,
"eval_cnn_dailymail_steps_per_second": 0.692,
"step": 2000
},
{
"epoch": 0.22,
"eval_multi_news_accuracy": 0.5254140019732162,
"eval_multi_news_loss": 2.271484375,
"eval_multi_news_runtime": 511.4499,
"eval_multi_news_samples_per_second": 10.992,
"eval_multi_news_steps_per_second": 0.688,
"step": 2000
},
{
"epoch": 0.22,
"eval_scitldr_accuracy": 0.46584969807682547,
"eval_scitldr_loss": 2.50390625,
"eval_scitldr_runtime": 56.1361,
"eval_scitldr_samples_per_second": 11.027,
"eval_scitldr_steps_per_second": 0.695,
"step": 2000
},
{
"epoch": 0.22,
"eval_soda_accuracy": 0.7280997157227204,
"eval_soda_loss": 1.1171875,
"eval_soda_runtime": 81.9632,
"eval_soda_samples_per_second": 12.213,
"eval_soda_steps_per_second": 0.769,
"step": 2000
},
{
"epoch": 0.22,
"eval_joke_accuracy": 0.4692986287011966,
"eval_joke_loss": 2.46484375,
"eval_joke_runtime": 6.7476,
"eval_joke_samples_per_second": 11.263,
"eval_joke_steps_per_second": 0.741,
"step": 2000
},
{
"epoch": 0.22,
"eval_gsm8k_accuracy": 0.8437234667885833,
"eval_gsm8k_loss": 0.5634765625,
"eval_gsm8k_runtime": 113.7256,
"eval_gsm8k_samples_per_second": 11.598,
"eval_gsm8k_steps_per_second": 0.73,
"step": 2000
},
{
"epoch": 0.22,
"eval_samsum_accuracy": 0.6325025008336113,
"eval_samsum_loss": 1.4638671875,
"eval_samsum_runtime": 72.3462,
"eval_samsum_samples_per_second": 11.307,
"eval_samsum_steps_per_second": 0.719,
"step": 2000
}
],
"max_steps": 27612,
"num_train_epochs": 3,
"total_flos": 428521886187520.0,
"trial_name": null,
"trial_params": null
}