{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8212414090449476, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.0000000000000004e-08, "loss": 2.1648, "step": 10 }, { "epoch": 0.0, "learning_rate": 7.500000000000001e-08, "loss": 2.131, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.2500000000000002e-07, "loss": 1.9325, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.7500000000000002e-07, "loss": 1.8743, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.2500000000000002e-07, "loss": 1.8232, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.75e-07, "loss": 1.7315, "step": 60 }, { "epoch": 0.01, "learning_rate": 3.25e-07, "loss": 1.656, "step": 70 }, { "epoch": 0.01, "learning_rate": 3.75e-07, "loss": 1.6538, "step": 80 }, { "epoch": 0.01, "learning_rate": 4.2500000000000006e-07, "loss": 1.5483, "step": 90 }, { "epoch": 0.02, "learning_rate": 4.7500000000000006e-07, "loss": 1.5073, "step": 100 }, { "epoch": 0.02, "learning_rate": 5.250000000000001e-07, "loss": 1.501, "step": 110 }, { "epoch": 0.02, "learning_rate": 5.750000000000001e-07, "loss": 1.4804, "step": 120 }, { "epoch": 0.02, "learning_rate": 6.25e-07, "loss": 1.4357, "step": 130 }, { "epoch": 0.02, "learning_rate": 6.750000000000001e-07, "loss": 1.424, "step": 140 }, { "epoch": 0.02, "learning_rate": 7.25e-07, "loss": 1.4579, "step": 150 }, { "epoch": 0.03, "learning_rate": 7.750000000000001e-07, "loss": 1.4185, "step": 160 }, { "epoch": 0.03, "learning_rate": 8.250000000000001e-07, "loss": 1.4141, "step": 170 }, { "epoch": 0.03, "learning_rate": 8.75e-07, "loss": 1.4098, "step": 180 }, { "epoch": 0.03, "learning_rate": 9.25e-07, "loss": 1.4144, "step": 190 }, { "epoch": 0.03, "learning_rate": 9.7e-07, "loss": 1.3644, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.02e-06, "loss": 1.3524, "step": 210 }, { "epoch": 0.04, "learning_rate": 1.0700000000000001e-06, "loss": 1.3403, "step": 220 }, { "epoch": 0.04, "learning_rate": 1.12e-06, "loss": 1.3355, "step": 230 }, { "epoch": 0.04, "learning_rate": 1.1700000000000002e-06, "loss": 1.3448, "step": 240 }, { "epoch": 0.04, "learning_rate": 1.2200000000000002e-06, "loss": 1.322, "step": 250 }, { "epoch": 0.04, "learning_rate": 1.2700000000000001e-06, "loss": 1.3186, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.32e-06, "loss": 1.3038, "step": 270 }, { "epoch": 0.05, "learning_rate": 1.3700000000000002e-06, "loss": 1.2853, "step": 280 }, { "epoch": 0.05, "learning_rate": 1.42e-06, "loss": 1.2939, "step": 290 }, { "epoch": 0.05, "learning_rate": 1.4700000000000001e-06, "loss": 1.2918, "step": 300 }, { "epoch": 0.05, "learning_rate": 1.52e-06, "loss": 1.2976, "step": 310 }, { "epoch": 0.05, "learning_rate": 1.5700000000000002e-06, "loss": 1.3128, "step": 320 }, { "epoch": 0.05, "learning_rate": 1.6200000000000002e-06, "loss": 1.2433, "step": 330 }, { "epoch": 0.06, "learning_rate": 1.6700000000000003e-06, "loss": 1.2978, "step": 340 }, { "epoch": 0.06, "learning_rate": 1.72e-06, "loss": 1.2964, "step": 350 }, { "epoch": 0.06, "learning_rate": 1.77e-06, "loss": 1.2625, "step": 360 }, { "epoch": 0.06, "learning_rate": 1.8200000000000002e-06, "loss": 1.2837, "step": 370 }, { "epoch": 0.06, "learning_rate": 1.87e-06, "loss": 1.2995, "step": 380 }, { "epoch": 0.06, "learning_rate": 1.9200000000000003e-06, "loss": 1.2706, "step": 390 }, { "epoch": 0.07, "learning_rate": 1.97e-06, "loss": 1.2819, "step": 400 }, { "epoch": 0.07, "learning_rate": 2.02e-06, "loss": 1.2522, "step": 410 }, { "epoch": 0.07, "learning_rate": 2.07e-06, "loss": 1.2955, "step": 420 }, { "epoch": 0.07, "learning_rate": 2.12e-06, "loss": 1.2506, "step": 430 }, { "epoch": 0.07, "learning_rate": 2.17e-06, "loss": 1.249, "step": 440 }, { "epoch": 0.07, "learning_rate": 2.2200000000000003e-06, "loss": 1.2413, "step": 450 }, { "epoch": 0.08, "learning_rate": 2.2700000000000003e-06, "loss": 1.2463, "step": 460 }, { "epoch": 0.08, "learning_rate": 2.3200000000000002e-06, "loss": 1.288, "step": 470 }, { "epoch": 0.08, "learning_rate": 2.37e-06, "loss": 1.2531, "step": 480 }, { "epoch": 0.08, "learning_rate": 2.42e-06, "loss": 1.2314, "step": 490 }, { "epoch": 0.08, "learning_rate": 2.47e-06, "loss": 1.2369, "step": 500 }, { "epoch": 0.08, "eval_multi_news_accuracy": 0.5592306537314586, "eval_multi_news_loss": 1.919921875, "eval_multi_news_runtime": 374.7444, "eval_multi_news_samples_per_second": 15.002, "eval_multi_news_steps_per_second": 1.876, "step": 500 }, { "epoch": 0.08, "eval_samsum_accuracy": 0.630043040249728, "eval_samsum_loss": 1.3271484375, "eval_samsum_runtime": 37.5336, "eval_samsum_samples_per_second": 21.794, "eval_samsum_steps_per_second": 2.744, "step": 500 }, { "epoch": 0.08, "eval_billsum_accuracy": 0.6415100921468554, "eval_billsum_loss": 1.4970703125, "eval_billsum_runtime": 204.4286, "eval_billsum_samples_per_second": 15.991, "eval_billsum_steps_per_second": 2.001, "step": 500 }, { "epoch": 0.08, "eval_wmt2019_zh-en_accuracy": 0.5844479239374446, "eval_wmt2019_zh-en_loss": 1.89453125, "eval_wmt2019_zh-en_runtime": 43.2897, "eval_wmt2019_zh-en_samples_per_second": 23.1, "eval_wmt2019_zh-en_steps_per_second": 2.888, "step": 500 }, { "epoch": 0.08, "eval_ted_trans_en-ja_accuracy": 0.5366497079329188, "eval_ted_trans_en-ja_loss": 2.01953125, "eval_ted_trans_en-ja_runtime": 36.4641, "eval_ted_trans_en-ja_samples_per_second": 21.967, "eval_ted_trans_en-ja_steps_per_second": 2.77, "step": 500 }, { "epoch": 0.08, "eval_ted_trans_zh-ja_accuracy": 0.44175365344467643, "eval_ted_trans_zh-ja_loss": 2.703125, "eval_ted_trans_zh-ja_runtime": 2.2264, "eval_ted_trans_zh-ja_samples_per_second": 18.864, "eval_ted_trans_zh-ja_steps_per_second": 2.695, "step": 500 }, { "epoch": 0.08, "eval_sharegpt_accuracy": 0.7056496488080175, "eval_sharegpt_loss": 1.1474609375, "eval_sharegpt_runtime": 735.7691, "eval_sharegpt_samples_per_second": 4.55, "eval_sharegpt_steps_per_second": 0.569, "step": 500 }, { "epoch": 0.08, "eval_dolly15k_accuracy": 0.5961999725877193, "eval_dolly15k_loss": 1.6650390625, "eval_dolly15k_runtime": 33.9484, "eval_dolly15k_samples_per_second": 22.122, "eval_dolly15k_steps_per_second": 2.769, "step": 500 }, { "epoch": 0.08, "eval_ikala_accuracy": 0.7306054447586751, "eval_ikala_loss": 1.0380859375, "eval_ikala_runtime": 887.5903, "eval_ikala_samples_per_second": 16.005, "eval_ikala_steps_per_second": 2.001, "step": 500 }, { "epoch": 0.08, "eval_oasst_export_accuracy": 0.656822117898619, "eval_oasst_export_loss": 1.60546875, "eval_oasst_export_runtime": 134.1688, "eval_oasst_export_samples_per_second": 15.644, "eval_oasst_export_steps_per_second": 1.96, "step": 500 }, { "epoch": 0.08, "eval_joke_accuracy": 0.48218347232752085, "eval_joke_loss": 2.29296875, "eval_joke_runtime": 3.5706, "eval_joke_samples_per_second": 21.285, "eval_joke_steps_per_second": 2.801, "step": 500 }, { "epoch": 0.08, "eval_gsm8k_accuracy": 0.7402563310685608, "eval_gsm8k_loss": 1.0068359375, "eval_gsm8k_runtime": 56.8505, "eval_gsm8k_samples_per_second": 23.201, "eval_gsm8k_steps_per_second": 2.902, "step": 500 }, { "epoch": 0.08, "eval_webgpt_accuracy": 0.4973525539337287, "eval_webgpt_loss": 2.21484375, "eval_webgpt_runtime": 155.091, "eval_webgpt_samples_per_second": 22.974, "eval_webgpt_steps_per_second": 2.876, "step": 500 }, { "epoch": 0.08, "learning_rate": 2.52e-06, "loss": 1.2409, "step": 510 }, { "epoch": 0.09, "learning_rate": 2.5700000000000004e-06, "loss": 1.2076, "step": 520 }, { "epoch": 0.09, "learning_rate": 2.6200000000000003e-06, "loss": 1.2425, "step": 530 }, { "epoch": 0.09, "learning_rate": 2.6700000000000003e-06, "loss": 1.267, "step": 540 }, { "epoch": 0.09, "learning_rate": 2.7200000000000002e-06, "loss": 1.238, "step": 550 }, { "epoch": 0.09, "learning_rate": 2.7700000000000006e-06, "loss": 1.2176, "step": 560 }, { "epoch": 0.09, "learning_rate": 2.82e-06, "loss": 1.2168, "step": 570 }, { "epoch": 0.1, "learning_rate": 2.87e-06, "loss": 1.2262, "step": 580 }, { "epoch": 0.1, "learning_rate": 2.92e-06, "loss": 1.2125, "step": 590 }, { "epoch": 0.1, "learning_rate": 2.97e-06, "loss": 1.2092, "step": 600 }, { "epoch": 0.1, "learning_rate": 3.0200000000000003e-06, "loss": 1.2521, "step": 610 }, { "epoch": 0.1, "learning_rate": 3.0700000000000003e-06, "loss": 1.2297, "step": 620 }, { "epoch": 0.1, "learning_rate": 3.12e-06, "loss": 1.2317, "step": 630 }, { "epoch": 0.11, "learning_rate": 3.17e-06, "loss": 1.2225, "step": 640 }, { "epoch": 0.11, "learning_rate": 3.2200000000000005e-06, "loss": 1.2227, "step": 650 }, { "epoch": 0.11, "learning_rate": 3.2700000000000005e-06, "loss": 1.2172, "step": 660 }, { "epoch": 0.11, "learning_rate": 3.3200000000000004e-06, "loss": 1.22, "step": 670 }, { "epoch": 0.11, "learning_rate": 3.3700000000000003e-06, "loss": 1.2164, "step": 680 }, { "epoch": 0.11, "learning_rate": 3.4200000000000007e-06, "loss": 1.2045, "step": 690 }, { "epoch": 0.11, "learning_rate": 3.4700000000000002e-06, "loss": 1.2334, "step": 700 }, { "epoch": 0.12, "learning_rate": 3.52e-06, "loss": 1.1979, "step": 710 }, { "epoch": 0.12, "learning_rate": 3.57e-06, "loss": 1.2066, "step": 720 }, { "epoch": 0.12, "learning_rate": 3.62e-06, "loss": 1.2153, "step": 730 }, { "epoch": 0.12, "learning_rate": 3.6700000000000004e-06, "loss": 1.2246, "step": 740 }, { "epoch": 0.12, "learning_rate": 3.7200000000000004e-06, "loss": 1.2027, "step": 750 }, { "epoch": 0.12, "learning_rate": 3.7700000000000003e-06, "loss": 1.233, "step": 760 }, { "epoch": 0.13, "learning_rate": 3.820000000000001e-06, "loss": 1.2156, "step": 770 }, { "epoch": 0.13, "learning_rate": 3.87e-06, "loss": 1.2067, "step": 780 }, { "epoch": 0.13, "learning_rate": 3.920000000000001e-06, "loss": 1.2077, "step": 790 }, { "epoch": 0.13, "learning_rate": 3.97e-06, "loss": 1.184, "step": 800 }, { "epoch": 0.13, "learning_rate": 4.0200000000000005e-06, "loss": 1.1747, "step": 810 }, { "epoch": 0.13, "learning_rate": 4.07e-06, "loss": 1.2055, "step": 820 }, { "epoch": 0.14, "learning_rate": 4.12e-06, "loss": 1.2137, "step": 830 }, { "epoch": 0.14, "learning_rate": 4.17e-06, "loss": 1.1934, "step": 840 }, { "epoch": 0.14, "learning_rate": 4.22e-06, "loss": 1.2154, "step": 850 }, { "epoch": 0.14, "learning_rate": 4.270000000000001e-06, "loss": 1.2216, "step": 860 }, { "epoch": 0.14, "learning_rate": 4.32e-06, "loss": 1.2002, "step": 870 }, { "epoch": 0.14, "learning_rate": 4.3700000000000005e-06, "loss": 1.1698, "step": 880 }, { "epoch": 0.15, "learning_rate": 4.42e-06, "loss": 1.2006, "step": 890 }, { "epoch": 0.15, "learning_rate": 4.47e-06, "loss": 1.1706, "step": 900 }, { "epoch": 0.15, "learning_rate": 4.520000000000001e-06, "loss": 1.1898, "step": 910 }, { "epoch": 0.15, "learning_rate": 4.57e-06, "loss": 1.1941, "step": 920 }, { "epoch": 0.15, "learning_rate": 4.620000000000001e-06, "loss": 1.1978, "step": 930 }, { "epoch": 0.15, "learning_rate": 4.670000000000001e-06, "loss": 1.1871, "step": 940 }, { "epoch": 0.16, "learning_rate": 4.7200000000000005e-06, "loss": 1.1673, "step": 950 }, { "epoch": 0.16, "learning_rate": 4.77e-06, "loss": 1.1938, "step": 960 }, { "epoch": 0.16, "learning_rate": 4.8200000000000004e-06, "loss": 1.1601, "step": 970 }, { "epoch": 0.16, "learning_rate": 4.87e-06, "loss": 1.1815, "step": 980 }, { "epoch": 0.16, "learning_rate": 4.92e-06, "loss": 1.1985, "step": 990 }, { "epoch": 0.16, "learning_rate": 4.970000000000001e-06, "loss": 1.1755, "step": 1000 }, { "epoch": 0.16, "eval_multi_news_accuracy": 0.5616533126883595, "eval_multi_news_loss": 1.9033203125, "eval_multi_news_runtime": 374.666, "eval_multi_news_samples_per_second": 15.005, "eval_multi_news_steps_per_second": 1.876, "step": 1000 }, { "epoch": 0.16, "eval_samsum_accuracy": 0.6358605685096722, "eval_samsum_loss": 1.2763671875, "eval_samsum_runtime": 36.4854, "eval_samsum_samples_per_second": 22.42, "eval_samsum_steps_per_second": 2.823, "step": 1000 }, { "epoch": 0.16, "eval_billsum_accuracy": 0.645555269329641, "eval_billsum_loss": 1.466796875, "eval_billsum_runtime": 205.3486, "eval_billsum_samples_per_second": 15.919, "eval_billsum_steps_per_second": 1.992, "step": 1000 }, { "epoch": 0.16, "eval_wmt2019_zh-en_accuracy": 0.5821662271706222, "eval_wmt2019_zh-en_loss": 1.908203125, "eval_wmt2019_zh-en_runtime": 42.6249, "eval_wmt2019_zh-en_samples_per_second": 23.46, "eval_wmt2019_zh-en_steps_per_second": 2.933, "step": 1000 }, { "epoch": 0.16, "eval_ted_trans_en-ja_accuracy": 0.5513235961740165, "eval_ted_trans_en-ja_loss": 1.9208984375, "eval_ted_trans_en-ja_runtime": 35.6003, "eval_ted_trans_en-ja_samples_per_second": 22.5, "eval_ted_trans_en-ja_steps_per_second": 2.837, "step": 1000 }, { "epoch": 0.16, "eval_ted_trans_zh-ja_accuracy": 0.4552332912988651, "eval_ted_trans_zh-ja_loss": 2.595703125, "eval_ted_trans_zh-ja_runtime": 2.6463, "eval_ted_trans_zh-ja_samples_per_second": 15.871, "eval_ted_trans_zh-ja_steps_per_second": 2.267, "step": 1000 }, { "epoch": 0.16, "eval_sharegpt_accuracy": 0.7199542010473684, "eval_sharegpt_loss": 1.0751953125, "eval_sharegpt_runtime": 733.0519, "eval_sharegpt_samples_per_second": 4.567, "eval_sharegpt_steps_per_second": 0.572, "step": 1000 }, { "epoch": 0.16, "eval_dolly15k_accuracy": 0.5963712993421053, "eval_dolly15k_loss": 1.6484375, "eval_dolly15k_runtime": 33.8269, "eval_dolly15k_samples_per_second": 22.201, "eval_dolly15k_steps_per_second": 2.779, "step": 1000 }, { "epoch": 0.16, "eval_ikala_accuracy": 0.7374268761235112, "eval_ikala_loss": 0.98876953125, "eval_ikala_runtime": 886.0533, "eval_ikala_samples_per_second": 16.033, "eval_ikala_steps_per_second": 2.004, "step": 1000 }, { "epoch": 0.16, "eval_oasst_export_accuracy": 0.6594323119298394, "eval_oasst_export_loss": 1.580078125, "eval_oasst_export_runtime": 134.3333, "eval_oasst_export_samples_per_second": 15.625, "eval_oasst_export_steps_per_second": 1.958, "step": 1000 }, { "epoch": 0.16, "eval_joke_accuracy": 0.4916603487490523, "eval_joke_loss": 2.20703125, "eval_joke_runtime": 3.5959, "eval_joke_samples_per_second": 21.135, "eval_joke_steps_per_second": 2.781, "step": 1000 }, { "epoch": 0.16, "eval_gsm8k_accuracy": 0.760284126003706, "eval_gsm8k_loss": 0.89794921875, "eval_gsm8k_runtime": 57.2198, "eval_gsm8k_samples_per_second": 23.051, "eval_gsm8k_steps_per_second": 2.884, "step": 1000 }, { "epoch": 0.16, "eval_webgpt_accuracy": 0.4994055667344498, "eval_webgpt_loss": 2.18359375, "eval_webgpt_runtime": 155.137, "eval_webgpt_samples_per_second": 22.967, "eval_webgpt_steps_per_second": 2.875, "step": 1000 }, { "epoch": 0.17, "learning_rate": 5.02e-06, "loss": 1.1772, "step": 1010 }, { "epoch": 0.17, "learning_rate": 5.070000000000001e-06, "loss": 1.2069, "step": 1020 }, { "epoch": 0.17, "learning_rate": 5.12e-06, "loss": 1.1755, "step": 1030 }, { "epoch": 0.17, "learning_rate": 5.1700000000000005e-06, "loss": 1.1658, "step": 1040 }, { "epoch": 0.17, "learning_rate": 5.220000000000001e-06, "loss": 1.1896, "step": 1050 }, { "epoch": 0.17, "learning_rate": 5.27e-06, "loss": 1.1743, "step": 1060 }, { "epoch": 0.18, "learning_rate": 5.320000000000001e-06, "loss": 1.1444, "step": 1070 }, { "epoch": 0.18, "learning_rate": 5.370000000000001e-06, "loss": 1.1812, "step": 1080 }, { "epoch": 0.18, "learning_rate": 5.420000000000001e-06, "loss": 1.1549, "step": 1090 }, { "epoch": 0.18, "learning_rate": 5.470000000000001e-06, "loss": 1.1929, "step": 1100 }, { "epoch": 0.18, "learning_rate": 5.5200000000000005e-06, "loss": 1.1317, "step": 1110 }, { "epoch": 0.18, "learning_rate": 5.570000000000001e-06, "loss": 1.1531, "step": 1120 }, { "epoch": 0.19, "learning_rate": 5.620000000000001e-06, "loss": 1.1871, "step": 1130 }, { "epoch": 0.19, "learning_rate": 5.67e-06, "loss": 1.1507, "step": 1140 }, { "epoch": 0.19, "learning_rate": 5.72e-06, "loss": 1.1916, "step": 1150 }, { "epoch": 0.19, "learning_rate": 5.77e-06, "loss": 1.1532, "step": 1160 }, { "epoch": 0.19, "learning_rate": 5.82e-06, "loss": 1.1763, "step": 1170 }, { "epoch": 0.19, "learning_rate": 5.8700000000000005e-06, "loss": 1.1719, "step": 1180 }, { "epoch": 0.2, "learning_rate": 5.92e-06, "loss": 1.1784, "step": 1190 }, { "epoch": 0.2, "learning_rate": 5.9700000000000004e-06, "loss": 1.1597, "step": 1200 }, { "epoch": 0.2, "learning_rate": 6.02e-06, "loss": 1.1594, "step": 1210 }, { "epoch": 0.2, "learning_rate": 6.07e-06, "loss": 1.1769, "step": 1220 }, { "epoch": 0.2, "learning_rate": 6.120000000000001e-06, "loss": 1.1692, "step": 1230 }, { "epoch": 0.2, "learning_rate": 6.17e-06, "loss": 1.1327, "step": 1240 }, { "epoch": 0.21, "learning_rate": 6.220000000000001e-06, "loss": 1.1733, "step": 1250 }, { "epoch": 0.21, "learning_rate": 6.27e-06, "loss": 1.16, "step": 1260 }, { "epoch": 0.21, "learning_rate": 6.3200000000000005e-06, "loss": 1.1701, "step": 1270 }, { "epoch": 0.21, "learning_rate": 6.370000000000001e-06, "loss": 1.1649, "step": 1280 }, { "epoch": 0.21, "learning_rate": 6.42e-06, "loss": 1.1477, "step": 1290 }, { "epoch": 0.21, "learning_rate": 6.470000000000001e-06, "loss": 1.1498, "step": 1300 }, { "epoch": 0.22, "learning_rate": 6.520000000000001e-06, "loss": 1.1881, "step": 1310 }, { "epoch": 0.22, "learning_rate": 6.570000000000001e-06, "loss": 1.1414, "step": 1320 }, { "epoch": 0.22, "learning_rate": 6.620000000000001e-06, "loss": 1.1663, "step": 1330 }, { "epoch": 0.22, "learning_rate": 6.6700000000000005e-06, "loss": 1.1555, "step": 1340 }, { "epoch": 0.22, "learning_rate": 6.720000000000001e-06, "loss": 1.1652, "step": 1350 }, { "epoch": 0.22, "learning_rate": 6.770000000000001e-06, "loss": 1.1539, "step": 1360 }, { "epoch": 0.23, "learning_rate": 6.820000000000001e-06, "loss": 1.1633, "step": 1370 }, { "epoch": 0.23, "learning_rate": 6.870000000000001e-06, "loss": 1.1583, "step": 1380 }, { "epoch": 0.23, "learning_rate": 6.92e-06, "loss": 1.1404, "step": 1390 }, { "epoch": 0.23, "learning_rate": 6.97e-06, "loss": 1.1436, "step": 1400 }, { "epoch": 0.23, "learning_rate": 7.0200000000000006e-06, "loss": 1.1856, "step": 1410 }, { "epoch": 0.23, "learning_rate": 7.07e-06, "loss": 1.1587, "step": 1420 }, { "epoch": 0.23, "learning_rate": 7.1200000000000004e-06, "loss": 1.1296, "step": 1430 }, { "epoch": 0.24, "learning_rate": 7.17e-06, "loss": 1.1171, "step": 1440 }, { "epoch": 0.24, "learning_rate": 7.22e-06, "loss": 1.1459, "step": 1450 }, { "epoch": 0.24, "learning_rate": 7.270000000000001e-06, "loss": 1.1621, "step": 1460 }, { "epoch": 0.24, "learning_rate": 7.32e-06, "loss": 1.1345, "step": 1470 }, { "epoch": 0.24, "learning_rate": 7.370000000000001e-06, "loss": 1.1711, "step": 1480 }, { "epoch": 0.24, "learning_rate": 7.420000000000001e-06, "loss": 1.1852, "step": 1490 }, { "epoch": 0.25, "learning_rate": 7.4700000000000005e-06, "loss": 1.1361, "step": 1500 }, { "epoch": 0.25, "eval_multi_news_accuracy": 0.5626650769023163, "eval_multi_news_loss": 1.9013671875, "eval_multi_news_runtime": 374.2125, "eval_multi_news_samples_per_second": 15.024, "eval_multi_news_steps_per_second": 1.879, "step": 1500 }, { "epoch": 0.25, "eval_samsum_accuracy": 0.641110533036939, "eval_samsum_loss": 1.267578125, "eval_samsum_runtime": 37.1994, "eval_samsum_samples_per_second": 21.99, "eval_samsum_steps_per_second": 2.769, "step": 1500 }, { "epoch": 0.25, "eval_billsum_accuracy": 0.648249370750216, "eval_billsum_loss": 1.453125, "eval_billsum_runtime": 204.445, "eval_billsum_samples_per_second": 15.99, "eval_billsum_steps_per_second": 2.001, "step": 1500 }, { "epoch": 0.25, "eval_wmt2019_zh-en_accuracy": 0.5873898487705391, "eval_wmt2019_zh-en_loss": 1.892578125, "eval_wmt2019_zh-en_runtime": 43.8258, "eval_wmt2019_zh-en_samples_per_second": 22.818, "eval_wmt2019_zh-en_steps_per_second": 2.852, "step": 1500 }, { "epoch": 0.25, "eval_ted_trans_en-ja_accuracy": 0.5575474107655961, "eval_ted_trans_en-ja_loss": 1.8818359375, "eval_ted_trans_en-ja_runtime": 35.7188, "eval_ted_trans_en-ja_samples_per_second": 22.425, "eval_ted_trans_en-ja_steps_per_second": 2.828, "step": 1500 }, { "epoch": 0.25, "eval_ted_trans_zh-ja_accuracy": 0.45999153259949194, "eval_ted_trans_zh-ja_loss": 2.556640625, "eval_ted_trans_zh-ja_runtime": 2.58, "eval_ted_trans_zh-ja_samples_per_second": 16.279, "eval_ted_trans_zh-ja_steps_per_second": 2.326, "step": 1500 }, { "epoch": 0.25, "eval_sharegpt_accuracy": 0.7297610954662402, "eval_sharegpt_loss": 1.0302734375, "eval_sharegpt_runtime": 732.545, "eval_sharegpt_samples_per_second": 4.57, "eval_sharegpt_steps_per_second": 0.572, "step": 1500 }, { "epoch": 0.25, "eval_dolly15k_accuracy": 0.5962685032894737, "eval_dolly15k_loss": 1.646484375, "eval_dolly15k_runtime": 33.5813, "eval_dolly15k_samples_per_second": 22.364, "eval_dolly15k_steps_per_second": 2.799, "step": 1500 }, { "epoch": 0.25, "eval_ikala_accuracy": 0.7406414384414164, "eval_ikala_loss": 0.96875, "eval_ikala_runtime": 885.454, "eval_ikala_samples_per_second": 16.044, "eval_ikala_steps_per_second": 2.006, "step": 1500 }, { "epoch": 0.25, "eval_oasst_export_accuracy": 0.6599712470813749, "eval_oasst_export_loss": 1.578125, "eval_oasst_export_runtime": 133.2511, "eval_oasst_export_samples_per_second": 15.752, "eval_oasst_export_steps_per_second": 1.974, "step": 1500 }, { "epoch": 0.25, "eval_joke_accuracy": 0.49838893100833964, "eval_joke_loss": 2.1953125, "eval_joke_runtime": 4.5928, "eval_joke_samples_per_second": 16.548, "eval_joke_steps_per_second": 2.177, "step": 1500 }, { "epoch": 0.25, "eval_gsm8k_accuracy": 0.7668082149474984, "eval_gsm8k_loss": 0.85791015625, "eval_gsm8k_runtime": 57.7515, "eval_gsm8k_samples_per_second": 22.839, "eval_gsm8k_steps_per_second": 2.857, "step": 1500 }, { "epoch": 0.25, "eval_webgpt_accuracy": 0.4995741373619939, "eval_webgpt_loss": 2.181640625, "eval_webgpt_runtime": 154.199, "eval_webgpt_samples_per_second": 23.107, "eval_webgpt_steps_per_second": 2.892, "step": 1500 }, { "epoch": 0.25, "learning_rate": 7.520000000000001e-06, "loss": 1.1574, "step": 1510 }, { "epoch": 0.25, "learning_rate": 7.57e-06, "loss": 1.1593, "step": 1520 }, { "epoch": 0.25, "learning_rate": 7.620000000000001e-06, "loss": 1.1255, "step": 1530 }, { "epoch": 0.25, "learning_rate": 7.670000000000001e-06, "loss": 1.1665, "step": 1540 }, { "epoch": 0.25, "learning_rate": 7.72e-06, "loss": 1.1459, "step": 1550 }, { "epoch": 0.26, "learning_rate": 7.77e-06, "loss": 1.1187, "step": 1560 }, { "epoch": 0.26, "learning_rate": 7.820000000000001e-06, "loss": 1.1469, "step": 1570 }, { "epoch": 0.26, "learning_rate": 7.870000000000001e-06, "loss": 1.1648, "step": 1580 }, { "epoch": 0.26, "learning_rate": 7.92e-06, "loss": 1.1314, "step": 1590 }, { "epoch": 0.26, "learning_rate": 7.970000000000002e-06, "loss": 1.1213, "step": 1600 }, { "epoch": 0.26, "learning_rate": 8.020000000000001e-06, "loss": 1.1424, "step": 1610 }, { "epoch": 0.27, "learning_rate": 8.07e-06, "loss": 1.1637, "step": 1620 }, { "epoch": 0.27, "learning_rate": 8.120000000000002e-06, "loss": 1.1403, "step": 1630 }, { "epoch": 0.27, "learning_rate": 8.17e-06, "loss": 1.1299, "step": 1640 }, { "epoch": 0.27, "learning_rate": 8.220000000000001e-06, "loss": 1.1361, "step": 1650 }, { "epoch": 0.27, "learning_rate": 8.27e-06, "loss": 1.1484, "step": 1660 }, { "epoch": 0.27, "learning_rate": 8.32e-06, "loss": 1.1292, "step": 1670 }, { "epoch": 0.28, "learning_rate": 8.370000000000001e-06, "loss": 1.1395, "step": 1680 }, { "epoch": 0.28, "learning_rate": 8.42e-06, "loss": 1.1299, "step": 1690 }, { "epoch": 0.28, "learning_rate": 8.47e-06, "loss": 1.145, "step": 1700 }, { "epoch": 0.28, "learning_rate": 8.52e-06, "loss": 1.1351, "step": 1710 }, { "epoch": 0.28, "learning_rate": 8.570000000000001e-06, "loss": 1.1579, "step": 1720 }, { "epoch": 0.28, "learning_rate": 8.62e-06, "loss": 1.1483, "step": 1730 }, { "epoch": 0.29, "learning_rate": 8.67e-06, "loss": 1.1278, "step": 1740 }, { "epoch": 0.29, "learning_rate": 8.720000000000001e-06, "loss": 1.1375, "step": 1750 }, { "epoch": 0.29, "learning_rate": 8.77e-06, "loss": 1.1526, "step": 1760 }, { "epoch": 0.29, "learning_rate": 8.82e-06, "loss": 1.1535, "step": 1770 }, { "epoch": 0.29, "learning_rate": 8.870000000000001e-06, "loss": 1.1377, "step": 1780 }, { "epoch": 0.29, "learning_rate": 8.920000000000001e-06, "loss": 1.1578, "step": 1790 }, { "epoch": 0.3, "learning_rate": 8.97e-06, "loss": 1.1598, "step": 1800 }, { "epoch": 0.3, "learning_rate": 9.020000000000002e-06, "loss": 1.1601, "step": 1810 }, { "epoch": 0.3, "learning_rate": 9.070000000000001e-06, "loss": 1.1292, "step": 1820 }, { "epoch": 0.3, "learning_rate": 9.12e-06, "loss": 1.111, "step": 1830 }, { "epoch": 0.3, "learning_rate": 9.17e-06, "loss": 1.12, "step": 1840 }, { "epoch": 0.3, "learning_rate": 9.220000000000002e-06, "loss": 1.1, "step": 1850 }, { "epoch": 0.31, "learning_rate": 9.270000000000001e-06, "loss": 1.099, "step": 1860 }, { "epoch": 0.31, "learning_rate": 9.32e-06, "loss": 1.1333, "step": 1870 }, { "epoch": 0.31, "learning_rate": 9.370000000000002e-06, "loss": 1.1386, "step": 1880 }, { "epoch": 0.31, "learning_rate": 9.42e-06, "loss": 1.1389, "step": 1890 }, { "epoch": 0.31, "learning_rate": 9.47e-06, "loss": 1.1294, "step": 1900 }, { "epoch": 0.31, "learning_rate": 9.52e-06, "loss": 1.1326, "step": 1910 }, { "epoch": 0.32, "learning_rate": 9.57e-06, "loss": 1.129, "step": 1920 }, { "epoch": 0.32, "learning_rate": 9.620000000000001e-06, "loss": 1.1224, "step": 1930 }, { "epoch": 0.32, "learning_rate": 9.67e-06, "loss": 1.1168, "step": 1940 }, { "epoch": 0.32, "learning_rate": 9.72e-06, "loss": 1.1223, "step": 1950 }, { "epoch": 0.32, "learning_rate": 9.770000000000001e-06, "loss": 1.1064, "step": 1960 }, { "epoch": 0.32, "learning_rate": 9.820000000000001e-06, "loss": 1.1303, "step": 1970 }, { "epoch": 0.33, "learning_rate": 9.87e-06, "loss": 1.1134, "step": 1980 }, { "epoch": 0.33, "learning_rate": 9.920000000000002e-06, "loss": 1.1396, "step": 1990 }, { "epoch": 0.33, "learning_rate": 9.970000000000001e-06, "loss": 1.1418, "step": 2000 }, { "epoch": 0.33, "eval_multi_news_accuracy": 0.5614524803428215, "eval_multi_news_loss": 1.9052734375, "eval_multi_news_runtime": 373.3978, "eval_multi_news_samples_per_second": 15.056, "eval_multi_news_steps_per_second": 1.883, "step": 2000 }, { "epoch": 0.33, "eval_samsum_accuracy": 0.6388875750839521, "eval_samsum_loss": 1.265625, "eval_samsum_runtime": 37.3723, "eval_samsum_samples_per_second": 21.888, "eval_samsum_steps_per_second": 2.756, "step": 2000 }, { "epoch": 0.33, "eval_billsum_accuracy": 0.6493294263495999, "eval_billsum_loss": 1.4462890625, "eval_billsum_runtime": 203.77, "eval_billsum_samples_per_second": 16.043, "eval_billsum_steps_per_second": 2.007, "step": 2000 }, { "epoch": 0.33, "eval_wmt2019_zh-en_accuracy": 0.5823181343543334, "eval_wmt2019_zh-en_loss": 1.9228515625, "eval_wmt2019_zh-en_runtime": 43.5037, "eval_wmt2019_zh-en_samples_per_second": 22.987, "eval_wmt2019_zh-en_steps_per_second": 2.873, "step": 2000 }, { "epoch": 0.33, "eval_ted_trans_en-ja_accuracy": 0.5623202978930665, "eval_ted_trans_en-ja_loss": 1.869140625, "eval_ted_trans_en-ja_runtime": 35.4889, "eval_ted_trans_en-ja_samples_per_second": 22.57, "eval_ted_trans_en-ja_steps_per_second": 2.846, "step": 2000 }, { "epoch": 0.33, "eval_ted_trans_zh-ja_accuracy": 0.46688327918020495, "eval_ted_trans_zh-ja_loss": 2.46875, "eval_ted_trans_zh-ja_runtime": 2.6642, "eval_ted_trans_zh-ja_samples_per_second": 15.765, "eval_ted_trans_zh-ja_steps_per_second": 2.252, "step": 2000 }, { "epoch": 0.33, "eval_sharegpt_accuracy": 0.7361997474453208, "eval_sharegpt_loss": 1.001953125, "eval_sharegpt_runtime": 732.4255, "eval_sharegpt_samples_per_second": 4.571, "eval_sharegpt_steps_per_second": 0.572, "step": 2000 }, { "epoch": 0.33, "eval_dolly15k_accuracy": 0.5939898574561403, "eval_dolly15k_loss": 1.65625, "eval_dolly15k_runtime": 33.8567, "eval_dolly15k_samples_per_second": 22.182, "eval_dolly15k_steps_per_second": 2.776, "step": 2000 }, { "epoch": 0.33, "eval_ikala_accuracy": 0.7422763555784087, "eval_ikala_loss": 0.9580078125, "eval_ikala_runtime": 885.2845, "eval_ikala_samples_per_second": 16.047, "eval_ikala_steps_per_second": 2.006, "step": 2000 }, { "epoch": 0.33, "eval_oasst_export_accuracy": 0.6593580262738169, "eval_oasst_export_loss": 1.578125, "eval_oasst_export_runtime": 132.7253, "eval_oasst_export_samples_per_second": 15.815, "eval_oasst_export_steps_per_second": 1.982, "step": 2000 }, { "epoch": 0.33, "eval_joke_accuracy": 0.49895754359363154, "eval_joke_loss": 2.171875, "eval_joke_runtime": 4.5049, "eval_joke_samples_per_second": 16.871, "eval_joke_steps_per_second": 2.22, "step": 2000 }, { "epoch": 0.33, "eval_gsm8k_accuracy": 0.775555898702903, "eval_gsm8k_loss": 0.8232421875, "eval_gsm8k_runtime": 56.3886, "eval_gsm8k_samples_per_second": 23.391, "eval_gsm8k_steps_per_second": 2.926, "step": 2000 }, { "epoch": 0.33, "eval_webgpt_accuracy": 0.4990524556304364, "eval_webgpt_loss": 2.185546875, "eval_webgpt_runtime": 154.0524, "eval_webgpt_samples_per_second": 23.128, "eval_webgpt_steps_per_second": 2.895, "step": 2000 }, { "epoch": 0.33, "learning_rate": 9.99914354230901e-06, "loss": 1.116, "step": 2010 }, { "epoch": 0.33, "learning_rate": 9.997002398081536e-06, "loss": 1.1243, "step": 2020 }, { "epoch": 0.33, "learning_rate": 9.99486125385406e-06, "loss": 1.1183, "step": 2030 }, { "epoch": 0.34, "learning_rate": 9.992720109626585e-06, "loss": 1.1324, "step": 2040 }, { "epoch": 0.34, "learning_rate": 9.99057896539911e-06, "loss": 1.1033, "step": 2050 }, { "epoch": 0.34, "learning_rate": 9.988437821171634e-06, "loss": 1.0962, "step": 2060 }, { "epoch": 0.34, "learning_rate": 9.98629667694416e-06, "loss": 1.1253, "step": 2070 }, { "epoch": 0.34, "learning_rate": 9.984155532716685e-06, "loss": 1.1522, "step": 2080 }, { "epoch": 0.34, "learning_rate": 9.98201438848921e-06, "loss": 1.142, "step": 2090 }, { "epoch": 0.34, "learning_rate": 9.979873244261734e-06, "loss": 1.1289, "step": 2100 }, { "epoch": 0.35, "learning_rate": 9.97773210003426e-06, "loss": 1.1367, "step": 2110 }, { "epoch": 0.35, "learning_rate": 9.975590955806785e-06, "loss": 1.1303, "step": 2120 }, { "epoch": 0.35, "learning_rate": 9.973449811579308e-06, "loss": 1.1041, "step": 2130 }, { "epoch": 0.35, "learning_rate": 9.971308667351834e-06, "loss": 1.1325, "step": 2140 }, { "epoch": 0.35, "learning_rate": 9.969167523124359e-06, "loss": 1.1371, "step": 2150 }, { "epoch": 0.35, "learning_rate": 9.967026378896883e-06, "loss": 1.112, "step": 2160 }, { "epoch": 0.36, "learning_rate": 9.964885234669408e-06, "loss": 1.1172, "step": 2170 }, { "epoch": 0.36, "learning_rate": 9.962744090441932e-06, "loss": 1.0959, "step": 2180 }, { "epoch": 0.36, "learning_rate": 9.960602946214459e-06, "loss": 1.1322, "step": 2190 }, { "epoch": 0.36, "learning_rate": 9.958461801986983e-06, "loss": 1.1098, "step": 2200 }, { "epoch": 0.36, "learning_rate": 9.956320657759508e-06, "loss": 1.1185, "step": 2210 }, { "epoch": 0.36, "learning_rate": 9.954179513532032e-06, "loss": 1.1027, "step": 2220 }, { "epoch": 0.37, "learning_rate": 9.952038369304557e-06, "loss": 1.1217, "step": 2230 }, { "epoch": 0.37, "learning_rate": 9.949897225077082e-06, "loss": 1.115, "step": 2240 }, { "epoch": 0.37, "learning_rate": 9.947756080849606e-06, "loss": 1.1197, "step": 2250 }, { "epoch": 0.37, "learning_rate": 9.945614936622131e-06, "loss": 1.0926, "step": 2260 }, { "epoch": 0.37, "learning_rate": 9.943473792394657e-06, "loss": 1.1085, "step": 2270 }, { "epoch": 0.37, "learning_rate": 9.94133264816718e-06, "loss": 1.139, "step": 2280 }, { "epoch": 0.38, "learning_rate": 9.939191503939706e-06, "loss": 1.1131, "step": 2290 }, { "epoch": 0.38, "learning_rate": 9.937050359712231e-06, "loss": 1.1281, "step": 2300 }, { "epoch": 0.38, "learning_rate": 9.934909215484757e-06, "loss": 1.0962, "step": 2310 }, { "epoch": 0.38, "learning_rate": 9.93276807125728e-06, "loss": 1.107, "step": 2320 }, { "epoch": 0.38, "learning_rate": 9.930626927029806e-06, "loss": 1.1082, "step": 2330 }, { "epoch": 0.38, "learning_rate": 9.928485782802331e-06, "loss": 1.1323, "step": 2340 }, { "epoch": 0.39, "learning_rate": 9.926344638574855e-06, "loss": 1.0984, "step": 2350 }, { "epoch": 0.39, "learning_rate": 9.92420349434738e-06, "loss": 1.118, "step": 2360 }, { "epoch": 0.39, "learning_rate": 9.922062350119905e-06, "loss": 1.1003, "step": 2370 }, { "epoch": 0.39, "learning_rate": 9.919921205892429e-06, "loss": 1.115, "step": 2380 }, { "epoch": 0.39, "learning_rate": 9.917780061664954e-06, "loss": 1.0974, "step": 2390 }, { "epoch": 0.39, "learning_rate": 9.915638917437478e-06, "loss": 1.107, "step": 2400 }, { "epoch": 0.4, "learning_rate": 9.913497773210005e-06, "loss": 1.1101, "step": 2410 }, { "epoch": 0.4, "learning_rate": 9.911356628982529e-06, "loss": 1.1115, "step": 2420 }, { "epoch": 0.4, "learning_rate": 9.909215484755054e-06, "loss": 1.0951, "step": 2430 }, { "epoch": 0.4, "learning_rate": 9.907074340527578e-06, "loss": 1.0938, "step": 2440 }, { "epoch": 0.4, "learning_rate": 9.904933196300103e-06, "loss": 1.0772, "step": 2450 }, { "epoch": 0.4, "learning_rate": 9.902792052072629e-06, "loss": 1.1028, "step": 2460 }, { "epoch": 0.41, "learning_rate": 9.900650907845152e-06, "loss": 1.0923, "step": 2470 }, { "epoch": 0.41, "learning_rate": 9.898509763617678e-06, "loss": 1.1238, "step": 2480 }, { "epoch": 0.41, "learning_rate": 9.896368619390203e-06, "loss": 1.1401, "step": 2490 }, { "epoch": 0.41, "learning_rate": 9.894227475162728e-06, "loss": 1.1142, "step": 2500 }, { "epoch": 0.41, "eval_multi_news_accuracy": 0.5619751512736382, "eval_multi_news_loss": 1.904296875, "eval_multi_news_runtime": 373.6389, "eval_multi_news_samples_per_second": 15.047, "eval_multi_news_steps_per_second": 1.881, "step": 2500 }, { "epoch": 0.41, "eval_samsum_accuracy": 0.6433334909899258, "eval_samsum_loss": 1.2607421875, "eval_samsum_runtime": 37.306, "eval_samsum_samples_per_second": 21.927, "eval_samsum_steps_per_second": 2.761, "step": 2500 }, { "epoch": 0.41, "eval_billsum_accuracy": 0.650991772793869, "eval_billsum_loss": 1.439453125, "eval_billsum_runtime": 203.4152, "eval_billsum_samples_per_second": 16.071, "eval_billsum_steps_per_second": 2.011, "step": 2500 }, { "epoch": 0.41, "eval_wmt2019_zh-en_accuracy": 0.5893870117057808, "eval_wmt2019_zh-en_loss": 1.904296875, "eval_wmt2019_zh-en_runtime": 43.0674, "eval_wmt2019_zh-en_samples_per_second": 23.219, "eval_wmt2019_zh-en_steps_per_second": 2.902, "step": 2500 }, { "epoch": 0.41, "eval_ted_trans_en-ja_accuracy": 0.563225558860213, "eval_ted_trans_en-ja_loss": 1.8388671875, "eval_ted_trans_en-ja_runtime": 35.7494, "eval_ted_trans_en-ja_samples_per_second": 22.406, "eval_ted_trans_en-ja_steps_per_second": 2.825, "step": 2500 }, { "epoch": 0.41, "eval_ted_trans_zh-ja_accuracy": 0.4661596958174905, "eval_ted_trans_zh-ja_loss": 2.453125, "eval_ted_trans_zh-ja_runtime": 2.525, "eval_ted_trans_zh-ja_samples_per_second": 16.634, "eval_ted_trans_zh-ja_steps_per_second": 2.376, "step": 2500 }, { "epoch": 0.41, "eval_sharegpt_accuracy": 0.7423548548826656, "eval_sharegpt_loss": 0.97265625, "eval_sharegpt_runtime": 732.2894, "eval_sharegpt_samples_per_second": 4.572, "eval_sharegpt_steps_per_second": 0.572, "step": 2500 }, { "epoch": 0.41, "eval_dolly15k_accuracy": 0.5930646929824561, "eval_dolly15k_loss": 1.6513671875, "eval_dolly15k_runtime": 33.3723, "eval_dolly15k_samples_per_second": 22.504, "eval_dolly15k_steps_per_second": 2.817, "step": 2500 }, { "epoch": 0.41, "eval_ikala_accuracy": 0.7440914978067725, "eval_ikala_loss": 0.9453125, "eval_ikala_runtime": 884.831, "eval_ikala_samples_per_second": 16.055, "eval_ikala_steps_per_second": 2.007, "step": 2500 }, { "epoch": 0.41, "eval_oasst_export_accuracy": 0.6600834038561538, "eval_oasst_export_loss": 1.5791015625, "eval_oasst_export_runtime": 133.5652, "eval_oasst_export_samples_per_second": 15.715, "eval_oasst_export_steps_per_second": 1.969, "step": 2500 }, { "epoch": 0.41, "eval_joke_accuracy": 0.5195223654283548, "eval_joke_loss": 2.078125, "eval_joke_runtime": 4.5929, "eval_joke_samples_per_second": 16.547, "eval_joke_steps_per_second": 2.177, "step": 2500 }, { "epoch": 0.41, "eval_gsm8k_accuracy": 0.782682211241507, "eval_gsm8k_loss": 0.796875, "eval_gsm8k_runtime": 56.5404, "eval_gsm8k_samples_per_second": 23.328, "eval_gsm8k_steps_per_second": 2.918, "step": 2500 }, { "epoch": 0.41, "eval_webgpt_accuracy": 0.49846334564786127, "eval_webgpt_loss": 2.189453125, "eval_webgpt_runtime": 154.9389, "eval_webgpt_samples_per_second": 22.996, "eval_webgpt_steps_per_second": 2.879, "step": 2500 }, { "epoch": 0.41, "learning_rate": 9.892086330935252e-06, "loss": 1.1335, "step": 2510 }, { "epoch": 0.41, "learning_rate": 9.889945186707778e-06, "loss": 1.0999, "step": 2520 }, { "epoch": 0.42, "learning_rate": 9.887804042480303e-06, "loss": 1.1324, "step": 2530 }, { "epoch": 0.42, "learning_rate": 9.885662898252827e-06, "loss": 1.0832, "step": 2540 }, { "epoch": 0.42, "learning_rate": 9.883521754025352e-06, "loss": 1.1, "step": 2550 }, { "epoch": 0.42, "learning_rate": 9.881380609797877e-06, "loss": 1.1153, "step": 2560 }, { "epoch": 0.42, "learning_rate": 9.879239465570401e-06, "loss": 1.0958, "step": 2570 }, { "epoch": 0.42, "learning_rate": 9.877098321342926e-06, "loss": 1.1154, "step": 2580 }, { "epoch": 0.43, "learning_rate": 9.874957177115452e-06, "loss": 1.1139, "step": 2590 }, { "epoch": 0.43, "learning_rate": 9.872816032887977e-06, "loss": 1.0739, "step": 2600 }, { "epoch": 0.43, "learning_rate": 9.8706748886605e-06, "loss": 1.1105, "step": 2610 }, { "epoch": 0.43, "learning_rate": 9.868533744433026e-06, "loss": 1.0969, "step": 2620 }, { "epoch": 0.43, "learning_rate": 9.866392600205552e-06, "loss": 1.1207, "step": 2630 }, { "epoch": 0.43, "learning_rate": 9.864251455978075e-06, "loss": 1.1392, "step": 2640 }, { "epoch": 0.44, "learning_rate": 9.8621103117506e-06, "loss": 1.1161, "step": 2650 }, { "epoch": 0.44, "learning_rate": 9.859969167523126e-06, "loss": 1.0767, "step": 2660 }, { "epoch": 0.44, "learning_rate": 9.85782802329565e-06, "loss": 1.1113, "step": 2670 }, { "epoch": 0.44, "learning_rate": 9.855686879068175e-06, "loss": 1.0801, "step": 2680 }, { "epoch": 0.44, "learning_rate": 9.853545734840699e-06, "loss": 1.0835, "step": 2690 }, { "epoch": 0.44, "learning_rate": 9.851404590613226e-06, "loss": 1.0635, "step": 2700 }, { "epoch": 0.45, "learning_rate": 9.84926344638575e-06, "loss": 1.095, "step": 2710 }, { "epoch": 0.45, "learning_rate": 9.847122302158275e-06, "loss": 1.0822, "step": 2720 }, { "epoch": 0.45, "learning_rate": 9.844981157930798e-06, "loss": 1.0983, "step": 2730 }, { "epoch": 0.45, "learning_rate": 9.842840013703324e-06, "loss": 1.1245, "step": 2740 }, { "epoch": 0.45, "learning_rate": 9.84069886947585e-06, "loss": 1.0768, "step": 2750 }, { "epoch": 0.45, "learning_rate": 9.838557725248373e-06, "loss": 1.0958, "step": 2760 }, { "epoch": 0.45, "learning_rate": 9.836416581020898e-06, "loss": 1.0869, "step": 2770 }, { "epoch": 0.46, "learning_rate": 9.834275436793424e-06, "loss": 1.126, "step": 2780 }, { "epoch": 0.46, "learning_rate": 9.832134292565947e-06, "loss": 1.0823, "step": 2790 }, { "epoch": 0.46, "learning_rate": 9.829993148338473e-06, "loss": 1.1057, "step": 2800 }, { "epoch": 0.46, "learning_rate": 9.827852004110998e-06, "loss": 1.0717, "step": 2810 }, { "epoch": 0.46, "learning_rate": 9.825710859883523e-06, "loss": 1.0835, "step": 2820 }, { "epoch": 0.46, "learning_rate": 9.823569715656047e-06, "loss": 1.1291, "step": 2830 }, { "epoch": 0.47, "learning_rate": 9.821428571428573e-06, "loss": 1.0856, "step": 2840 }, { "epoch": 0.47, "learning_rate": 9.819287427201098e-06, "loss": 1.0972, "step": 2850 }, { "epoch": 0.47, "learning_rate": 9.817146282973622e-06, "loss": 1.0833, "step": 2860 }, { "epoch": 0.47, "learning_rate": 9.815005138746147e-06, "loss": 1.1124, "step": 2870 }, { "epoch": 0.47, "learning_rate": 9.812863994518672e-06, "loss": 1.0905, "step": 2880 }, { "epoch": 0.47, "learning_rate": 9.810722850291196e-06, "loss": 1.0891, "step": 2890 }, { "epoch": 0.48, "learning_rate": 9.808581706063721e-06, "loss": 1.0931, "step": 2900 }, { "epoch": 0.48, "learning_rate": 9.806440561836245e-06, "loss": 1.1066, "step": 2910 }, { "epoch": 0.48, "learning_rate": 9.804299417608772e-06, "loss": 1.0759, "step": 2920 }, { "epoch": 0.48, "learning_rate": 9.802158273381296e-06, "loss": 1.0996, "step": 2930 }, { "epoch": 0.48, "learning_rate": 9.800017129153821e-06, "loss": 1.0868, "step": 2940 }, { "epoch": 0.48, "learning_rate": 9.797875984926345e-06, "loss": 1.0799, "step": 2950 }, { "epoch": 0.49, "learning_rate": 9.79573484069887e-06, "loss": 1.0989, "step": 2960 }, { "epoch": 0.49, "learning_rate": 9.793593696471396e-06, "loss": 1.0841, "step": 2970 }, { "epoch": 0.49, "learning_rate": 9.79145255224392e-06, "loss": 1.0745, "step": 2980 }, { "epoch": 0.49, "learning_rate": 9.789311408016445e-06, "loss": 1.0742, "step": 2990 }, { "epoch": 0.49, "learning_rate": 9.78717026378897e-06, "loss": 1.0745, "step": 3000 }, { "epoch": 0.49, "eval_multi_news_accuracy": 0.5619935239487821, "eval_multi_news_loss": 1.9033203125, "eval_multi_news_runtime": 373.8934, "eval_multi_news_samples_per_second": 15.036, "eval_multi_news_steps_per_second": 1.88, "step": 3000 }, { "epoch": 0.49, "eval_samsum_accuracy": 0.6469753582746063, "eval_samsum_loss": 1.24609375, "eval_samsum_runtime": 37.2777, "eval_samsum_samples_per_second": 21.943, "eval_samsum_steps_per_second": 2.763, "step": 3000 }, { "epoch": 0.49, "eval_billsum_accuracy": 0.6516022390022165, "eval_billsum_loss": 1.431640625, "eval_billsum_runtime": 204.7394, "eval_billsum_samples_per_second": 15.967, "eval_billsum_steps_per_second": 1.998, "step": 3000 }, { "epoch": 0.49, "eval_wmt2019_zh-en_accuracy": 0.5844544095665172, "eval_wmt2019_zh-en_loss": 1.8984375, "eval_wmt2019_zh-en_runtime": 42.4024, "eval_wmt2019_zh-en_samples_per_second": 23.584, "eval_wmt2019_zh-en_steps_per_second": 2.948, "step": 3000 }, { "epoch": 0.49, "eval_ted_trans_en-ja_accuracy": 0.577170182658057, "eval_ted_trans_en-ja_loss": 1.7958984375, "eval_ted_trans_en-ja_runtime": 35.5789, "eval_ted_trans_en-ja_samples_per_second": 22.513, "eval_ted_trans_en-ja_steps_per_second": 2.839, "step": 3000 }, { "epoch": 0.49, "eval_ted_trans_zh-ja_accuracy": 0.46690610569522834, "eval_ted_trans_zh-ja_loss": 2.515625, "eval_ted_trans_zh-ja_runtime": 2.5484, "eval_ted_trans_zh-ja_samples_per_second": 16.481, "eval_ted_trans_zh-ja_steps_per_second": 2.354, "step": 3000 }, { "epoch": 0.49, "eval_sharegpt_accuracy": 0.7475412115956699, "eval_sharegpt_loss": 0.9453125, "eval_sharegpt_runtime": 731.2857, "eval_sharegpt_samples_per_second": 4.578, "eval_sharegpt_steps_per_second": 0.573, "step": 3000 }, { "epoch": 0.49, "eval_dolly15k_accuracy": 0.5928933662280702, "eval_dolly15k_loss": 1.65625, "eval_dolly15k_runtime": 34.5989, "eval_dolly15k_samples_per_second": 21.706, "eval_dolly15k_steps_per_second": 2.717, "step": 3000 }, { "epoch": 0.49, "eval_ikala_accuracy": 0.7469942144047141, "eval_ikala_loss": 0.9296875, "eval_ikala_runtime": 884.7774, "eval_ikala_samples_per_second": 16.056, "eval_ikala_steps_per_second": 2.007, "step": 3000 }, { "epoch": 0.49, "eval_oasst_export_accuracy": 0.6607141036415994, "eval_oasst_export_loss": 1.5732421875, "eval_oasst_export_runtime": 132.9167, "eval_oasst_export_samples_per_second": 15.792, "eval_oasst_export_steps_per_second": 1.979, "step": 3000 }, { "epoch": 0.49, "eval_joke_accuracy": 0.5242608036391205, "eval_joke_loss": 2.025390625, "eval_joke_runtime": 4.5573, "eval_joke_samples_per_second": 16.677, "eval_joke_steps_per_second": 2.194, "step": 3000 }, { "epoch": 0.49, "eval_gsm8k_accuracy": 0.7849444101297097, "eval_gsm8k_loss": 0.783203125, "eval_gsm8k_runtime": 56.5634, "eval_gsm8k_samples_per_second": 23.319, "eval_gsm8k_steps_per_second": 2.917, "step": 3000 }, { "epoch": 0.49, "eval_webgpt_accuracy": 0.498837749883775, "eval_webgpt_loss": 2.19140625, "eval_webgpt_runtime": 153.8546, "eval_webgpt_samples_per_second": 23.158, "eval_webgpt_steps_per_second": 2.899, "step": 3000 }, { "epoch": 0.49, "learning_rate": 9.785029119561494e-06, "loss": 1.0876, "step": 3010 }, { "epoch": 0.5, "learning_rate": 9.782887975334019e-06, "loss": 1.0761, "step": 3020 }, { "epoch": 0.5, "learning_rate": 9.780746831106544e-06, "loss": 1.103, "step": 3030 }, { "epoch": 0.5, "learning_rate": 9.77860568687907e-06, "loss": 1.0891, "step": 3040 }, { "epoch": 0.5, "learning_rate": 9.776464542651593e-06, "loss": 1.0852, "step": 3050 }, { "epoch": 0.5, "learning_rate": 9.774323398424119e-06, "loss": 1.1041, "step": 3060 }, { "epoch": 0.5, "learning_rate": 9.772182254196644e-06, "loss": 1.0801, "step": 3070 }, { "epoch": 0.51, "learning_rate": 9.770041109969168e-06, "loss": 1.0851, "step": 3080 }, { "epoch": 0.51, "learning_rate": 9.767899965741693e-06, "loss": 1.0839, "step": 3090 }, { "epoch": 0.51, "learning_rate": 9.765758821514219e-06, "loss": 1.0756, "step": 3100 }, { "epoch": 0.51, "learning_rate": 9.763617677286742e-06, "loss": 1.0604, "step": 3110 }, { "epoch": 0.51, "learning_rate": 9.761476533059268e-06, "loss": 1.0613, "step": 3120 }, { "epoch": 0.51, "learning_rate": 9.759335388831791e-06, "loss": 1.0839, "step": 3130 }, { "epoch": 0.52, "learning_rate": 9.757194244604318e-06, "loss": 1.0873, "step": 3140 }, { "epoch": 0.52, "learning_rate": 9.755053100376842e-06, "loss": 1.0935, "step": 3150 }, { "epoch": 0.52, "learning_rate": 9.752911956149367e-06, "loss": 1.0821, "step": 3160 }, { "epoch": 0.52, "learning_rate": 9.750770811921891e-06, "loss": 1.0679, "step": 3170 }, { "epoch": 0.52, "learning_rate": 9.748629667694417e-06, "loss": 1.0939, "step": 3180 }, { "epoch": 0.52, "learning_rate": 9.746488523466942e-06, "loss": 1.0764, "step": 3190 }, { "epoch": 0.53, "learning_rate": 9.744347379239466e-06, "loss": 1.0772, "step": 3200 }, { "epoch": 0.53, "learning_rate": 9.742206235011991e-06, "loss": 1.0983, "step": 3210 }, { "epoch": 0.53, "learning_rate": 9.740065090784516e-06, "loss": 1.0649, "step": 3220 }, { "epoch": 0.53, "learning_rate": 9.73792394655704e-06, "loss": 1.0829, "step": 3230 }, { "epoch": 0.53, "learning_rate": 9.735782802329565e-06, "loss": 1.0914, "step": 3240 }, { "epoch": 0.53, "learning_rate": 9.73364165810209e-06, "loss": 1.0776, "step": 3250 }, { "epoch": 0.54, "learning_rate": 9.731500513874616e-06, "loss": 1.0698, "step": 3260 }, { "epoch": 0.54, "learning_rate": 9.72935936964714e-06, "loss": 1.074, "step": 3270 }, { "epoch": 0.54, "learning_rate": 9.727218225419665e-06, "loss": 1.0951, "step": 3280 }, { "epoch": 0.54, "learning_rate": 9.72507708119219e-06, "loss": 1.0586, "step": 3290 }, { "epoch": 0.54, "learning_rate": 9.722935936964714e-06, "loss": 1.066, "step": 3300 }, { "epoch": 0.54, "learning_rate": 9.72079479273724e-06, "loss": 1.0897, "step": 3310 }, { "epoch": 0.55, "learning_rate": 9.718653648509765e-06, "loss": 1.079, "step": 3320 }, { "epoch": 0.55, "learning_rate": 9.716512504282289e-06, "loss": 1.063, "step": 3330 }, { "epoch": 0.55, "learning_rate": 9.714371360054814e-06, "loss": 1.0688, "step": 3340 }, { "epoch": 0.55, "learning_rate": 9.712230215827338e-06, "loss": 1.0845, "step": 3350 }, { "epoch": 0.55, "learning_rate": 9.710089071599865e-06, "loss": 1.0421, "step": 3360 }, { "epoch": 0.55, "learning_rate": 9.707947927372388e-06, "loss": 1.0735, "step": 3370 }, { "epoch": 0.56, "learning_rate": 9.705806783144914e-06, "loss": 1.0848, "step": 3380 }, { "epoch": 0.56, "learning_rate": 9.703665638917438e-06, "loss": 1.0863, "step": 3390 }, { "epoch": 0.56, "learning_rate": 9.701524494689963e-06, "loss": 1.0372, "step": 3400 }, { "epoch": 0.56, "learning_rate": 9.699383350462488e-06, "loss": 1.0741, "step": 3410 }, { "epoch": 0.56, "learning_rate": 9.697242206235012e-06, "loss": 1.0988, "step": 3420 }, { "epoch": 0.56, "learning_rate": 9.695101062007537e-06, "loss": 1.0808, "step": 3430 }, { "epoch": 0.57, "learning_rate": 9.692959917780063e-06, "loss": 1.0717, "step": 3440 }, { "epoch": 0.57, "learning_rate": 9.690818773552586e-06, "loss": 1.0632, "step": 3450 }, { "epoch": 0.57, "learning_rate": 9.688677629325112e-06, "loss": 1.0539, "step": 3460 }, { "epoch": 0.57, "learning_rate": 9.686536485097637e-06, "loss": 1.0944, "step": 3470 }, { "epoch": 0.57, "learning_rate": 9.684395340870162e-06, "loss": 1.0682, "step": 3480 }, { "epoch": 0.57, "learning_rate": 9.682254196642686e-06, "loss": 1.066, "step": 3490 }, { "epoch": 0.57, "learning_rate": 9.680113052415212e-06, "loss": 1.0649, "step": 3500 }, { "epoch": 0.57, "eval_multi_news_accuracy": 0.5626207290657621, "eval_multi_news_loss": 1.90234375, "eval_multi_news_runtime": 374.9582, "eval_multi_news_samples_per_second": 14.994, "eval_multi_news_steps_per_second": 1.875, "step": 3500 }, { "epoch": 0.57, "eval_samsum_accuracy": 0.6482050796954074, "eval_samsum_loss": 1.244140625, "eval_samsum_runtime": 36.4554, "eval_samsum_samples_per_second": 22.438, "eval_samsum_steps_per_second": 2.825, "step": 3500 }, { "epoch": 0.57, "eval_billsum_accuracy": 0.6542856069509964, "eval_billsum_loss": 1.427734375, "eval_billsum_runtime": 204.9118, "eval_billsum_samples_per_second": 15.953, "eval_billsum_steps_per_second": 1.996, "step": 3500 }, { "epoch": 0.57, "eval_wmt2019_zh-en_accuracy": 0.5960585499733171, "eval_wmt2019_zh-en_loss": 1.8671875, "eval_wmt2019_zh-en_runtime": 42.5542, "eval_wmt2019_zh-en_samples_per_second": 23.499, "eval_wmt2019_zh-en_steps_per_second": 2.937, "step": 3500 }, { "epoch": 0.57, "eval_ted_trans_en-ja_accuracy": 0.5799230113905279, "eval_ted_trans_en-ja_loss": 1.7705078125, "eval_ted_trans_en-ja_runtime": 35.599, "eval_ted_trans_en-ja_samples_per_second": 22.501, "eval_ted_trans_en-ja_steps_per_second": 2.837, "step": 3500 }, { "epoch": 0.57, "eval_ted_trans_zh-ja_accuracy": 0.48124428179322964, "eval_ted_trans_zh-ja_loss": 2.44140625, "eval_ted_trans_zh-ja_runtime": 2.5311, "eval_ted_trans_zh-ja_samples_per_second": 16.594, "eval_ted_trans_zh-ja_steps_per_second": 2.371, "step": 3500 }, { "epoch": 0.57, "eval_sharegpt_accuracy": 0.7523055854464493, "eval_sharegpt_loss": 0.92236328125, "eval_sharegpt_runtime": 732.6588, "eval_sharegpt_samples_per_second": 4.57, "eval_sharegpt_steps_per_second": 0.572, "step": 3500 }, { "epoch": 0.57, "eval_dolly15k_accuracy": 0.5933216831140351, "eval_dolly15k_loss": 1.65625, "eval_dolly15k_runtime": 33.8299, "eval_dolly15k_samples_per_second": 22.199, "eval_dolly15k_steps_per_second": 2.779, "step": 3500 }, { "epoch": 0.57, "eval_ikala_accuracy": 0.7489160065784373, "eval_ikala_loss": 0.9208984375, "eval_ikala_runtime": 886.4258, "eval_ikala_samples_per_second": 16.026, "eval_ikala_steps_per_second": 2.004, "step": 3500 }, { "epoch": 0.57, "eval_oasst_export_accuracy": 0.6617497330814418, "eval_oasst_export_loss": 1.572265625, "eval_oasst_export_runtime": 135.1792, "eval_oasst_export_samples_per_second": 15.528, "eval_oasst_export_steps_per_second": 1.946, "step": 3500 }, { "epoch": 0.57, "eval_joke_accuracy": 0.5256823351023503, "eval_joke_loss": 2.013671875, "eval_joke_runtime": 3.6235, "eval_joke_samples_per_second": 20.974, "eval_joke_steps_per_second": 2.76, "step": 3500 }, { "epoch": 0.57, "eval_gsm8k_accuracy": 0.7892603458925262, "eval_gsm8k_loss": 0.77001953125, "eval_gsm8k_runtime": 56.8179, "eval_gsm8k_samples_per_second": 23.215, "eval_gsm8k_steps_per_second": 2.904, "step": 3500 }, { "epoch": 0.57, "eval_webgpt_accuracy": 0.49841011281811054, "eval_webgpt_loss": 2.19140625, "eval_webgpt_runtime": 157.0655, "eval_webgpt_samples_per_second": 22.685, "eval_webgpt_steps_per_second": 2.84, "step": 3500 }, { "epoch": 0.58, "learning_rate": 9.677971908187737e-06, "loss": 1.0692, "step": 3510 }, { "epoch": 0.58, "learning_rate": 9.67583076396026e-06, "loss": 1.1059, "step": 3520 }, { "epoch": 0.58, "learning_rate": 9.673689619732786e-06, "loss": 1.0758, "step": 3530 }, { "epoch": 0.58, "learning_rate": 9.671548475505311e-06, "loss": 1.0386, "step": 3540 }, { "epoch": 0.58, "learning_rate": 9.669407331277835e-06, "loss": 1.0865, "step": 3550 }, { "epoch": 0.58, "learning_rate": 9.66726618705036e-06, "loss": 1.0537, "step": 3560 }, { "epoch": 0.59, "learning_rate": 9.665125042822884e-06, "loss": 1.0481, "step": 3570 }, { "epoch": 0.59, "learning_rate": 9.662983898595411e-06, "loss": 1.0811, "step": 3580 }, { "epoch": 0.59, "learning_rate": 9.660842754367935e-06, "loss": 1.0518, "step": 3590 }, { "epoch": 0.59, "learning_rate": 9.65870161014046e-06, "loss": 1.0756, "step": 3600 }, { "epoch": 0.59, "learning_rate": 9.656560465912986e-06, "loss": 1.0594, "step": 3610 }, { "epoch": 0.59, "learning_rate": 9.65441932168551e-06, "loss": 1.0842, "step": 3620 }, { "epoch": 0.6, "learning_rate": 9.652278177458035e-06, "loss": 1.0703, "step": 3630 }, { "epoch": 0.6, "learning_rate": 9.650137033230558e-06, "loss": 1.0649, "step": 3640 }, { "epoch": 0.6, "learning_rate": 9.647995889003084e-06, "loss": 1.0869, "step": 3650 }, { "epoch": 0.6, "learning_rate": 9.645854744775609e-06, "loss": 1.0494, "step": 3660 }, { "epoch": 0.6, "learning_rate": 9.643713600548134e-06, "loss": 1.0575, "step": 3670 }, { "epoch": 0.6, "learning_rate": 9.641572456320658e-06, "loss": 1.0846, "step": 3680 }, { "epoch": 0.61, "learning_rate": 9.639431312093183e-06, "loss": 1.0815, "step": 3690 }, { "epoch": 0.61, "learning_rate": 9.637290167865709e-06, "loss": 1.0593, "step": 3700 }, { "epoch": 0.61, "learning_rate": 9.635149023638232e-06, "loss": 1.0936, "step": 3710 }, { "epoch": 0.61, "learning_rate": 9.633007879410758e-06, "loss": 1.0249, "step": 3720 }, { "epoch": 0.61, "learning_rate": 9.630866735183283e-06, "loss": 1.0382, "step": 3730 }, { "epoch": 0.61, "learning_rate": 9.628725590955807e-06, "loss": 1.0528, "step": 3740 }, { "epoch": 0.62, "learning_rate": 9.626584446728332e-06, "loss": 1.0469, "step": 3750 }, { "epoch": 0.62, "learning_rate": 9.624443302500858e-06, "loss": 1.053, "step": 3760 }, { "epoch": 0.62, "learning_rate": 9.622302158273383e-06, "loss": 1.0301, "step": 3770 }, { "epoch": 0.62, "learning_rate": 9.620161014045907e-06, "loss": 1.0913, "step": 3780 }, { "epoch": 0.62, "learning_rate": 9.618019869818432e-06, "loss": 1.0633, "step": 3790 }, { "epoch": 0.62, "learning_rate": 9.615878725590957e-06, "loss": 1.0743, "step": 3800 }, { "epoch": 0.63, "learning_rate": 9.613737581363481e-06, "loss": 1.0486, "step": 3810 }, { "epoch": 0.63, "learning_rate": 9.611596437136006e-06, "loss": 1.0491, "step": 3820 }, { "epoch": 0.63, "learning_rate": 9.609455292908532e-06, "loss": 1.0736, "step": 3830 }, { "epoch": 0.63, "learning_rate": 9.607314148681056e-06, "loss": 1.0729, "step": 3840 }, { "epoch": 0.63, "learning_rate": 9.605173004453581e-06, "loss": 1.0625, "step": 3850 }, { "epoch": 0.63, "learning_rate": 9.603031860226105e-06, "loss": 1.0726, "step": 3860 }, { "epoch": 0.64, "learning_rate": 9.600890715998632e-06, "loss": 1.0666, "step": 3870 }, { "epoch": 0.64, "learning_rate": 9.598749571771155e-06, "loss": 1.0773, "step": 3880 }, { "epoch": 0.64, "learning_rate": 9.59660842754368e-06, "loss": 1.065, "step": 3890 }, { "epoch": 0.64, "learning_rate": 9.594467283316204e-06, "loss": 1.0404, "step": 3900 }, { "epoch": 0.64, "learning_rate": 9.59232613908873e-06, "loss": 1.0717, "step": 3910 }, { "epoch": 0.64, "learning_rate": 9.590184994861255e-06, "loss": 1.0667, "step": 3920 }, { "epoch": 0.65, "learning_rate": 9.588043850633779e-06, "loss": 1.0603, "step": 3930 }, { "epoch": 0.65, "learning_rate": 9.585902706406304e-06, "loss": 1.0452, "step": 3940 }, { "epoch": 0.65, "learning_rate": 9.58376156217883e-06, "loss": 1.0681, "step": 3950 }, { "epoch": 0.65, "learning_rate": 9.581620417951353e-06, "loss": 1.075, "step": 3960 }, { "epoch": 0.65, "learning_rate": 9.579479273723879e-06, "loss": 1.0735, "step": 3970 }, { "epoch": 0.65, "learning_rate": 9.577338129496404e-06, "loss": 1.0859, "step": 3980 }, { "epoch": 0.66, "learning_rate": 9.57519698526893e-06, "loss": 1.0498, "step": 3990 }, { "epoch": 0.66, "learning_rate": 9.573055841041453e-06, "loss": 1.0353, "step": 4000 }, { "epoch": 0.66, "eval_multi_news_accuracy": 0.5627512384133357, "eval_multi_news_loss": 1.9013671875, "eval_multi_news_runtime": 374.2642, "eval_multi_news_samples_per_second": 15.021, "eval_multi_news_steps_per_second": 1.878, "step": 4000 }, { "epoch": 0.66, "eval_samsum_accuracy": 0.6499550678711631, "eval_samsum_loss": 1.228515625, "eval_samsum_runtime": 37.472, "eval_samsum_samples_per_second": 21.83, "eval_samsum_steps_per_second": 2.749, "step": 4000 }, { "epoch": 0.66, "eval_billsum_accuracy": 0.6559680786548813, "eval_billsum_loss": 1.4189453125, "eval_billsum_runtime": 204.7196, "eval_billsum_samples_per_second": 15.968, "eval_billsum_steps_per_second": 1.998, "step": 4000 }, { "epoch": 0.66, "eval_wmt2019_zh-en_accuracy": 0.605999539382773, "eval_wmt2019_zh-en_loss": 1.8330078125, "eval_wmt2019_zh-en_runtime": 43.2969, "eval_wmt2019_zh-en_samples_per_second": 23.096, "eval_wmt2019_zh-en_steps_per_second": 2.887, "step": 4000 }, { "epoch": 0.66, "eval_ted_trans_en-ja_accuracy": 0.591362074351765, "eval_ted_trans_en-ja_loss": 1.7236328125, "eval_ted_trans_en-ja_runtime": 36.1634, "eval_ted_trans_en-ja_samples_per_second": 22.149, "eval_ted_trans_en-ja_steps_per_second": 2.793, "step": 4000 }, { "epoch": 0.66, "eval_ted_trans_zh-ja_accuracy": 0.4934623430962343, "eval_ted_trans_zh-ja_loss": 2.33984375, "eval_ted_trans_zh-ja_runtime": 2.8371, "eval_ted_trans_zh-ja_samples_per_second": 14.804, "eval_ted_trans_zh-ja_steps_per_second": 2.115, "step": 4000 }, { "epoch": 0.66, "eval_sharegpt_accuracy": 0.7566927466258041, "eval_sharegpt_loss": 0.90234375, "eval_sharegpt_runtime": 732.9729, "eval_sharegpt_samples_per_second": 4.568, "eval_sharegpt_steps_per_second": 0.572, "step": 4000 }, { "epoch": 0.66, "eval_dolly15k_accuracy": 0.5929618969298246, "eval_dolly15k_loss": 1.65625, "eval_dolly15k_runtime": 33.6288, "eval_dolly15k_samples_per_second": 22.332, "eval_dolly15k_steps_per_second": 2.795, "step": 4000 }, { "epoch": 0.66, "eval_ikala_accuracy": 0.7508814779446873, "eval_ikala_loss": 0.91015625, "eval_ikala_runtime": 887.5958, "eval_ikala_samples_per_second": 16.005, "eval_ikala_steps_per_second": 2.001, "step": 4000 }, { "epoch": 0.66, "eval_oasst_export_accuracy": 0.6615836827915093, "eval_oasst_export_loss": 1.57421875, "eval_oasst_export_runtime": 134.7449, "eval_oasst_export_samples_per_second": 15.578, "eval_oasst_export_steps_per_second": 1.952, "step": 4000 }, { "epoch": 0.66, "eval_joke_accuracy": 0.535538286580743, "eval_joke_loss": 1.9736328125, "eval_joke_runtime": 3.6334, "eval_joke_samples_per_second": 20.917, "eval_joke_steps_per_second": 2.752, "step": 4000 }, { "epoch": 0.66, "eval_gsm8k_accuracy": 0.7943020382952439, "eval_gsm8k_loss": 0.74560546875, "eval_gsm8k_runtime": 57.4917, "eval_gsm8k_samples_per_second": 22.942, "eval_gsm8k_steps_per_second": 2.87, "step": 4000 }, { "epoch": 0.66, "eval_webgpt_accuracy": 0.49873483307959016, "eval_webgpt_loss": 2.19140625, "eval_webgpt_runtime": 155.9655, "eval_webgpt_samples_per_second": 22.845, "eval_webgpt_steps_per_second": 2.86, "step": 4000 }, { "epoch": 0.66, "learning_rate": 9.570914696813978e-06, "loss": 1.0657, "step": 4010 }, { "epoch": 0.66, "learning_rate": 9.568773552586504e-06, "loss": 1.0743, "step": 4020 }, { "epoch": 0.66, "learning_rate": 9.566632408359027e-06, "loss": 1.0543, "step": 4030 }, { "epoch": 0.66, "learning_rate": 9.564491264131553e-06, "loss": 1.0457, "step": 4040 }, { "epoch": 0.67, "learning_rate": 9.562350119904078e-06, "loss": 1.0546, "step": 4050 }, { "epoch": 0.67, "learning_rate": 9.560208975676602e-06, "loss": 1.0485, "step": 4060 }, { "epoch": 0.67, "learning_rate": 9.558067831449127e-06, "loss": 1.0535, "step": 4070 }, { "epoch": 0.67, "learning_rate": 9.555926687221651e-06, "loss": 1.0603, "step": 4080 }, { "epoch": 0.67, "learning_rate": 9.553785542994178e-06, "loss": 1.0444, "step": 4090 }, { "epoch": 0.67, "learning_rate": 9.551644398766702e-06, "loss": 1.0482, "step": 4100 }, { "epoch": 0.68, "learning_rate": 9.549503254539227e-06, "loss": 1.0509, "step": 4110 }, { "epoch": 0.68, "learning_rate": 9.54736211031175e-06, "loss": 1.036, "step": 4120 }, { "epoch": 0.68, "learning_rate": 9.545220966084276e-06, "loss": 1.0457, "step": 4130 }, { "epoch": 0.68, "learning_rate": 9.543079821856801e-06, "loss": 1.065, "step": 4140 }, { "epoch": 0.68, "learning_rate": 9.540938677629325e-06, "loss": 1.0441, "step": 4150 }, { "epoch": 0.68, "learning_rate": 9.53879753340185e-06, "loss": 1.047, "step": 4160 }, { "epoch": 0.68, "learning_rate": 9.536656389174376e-06, "loss": 1.05, "step": 4170 }, { "epoch": 0.69, "learning_rate": 9.5345152449469e-06, "loss": 1.0615, "step": 4180 }, { "epoch": 0.69, "learning_rate": 9.532374100719425e-06, "loss": 1.0575, "step": 4190 }, { "epoch": 0.69, "learning_rate": 9.53023295649195e-06, "loss": 1.0614, "step": 4200 }, { "epoch": 0.69, "learning_rate": 9.528091812264476e-06, "loss": 1.0504, "step": 4210 }, { "epoch": 0.69, "learning_rate": 9.525950668037e-06, "loss": 1.0401, "step": 4220 }, { "epoch": 0.69, "learning_rate": 9.524023638232272e-06, "loss": 1.0376, "step": 4230 }, { "epoch": 0.7, "learning_rate": 9.521882494004797e-06, "loss": 1.0265, "step": 4240 }, { "epoch": 0.7, "learning_rate": 9.519741349777321e-06, "loss": 1.0636, "step": 4250 }, { "epoch": 0.7, "learning_rate": 9.517600205549846e-06, "loss": 1.059, "step": 4260 }, { "epoch": 0.7, "learning_rate": 9.515459061322372e-06, "loss": 1.0552, "step": 4270 }, { "epoch": 0.7, "learning_rate": 9.513317917094897e-06, "loss": 1.0577, "step": 4280 }, { "epoch": 0.7, "learning_rate": 9.51117677286742e-06, "loss": 1.034, "step": 4290 }, { "epoch": 0.71, "learning_rate": 9.509035628639946e-06, "loss": 1.0697, "step": 4300 }, { "epoch": 0.71, "learning_rate": 9.506894484412471e-06, "loss": 1.0392, "step": 4310 }, { "epoch": 0.71, "learning_rate": 9.504753340184995e-06, "loss": 1.0069, "step": 4320 }, { "epoch": 0.71, "learning_rate": 9.50261219595752e-06, "loss": 1.0583, "step": 4330 }, { "epoch": 0.71, "learning_rate": 9.500471051730046e-06, "loss": 1.0522, "step": 4340 }, { "epoch": 0.71, "learning_rate": 9.49832990750257e-06, "loss": 1.0315, "step": 4350 }, { "epoch": 0.72, "learning_rate": 9.496188763275095e-06, "loss": 1.057, "step": 4360 }, { "epoch": 0.72, "learning_rate": 9.494047619047619e-06, "loss": 1.0513, "step": 4370 }, { "epoch": 0.72, "learning_rate": 9.491906474820146e-06, "loss": 1.0342, "step": 4380 }, { "epoch": 0.72, "learning_rate": 9.48976533059267e-06, "loss": 1.0559, "step": 4390 }, { "epoch": 0.72, "learning_rate": 9.487624186365195e-06, "loss": 1.0377, "step": 4400 }, { "epoch": 0.72, "learning_rate": 9.485483042137718e-06, "loss": 1.0512, "step": 4410 }, { "epoch": 0.73, "learning_rate": 9.483341897910244e-06, "loss": 1.0439, "step": 4420 }, { "epoch": 0.73, "learning_rate": 9.48120075368277e-06, "loss": 1.0344, "step": 4430 }, { "epoch": 0.73, "learning_rate": 9.479059609455293e-06, "loss": 1.0343, "step": 4440 }, { "epoch": 0.73, "learning_rate": 9.477132579650567e-06, "loss": 1.0463, "step": 4450 }, { "epoch": 0.73, "learning_rate": 9.47499143542309e-06, "loss": 1.0443, "step": 4460 }, { "epoch": 0.73, "learning_rate": 9.472850291195616e-06, "loss": 1.0559, "step": 4470 }, { "epoch": 0.74, "learning_rate": 9.47070914696814e-06, "loss": 1.0555, "step": 4480 }, { "epoch": 0.74, "learning_rate": 9.468568002740665e-06, "loss": 1.0267, "step": 4490 }, { "epoch": 0.74, "learning_rate": 9.46642685851319e-06, "loss": 1.042, "step": 4500 }, { "epoch": 0.74, "eval_multi_news_accuracy": 0.5627252632519255, "eval_multi_news_loss": 1.9013671875, "eval_multi_news_runtime": 374.4153, "eval_multi_news_samples_per_second": 15.015, "eval_multi_news_steps_per_second": 1.878, "step": 4500 }, { "epoch": 0.74, "eval_samsum_accuracy": 0.6482050796954074, "eval_samsum_loss": 1.2255859375, "eval_samsum_runtime": 38.0285, "eval_samsum_samples_per_second": 21.51, "eval_samsum_steps_per_second": 2.708, "step": 4500 }, { "epoch": 0.74, "eval_billsum_accuracy": 0.6552127105772998, "eval_billsum_loss": 1.4169921875, "eval_billsum_runtime": 204.5119, "eval_billsum_samples_per_second": 15.984, "eval_billsum_steps_per_second": 2.0, "step": 4500 }, { "epoch": 0.74, "eval_wmt2019_zh-en_accuracy": 0.5987996483045988, "eval_wmt2019_zh-en_loss": 1.8505859375, "eval_wmt2019_zh-en_runtime": 43.4755, "eval_wmt2019_zh-en_samples_per_second": 23.001, "eval_wmt2019_zh-en_steps_per_second": 2.875, "step": 4500 }, { "epoch": 0.74, "eval_ted_trans_en-ja_accuracy": 0.5948422811429342, "eval_ted_trans_en-ja_loss": 1.69140625, "eval_ted_trans_en-ja_runtime": 36.0142, "eval_ted_trans_en-ja_samples_per_second": 22.241, "eval_ted_trans_en-ja_steps_per_second": 2.804, "step": 4500 }, { "epoch": 0.74, "eval_ted_trans_zh-ja_accuracy": 0.521213679609154, "eval_ted_trans_zh-ja_loss": 2.265625, "eval_ted_trans_zh-ja_runtime": 2.289, "eval_ted_trans_zh-ja_samples_per_second": 18.349, "eval_ted_trans_zh-ja_steps_per_second": 2.621, "step": 4500 }, { "epoch": 0.74, "eval_sharegpt_accuracy": 0.7602375296761273, "eval_sharegpt_loss": 0.884765625, "eval_sharegpt_runtime": 733.0075, "eval_sharegpt_samples_per_second": 4.567, "eval_sharegpt_steps_per_second": 0.572, "step": 4500 }, { "epoch": 0.74, "eval_dolly15k_accuracy": 0.5921566611842105, "eval_dolly15k_loss": 1.65234375, "eval_dolly15k_runtime": 33.7747, "eval_dolly15k_samples_per_second": 22.236, "eval_dolly15k_steps_per_second": 2.783, "step": 4500 }, { "epoch": 0.74, "eval_ikala_accuracy": 0.7515879865982705, "eval_ikala_loss": 0.90576171875, "eval_ikala_runtime": 884.7883, "eval_ikala_samples_per_second": 16.056, "eval_ikala_steps_per_second": 2.007, "step": 4500 }, { "epoch": 0.74, "eval_oasst_export_accuracy": 0.6615137668799588, "eval_oasst_export_loss": 1.5693359375, "eval_oasst_export_runtime": 134.5394, "eval_oasst_export_samples_per_second": 15.601, "eval_oasst_export_steps_per_second": 1.955, "step": 4500 }, { "epoch": 0.74, "eval_joke_accuracy": 0.5379075056861259, "eval_joke_loss": 1.966796875, "eval_joke_runtime": 4.5957, "eval_joke_samples_per_second": 16.537, "eval_joke_steps_per_second": 2.176, "step": 4500 }, { "epoch": 0.74, "eval_gsm8k_accuracy": 0.7959928968499074, "eval_gsm8k_loss": 0.74072265625, "eval_gsm8k_runtime": 57.0884, "eval_gsm8k_samples_per_second": 23.105, "eval_gsm8k_steps_per_second": 2.89, "step": 4500 }, { "epoch": 0.74, "eval_webgpt_accuracy": 0.4989797040964437, "eval_webgpt_loss": 2.19140625, "eval_webgpt_runtime": 157.3673, "eval_webgpt_samples_per_second": 22.641, "eval_webgpt_steps_per_second": 2.834, "step": 4500 }, { "epoch": 0.74, "learning_rate": 9.464285714285714e-06, "loss": 1.0577, "step": 4510 }, { "epoch": 0.74, "learning_rate": 9.46214457005824e-06, "loss": 1.0465, "step": 4520 }, { "epoch": 0.74, "learning_rate": 9.460003425830765e-06, "loss": 1.0635, "step": 4530 }, { "epoch": 0.75, "learning_rate": 9.457862281603289e-06, "loss": 1.035, "step": 4540 }, { "epoch": 0.75, "learning_rate": 9.455721137375814e-06, "loss": 1.0623, "step": 4550 }, { "epoch": 0.75, "learning_rate": 9.45357999314834e-06, "loss": 1.0279, "step": 4560 }, { "epoch": 0.75, "learning_rate": 9.451438848920865e-06, "loss": 1.0287, "step": 4570 }, { "epoch": 0.75, "learning_rate": 9.449297704693388e-06, "loss": 1.0567, "step": 4580 }, { "epoch": 0.75, "learning_rate": 9.447156560465914e-06, "loss": 1.0246, "step": 4590 }, { "epoch": 0.76, "learning_rate": 9.445015416238439e-06, "loss": 1.0352, "step": 4600 }, { "epoch": 0.76, "learning_rate": 9.442874272010963e-06, "loss": 1.0493, "step": 4610 }, { "epoch": 0.76, "learning_rate": 9.440733127783488e-06, "loss": 1.0435, "step": 4620 }, { "epoch": 0.76, "learning_rate": 9.438591983556014e-06, "loss": 1.0418, "step": 4630 }, { "epoch": 0.76, "learning_rate": 9.436450839328539e-06, "loss": 1.0586, "step": 4640 }, { "epoch": 0.76, "learning_rate": 9.434309695101063e-06, "loss": 1.0041, "step": 4650 }, { "epoch": 0.77, "learning_rate": 9.432168550873588e-06, "loss": 1.0236, "step": 4660 }, { "epoch": 0.77, "learning_rate": 9.430027406646113e-06, "loss": 1.0377, "step": 4670 }, { "epoch": 0.77, "learning_rate": 9.427886262418637e-06, "loss": 1.0385, "step": 4680 }, { "epoch": 0.77, "learning_rate": 9.425745118191162e-06, "loss": 1.0418, "step": 4690 }, { "epoch": 0.77, "learning_rate": 9.423603973963686e-06, "loss": 1.0304, "step": 4700 }, { "epoch": 0.77, "learning_rate": 9.421462829736211e-06, "loss": 1.0376, "step": 4710 }, { "epoch": 0.78, "learning_rate": 9.419321685508737e-06, "loss": 1.0377, "step": 4720 }, { "epoch": 0.78, "learning_rate": 9.41718054128126e-06, "loss": 1.0451, "step": 4730 }, { "epoch": 0.78, "learning_rate": 9.415039397053788e-06, "loss": 1.0359, "step": 4740 }, { "epoch": 0.78, "learning_rate": 9.412898252826311e-06, "loss": 1.0375, "step": 4750 }, { "epoch": 0.78, "learning_rate": 9.410757108598837e-06, "loss": 1.0575, "step": 4760 }, { "epoch": 0.78, "learning_rate": 9.40861596437136e-06, "loss": 1.042, "step": 4770 }, { "epoch": 0.79, "learning_rate": 9.406474820143886e-06, "loss": 1.0405, "step": 4780 }, { "epoch": 0.79, "learning_rate": 9.404333675916411e-06, "loss": 1.0538, "step": 4790 }, { "epoch": 0.79, "learning_rate": 9.402192531688935e-06, "loss": 1.0168, "step": 4800 }, { "epoch": 0.79, "learning_rate": 9.40005138746146e-06, "loss": 1.0406, "step": 4810 }, { "epoch": 0.79, "learning_rate": 9.397910243233985e-06, "loss": 1.0419, "step": 4820 }, { "epoch": 0.79, "learning_rate": 9.39576909900651e-06, "loss": 1.0249, "step": 4830 }, { "epoch": 0.79, "learning_rate": 9.393627954779035e-06, "loss": 1.0455, "step": 4840 }, { "epoch": 0.8, "learning_rate": 9.39148681055156e-06, "loss": 1.0314, "step": 4850 }, { "epoch": 0.8, "learning_rate": 9.389345666324085e-06, "loss": 1.0365, "step": 4860 }, { "epoch": 0.8, "learning_rate": 9.387204522096609e-06, "loss": 1.0503, "step": 4870 }, { "epoch": 0.8, "learning_rate": 9.385063377869134e-06, "loss": 1.0134, "step": 4880 }, { "epoch": 0.8, "learning_rate": 9.38292223364166e-06, "loss": 1.0655, "step": 4890 }, { "epoch": 0.8, "learning_rate": 9.380781089414183e-06, "loss": 1.0403, "step": 4900 }, { "epoch": 0.81, "learning_rate": 9.378639945186709e-06, "loss": 1.042, "step": 4910 }, { "epoch": 0.81, "learning_rate": 9.376498800959234e-06, "loss": 1.0564, "step": 4920 }, { "epoch": 0.81, "learning_rate": 9.374357656731758e-06, "loss": 1.0469, "step": 4930 }, { "epoch": 0.81, "learning_rate": 9.372216512504283e-06, "loss": 1.0323, "step": 4940 }, { "epoch": 0.81, "learning_rate": 9.370075368276807e-06, "loss": 1.0434, "step": 4950 }, { "epoch": 0.81, "learning_rate": 9.367934224049334e-06, "loss": 1.0474, "step": 4960 }, { "epoch": 0.82, "learning_rate": 9.365793079821858e-06, "loss": 1.0576, "step": 4970 }, { "epoch": 0.82, "learning_rate": 9.363651935594383e-06, "loss": 1.0588, "step": 4980 }, { "epoch": 0.82, "learning_rate": 9.361510791366907e-06, "loss": 1.0369, "step": 4990 }, { "epoch": 0.82, "learning_rate": 9.359369647139432e-06, "loss": 1.0159, "step": 5000 }, { "epoch": 0.82, "eval_multi_news_accuracy": 0.5636508659548628, "eval_multi_news_loss": 1.9013671875, "eval_multi_news_runtime": 374.2666, "eval_multi_news_samples_per_second": 15.021, "eval_multi_news_steps_per_second": 1.878, "step": 5000 }, { "epoch": 0.82, "eval_samsum_accuracy": 0.6542117958662441, "eval_samsum_loss": 1.22265625, "eval_samsum_runtime": 37.6015, "eval_samsum_samples_per_second": 21.754, "eval_samsum_steps_per_second": 2.739, "step": 5000 }, { "epoch": 0.82, "eval_billsum_accuracy": 0.6560928552644996, "eval_billsum_loss": 1.412109375, "eval_billsum_runtime": 204.697, "eval_billsum_samples_per_second": 15.97, "eval_billsum_steps_per_second": 1.998, "step": 5000 }, { "epoch": 0.82, "eval_wmt2019_zh-en_accuracy": 0.6045209655463313, "eval_wmt2019_zh-en_loss": 1.826171875, "eval_wmt2019_zh-en_runtime": 43.7238, "eval_wmt2019_zh-en_samples_per_second": 22.871, "eval_wmt2019_zh-en_steps_per_second": 2.859, "step": 5000 }, { "epoch": 0.82, "eval_ted_trans_en-ja_accuracy": 0.5986221480612599, "eval_ted_trans_en-ja_loss": 1.6884765625, "eval_ted_trans_en-ja_runtime": 35.5277, "eval_ted_trans_en-ja_samples_per_second": 22.546, "eval_ted_trans_en-ja_steps_per_second": 2.843, "step": 5000 }, { "epoch": 0.82, "eval_ted_trans_zh-ja_accuracy": 0.49819293855991104, "eval_ted_trans_zh-ja_loss": 2.29296875, "eval_ted_trans_zh-ja_runtime": 2.6448, "eval_ted_trans_zh-ja_samples_per_second": 15.88, "eval_ted_trans_zh-ja_steps_per_second": 2.269, "step": 5000 }, { "epoch": 0.82, "eval_sharegpt_accuracy": 0.7644941905120852, "eval_sharegpt_loss": 0.86474609375, "eval_sharegpt_runtime": 732.8989, "eval_sharegpt_samples_per_second": 4.568, "eval_sharegpt_steps_per_second": 0.572, "step": 5000 }, { "epoch": 0.82, "eval_dolly15k_accuracy": 0.5935786732456141, "eval_dolly15k_loss": 1.6474609375, "eval_dolly15k_runtime": 33.5372, "eval_dolly15k_samples_per_second": 22.393, "eval_dolly15k_steps_per_second": 2.803, "step": 5000 }, { "epoch": 0.82, "eval_ikala_accuracy": 0.7535167297168253, "eval_ikala_loss": 0.89990234375, "eval_ikala_runtime": 887.8229, "eval_ikala_samples_per_second": 16.001, "eval_ikala_steps_per_second": 2.0, "step": 5000 }, { "epoch": 0.82, "eval_oasst_export_accuracy": 0.6621677719692545, "eval_oasst_export_loss": 1.5673828125, "eval_oasst_export_runtime": 134.0182, "eval_oasst_export_samples_per_second": 15.662, "eval_oasst_export_steps_per_second": 1.962, "step": 5000 }, { "epoch": 0.82, "eval_joke_accuracy": 0.5397081122062168, "eval_joke_loss": 1.966796875, "eval_joke_runtime": 4.7628, "eval_joke_samples_per_second": 15.957, "eval_joke_steps_per_second": 2.1, "step": 5000 }, { "epoch": 0.82, "eval_gsm8k_accuracy": 0.7978304508956146, "eval_gsm8k_loss": 0.73388671875, "eval_gsm8k_runtime": 58.2683, "eval_gsm8k_samples_per_second": 22.637, "eval_gsm8k_steps_per_second": 2.832, "step": 5000 }, { "epoch": 0.82, "eval_webgpt_accuracy": 0.4992032819813969, "eval_webgpt_loss": 2.189453125, "eval_webgpt_runtime": 158.2652, "eval_webgpt_samples_per_second": 22.513, "eval_webgpt_steps_per_second": 2.818, "step": 5000 } ], "max_steps": 48704, "num_train_epochs": 8, "total_flos": 3017973738504192.0, "trial_name": null, "trial_params": null }