{ "best_metric": 1.2033756971359253, "best_model_checkpoint": "output/rihanna/checkpoint-917", "epoch": 7.0, "global_step": 917, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.00013670742670262692, "loss": 3.4044, "step": 5 }, { "epoch": 0.08, "learning_rate": 0.00013523678052634687, "loss": 2.8194, "step": 10 }, { "epoch": 0.11, "learning_rate": 0.00013280918103490095, "loss": 2.7475, "step": 15 }, { "epoch": 0.15, "learning_rate": 0.00012945949034742042, "loss": 2.9331, "step": 20 }, { "epoch": 0.19, "learning_rate": 0.00012523581249268407, "loss": 2.8944, "step": 25 }, { "epoch": 0.23, "learning_rate": 0.00012019880259978666, "loss": 2.6589, "step": 30 }, { "epoch": 0.27, "learning_rate": 0.00011442079584574986, "loss": 2.7123, "step": 35 }, { "epoch": 0.31, "learning_rate": 0.00010798476866903087, "loss": 2.5847, "step": 40 }, { "epoch": 0.34, "learning_rate": 0.00010098314716666811, "loss": 2.391, "step": 45 }, { "epoch": 0.38, "learning_rate": 9.351647978736063e-05, "loss": 2.5899, "step": 50 }, { "epoch": 0.42, "learning_rate": 8.5691993381587e-05, "loss": 2.3867, "step": 55 }, { "epoch": 0.46, "learning_rate": 7.762205334494898e-05, "loss": 2.4515, "step": 60 }, { "epoch": 0.5, "learning_rate": 6.942254996821776e-05, "loss": 2.7458, "step": 65 }, { "epoch": 0.53, "learning_rate": 6.121123416728538e-05, "loss": 2.4949, "step": 70 }, { "epoch": 0.57, "learning_rate": 5.310602649316754e-05, "loss": 2.5412, "step": 75 }, { "epoch": 0.61, "learning_rate": 4.5223323705920566e-05, "loss": 2.5738, "step": 80 }, { "epoch": 0.65, "learning_rate": 3.7676327231320786e-05, "loss": 2.2778, "step": 85 }, { "epoch": 0.69, "learning_rate": 3.0573417504900444e-05, "loss": 2.0957, "step": 90 }, { "epoch": 0.73, "learning_rate": 2.401659754895943e-05, "loss": 2.3988, "step": 95 }, { "epoch": 0.76, "learning_rate": 1.8100028133934438e-05, "loss": 2.3799, "step": 100 }, { "epoch": 0.8, "learning_rate": 1.2908675560288951e-05, "loss": 2.5075, "step": 105 }, { "epoch": 0.84, "learning_rate": 8.517091479772992e-06, "loss": 2.1984, "step": 110 }, { "epoch": 0.88, "learning_rate": 4.988342278719811e-06, "loss": 2.4497, "step": 115 }, { "epoch": 0.92, "learning_rate": 2.3731033982246404e-06, "loss": 2.3199, "step": 120 }, { "epoch": 0.95, "learning_rate": 7.089315974356758e-07, "loss": 2.3814, "step": 125 }, { "epoch": 0.99, "learning_rate": 1.9725610793441152e-08, "loss": 2.1807, "step": 130 }, { "epoch": 1.0, "eval_loss": 2.336484432220459, "eval_runtime": 8.0556, "eval_samples_per_second": 22.593, "eval_steps_per_second": 2.855, "step": 131 }, { "epoch": 1.03, "learning_rate": 3.153829445781081e-07, "loss": 2.3816, "step": 135 }, { "epoch": 1.07, "learning_rate": 1.5916577414195624e-06, "loss": 2.1418, "step": 140 }, { "epoch": 1.11, "learning_rate": 3.8302217539407e-06, "loss": 2.1866, "step": 145 }, { "epoch": 1.15, "learning_rate": 6.998927551907465e-06, "loss": 2.3514, "step": 150 }, { "epoch": 1.18, "learning_rate": 1.1052270183036815e-05, "loss": 2.3302, "step": 155 }, { "epoch": 1.22, "learning_rate": 1.5932040657672757e-05, "loss": 2.0237, "step": 160 }, { "epoch": 1.26, "learning_rate": 2.1568161872809022e-05, "loss": 2.1338, "step": 165 }, { "epoch": 1.3, "learning_rate": 2.7879694970972374e-05, "loss": 2.1066, "step": 170 }, { "epoch": 1.34, "learning_rate": 3.477600168191081e-05, "loss": 2.1939, "step": 175 }, { "epoch": 1.37, "learning_rate": 4.215804595500327e-05, "loss": 2.1481, "step": 180 }, { "epoch": 1.41, "learning_rate": 4.991981618998877e-05, "loss": 2.4364, "step": 185 }, { "epoch": 1.45, "learning_rate": 5.794984764173325e-05, "loss": 2.1677, "step": 190 }, { "epoch": 1.49, "learning_rate": 6.613282313617852e-05, "loss": 2.3095, "step": 195 }, { "epoch": 1.53, "learning_rate": 7.435122911001204e-05, "loss": 2.4404, "step": 200 }, { "epoch": 1.56, "learning_rate": 8.248704319210595e-05, "loss": 2.0718, "step": 205 }, { "epoch": 1.6, "learning_rate": 9.042342909181217e-05, "loss": 2.2015, "step": 210 }, { "epoch": 1.64, "learning_rate": 9.804641445426643e-05, "loss": 2.1899, "step": 215 }, { "epoch": 1.68, "learning_rate": 0.00010524652758746261, "loss": 2.1465, "step": 220 }, { "epoch": 1.72, "learning_rate": 0.00011192036955648884, "loss": 2.3478, "step": 225 }, { "epoch": 1.76, "learning_rate": 0.00011797209906849287, "loss": 2.3704, "step": 230 }, { "epoch": 1.79, "learning_rate": 0.0001233148088243337, "loss": 2.2886, "step": 235 }, { "epoch": 1.83, "learning_rate": 0.00012787177357149405, "loss": 2.1345, "step": 240 }, { "epoch": 1.87, "learning_rate": 0.00013157755193529395, "loss": 2.0747, "step": 245 }, { "epoch": 1.91, "learning_rate": 0.00013437892620529645, "loss": 2.2118, "step": 250 }, { "epoch": 1.95, "learning_rate": 0.000136235666580879, "loss": 2.1172, "step": 255 }, { "epoch": 1.98, "learning_rate": 0.00013712110890084145, "loss": 2.2353, "step": 260 }, { "epoch": 2.0, "eval_loss": 2.237405776977539, "eval_runtime": 8.3034, "eval_samples_per_second": 22.521, "eval_steps_per_second": 2.89, "step": 262 }, { "epoch": 2.04, "learning_rate": 0.0001366998287631265, "loss": 2.0176, "step": 265 }, { "epoch": 2.08, "learning_rate": 0.00013520660867542716, "loss": 2.0043, "step": 270 }, { "epoch": 2.12, "learning_rate": 0.00013274211424821946, "loss": 1.743, "step": 275 }, { "epoch": 2.15, "learning_rate": 0.00012934228335981023, "loss": 2.0598, "step": 280 }, { "epoch": 2.19, "learning_rate": 0.00012505669320030482, "loss": 1.9087, "step": 285 }, { "epoch": 2.23, "learning_rate": 0.00011994783732453754, "loss": 1.6869, "step": 290 }, { "epoch": 2.27, "learning_rate": 0.00011409021435531858, "loss": 1.726, "step": 295 }, { "epoch": 2.31, "learning_rate": 0.00010756924162575734, "loss": 1.966, "step": 300 }, { "epoch": 2.35, "learning_rate": 0.00010048000960220251, "loss": 2.0242, "step": 305 }, { "epoch": 2.38, "learning_rate": 9.292589525111797e-05, "loss": 1.804, "step": 310 }, { "epoch": 2.42, "learning_rate": 8.501705457012652e-05, "loss": 1.7316, "step": 315 }, { "epoch": 2.46, "learning_rate": 7.686881626551514e-05, "loss": 2.1338, "step": 320 }, { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 2.067, "step": 325 }, { "epoch": 2.54, "learning_rate": 6.03311837344849e-05, "loss": 1.993, "step": 330 }, { "epoch": 2.58, "learning_rate": 5.218294542987351e-05, "loss": 1.8933, "step": 335 }, { "epoch": 2.62, "learning_rate": 4.427410474888207e-05, "loss": 1.7142, "step": 340 }, { "epoch": 2.65, "learning_rate": 3.6719990397797524e-05, "loss": 1.9927, "step": 345 }, { "epoch": 2.69, "learning_rate": 2.9630758374242683e-05, "loss": 1.8104, "step": 350 }, { "epoch": 2.73, "learning_rate": 2.310978564468145e-05, "loss": 1.8292, "step": 355 }, { "epoch": 2.77, "learning_rate": 1.7252162675462497e-05, "loss": 2.0388, "step": 360 }, { "epoch": 2.81, "learning_rate": 1.214330679969522e-05, "loss": 1.8121, "step": 365 }, { "epoch": 2.85, "learning_rate": 7.8577166401898e-06, "loss": 1.8632, "step": 370 }, { "epoch": 2.88, "learning_rate": 4.457885751780558e-06, "loss": 2.0386, "step": 375 }, { "epoch": 2.92, "learning_rate": 1.9933913245728472e-06, "loss": 1.7312, "step": 380 }, { "epoch": 2.96, "learning_rate": 5.001712368734975e-07, "loss": 2.0118, "step": 385 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 1.7726, "step": 390 }, { "epoch": 3.0, "eval_loss": 1.8738644123077393, "eval_runtime": 4.2323, "eval_samples_per_second": 45.838, "eval_steps_per_second": 5.907, "step": 390 }, { "epoch": 3.04, "learning_rate": 5.001712368734899e-07, "loss": 1.519, "step": 395 }, { "epoch": 3.08, "learning_rate": 1.9933913245728396e-06, "loss": 1.6714, "step": 400 }, { "epoch": 3.12, "learning_rate": 4.457885751780535e-06, "loss": 1.6067, "step": 405 }, { "epoch": 3.15, "learning_rate": 7.857716640189778e-06, "loss": 1.4835, "step": 410 }, { "epoch": 3.19, "learning_rate": 1.2143306799695189e-05, "loss": 1.9692, "step": 415 }, { "epoch": 3.23, "learning_rate": 1.725216267546246e-05, "loss": 1.5852, "step": 420 }, { "epoch": 3.27, "learning_rate": 2.310978564468141e-05, "loss": 1.766, "step": 425 }, { "epoch": 3.31, "learning_rate": 2.9630758374242642e-05, "loss": 1.7481, "step": 430 }, { "epoch": 3.35, "learning_rate": 3.671999039779748e-05, "loss": 1.9411, "step": 435 }, { "epoch": 3.38, "learning_rate": 4.427410474888202e-05, "loss": 1.5399, "step": 440 }, { "epoch": 3.42, "learning_rate": 5.218294542987346e-05, "loss": 1.9887, "step": 445 }, { "epoch": 3.46, "learning_rate": 6.033118373448485e-05, "loss": 1.6023, "step": 450 }, { "epoch": 3.5, "learning_rate": 6.859999999999997e-05, "loss": 1.665, "step": 455 }, { "epoch": 3.54, "learning_rate": 7.68688162655151e-05, "loss": 1.6708, "step": 460 }, { "epoch": 3.58, "learning_rate": 8.501705457012648e-05, "loss": 1.7596, "step": 465 }, { "epoch": 3.62, "learning_rate": 9.292589525111793e-05, "loss": 2.0391, "step": 470 }, { "epoch": 3.65, "learning_rate": 0.00010048000960220248, "loss": 1.6346, "step": 475 }, { "epoch": 3.69, "learning_rate": 0.00010756924162575731, "loss": 1.5059, "step": 480 }, { "epoch": 3.73, "learning_rate": 0.00011409021435531856, "loss": 1.7107, "step": 485 }, { "epoch": 3.77, "learning_rate": 0.0001199478373245375, "loss": 1.6263, "step": 490 }, { "epoch": 3.81, "learning_rate": 0.0001250566932003048, "loss": 1.7098, "step": 495 }, { "epoch": 3.85, "learning_rate": 0.00012934228335981018, "loss": 1.5807, "step": 500 }, { "epoch": 3.88, "learning_rate": 0.00013274211424821943, "loss": 1.9171, "step": 505 }, { "epoch": 3.92, "learning_rate": 0.00013520660867542716, "loss": 1.6038, "step": 510 }, { "epoch": 3.96, "learning_rate": 0.00013669982876312649, "loss": 1.548, "step": 515 }, { "epoch": 4.0, "learning_rate": 0.0001372, "loss": 1.8101, "step": 520 }, { "epoch": 4.0, "eval_loss": 1.8380614519119263, "eval_runtime": 4.2339, "eval_samples_per_second": 45.821, "eval_steps_per_second": 5.905, "step": 520 }, { "epoch": 4.04, "learning_rate": 0.0001366998287631265, "loss": 1.7809, "step": 525 }, { "epoch": 4.08, "learning_rate": 0.0001352066086754272, "loss": 1.71, "step": 530 }, { "epoch": 4.12, "learning_rate": 0.0001327421142482195, "loss": 1.3283, "step": 535 }, { "epoch": 4.15, "learning_rate": 0.00012934228335981015, "loss": 1.4905, "step": 540 }, { "epoch": 4.19, "learning_rate": 0.00012505669320030482, "loss": 1.2511, "step": 545 }, { "epoch": 4.23, "learning_rate": 0.00011994783732453755, "loss": 1.6209, "step": 550 }, { "epoch": 4.27, "learning_rate": 0.00011409021435531858, "loss": 1.6988, "step": 555 }, { "epoch": 4.31, "learning_rate": 0.00010756924162575738, "loss": 1.2228, "step": 560 }, { "epoch": 4.35, "learning_rate": 0.00010048000960220263, "loss": 1.6827, "step": 565 }, { "epoch": 4.38, "learning_rate": 9.292589525111788e-05, "loss": 1.6977, "step": 570 }, { "epoch": 4.42, "learning_rate": 8.501705457012643e-05, "loss": 1.4269, "step": 575 }, { "epoch": 4.46, "learning_rate": 7.686881626551516e-05, "loss": 1.6831, "step": 580 }, { "epoch": 4.5, "learning_rate": 6.860000000000003e-05, "loss": 1.0505, "step": 585 }, { "epoch": 4.54, "learning_rate": 6.033118373448492e-05, "loss": 1.4459, "step": 590 }, { "epoch": 4.58, "learning_rate": 5.218294542987365e-05, "loss": 1.4365, "step": 595 }, { "epoch": 4.62, "learning_rate": 4.42741047488822e-05, "loss": 1.6545, "step": 600 }, { "epoch": 4.65, "learning_rate": 3.671999039779743e-05, "loss": 1.6826, "step": 605 }, { "epoch": 4.69, "learning_rate": 2.9630758374242696e-05, "loss": 1.6816, "step": 610 }, { "epoch": 4.73, "learning_rate": 2.3109785644681465e-05, "loss": 1.43, "step": 615 }, { "epoch": 4.77, "learning_rate": 1.7252162675462504e-05, "loss": 1.1238, "step": 620 }, { "epoch": 4.81, "learning_rate": 1.2143306799695228e-05, "loss": 1.1441, "step": 625 }, { "epoch": 4.85, "learning_rate": 7.857716640189861e-06, "loss": 1.5854, "step": 630 }, { "epoch": 4.88, "learning_rate": 4.4578857517805195e-06, "loss": 1.2825, "step": 635 }, { "epoch": 4.92, "learning_rate": 1.9933913245728244e-06, "loss": 1.3848, "step": 640 }, { "epoch": 4.96, "learning_rate": 5.001712368734975e-07, "loss": 1.4917, "step": 645 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.0341, "step": 650 }, { "epoch": 5.0, "eval_loss": 1.7726789712905884, "eval_runtime": 4.2405, "eval_samples_per_second": 45.75, "eval_steps_per_second": 5.896, "step": 650 }, { "epoch": 5.04, "learning_rate": 5.001712368734899e-07, "loss": 1.3946, "step": 655 }, { "epoch": 5.08, "learning_rate": 1.9933913245728015e-06, "loss": 1.1575, "step": 660 }, { "epoch": 5.12, "learning_rate": 4.457885751780535e-06, "loss": 1.2407, "step": 665 }, { "epoch": 5.15, "learning_rate": 7.857716640189824e-06, "loss": 1.299, "step": 670 }, { "epoch": 5.19, "learning_rate": 1.2143306799695106e-05, "loss": 1.4759, "step": 675 }, { "epoch": 5.23, "learning_rate": 1.725216267546245e-05, "loss": 1.2347, "step": 680 }, { "epoch": 5.27, "learning_rate": 2.3109785644681495e-05, "loss": 1.4233, "step": 685 }, { "epoch": 5.31, "learning_rate": 2.963075837424263e-05, "loss": 1.4062, "step": 690 }, { "epoch": 5.35, "learning_rate": 3.6719990397797463e-05, "loss": 1.2485, "step": 695 }, { "epoch": 5.38, "learning_rate": 4.4274104748882125e-05, "loss": 1.4536, "step": 700 }, { "epoch": 5.42, "learning_rate": 5.2182945429873444e-05, "loss": 1.2116, "step": 705 }, { "epoch": 5.46, "learning_rate": 6.033118373448483e-05, "loss": 1.1743, "step": 710 }, { "epoch": 5.5, "learning_rate": 6.859999999999984e-05, "loss": 1.2798, "step": 715 }, { "epoch": 5.54, "learning_rate": 7.686881626551508e-05, "loss": 1.1637, "step": 720 }, { "epoch": 5.58, "learning_rate": 8.501705457012647e-05, "loss": 1.4529, "step": 725 }, { "epoch": 5.62, "learning_rate": 9.292589525111778e-05, "loss": 1.1575, "step": 730 }, { "epoch": 5.65, "learning_rate": 0.00010048000960220244, "loss": 1.4035, "step": 735 }, { "epoch": 5.69, "learning_rate": 0.0001075692416257573, "loss": 1.2311, "step": 740 }, { "epoch": 5.73, "learning_rate": 0.00011409021435531843, "loss": 1.3514, "step": 745 }, { "epoch": 5.77, "learning_rate": 0.00011994783732453749, "loss": 1.0519, "step": 750 }, { "epoch": 5.81, "learning_rate": 0.00012505669320030485, "loss": 1.39, "step": 755 }, { "epoch": 5.85, "learning_rate": 0.00012934228335981013, "loss": 1.2267, "step": 760 }, { "epoch": 5.88, "learning_rate": 0.00013274211424821943, "loss": 1.431, "step": 765 }, { "epoch": 5.92, "learning_rate": 0.00013520660867542716, "loss": 1.5557, "step": 770 }, { "epoch": 5.96, "learning_rate": 0.00013669982876312649, "loss": 1.2241, "step": 775 }, { "epoch": 6.0, "learning_rate": 0.0001372, "loss": 1.1435, "step": 780 }, { "epoch": 6.0, "eval_loss": 1.752521276473999, "eval_runtime": 4.2466, "eval_samples_per_second": 45.684, "eval_steps_per_second": 5.887, "step": 780 }, { "epoch": 5.99, "learning_rate": 0.00013718027438920657, "loss": 1.3027, "step": 785 }, { "epoch": 6.0, "eval_loss": 1.2558701038360596, "eval_runtime": 3.8315, "eval_samples_per_second": 47.762, "eval_steps_per_second": 6.003, "step": 786 }, { "epoch": 6.03, "learning_rate": 0.0001368846170554219, "loss": 1.0307, "step": 790 }, { "epoch": 6.07, "learning_rate": 0.00013560834225858045, "loss": 1.3396, "step": 795 }, { "epoch": 6.11, "learning_rate": 0.0001333697782460593, "loss": 1.3763, "step": 800 }, { "epoch": 6.15, "learning_rate": 0.0001302010724480925, "loss": 1.3968, "step": 805 }, { "epoch": 6.18, "learning_rate": 0.00012614772981696314, "loss": 1.2598, "step": 810 }, { "epoch": 6.22, "learning_rate": 0.00012126795934232731, "loss": 1.6357, "step": 815 }, { "epoch": 6.26, "learning_rate": 0.00011563183812719099, "loss": 1.5069, "step": 820 }, { "epoch": 6.3, "learning_rate": 0.00010932030502902761, "loss": 1.3309, "step": 825 }, { "epoch": 6.34, "learning_rate": 0.00010242399831808912, "loss": 1.2963, "step": 830 }, { "epoch": 6.37, "learning_rate": 9.504195404499683e-05, "loss": 1.5269, "step": 835 }, { "epoch": 6.41, "learning_rate": 8.728018381001126e-05, "loss": 1.1606, "step": 840 }, { "epoch": 6.45, "learning_rate": 7.925015235826672e-05, "loss": 1.4031, "step": 845 }, { "epoch": 6.49, "learning_rate": 7.10671768638214e-05, "loss": 1.1422, "step": 850 }, { "epoch": 6.53, "learning_rate": 6.284877088998812e-05, "loss": 1.2245, "step": 855 }, { "epoch": 6.56, "learning_rate": 5.4712956807894146e-05, "loss": 1.5305, "step": 860 }, { "epoch": 6.6, "learning_rate": 4.677657090818787e-05, "loss": 1.4329, "step": 865 }, { "epoch": 6.64, "learning_rate": 3.915358554573355e-05, "loss": 1.0966, "step": 870 }, { "epoch": 6.68, "learning_rate": 3.1953472412537526e-05, "loss": 1.1648, "step": 875 }, { "epoch": 6.72, "learning_rate": 2.5279630443511272e-05, "loss": 1.5295, "step": 880 }, { "epoch": 6.76, "learning_rate": 1.9227900931507197e-05, "loss": 1.1119, "step": 885 }, { "epoch": 6.79, "learning_rate": 1.388519117566634e-05, "loss": 1.0333, "step": 890 }, { "epoch": 6.83, "learning_rate": 9.328226428505963e-06, "loss": 1.3515, "step": 895 }, { "epoch": 6.87, "learning_rate": 5.622448064706006e-06, "loss": 1.2529, "step": 900 }, { "epoch": 6.91, "learning_rate": 2.8210737947035045e-06, "loss": 1.0788, "step": 905 }, { "epoch": 6.95, "learning_rate": 9.643334191210031e-07, "loss": 1.4072, "step": 910 }, { "epoch": 6.98, "learning_rate": 7.88910991585708e-08, "loss": 0.9972, "step": 915 }, { "epoch": 7.0, "eval_loss": 1.2033756971359253, "eval_runtime": 4.6484, "eval_samples_per_second": 39.368, "eval_steps_per_second": 4.948, "step": 917 } ], "max_steps": 917, "num_train_epochs": 7, "total_flos": 953846562816000.0, "trial_name": null, "trial_params": null }