{ "best_metric": 0.9100631475448608, "best_model_checkpoint": "./lora-alpaca/checkpoint-1600", "epoch": 3.9486673247778876, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 2.3999999999999997e-05, "loss": 1.5473, "step": 10 }, { "epoch": 0.04, "learning_rate": 5.399999999999999e-05, "loss": 1.5181, "step": 20 }, { "epoch": 0.06, "learning_rate": 8.4e-05, "loss": 1.3749, "step": 30 }, { "epoch": 0.08, "learning_rate": 0.00011399999999999999, "loss": 1.1669, "step": 40 }, { "epoch": 0.1, "learning_rate": 0.00014399999999999998, "loss": 1.0525, "step": 50 }, { "epoch": 0.12, "learning_rate": 0.00017399999999999997, "loss": 1.0376, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.000204, "loss": 1.0043, "step": 70 }, { "epoch": 0.16, "learning_rate": 0.000234, "loss": 0.9849, "step": 80 }, { "epoch": 0.18, "learning_rate": 0.00026399999999999997, "loss": 0.9821, "step": 90 }, { "epoch": 0.2, "learning_rate": 0.000294, "loss": 0.9846, "step": 100 }, { "epoch": 0.22, "learning_rate": 0.0002987525987525987, "loss": 0.9635, "step": 110 }, { "epoch": 0.24, "learning_rate": 0.00029719334719334716, "loss": 0.9538, "step": 120 }, { "epoch": 0.26, "learning_rate": 0.0002956340956340956, "loss": 0.9408, "step": 130 }, { "epoch": 0.28, "learning_rate": 0.00029407484407484405, "loss": 0.9529, "step": 140 }, { "epoch": 0.3, "learning_rate": 0.0002925155925155925, "loss": 0.9622, "step": 150 }, { "epoch": 0.32, "learning_rate": 0.00029095634095634094, "loss": 0.9517, "step": 160 }, { "epoch": 0.34, "learning_rate": 0.0002893970893970894, "loss": 0.9624, "step": 170 }, { "epoch": 0.36, "learning_rate": 0.0002878378378378378, "loss": 0.9445, "step": 180 }, { "epoch": 0.38, "learning_rate": 0.0002862785862785863, "loss": 0.9428, "step": 190 }, { "epoch": 0.39, "learning_rate": 0.0002847193347193347, "loss": 0.9372, "step": 200 }, { "epoch": 0.39, "eval_loss": 0.9433729648590088, "eval_runtime": 44.3089, "eval_samples_per_second": 45.138, "eval_steps_per_second": 0.722, "step": 200 }, { "epoch": 0.41, "learning_rate": 0.00028316008316008317, "loss": 0.9388, "step": 210 }, { "epoch": 0.43, "learning_rate": 0.00028160083160083156, "loss": 0.9412, "step": 220 }, { "epoch": 0.45, "learning_rate": 0.00028004158004158, "loss": 0.9365, "step": 230 }, { "epoch": 0.47, "learning_rate": 0.00027848232848232845, "loss": 0.9382, "step": 240 }, { "epoch": 0.49, "learning_rate": 0.0002769230769230769, "loss": 0.9309, "step": 250 }, { "epoch": 0.51, "learning_rate": 0.00027536382536382534, "loss": 0.9256, "step": 260 }, { "epoch": 0.53, "learning_rate": 0.0002738045738045738, "loss": 0.9245, "step": 270 }, { "epoch": 0.55, "learning_rate": 0.00027224532224532223, "loss": 0.9329, "step": 280 }, { "epoch": 0.57, "learning_rate": 0.0002706860706860707, "loss": 0.9405, "step": 290 }, { "epoch": 0.59, "learning_rate": 0.0002691268191268191, "loss": 0.9168, "step": 300 }, { "epoch": 0.61, "learning_rate": 0.00026756756756756756, "loss": 0.92, "step": 310 }, { "epoch": 0.63, "learning_rate": 0.000266008316008316, "loss": 0.9098, "step": 320 }, { "epoch": 0.65, "learning_rate": 0.00026444906444906445, "loss": 0.9275, "step": 330 }, { "epoch": 0.67, "learning_rate": 0.00026288981288981285, "loss": 0.9215, "step": 340 }, { "epoch": 0.69, "learning_rate": 0.0002613305613305613, "loss": 0.9237, "step": 350 }, { "epoch": 0.71, "learning_rate": 0.00025977130977130974, "loss": 0.9237, "step": 360 }, { "epoch": 0.73, "learning_rate": 0.0002582120582120582, "loss": 0.928, "step": 370 }, { "epoch": 0.75, "learning_rate": 0.0002566528066528066, "loss": 0.9156, "step": 380 }, { "epoch": 0.77, "learning_rate": 0.00025509355509355507, "loss": 0.9255, "step": 390 }, { "epoch": 0.79, "learning_rate": 0.0002535343035343035, "loss": 0.9102, "step": 400 }, { "epoch": 0.79, "eval_loss": 0.9269554615020752, "eval_runtime": 44.228, "eval_samples_per_second": 45.22, "eval_steps_per_second": 0.724, "step": 400 }, { "epoch": 0.81, "learning_rate": 0.00025197505197505196, "loss": 0.9312, "step": 410 }, { "epoch": 0.83, "learning_rate": 0.0002504158004158004, "loss": 0.9249, "step": 420 }, { "epoch": 0.85, "learning_rate": 0.00024885654885654885, "loss": 0.9151, "step": 430 }, { "epoch": 0.87, "learning_rate": 0.0002472972972972973, "loss": 0.9265, "step": 440 }, { "epoch": 0.89, "learning_rate": 0.00024573804573804574, "loss": 0.9064, "step": 450 }, { "epoch": 0.91, "learning_rate": 0.00024417879417879413, "loss": 0.9185, "step": 460 }, { "epoch": 0.93, "learning_rate": 0.0002426195426195426, "loss": 0.9128, "step": 470 }, { "epoch": 0.95, "learning_rate": 0.00024106029106029105, "loss": 0.9159, "step": 480 }, { "epoch": 0.97, "learning_rate": 0.0002395010395010395, "loss": 0.8987, "step": 490 }, { "epoch": 0.99, "learning_rate": 0.0002379417879417879, "loss": 0.9217, "step": 500 }, { "epoch": 1.01, "learning_rate": 0.00023638253638253636, "loss": 0.906, "step": 510 }, { "epoch": 1.03, "learning_rate": 0.0002348232848232848, "loss": 0.9066, "step": 520 }, { "epoch": 1.05, "learning_rate": 0.00023326403326403325, "loss": 0.9085, "step": 530 }, { "epoch": 1.07, "learning_rate": 0.00023170478170478166, "loss": 0.9203, "step": 540 }, { "epoch": 1.09, "learning_rate": 0.0002301455301455301, "loss": 0.9003, "step": 550 }, { "epoch": 1.11, "learning_rate": 0.00022858627858627858, "loss": 0.9071, "step": 560 }, { "epoch": 1.13, "learning_rate": 0.00022702702702702703, "loss": 0.9026, "step": 570 }, { "epoch": 1.15, "learning_rate": 0.00022546777546777544, "loss": 0.8845, "step": 580 }, { "epoch": 1.16, "learning_rate": 0.0002239085239085239, "loss": 0.9023, "step": 590 }, { "epoch": 1.18, "learning_rate": 0.00022234927234927233, "loss": 0.9169, "step": 600 }, { "epoch": 1.18, "eval_loss": 0.9196034073829651, "eval_runtime": 44.3139, "eval_samples_per_second": 45.133, "eval_steps_per_second": 0.722, "step": 600 }, { "epoch": 1.2, "learning_rate": 0.00022079002079002078, "loss": 0.9015, "step": 610 }, { "epoch": 1.22, "learning_rate": 0.0002192307692307692, "loss": 0.9066, "step": 620 }, { "epoch": 1.24, "learning_rate": 0.00021767151767151764, "loss": 0.8981, "step": 630 }, { "epoch": 1.26, "learning_rate": 0.0002161122661122661, "loss": 0.9023, "step": 640 }, { "epoch": 1.28, "learning_rate": 0.00021455301455301456, "loss": 0.9077, "step": 650 }, { "epoch": 1.3, "learning_rate": 0.00021299376299376295, "loss": 0.9055, "step": 660 }, { "epoch": 1.32, "learning_rate": 0.00021143451143451142, "loss": 0.9028, "step": 670 }, { "epoch": 1.34, "learning_rate": 0.00020987525987525987, "loss": 0.8874, "step": 680 }, { "epoch": 1.36, "learning_rate": 0.0002083160083160083, "loss": 0.8844, "step": 690 }, { "epoch": 1.38, "learning_rate": 0.00020675675675675673, "loss": 0.8996, "step": 700 }, { "epoch": 1.4, "learning_rate": 0.00020519750519750517, "loss": 0.9085, "step": 710 }, { "epoch": 1.42, "learning_rate": 0.00020363825363825362, "loss": 0.9024, "step": 720 }, { "epoch": 1.44, "learning_rate": 0.00020207900207900206, "loss": 0.899, "step": 730 }, { "epoch": 1.46, "learning_rate": 0.00020051975051975048, "loss": 0.8926, "step": 740 }, { "epoch": 1.48, "learning_rate": 0.00019896049896049893, "loss": 0.89, "step": 750 }, { "epoch": 1.5, "learning_rate": 0.0001974012474012474, "loss": 0.8858, "step": 760 }, { "epoch": 1.52, "learning_rate": 0.00019584199584199584, "loss": 0.8972, "step": 770 }, { "epoch": 1.54, "learning_rate": 0.00019428274428274426, "loss": 0.9003, "step": 780 }, { "epoch": 1.56, "learning_rate": 0.0001927234927234927, "loss": 0.8991, "step": 790 }, { "epoch": 1.58, "learning_rate": 0.00019116424116424115, "loss": 0.8912, "step": 800 }, { "epoch": 1.58, "eval_loss": 0.914772093296051, "eval_runtime": 44.1817, "eval_samples_per_second": 45.268, "eval_steps_per_second": 0.724, "step": 800 }, { "epoch": 1.6, "learning_rate": 0.0001896049896049896, "loss": 0.8859, "step": 810 }, { "epoch": 1.62, "learning_rate": 0.00018804573804573802, "loss": 0.8942, "step": 820 }, { "epoch": 1.64, "learning_rate": 0.00018648648648648646, "loss": 0.8953, "step": 830 }, { "epoch": 1.66, "learning_rate": 0.0001849272349272349, "loss": 0.8845, "step": 840 }, { "epoch": 1.68, "learning_rate": 0.00018336798336798335, "loss": 0.8871, "step": 850 }, { "epoch": 1.7, "learning_rate": 0.00018180873180873177, "loss": 0.8849, "step": 860 }, { "epoch": 1.72, "learning_rate": 0.0001802494802494802, "loss": 0.8672, "step": 870 }, { "epoch": 1.74, "learning_rate": 0.00017869022869022869, "loss": 0.8811, "step": 880 }, { "epoch": 1.76, "learning_rate": 0.00017713097713097713, "loss": 0.8911, "step": 890 }, { "epoch": 1.78, "learning_rate": 0.00017557172557172555, "loss": 0.8814, "step": 900 }, { "epoch": 1.8, "learning_rate": 0.000174012474012474, "loss": 0.8819, "step": 910 }, { "epoch": 1.82, "learning_rate": 0.00017245322245322244, "loss": 0.8835, "step": 920 }, { "epoch": 1.84, "learning_rate": 0.00017089397089397088, "loss": 0.8768, "step": 930 }, { "epoch": 1.86, "learning_rate": 0.0001693347193347193, "loss": 0.8829, "step": 940 }, { "epoch": 1.88, "learning_rate": 0.00016777546777546775, "loss": 0.8898, "step": 950 }, { "epoch": 1.9, "learning_rate": 0.0001662162162162162, "loss": 0.8884, "step": 960 }, { "epoch": 1.92, "learning_rate": 0.00016465696465696466, "loss": 0.87, "step": 970 }, { "epoch": 1.93, "learning_rate": 0.00016309771309771305, "loss": 0.8851, "step": 980 }, { "epoch": 1.95, "learning_rate": 0.00016153846153846153, "loss": 0.8807, "step": 990 }, { "epoch": 1.97, "learning_rate": 0.00015997920997920997, "loss": 0.8923, "step": 1000 }, { "epoch": 1.97, "eval_loss": 0.9123471975326538, "eval_runtime": 44.1751, "eval_samples_per_second": 45.274, "eval_steps_per_second": 0.724, "step": 1000 }, { "epoch": 1.99, "learning_rate": 0.00015841995841995842, "loss": 0.8825, "step": 1010 }, { "epoch": 2.01, "learning_rate": 0.00015686070686070683, "loss": 0.878, "step": 1020 }, { "epoch": 2.03, "learning_rate": 0.00015530145530145528, "loss": 0.861, "step": 1030 }, { "epoch": 2.05, "learning_rate": 0.00015374220374220372, "loss": 0.8652, "step": 1040 }, { "epoch": 2.07, "learning_rate": 0.00015218295218295217, "loss": 0.8589, "step": 1050 }, { "epoch": 2.09, "learning_rate": 0.0001506237006237006, "loss": 0.8688, "step": 1060 }, { "epoch": 2.11, "learning_rate": 0.00014906444906444906, "loss": 0.8796, "step": 1070 }, { "epoch": 2.13, "learning_rate": 0.0001475051975051975, "loss": 0.8719, "step": 1080 }, { "epoch": 2.15, "learning_rate": 0.00014594594594594595, "loss": 0.8798, "step": 1090 }, { "epoch": 2.17, "learning_rate": 0.00014438669438669437, "loss": 0.8669, "step": 1100 }, { "epoch": 2.19, "learning_rate": 0.0001428274428274428, "loss": 0.8661, "step": 1110 }, { "epoch": 2.21, "learning_rate": 0.00014126819126819126, "loss": 0.8684, "step": 1120 }, { "epoch": 2.23, "learning_rate": 0.0001397089397089397, "loss": 0.8591, "step": 1130 }, { "epoch": 2.25, "learning_rate": 0.00013814968814968815, "loss": 0.8659, "step": 1140 }, { "epoch": 2.27, "learning_rate": 0.0001365904365904366, "loss": 0.868, "step": 1150 }, { "epoch": 2.29, "learning_rate": 0.000135031185031185, "loss": 0.871, "step": 1160 }, { "epoch": 2.31, "learning_rate": 0.00013347193347193346, "loss": 0.8639, "step": 1170 }, { "epoch": 2.33, "learning_rate": 0.0001319126819126819, "loss": 0.8603, "step": 1180 }, { "epoch": 2.35, "learning_rate": 0.00013035343035343035, "loss": 0.8617, "step": 1190 }, { "epoch": 2.37, "learning_rate": 0.0001287941787941788, "loss": 0.8548, "step": 1200 }, { "epoch": 2.37, "eval_loss": 0.9124282002449036, "eval_runtime": 44.2512, "eval_samples_per_second": 45.197, "eval_steps_per_second": 0.723, "step": 1200 }, { "epoch": 2.39, "learning_rate": 0.00012723492723492724, "loss": 0.8694, "step": 1210 }, { "epoch": 2.41, "learning_rate": 0.00012567567567567565, "loss": 0.8651, "step": 1220 }, { "epoch": 2.43, "learning_rate": 0.00012411642411642413, "loss": 0.8606, "step": 1230 }, { "epoch": 2.45, "learning_rate": 0.00012255717255717254, "loss": 0.8678, "step": 1240 }, { "epoch": 2.47, "learning_rate": 0.00012099792099792099, "loss": 0.8676, "step": 1250 }, { "epoch": 2.49, "learning_rate": 0.00011943866943866943, "loss": 0.8602, "step": 1260 }, { "epoch": 2.51, "learning_rate": 0.00011787941787941788, "loss": 0.8718, "step": 1270 }, { "epoch": 2.53, "learning_rate": 0.00011632016632016631, "loss": 0.8618, "step": 1280 }, { "epoch": 2.55, "learning_rate": 0.00011476091476091475, "loss": 0.8677, "step": 1290 }, { "epoch": 2.57, "learning_rate": 0.00011320166320166319, "loss": 0.844, "step": 1300 }, { "epoch": 2.59, "learning_rate": 0.00011164241164241164, "loss": 0.8653, "step": 1310 }, { "epoch": 2.61, "learning_rate": 0.00011008316008316008, "loss": 0.8639, "step": 1320 }, { "epoch": 2.63, "learning_rate": 0.00010852390852390852, "loss": 0.873, "step": 1330 }, { "epoch": 2.65, "learning_rate": 0.00010696465696465695, "loss": 0.8639, "step": 1340 }, { "epoch": 2.67, "learning_rate": 0.0001054054054054054, "loss": 0.8545, "step": 1350 }, { "epoch": 2.69, "learning_rate": 0.00010384615384615383, "loss": 0.8654, "step": 1360 }, { "epoch": 2.7, "learning_rate": 0.00010228690228690229, "loss": 0.8555, "step": 1370 }, { "epoch": 2.72, "learning_rate": 0.00010072765072765072, "loss": 0.8707, "step": 1380 }, { "epoch": 2.74, "learning_rate": 9.916839916839916e-05, "loss": 0.8452, "step": 1390 }, { "epoch": 2.76, "learning_rate": 9.76091476091476e-05, "loss": 0.8512, "step": 1400 }, { "epoch": 2.76, "eval_loss": 0.9109137058258057, "eval_runtime": 44.368, "eval_samples_per_second": 45.077, "eval_steps_per_second": 0.721, "step": 1400 }, { "epoch": 2.78, "learning_rate": 9.604989604989604e-05, "loss": 0.8485, "step": 1410 }, { "epoch": 2.8, "learning_rate": 9.449064449064449e-05, "loss": 0.8635, "step": 1420 }, { "epoch": 2.82, "learning_rate": 9.293139293139293e-05, "loss": 0.8499, "step": 1430 }, { "epoch": 2.84, "learning_rate": 9.137214137214136e-05, "loss": 0.855, "step": 1440 }, { "epoch": 2.86, "learning_rate": 8.98128898128898e-05, "loss": 0.8437, "step": 1450 }, { "epoch": 2.88, "learning_rate": 8.825363825363824e-05, "loss": 0.8552, "step": 1460 }, { "epoch": 2.9, "learning_rate": 8.66943866943867e-05, "loss": 0.845, "step": 1470 }, { "epoch": 2.92, "learning_rate": 8.513513513513513e-05, "loss": 0.8591, "step": 1480 }, { "epoch": 2.94, "learning_rate": 8.357588357588357e-05, "loss": 0.8559, "step": 1490 }, { "epoch": 2.96, "learning_rate": 8.2016632016632e-05, "loss": 0.8562, "step": 1500 }, { "epoch": 2.98, "learning_rate": 8.045738045738045e-05, "loss": 0.8585, "step": 1510 }, { "epoch": 3.0, "learning_rate": 7.889812889812888e-05, "loss": 0.8627, "step": 1520 }, { "epoch": 3.02, "learning_rate": 7.733887733887734e-05, "loss": 0.849, "step": 1530 }, { "epoch": 3.04, "learning_rate": 7.577962577962577e-05, "loss": 0.8479, "step": 1540 }, { "epoch": 3.06, "learning_rate": 7.422037422037422e-05, "loss": 0.8522, "step": 1550 }, { "epoch": 3.08, "learning_rate": 7.266112266112266e-05, "loss": 0.8535, "step": 1560 }, { "epoch": 3.1, "learning_rate": 7.11018711018711e-05, "loss": 0.862, "step": 1570 }, { "epoch": 3.12, "learning_rate": 6.954261954261954e-05, "loss": 0.8618, "step": 1580 }, { "epoch": 3.14, "learning_rate": 6.798336798336798e-05, "loss": 0.8478, "step": 1590 }, { "epoch": 3.16, "learning_rate": 6.642411642411643e-05, "loss": 0.8467, "step": 1600 }, { "epoch": 3.16, "eval_loss": 0.9100631475448608, "eval_runtime": 44.1701, "eval_samples_per_second": 45.279, "eval_steps_per_second": 0.724, "step": 1600 }, { "epoch": 3.18, "learning_rate": 6.486486486486486e-05, "loss": 0.8471, "step": 1610 }, { "epoch": 3.2, "learning_rate": 6.33056133056133e-05, "loss": 0.8511, "step": 1620 }, { "epoch": 3.22, "learning_rate": 6.174636174636175e-05, "loss": 0.8517, "step": 1630 }, { "epoch": 3.24, "learning_rate": 6.018711018711019e-05, "loss": 0.8537, "step": 1640 }, { "epoch": 3.26, "learning_rate": 5.8627858627858625e-05, "loss": 0.8479, "step": 1650 }, { "epoch": 3.28, "learning_rate": 5.7068607068607063e-05, "loss": 0.8473, "step": 1660 }, { "epoch": 3.3, "learning_rate": 5.550935550935551e-05, "loss": 0.8394, "step": 1670 }, { "epoch": 3.32, "learning_rate": 5.395010395010395e-05, "loss": 0.8528, "step": 1680 }, { "epoch": 3.34, "learning_rate": 5.239085239085239e-05, "loss": 0.8356, "step": 1690 }, { "epoch": 3.36, "learning_rate": 5.083160083160083e-05, "loss": 0.8543, "step": 1700 }, { "epoch": 3.38, "learning_rate": 4.927234927234927e-05, "loss": 0.8332, "step": 1710 }, { "epoch": 3.4, "learning_rate": 4.771309771309771e-05, "loss": 0.8511, "step": 1720 }, { "epoch": 3.42, "learning_rate": 4.615384615384615e-05, "loss": 0.8542, "step": 1730 }, { "epoch": 3.44, "learning_rate": 4.4594594594594596e-05, "loss": 0.8238, "step": 1740 }, { "epoch": 3.46, "learning_rate": 4.3035343035343035e-05, "loss": 0.8424, "step": 1750 }, { "epoch": 3.47, "learning_rate": 4.147609147609147e-05, "loss": 0.8455, "step": 1760 }, { "epoch": 3.49, "learning_rate": 3.991683991683992e-05, "loss": 0.8554, "step": 1770 }, { "epoch": 3.51, "learning_rate": 3.8357588357588356e-05, "loss": 0.8491, "step": 1780 }, { "epoch": 3.53, "learning_rate": 3.6798336798336794e-05, "loss": 0.8506, "step": 1790 }, { "epoch": 3.55, "learning_rate": 3.523908523908524e-05, "loss": 0.8357, "step": 1800 }, { "epoch": 3.55, "eval_loss": 0.9110742211341858, "eval_runtime": 44.3239, "eval_samples_per_second": 45.122, "eval_steps_per_second": 0.722, "step": 1800 }, { "epoch": 3.57, "learning_rate": 3.367983367983368e-05, "loss": 0.8606, "step": 1810 }, { "epoch": 3.59, "learning_rate": 3.212058212058212e-05, "loss": 0.8471, "step": 1820 }, { "epoch": 3.61, "learning_rate": 3.056133056133056e-05, "loss": 0.8419, "step": 1830 }, { "epoch": 3.63, "learning_rate": 2.9002079002079002e-05, "loss": 0.8328, "step": 1840 }, { "epoch": 3.65, "learning_rate": 2.744282744282744e-05, "loss": 0.8442, "step": 1850 }, { "epoch": 3.67, "learning_rate": 2.5883575883575882e-05, "loss": 0.835, "step": 1860 }, { "epoch": 3.69, "learning_rate": 2.4324324324324324e-05, "loss": 0.8432, "step": 1870 }, { "epoch": 3.71, "learning_rate": 2.2765072765072765e-05, "loss": 0.8378, "step": 1880 }, { "epoch": 3.73, "learning_rate": 2.1205821205821204e-05, "loss": 0.8394, "step": 1890 }, { "epoch": 3.75, "learning_rate": 1.9646569646569645e-05, "loss": 0.8364, "step": 1900 }, { "epoch": 3.77, "learning_rate": 1.8087318087318087e-05, "loss": 0.849, "step": 1910 }, { "epoch": 3.79, "learning_rate": 1.652806652806653e-05, "loss": 0.8542, "step": 1920 }, { "epoch": 3.81, "learning_rate": 1.4968814968814968e-05, "loss": 0.8464, "step": 1930 }, { "epoch": 3.83, "learning_rate": 1.340956340956341e-05, "loss": 0.837, "step": 1940 }, { "epoch": 3.85, "learning_rate": 1.185031185031185e-05, "loss": 0.8385, "step": 1950 }, { "epoch": 3.87, "learning_rate": 1.0291060291060291e-05, "loss": 0.8399, "step": 1960 }, { "epoch": 3.89, "learning_rate": 8.731808731808731e-06, "loss": 0.839, "step": 1970 }, { "epoch": 3.91, "learning_rate": 7.172557172557172e-06, "loss": 0.8424, "step": 1980 }, { "epoch": 3.93, "learning_rate": 5.613305613305613e-06, "loss": 0.8398, "step": 1990 }, { "epoch": 3.95, "learning_rate": 4.054054054054054e-06, "loss": 0.8435, "step": 2000 }, { "epoch": 3.95, "eval_loss": 0.9102190732955933, "eval_runtime": 44.2725, "eval_samples_per_second": 45.175, "eval_steps_per_second": 0.723, "step": 2000 } ], "max_steps": 2024, "num_train_epochs": 4, "total_flos": 8.080540492757991e+18, "trial_name": null, "trial_params": null }