{ "best_metric": 0.8533899487562311, "best_model_checkpoint": "./results/checkpoint-1200", "epoch": 69.0, "eval_steps": 500, "global_step": 1656, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.4191670417785645, "learning_rate": 4.9500000000000004e-05, "loss": 1.1717, "step": 24 }, { "epoch": 1.0, "eval_f1": 0.5559615812547036, "eval_loss": 0.852466344833374, "eval_runtime": 0.2518, "eval_samples_per_second": 865.826, "eval_steps_per_second": 15.887, "step": 24 }, { "epoch": 2.0, "grad_norm": 6.028530597686768, "learning_rate": 4.9e-05, "loss": 0.714, "step": 48 }, { "epoch": 2.0, "eval_f1": 0.7746990836466595, "eval_loss": 0.6411612033843994, "eval_runtime": 0.2498, "eval_samples_per_second": 872.757, "eval_steps_per_second": 16.014, "step": 48 }, { "epoch": 3.0, "grad_norm": 6.396097660064697, "learning_rate": 4.85e-05, "loss": 0.4376, "step": 72 }, { "epoch": 3.0, "eval_f1": 0.7904679173662312, "eval_loss": 0.578528881072998, "eval_runtime": 0.2466, "eval_samples_per_second": 883.847, "eval_steps_per_second": 16.217, "step": 72 }, { "epoch": 4.0, "grad_norm": 13.205650329589844, "learning_rate": 4.8e-05, "loss": 0.262, "step": 96 }, { "epoch": 4.0, "eval_f1": 0.8032839961396288, "eval_loss": 0.6008332967758179, "eval_runtime": 0.2457, "eval_samples_per_second": 887.351, "eval_steps_per_second": 16.282, "step": 96 }, { "epoch": 5.0, "grad_norm": 9.777926445007324, "learning_rate": 4.75e-05, "loss": 0.1384, "step": 120 }, { "epoch": 5.0, "eval_f1": 0.7854101778464291, "eval_loss": 0.6125118732452393, "eval_runtime": 0.2485, "eval_samples_per_second": 877.32, "eval_steps_per_second": 16.098, "step": 120 }, { "epoch": 6.0, "grad_norm": 4.165589332580566, "learning_rate": 4.7e-05, "loss": 0.0982, "step": 144 }, { "epoch": 6.0, "eval_f1": 0.7672888039973402, "eval_loss": 0.7653169631958008, "eval_runtime": 0.2569, "eval_samples_per_second": 848.674, "eval_steps_per_second": 15.572, "step": 144 }, { "epoch": 7.0, "grad_norm": 3.7239115238189697, "learning_rate": 4.6500000000000005e-05, "loss": 0.0657, "step": 168 }, { "epoch": 7.0, "eval_f1": 0.8147995774570269, "eval_loss": 0.7002198100090027, "eval_runtime": 0.2474, "eval_samples_per_second": 881.237, "eval_steps_per_second": 16.169, "step": 168 }, { "epoch": 8.0, "grad_norm": 0.3135942220687866, "learning_rate": 4.600000000000001e-05, "loss": 0.0395, "step": 192 }, { "epoch": 8.0, "eval_f1": 0.8087986139563679, "eval_loss": 0.7771649956703186, "eval_runtime": 0.244, "eval_samples_per_second": 893.39, "eval_steps_per_second": 16.392, "step": 192 }, { "epoch": 9.0, "grad_norm": 0.4529080092906952, "learning_rate": 4.55e-05, "loss": 0.0214, "step": 216 }, { "epoch": 9.0, "eval_f1": 0.8392522702480626, "eval_loss": 0.7208238244056702, "eval_runtime": 0.2501, "eval_samples_per_second": 871.738, "eval_steps_per_second": 15.995, "step": 216 }, { "epoch": 10.0, "grad_norm": 8.327765464782715, "learning_rate": 4.5e-05, "loss": 0.0237, "step": 240 }, { "epoch": 10.0, "eval_f1": 0.8424386724386725, "eval_loss": 0.7019856572151184, "eval_runtime": 0.2469, "eval_samples_per_second": 883.057, "eval_steps_per_second": 16.203, "step": 240 }, { "epoch": 11.0, "grad_norm": 2.6359758377075195, "learning_rate": 4.4500000000000004e-05, "loss": 0.0159, "step": 264 }, { "epoch": 11.0, "eval_f1": 0.8074501800720288, "eval_loss": 0.9096614122390747, "eval_runtime": 0.2471, "eval_samples_per_second": 882.066, "eval_steps_per_second": 16.185, "step": 264 }, { "epoch": 12.0, "grad_norm": 18.37872886657715, "learning_rate": 4.4000000000000006e-05, "loss": 0.0322, "step": 288 }, { "epoch": 12.0, "eval_f1": 0.7848399290035054, "eval_loss": 1.0800021886825562, "eval_runtime": 0.2488, "eval_samples_per_second": 876.232, "eval_steps_per_second": 16.078, "step": 288 }, { "epoch": 13.0, "grad_norm": 13.307744026184082, "learning_rate": 4.35e-05, "loss": 0.029, "step": 312 }, { "epoch": 13.0, "eval_f1": 0.798355010551357, "eval_loss": 1.0241613388061523, "eval_runtime": 0.2493, "eval_samples_per_second": 874.372, "eval_steps_per_second": 16.044, "step": 312 }, { "epoch": 14.0, "grad_norm": 0.41816067695617676, "learning_rate": 4.3e-05, "loss": 0.0121, "step": 336 }, { "epoch": 14.0, "eval_f1": 0.8120306519736268, "eval_loss": 1.022652268409729, "eval_runtime": 0.2454, "eval_samples_per_second": 888.194, "eval_steps_per_second": 16.297, "step": 336 }, { "epoch": 15.0, "grad_norm": 10.153714179992676, "learning_rate": 4.25e-05, "loss": 0.0082, "step": 360 }, { "epoch": 15.0, "eval_f1": 0.7906564533144493, "eval_loss": 1.096178650856018, "eval_runtime": 0.2461, "eval_samples_per_second": 885.781, "eval_steps_per_second": 16.253, "step": 360 }, { "epoch": 16.0, "grad_norm": 0.026447944343090057, "learning_rate": 4.2e-05, "loss": 0.0035, "step": 384 }, { "epoch": 16.0, "eval_f1": 0.7900391731356858, "eval_loss": 1.2207499742507935, "eval_runtime": 0.2461, "eval_samples_per_second": 885.87, "eval_steps_per_second": 16.254, "step": 384 }, { "epoch": 17.0, "grad_norm": 0.008760841563344002, "learning_rate": 4.15e-05, "loss": 0.0064, "step": 408 }, { "epoch": 17.0, "eval_f1": 0.7941827325696338, "eval_loss": 1.120672345161438, "eval_runtime": 0.2456, "eval_samples_per_second": 887.604, "eval_steps_per_second": 16.286, "step": 408 }, { "epoch": 18.0, "grad_norm": 0.2175062894821167, "learning_rate": 4.1e-05, "loss": 0.0089, "step": 432 }, { "epoch": 18.0, "eval_f1": 0.8121032920020557, "eval_loss": 1.1158227920532227, "eval_runtime": 0.2466, "eval_samples_per_second": 884.124, "eval_steps_per_second": 16.222, "step": 432 }, { "epoch": 19.0, "grad_norm": 0.026284487918019295, "learning_rate": 4.05e-05, "loss": 0.0011, "step": 456 }, { "epoch": 19.0, "eval_f1": 0.8106351612230018, "eval_loss": 1.1229100227355957, "eval_runtime": 0.2466, "eval_samples_per_second": 883.852, "eval_steps_per_second": 16.217, "step": 456 }, { "epoch": 20.0, "grad_norm": 7.1518473625183105, "learning_rate": 4e-05, "loss": 0.0036, "step": 480 }, { "epoch": 20.0, "eval_f1": 0.8215225082145855, "eval_loss": 1.0700623989105225, "eval_runtime": 0.2447, "eval_samples_per_second": 890.866, "eval_steps_per_second": 16.346, "step": 480 }, { "epoch": 21.0, "grad_norm": 0.030479425564408302, "learning_rate": 3.9500000000000005e-05, "loss": 0.0038, "step": 504 }, { "epoch": 21.0, "eval_f1": 0.79625307224709, "eval_loss": 1.1593633890151978, "eval_runtime": 0.2451, "eval_samples_per_second": 889.358, "eval_steps_per_second": 16.319, "step": 504 }, { "epoch": 22.0, "grad_norm": 0.8992727994918823, "learning_rate": 3.9000000000000006e-05, "loss": 0.013, "step": 528 }, { "epoch": 22.0, "eval_f1": 0.8198287286484007, "eval_loss": 1.127288579940796, "eval_runtime": 0.2483, "eval_samples_per_second": 878.117, "eval_steps_per_second": 16.112, "step": 528 }, { "epoch": 23.0, "grad_norm": 0.008566158823668957, "learning_rate": 3.85e-05, "loss": 0.0051, "step": 552 }, { "epoch": 23.0, "eval_f1": 0.8245210270785887, "eval_loss": 1.0870376825332642, "eval_runtime": 0.2503, "eval_samples_per_second": 870.992, "eval_steps_per_second": 15.982, "step": 552 }, { "epoch": 24.0, "grad_norm": 0.0786171406507492, "learning_rate": 3.8e-05, "loss": 0.0037, "step": 576 }, { "epoch": 24.0, "eval_f1": 0.8210009216488835, "eval_loss": 1.1669812202453613, "eval_runtime": 0.2462, "eval_samples_per_second": 885.624, "eval_steps_per_second": 16.25, "step": 576 }, { "epoch": 25.0, "grad_norm": 0.02009885385632515, "learning_rate": 3.7500000000000003e-05, "loss": 0.0035, "step": 600 }, { "epoch": 25.0, "eval_f1": 0.823341564899699, "eval_loss": 1.1029468774795532, "eval_runtime": 0.246, "eval_samples_per_second": 886.238, "eval_steps_per_second": 16.261, "step": 600 }, { "epoch": 26.0, "grad_norm": 0.10086794197559357, "learning_rate": 3.7e-05, "loss": 0.0051, "step": 624 }, { "epoch": 26.0, "eval_f1": 0.8138320306415473, "eval_loss": 1.1733108758926392, "eval_runtime": 0.2405, "eval_samples_per_second": 906.567, "eval_steps_per_second": 16.634, "step": 624 }, { "epoch": 27.0, "grad_norm": 24.029403686523438, "learning_rate": 3.65e-05, "loss": 0.0032, "step": 648 }, { "epoch": 27.0, "eval_f1": 0.8293087494328629, "eval_loss": 1.1072059869766235, "eval_runtime": 0.244, "eval_samples_per_second": 893.364, "eval_steps_per_second": 16.392, "step": 648 }, { "epoch": 28.0, "grad_norm": 19.329952239990234, "learning_rate": 3.6e-05, "loss": 0.024, "step": 672 }, { "epoch": 28.0, "eval_f1": 0.7828629987222324, "eval_loss": 1.3776278495788574, "eval_runtime": 0.2463, "eval_samples_per_second": 884.934, "eval_steps_per_second": 16.237, "step": 672 }, { "epoch": 29.0, "grad_norm": 0.0337708480656147, "learning_rate": 3.55e-05, "loss": 0.0097, "step": 696 }, { "epoch": 29.0, "eval_f1": 0.8245008772063647, "eval_loss": 1.161023736000061, "eval_runtime": 0.246, "eval_samples_per_second": 886.218, "eval_steps_per_second": 16.261, "step": 696 }, { "epoch": 30.0, "grad_norm": 0.05386161431670189, "learning_rate": 3.5e-05, "loss": 0.0093, "step": 720 }, { "epoch": 30.0, "eval_f1": 0.8331553578084018, "eval_loss": 1.0987571477890015, "eval_runtime": 0.2499, "eval_samples_per_second": 872.378, "eval_steps_per_second": 16.007, "step": 720 }, { "epoch": 31.0, "grad_norm": 0.011382571421563625, "learning_rate": 3.45e-05, "loss": 0.0048, "step": 744 }, { "epoch": 31.0, "eval_f1": 0.8253722493039259, "eval_loss": 1.1277176141738892, "eval_runtime": 0.2469, "eval_samples_per_second": 882.906, "eval_steps_per_second": 16.2, "step": 744 }, { "epoch": 32.0, "grad_norm": 0.0046822689473629, "learning_rate": 3.4000000000000007e-05, "loss": 0.0041, "step": 768 }, { "epoch": 32.0, "eval_f1": 0.8187101207450439, "eval_loss": 1.1826363801956177, "eval_runtime": 0.2504, "eval_samples_per_second": 870.593, "eval_steps_per_second": 15.974, "step": 768 }, { "epoch": 33.0, "grad_norm": 0.003356009954586625, "learning_rate": 3.35e-05, "loss": 0.0033, "step": 792 }, { "epoch": 33.0, "eval_f1": 0.8126497250026661, "eval_loss": 1.1944386959075928, "eval_runtime": 0.2453, "eval_samples_per_second": 888.767, "eval_steps_per_second": 16.308, "step": 792 }, { "epoch": 34.0, "grad_norm": 0.005327207036316395, "learning_rate": 3.3e-05, "loss": 0.0087, "step": 816 }, { "epoch": 34.0, "eval_f1": 0.8491252629565882, "eval_loss": 1.03484046459198, "eval_runtime": 0.2487, "eval_samples_per_second": 876.726, "eval_steps_per_second": 16.087, "step": 816 }, { "epoch": 35.0, "grad_norm": 0.003193259472027421, "learning_rate": 3.2500000000000004e-05, "loss": 0.0056, "step": 840 }, { "epoch": 35.0, "eval_f1": 0.831069428937251, "eval_loss": 1.0799349546432495, "eval_runtime": 0.2433, "eval_samples_per_second": 895.888, "eval_steps_per_second": 16.438, "step": 840 }, { "epoch": 36.0, "grad_norm": 0.0025981140788644552, "learning_rate": 3.2000000000000005e-05, "loss": 0.0056, "step": 864 }, { "epoch": 36.0, "eval_f1": 0.8005370004504828, "eval_loss": 1.2397780418395996, "eval_runtime": 0.2558, "eval_samples_per_second": 852.208, "eval_steps_per_second": 15.637, "step": 864 }, { "epoch": 37.0, "grad_norm": 0.004587420262396336, "learning_rate": 3.15e-05, "loss": 0.0043, "step": 888 }, { "epoch": 37.0, "eval_f1": 0.7960422975955538, "eval_loss": 1.3035740852355957, "eval_runtime": 0.2431, "eval_samples_per_second": 896.654, "eval_steps_per_second": 16.452, "step": 888 }, { "epoch": 38.0, "grad_norm": 0.0024126123171299696, "learning_rate": 3.1e-05, "loss": 0.004, "step": 912 }, { "epoch": 38.0, "eval_f1": 0.8348215283066768, "eval_loss": 1.1289474964141846, "eval_runtime": 0.2451, "eval_samples_per_second": 889.465, "eval_steps_per_second": 16.32, "step": 912 }, { "epoch": 39.0, "grad_norm": 0.002631419338285923, "learning_rate": 3.05e-05, "loss": 0.0002, "step": 936 }, { "epoch": 39.0, "eval_f1": 0.8470719620834265, "eval_loss": 1.0962085723876953, "eval_runtime": 0.2555, "eval_samples_per_second": 853.196, "eval_steps_per_second": 15.655, "step": 936 }, { "epoch": 40.0, "grad_norm": 0.0036367273423820734, "learning_rate": 3e-05, "loss": 0.0002, "step": 960 }, { "epoch": 40.0, "eval_f1": 0.8475274660470973, "eval_loss": 1.1172648668289185, "eval_runtime": 0.2665, "eval_samples_per_second": 818.091, "eval_steps_per_second": 15.011, "step": 960 }, { "epoch": 41.0, "grad_norm": 0.002203166950494051, "learning_rate": 2.95e-05, "loss": 0.0002, "step": 984 }, { "epoch": 41.0, "eval_f1": 0.8463311286594244, "eval_loss": 1.1128673553466797, "eval_runtime": 0.2476, "eval_samples_per_second": 880.33, "eval_steps_per_second": 16.153, "step": 984 }, { "epoch": 42.0, "grad_norm": 0.0019745519384741783, "learning_rate": 2.9e-05, "loss": 0.0002, "step": 1008 }, { "epoch": 42.0, "eval_f1": 0.8524668435013261, "eval_loss": 1.1187357902526855, "eval_runtime": 0.2433, "eval_samples_per_second": 896.105, "eval_steps_per_second": 16.442, "step": 1008 }, { "epoch": 43.0, "grad_norm": 0.0018844620790332556, "learning_rate": 2.8499999999999998e-05, "loss": 0.0002, "step": 1032 }, { "epoch": 43.0, "eval_f1": 0.8524668435013261, "eval_loss": 1.1086227893829346, "eval_runtime": 0.2535, "eval_samples_per_second": 860.088, "eval_steps_per_second": 15.781, "step": 1032 }, { "epoch": 44.0, "grad_norm": 0.0016616833163425326, "learning_rate": 2.8000000000000003e-05, "loss": 0.0002, "step": 1056 }, { "epoch": 44.0, "eval_f1": 0.8524668435013261, "eval_loss": 1.1084070205688477, "eval_runtime": 0.2467, "eval_samples_per_second": 883.486, "eval_steps_per_second": 16.211, "step": 1056 }, { "epoch": 45.0, "grad_norm": 0.0017370691057294607, "learning_rate": 2.7500000000000004e-05, "loss": 0.0002, "step": 1080 }, { "epoch": 45.0, "eval_f1": 0.8524668435013261, "eval_loss": 1.1120808124542236, "eval_runtime": 0.2544, "eval_samples_per_second": 856.909, "eval_steps_per_second": 15.723, "step": 1080 }, { "epoch": 46.0, "grad_norm": 0.01709812693297863, "learning_rate": 2.7000000000000002e-05, "loss": 0.0018, "step": 1104 }, { "epoch": 46.0, "eval_f1": 0.8230951784833656, "eval_loss": 1.150782585144043, "eval_runtime": 0.2507, "eval_samples_per_second": 869.395, "eval_steps_per_second": 15.952, "step": 1104 }, { "epoch": 47.0, "grad_norm": 0.0014819600619375706, "learning_rate": 2.6500000000000004e-05, "loss": 0.0033, "step": 1128 }, { "epoch": 47.0, "eval_f1": 0.8482902683102467, "eval_loss": 1.1178399324417114, "eval_runtime": 0.251, "eval_samples_per_second": 868.684, "eval_steps_per_second": 15.939, "step": 1128 }, { "epoch": 48.0, "grad_norm": 0.001657757442444563, "learning_rate": 2.6000000000000002e-05, "loss": 0.0012, "step": 1152 }, { "epoch": 48.0, "eval_f1": 0.8485012469416702, "eval_loss": 1.1701571941375732, "eval_runtime": 0.2457, "eval_samples_per_second": 887.403, "eval_steps_per_second": 16.283, "step": 1152 }, { "epoch": 49.0, "grad_norm": 0.001472759060561657, "learning_rate": 2.5500000000000003e-05, "loss": 0.0001, "step": 1176 }, { "epoch": 49.0, "eval_f1": 0.8471931986968837, "eval_loss": 1.1576160192489624, "eval_runtime": 0.2492, "eval_samples_per_second": 874.869, "eval_steps_per_second": 16.053, "step": 1176 }, { "epoch": 50.0, "grad_norm": 0.0018886495381593704, "learning_rate": 2.5e-05, "loss": 0.0001, "step": 1200 }, { "epoch": 50.0, "eval_f1": 0.8533899487562311, "eval_loss": 1.1580414772033691, "eval_runtime": 0.2453, "eval_samples_per_second": 888.766, "eval_steps_per_second": 16.308, "step": 1200 }, { "epoch": 51.0, "grad_norm": 0.0017091715708374977, "learning_rate": 2.45e-05, "loss": 0.0001, "step": 1224 }, { "epoch": 51.0, "eval_f1": 0.8533899487562311, "eval_loss": 1.157599687576294, "eval_runtime": 0.2534, "eval_samples_per_second": 860.384, "eval_steps_per_second": 15.787, "step": 1224 }, { "epoch": 52.0, "grad_norm": 0.0013613449409604073, "learning_rate": 2.4e-05, "loss": 0.0001, "step": 1248 }, { "epoch": 52.0, "eval_f1": 0.8533899487562311, "eval_loss": 1.155985713005066, "eval_runtime": 0.2572, "eval_samples_per_second": 847.53, "eval_steps_per_second": 15.551, "step": 1248 }, { "epoch": 53.0, "grad_norm": 0.001274469424970448, "learning_rate": 2.35e-05, "loss": 0.0001, "step": 1272 }, { "epoch": 53.0, "eval_f1": 0.8533899487562311, "eval_loss": 1.1571167707443237, "eval_runtime": 0.2412, "eval_samples_per_second": 903.632, "eval_steps_per_second": 16.58, "step": 1272 }, { "epoch": 54.0, "grad_norm": 0.0013310050126165152, "learning_rate": 2.3000000000000003e-05, "loss": 0.0001, "step": 1296 }, { "epoch": 54.0, "eval_f1": 0.8533899487562311, "eval_loss": 1.1600090265274048, "eval_runtime": 0.2473, "eval_samples_per_second": 881.514, "eval_steps_per_second": 16.175, "step": 1296 }, { "epoch": 55.0, "grad_norm": 0.0015337098157033324, "learning_rate": 2.25e-05, "loss": 0.0001, "step": 1320 }, { "epoch": 55.0, "eval_f1": 0.8426159574795251, "eval_loss": 1.1735517978668213, "eval_runtime": 0.2491, "eval_samples_per_second": 875.11, "eval_steps_per_second": 16.057, "step": 1320 }, { "epoch": 56.0, "grad_norm": 0.008407847955822945, "learning_rate": 2.2000000000000003e-05, "loss": 0.0001, "step": 1344 }, { "epoch": 56.0, "eval_f1": 0.8426159574795251, "eval_loss": 1.1661510467529297, "eval_runtime": 0.2486, "eval_samples_per_second": 876.938, "eval_steps_per_second": 16.091, "step": 1344 }, { "epoch": 57.0, "grad_norm": 0.0011946760350838304, "learning_rate": 2.15e-05, "loss": 0.0001, "step": 1368 }, { "epoch": 57.0, "eval_f1": 0.8475855910205873, "eval_loss": 1.1671065092086792, "eval_runtime": 0.2535, "eval_samples_per_second": 859.876, "eval_steps_per_second": 15.778, "step": 1368 }, { "epoch": 58.0, "grad_norm": 0.0011800089851021767, "learning_rate": 2.1e-05, "loss": 0.0001, "step": 1392 }, { "epoch": 58.0, "eval_f1": 0.8475855910205873, "eval_loss": 1.1681970357894897, "eval_runtime": 0.2494, "eval_samples_per_second": 874.117, "eval_steps_per_second": 16.039, "step": 1392 }, { "epoch": 59.0, "grad_norm": 0.0018475407268851995, "learning_rate": 2.05e-05, "loss": 0.0001, "step": 1416 }, { "epoch": 59.0, "eval_f1": 0.8475855910205873, "eval_loss": 1.1666558980941772, "eval_runtime": 0.2484, "eval_samples_per_second": 877.516, "eval_steps_per_second": 16.101, "step": 1416 }, { "epoch": 60.0, "grad_norm": 0.0015070955269038677, "learning_rate": 2e-05, "loss": 0.0001, "step": 1440 }, { "epoch": 60.0, "eval_f1": 0.8475855910205873, "eval_loss": 1.1691235303878784, "eval_runtime": 0.2503, "eval_samples_per_second": 870.922, "eval_steps_per_second": 15.98, "step": 1440 }, { "epoch": 61.0, "grad_norm": 0.001129466574639082, "learning_rate": 1.9500000000000003e-05, "loss": 0.0001, "step": 1464 }, { "epoch": 61.0, "eval_f1": 0.8475855910205873, "eval_loss": 1.1708507537841797, "eval_runtime": 0.2483, "eval_samples_per_second": 878.104, "eval_steps_per_second": 16.112, "step": 1464 }, { "epoch": 62.0, "grad_norm": 0.001501582097262144, "learning_rate": 1.9e-05, "loss": 0.0001, "step": 1488 }, { "epoch": 62.0, "eval_f1": 0.8475855910205873, "eval_loss": 1.1724032163619995, "eval_runtime": 0.2483, "eval_samples_per_second": 877.807, "eval_steps_per_second": 16.107, "step": 1488 }, { "epoch": 63.0, "grad_norm": 0.0010985672706738114, "learning_rate": 1.85e-05, "loss": 0.0001, "step": 1512 }, { "epoch": 63.0, "eval_f1": 0.8475855910205873, "eval_loss": 1.1738693714141846, "eval_runtime": 0.2494, "eval_samples_per_second": 874.122, "eval_steps_per_second": 16.039, "step": 1512 }, { "epoch": 64.0, "grad_norm": 0.08045843243598938, "learning_rate": 1.8e-05, "loss": 0.0001, "step": 1536 }, { "epoch": 64.0, "eval_f1": 0.8533899487562311, "eval_loss": 1.1738698482513428, "eval_runtime": 0.2562, "eval_samples_per_second": 850.855, "eval_steps_per_second": 15.612, "step": 1536 }, { "epoch": 65.0, "grad_norm": 0.002004158915951848, "learning_rate": 1.75e-05, "loss": 0.0001, "step": 1560 }, { "epoch": 65.0, "eval_f1": 0.8471931986968837, "eval_loss": 1.1787022352218628, "eval_runtime": 0.2428, "eval_samples_per_second": 897.779, "eval_steps_per_second": 16.473, "step": 1560 }, { "epoch": 66.0, "grad_norm": 0.001049485057592392, "learning_rate": 1.7000000000000003e-05, "loss": 0.0001, "step": 1584 }, { "epoch": 66.0, "eval_f1": 0.8471931986968837, "eval_loss": 1.180769443511963, "eval_runtime": 0.2525, "eval_samples_per_second": 863.258, "eval_steps_per_second": 15.84, "step": 1584 }, { "epoch": 67.0, "grad_norm": 0.0010408489033579826, "learning_rate": 1.65e-05, "loss": 0.0001, "step": 1608 }, { "epoch": 67.0, "eval_f1": 0.8471931986968837, "eval_loss": 1.1815507411956787, "eval_runtime": 0.2505, "eval_samples_per_second": 870.186, "eval_steps_per_second": 15.967, "step": 1608 }, { "epoch": 68.0, "grad_norm": 0.0012064232723787427, "learning_rate": 1.6000000000000003e-05, "loss": 0.0001, "step": 1632 }, { "epoch": 68.0, "eval_f1": 0.8471931986968837, "eval_loss": 1.182210922241211, "eval_runtime": 0.2583, "eval_samples_per_second": 844.028, "eval_steps_per_second": 15.487, "step": 1632 }, { "epoch": 69.0, "grad_norm": 0.0014143523294478655, "learning_rate": 1.55e-05, "loss": 0.0001, "step": 1656 }, { "epoch": 69.0, "eval_f1": 0.8524668435013261, "eval_loss": 1.1818993091583252, "eval_runtime": 0.2452, "eval_samples_per_second": 888.962, "eval_steps_per_second": 16.311, "step": 1656 } ], "logging_steps": 500, "max_steps": 2400, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2551863638350032.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }