{ "best_metric": null, "best_model_checkpoint": null, "epoch": 21.0, "eval_steps": 500, "global_step": 11109, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1890359168241966, "grad_norm": 0.5107442140579224, "learning_rate": 5e-05, "loss": 2.0482, "step": 100 }, { "epoch": 0.3780718336483932, "grad_norm": 0.5563424229621887, "learning_rate": 5e-05, "loss": 1.8835, "step": 200 }, { "epoch": 0.5671077504725898, "grad_norm": 0.4735371172428131, "learning_rate": 5e-05, "loss": 1.8512, "step": 300 }, { "epoch": 0.7561436672967864, "grad_norm": 0.4543495178222656, "learning_rate": 5e-05, "loss": 1.8489, "step": 400 }, { "epoch": 0.945179584120983, "grad_norm": 0.5007112622261047, "learning_rate": 5e-05, "loss": 1.8439, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.5733333333333334, "eval_loss": 1.6194567680358887, "eval_runtime": 5.4154, "eval_samples_per_second": 92.33, "eval_steps_per_second": 11.634, "step": 529 }, { "epoch": 1.0, "eval_exact_match": 17.0, "eval_f1": 26.880952380952387, "step": 529 }, { "epoch": 1.1342155009451795, "grad_norm": 0.5181360244750977, "learning_rate": 5e-05, "loss": 1.8217, "step": 600 }, { "epoch": 1.3232514177693762, "grad_norm": 1.1348918676376343, "learning_rate": 5e-05, "loss": 1.7866, "step": 700 }, { "epoch": 1.5122873345935728, "grad_norm": 0.661758303642273, "learning_rate": 5e-05, "loss": 1.7934, "step": 800 }, { "epoch": 1.7013232514177694, "grad_norm": 0.5634772777557373, "learning_rate": 5e-05, "loss": 1.7731, "step": 900 }, { "epoch": 1.8903591682419658, "grad_norm": 0.6698930263519287, "learning_rate": 5e-05, "loss": 1.7969, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.5751794871794872, "eval_loss": 1.603736162185669, "eval_runtime": 5.0986, "eval_samples_per_second": 98.066, "eval_steps_per_second": 12.356, "step": 1058 }, { "epoch": 2.0, "eval_exact_match": 17.6, "eval_f1": 27.218095238095238, "step": 1058 }, { "epoch": 2.0793950850661624, "grad_norm": 0.6265193223953247, "learning_rate": 5e-05, "loss": 1.7416, "step": 1100 }, { "epoch": 2.268431001890359, "grad_norm": 0.812364935874939, "learning_rate": 5e-05, "loss": 1.6957, "step": 1200 }, { "epoch": 2.4574669187145557, "grad_norm": 1.0972926616668701, "learning_rate": 5e-05, "loss": 1.7033, "step": 1300 }, { "epoch": 2.6465028355387523, "grad_norm": 1.0831658840179443, "learning_rate": 5e-05, "loss": 1.6935, "step": 1400 }, { "epoch": 2.835538752362949, "grad_norm": 1.0022494792938232, "learning_rate": 5e-05, "loss": 1.6799, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.573025641025641, "eval_loss": 1.6428505182266235, "eval_runtime": 5.1488, "eval_samples_per_second": 97.111, "eval_steps_per_second": 12.236, "step": 1587 }, { "epoch": 3.0, "eval_exact_match": 20.2, "eval_f1": 27.93809523809523, "step": 1587 }, { "epoch": 3.0245746691871456, "grad_norm": 0.8240587711334229, "learning_rate": 5e-05, "loss": 1.6618, "step": 1600 }, { "epoch": 3.213610586011342, "grad_norm": 1.2994645833969116, "learning_rate": 5e-05, "loss": 1.5736, "step": 1700 }, { "epoch": 3.402646502835539, "grad_norm": 1.0891159772872925, "learning_rate": 5e-05, "loss": 1.5798, "step": 1800 }, { "epoch": 3.5916824196597354, "grad_norm": 1.3826475143432617, "learning_rate": 5e-05, "loss": 1.5957, "step": 1900 }, { "epoch": 3.780718336483932, "grad_norm": 0.9901586174964905, "learning_rate": 5e-05, "loss": 1.6026, "step": 2000 }, { "epoch": 3.9697542533081287, "grad_norm": 1.0607813596725464, "learning_rate": 5e-05, "loss": 1.6106, "step": 2100 }, { "epoch": 4.0, "eval_accuracy": 0.5708717948717948, "eval_loss": 1.7143596410751343, "eval_runtime": 5.3942, "eval_samples_per_second": 92.693, "eval_steps_per_second": 11.679, "step": 2116 }, { "epoch": 4.0, "eval_exact_match": 19.0, "eval_f1": 27.71428571428571, "step": 2116 }, { "epoch": 4.158790170132325, "grad_norm": 1.3189178705215454, "learning_rate": 5e-05, "loss": 1.5029, "step": 2200 }, { "epoch": 4.3478260869565215, "grad_norm": 1.062267541885376, "learning_rate": 5e-05, "loss": 1.51, "step": 2300 }, { "epoch": 4.536862003780718, "grad_norm": 1.3810843229293823, "learning_rate": 5e-05, "loss": 1.4867, "step": 2400 }, { "epoch": 4.725897920604915, "grad_norm": 1.1991653442382812, "learning_rate": 5e-05, "loss": 1.501, "step": 2500 }, { "epoch": 4.914933837429111, "grad_norm": 1.310654878616333, "learning_rate": 5e-05, "loss": 1.5034, "step": 2600 }, { "epoch": 5.0, "eval_accuracy": 0.5681025641025641, "eval_loss": 1.8328346014022827, "eval_runtime": 5.7763, "eval_samples_per_second": 86.561, "eval_steps_per_second": 10.907, "step": 2645 }, { "epoch": 5.0, "eval_exact_match": 18.8, "eval_f1": 26.6015873015873, "step": 2645 }, { "epoch": 5.103969754253308, "grad_norm": 1.1406631469726562, "learning_rate": 5e-05, "loss": 1.4605, "step": 2700 }, { "epoch": 5.293005671077505, "grad_norm": 1.447221279144287, "learning_rate": 5e-05, "loss": 1.4204, "step": 2800 }, { "epoch": 5.482041587901701, "grad_norm": 1.461799144744873, "learning_rate": 5e-05, "loss": 1.4233, "step": 2900 }, { "epoch": 5.671077504725898, "grad_norm": 1.4184092283248901, "learning_rate": 5e-05, "loss": 1.4265, "step": 3000 }, { "epoch": 5.8601134215500945, "grad_norm": 0.9850455522537231, "learning_rate": 5e-05, "loss": 1.4139, "step": 3100 }, { "epoch": 6.0, "eval_accuracy": 0.564974358974359, "eval_loss": 1.931396484375, "eval_runtime": 5.3358, "eval_samples_per_second": 93.706, "eval_steps_per_second": 11.807, "step": 3174 }, { "epoch": 6.0, "eval_exact_match": 17.6, "eval_f1": 25.464285714285715, "step": 3174 }, { "epoch": 6.049149338374291, "grad_norm": 1.2274116277694702, "learning_rate": 5e-05, "loss": 1.4159, "step": 3200 }, { "epoch": 6.238185255198488, "grad_norm": 1.2107988595962524, "learning_rate": 5e-05, "loss": 1.3579, "step": 3300 }, { "epoch": 6.427221172022684, "grad_norm": 1.2965952157974243, "learning_rate": 5e-05, "loss": 1.3485, "step": 3400 }, { "epoch": 6.616257088846881, "grad_norm": 1.3035151958465576, "learning_rate": 5e-05, "loss": 1.36, "step": 3500 }, { "epoch": 6.805293005671078, "grad_norm": 1.1840296983718872, "learning_rate": 5e-05, "loss": 1.3599, "step": 3600 }, { "epoch": 6.994328922495274, "grad_norm": 1.780287265777588, "learning_rate": 5e-05, "loss": 1.355, "step": 3700 }, { "epoch": 7.0, "eval_accuracy": 0.565076923076923, "eval_loss": 1.9659509658813477, "eval_runtime": 5.3127, "eval_samples_per_second": 94.114, "eval_steps_per_second": 11.858, "step": 3703 }, { "epoch": 7.0, "eval_exact_match": 18.0, "eval_f1": 26.546666666666663, "step": 3703 }, { "epoch": 7.183364839319471, "grad_norm": 1.1608421802520752, "learning_rate": 5e-05, "loss": 1.298, "step": 3800 }, { "epoch": 7.3724007561436675, "grad_norm": 1.083270788192749, "learning_rate": 5e-05, "loss": 1.3088, "step": 3900 }, { "epoch": 7.561436672967864, "grad_norm": 1.3489899635314941, "learning_rate": 5e-05, "loss": 1.299, "step": 4000 }, { "epoch": 7.750472589792061, "grad_norm": 1.1663447618484497, "learning_rate": 5e-05, "loss": 1.2841, "step": 4100 }, { "epoch": 7.939508506616257, "grad_norm": 1.9308583736419678, "learning_rate": 5e-05, "loss": 1.314, "step": 4200 }, { "epoch": 8.0, "eval_accuracy": 0.5616923076923077, "eval_loss": 2.1164443492889404, "eval_runtime": 5.2165, "eval_samples_per_second": 95.85, "eval_steps_per_second": 12.077, "step": 4232 }, { "epoch": 8.0, "eval_exact_match": 17.0, "eval_f1": 24.659841269841277, "step": 4232 }, { "epoch": 8.128544423440454, "grad_norm": 1.109632134437561, "learning_rate": 5e-05, "loss": 1.2558, "step": 4300 }, { "epoch": 8.31758034026465, "grad_norm": 1.1132491827011108, "learning_rate": 5e-05, "loss": 1.2264, "step": 4400 }, { "epoch": 8.506616257088847, "grad_norm": 1.1017369031906128, "learning_rate": 5e-05, "loss": 1.2649, "step": 4500 }, { "epoch": 8.695652173913043, "grad_norm": 1.0874963998794556, "learning_rate": 5e-05, "loss": 1.2512, "step": 4600 }, { "epoch": 8.88468809073724, "grad_norm": 1.2617064714431763, "learning_rate": 5e-05, "loss": 1.261, "step": 4700 }, { "epoch": 9.0, "eval_accuracy": 0.5622051282051282, "eval_loss": 2.1635327339172363, "eval_runtime": 5.0832, "eval_samples_per_second": 98.363, "eval_steps_per_second": 12.394, "step": 4761 }, { "epoch": 9.0, "eval_exact_match": 17.8, "eval_f1": 25.451428571428576, "step": 4761 }, { "epoch": 9.073724007561436, "grad_norm": 1.2258027791976929, "learning_rate": 5e-05, "loss": 1.2298, "step": 4800 }, { "epoch": 9.262759924385634, "grad_norm": 1.8261148929595947, "learning_rate": 5e-05, "loss": 1.1748, "step": 4900 }, { "epoch": 9.45179584120983, "grad_norm": 1.1221072673797607, "learning_rate": 5e-05, "loss": 1.19, "step": 5000 }, { "epoch": 9.640831758034027, "grad_norm": 1.3320170640945435, "learning_rate": 5e-05, "loss": 1.1932, "step": 5100 }, { "epoch": 9.829867674858223, "grad_norm": 1.194077730178833, "learning_rate": 5e-05, "loss": 1.217, "step": 5200 }, { "epoch": 10.0, "eval_accuracy": 0.5612820512820513, "eval_loss": 2.244041919708252, "eval_runtime": 5.1606, "eval_samples_per_second": 96.889, "eval_steps_per_second": 12.208, "step": 5290 }, { "epoch": 10.0, "eval_exact_match": 18.4, "eval_f1": 26.067662337662348, "step": 5290 }, { "epoch": 10.01890359168242, "grad_norm": 1.1352814435958862, "learning_rate": 5e-05, "loss": 1.2093, "step": 5300 }, { "epoch": 10.207939508506616, "grad_norm": 1.2741838693618774, "learning_rate": 5e-05, "loss": 1.1467, "step": 5400 }, { "epoch": 10.396975425330814, "grad_norm": 1.6901249885559082, "learning_rate": 5e-05, "loss": 1.1288, "step": 5500 }, { "epoch": 10.58601134215501, "grad_norm": 1.1944465637207031, "learning_rate": 5e-05, "loss": 1.1399, "step": 5600 }, { "epoch": 10.775047258979207, "grad_norm": 1.2234572172164917, "learning_rate": 5e-05, "loss": 1.1678, "step": 5700 }, { "epoch": 10.964083175803403, "grad_norm": 1.2781612873077393, "learning_rate": 5e-05, "loss": 1.146, "step": 5800 }, { "epoch": 11.0, "eval_accuracy": 0.5618461538461539, "eval_loss": 2.296630859375, "eval_runtime": 5.1078, "eval_samples_per_second": 97.889, "eval_steps_per_second": 12.334, "step": 5819 }, { "epoch": 11.0, "eval_exact_match": 16.8, "eval_f1": 23.63305916305917, "step": 5819 }, { "epoch": 11.1531190926276, "grad_norm": 1.138975739479065, "learning_rate": 5e-05, "loss": 1.0842, "step": 5900 }, { "epoch": 11.342155009451796, "grad_norm": 1.727989912033081, "learning_rate": 5e-05, "loss": 1.0806, "step": 6000 }, { "epoch": 11.531190926275993, "grad_norm": 1.3104358911514282, "learning_rate": 5e-05, "loss": 1.0953, "step": 6100 }, { "epoch": 11.720226843100189, "grad_norm": 1.3629850149154663, "learning_rate": 5e-05, "loss": 1.0945, "step": 6200 }, { "epoch": 11.909262759924385, "grad_norm": 1.3276056051254272, "learning_rate": 5e-05, "loss": 1.1069, "step": 6300 }, { "epoch": 12.0, "eval_accuracy": 0.562, "eval_loss": 2.3118155002593994, "eval_runtime": 5.3837, "eval_samples_per_second": 92.873, "eval_steps_per_second": 11.702, "step": 6348 }, { "epoch": 12.0, "eval_exact_match": 17.4, "eval_f1": 24.43924963924964, "step": 6348 }, { "epoch": 12.098298676748582, "grad_norm": 1.2608078718185425, "learning_rate": 5e-05, "loss": 1.0418, "step": 6400 }, { "epoch": 12.287334593572778, "grad_norm": 1.4456945657730103, "learning_rate": 5e-05, "loss": 1.0258, "step": 6500 }, { "epoch": 12.476370510396976, "grad_norm": 1.4195287227630615, "learning_rate": 5e-05, "loss": 1.0255, "step": 6600 }, { "epoch": 12.665406427221171, "grad_norm": 1.2595598697662354, "learning_rate": 5e-05, "loss": 1.0511, "step": 6700 }, { "epoch": 12.854442344045369, "grad_norm": 1.5901292562484741, "learning_rate": 5e-05, "loss": 1.045, "step": 6800 }, { "epoch": 13.0, "eval_accuracy": 0.5614871794871795, "eval_loss": 2.3580987453460693, "eval_runtime": 5.3496, "eval_samples_per_second": 93.466, "eval_steps_per_second": 11.777, "step": 6877 }, { "epoch": 13.0, "eval_exact_match": 17.2, "eval_f1": 24.350995670995676, "step": 6877 }, { "epoch": 13.043478260869565, "grad_norm": 1.471511721611023, "learning_rate": 5e-05, "loss": 1.0404, "step": 6900 }, { "epoch": 13.232514177693762, "grad_norm": 1.2928757667541504, "learning_rate": 5e-05, "loss": 0.9573, "step": 7000 }, { "epoch": 13.421550094517958, "grad_norm": 1.5545622110366821, "learning_rate": 5e-05, "loss": 0.9583, "step": 7100 }, { "epoch": 13.610586011342155, "grad_norm": 1.6132512092590332, "learning_rate": 5e-05, "loss": 0.981, "step": 7200 }, { "epoch": 13.799621928166351, "grad_norm": 1.7548742294311523, "learning_rate": 5e-05, "loss": 0.9999, "step": 7300 }, { "epoch": 13.988657844990549, "grad_norm": 1.4219907522201538, "learning_rate": 5e-05, "loss": 1.0028, "step": 7400 }, { "epoch": 14.0, "eval_accuracy": 0.5618974358974359, "eval_loss": 2.413728952407837, "eval_runtime": 5.0464, "eval_samples_per_second": 99.08, "eval_steps_per_second": 12.484, "step": 7406 }, { "epoch": 14.0, "eval_exact_match": 16.6, "eval_f1": 24.201789321789327, "step": 7406 }, { "epoch": 14.177693761814744, "grad_norm": 1.5799965858459473, "learning_rate": 5e-05, "loss": 0.9096, "step": 7500 }, { "epoch": 14.366729678638942, "grad_norm": 1.5961594581604004, "learning_rate": 5e-05, "loss": 0.9075, "step": 7600 }, { "epoch": 14.555765595463138, "grad_norm": 1.5756512880325317, "learning_rate": 5e-05, "loss": 0.9222, "step": 7700 }, { "epoch": 14.744801512287335, "grad_norm": 1.515251636505127, "learning_rate": 5e-05, "loss": 0.9445, "step": 7800 }, { "epoch": 14.93383742911153, "grad_norm": 2.6804299354553223, "learning_rate": 5e-05, "loss": 0.9153, "step": 7900 }, { "epoch": 15.0, "eval_accuracy": 0.5596923076923077, "eval_loss": 2.4756176471710205, "eval_runtime": 5.1956, "eval_samples_per_second": 96.236, "eval_steps_per_second": 12.126, "step": 7935 }, { "epoch": 15.0, "eval_exact_match": 16.8, "eval_f1": 24.146709956709966, "step": 7935 }, { "epoch": 15.122873345935728, "grad_norm": 1.7786214351654053, "learning_rate": 5e-05, "loss": 0.8874, "step": 8000 }, { "epoch": 15.311909262759924, "grad_norm": 1.709060788154602, "learning_rate": 5e-05, "loss": 0.8538, "step": 8100 }, { "epoch": 15.500945179584122, "grad_norm": 2.3245434761047363, "learning_rate": 5e-05, "loss": 0.8591, "step": 8200 }, { "epoch": 15.689981096408317, "grad_norm": 1.6101887226104736, "learning_rate": 5e-05, "loss": 0.8593, "step": 8300 }, { "epoch": 15.879017013232515, "grad_norm": 1.5917792320251465, "learning_rate": 5e-05, "loss": 0.8748, "step": 8400 }, { "epoch": 16.0, "eval_accuracy": 0.5604615384615385, "eval_loss": 2.4434447288513184, "eval_runtime": 5.1072, "eval_samples_per_second": 97.901, "eval_steps_per_second": 12.336, "step": 8464 }, { "epoch": 16.0, "eval_exact_match": 15.6, "eval_f1": 22.917027417027416, "step": 8464 }, { "epoch": 16.068052930056712, "grad_norm": 1.6529980897903442, "learning_rate": 5e-05, "loss": 0.8413, "step": 8500 }, { "epoch": 16.257088846880908, "grad_norm": 1.931520938873291, "learning_rate": 5e-05, "loss": 0.7863, "step": 8600 }, { "epoch": 16.446124763705104, "grad_norm": 1.8401894569396973, "learning_rate": 5e-05, "loss": 0.799, "step": 8700 }, { "epoch": 16.6351606805293, "grad_norm": 1.7952015399932861, "learning_rate": 5e-05, "loss": 0.8238, "step": 8800 }, { "epoch": 16.8241965973535, "grad_norm": 1.8028634786605835, "learning_rate": 5e-05, "loss": 0.807, "step": 8900 }, { "epoch": 17.0, "eval_accuracy": 0.5606153846153846, "eval_loss": 2.4520959854125977, "eval_runtime": 5.1769, "eval_samples_per_second": 96.584, "eval_steps_per_second": 12.17, "step": 8993 }, { "epoch": 17.0, "eval_exact_match": 17.4, "eval_f1": 25.104011544011545, "step": 8993 }, { "epoch": 17.013232514177695, "grad_norm": 1.7197688817977905, "learning_rate": 5e-05, "loss": 0.8249, "step": 9000 }, { "epoch": 17.20226843100189, "grad_norm": 1.768463134765625, "learning_rate": 5e-05, "loss": 0.7304, "step": 9100 }, { "epoch": 17.391304347826086, "grad_norm": 1.9696024656295776, "learning_rate": 5e-05, "loss": 0.7445, "step": 9200 }, { "epoch": 17.58034026465028, "grad_norm": 2.093703269958496, "learning_rate": 5e-05, "loss": 0.755, "step": 9300 }, { "epoch": 17.76937618147448, "grad_norm": 1.909550666809082, "learning_rate": 5e-05, "loss": 0.7698, "step": 9400 }, { "epoch": 17.958412098298677, "grad_norm": 1.6835806369781494, "learning_rate": 5e-05, "loss": 0.7711, "step": 9500 }, { "epoch": 18.0, "eval_accuracy": 0.5603589743589743, "eval_loss": 2.473491668701172, "eval_runtime": 5.4257, "eval_samples_per_second": 92.155, "eval_steps_per_second": 11.611, "step": 9522 }, { "epoch": 18.0, "eval_exact_match": 17.0, "eval_f1": 24.074487734487732, "step": 9522 }, { "epoch": 18.147448015122873, "grad_norm": 2.0483736991882324, "learning_rate": 5e-05, "loss": 0.7048, "step": 9600 }, { "epoch": 18.33648393194707, "grad_norm": 1.987457275390625, "learning_rate": 5e-05, "loss": 0.6863, "step": 9700 }, { "epoch": 18.525519848771268, "grad_norm": 2.1744563579559326, "learning_rate": 5e-05, "loss": 0.7045, "step": 9800 }, { "epoch": 18.714555765595463, "grad_norm": 1.9871633052825928, "learning_rate": 5e-05, "loss": 0.7076, "step": 9900 }, { "epoch": 18.90359168241966, "grad_norm": 1.8895001411437988, "learning_rate": 5e-05, "loss": 0.7202, "step": 10000 }, { "epoch": 19.0, "eval_accuracy": 0.5601538461538461, "eval_loss": 2.5168700218200684, "eval_runtime": 5.0434, "eval_samples_per_second": 99.14, "eval_steps_per_second": 12.492, "step": 10051 }, { "epoch": 19.0, "eval_exact_match": 17.2, "eval_f1": 24.260360750360757, "step": 10051 }, { "epoch": 19.092627599243855, "grad_norm": 1.7914209365844727, "learning_rate": 5e-05, "loss": 0.6728, "step": 10100 }, { "epoch": 19.281663516068054, "grad_norm": 2.198495388031006, "learning_rate": 5e-05, "loss": 0.6241, "step": 10200 }, { "epoch": 19.47069943289225, "grad_norm": 1.81365966796875, "learning_rate": 5e-05, "loss": 0.6558, "step": 10300 }, { "epoch": 19.659735349716446, "grad_norm": 1.6571800708770752, "learning_rate": 5e-05, "loss": 0.6652, "step": 10400 }, { "epoch": 19.84877126654064, "grad_norm": 1.6469954252243042, "learning_rate": 5e-05, "loss": 0.6637, "step": 10500 }, { "epoch": 20.0, "eval_accuracy": 0.5596410256410257, "eval_loss": 2.516073703765869, "eval_runtime": 5.8234, "eval_samples_per_second": 85.86, "eval_steps_per_second": 10.818, "step": 10580 }, { "epoch": 20.0, "eval_exact_match": 17.6, "eval_f1": 25.225079365079367, "step": 10580 }, { "epoch": 20.03780718336484, "grad_norm": 1.7114077806472778, "learning_rate": 5e-05, "loss": 0.66, "step": 10600 }, { "epoch": 20.226843100189036, "grad_norm": 1.9962202310562134, "learning_rate": 5e-05, "loss": 0.5938, "step": 10700 }, { "epoch": 20.415879017013232, "grad_norm": 2.397592067718506, "learning_rate": 5e-05, "loss": 0.6042, "step": 10800 }, { "epoch": 20.604914933837428, "grad_norm": 2.0724213123321533, "learning_rate": 5e-05, "loss": 0.6124, "step": 10900 }, { "epoch": 20.793950850661627, "grad_norm": 1.9385457038879395, "learning_rate": 5e-05, "loss": 0.6078, "step": 11000 }, { "epoch": 20.982986767485823, "grad_norm": 2.034963607788086, "learning_rate": 5e-05, "loss": 0.6257, "step": 11100 }, { "epoch": 21.0, "eval_accuracy": 0.5597948717948718, "eval_loss": 2.5497183799743652, "eval_runtime": 5.0442, "eval_samples_per_second": 99.123, "eval_steps_per_second": 12.489, "step": 11109 }, { "epoch": 21.0, "eval_exact_match": 17.4, "eval_f1": 24.755238095238095, "step": 11109 } ], "logging_steps": 100, "max_steps": 26450, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.843537263196897e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }