{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0, "eval_accuracy": 0.7733118971061094, "eval_f1": 0.7518500246669956, "eval_loss": 0.8074839115142822, "eval_precision": 0.7419668938656281, "eval_recall": 0.762, "eval_runtime": 2.1702, "eval_samples_per_second": 24.883, "eval_steps_per_second": 0.461, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.8378502526412495, "eval_f1": 0.8489243413101281, "eval_loss": 0.6263514161109924, "eval_precision": 0.8217126813289658, "eval_recall": 0.878, "eval_runtime": 1.8285, "eval_samples_per_second": 29.533, "eval_steps_per_second": 0.547, "step": 40 }, { "epoch": 6.0, "eval_accuracy": 0.8375057418465779, "eval_f1": 0.8824383164005806, "eval_loss": 0.5844890475273132, "eval_precision": 0.8547328959700093, "eval_recall": 0.912, "eval_runtime": 1.6994, "eval_samples_per_second": 31.775, "eval_steps_per_second": 0.588, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.8396876435461644, "eval_f1": 0.8708251473477405, "eval_loss": 0.7096832990646362, "eval_precision": 0.8556949806949807, "eval_recall": 0.8865, "eval_runtime": 1.9498, "eval_samples_per_second": 27.695, "eval_steps_per_second": 0.513, "step": 80 }, { "epoch": 10.0, "eval_accuracy": 0.8438217730822233, "eval_f1": 0.8839153959665518, "eval_loss": 0.7337720394134521, "eval_precision": 0.8697967086156825, "eval_recall": 0.8985, "eval_runtime": 1.6222, "eval_samples_per_second": 33.289, "eval_steps_per_second": 0.616, "step": 100 }, { "epoch": 12.0, "eval_accuracy": 0.8494487827285255, "eval_f1": 0.8967391304347826, "eval_loss": 0.8648139238357544, "eval_precision": 0.88623046875, "eval_recall": 0.9075, "eval_runtime": 1.6638, "eval_samples_per_second": 32.455, "eval_steps_per_second": 0.601, "step": 120 }, { "epoch": 14.0, "eval_accuracy": 0.8330271015158475, "eval_f1": 0.8940709501364426, "eval_loss": 0.8960414528846741, "eval_precision": 0.8872476612506155, "eval_recall": 0.901, "eval_runtime": 1.8377, "eval_samples_per_second": 29.384, "eval_steps_per_second": 0.544, "step": 140 }, { "epoch": 16.0, "eval_accuracy": 0.8408360128617364, "eval_f1": 0.897062453715132, "eval_loss": 0.9291977882385254, "eval_precision": 0.8859093125304729, "eval_recall": 0.9085, "eval_runtime": 2.2856, "eval_samples_per_second": 23.626, "eval_steps_per_second": 0.438, "step": 160 }, { "epoch": 18.0, "eval_accuracy": 0.8530087276067984, "eval_f1": 0.9070520807376028, "eval_loss": 0.956124484539032, "eval_precision": 0.9041231992051664, "eval_recall": 0.91, "eval_runtime": 2.1004, "eval_samples_per_second": 25.71, "eval_steps_per_second": 0.476, "step": 180 }, { "epoch": 20.0, "eval_accuracy": 0.8460036747818098, "eval_f1": 0.9054455445544554, "eval_loss": 1.1085299253463745, "eval_precision": 0.8965686274509804, "eval_recall": 0.9145, "eval_runtime": 1.9718, "eval_samples_per_second": 27.386, "eval_steps_per_second": 0.507, "step": 200 }, { "epoch": 22.0, "eval_accuracy": 0.8494487827285255, "eval_f1": 0.9123157631776169, "eval_loss": 1.1246980428695679, "eval_precision": 0.9116325511732402, "eval_recall": 0.913, "eval_runtime": 2.2938, "eval_samples_per_second": 23.541, "eval_steps_per_second": 0.436, "step": 220 }, { "epoch": 24.0, "eval_accuracy": 0.8488745980707395, "eval_f1": 0.9118450459399057, "eval_loss": 1.2039830684661865, "eval_precision": 0.9057720769610261, "eval_recall": 0.918, "eval_runtime": 2.3959, "eval_samples_per_second": 22.539, "eval_steps_per_second": 0.417, "step": 240 }, { "epoch": 26.0, "eval_accuracy": 0.8449701423977951, "eval_f1": 0.9141426783479348, "eval_loss": 1.2313123941421509, "eval_precision": 0.9152882205513785, "eval_recall": 0.913, "eval_runtime": 1.8248, "eval_samples_per_second": 29.592, "eval_steps_per_second": 0.548, "step": 260 }, { "epoch": 28.0, "eval_accuracy": 0.8450849793293523, "eval_f1": 0.9095876800794833, "eval_loss": 1.2972946166992188, "eval_precision": 0.903751233958539, "eval_recall": 0.9155, "eval_runtime": 2.3256, "eval_samples_per_second": 23.219, "eval_steps_per_second": 0.43, "step": 280 }, { "epoch": 30.0, "eval_accuracy": 0.8516306844281121, "eval_f1": 0.9177467597208374, "eval_loss": 1.27461838722229, "eval_precision": 0.9150099403578529, "eval_recall": 0.9205, "eval_runtime": 2.085, "eval_samples_per_second": 25.899, "eval_steps_per_second": 0.48, "step": 300 }, { "epoch": 32.0, "eval_accuracy": 0.8451998162609095, "eval_f1": 0.9093167701863354, "eval_loss": 1.3256772756576538, "eval_precision": 0.9037037037037037, "eval_recall": 0.915, "eval_runtime": 1.6622, "eval_samples_per_second": 32.487, "eval_steps_per_second": 0.602, "step": 320 }, { "epoch": 34.0, "eval_accuracy": 0.8609324758842444, "eval_f1": 0.9068725099601593, "eval_loss": 1.1881499290466309, "eval_precision": 0.9032738095238095, "eval_recall": 0.9105, "eval_runtime": 1.8765, "eval_samples_per_second": 28.777, "eval_steps_per_second": 0.533, "step": 340 }, { "epoch": 36.0, "eval_accuracy": 0.8310748736793753, "eval_f1": 0.9006986027944112, "eval_loss": 1.4595391750335693, "eval_precision": 0.8989043824701195, "eval_recall": 0.9025, "eval_runtime": 1.593, "eval_samples_per_second": 33.898, "eval_steps_per_second": 0.628, "step": 360 }, { "epoch": 38.0, "eval_accuracy": 0.8542719338539274, "eval_f1": 0.9134879082523062, "eval_loss": 1.3311606645584106, "eval_precision": 0.9109895574341124, "eval_recall": 0.916, "eval_runtime": 2.269, "eval_samples_per_second": 23.799, "eval_steps_per_second": 0.441, "step": 380 }, { "epoch": 40.0, "eval_accuracy": 0.8555351401010565, "eval_f1": 0.9195, "eval_loss": 1.2927559614181519, "eval_precision": 0.9195, "eval_recall": 0.9195, "eval_runtime": 1.8831, "eval_samples_per_second": 28.676, "eval_steps_per_second": 0.531, "step": 400 }, { "epoch": 42.0, "eval_accuracy": 0.8598989435002297, "eval_f1": 0.9248439450686643, "eval_loss": 1.3193974494934082, "eval_precision": 0.9236907730673317, "eval_recall": 0.926, "eval_runtime": 1.733, "eval_samples_per_second": 31.16, "eval_steps_per_second": 0.577, "step": 420 }, { "epoch": 44.0, "eval_accuracy": 0.8456591639871383, "eval_f1": 0.9162290572643161, "eval_loss": 1.455527424812317, "eval_precision": 0.9164582291145573, "eval_recall": 0.916, "eval_runtime": 2.5054, "eval_samples_per_second": 21.554, "eval_steps_per_second": 0.399, "step": 440 }, { "epoch": 46.0, "eval_accuracy": 0.8541570969223702, "eval_f1": 0.916729182295574, "eval_loss": 1.4188611507415771, "eval_precision": 0.9169584792396198, "eval_recall": 0.9165, "eval_runtime": 2.3672, "eval_samples_per_second": 22.811, "eval_steps_per_second": 0.422, "step": 460 }, { "epoch": 48.0, "eval_accuracy": 0.8516306844281121, "eval_f1": 0.9209932279909707, "eval_loss": 1.462133765220642, "eval_precision": 0.9240060392551586, "eval_recall": 0.918, "eval_runtime": 2.0221, "eval_samples_per_second": 26.705, "eval_steps_per_second": 0.495, "step": 480 }, { "epoch": 50.0, "learning_rate": 5e-06, "loss": 0.1212, "step": 500 }, { "epoch": 50.0, "eval_accuracy": 0.8492191088654111, "eval_f1": 0.9188919391065635, "eval_loss": 1.5107489824295044, "eval_precision": 0.9172894867962132, "eval_recall": 0.9205, "eval_runtime": 2.2455, "eval_samples_per_second": 24.048, "eval_steps_per_second": 0.445, "step": 500 }, { "epoch": 52.0, "eval_accuracy": 0.8520900321543409, "eval_f1": 0.9225037257824144, "eval_loss": 1.3997772932052612, "eval_precision": 0.9165844027640672, "eval_recall": 0.9285, "eval_runtime": 1.6019, "eval_samples_per_second": 33.711, "eval_steps_per_second": 0.624, "step": 520 }, { "epoch": 54.0, "eval_accuracy": 0.8531235645383556, "eval_f1": 0.9185, "eval_loss": 1.3953099250793457, "eval_precision": 0.9185, "eval_recall": 0.9185, "eval_runtime": 2.4867, "eval_samples_per_second": 21.715, "eval_steps_per_second": 0.402, "step": 540 }, { "epoch": 56.0, "eval_accuracy": 0.8493339457969683, "eval_f1": 0.9165622650964669, "eval_loss": 1.424229621887207, "eval_precision": 0.9186338523355098, "eval_recall": 0.9145, "eval_runtime": 2.2429, "eval_samples_per_second": 24.076, "eval_steps_per_second": 0.446, "step": 560 }, { "epoch": 58.0, "eval_accuracy": 0.8508268259072118, "eval_f1": 0.9144851657940662, "eval_loss": 1.4575742483139038, "eval_precision": 0.9119840875186475, "eval_recall": 0.917, "eval_runtime": 1.7215, "eval_samples_per_second": 31.368, "eval_steps_per_second": 0.581, "step": 580 }, { "epoch": 60.0, "eval_accuracy": 0.8524345429490124, "eval_f1": 0.9176001991535974, "eval_loss": 1.469473123550415, "eval_precision": 0.9137332672285573, "eval_recall": 0.9215, "eval_runtime": 2.1512, "eval_samples_per_second": 25.102, "eval_steps_per_second": 0.465, "step": 600 }, { "epoch": 62.0, "eval_accuracy": 0.8517455213596693, "eval_f1": 0.918474195961107, "eval_loss": 1.4966249465942383, "eval_precision": 0.9159622078567876, "eval_recall": 0.921, "eval_runtime": 2.0241, "eval_samples_per_second": 26.678, "eval_steps_per_second": 0.494, "step": 620 }, { "epoch": 64.0, "eval_accuracy": 0.8484152503445108, "eval_f1": 0.92, "eval_loss": 1.5030325651168823, "eval_precision": 0.92, "eval_recall": 0.92, "eval_runtime": 2.1745, "eval_samples_per_second": 24.834, "eval_steps_per_second": 0.46, "step": 640 }, { "epoch": 66.0, "eval_accuracy": 0.8462333486449242, "eval_f1": 0.9175591531755916, "eval_loss": 1.4818426370620728, "eval_precision": 0.9141439205955335, "eval_recall": 0.921, "eval_runtime": 2.068, "eval_samples_per_second": 26.112, "eval_steps_per_second": 0.484, "step": 660 }, { "epoch": 68.0, "eval_accuracy": 0.8497932935231971, "eval_f1": 0.9201501877346684, "eval_loss": 1.4994901418685913, "eval_precision": 0.9213032581453634, "eval_recall": 0.919, "eval_runtime": 1.5887, "eval_samples_per_second": 33.99, "eval_steps_per_second": 0.629, "step": 680 }, { "epoch": 70.0, "eval_accuracy": 0.8501378043178687, "eval_f1": 0.9175180662845751, "eval_loss": 1.52091383934021, "eval_precision": 0.914555389965226, "eval_recall": 0.9205, "eval_runtime": 2.0412, "eval_samples_per_second": 26.455, "eval_steps_per_second": 0.49, "step": 700 }, { "epoch": 72.0, "eval_accuracy": 0.8465778594395957, "eval_f1": 0.9170633926334252, "eval_loss": 1.54710054397583, "eval_precision": 0.9191361125062782, "eval_recall": 0.915, "eval_runtime": 1.9021, "eval_samples_per_second": 28.39, "eval_steps_per_second": 0.526, "step": 720 }, { "epoch": 74.0, "eval_accuracy": 0.8531235645383556, "eval_f1": 0.919080919080919, "eval_loss": 1.5027331113815308, "eval_precision": 0.9181636726546906, "eval_recall": 0.92, "eval_runtime": 2.6669, "eval_samples_per_second": 20.248, "eval_steps_per_second": 0.375, "step": 740 }, { "epoch": 76.0, "eval_accuracy": 0.8531235645383556, "eval_f1": 0.9171464330413016, "eval_loss": 1.5013470649719238, "eval_precision": 0.9182957393483709, "eval_recall": 0.916, "eval_runtime": 2.1913, "eval_samples_per_second": 24.643, "eval_steps_per_second": 0.456, "step": 760 }, { "epoch": 78.0, "eval_accuracy": 0.8509416628387689, "eval_f1": 0.9218045112781955, "eval_loss": 1.5246856212615967, "eval_precision": 0.9241206030150754, "eval_recall": 0.9195, "eval_runtime": 1.7299, "eval_samples_per_second": 31.215, "eval_steps_per_second": 0.578, "step": 780 }, { "epoch": 80.0, "eval_accuracy": 0.8507119889756546, "eval_f1": 0.9178286852589641, "eval_loss": 1.5289556980133057, "eval_precision": 0.9141865079365079, "eval_recall": 0.9215, "eval_runtime": 1.9199, "eval_samples_per_second": 28.126, "eval_steps_per_second": 0.521, "step": 800 }, { "epoch": 82.0, "eval_accuracy": 0.852779053743684, "eval_f1": 0.9211576846307385, "eval_loss": 1.523385763168335, "eval_precision": 0.9193227091633466, "eval_recall": 0.923, "eval_runtime": 1.6928, "eval_samples_per_second": 31.899, "eval_steps_per_second": 0.591, "step": 820 }, { "epoch": 84.0, "eval_accuracy": 0.8535829122645843, "eval_f1": 0.9211183225162256, "eval_loss": 1.5238100290298462, "eval_precision": 0.919740777666999, "eval_recall": 0.9225, "eval_runtime": 1.7587, "eval_samples_per_second": 30.704, "eval_steps_per_second": 0.569, "step": 840 }, { "epoch": 86.0, "eval_accuracy": 0.8497932935231971, "eval_f1": 0.9178116412690482, "eval_loss": 1.52961266040802, "eval_precision": 0.9171243135297055, "eval_recall": 0.9185, "eval_runtime": 2.2872, "eval_samples_per_second": 23.609, "eval_steps_per_second": 0.437, "step": 860 }, { "epoch": 88.0, "eval_accuracy": 0.8484152503445108, "eval_f1": 0.9172293073268317, "eval_loss": 1.5319523811340332, "eval_precision": 0.9174587293646823, "eval_recall": 0.917, "eval_runtime": 1.8758, "eval_samples_per_second": 28.788, "eval_steps_per_second": 0.533, "step": 880 }, { "epoch": 90.0, "eval_accuracy": 0.8516306844281121, "eval_f1": 0.9216520650813517, "eval_loss": 1.5241385698318481, "eval_precision": 0.9228070175438596, "eval_recall": 0.9205, "eval_runtime": 2.319, "eval_samples_per_second": 23.286, "eval_steps_per_second": 0.431, "step": 900 }, { "epoch": 92.0, "eval_accuracy": 0.8524345429490124, "eval_f1": 0.922, "eval_loss": 1.5183860063552856, "eval_precision": 0.922, "eval_recall": 0.922, "eval_runtime": 1.8138, "eval_samples_per_second": 29.771, "eval_steps_per_second": 0.551, "step": 920 }, { "epoch": 94.0, "eval_accuracy": 0.8526642168121268, "eval_f1": 0.9215, "eval_loss": 1.5182560682296753, "eval_precision": 0.9215, "eval_recall": 0.9215, "eval_runtime": 2.1167, "eval_samples_per_second": 25.511, "eval_steps_per_second": 0.472, "step": 940 }, { "epoch": 96.0, "eval_accuracy": 0.8523197060174552, "eval_f1": 0.9209999999999999, "eval_loss": 1.519921898841858, "eval_precision": 0.921, "eval_recall": 0.921, "eval_runtime": 2.4525, "eval_samples_per_second": 22.018, "eval_steps_per_second": 0.408, "step": 960 }, { "epoch": 98.0, "eval_accuracy": 0.8520900321543409, "eval_f1": 0.9214607303651825, "eval_loss": 1.5220308303833008, "eval_precision": 0.9219219219219219, "eval_recall": 0.921, "eval_runtime": 2.2195, "eval_samples_per_second": 24.33, "eval_steps_per_second": 0.451, "step": 980 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 0.0007, "step": 1000 }, { "epoch": 100.0, "eval_accuracy": 0.8518603582912264, "eval_f1": 0.9214607303651825, "eval_loss": 1.5228424072265625, "eval_precision": 0.9219219219219219, "eval_recall": 0.921, "eval_runtime": 2.3676, "eval_samples_per_second": 22.808, "eval_steps_per_second": 0.422, "step": 1000 }, { "epoch": 100.0, "step": 1000, "total_flos": 1.4163133547413504e+16, "train_loss": 0.06096216064691544, "train_runtime": 422.6711, "train_samples_per_second": 37.854, "train_steps_per_second": 2.366 } ], "max_steps": 1000, "num_train_epochs": 100, "total_flos": 1.4163133547413504e+16, "trial_name": null, "trial_params": null }