{ "best_metric": 0.8802370452039692, "best_model_checkpoint": "test/checkpoint-3000", "epoch": 12.539184952978056, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31, "eval_accuracy": 0.6750517260154634, "eval_f1": 0.38478707258280515, "eval_loss": 1.2847243547439575, "eval_precision": 0.5487649605296664, "eval_recall": 0.29626065438548255, "eval_runtime": 48.6236, "eval_samples_per_second": 9.728, "eval_steps_per_second": 2.447, "step": 100 }, { "epoch": 0.63, "eval_accuracy": 0.7829140803658935, "eval_f1": 0.589244320815948, "eval_loss": 0.8382514119148254, "eval_precision": 0.6727240649258998, "eval_recall": 0.5241957657409954, "eval_runtime": 47.451, "eval_samples_per_second": 9.968, "eval_steps_per_second": 2.508, "step": 200 }, { "epoch": 0.94, "eval_accuracy": 0.8379070020690407, "eval_f1": 0.7082447783070723, "eval_loss": 0.6504582166671753, "eval_precision": 0.7584366661434626, "eval_recall": 0.6642837503436898, "eval_runtime": 47.1785, "eval_samples_per_second": 10.026, "eval_steps_per_second": 2.522, "step": 300 }, { "epoch": 1.25, "eval_accuracy": 0.8560383316998802, "eval_f1": 0.74649361523969, "eval_loss": 0.5616394877433777, "eval_precision": 0.7579708091256908, "eval_recall": 0.7353588122078636, "eval_runtime": 47.4319, "eval_samples_per_second": 9.972, "eval_steps_per_second": 2.509, "step": 400 }, { "epoch": 1.57, "learning_rate": 8.750000000000001e-06, "loss": 1.0044, "step": 500 }, { "epoch": 1.57, "eval_accuracy": 0.8640967004246978, "eval_f1": 0.7635898529098273, "eval_loss": 0.48347049951553345, "eval_precision": 0.7902632740108839, "eval_recall": 0.7386582348089085, "eval_runtime": 47.4677, "eval_samples_per_second": 9.965, "eval_steps_per_second": 2.507, "step": 500 }, { "epoch": 1.88, "eval_accuracy": 0.8795600566263748, "eval_f1": 0.7909914858555341, "eval_loss": 0.4531821012496948, "eval_precision": 0.7901234567901234, "eval_recall": 0.7918614242507561, "eval_runtime": 48.4837, "eval_samples_per_second": 9.756, "eval_steps_per_second": 2.454, "step": 600 }, { "epoch": 2.19, "eval_accuracy": 0.8883262550364804, "eval_f1": 0.8112951398316589, "eval_loss": 0.4272719919681549, "eval_precision": 0.8012872083668544, "eval_recall": 0.8215562276601595, "eval_runtime": 47.6145, "eval_samples_per_second": 9.934, "eval_steps_per_second": 2.499, "step": 700 }, { "epoch": 2.51, "eval_accuracy": 0.8954590003266907, "eval_f1": 0.8212751449028298, "eval_loss": 0.3849042057991028, "eval_precision": 0.8147747260181302, "eval_recall": 0.8278801209788287, "eval_runtime": 47.9087, "eval_samples_per_second": 9.873, "eval_steps_per_second": 2.484, "step": 800 }, { "epoch": 2.82, "eval_accuracy": 0.8950234128280519, "eval_f1": 0.8168306752629793, "eval_loss": 0.3808012306690216, "eval_precision": 0.8065942903096099, "eval_recall": 0.8273302172119879, "eval_runtime": 47.5595, "eval_samples_per_second": 9.945, "eval_steps_per_second": 2.502, "step": 900 }, { "epoch": 3.13, "learning_rate": 7.500000000000001e-06, "loss": 0.4068, "step": 1000 }, { "epoch": 3.13, "eval_accuracy": 0.9042796471741261, "eval_f1": 0.838237306692213, "eval_loss": 0.35156166553497314, "eval_precision": 0.8344686648501363, "eval_recall": 0.8420401429749794, "eval_runtime": 48.1704, "eval_samples_per_second": 9.819, "eval_steps_per_second": 2.47, "step": 1000 }, { "epoch": 3.45, "eval_accuracy": 0.9095066971577915, "eval_f1": 0.8464466126911634, "eval_loss": 0.33900508284568787, "eval_precision": 0.8521666434443361, "eval_recall": 0.8408028594995876, "eval_runtime": 48.0055, "eval_samples_per_second": 9.853, "eval_steps_per_second": 2.479, "step": 1100 }, { "epoch": 3.76, "eval_accuracy": 0.9087444190351737, "eval_f1": 0.8463414634146341, "eval_loss": 0.32527512311935425, "eval_precision": 0.8343574672722415, "eval_recall": 0.8586747319219137, "eval_runtime": 48.0035, "eval_samples_per_second": 9.853, "eval_steps_per_second": 2.479, "step": 1200 }, { "epoch": 4.08, "eval_accuracy": 0.9081999346618752, "eval_f1": 0.8466865833785986, "eval_loss": 0.33180123567581177, "eval_precision": 0.8364636436812449, "eval_recall": 0.8571624965631015, "eval_runtime": 47.8611, "eval_samples_per_second": 9.883, "eval_steps_per_second": 2.486, "step": 1300 }, { "epoch": 4.39, "eval_accuracy": 0.9187629315038658, "eval_f1": 0.8616123499142367, "eval_loss": 0.3064488470554352, "eval_precision": 0.8600191754554171, "eval_recall": 0.8632114379983503, "eval_runtime": 47.6825, "eval_samples_per_second": 9.92, "eval_steps_per_second": 2.496, "step": 1400 }, { "epoch": 4.7, "learning_rate": 6.25e-06, "loss": 0.2678, "step": 1500 }, { "epoch": 4.7, "eval_accuracy": 0.9184906893172166, "eval_f1": 0.8611149536588739, "eval_loss": 0.30125683546066284, "eval_precision": 0.8665089086859689, "eval_recall": 0.8557877371459994, "eval_runtime": 47.6577, "eval_samples_per_second": 9.925, "eval_steps_per_second": 2.497, "step": 1500 }, { "epoch": 5.02, "eval_accuracy": 0.9157138190133943, "eval_f1": 0.8587268993839836, "eval_loss": 0.30989399552345276, "eval_precision": 0.8550981461286805, "eval_recall": 0.8623865823480891, "eval_runtime": 47.7978, "eval_samples_per_second": 9.896, "eval_steps_per_second": 2.49, "step": 1600 }, { "epoch": 5.33, "eval_accuracy": 0.9191440705651748, "eval_f1": 0.8643950889777902, "eval_loss": 0.3114277124404907, "eval_precision": 0.8673864894795127, "eval_recall": 0.8614242507561177, "eval_runtime": 47.8331, "eval_samples_per_second": 9.889, "eval_steps_per_second": 2.488, "step": 1700 }, { "epoch": 5.64, "eval_accuracy": 0.9207230752477404, "eval_f1": 0.8651245795867938, "eval_loss": 0.3051866888999939, "eval_precision": 0.8638793694311172, "eval_recall": 0.8663733846576849, "eval_runtime": 47.7044, "eval_samples_per_second": 9.915, "eval_steps_per_second": 2.495, "step": 1800 }, { "epoch": 5.96, "eval_accuracy": 0.9236088424262223, "eval_f1": 0.8706572930175415, "eval_loss": 0.2927141785621643, "eval_precision": 0.8645790971939813, "eval_recall": 0.8768215562276601, "eval_runtime": 47.4403, "eval_samples_per_second": 9.97, "eval_steps_per_second": 2.508, "step": 1900 }, { "epoch": 6.27, "learning_rate": 5e-06, "loss": 0.2068, "step": 2000 }, { "epoch": 6.27, "eval_accuracy": 0.9194707611891538, "eval_f1": 0.8640943047083819, "eval_loss": 0.30293476581573486, "eval_precision": 0.8615552822194888, "eval_recall": 0.8666483365411053, "eval_runtime": 47.7322, "eval_samples_per_second": 9.909, "eval_steps_per_second": 2.493, "step": 2000 }, { "epoch": 6.58, "eval_accuracy": 0.9211042143090493, "eval_f1": 0.8684515293957353, "eval_loss": 0.3005865216255188, "eval_precision": 0.8607697501688049, "eval_recall": 0.8762716524608194, "eval_runtime": 47.8357, "eval_samples_per_second": 9.888, "eval_steps_per_second": 2.488, "step": 2100 }, { "epoch": 6.9, "eval_accuracy": 0.923391048676903, "eval_f1": 0.8718300205620287, "eval_loss": 0.2952803373336792, "eval_precision": 0.8693275013668671, "eval_recall": 0.8743469892768766, "eval_runtime": 47.6233, "eval_samples_per_second": 9.932, "eval_steps_per_second": 2.499, "step": 2200 }, { "epoch": 7.21, "eval_accuracy": 0.9212675596210389, "eval_f1": 0.8683849739511927, "eval_loss": 0.3120380938053131, "eval_precision": 0.8660103910308996, "eval_recall": 0.8707726147924113, "eval_runtime": 48.4779, "eval_samples_per_second": 9.757, "eval_steps_per_second": 2.455, "step": 2300 }, { "epoch": 7.52, "eval_accuracy": 0.9225198736796254, "eval_f1": 0.8689645739296951, "eval_loss": 0.3099469840526581, "eval_precision": 0.8645890038105607, "eval_recall": 0.8733846576849051, "eval_runtime": 47.5215, "eval_samples_per_second": 9.953, "eval_steps_per_second": 2.504, "step": 2400 }, { "epoch": 7.84, "learning_rate": 3.7500000000000005e-06, "loss": 0.16, "step": 2500 }, { "epoch": 7.84, "eval_accuracy": 0.9236088424262223, "eval_f1": 0.8730713844887883, "eval_loss": 0.299545019865036, "eval_precision": 0.8709809823505268, "eval_recall": 0.8751718449271377, "eval_runtime": 47.9228, "eval_samples_per_second": 9.87, "eval_steps_per_second": 2.483, "step": 2500 }, { "epoch": 8.15, "eval_accuracy": 0.9233366002395731, "eval_f1": 0.8713222745579903, "eval_loss": 0.3018472492694855, "eval_precision": 0.8653559322033898, "eval_recall": 0.877371459994501, "eval_runtime": 47.7011, "eval_samples_per_second": 9.916, "eval_steps_per_second": 2.495, "step": 2600 }, { "epoch": 8.46, "eval_accuracy": 0.9234999455515627, "eval_f1": 0.8743416102332581, "eval_loss": 0.305177241563797, "eval_precision": 0.8701157249829816, "eval_recall": 0.8786087434698928, "eval_runtime": 48.329, "eval_samples_per_second": 9.787, "eval_steps_per_second": 2.462, "step": 2700 }, { "epoch": 8.78, "eval_accuracy": 0.9252967439834476, "eval_f1": 0.8783895960154953, "eval_loss": 0.2957703769207001, "eval_precision": 0.8840155945419104, "eval_recall": 0.8728347539180643, "eval_runtime": 47.6638, "eval_samples_per_second": 9.924, "eval_steps_per_second": 2.497, "step": 2800 }, { "epoch": 9.09, "eval_accuracy": 0.9270935424153327, "eval_f1": 0.8792522850663185, "eval_loss": 0.2976396679878235, "eval_precision": 0.8790710457606157, "eval_recall": 0.879433599120154, "eval_runtime": 48.2883, "eval_samples_per_second": 9.795, "eval_steps_per_second": 2.464, "step": 2900 }, { "epoch": 9.4, "learning_rate": 2.5e-06, "loss": 0.1364, "step": 3000 }, { "epoch": 9.4, "eval_accuracy": 0.9288358924098878, "eval_f1": 0.8802370452039692, "eval_loss": 0.30077481269836426, "eval_precision": 0.8824260845537442, "eval_recall": 0.878058839703052, "eval_runtime": 47.4506, "eval_samples_per_second": 9.968, "eval_steps_per_second": 2.508, "step": 3000 }, { "epoch": 9.72, "eval_accuracy": 0.9276380267886312, "eval_f1": 0.8794581144594968, "eval_loss": 0.295279324054718, "eval_precision": 0.8843480678343064, "eval_recall": 0.8746219411602969, "eval_runtime": 48.0211, "eval_samples_per_second": 9.85, "eval_steps_per_second": 2.478, "step": 3100 }, { "epoch": 10.03, "eval_accuracy": 0.9261134705433954, "eval_f1": 0.8766407807023572, "eval_loss": 0.30134841799736023, "eval_precision": 0.8764600797031744, "eval_recall": 0.8768215562276601, "eval_runtime": 47.9205, "eval_samples_per_second": 9.871, "eval_steps_per_second": 2.483, "step": 3200 }, { "epoch": 10.34, "eval_accuracy": 0.9256234346074268, "eval_f1": 0.876704818038517, "eval_loss": 0.306386798620224, "eval_precision": 0.8741287412874129, "eval_recall": 0.8792961231784437, "eval_runtime": 47.48, "eval_samples_per_second": 9.962, "eval_steps_per_second": 2.506, "step": 3300 }, { "epoch": 10.66, "eval_accuracy": 0.9298159642818251, "eval_f1": 0.8808639427706698, "eval_loss": 0.2949816882610321, "eval_precision": 0.8814702643171806, "eval_recall": 0.8802584547704152, "eval_runtime": 47.5486, "eval_samples_per_second": 9.948, "eval_steps_per_second": 2.503, "step": 3400 }, { "epoch": 10.97, "learning_rate": 1.25e-06, "loss": 0.1192, "step": 3500 }, { "epoch": 10.97, "eval_accuracy": 0.9262223674180551, "eval_f1": 0.8796156485929993, "eval_loss": 0.3046295940876007, "eval_precision": 0.8782894736842105, "eval_recall": 0.8809458344789661, "eval_runtime": 47.8942, "eval_samples_per_second": 9.876, "eval_steps_per_second": 2.485, "step": 3500 }, { "epoch": 11.29, "eval_accuracy": 0.9261134705433954, "eval_f1": 0.8789643126241523, "eval_loss": 0.30884549021720886, "eval_precision": 0.8759044368600682, "eval_recall": 0.8820456420126478, "eval_runtime": 48.0682, "eval_samples_per_second": 9.84, "eval_steps_per_second": 2.476, "step": 3600 }, { "epoch": 11.6, "eval_accuracy": 0.9268757486660133, "eval_f1": 0.8796163069544365, "eval_loss": 0.30328136682510376, "eval_precision": 0.8767927878705095, "eval_recall": 0.8824580698377784, "eval_runtime": 47.6224, "eval_samples_per_second": 9.932, "eval_steps_per_second": 2.499, "step": 3700 }, { "epoch": 11.91, "eval_accuracy": 0.9269301971033431, "eval_f1": 0.8791058081327573, "eval_loss": 0.30441808700561523, "eval_precision": 0.8770009577233547, "eval_recall": 0.8812207863623865, "eval_runtime": 47.6174, "eval_samples_per_second": 9.933, "eval_steps_per_second": 2.499, "step": 3800 }, { "epoch": 12.23, "eval_accuracy": 0.9272568877273222, "eval_f1": 0.8795933786661172, "eval_loss": 0.3058357238769531, "eval_precision": 0.8789293067947838, "eval_recall": 0.8802584547704152, "eval_runtime": 47.5523, "eval_samples_per_second": 9.947, "eval_steps_per_second": 2.503, "step": 3900 }, { "epoch": 12.54, "learning_rate": 0.0, "loss": 0.1044, "step": 4000 }, { "epoch": 12.54, "eval_accuracy": 0.9272024392899924, "eval_f1": 0.8791826659352715, "eval_loss": 0.3065986931324005, "eval_precision": 0.8770177838577291, "eval_recall": 0.8813582623040968, "eval_runtime": 47.8877, "eval_samples_per_second": 9.877, "eval_steps_per_second": 2.485, "step": 4000 } ], "max_steps": 4000, "num_train_epochs": 13, "total_flos": 4247432429568000.0, "trial_name": null, "trial_params": null }