{ "best_metric": 0.08749764412641525, "best_model_checkpoint": "/data/jcanete/all_results/pos/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-1600", "epoch": 4.0, "global_step": 3580, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22, "eval_accuracy": 0.9538808220802765, "eval_f1": 0.9490697836636672, "eval_loss": 0.17476704716682434, "eval_precision": 0.9467019204007793, "eval_recall": 0.9514495214880822, "eval_runtime": 1.5406, "eval_samples_per_second": 1073.605, "eval_steps_per_second": 67.506, "step": 200 }, { "epoch": 0.45, "eval_accuracy": 0.9654529743650565, "eval_f1": 0.9616131828280513, "eval_loss": 0.125724658370018, "eval_precision": 0.9590238861730456, "eval_recall": 0.9642164991708457, "eval_runtime": 1.5211, "eval_samples_per_second": 1087.347, "eval_steps_per_second": 68.37, "step": 400 }, { "epoch": 0.56, "learning_rate": 4.303072625698324e-05, "loss": 0.3133, "step": 500 }, { "epoch": 0.67, "eval_accuracy": 0.9684688500703139, "eval_f1": 0.9654409345999343, "eval_loss": 0.1092953085899353, "eval_precision": 0.9633379749353491, "eval_recall": 0.9675530958422409, "eval_runtime": 1.5243, "eval_samples_per_second": 1085.096, "eval_steps_per_second": 68.229, "step": 600 }, { "epoch": 0.89, "eval_accuracy": 0.9705189678250115, "eval_f1": 0.9675733365251304, "eval_loss": 0.09988456219434738, "eval_precision": 0.9660797514241326, "eval_recall": 0.9690715470220376, "eval_runtime": 1.5122, "eval_samples_per_second": 1093.787, "eval_steps_per_second": 68.775, "step": 800 }, { "epoch": 1.12, "learning_rate": 3.604748603351956e-05, "loss": 0.0983, "step": 1000 }, { "epoch": 1.12, "eval_accuracy": 0.9718066450924248, "eval_f1": 0.9694776714513557, "eval_loss": 0.09871890395879745, "eval_precision": 0.9673755197039925, "eval_recall": 0.971588979241174, "eval_runtime": 1.5084, "eval_samples_per_second": 1096.509, "eval_steps_per_second": 68.946, "step": 1000 }, { "epoch": 1.34, "eval_accuracy": 0.9728401755307433, "eval_f1": 0.9709789372905697, "eval_loss": 0.09597848355770111, "eval_precision": 0.9693355369267836, "eval_recall": 0.9726279195220875, "eval_runtime": 1.4942, "eval_samples_per_second": 1106.933, "eval_steps_per_second": 69.602, "step": 1200 }, { "epoch": 1.56, "eval_accuracy": 0.9736026160180274, "eval_f1": 0.9714610733031314, "eval_loss": 0.09189929813146591, "eval_precision": 0.9703765723740606, "eval_recall": 0.9725480010389402, "eval_runtime": 1.5177, "eval_samples_per_second": 1089.787, "eval_steps_per_second": 68.523, "step": 1400 }, { "epoch": 1.68, "learning_rate": 2.9064245810055868e-05, "loss": 0.0695, "step": 1500 }, { "epoch": 1.79, "eval_accuracy": 0.9747039189441047, "eval_f1": 0.9728139460666986, "eval_loss": 0.08749764412641525, "eval_precision": 0.9711674399155732, "eval_recall": 0.9744660446344728, "eval_runtime": 1.5082, "eval_samples_per_second": 1096.655, "eval_steps_per_second": 68.955, "step": 1600 }, { "epoch": 2.01, "eval_accuracy": 0.974975008895139, "eval_f1": 0.9729659656365613, "eval_loss": 0.08767995983362198, "eval_precision": 0.9712323312761298, "eval_recall": 0.9747058000839144, "eval_runtime": 1.4537, "eval_samples_per_second": 1137.75, "eval_steps_per_second": 71.539, "step": 1800 }, { "epoch": 2.23, "learning_rate": 2.2094972067039108e-05, "loss": 0.0597, "step": 2000 }, { "epoch": 2.23, "eval_accuracy": 0.974636146456346, "eval_f1": 0.9728004468426774, "eval_loss": 0.08931880444288254, "eval_precision": 0.9712794773741237, "eval_recall": 0.9743261872889653, "eval_runtime": 1.4451, "eval_samples_per_second": 1144.553, "eval_steps_per_second": 71.967, "step": 2000 }, { "epoch": 2.46, "eval_accuracy": 0.975025838260958, "eval_f1": 0.9732900972811175, "eval_loss": 0.0905543640255928, "eval_precision": 0.9720971020847451, "eval_recall": 0.9744860242552597, "eval_runtime": 1.4435, "eval_samples_per_second": 1145.863, "eval_steps_per_second": 72.049, "step": 2200 }, { "epoch": 2.68, "eval_accuracy": 0.9755510750410871, "eval_f1": 0.9741129056392965, "eval_loss": 0.09149234741926193, "eval_precision": 0.9727446605036659, "eval_recall": 0.9754850052945995, "eval_runtime": 1.4454, "eval_samples_per_second": 1144.307, "eval_steps_per_second": 71.952, "step": 2400 }, { "epoch": 2.79, "learning_rate": 1.511173184357542e-05, "loss": 0.0458, "step": 2500 }, { "epoch": 2.91, "eval_accuracy": 0.9755171887972077, "eval_f1": 0.9740555211846648, "eval_loss": 0.08821560442447662, "eval_precision": 0.9729681239160337, "eval_recall": 0.975145351741224, "eval_runtime": 1.4692, "eval_samples_per_second": 1125.788, "eval_steps_per_second": 70.787, "step": 2600 }, { "epoch": 3.13, "eval_accuracy": 0.9757374493824232, "eval_f1": 0.9740832369019593, "eval_loss": 0.09264585375785828, "eval_precision": 0.9727053573207419, "eval_recall": 0.9754650256738127, "eval_runtime": 1.4438, "eval_samples_per_second": 1145.593, "eval_steps_per_second": 72.032, "step": 2800 }, { "epoch": 3.35, "learning_rate": 8.128491620111732e-06, "loss": 0.0383, "step": 3000 }, { "epoch": 3.35, "eval_accuracy": 0.9753985869436302, "eval_f1": 0.973785013167345, "eval_loss": 0.093608058989048, "eval_precision": 0.9723688666653386, "eval_recall": 0.9752052906035843, "eval_runtime": 1.444, "eval_samples_per_second": 1145.395, "eval_steps_per_second": 72.02, "step": 3000 }, { "epoch": 3.58, "eval_accuracy": 0.9756357906507853, "eval_f1": 0.974050980196538, "eval_loss": 0.09429396688938141, "eval_precision": 0.9727602423083055, "eval_recall": 0.9753451479490919, "eval_runtime": 1.4392, "eval_samples_per_second": 1149.213, "eval_steps_per_second": 72.26, "step": 3200 }, { "epoch": 3.8, "eval_accuracy": 0.9750936107487166, "eval_f1": 0.9735209019255712, "eval_loss": 0.09423112124204636, "eval_precision": 0.9722792403196556, "eval_recall": 0.9747657389462748, "eval_runtime": 1.4459, "eval_samples_per_second": 1143.936, "eval_steps_per_second": 71.928, "step": 3400 }, { "epoch": 3.91, "learning_rate": 1.1452513966480447e-06, "loss": 0.0344, "step": 3500 }, { "epoch": 4.0, "step": 3580, "total_flos": 32358086511744.0, "train_loss": 0.09274842659188383, "train_runtime": 230.8308, "train_samples_per_second": 247.887, "train_steps_per_second": 15.509 } ], "max_steps": 3580, "num_train_epochs": 4, "total_flos": 32358086511744.0, "trial_name": null, "trial_params": null }