{ "best_metric": 0.8361669242658424, "best_model_checkpoint": "mit-b2-fv-finetuned-memes/checkpoint-180", "epoch": 19.987654320987655, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.49, "learning_rate": 3e-05, "loss": 1.6187, "step": 10 }, { "epoch": 0.99, "learning_rate": 6e-05, "loss": 1.3683, "step": 20 }, { "epoch": 0.99, "eval_accuracy": 0.5703245749613601, "eval_f1": 0.4915430460832317, "eval_loss": 1.1798477172851562, "eval_precision": 0.4913898246935893, "eval_recall": 0.5703245749613601, "eval_runtime": 25.7297, "eval_samples_per_second": 50.292, "eval_steps_per_second": 0.816, "step": 20 }, { "epoch": 1.49, "learning_rate": 9e-05, "loss": 1.1618, "step": 30 }, { "epoch": 1.99, "learning_rate": 0.00012, "loss": 1.0113, "step": 40 }, { "epoch": 1.99, "eval_accuracy": 0.615919629057187, "eval_f1": 0.6273659431894075, "eval_loss": 1.0383963584899902, "eval_precision": 0.6812645697652936, "eval_recall": 0.615919629057187, "eval_runtime": 24.3501, "eval_samples_per_second": 53.142, "eval_steps_per_second": 0.862, "step": 40 }, { "epoch": 2.49, "learning_rate": 0.00011666666666666667, "loss": 0.89, "step": 50 }, { "epoch": 2.99, "learning_rate": 0.00011333333333333333, "loss": 0.7581, "step": 60 }, { "epoch": 2.99, "eval_accuracy": 0.6808346213292118, "eval_f1": 0.6839700359928248, "eval_loss": 0.834769070148468, "eval_precision": 0.7376556278142583, "eval_recall": 0.6808346213292118, "eval_runtime": 25.9343, "eval_samples_per_second": 49.895, "eval_steps_per_second": 0.81, "step": 60 }, { "epoch": 3.49, "learning_rate": 0.00011, "loss": 0.706, "step": 70 }, { "epoch": 3.99, "learning_rate": 0.00010666666666666667, "loss": 0.6241, "step": 80 }, { "epoch": 3.99, "eval_accuracy": 0.7712519319938176, "eval_f1": 0.7734801228569064, "eval_loss": 0.6034244894981384, "eval_precision": 0.7864230813661904, "eval_recall": 0.7712519319938176, "eval_runtime": 24.0546, "eval_samples_per_second": 53.794, "eval_steps_per_second": 0.873, "step": 80 }, { "epoch": 4.49, "learning_rate": 0.00010333333333333334, "loss": 0.574, "step": 90 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 0.4999, "step": 100 }, { "epoch": 4.99, "eval_accuracy": 0.794435857805255, "eval_f1": 0.7908928134527277, "eval_loss": 0.5480836033821106, "eval_precision": 0.7999661256925431, "eval_recall": 0.794435857805255, "eval_runtime": 24.7472, "eval_samples_per_second": 52.289, "eval_steps_per_second": 0.849, "step": 100 }, { "epoch": 5.49, "learning_rate": 9.666666666666667e-05, "loss": 0.4429, "step": 110 }, { "epoch": 5.99, "learning_rate": 9.333333333333334e-05, "loss": 0.3981, "step": 120 }, { "epoch": 5.99, "eval_accuracy": 0.8021638330757341, "eval_f1": 0.8000125861001945, "eval_loss": 0.5253472328186035, "eval_precision": 0.8090800980369639, "eval_recall": 0.8021638330757341, "eval_runtime": 24.5494, "eval_samples_per_second": 52.71, "eval_steps_per_second": 0.855, "step": 120 }, { "epoch": 6.49, "learning_rate": 9e-05, "loss": 0.3603, "step": 130 }, { "epoch": 6.99, "learning_rate": 8.666666666666667e-05, "loss": 0.3484, "step": 140 }, { "epoch": 6.99, "eval_accuracy": 0.8238021638330757, "eval_f1": 0.8146387182540739, "eval_loss": 0.46875712275505066, "eval_precision": 0.8147156146167328, "eval_recall": 0.8238021638330757, "eval_runtime": 25.2779, "eval_samples_per_second": 51.191, "eval_steps_per_second": 0.831, "step": 140 }, { "epoch": 7.49, "learning_rate": 8.333333333333333e-05, "loss": 0.2789, "step": 150 }, { "epoch": 7.99, "learning_rate": 7.999999999999999e-05, "loss": 0.3142, "step": 160 }, { "epoch": 7.99, "eval_accuracy": 0.7867078825347759, "eval_f1": 0.7919733028920879, "eval_loss": 0.6245487928390503, "eval_precision": 0.820948058010093, "eval_recall": 0.7867078825347759, "eval_runtime": 24.2688, "eval_samples_per_second": 53.32, "eval_steps_per_second": 0.865, "step": 160 }, { "epoch": 8.49, "learning_rate": 7.666666666666667e-05, "loss": 0.2513, "step": 170 }, { "epoch": 8.99, "learning_rate": 7.333333333333334e-05, "loss": 0.2339, "step": 180 }, { "epoch": 8.99, "eval_accuracy": 0.8361669242658424, "eval_f1": 0.8354791396567843, "eval_loss": 0.5053289532661438, "eval_precision": 0.8426050546923035, "eval_recall": 0.8361669242658424, "eval_runtime": 24.696, "eval_samples_per_second": 52.397, "eval_steps_per_second": 0.85, "step": 180 }, { "epoch": 9.49, "learning_rate": 7.000000000000001e-05, "loss": 0.1999, "step": 190 }, { "epoch": 9.99, "learning_rate": 6.666666666666667e-05, "loss": 0.2284, "step": 200 }, { "epoch": 9.99, "eval_accuracy": 0.8230293663060279, "eval_f1": 0.8187153015149123, "eval_loss": 0.5069507360458374, "eval_precision": 0.822032270944375, "eval_recall": 0.8230293663060279, "eval_runtime": 23.6165, "eval_samples_per_second": 54.792, "eval_steps_per_second": 0.889, "step": 200 }, { "epoch": 10.49, "learning_rate": 6.333333333333333e-05, "loss": 0.1861, "step": 210 }, { "epoch": 10.99, "learning_rate": 6e-05, "loss": 0.1824, "step": 220 }, { "epoch": 10.99, "eval_accuracy": 0.8006182380216383, "eval_f1": 0.8035059555919015, "eval_loss": 0.5779785513877869, "eval_precision": 0.8138172496848511, "eval_recall": 0.8006182380216383, "eval_runtime": 24.9222, "eval_samples_per_second": 51.922, "eval_steps_per_second": 0.843, "step": 220 }, { "epoch": 11.49, "learning_rate": 5.6666666666666664e-05, "loss": 0.1647, "step": 230 }, { "epoch": 11.99, "learning_rate": 5.333333333333333e-05, "loss": 0.1561, "step": 240 }, { "epoch": 11.99, "eval_accuracy": 0.8253477588871716, "eval_f1": 0.8217716611197545, "eval_loss": 0.5429410338401794, "eval_precision": 0.8196794558105368, "eval_recall": 0.8253477588871716, "eval_runtime": 23.8337, "eval_samples_per_second": 54.293, "eval_steps_per_second": 0.881, "step": 240 }, { "epoch": 12.49, "learning_rate": 5e-05, "loss": 0.1551, "step": 250 }, { "epoch": 12.99, "learning_rate": 4.666666666666667e-05, "loss": 0.1229, "step": 260 }, { "epoch": 12.99, "eval_accuracy": 0.8330757341576507, "eval_f1": 0.8303358084478046, "eval_loss": 0.5324836373329163, "eval_precision": 0.8296069273511578, "eval_recall": 0.8330757341576507, "eval_runtime": 25.348, "eval_samples_per_second": 51.049, "eval_steps_per_second": 0.828, "step": 260 }, { "epoch": 13.49, "learning_rate": 4.3333333333333334e-05, "loss": 0.1208, "step": 270 }, { "epoch": 13.99, "learning_rate": 3.9999999999999996e-05, "loss": 0.1232, "step": 280 }, { "epoch": 13.99, "eval_accuracy": 0.8276661514683153, "eval_f1": 0.8273115902224707, "eval_loss": 0.5595067143440247, "eval_precision": 0.8290015047050906, "eval_recall": 0.8276661514683153, "eval_runtime": 23.6014, "eval_samples_per_second": 54.827, "eval_steps_per_second": 0.89, "step": 280 }, { "epoch": 14.49, "learning_rate": 3.666666666666667e-05, "loss": 0.1204, "step": 290 }, { "epoch": 14.99, "learning_rate": 3.3333333333333335e-05, "loss": 0.118, "step": 300 }, { "epoch": 14.99, "eval_accuracy": 0.8292117465224111, "eval_f1": 0.8298744039909668, "eval_loss": 0.5973792672157288, "eval_precision": 0.8344810455815268, "eval_recall": 0.8292117465224111, "eval_runtime": 24.4918, "eval_samples_per_second": 52.834, "eval_steps_per_second": 0.857, "step": 300 }, { "epoch": 15.49, "learning_rate": 3e-05, "loss": 0.1015, "step": 310 }, { "epoch": 15.99, "learning_rate": 2.6666666666666667e-05, "loss": 0.11, "step": 320 }, { "epoch": 15.99, "eval_accuracy": 0.8253477588871716, "eval_f1": 0.8230916961516846, "eval_loss": 0.579598069190979, "eval_precision": 0.8228234989922505, "eval_recall": 0.8253477588871716, "eval_runtime": 23.8333, "eval_samples_per_second": 54.294, "eval_steps_per_second": 0.881, "step": 320 }, { "epoch": 16.49, "learning_rate": 2.3333333333333336e-05, "loss": 0.1037, "step": 330 }, { "epoch": 16.99, "learning_rate": 1.9999999999999998e-05, "loss": 0.0948, "step": 340 }, { "epoch": 16.99, "eval_accuracy": 0.8346213292117465, "eval_f1": 0.8348916431445179, "eval_loss": 0.5581147074699402, "eval_precision": 0.8357545769977985, "eval_recall": 0.8346213292117465, "eval_runtime": 24.5732, "eval_samples_per_second": 52.659, "eval_steps_per_second": 0.855, "step": 340 }, { "epoch": 17.49, "learning_rate": 1.6666666666666667e-05, "loss": 0.0933, "step": 350 }, { "epoch": 17.99, "learning_rate": 1.3333333333333333e-05, "loss": 0.0985, "step": 360 }, { "epoch": 17.99, "eval_accuracy": 0.8338485316846986, "eval_f1": 0.8318239397011512, "eval_loss": 0.569961428642273, "eval_precision": 0.830062297595451, "eval_recall": 0.8338485316846986, "eval_runtime": 24.4249, "eval_samples_per_second": 52.979, "eval_steps_per_second": 0.86, "step": 360 }, { "epoch": 18.49, "learning_rate": 9.999999999999999e-06, "loss": 0.0877, "step": 370 }, { "epoch": 18.99, "learning_rate": 6.666666666666667e-06, "loss": 0.0821, "step": 380 }, { "epoch": 18.99, "eval_accuracy": 0.8330757341576507, "eval_f1": 0.833525849625881, "eval_loss": 0.5755681395530701, "eval_precision": 0.8342801097840022, "eval_recall": 0.8330757341576507, "eval_runtime": 23.5787, "eval_samples_per_second": 54.88, "eval_steps_per_second": 0.891, "step": 380 }, { "epoch": 19.49, "learning_rate": 3.3333333333333333e-06, "loss": 0.0843, "step": 390 }, { "epoch": 19.99, "learning_rate": 0.0, "loss": 0.0813, "step": 400 }, { "epoch": 19.99, "eval_accuracy": 0.8323029366306027, "eval_f1": 0.831492653119617, "eval_loss": 0.5984169840812683, "eval_precision": 0.831217385971583, "eval_recall": 0.8323029366306027, "eval_runtime": 24.9692, "eval_samples_per_second": 51.824, "eval_steps_per_second": 0.841, "step": 400 }, { "epoch": 19.99, "step": 400, "total_flos": 1.1809647563061068e+19, "train_loss": 0.36638923436403276, "train_runtime": 5624.3012, "train_samples_per_second": 18.399, "train_steps_per_second": 0.071 } ], "max_steps": 400, "num_train_epochs": 20, "total_flos": 1.1809647563061068e+19, "trial_name": null, "trial_params": null }