{ "best_metric": 0.8454404945904173, "best_model_checkpoint": "cvt-13-384-in22k-FV-finetuned-memes/checkpoint-180", "epoch": 19.987654320987655, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.49, "learning_rate": 3e-05, "loss": 1.6621, "step": 10 }, { "epoch": 0.99, "learning_rate": 6e-05, "loss": 1.4066, "step": 20 }, { "epoch": 0.99, "eval_accuracy": 0.5123647604327666, "eval_f1": 0.43706156197054624, "eval_loss": 1.2430025339126587, "eval_precision": 0.5141456021674624, "eval_recall": 0.5123647604327666, "eval_runtime": 12.004, "eval_samples_per_second": 107.798, "eval_steps_per_second": 1.749, "step": 20 }, { "epoch": 1.49, "learning_rate": 9e-05, "loss": 1.2862, "step": 30 }, { "epoch": 1.99, "learning_rate": 0.00012, "loss": 1.0813, "step": 40 }, { "epoch": 1.99, "eval_accuracy": 0.6893353941267388, "eval_f1": 0.6615579665180347, "eval_loss": 0.8244319558143616, "eval_precision": 0.6834239635195151, "eval_recall": 0.6893353941267388, "eval_runtime": 11.1665, "eval_samples_per_second": 115.882, "eval_steps_per_second": 1.881, "step": 40 }, { "epoch": 2.49, "learning_rate": 0.00011666666666666667, "loss": 0.9199, "step": 50 }, { "epoch": 2.99, "learning_rate": 0.00011333333333333333, "loss": 0.8392, "step": 60 }, { "epoch": 2.99, "eval_accuracy": 0.7612055641421948, "eval_f1": 0.7569545008442342, "eval_loss": 0.6334171891212463, "eval_precision": 0.767024313347433, "eval_recall": 0.7612055641421948, "eval_runtime": 10.5864, "eval_samples_per_second": 122.232, "eval_steps_per_second": 1.984, "step": 60 }, { "epoch": 3.49, "learning_rate": 0.00011, "loss": 0.7028, "step": 70 }, { "epoch": 3.99, "learning_rate": 0.00010666666666666667, "loss": 0.7065, "step": 80 }, { "epoch": 3.99, "eval_accuracy": 0.776661514683153, "eval_f1": 0.7671672909038773, "eval_loss": 0.581936240196228, "eval_precision": 0.7799046353657799, "eval_recall": 0.776661514683153, "eval_runtime": 10.9056, "eval_samples_per_second": 118.654, "eval_steps_per_second": 1.926, "step": 80 }, { "epoch": 4.49, "learning_rate": 0.00010333333333333334, "loss": 0.6294, "step": 90 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 0.5751, "step": 100 }, { "epoch": 4.99, "eval_accuracy": 0.8176197836166924, "eval_f1": 0.8130327497943682, "eval_loss": 0.5365290641784668, "eval_precision": 0.8215999410934138, "eval_recall": 0.8176197836166924, "eval_runtime": 10.6877, "eval_samples_per_second": 121.074, "eval_steps_per_second": 1.965, "step": 100 }, { "epoch": 5.49, "learning_rate": 9.666666666666667e-05, "loss": 0.5597, "step": 110 }, { "epoch": 5.99, "learning_rate": 9.333333333333334e-05, "loss": 0.4896, "step": 120 }, { "epoch": 5.99, "eval_accuracy": 0.8307573415765069, "eval_f1": 0.8264996195118711, "eval_loss": 0.49427157640457153, "eval_precision": 0.8257369667433976, "eval_recall": 0.8307573415765069, "eval_runtime": 11.8177, "eval_samples_per_second": 109.496, "eval_steps_per_second": 1.777, "step": 120 }, { "epoch": 6.49, "learning_rate": 9e-05, "loss": 0.4976, "step": 130 }, { "epoch": 6.99, "learning_rate": 8.666666666666667e-05, "loss": 0.4487, "step": 140 }, { "epoch": 6.99, "eval_accuracy": 0.8106646058732612, "eval_f1": 0.8053637875370054, "eval_loss": 0.5398644208908081, "eval_precision": 0.806886049971449, "eval_recall": 0.8106646058732612, "eval_runtime": 11.1665, "eval_samples_per_second": 115.882, "eval_steps_per_second": 1.881, "step": 140 }, { "epoch": 7.49, "learning_rate": 8.333333333333333e-05, "loss": 0.4755, "step": 150 }, { "epoch": 7.99, "learning_rate": 7.999999999999999e-05, "loss": 0.4349, "step": 160 }, { "epoch": 7.99, "eval_accuracy": 0.8299845440494591, "eval_f1": 0.8273441546369988, "eval_loss": 0.48919445276260376, "eval_precision": 0.8285069417879617, "eval_recall": 0.8299845440494591, "eval_runtime": 10.9335, "eval_samples_per_second": 118.351, "eval_steps_per_second": 1.921, "step": 160 }, { "epoch": 8.49, "learning_rate": 7.666666666666667e-05, "loss": 0.4266, "step": 170 }, { "epoch": 8.99, "learning_rate": 7.333333333333334e-05, "loss": 0.43, "step": 180 }, { "epoch": 8.99, "eval_accuracy": 0.8454404945904173, "eval_f1": 0.8425612552932723, "eval_loss": 0.4984479546546936, "eval_precision": 0.8464868510362556, "eval_recall": 0.8454404945904173, "eval_runtime": 10.7938, "eval_samples_per_second": 119.884, "eval_steps_per_second": 1.946, "step": 180 }, { "epoch": 9.49, "learning_rate": 7.000000000000001e-05, "loss": 0.4143, "step": 190 }, { "epoch": 9.99, "learning_rate": 6.666666666666667e-05, "loss": 0.4372, "step": 200 }, { "epoch": 9.99, "eval_accuracy": 0.8191653786707882, "eval_f1": 0.8157445071524905, "eval_loss": 0.5572782158851624, "eval_precision": 0.822149098583948, "eval_recall": 0.8191653786707882, "eval_runtime": 10.9137, "eval_samples_per_second": 118.567, "eval_steps_per_second": 1.924, "step": 200 }, { "epoch": 10.49, "learning_rate": 6.333333333333333e-05, "loss": 0.3865, "step": 210 }, { "epoch": 10.99, "learning_rate": 6e-05, "loss": 0.3994, "step": 220 }, { "epoch": 10.99, "eval_accuracy": 0.8299845440494591, "eval_f1": 0.82808018136039, "eval_loss": 0.5158317685127258, "eval_precision": 0.8283803654792194, "eval_recall": 0.8299845440494591, "eval_runtime": 11.8018, "eval_samples_per_second": 109.644, "eval_steps_per_second": 1.779, "step": 220 }, { "epoch": 11.49, "learning_rate": 5.6666666666666664e-05, "loss": 0.3822, "step": 230 }, { "epoch": 11.99, "learning_rate": 5.333333333333333e-05, "loss": 0.3883, "step": 240 }, { "epoch": 11.99, "eval_accuracy": 0.8353941267387944, "eval_f1": 0.8314264954771738, "eval_loss": 0.5494962930679321, "eval_precision": 0.8317149547063074, "eval_recall": 0.8353941267387944, "eval_runtime": 11.0963, "eval_samples_per_second": 116.616, "eval_steps_per_second": 1.893, "step": 240 }, { "epoch": 12.49, "learning_rate": 5e-05, "loss": 0.3857, "step": 250 }, { "epoch": 12.99, "learning_rate": 4.666666666666667e-05, "loss": 0.406, "step": 260 }, { "epoch": 12.99, "eval_accuracy": 0.8284389489953632, "eval_f1": 0.8245717709756429, "eval_loss": 0.5297971367835999, "eval_precision": 0.8285366488073825, "eval_recall": 0.8284389489953632, "eval_runtime": 11.0958, "eval_samples_per_second": 116.621, "eval_steps_per_second": 1.893, "step": 260 }, { "epoch": 13.49, "learning_rate": 4.3333333333333334e-05, "loss": 0.3594, "step": 270 }, { "epoch": 13.99, "learning_rate": 3.9999999999999996e-05, "loss": 0.3355, "step": 280 }, { "epoch": 13.99, "eval_accuracy": 0.839258114374034, "eval_f1": 0.8356831660968493, "eval_loss": 0.5400787591934204, "eval_precision": 0.8345744148504253, "eval_recall": 0.839258114374034, "eval_runtime": 10.7779, "eval_samples_per_second": 120.06, "eval_steps_per_second": 1.948, "step": 280 }, { "epoch": 14.49, "learning_rate": 3.666666666666667e-05, "loss": 0.3706, "step": 290 }, { "epoch": 14.99, "learning_rate": 3.3333333333333335e-05, "loss": 0.395, "step": 300 }, { "epoch": 14.99, "eval_accuracy": 0.8307573415765069, "eval_f1": 0.8260821652476246, "eval_loss": 0.5915156602859497, "eval_precision": 0.8278203282316018, "eval_recall": 0.8307573415765069, "eval_runtime": 10.988, "eval_samples_per_second": 117.765, "eval_steps_per_second": 1.911, "step": 300 }, { "epoch": 15.49, "learning_rate": 3e-05, "loss": 0.3548, "step": 310 }, { "epoch": 15.99, "learning_rate": 2.6666666666666667e-05, "loss": 0.3612, "step": 320 }, { "epoch": 15.99, "eval_accuracy": 0.8408037094281299, "eval_f1": 0.8368203137682052, "eval_loss": 0.5851544737815857, "eval_precision": 0.8377711910324739, "eval_recall": 0.8408037094281299, "eval_runtime": 11.7394, "eval_samples_per_second": 110.227, "eval_steps_per_second": 1.789, "step": 320 }, { "epoch": 16.49, "learning_rate": 2.3333333333333336e-05, "loss": 0.3594, "step": 330 }, { "epoch": 16.99, "learning_rate": 1.9999999999999998e-05, "loss": 0.3765, "step": 340 }, { "epoch": 16.99, "eval_accuracy": 0.8384853168469861, "eval_f1": 0.8356408095957625, "eval_loss": 0.5508687496185303, "eval_precision": 0.8351154357408708, "eval_recall": 0.8384853168469861, "eval_runtime": 10.7561, "eval_samples_per_second": 120.304, "eval_steps_per_second": 1.952, "step": 340 }, { "epoch": 17.49, "learning_rate": 1.6666666666666667e-05, "loss": 0.3896, "step": 350 }, { "epoch": 17.99, "learning_rate": 1.3333333333333333e-05, "loss": 0.3688, "step": 360 }, { "epoch": 17.99, "eval_accuracy": 0.8415765069551777, "eval_f1": 0.8386603858270167, "eval_loss": 0.5668028593063354, "eval_precision": 0.8398264475825991, "eval_recall": 0.8415765069551777, "eval_runtime": 10.5834, "eval_samples_per_second": 122.266, "eval_steps_per_second": 1.984, "step": 360 }, { "epoch": 18.49, "learning_rate": 9.999999999999999e-06, "loss": 0.3705, "step": 370 }, { "epoch": 18.99, "learning_rate": 6.666666666666667e-06, "loss": 0.3503, "step": 380 }, { "epoch": 18.99, "eval_accuracy": 0.839258114374034, "eval_f1": 0.8364792101800207, "eval_loss": 0.5626189708709717, "eval_precision": 0.8370661747552312, "eval_recall": 0.839258114374034, "eval_runtime": 10.7269, "eval_samples_per_second": 120.631, "eval_steps_per_second": 1.958, "step": 380 }, { "epoch": 19.49, "learning_rate": 3.3333333333333333e-06, "loss": 0.3613, "step": 390 }, { "epoch": 19.99, "learning_rate": 0.0, "loss": 0.3611, "step": 400 }, { "epoch": 19.99, "eval_accuracy": 0.8346213292117465, "eval_f1": 0.8322067261008879, "eval_loss": 0.5595440864562988, "eval_precision": 0.8326806465391725, "eval_recall": 0.8346213292117465, "eval_runtime": 10.6902, "eval_samples_per_second": 121.045, "eval_steps_per_second": 1.964, "step": 400 }, { "epoch": 19.99, "step": 400, "total_flos": 1.832202236159447e+18, "train_loss": 0.5471319568157196, "train_runtime": 1318.2665, "train_samples_per_second": 78.497, "train_steps_per_second": 0.303 } ], "max_steps": 400, "num_train_epochs": 20, "total_flos": 1.832202236159447e+18, "trial_name": null, "trial_params": null }