|
{ |
|
"best_metric": 0.8454404945904173, |
|
"best_model_checkpoint": "cvt-13-384-in22k-FV-finetuned-memes/checkpoint-180", |
|
"epoch": 19.987654320987655, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6621, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6e-05, |
|
"loss": 1.4066, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5123647604327666, |
|
"eval_f1": 0.43706156197054624, |
|
"eval_loss": 1.2430025339126587, |
|
"eval_precision": 0.5141456021674624, |
|
"eval_recall": 0.5123647604327666, |
|
"eval_runtime": 12.004, |
|
"eval_samples_per_second": 107.798, |
|
"eval_steps_per_second": 1.749, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9e-05, |
|
"loss": 1.2862, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00012, |
|
"loss": 1.0813, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.6893353941267388, |
|
"eval_f1": 0.6615579665180347, |
|
"eval_loss": 0.8244319558143616, |
|
"eval_precision": 0.6834239635195151, |
|
"eval_recall": 0.6893353941267388, |
|
"eval_runtime": 11.1665, |
|
"eval_samples_per_second": 115.882, |
|
"eval_steps_per_second": 1.881, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00011666666666666667, |
|
"loss": 0.9199, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00011333333333333333, |
|
"loss": 0.8392, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7612055641421948, |
|
"eval_f1": 0.7569545008442342, |
|
"eval_loss": 0.6334171891212463, |
|
"eval_precision": 0.767024313347433, |
|
"eval_recall": 0.7612055641421948, |
|
"eval_runtime": 10.5864, |
|
"eval_samples_per_second": 122.232, |
|
"eval_steps_per_second": 1.984, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.00011, |
|
"loss": 0.7028, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00010666666666666667, |
|
"loss": 0.7065, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.776661514683153, |
|
"eval_f1": 0.7671672909038773, |
|
"eval_loss": 0.581936240196228, |
|
"eval_precision": 0.7799046353657799, |
|
"eval_recall": 0.776661514683153, |
|
"eval_runtime": 10.9056, |
|
"eval_samples_per_second": 118.654, |
|
"eval_steps_per_second": 1.926, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00010333333333333334, |
|
"loss": 0.6294, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5751, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.8176197836166924, |
|
"eval_f1": 0.8130327497943682, |
|
"eval_loss": 0.5365290641784668, |
|
"eval_precision": 0.8215999410934138, |
|
"eval_recall": 0.8176197836166924, |
|
"eval_runtime": 10.6877, |
|
"eval_samples_per_second": 121.074, |
|
"eval_steps_per_second": 1.965, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.5597, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.4896, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.8307573415765069, |
|
"eval_f1": 0.8264996195118711, |
|
"eval_loss": 0.49427157640457153, |
|
"eval_precision": 0.8257369667433976, |
|
"eval_recall": 0.8307573415765069, |
|
"eval_runtime": 11.8177, |
|
"eval_samples_per_second": 109.496, |
|
"eval_steps_per_second": 1.777, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 9e-05, |
|
"loss": 0.4976, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.4487, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.8106646058732612, |
|
"eval_f1": 0.8053637875370054, |
|
"eval_loss": 0.5398644208908081, |
|
"eval_precision": 0.806886049971449, |
|
"eval_recall": 0.8106646058732612, |
|
"eval_runtime": 11.1665, |
|
"eval_samples_per_second": 115.882, |
|
"eval_steps_per_second": 1.881, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 8.333333333333333e-05, |
|
"loss": 0.4755, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 7.999999999999999e-05, |
|
"loss": 0.4349, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.8299845440494591, |
|
"eval_f1": 0.8273441546369988, |
|
"eval_loss": 0.48919445276260376, |
|
"eval_precision": 0.8285069417879617, |
|
"eval_recall": 0.8299845440494591, |
|
"eval_runtime": 10.9335, |
|
"eval_samples_per_second": 118.351, |
|
"eval_steps_per_second": 1.921, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.4266, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 7.333333333333334e-05, |
|
"loss": 0.43, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8454404945904173, |
|
"eval_f1": 0.8425612552932723, |
|
"eval_loss": 0.4984479546546936, |
|
"eval_precision": 0.8464868510362556, |
|
"eval_recall": 0.8454404945904173, |
|
"eval_runtime": 10.7938, |
|
"eval_samples_per_second": 119.884, |
|
"eval_steps_per_second": 1.946, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 7.000000000000001e-05, |
|
"loss": 0.4143, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.4372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.8191653786707882, |
|
"eval_f1": 0.8157445071524905, |
|
"eval_loss": 0.5572782158851624, |
|
"eval_precision": 0.822149098583948, |
|
"eval_recall": 0.8191653786707882, |
|
"eval_runtime": 10.9137, |
|
"eval_samples_per_second": 118.567, |
|
"eval_steps_per_second": 1.924, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.3865, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 6e-05, |
|
"loss": 0.3994, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.8299845440494591, |
|
"eval_f1": 0.82808018136039, |
|
"eval_loss": 0.5158317685127258, |
|
"eval_precision": 0.8283803654792194, |
|
"eval_recall": 0.8299845440494591, |
|
"eval_runtime": 11.8018, |
|
"eval_samples_per_second": 109.644, |
|
"eval_steps_per_second": 1.779, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 5.6666666666666664e-05, |
|
"loss": 0.3822, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.3883, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.8353941267387944, |
|
"eval_f1": 0.8314264954771738, |
|
"eval_loss": 0.5494962930679321, |
|
"eval_precision": 0.8317149547063074, |
|
"eval_recall": 0.8353941267387944, |
|
"eval_runtime": 11.0963, |
|
"eval_samples_per_second": 116.616, |
|
"eval_steps_per_second": 1.893, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3857, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.406, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.8284389489953632, |
|
"eval_f1": 0.8245717709756429, |
|
"eval_loss": 0.5297971367835999, |
|
"eval_precision": 0.8285366488073825, |
|
"eval_recall": 0.8284389489953632, |
|
"eval_runtime": 11.0958, |
|
"eval_samples_per_second": 116.621, |
|
"eval_steps_per_second": 1.893, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.3594, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 3.9999999999999996e-05, |
|
"loss": 0.3355, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.839258114374034, |
|
"eval_f1": 0.8356831660968493, |
|
"eval_loss": 0.5400787591934204, |
|
"eval_precision": 0.8345744148504253, |
|
"eval_recall": 0.839258114374034, |
|
"eval_runtime": 10.7779, |
|
"eval_samples_per_second": 120.06, |
|
"eval_steps_per_second": 1.948, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.666666666666667e-05, |
|
"loss": 0.3706, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.395, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.8307573415765069, |
|
"eval_f1": 0.8260821652476246, |
|
"eval_loss": 0.5915156602859497, |
|
"eval_precision": 0.8278203282316018, |
|
"eval_recall": 0.8307573415765069, |
|
"eval_runtime": 10.988, |
|
"eval_samples_per_second": 117.765, |
|
"eval_steps_per_second": 1.911, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3548, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.3612, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_accuracy": 0.8408037094281299, |
|
"eval_f1": 0.8368203137682052, |
|
"eval_loss": 0.5851544737815857, |
|
"eval_precision": 0.8377711910324739, |
|
"eval_recall": 0.8408037094281299, |
|
"eval_runtime": 11.7394, |
|
"eval_samples_per_second": 110.227, |
|
"eval_steps_per_second": 1.789, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.3594, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.3765, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.8384853168469861, |
|
"eval_f1": 0.8356408095957625, |
|
"eval_loss": 0.5508687496185303, |
|
"eval_precision": 0.8351154357408708, |
|
"eval_recall": 0.8384853168469861, |
|
"eval_runtime": 10.7561, |
|
"eval_samples_per_second": 120.304, |
|
"eval_steps_per_second": 1.952, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.3896, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.3688, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.8415765069551777, |
|
"eval_f1": 0.8386603858270167, |
|
"eval_loss": 0.5668028593063354, |
|
"eval_precision": 0.8398264475825991, |
|
"eval_recall": 0.8415765069551777, |
|
"eval_runtime": 10.5834, |
|
"eval_samples_per_second": 122.266, |
|
"eval_steps_per_second": 1.984, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.3705, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3503, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.839258114374034, |
|
"eval_f1": 0.8364792101800207, |
|
"eval_loss": 0.5626189708709717, |
|
"eval_precision": 0.8370661747552312, |
|
"eval_recall": 0.839258114374034, |
|
"eval_runtime": 10.7269, |
|
"eval_samples_per_second": 120.631, |
|
"eval_steps_per_second": 1.958, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.3613, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.3611, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_accuracy": 0.8346213292117465, |
|
"eval_f1": 0.8322067261008879, |
|
"eval_loss": 0.5595440864562988, |
|
"eval_precision": 0.8326806465391725, |
|
"eval_recall": 0.8346213292117465, |
|
"eval_runtime": 10.6902, |
|
"eval_samples_per_second": 121.045, |
|
"eval_steps_per_second": 1.964, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"step": 400, |
|
"total_flos": 1.832202236159447e+18, |
|
"train_loss": 0.5471319568157196, |
|
"train_runtime": 1318.2665, |
|
"train_samples_per_second": 78.497, |
|
"train_steps_per_second": 0.303 |
|
} |
|
], |
|
"max_steps": 400, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.832202236159447e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|