{ "best_metric": 0.19492121040821075, "best_model_checkpoint": "xblock-social-screenshots/checkpoint-482", "epoch": 2.0, "eval_steps": 500, "global_step": 482, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 9.764724731445312, "learning_rate": 7.5342465753424655e-06, "loss": 0.8744, "step": 12 }, { "epoch": 0.1, "grad_norm": 6.759668827056885, "learning_rate": 1.5068493150684931e-05, "loss": 0.7068, "step": 24 }, { "epoch": 0.15, "grad_norm": 5.702600479125977, "learning_rate": 2.328767123287671e-05, "loss": 0.4411, "step": 36 }, { "epoch": 0.2, "grad_norm": 4.835807800292969, "learning_rate": 3.1506849315068496e-05, "loss": 0.3594, "step": 48 }, { "epoch": 0.25, "grad_norm": 1.6052229404449463, "learning_rate": 3.9726027397260274e-05, "loss": 0.2137, "step": 60 }, { "epoch": 0.3, "grad_norm": 10.121936798095703, "learning_rate": 4.794520547945205e-05, "loss": 0.2919, "step": 72 }, { "epoch": 0.35, "grad_norm": 3.0559985637664795, "learning_rate": 4.930769230769231e-05, "loss": 0.2982, "step": 84 }, { "epoch": 0.4, "grad_norm": 8.374061584472656, "learning_rate": 4.8384615384615386e-05, "loss": 0.3612, "step": 96 }, { "epoch": 0.45, "grad_norm": 1.3100831508636475, "learning_rate": 4.7461538461538465e-05, "loss": 0.247, "step": 108 }, { "epoch": 0.5, "grad_norm": 12.590871810913086, "learning_rate": 4.653846153846154e-05, "loss": 0.3005, "step": 120 }, { "epoch": 0.55, "grad_norm": 0.9043705463409424, "learning_rate": 4.5615384615384615e-05, "loss": 0.399, "step": 132 }, { "epoch": 0.6, "grad_norm": 0.23747485876083374, "learning_rate": 4.4692307692307693e-05, "loss": 0.1496, "step": 144 }, { "epoch": 0.65, "grad_norm": 10.08144760131836, "learning_rate": 4.376923076923077e-05, "loss": 0.1989, "step": 156 }, { "epoch": 0.7, "grad_norm": 6.865645885467529, "learning_rate": 4.284615384615385e-05, "loss": 0.3254, "step": 168 }, { "epoch": 0.75, "grad_norm": 4.330836296081543, "learning_rate": 4.192307692307693e-05, "loss": 0.2285, "step": 180 }, { "epoch": 0.8, "grad_norm": 3.9234697818756104, "learning_rate": 4.1e-05, "loss": 0.1921, "step": 192 }, { "epoch": 0.85, "grad_norm": 1.4691423177719116, "learning_rate": 4.007692307692308e-05, "loss": 0.348, "step": 204 }, { "epoch": 0.9, "grad_norm": 0.398327499628067, "learning_rate": 3.915384615384616e-05, "loss": 0.1605, "step": 216 }, { "epoch": 0.95, "grad_norm": 3.6116676330566406, "learning_rate": 3.823076923076923e-05, "loss": 0.2432, "step": 228 }, { "epoch": 1.0, "grad_norm": 0.39185717701911926, "learning_rate": 3.730769230769231e-05, "loss": 0.0795, "step": 240 }, { "epoch": 1.0, "eval_accuracy": 0.9337474120082816, "eval_auc": 0.9920519240002744, "eval_f1": 0.937007874015748, "eval_loss": 0.20392270386219025, "eval_precision": 0.8847583643122676, "eval_recall": 0.99581589958159, "eval_runtime": 34.2431, "eval_samples_per_second": 14.105, "eval_steps_per_second": 0.905, "step": 241 }, { "epoch": 1.05, "grad_norm": 0.1400236040353775, "learning_rate": 3.6384615384615386e-05, "loss": 0.2248, "step": 252 }, { "epoch": 1.1, "grad_norm": 7.511680603027344, "learning_rate": 3.5461538461538464e-05, "loss": 0.1798, "step": 264 }, { "epoch": 1.15, "grad_norm": 0.11658162623643875, "learning_rate": 3.453846153846154e-05, "loss": 0.1071, "step": 276 }, { "epoch": 1.2, "grad_norm": 7.7041544914245605, "learning_rate": 3.3615384615384615e-05, "loss": 0.0488, "step": 288 }, { "epoch": 1.24, "grad_norm": 0.3671063184738159, "learning_rate": 3.269230769230769e-05, "loss": 0.1775, "step": 300 }, { "epoch": 1.29, "grad_norm": 7.276589870452881, "learning_rate": 3.176923076923077e-05, "loss": 0.1291, "step": 312 }, { "epoch": 1.34, "grad_norm": 0.03822338953614235, "learning_rate": 3.084615384615385e-05, "loss": 0.1253, "step": 324 }, { "epoch": 1.39, "grad_norm": 0.15799850225448608, "learning_rate": 2.9923076923076925e-05, "loss": 0.2956, "step": 336 }, { "epoch": 1.44, "grad_norm": 3.221668004989624, "learning_rate": 2.9e-05, "loss": 0.1433, "step": 348 }, { "epoch": 1.49, "grad_norm": 1.780090093612671, "learning_rate": 2.807692307692308e-05, "loss": 0.1282, "step": 360 }, { "epoch": 1.54, "grad_norm": 3.4230823516845703, "learning_rate": 2.7153846153846157e-05, "loss": 0.1416, "step": 372 }, { "epoch": 1.59, "grad_norm": 0.7402670383453369, "learning_rate": 2.6230769230769232e-05, "loss": 0.1831, "step": 384 }, { "epoch": 1.64, "grad_norm": 0.3014300763607025, "learning_rate": 2.530769230769231e-05, "loss": 0.2397, "step": 396 }, { "epoch": 1.69, "grad_norm": 0.061521705240011215, "learning_rate": 2.4384615384615386e-05, "loss": 0.1162, "step": 408 }, { "epoch": 1.74, "grad_norm": 0.593765139579773, "learning_rate": 2.3461538461538464e-05, "loss": 0.0587, "step": 420 }, { "epoch": 1.79, "grad_norm": 2.235006093978882, "learning_rate": 2.253846153846154e-05, "loss": 0.2774, "step": 432 }, { "epoch": 1.84, "grad_norm": 7.07012939453125, "learning_rate": 2.1615384615384614e-05, "loss": 0.137, "step": 444 }, { "epoch": 1.89, "grad_norm": 0.4600817561149597, "learning_rate": 2.0692307692307693e-05, "loss": 0.2305, "step": 456 }, { "epoch": 1.94, "grad_norm": 5.892112731933594, "learning_rate": 1.976923076923077e-05, "loss": 0.2878, "step": 468 }, { "epoch": 1.99, "grad_norm": 8.57860279083252, "learning_rate": 1.8846153846153846e-05, "loss": 0.188, "step": 480 }, { "epoch": 2.0, "eval_accuracy": 0.9399585921325052, "eval_auc": 0.9930893751286096, "eval_f1": 0.9423459244532804, "eval_loss": 0.19492121040821075, "eval_precision": 0.8977272727272727, "eval_recall": 0.9916317991631799, "eval_runtime": 34.2263, "eval_samples_per_second": 14.112, "eval_steps_per_second": 0.906, "step": 482 } ], "logging_steps": 12, "max_steps": 723, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.9880911195386675e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }