{ "best_metric": 0.9025482535362244, "best_model_checkpoint": "xblock-large-patch1-224/checkpoint-498", "epoch": 3.0, "eval_steps": 500, "global_step": 498, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": null, "learning_rate": 7.000000000000001e-06, "loss": 2.4849, "step": 8 }, { "epoch": 0.1, "grad_norm": 11.997095108032227, "learning_rate": 1.5e-05, "loss": 2.3188, "step": 16 }, { "epoch": 0.14, "grad_norm": 12.830357551574707, "learning_rate": 2.2000000000000003e-05, "loss": 2.1147, "step": 24 }, { "epoch": 0.19, "grad_norm": 11.86018180847168, "learning_rate": 3e-05, "loss": 1.9589, "step": 32 }, { "epoch": 0.24, "grad_norm": 13.479438781738281, "learning_rate": 3.8e-05, "loss": 1.7637, "step": 40 }, { "epoch": 0.29, "grad_norm": 13.69570255279541, "learning_rate": 4.600000000000001e-05, "loss": 1.8184, "step": 48 }, { "epoch": 0.34, "grad_norm": 9.349321365356445, "learning_rate": 4.955357142857143e-05, "loss": 2.0031, "step": 56 }, { "epoch": 0.39, "grad_norm": 11.169551849365234, "learning_rate": 4.866071428571429e-05, "loss": 1.7263, "step": 64 }, { "epoch": 0.43, "grad_norm": 9.97819995880127, "learning_rate": 4.7767857142857144e-05, "loss": 1.9287, "step": 72 }, { "epoch": 0.48, "grad_norm": 11.742727279663086, "learning_rate": 4.6875e-05, "loss": 1.814, "step": 80 }, { "epoch": 0.53, "grad_norm": 8.304338455200195, "learning_rate": 4.598214285714286e-05, "loss": 1.4417, "step": 88 }, { "epoch": 0.58, "grad_norm": 10.799261093139648, "learning_rate": 4.5089285714285714e-05, "loss": 1.7538, "step": 96 }, { "epoch": 0.63, "grad_norm": 8.281989097595215, "learning_rate": 4.419642857142857e-05, "loss": 1.7161, "step": 104 }, { "epoch": 0.67, "grad_norm": 7.088228225708008, "learning_rate": 4.3303571428571435e-05, "loss": 1.7526, "step": 112 }, { "epoch": 0.72, "grad_norm": 9.898828506469727, "learning_rate": 4.2410714285714285e-05, "loss": 1.8377, "step": 120 }, 
{ "epoch": 0.77, "grad_norm": 9.417756080627441, "learning_rate": 4.151785714285715e-05, "loss": 1.6742, "step": 128 }, { "epoch": 0.82, "grad_norm": 10.03836441040039, "learning_rate": 4.0625000000000005e-05, "loss": 1.5157, "step": 136 }, { "epoch": 0.87, "grad_norm": 10.27881908416748, "learning_rate": 3.9732142857142855e-05, "loss": 1.5828, "step": 144 }, { "epoch": 0.92, "grad_norm": 10.77905559539795, "learning_rate": 3.883928571428572e-05, "loss": 1.3418, "step": 152 }, { "epoch": 0.96, "grad_norm": 12.170005798339844, "learning_rate": 3.794642857142857e-05, "loss": 1.8624, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.5271084337349398, "eval_f1_macro": 0.4518532713560705, "eval_f1_micro": 0.5271084337349398, "eval_f1_weighted": 0.496654440865482, "eval_loss": 1.4107117652893066, "eval_precision_macro": 0.5016472507129397, "eval_precision_micro": 0.5271084337349398, "eval_precision_weighted": 0.5568483249244561, "eval_recall_macro": 0.47663109756097566, "eval_recall_micro": 0.5271084337349398, "eval_recall_weighted": 0.5271084337349398, "eval_runtime": 316.798, "eval_samples_per_second": 1.048, "eval_steps_per_second": 0.066, "step": 166 }, { "epoch": 1.01, "grad_norm": 8.142753601074219, "learning_rate": 3.716517857142857e-05, "loss": 1.5166, "step": 168 }, { "epoch": 1.06, "grad_norm": 5.322903633117676, "learning_rate": 3.627232142857143e-05, "loss": 1.1869, "step": 176 }, { "epoch": 1.11, "grad_norm": 9.545618057250977, "learning_rate": 3.5379464285714287e-05, "loss": 1.3475, "step": 184 }, { "epoch": 1.16, "grad_norm": 13.654799461364746, "learning_rate": 3.448660714285715e-05, "loss": 1.4239, "step": 192 }, { "epoch": 1.2, "grad_norm": 8.991721153259277, "learning_rate": 3.359375e-05, "loss": 1.3054, "step": 200 }, { "epoch": 1.25, "grad_norm": 12.191709518432617, "learning_rate": 3.270089285714286e-05, "loss": 1.1966, "step": 208 }, { "epoch": 1.3, "grad_norm": 9.003186225891113, "learning_rate": 3.1808035714285713e-05, "loss": 1.2069, "step": 216 
}, { "epoch": 1.35, "grad_norm": 12.034103393554688, "learning_rate": 3.091517857142857e-05, "loss": 1.4924, "step": 224 }, { "epoch": 1.4, "grad_norm": 12.357869148254395, "learning_rate": 3.013392857142857e-05, "loss": 1.4235, "step": 232 }, { "epoch": 1.45, "grad_norm": 13.986096382141113, "learning_rate": 2.9241071428571432e-05, "loss": 1.3803, "step": 240 }, { "epoch": 1.49, "grad_norm": 10.213234901428223, "learning_rate": 2.8348214285714285e-05, "loss": 1.2811, "step": 248 }, { "epoch": 1.54, "grad_norm": 11.94521713256836, "learning_rate": 2.7455357142857145e-05, "loss": 1.3474, "step": 256 }, { "epoch": 1.59, "grad_norm": 7.413544654846191, "learning_rate": 2.6562500000000002e-05, "loss": 1.2188, "step": 264 }, { "epoch": 1.64, "grad_norm": 6.420960426330566, "learning_rate": 2.5669642857142855e-05, "loss": 1.195, "step": 272 }, { "epoch": 1.69, "grad_norm": 7.711160659790039, "learning_rate": 2.4776785714285715e-05, "loss": 1.4389, "step": 280 }, { "epoch": 1.73, "grad_norm": 7.766310214996338, "learning_rate": 2.3883928571428572e-05, "loss": 1.5062, "step": 288 }, { "epoch": 1.78, "grad_norm": 11.04636001586914, "learning_rate": 2.299107142857143e-05, "loss": 1.2456, "step": 296 }, { "epoch": 1.83, "grad_norm": 7.171872138977051, "learning_rate": 2.2098214285714286e-05, "loss": 0.7718, "step": 304 }, { "epoch": 1.88, "grad_norm": 9.676796913146973, "learning_rate": 2.1205357142857142e-05, "loss": 1.0983, "step": 312 }, { "epoch": 1.93, "grad_norm": 9.70329761505127, "learning_rate": 2.0312500000000002e-05, "loss": 1.0594, "step": 320 }, { "epoch": 1.98, "grad_norm": 10.712843894958496, "learning_rate": 1.941964285714286e-05, "loss": 1.2865, "step": 328 }, { "epoch": 2.0, "eval_accuracy": 0.6295180722891566, "eval_f1_macro": 0.6326430342148868, "eval_f1_micro": 0.6295180722891566, "eval_f1_weighted": 0.6385426615207972, "eval_loss": 1.0817334651947021, "eval_precision_macro": 0.6845586183973281, "eval_precision_micro": 0.6295180722891566, 
"eval_precision_weighted": 0.6861824846026167, "eval_recall_macro": 0.6314774629363941, "eval_recall_micro": 0.6295180722891566, "eval_recall_weighted": 0.6295180722891566, "eval_runtime": 311.4659, "eval_samples_per_second": 1.066, "eval_steps_per_second": 0.067, "step": 332 }, { "epoch": 2.02, "grad_norm": 12.722149848937988, "learning_rate": 1.8526785714285716e-05, "loss": 1.0814, "step": 336 }, { "epoch": 2.07, "grad_norm": 7.002964019775391, "learning_rate": 1.7633928571428573e-05, "loss": 0.8135, "step": 344 }, { "epoch": 2.12, "grad_norm": 7.798354625701904, "learning_rate": 1.674107142857143e-05, "loss": 0.9007, "step": 352 }, { "epoch": 2.17, "grad_norm": 5.395328998565674, "learning_rate": 1.5848214285714286e-05, "loss": 0.8786, "step": 360 }, { "epoch": 2.22, "grad_norm": 11.069001197814941, "learning_rate": 1.4955357142857143e-05, "loss": 1.0465, "step": 368 }, { "epoch": 2.27, "grad_norm": 11.216327667236328, "learning_rate": 1.4062500000000001e-05, "loss": 1.1112, "step": 376 }, { "epoch": 2.31, "grad_norm": 10.935038566589355, "learning_rate": 1.3169642857142858e-05, "loss": 0.8433, "step": 384 }, { "epoch": 2.36, "grad_norm": 18.220169067382812, "learning_rate": 1.2276785714285715e-05, "loss": 0.812, "step": 392 }, { "epoch": 2.41, "grad_norm": 13.36108112335205, "learning_rate": 1.1383928571428572e-05, "loss": 1.0622, "step": 400 }, { "epoch": 2.46, "grad_norm": 12.954853057861328, "learning_rate": 1.049107142857143e-05, "loss": 0.8341, "step": 408 }, { "epoch": 2.51, "grad_norm": 15.882329940795898, "learning_rate": 9.598214285714287e-06, "loss": 0.8285, "step": 416 }, { "epoch": 2.55, "grad_norm": 7.79279899597168, "learning_rate": 8.705357142857143e-06, "loss": 0.9739, "step": 424 }, { "epoch": 2.6, "grad_norm": 11.043404579162598, "learning_rate": 7.8125e-06, "loss": 0.9797, "step": 432 }, { "epoch": 2.65, "grad_norm": 7.065421104431152, "learning_rate": 6.919642857142858e-06, "loss": 0.8766, "step": 440 }, { "epoch": 2.7, "grad_norm": 
7.5092878341674805, "learning_rate": 6.0267857142857145e-06, "loss": 0.637, "step": 448 }, { "epoch": 2.75, "grad_norm": 10.960742950439453, "learning_rate": 5.133928571428571e-06, "loss": 0.9343, "step": 456 }, { "epoch": 2.8, "grad_norm": 23.01622772216797, "learning_rate": 4.241071428571429e-06, "loss": 1.0866, "step": 464 }, { "epoch": 2.84, "grad_norm": 2.7104849815368652, "learning_rate": 3.348214285714286e-06, "loss": 0.7754, "step": 472 }, { "epoch": 2.89, "grad_norm": 16.19709014892578, "learning_rate": 2.455357142857143e-06, "loss": 0.9792, "step": 480 }, { "epoch": 2.94, "grad_norm": 9.475327491760254, "learning_rate": 1.5625e-06, "loss": 0.8772, "step": 488 }, { "epoch": 2.99, "grad_norm": 7.728980541229248, "learning_rate": 6.696428571428571e-07, "loss": 0.7722, "step": 496 }, { "epoch": 3.0, "eval_accuracy": 0.6716867469879518, "eval_f1_macro": 0.6858654529218409, "eval_f1_micro": 0.6716867469879518, "eval_f1_weighted": 0.676828467951081, "eval_loss": 0.9025482535362244, "eval_precision_macro": 0.7239086041672248, "eval_precision_micro": 0.6716867469879518, "eval_precision_weighted": 0.7046011538585282, "eval_recall_macro": 0.6707409732185557, "eval_recall_micro": 0.6716867469879518, "eval_recall_weighted": 0.6716867469879518, "eval_runtime": 302.7239, "eval_samples_per_second": 1.097, "eval_steps_per_second": 0.069, "step": 498 } ], "logging_steps": 8, "max_steps": 498, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.0897396284801761e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }