{ "best_metric": 0.05716780200600624, "best_model_checkpoint": "/data/jcanete/all_results/pos/albeto_xxlarge/epochs_4_bs_16_lr_5e-6/checkpoint-1800", "epoch": 4.0, "global_step": 3580, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22, "eval_accuracy": 0.9740770234323376, "eval_f1": 0.9703136222169021, "eval_loss": 0.09671590477228165, "eval_precision": 0.9689025240552224, "eval_recall": 0.9717288365866816, "eval_runtime": 12.1146, "eval_samples_per_second": 136.529, "eval_steps_per_second": 8.585, "step": 200 }, { "epoch": 0.45, "eval_accuracy": 0.978109486453974, "eval_f1": 0.9763370891925068, "eval_loss": 0.08312373608350754, "eval_precision": 0.9753928372018824, "eval_recall": 0.9772831711654113, "eval_runtime": 12.0635, "eval_samples_per_second": 137.107, "eval_steps_per_second": 8.621, "step": 400 }, { "epoch": 0.56, "learning_rate": 4.305865921787709e-06, "loss": 0.2346, "step": 500 }, { "epoch": 0.67, "eval_accuracy": 0.9802951491841887, "eval_f1": 0.9781281181400918, "eval_loss": 0.06705235689878464, "eval_precision": 0.9769778149853495, "eval_recall": 0.979281133244091, "eval_runtime": 12.0607, "eval_samples_per_second": 137.14, "eval_steps_per_second": 8.623, "step": 600 }, { "epoch": 0.89, "eval_accuracy": 0.9816167126954812, "eval_f1": 0.980212056468521, "eval_loss": 0.06424280256032944, "eval_precision": 0.9796252319849933, "eval_recall": 0.9807995844238876, "eval_runtime": 12.0651, "eval_samples_per_second": 137.09, "eval_steps_per_second": 8.62, "step": 800 }, { "epoch": 1.12, "learning_rate": 3.607541899441341e-06, "loss": 0.0581, "step": 1000 }, { "epoch": 1.12, "eval_accuracy": 0.9821419494756104, "eval_f1": 0.9813241767565355, "eval_loss": 0.06397537887096405, "eval_precision": 0.9805313971116253, "eval_recall": 0.9821182393958162, "eval_runtime": 12.0587, "eval_samples_per_second": 137.163, "eval_steps_per_second": 8.624, "step": 1000 }, { "epoch": 1.34, "eval_accuracy": 0.982734958743498, "eval_f1": 0.9817945902784708, "eval_loss": 0.06401154398918152, "eval_precision": 0.9809330062426455, "eval_recall": 0.9826576891570598, "eval_runtime": 12.05, "eval_samples_per_second": 137.262, "eval_steps_per_second": 8.631, "step": 1200 }, { "epoch": 1.56, "eval_accuracy": 0.9834635129869029, "eval_f1": 0.982266245955985, "eval_loss": 0.05947747826576233, "eval_precision": 0.9818152783599816, "eval_recall": 0.9827176280194202, "eval_runtime": 12.0881, "eval_samples_per_second": 136.829, "eval_steps_per_second": 8.604, "step": 1400 }, { "epoch": 1.68, "learning_rate": 2.9092178770949727e-06, "loss": 0.04, "step": 1500 }, { "epoch": 1.79, "eval_accuracy": 0.9830229918164721, "eval_f1": 0.982217698479224, "eval_loss": 0.05940423533320427, "eval_precision": 0.9811407268595124, "eval_recall": 0.9832970370222374, "eval_runtime": 12.0517, "eval_samples_per_second": 137.242, "eval_steps_per_second": 8.629, "step": 1600 }, { "epoch": 2.01, "eval_accuracy": 0.9839718066450924, "eval_f1": 0.9833950734391069, "eval_loss": 0.05716780200600624, "eval_precision": 0.9829141716566866, "eval_recall": 0.9838764460250544, "eval_runtime": 12.0638, "eval_samples_per_second": 137.105, "eval_steps_per_second": 8.621, "step": 1800 }, { "epoch": 2.23, "learning_rate": 2.2108938547486037e-06, "loss": 0.0327, "step": 2000 }, { "epoch": 2.23, "eval_accuracy": 0.983700716694058, "eval_f1": 0.9826579207475964, "eval_loss": 0.06271136552095413, "eval_precision": 0.9820794252644183, "eval_recall": 0.9832370981598769, "eval_runtime": 12.0524, "eval_samples_per_second": 137.234, "eval_steps_per_second": 8.629, "step": 2000 }, { "epoch": 2.46, "eval_accuracy": 0.9840734653767303, "eval_f1": 0.9836376523674989, "eval_loss": 0.06178496032953262, "eval_precision": 0.982980166806337, "eval_recall": 0.9842960180615772, "eval_runtime": 12.0504, "eval_samples_per_second": 137.257, "eval_steps_per_second": 8.63, "step": 2200 }, { "epoch": 2.68, "eval_accuracy": 0.9839887497670321, "eval_f1": 0.983732286767129, "eval_loss": 0.0652877539396286, "eval_precision": 0.9833885716567504, "eval_recall": 0.9840762422329224, "eval_runtime": 12.0582, "eval_samples_per_second": 137.168, "eval_steps_per_second": 8.625, "step": 2400 }, { "epoch": 2.79, "learning_rate": 1.5125698324022347e-06, "loss": 0.022, "step": 2500 }, { "epoch": 2.91, "eval_accuracy": 0.9843445553277647, "eval_f1": 0.9838023527532006, "eval_loss": 0.061138641089200974, "eval_precision": 0.9834487990895842, "eval_recall": 0.9841561607160696, "eval_runtime": 12.0686, "eval_samples_per_second": 137.05, "eval_steps_per_second": 8.617, "step": 2600 }, { "epoch": 3.13, "eval_accuracy": 0.984378441571644, "eval_f1": 0.9841013042523019, "eval_loss": 0.06497478485107422, "eval_precision": 0.9837869137232195, "eval_recall": 0.984415895786298, "eval_runtime": 12.0336, "eval_samples_per_second": 137.449, "eval_steps_per_second": 8.642, "step": 2800 }, { "epoch": 3.35, "learning_rate": 8.142458100558661e-07, "loss": 0.0161, "step": 3000 }, { "epoch": 3.35, "eval_accuracy": 0.9844631571813423, "eval_f1": 0.9838933546357782, "eval_loss": 0.06659159064292908, "eval_precision": 0.9834710743801653, "eval_recall": 0.984315997682364, "eval_runtime": 12.058, "eval_samples_per_second": 137.17, "eval_steps_per_second": 8.625, "step": 3000 }, { "epoch": 3.58, "eval_accuracy": 0.9840056928889718, "eval_f1": 0.983372447196285, "eval_loss": 0.06790520250797272, "eval_precision": 0.9830484785560258, "eval_recall": 0.9836966294379733, "eval_runtime": 12.0261, "eval_samples_per_second": 137.534, "eval_steps_per_second": 8.648, "step": 3200 }, { "epoch": 3.8, "eval_accuracy": 0.9842767828400061, "eval_f1": 0.9837931757586651, "eval_loss": 0.06859102845191956, "eval_precision": 0.9833905613670846, "eval_recall": 0.9841961199576432, "eval_runtime": 12.0273, "eval_samples_per_second": 137.521, "eval_steps_per_second": 8.647, "step": 3400 }, { "epoch": 3.91, "learning_rate": 1.1592178770949721e-07, "loss": 0.012, "step": 3500 }, { "epoch": 4.0, "step": 3580, "total_flos": 5364557434522752.0, "train_loss": 0.05827242064409416, "train_runtime": 5244.9021, "train_samples_per_second": 10.91, "train_steps_per_second": 0.683 } ], "max_steps": 3580, "num_train_epochs": 4, "total_flos": 5364557434522752.0, "trial_name": null, "trial_params": null }