{ "best_metric": 0.4636478126049042, "best_model_checkpoint": "/data/jcanete/all_results/pawsx/distillbeto/epochs_4_bs_32_lr_5e-5/checkpoint-2100", "epoch": 4.0, "global_step": 6176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "eval_accuracy": 0.5805000066757202, "eval_loss": 0.6699569225311279, "eval_runtime": 0.7381, "eval_samples_per_second": 2709.802, "eval_steps_per_second": 85.359, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.596826424870467e-05, "loss": 0.6641, "step": 500 }, { "epoch": 0.39, "eval_accuracy": 0.6809999942779541, "eval_loss": 0.5914173722267151, "eval_runtime": 0.7361, "eval_samples_per_second": 2717.087, "eval_steps_per_second": 85.588, "step": 600 }, { "epoch": 0.58, "eval_accuracy": 0.7275000214576721, "eval_loss": 0.5336065888404846, "eval_runtime": 0.7255, "eval_samples_per_second": 2756.65, "eval_steps_per_second": 86.834, "step": 900 }, { "epoch": 0.65, "learning_rate": 4.192033678756477e-05, "loss": 0.5478, "step": 1000 }, { "epoch": 0.78, "eval_accuracy": 0.746999979019165, "eval_loss": 0.5002644062042236, "eval_runtime": 0.7294, "eval_samples_per_second": 2741.855, "eval_steps_per_second": 86.368, "step": 1200 }, { "epoch": 0.97, "learning_rate": 3.787240932642487e-05, "loss": 0.4937, "step": 1500 }, { "epoch": 0.97, "eval_accuracy": 0.7515000104904175, "eval_loss": 0.493168443441391, "eval_runtime": 0.7379, "eval_samples_per_second": 2710.282, "eval_steps_per_second": 85.374, "step": 1500 }, { "epoch": 1.17, "eval_accuracy": 0.7570000290870667, "eval_loss": 0.5791745781898499, "eval_runtime": 0.7412, "eval_samples_per_second": 2698.347, "eval_steps_per_second": 84.998, "step": 1800 }, { "epoch": 1.3, "learning_rate": 3.3832577720207255e-05, "loss": 0.3997, "step": 2000 }, { "epoch": 1.36, "eval_accuracy": 0.7749999761581421, "eval_loss": 0.4636478126049042, "eval_runtime": 0.7343, "eval_samples_per_second": 2723.768, "eval_steps_per_second": 85.799, "step": 2100 }, { "epoch": 1.55, "eval_accuracy": 0.7730000019073486, "eval_loss": 0.4842735230922699, "eval_runtime": 0.728, "eval_samples_per_second": 2747.191, "eval_steps_per_second": 86.537, "step": 2400 }, { "epoch": 1.62, "learning_rate": 2.978465025906736e-05, "loss": 0.3796, "step": 2500 }, { "epoch": 1.75, "eval_accuracy": 0.7789999842643738, "eval_loss": 0.4961821436882019, "eval_runtime": 0.7387, "eval_samples_per_second": 2707.452, "eval_steps_per_second": 85.285, "step": 2700 }, { "epoch": 1.94, "learning_rate": 2.5736722797927466e-05, "loss": 0.3598, "step": 3000 }, { "epoch": 1.94, "eval_accuracy": 0.7879999876022339, "eval_loss": 0.46712586283683777, "eval_runtime": 0.7391, "eval_samples_per_second": 2705.908, "eval_steps_per_second": 85.236, "step": 3000 }, { "epoch": 2.14, "eval_accuracy": 0.7885000109672546, "eval_loss": 0.5191779732704163, "eval_runtime": 0.7313, "eval_samples_per_second": 2734.943, "eval_steps_per_second": 86.151, "step": 3300 }, { "epoch": 2.27, "learning_rate": 2.1688795336787565e-05, "loss": 0.2884, "step": 3500 }, { "epoch": 2.33, "eval_accuracy": 0.7925000190734863, "eval_loss": 0.5480718612670898, "eval_runtime": 0.7293, "eval_samples_per_second": 2742.348, "eval_steps_per_second": 86.384, "step": 3600 }, { "epoch": 2.53, "eval_accuracy": 0.7735000252723694, "eval_loss": 0.5514509081840515, "eval_runtime": 0.735, "eval_samples_per_second": 2721.166, "eval_steps_per_second": 85.717, "step": 3900 }, { "epoch": 2.59, "learning_rate": 1.7640867875647667e-05, "loss": 0.27, "step": 4000 }, { "epoch": 2.72, "eval_accuracy": 0.7879999876022339, "eval_loss": 0.5070611238479614, "eval_runtime": 0.7377, "eval_samples_per_second": 2710.994, "eval_steps_per_second": 85.396, "step": 4200 }, { "epoch": 2.91, "learning_rate": 1.3601036269430053e-05, "loss": 0.2665, "step": 4500 }, { "epoch": 2.91, "eval_accuracy": 0.7875000238418579, "eval_loss": 0.5371026992797852, "eval_runtime": 0.7291, "eval_samples_per_second": 2743.286, "eval_steps_per_second": 86.413, "step": 4500 }, { "epoch": 3.11, "eval_accuracy": 0.7910000085830688, "eval_loss": 0.624961793422699, "eval_runtime": 0.7282, "eval_samples_per_second": 2746.606, "eval_steps_per_second": 86.518, "step": 4800 }, { "epoch": 3.24, "learning_rate": 9.553108808290157e-06, "loss": 0.2171, "step": 5000 }, { "epoch": 3.3, "eval_accuracy": 0.7940000295639038, "eval_loss": 0.5903840065002441, "eval_runtime": 0.7375, "eval_samples_per_second": 2712.014, "eval_steps_per_second": 85.428, "step": 5100 }, { "epoch": 3.5, "eval_accuracy": 0.7850000262260437, "eval_loss": 0.598899781703949, "eval_runtime": 0.7383, "eval_samples_per_second": 2709.079, "eval_steps_per_second": 85.336, "step": 5400 }, { "epoch": 3.56, "learning_rate": 5.50518134715026e-06, "loss": 0.2083, "step": 5500 }, { "epoch": 3.69, "eval_accuracy": 0.7910000085830688, "eval_loss": 0.5919374227523804, "eval_runtime": 0.7401, "eval_samples_per_second": 2702.433, "eval_steps_per_second": 85.127, "step": 5700 }, { "epoch": 3.89, "learning_rate": 1.4653497409326426e-06, "loss": 0.1906, "step": 6000 }, { "epoch": 3.89, "eval_accuracy": 0.7864999771118164, "eval_loss": 0.6115372180938721, "eval_runtime": 0.7292, "eval_samples_per_second": 2742.712, "eval_steps_per_second": 86.395, "step": 6000 }, { "epoch": 4.0, "step": 6176, "total_flos": 4946498250063360.0, "train_loss": 0.35224579220608726, "train_runtime": 2738.1778, "train_samples_per_second": 72.166, "train_steps_per_second": 2.256 } ], "max_steps": 6176, "num_train_epochs": 4, "total_flos": 4946498250063360.0, "trial_name": null, "trial_params": null }