{ "best_metric": 0.13781137764453888, "best_model_checkpoint": "/data/jcanete/all_results/mldoc/distillbeto/epochs_3_bs_16_lr_2e-5/checkpoint-900", "epoch": 3.0, "global_step": 1776, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_accuracy": 0.7329999804496765, "eval_loss": 0.9082120656967163, "eval_runtime": 2.085, "eval_samples_per_second": 479.613, "eval_steps_per_second": 30.216, "step": 100 }, { "epoch": 0.34, "eval_accuracy": 0.8560000061988831, "eval_loss": 0.47957685589790344, "eval_runtime": 2.0816, "eval_samples_per_second": 480.41, "eval_steps_per_second": 30.266, "step": 200 }, { "epoch": 0.51, "eval_accuracy": 0.9480000138282776, "eval_loss": 0.23153278231620789, "eval_runtime": 2.0846, "eval_samples_per_second": 479.697, "eval_steps_per_second": 30.221, "step": 300 }, { "epoch": 0.68, "eval_accuracy": 0.9309999942779541, "eval_loss": 0.25234749913215637, "eval_runtime": 2.0857, "eval_samples_per_second": 479.464, "eval_steps_per_second": 30.206, "step": 400 }, { "epoch": 0.84, "learning_rate": 1.4391891891891894e-05, "loss": 0.1569, "step": 500 }, { "epoch": 0.84, "eval_accuracy": 0.9539999961853027, "eval_loss": 0.18860605359077454, "eval_runtime": 2.0884, "eval_samples_per_second": 478.832, "eval_steps_per_second": 30.166, "step": 500 }, { "epoch": 1.01, "eval_accuracy": 0.9020000100135803, "eval_loss": 0.3387507200241089, "eval_runtime": 2.0825, "eval_samples_per_second": 480.196, "eval_steps_per_second": 30.252, "step": 600 }, { "epoch": 1.18, "eval_accuracy": 0.9610000252723694, "eval_loss": 0.16218014061450958, "eval_runtime": 2.0844, "eval_samples_per_second": 479.763, "eval_steps_per_second": 30.225, "step": 700 }, { "epoch": 1.35, "eval_accuracy": 0.9509999752044678, "eval_loss": 0.18978460133075714, "eval_runtime": 2.0789, "eval_samples_per_second": 481.026, "eval_steps_per_second": 30.305, "step": 800 }, { "epoch": 1.52, "eval_accuracy": 0.9710000157356262, "eval_loss": 0.13781137764453888, "eval_runtime": 2.0825, "eval_samples_per_second": 480.201, "eval_steps_per_second": 30.253, "step": 900 }, { "epoch": 1.69, "learning_rate": 8.772522522522522e-06, "loss": 0.0617, "step": 1000 }, { "epoch": 1.69, "eval_accuracy": 0.9610000252723694, "eval_loss": 0.1706898957490921, "eval_runtime": 2.0817, "eval_samples_per_second": 480.376, "eval_steps_per_second": 30.264, "step": 1000 }, { "epoch": 1.86, "eval_accuracy": 0.9599999785423279, "eval_loss": 0.1884380429983139, "eval_runtime": 2.08, "eval_samples_per_second": 480.775, "eval_steps_per_second": 30.289, "step": 1100 }, { "epoch": 2.03, "eval_accuracy": 0.9570000171661377, "eval_loss": 0.20238688588142395, "eval_runtime": 2.0822, "eval_samples_per_second": 480.271, "eval_steps_per_second": 30.257, "step": 1200 }, { "epoch": 2.2, "eval_accuracy": 0.9589999914169312, "eval_loss": 0.1709979772567749, "eval_runtime": 2.0789, "eval_samples_per_second": 481.035, "eval_steps_per_second": 30.305, "step": 1300 }, { "epoch": 2.36, "eval_accuracy": 0.9610000252723694, "eval_loss": 0.1705155074596405, "eval_runtime": 2.0769, "eval_samples_per_second": 481.48, "eval_steps_per_second": 30.333, "step": 1400 }, { "epoch": 2.53, "learning_rate": 3.141891891891892e-06, "loss": 0.0353, "step": 1500 }, { "epoch": 2.53, "eval_accuracy": 0.9570000171661377, "eval_loss": 0.18021175265312195, "eval_runtime": 2.0858, "eval_samples_per_second": 479.427, "eval_steps_per_second": 30.204, "step": 1500 }, { "epoch": 2.7, "eval_accuracy": 0.9520000219345093, "eval_loss": 0.19292205572128296, "eval_runtime": 2.0769, "eval_samples_per_second": 481.487, "eval_steps_per_second": 30.334, "step": 1600 }, { "epoch": 2.87, "eval_accuracy": 0.9570000171661377, "eval_loss": 0.1787494271993637, "eval_runtime": 2.0981, "eval_samples_per_second": 476.617, "eval_steps_per_second": 30.027, "step": 1700 }, { "epoch": 3.0, "step": 1776, "total_flos": 3142677580766976.0, "train_loss": 0.077526035609546, "train_runtime": 3451.0165, "train_samples_per_second": 8.222, "train_steps_per_second": 0.515 } ], "max_steps": 1776, "num_train_epochs": 3, "total_flos": 3142677580766976.0, "trial_name": null, "trial_params": null }