{ "best_metric": 0.12496982514858246, "best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_xxlarge/epochs_4_bs_16_lr_5e-6/checkpoint-900", "epoch": 3.9991546914623837, "global_step": 2364, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_accuracy": 0.6899999976158142, "eval_loss": 0.9207698702812195, "eval_runtime": 53.4599, "eval_samples_per_second": 18.706, "eval_steps_per_second": 2.338, "step": 100 }, { "epoch": 0.34, "eval_accuracy": 0.8460000157356262, "eval_loss": 0.4446202516555786, "eval_runtime": 53.5207, "eval_samples_per_second": 18.684, "eval_steps_per_second": 2.336, "step": 200 }, { "epoch": 0.51, "eval_accuracy": 0.9229999780654907, "eval_loss": 0.31312161684036255, "eval_runtime": 52.7206, "eval_samples_per_second": 18.968, "eval_steps_per_second": 2.371, "step": 300 }, { "epoch": 0.68, "eval_accuracy": 0.9449999928474426, "eval_loss": 0.29927751421928406, "eval_runtime": 53.4208, "eval_samples_per_second": 18.719, "eval_steps_per_second": 2.34, "step": 400 }, { "epoch": 0.85, "learning_rate": 3.95093062605753e-06, "loss": 0.1599, "step": 500 }, { "epoch": 0.85, "eval_accuracy": 0.953000009059906, "eval_loss": 0.22006945312023163, "eval_runtime": 52.7047, "eval_samples_per_second": 18.974, "eval_steps_per_second": 2.372, "step": 500 }, { "epoch": 1.02, "eval_accuracy": 0.9279999732971191, "eval_loss": 0.3698674440383911, "eval_runtime": 52.7048, "eval_samples_per_second": 18.974, "eval_steps_per_second": 2.372, "step": 600 }, { "epoch": 1.18, "eval_accuracy": 0.9580000042915344, "eval_loss": 0.23167340457439423, "eval_runtime": 52.7049, "eval_samples_per_second": 18.974, "eval_steps_per_second": 2.372, "step": 700 }, { "epoch": 1.35, "eval_accuracy": 0.9639999866485596, "eval_loss": 0.16574066877365112, "eval_runtime": 53.2433, "eval_samples_per_second": 18.782, "eval_steps_per_second": 2.348, "step": 800 }, { "epoch": 1.52, "eval_accuracy": 0.9739999771118164, "eval_loss": 0.12496982514858246, "eval_runtime": 52.7138, "eval_samples_per_second": 18.97, "eval_steps_per_second": 2.371, "step": 900 }, { "epoch": 1.69, "learning_rate": 2.8934010152284262e-06, "loss": 0.0708, "step": 1000 }, { "epoch": 1.69, "eval_accuracy": 0.9549999833106995, "eval_loss": 0.23840972781181335, "eval_runtime": 52.6578, "eval_samples_per_second": 18.991, "eval_steps_per_second": 2.374, "step": 1000 }, { "epoch": 1.86, "eval_accuracy": 0.9700000286102295, "eval_loss": 0.1677614450454712, "eval_runtime": 52.6824, "eval_samples_per_second": 18.982, "eval_steps_per_second": 2.373, "step": 1100 }, { "epoch": 2.03, "eval_accuracy": 0.9710000157356262, "eval_loss": 0.1557045578956604, "eval_runtime": 53.1989, "eval_samples_per_second": 18.797, "eval_steps_per_second": 2.35, "step": 1200 }, { "epoch": 2.2, "eval_accuracy": 0.9629999995231628, "eval_loss": 0.22913765907287598, "eval_runtime": 52.6879, "eval_samples_per_second": 18.98, "eval_steps_per_second": 2.372, "step": 1300 }, { "epoch": 2.37, "eval_accuracy": 0.9670000076293945, "eval_loss": 0.17893917858600616, "eval_runtime": 52.8001, "eval_samples_per_second": 18.939, "eval_steps_per_second": 2.367, "step": 1400 }, { "epoch": 2.54, "learning_rate": 1.8358714043993234e-06, "loss": 0.0453, "step": 1500 }, { "epoch": 2.54, "eval_accuracy": 0.9710000157356262, "eval_loss": 0.16048669815063477, "eval_runtime": 52.6463, "eval_samples_per_second": 18.995, "eval_steps_per_second": 2.374, "step": 1500 }, { "epoch": 2.71, "eval_accuracy": 0.9520000219345093, "eval_loss": 0.2762700319290161, "eval_runtime": 52.6922, "eval_samples_per_second": 18.978, "eval_steps_per_second": 2.372, "step": 1600 }, { "epoch": 2.88, "eval_accuracy": 0.9649999737739563, "eval_loss": 0.18934407830238342, "eval_runtime": 52.6384, "eval_samples_per_second": 18.998, "eval_steps_per_second": 2.375, "step": 1700 }, { "epoch": 3.05, "eval_accuracy": 0.9739999771118164, "eval_loss": 0.16606220602989197, "eval_runtime": 52.7133, "eval_samples_per_second": 18.971, "eval_steps_per_second": 2.371, "step": 1800 }, { "epoch": 3.21, "eval_accuracy": 0.9660000205039978, "eval_loss": 0.222770556807518, "eval_runtime": 53.295, "eval_samples_per_second": 18.763, "eval_steps_per_second": 2.345, "step": 1900 }, { "epoch": 3.38, "learning_rate": 7.7834179357022e-07, "loss": 0.0294, "step": 2000 }, { "epoch": 3.38, "eval_accuracy": 0.9639999866485596, "eval_loss": 0.20788319408893585, "eval_runtime": 52.7008, "eval_samples_per_second": 18.975, "eval_steps_per_second": 2.372, "step": 2000 }, { "epoch": 3.55, "eval_accuracy": 0.9660000205039978, "eval_loss": 0.18167437613010406, "eval_runtime": 52.7113, "eval_samples_per_second": 18.971, "eval_steps_per_second": 2.371, "step": 2100 }, { "epoch": 3.72, "eval_accuracy": 0.9620000123977661, "eval_loss": 0.20487231016159058, "eval_runtime": 52.7823, "eval_samples_per_second": 18.946, "eval_steps_per_second": 2.368, "step": 2200 }, { "epoch": 3.89, "eval_accuracy": 0.9670000076293945, "eval_loss": 0.19099852442741394, "eval_runtime": 53.0072, "eval_samples_per_second": 18.865, "eval_steps_per_second": 2.358, "step": 2300 }, { "epoch": 4.0, "step": 2364, "total_flos": 1.796922868289472e+16, "train_loss": 0.06767434234748233, "train_runtime": 11146.6816, "train_samples_per_second": 3.394, "train_steps_per_second": 0.212 } ], "max_steps": 2364, "num_train_epochs": 4, "total_flos": 1.796922868289472e+16, "trial_name": null, "trial_params": null }