{ "best_metric": 0.5380504727363586, "best_model_checkpoint": "/data/jcanete/all_results/xnli/beto_uncased/epochs_4_bs_64_lr_5e-5/checkpoint-12000", "epoch": 4.0, "global_step": 24544, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.898142112125163e-05, "loss": 0.7866, "step": 500 }, { "epoch": 0.16, "learning_rate": 4.796487940026076e-05, "loss": 0.703, "step": 1000 }, { "epoch": 0.24, "learning_rate": 4.6946300521512387e-05, "loss": 0.6809, "step": 1500 }, { "epoch": 0.33, "learning_rate": 4.5927721642764015e-05, "loss": 0.6555, "step": 2000 }, { "epoch": 0.33, "eval_accuracy": 0.7497991967871486, "eval_loss": 0.602264940738678, "eval_runtime": 1.3367, "eval_samples_per_second": 1862.863, "eval_steps_per_second": 29.177, "step": 2000 }, { "epoch": 0.41, "learning_rate": 4.490914276401565e-05, "loss": 0.6351, "step": 2500 }, { "epoch": 0.49, "learning_rate": 4.389056388526728e-05, "loss": 0.6267, "step": 3000 }, { "epoch": 0.57, "learning_rate": 4.2871985006518906e-05, "loss": 0.62, "step": 3500 }, { "epoch": 0.65, "learning_rate": 4.185340612777054e-05, "loss": 0.6094, "step": 4000 }, { "epoch": 0.65, "eval_accuracy": 0.7678714859437751, "eval_loss": 0.5793344378471375, "eval_runtime": 2.4971, "eval_samples_per_second": 997.156, "eval_steps_per_second": 15.618, "step": 4000 }, { "epoch": 0.73, "learning_rate": 4.083482724902217e-05, "loss": 0.5983, "step": 4500 }, { "epoch": 0.81, "learning_rate": 3.9816248370273797e-05, "loss": 0.5886, "step": 5000 }, { "epoch": 0.9, "learning_rate": 3.880174380704042e-05, "loss": 0.5871, "step": 5500 }, { "epoch": 0.98, "learning_rate": 3.7783164928292046e-05, "loss": 0.5771, "step": 6000 }, { "epoch": 0.98, "eval_accuracy": 0.7811244979919679, "eval_loss": 0.5455663800239563, "eval_runtime": 2.4323, "eval_samples_per_second": 1023.703, "eval_steps_per_second": 16.034, "step": 6000 }, { "epoch": 1.06, "learning_rate": 3.676458604954368e-05, "loss": 0.4853, "step": 6500 }, { "epoch": 1.14, "learning_rate": 3.574600717079531e-05, "loss": 0.4519, "step": 7000 }, { "epoch": 1.22, "learning_rate": 3.472742829204694e-05, "loss": 0.4597, "step": 7500 }, { "epoch": 1.3, "learning_rate": 3.3708849413298565e-05, "loss": 0.4484, "step": 8000 }, { "epoch": 1.3, "eval_accuracy": 0.7899598393574297, "eval_loss": 0.5768685340881348, "eval_runtime": 1.3402, "eval_samples_per_second": 1857.904, "eval_steps_per_second": 29.1, "step": 8000 }, { "epoch": 1.39, "learning_rate": 3.26902705345502e-05, "loss": 0.4567, "step": 8500 }, { "epoch": 1.47, "learning_rate": 3.167372881355932e-05, "loss": 0.4483, "step": 9000 }, { "epoch": 1.55, "learning_rate": 3.065514993481095e-05, "loss": 0.446, "step": 9500 }, { "epoch": 1.63, "learning_rate": 2.9636571056062584e-05, "loss": 0.4513, "step": 10000 }, { "epoch": 1.63, "eval_accuracy": 0.7738955823293173, "eval_loss": 0.5736198425292969, "eval_runtime": 1.3279, "eval_samples_per_second": 1875.149, "eval_steps_per_second": 29.37, "step": 10000 }, { "epoch": 1.71, "learning_rate": 2.8617992177314212e-05, "loss": 0.4511, "step": 10500 }, { "epoch": 1.79, "learning_rate": 2.759941329856584e-05, "loss": 0.4423, "step": 11000 }, { "epoch": 1.87, "learning_rate": 2.6580834419817475e-05, "loss": 0.4472, "step": 11500 }, { "epoch": 1.96, "learning_rate": 2.5564292698826596e-05, "loss": 0.4362, "step": 12000 }, { "epoch": 1.96, "eval_accuracy": 0.7811244979919679, "eval_loss": 0.5380504727363586, "eval_runtime": 1.33, "eval_samples_per_second": 1872.162, "eval_steps_per_second": 29.323, "step": 12000 }, { "epoch": 2.04, "learning_rate": 2.4545713820078228e-05, "loss": 0.3786, "step": 12500 }, { "epoch": 2.12, "learning_rate": 2.3527134941329856e-05, "loss": 0.2879, "step": 13000 }, { "epoch": 2.2, "learning_rate": 2.2508556062581487e-05, "loss": 0.2885, "step": 13500 }, { "epoch": 2.28, "learning_rate": 2.148997718383312e-05, "loss": 0.2909, "step": 14000 }, { "epoch": 2.28, "eval_accuracy": 0.7835341365461848, "eval_loss": 0.6650500297546387, "eval_runtime": 2.4288, "eval_samples_per_second": 1025.198, "eval_steps_per_second": 16.057, "step": 14000 }, { "epoch": 2.36, "learning_rate": 2.0473435462842243e-05, "loss": 0.2991, "step": 14500 }, { "epoch": 2.44, "learning_rate": 1.945485658409387e-05, "loss": 0.2865, "step": 15000 }, { "epoch": 2.53, "learning_rate": 1.8436277705345503e-05, "loss": 0.2947, "step": 15500 }, { "epoch": 2.61, "learning_rate": 1.741769882659713e-05, "loss": 0.3002, "step": 16000 }, { "epoch": 2.61, "eval_accuracy": 0.7807228915662651, "eval_loss": 0.6428889632225037, "eval_runtime": 2.4279, "eval_samples_per_second": 1025.584, "eval_steps_per_second": 16.063, "step": 16000 }, { "epoch": 2.69, "learning_rate": 1.6399119947848762e-05, "loss": 0.2916, "step": 16500 }, { "epoch": 2.77, "learning_rate": 1.5380541069100394e-05, "loss": 0.2987, "step": 17000 }, { "epoch": 2.85, "learning_rate": 1.4361962190352022e-05, "loss": 0.3005, "step": 17500 }, { "epoch": 2.93, "learning_rate": 1.3343383311603652e-05, "loss": 0.2916, "step": 18000 }, { "epoch": 2.93, "eval_accuracy": 0.7779116465863454, "eval_loss": 0.6353530883789062, "eval_runtime": 1.3158, "eval_samples_per_second": 1892.365, "eval_steps_per_second": 29.639, "step": 18000 }, { "epoch": 3.01, "learning_rate": 1.2328878748370275e-05, "loss": 0.2756, "step": 18500 }, { "epoch": 3.1, "learning_rate": 1.1310299869621905e-05, "loss": 0.1773, "step": 19000 }, { "epoch": 3.18, "learning_rate": 1.0291720990873534e-05, "loss": 0.1788, "step": 19500 }, { "epoch": 3.26, "learning_rate": 9.273142112125164e-06, "loss": 0.1801, "step": 20000 }, { "epoch": 3.26, "eval_accuracy": 0.7811244979919679, "eval_loss": 0.7538436055183411, "eval_runtime": 1.3281, "eval_samples_per_second": 1874.884, "eval_steps_per_second": 29.366, "step": 20000 }, { "epoch": 3.34, "learning_rate": 8.254563233376792e-06, "loss": 0.176, "step": 20500 }, { "epoch": 3.42, "learning_rate": 7.23802151238592e-06, "loss": 0.1801, "step": 21000 }, { "epoch": 3.5, "learning_rate": 6.219442633637549e-06, "loss": 0.1761, "step": 21500 }, { "epoch": 3.59, "learning_rate": 5.200863754889179e-06, "loss": 0.1795, "step": 22000 }, { "epoch": 3.59, "eval_accuracy": 0.7891566265060241, "eval_loss": 0.7428993582725525, "eval_runtime": 2.4875, "eval_samples_per_second": 1000.986, "eval_steps_per_second": 15.678, "step": 22000 }, { "epoch": 3.67, "learning_rate": 4.182284876140809e-06, "loss": 0.1734, "step": 22500 }, { "epoch": 3.75, "learning_rate": 3.1637059973924383e-06, "loss": 0.1759, "step": 23000 }, { "epoch": 3.83, "learning_rate": 2.1471642764015647e-06, "loss": 0.1789, "step": 23500 }, { "epoch": 3.91, "learning_rate": 1.1285853976531943e-06, "loss": 0.1723, "step": 24000 }, { "epoch": 3.91, "eval_accuracy": 0.7895582329317269, "eval_loss": 0.7440813183784485, "eval_runtime": 2.4411, "eval_samples_per_second": 1020.029, "eval_steps_per_second": 15.976, "step": 24000 }, { "epoch": 3.99, "learning_rate": 1.1000651890482399e-07, "loss": 0.17, "step": 24500 }, { "epoch": 4.0, "step": 24544, "total_flos": 8.969723878657651e+16, "train_loss": 0.38933088829573315, "train_runtime": 9866.0231, "train_samples_per_second": 159.214, "train_steps_per_second": 2.488 } ], "max_steps": 24544, "num_train_epochs": 4, "total_flos": 8.969723878657651e+16, "trial_name": null, "trial_params": null }