{ "best_metric": 0.31465813517570496, "best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_large/epochs_2_bs_32_lr_5e-6/checkpoint-2700", "epoch": 2.0, "global_step": 3088, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "eval_accuracy": 0.7465000152587891, "eval_loss": 0.5256621241569519, "eval_runtime": 3.674, "eval_samples_per_second": 544.365, "eval_steps_per_second": 17.148, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.195272020725389e-06, "loss": 0.5015, "step": 500 }, { "epoch": 0.39, "eval_accuracy": 0.8044999837875366, "eval_loss": 0.42749786376953125, "eval_runtime": 3.7001, "eval_samples_per_second": 540.533, "eval_steps_per_second": 17.027, "step": 600 }, { "epoch": 0.58, "eval_accuracy": 0.8335000276565552, "eval_loss": 0.40074634552001953, "eval_runtime": 3.6783, "eval_samples_per_second": 543.722, "eval_steps_per_second": 17.127, "step": 900 }, { "epoch": 0.65, "learning_rate": 3.3856865284974094e-06, "loss": 0.302, "step": 1000 }, { "epoch": 0.78, "eval_accuracy": 0.8454999923706055, "eval_loss": 0.3730817139148712, "eval_runtime": 3.6669, "eval_samples_per_second": 545.419, "eval_steps_per_second": 17.181, "step": 1200 }, { "epoch": 0.97, "learning_rate": 2.5761010362694307e-06, "loss": 0.2552, "step": 1500 }, { "epoch": 0.97, "eval_accuracy": 0.859000027179718, "eval_loss": 0.33458662033081055, "eval_runtime": 3.679, "eval_samples_per_second": 543.627, "eval_steps_per_second": 17.124, "step": 1500 }, { "epoch": 1.17, "eval_accuracy": 0.8654999732971191, "eval_loss": 0.33431583642959595, "eval_runtime": 3.6739, "eval_samples_per_second": 544.382, "eval_steps_per_second": 17.148, "step": 1800 }, { "epoch": 1.3, "learning_rate": 1.7665155440414508e-06, "loss": 0.2051, "step": 2000 }, { "epoch": 1.36, "eval_accuracy": 0.871999979019165, "eval_loss": 0.3163151144981384, "eval_runtime": 3.662, "eval_samples_per_second": 546.147, "eval_steps_per_second": 17.204, "step": 2100 }, { "epoch": 1.55, "eval_accuracy": 0.8784999847412109, "eval_loss": 0.31933271884918213, "eval_runtime": 3.6771, "eval_samples_per_second": 543.901, "eval_steps_per_second": 17.133, "step": 2400 }, { "epoch": 1.62, "learning_rate": 9.585492227979275e-07, "loss": 0.1902, "step": 2500 }, { "epoch": 1.75, "eval_accuracy": 0.8769999742507935, "eval_loss": 0.31465813517570496, "eval_runtime": 3.6635, "eval_samples_per_second": 545.931, "eval_steps_per_second": 17.197, "step": 2700 }, { "epoch": 1.94, "learning_rate": 1.4896373056994818e-07, "loss": 0.1928, "step": 3000 }, { "epoch": 1.94, "eval_accuracy": 0.871999979019165, "eval_loss": 0.32511112093925476, "eval_runtime": 3.6606, "eval_samples_per_second": 546.359, "eval_steps_per_second": 17.21, "step": 3000 }, { "epoch": 2.0, "step": 3088, "total_flos": 803053584088992.0, "train_loss": 0.2719091473465756, "train_runtime": 1265.6788, "train_samples_per_second": 78.062, "train_steps_per_second": 2.44 } ], "max_steps": 3088, "num_train_epochs": 2, "total_flos": 803053584088992.0, "trial_name": null, "trial_params": null }