{ "best_metric": 0.2576049864292145, "best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_xlarge/epochs_3_bs_32_lr_5e-6/checkpoint-1200", "epoch": 3.0, "global_step": 4632, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "eval_accuracy": 0.7799999713897705, "eval_loss": 0.4942445755004883, "eval_runtime": 10.4303, "eval_samples_per_second": 191.749, "eval_steps_per_second": 6.04, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.465673575129534e-06, "loss": 0.4488, "step": 500 }, { "epoch": 0.39, "eval_accuracy": 0.8755000233650208, "eval_loss": 0.3036494255065918, "eval_runtime": 10.411, "eval_samples_per_second": 192.104, "eval_steps_per_second": 6.051, "step": 600 }, { "epoch": 0.58, "eval_accuracy": 0.8794999718666077, "eval_loss": 0.29540833830833435, "eval_runtime": 10.4508, "eval_samples_per_second": 191.372, "eval_steps_per_second": 6.028, "step": 900 }, { "epoch": 0.65, "learning_rate": 3.925949913644214e-06, "loss": 0.2328, "step": 1000 }, { "epoch": 0.78, "eval_accuracy": 0.9075000286102295, "eval_loss": 0.2576049864292145, "eval_runtime": 10.4132, "eval_samples_per_second": 192.063, "eval_steps_per_second": 6.05, "step": 1200 }, { "epoch": 0.97, "learning_rate": 3.386226252158895e-06, "loss": 0.2015, "step": 1500 }, { "epoch": 0.97, "eval_accuracy": 0.8995000123977661, "eval_loss": 0.2740257680416107, "eval_runtime": 10.4482, "eval_samples_per_second": 191.42, "eval_steps_per_second": 6.03, "step": 1500 }, { "epoch": 1.17, "eval_accuracy": 0.9020000100135803, "eval_loss": 0.2881057858467102, "eval_runtime": 10.4618, "eval_samples_per_second": 191.171, "eval_steps_per_second": 6.022, "step": 1800 }, { "epoch": 1.3, "learning_rate": 2.8465025906735755e-06, "loss": 0.1539, "step": 2000 }, { "epoch": 1.36, "eval_accuracy": 0.906499981880188, "eval_loss": 0.2683846056461334, "eval_runtime": 10.4536, "eval_samples_per_second": 191.321, "eval_steps_per_second": 6.027, "step": 2100 }, { "epoch": 1.55, "eval_accuracy": 0.906000018119812, "eval_loss": 0.34197333455085754, "eval_runtime": 10.4733, "eval_samples_per_second": 190.961, "eval_steps_per_second": 6.015, "step": 2400 }, { "epoch": 1.62, "learning_rate": 2.3078583765112265e-06, "loss": 0.1428, "step": 2500 }, { "epoch": 1.75, "eval_accuracy": 0.906499981880188, "eval_loss": 0.3059616684913635, "eval_runtime": 10.4777, "eval_samples_per_second": 190.882, "eval_steps_per_second": 6.013, "step": 2700 }, { "epoch": 1.94, "learning_rate": 1.7681347150259068e-06, "loss": 0.1399, "step": 3000 }, { "epoch": 1.94, "eval_accuracy": 0.9020000100135803, "eval_loss": 0.2935360372066498, "eval_runtime": 10.5064, "eval_samples_per_second": 190.361, "eval_steps_per_second": 5.996, "step": 3000 }, { "epoch": 2.14, "eval_accuracy": 0.909500002861023, "eval_loss": 0.3417232036590576, "eval_runtime": 10.4876, "eval_samples_per_second": 190.702, "eval_steps_per_second": 6.007, "step": 3300 }, { "epoch": 2.27, "learning_rate": 1.2284110535405874e-06, "loss": 0.1069, "step": 3500 }, { "epoch": 2.33, "eval_accuracy": 0.8999999761581421, "eval_loss": 0.4427025616168976, "eval_runtime": 10.5396, "eval_samples_per_second": 189.761, "eval_steps_per_second": 5.977, "step": 3600 }, { "epoch": 2.53, "eval_accuracy": 0.9039999842643738, "eval_loss": 0.40283679962158203, "eval_runtime": 10.5439, "eval_samples_per_second": 189.684, "eval_steps_per_second": 5.975, "step": 3900 }, { "epoch": 2.59, "learning_rate": 6.886873920552678e-07, "loss": 0.1009, "step": 4000 }, { "epoch": 2.72, "eval_accuracy": 0.9024999737739563, "eval_loss": 0.4096806049346924, "eval_runtime": 10.4683, "eval_samples_per_second": 191.052, "eval_steps_per_second": 6.018, "step": 4200 }, { "epoch": 2.91, "learning_rate": 1.5004317789291884e-07, "loss": 0.0936, "step": 4500 }, { "epoch": 2.91, "eval_accuracy": 0.9070000052452087, "eval_loss": 0.4148932099342346, "eval_runtime": 10.465, "eval_samples_per_second": 191.113, "eval_steps_per_second": 6.02, "step": 4500 }, { "epoch": 3.0, "step": 4632, "total_flos": 4794349372791840.0, "train_loss": 0.17779028992166998, "train_runtime": 4326.2693, "train_samples_per_second": 34.257, "train_steps_per_second": 1.071 } ], "max_steps": 4632, "num_train_epochs": 3, "total_flos": 4794349372791840.0, "trial_name": null, "trial_params": null }