{ "best_metric": 0.15601937472820282, "best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_tiny/epochs_4_bs_16_lr_3e-5/checkpoint-2100", "epoch": 4.0, "global_step": 2368, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_accuracy": 0.29100000858306885, "eval_loss": 1.4215397834777832, "eval_runtime": 0.9993, "eval_samples_per_second": 1000.659, "eval_steps_per_second": 63.042, "step": 100 }, { "epoch": 0.34, "eval_accuracy": 0.5360000133514404, "eval_loss": 1.0526843070983887, "eval_runtime": 0.9899, "eval_samples_per_second": 1010.212, "eval_steps_per_second": 63.643, "step": 200 }, { "epoch": 0.51, "eval_accuracy": 0.7480000257492065, "eval_loss": 0.8007577657699585, "eval_runtime": 0.9905, "eval_samples_per_second": 1009.612, "eval_steps_per_second": 63.606, "step": 300 }, { "epoch": 0.68, "eval_accuracy": 0.7699999809265137, "eval_loss": 0.7043776512145996, "eval_runtime": 1.0018, "eval_samples_per_second": 998.248, "eval_steps_per_second": 62.89, "step": 400 }, { "epoch": 0.84, "learning_rate": 2.3703547297297295e-05, "loss": 0.2646, "step": 500 }, { "epoch": 0.84, "eval_accuracy": 0.843999981880188, "eval_loss": 0.46549031138420105, "eval_runtime": 1.0012, "eval_samples_per_second": 998.759, "eval_steps_per_second": 62.922, "step": 500 }, { "epoch": 1.01, "eval_accuracy": 0.8529999852180481, "eval_loss": 0.46177130937576294, "eval_runtime": 0.9926, "eval_samples_per_second": 1007.431, "eval_steps_per_second": 63.468, "step": 600 }, { "epoch": 1.18, "eval_accuracy": 0.9089999794960022, "eval_loss": 0.3174803853034973, "eval_runtime": 0.9914, "eval_samples_per_second": 1008.709, "eval_steps_per_second": 63.549, "step": 700 }, { "epoch": 1.35, "eval_accuracy": 0.9269999861717224, "eval_loss": 0.2770524322986603, "eval_runtime": 0.9992, "eval_samples_per_second": 1000.797, "eval_steps_per_second": 63.05, "step": 800 }, { "epoch": 1.52, "eval_accuracy": 0.9449999928474426, "eval_loss": 0.2170553058385849, "eval_runtime": 0.9905, "eval_samples_per_second": 1009.635, "eval_steps_per_second": 63.607, "step": 900 }, { "epoch": 1.69, "learning_rate": 1.736908783783784e-05, "loss": 0.0843, "step": 1000 }, { "epoch": 1.69, "eval_accuracy": 0.9350000023841858, "eval_loss": 0.23940612375736237, "eval_runtime": 0.9908, "eval_samples_per_second": 1009.32, "eval_steps_per_second": 63.587, "step": 1000 }, { "epoch": 1.86, "eval_accuracy": 0.9480000138282776, "eval_loss": 0.20756861567497253, "eval_runtime": 1.0016, "eval_samples_per_second": 998.361, "eval_steps_per_second": 62.897, "step": 1100 }, { "epoch": 2.03, "eval_accuracy": 0.9419999718666077, "eval_loss": 0.21042801439762115, "eval_runtime": 0.9893, "eval_samples_per_second": 1010.862, "eval_steps_per_second": 63.684, "step": 1200 }, { "epoch": 2.2, "eval_accuracy": 0.9509999752044678, "eval_loss": 0.18397970497608185, "eval_runtime": 0.9927, "eval_samples_per_second": 1007.38, "eval_steps_per_second": 63.465, "step": 1300 }, { "epoch": 2.36, "eval_accuracy": 0.9549999833106995, "eval_loss": 0.16272501647472382, "eval_runtime": 0.9936, "eval_samples_per_second": 1006.426, "eval_steps_per_second": 63.405, "step": 1400 }, { "epoch": 2.53, "learning_rate": 1.1047297297297297e-05, "loss": 0.0457, "step": 1500 }, { "epoch": 2.53, "eval_accuracy": 0.9380000233650208, "eval_loss": 0.21701455116271973, "eval_runtime": 0.992, "eval_samples_per_second": 1008.023, "eval_steps_per_second": 63.505, "step": 1500 }, { "epoch": 2.7, "eval_accuracy": 0.9419999718666077, "eval_loss": 0.20601776242256165, "eval_runtime": 0.9926, "eval_samples_per_second": 1007.478, "eval_steps_per_second": 63.471, "step": 1600 }, { "epoch": 2.87, "eval_accuracy": 0.9520000219345093, "eval_loss": 0.1742561310529709, "eval_runtime": 0.9948, "eval_samples_per_second": 1005.234, "eval_steps_per_second": 63.33, "step": 1700 }, { "epoch": 3.04, "eval_accuracy": 0.9490000009536743, "eval_loss": 0.17574115097522736, "eval_runtime": 0.9934, "eval_samples_per_second": 1006.664, "eval_steps_per_second": 63.42, "step": 1800 }, { "epoch": 3.21, "eval_accuracy": 0.9399999976158142, "eval_loss": 0.21880225837230682, "eval_runtime": 1.0033, "eval_samples_per_second": 996.71, "eval_steps_per_second": 62.793, "step": 1900 }, { "epoch": 3.38, "learning_rate": 4.712837837837838e-06, "loss": 0.0388, "step": 2000 }, { "epoch": 3.38, "eval_accuracy": 0.9559999704360962, "eval_loss": 0.16699624061584473, "eval_runtime": 0.9915, "eval_samples_per_second": 1008.575, "eval_steps_per_second": 63.54, "step": 2000 }, { "epoch": 3.55, "eval_accuracy": 0.9589999914169312, "eval_loss": 0.15601937472820282, "eval_runtime": 0.9908, "eval_samples_per_second": 1009.251, "eval_steps_per_second": 63.583, "step": 2100 }, { "epoch": 3.72, "eval_accuracy": 0.9559999704360962, "eval_loss": 0.1640777885913849, "eval_runtime": 0.9918, "eval_samples_per_second": 1008.262, "eval_steps_per_second": 63.521, "step": 2200 }, { "epoch": 3.89, "eval_accuracy": 0.9549999833106995, "eval_loss": 0.16484832763671875, "eval_runtime": 1.0391, "eval_samples_per_second": 962.351, "eval_steps_per_second": 60.628, "step": 2300 }, { "epoch": 4.0, "step": 2368, "total_flos": 124651187836032.0, "train_loss": 0.09686436967269794, "train_runtime": 383.5478, "train_samples_per_second": 98.637, "train_steps_per_second": 6.174 } ], "max_steps": 2368, "num_train_epochs": 4, "total_flos": 124651187836032.0, "trial_name": null, "trial_params": null }