{ "best_metric": 0.05549783632159233, "best_model_checkpoint": "./vit-base-beans/checkpoint-160", "epoch": 3.6363636363636362, "eval_steps": 40, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "grad_norm": 0.8493722081184387, "learning_rate": 0.00018863636363636364, "loss": 0.444, "step": 10 }, { "epoch": 0.45, "grad_norm": 0.272053599357605, "learning_rate": 0.00017727272727272728, "loss": 0.151, "step": 20 }, { "epoch": 0.68, "grad_norm": 0.1838751882314682, "learning_rate": 0.00016590909090909094, "loss": 0.117, "step": 30 }, { "epoch": 0.91, "grad_norm": 4.570974349975586, "learning_rate": 0.00015454545454545454, "loss": 0.2073, "step": 40 }, { "epoch": 0.91, "eval_accuracy": 0.975, "eval_loss": 0.09584420919418335, "eval_runtime": 57.723, "eval_samples_per_second": 3.465, "eval_steps_per_second": 0.433, "step": 40 }, { "epoch": 1.14, "grad_norm": 0.50724858045578, "learning_rate": 0.0001431818181818182, "loss": 0.0636, "step": 50 }, { "epoch": 1.36, "grad_norm": 0.09627090394496918, "learning_rate": 0.0001318181818181818, "loss": 0.0651, "step": 60 }, { "epoch": 1.59, "grad_norm": 0.08125822246074677, "learning_rate": 0.00012045454545454546, "loss": 0.0298, "step": 70 }, { "epoch": 1.82, "grad_norm": 0.0715242475271225, "learning_rate": 0.00010909090909090909, "loss": 0.0342, "step": 80 }, { "epoch": 1.82, "eval_accuracy": 0.965, "eval_loss": 0.09033375978469849, "eval_runtime": 3.0566, "eval_samples_per_second": 65.433, "eval_steps_per_second": 8.179, "step": 80 }, { "epoch": 2.05, "grad_norm": 0.06209810450673103, "learning_rate": 9.772727272727274e-05, "loss": 0.0213, "step": 90 }, { "epoch": 2.27, "grad_norm": 0.056180860847234726, "learning_rate": 8.636363636363637e-05, "loss": 0.0081, "step": 100 }, { "epoch": 2.5, "grad_norm": 0.05113891884684563, "learning_rate": 7.500000000000001e-05, "loss": 0.0073, "step": 110 }, { "epoch": 2.73, "grad_norm": 0.05209748074412346, "learning_rate": 6.363636363636364e-05, "loss": 0.0139, "step": 120 }, { "epoch": 2.73, "eval_accuracy": 0.965, "eval_loss": 0.09859169274568558, "eval_runtime": 2.9938, "eval_samples_per_second": 66.805, "eval_steps_per_second": 8.351, "step": 120 }, { "epoch": 2.95, "grad_norm": 0.06674375385046005, "learning_rate": 5.2272727272727274e-05, "loss": 0.0068, "step": 130 }, { "epoch": 3.18, "grad_norm": 0.04334596171975136, "learning_rate": 4.0909090909090915e-05, "loss": 0.0068, "step": 140 }, { "epoch": 3.41, "grad_norm": 0.04224753752350807, "learning_rate": 2.954545454545455e-05, "loss": 0.0057, "step": 150 }, { "epoch": 3.64, "grad_norm": 0.048290498554706573, "learning_rate": 1.8181818181818182e-05, "loss": 0.0069, "step": 160 }, { "epoch": 3.64, "eval_accuracy": 0.97, "eval_loss": 0.05549783632159233, "eval_runtime": 3.0091, "eval_samples_per_second": 66.466, "eval_steps_per_second": 8.308, "step": 160 } ], "logging_steps": 10, "max_steps": 176, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 40, "total_flos": 1.9721711356913664e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }