{ "best_metric": 0.4735751152038574, "best_model_checkpoint": "./vit-lr-cosine-warmup/checkpoint-963", "epoch": 13.0, "eval_steps": 500, "global_step": 4173, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 7.814958572387695, "learning_rate": 4.1428571428571437e-05, "loss": 0.86, "step": 321 }, { "epoch": 1.0, "eval_accuracy": 0.8141470180305131, "eval_f1": 0.8010598763076213, "eval_loss": 0.5250416994094849, "eval_precision": 0.8100096575743447, "eval_recall": 0.8141470180305131, "eval_runtime": 36.0914, "eval_samples_per_second": 79.908, "eval_steps_per_second": 10.002, "step": 321 }, { "epoch": 2.0, "grad_norm": 6.845722198486328, "learning_rate": 8.311688311688312e-05, "loss": 0.4517, "step": 642 }, { "epoch": 2.0, "eval_accuracy": 0.8221220527045769, "eval_f1": 0.8099830282273331, "eval_loss": 0.5117006897926331, "eval_precision": 0.8347375649374938, "eval_recall": 0.8221220527045769, "eval_runtime": 37.2073, "eval_samples_per_second": 77.512, "eval_steps_per_second": 9.702, "step": 642 }, { "epoch": 3.0, "grad_norm": 2.5204238891601562, "learning_rate": 9.985334621908699e-05, "loss": 0.3512, "step": 963 }, { "epoch": 3.0, "eval_accuracy": 0.8394590846047156, "eval_f1": 0.830758624078281, "eval_loss": 0.4735751152038574, "eval_precision": 0.8318317467279469, "eval_recall": 0.8394590846047156, "eval_runtime": 35.8524, "eval_samples_per_second": 80.441, "eval_steps_per_second": 10.069, "step": 963 }, { "epoch": 4.0, "grad_norm": 5.594597816467285, "learning_rate": 9.894936461151184e-05, "loss": 0.2184, "step": 1284 }, { "epoch": 4.0, "eval_accuracy": 0.8567961165048543, "eval_f1": 0.85051019948, "eval_loss": 0.4796653389930725, "eval_precision": 0.8536361493542505, "eval_recall": 0.8567961165048543, "eval_runtime": 35.3028, "eval_samples_per_second": 81.693, "eval_steps_per_second": 10.226, "step": 1284 }, { "epoch": 5.0, "grad_norm": 9.165299415588379, "learning_rate": 9.723506398349735e-05, "loss": 0.1264, "step": 1605 }, { "epoch": 5.0, "eval_accuracy": 0.8547156726768377, "eval_f1": 0.8530254035056796, "eval_loss": 0.6211732029914856, "eval_precision": 0.8551837556766221, "eval_recall": 0.8547156726768377, "eval_runtime": 36.6331, "eval_samples_per_second": 78.727, "eval_steps_per_second": 9.854, "step": 1605 }, { "epoch": 6.0, "grad_norm": 0.9397739768028259, "learning_rate": 9.473882326123909e-05, "loss": 0.0687, "step": 1926 }, { "epoch": 6.0, "eval_accuracy": 0.8463938973647711, "eval_f1": 0.840249522586874, "eval_loss": 0.7659199237823486, "eval_precision": 0.8475689441425499, "eval_recall": 0.8463938973647711, "eval_runtime": 35.8316, "eval_samples_per_second": 80.488, "eval_steps_per_second": 10.075, "step": 1926 }, { "epoch": 7.0, "grad_norm": 11.412993431091309, "learning_rate": 9.15019657867844e-05, "loss": 0.0463, "step": 2247 }, { "epoch": 7.0, "eval_accuracy": 0.8519417475728155, "eval_f1": 0.84690540461018, "eval_loss": 0.8237490057945251, "eval_precision": 0.8546320390871954, "eval_recall": 0.8519417475728155, "eval_runtime": 36.3237, "eval_samples_per_second": 79.397, "eval_steps_per_second": 9.938, "step": 2247 }, { "epoch": 8.0, "grad_norm": 0.15119314193725586, "learning_rate": 8.759130166350091e-05, "loss": 0.0373, "step": 2568 }, { "epoch": 8.0, "eval_accuracy": 0.8377253814147018, "eval_f1": 0.8414971604167042, "eval_loss": 0.871150553226471, "eval_precision": 0.8492780112281874, "eval_recall": 0.8377253814147018, "eval_runtime": 37.0971, "eval_samples_per_second": 77.742, "eval_steps_per_second": 9.731, "step": 2568 }, { "epoch": 9.0, "grad_norm": 0.9838098883628845, "learning_rate": 8.304716115113689e-05, "loss": 0.0347, "step": 2889 }, { "epoch": 9.0, "eval_accuracy": 0.8567961165048543, "eval_f1": 0.8533749018674412, "eval_loss": 0.8180708885192871, "eval_precision": 0.8549859977362129, "eval_recall": 0.8567961165048543, "eval_runtime": 36.43, "eval_samples_per_second": 79.166, "eval_steps_per_second": 9.909, "step": 2889 }, { "epoch": 10.0, "grad_norm": 2.4911880493164062, "learning_rate": 7.795595034552552e-05, "loss": 0.0263, "step": 3210 }, { "epoch": 10.0, "eval_accuracy": 0.8446601941747572, "eval_f1": 0.8308350673322552, "eval_loss": 1.0705382823944092, "eval_precision": 0.8388632159592988, "eval_recall": 0.8446601941747572, "eval_runtime": 36.2711, "eval_samples_per_second": 79.512, "eval_steps_per_second": 9.953, "step": 3210 }, { "epoch": 11.0, "grad_norm": 0.003689270233735442, "learning_rate": 7.240195031927308e-05, "loss": 0.0289, "step": 3531 }, { "epoch": 11.0, "eval_accuracy": 0.858876560332871, "eval_f1": 0.855018220816544, "eval_loss": 0.9376017451286316, "eval_precision": 0.8605983316828895, "eval_recall": 0.858876560332871, "eval_runtime": 36.7783, "eval_samples_per_second": 78.416, "eval_steps_per_second": 9.816, "step": 3531 }, { "epoch": 12.0, "grad_norm": 0.08966358751058578, "learning_rate": 6.647710326399964e-05, "loss": 0.0164, "step": 3852 }, { "epoch": 12.0, "eval_accuracy": 0.863384188626907, "eval_f1": 0.8610588511525862, "eval_loss": 0.9714025259017944, "eval_precision": 0.8611342448885915, "eval_recall": 0.863384188626907, "eval_runtime": 36.4831, "eval_samples_per_second": 79.05, "eval_steps_per_second": 9.895, "step": 3852 }, { "epoch": 13.0, "grad_norm": 0.05049363151192665, "learning_rate": 6.027949045818934e-05, "loss": 0.0077, "step": 4173 }, { "epoch": 13.0, "eval_accuracy": 0.8398058252427184, "eval_f1": 0.8242916893123671, "eval_loss": 1.2992373704910278, "eval_precision": 0.8395816522197255, "eval_recall": 0.8398058252427184, "eval_runtime": 37.0798, "eval_samples_per_second": 77.778, "eval_steps_per_second": 9.736, "step": 4173 }, { "epoch": 13.0, "step": 4173, "total_flos": 5.166157498470679e+18, "train_loss": 0.1749291451406399, "train_runtime": 1863.5469, "train_samples_per_second": 275.174, "train_steps_per_second": 17.225 } ], "logging_steps": 500, "max_steps": 32100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 5.166157498470679e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }