{ "best_metric": 1.0, "best_model_checkpoint": "ktp-not-ktp-clip/checkpoint-52", "epoch": 18.46153846153846, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.897196261682243, "eval_loss": 0.4882933795452118, "eval_runtime": 6.4817, "eval_samples_per_second": 33.016, "eval_steps_per_second": 2.16, "step": 6 }, { "epoch": 1.5384615384615383, "grad_norm": 8.5100679397583, "learning_rate": 4.166666666666667e-05, "loss": 0.6748, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.9719626168224299, "eval_loss": 0.07545875012874603, "eval_runtime": 4.9236, "eval_samples_per_second": 43.464, "eval_steps_per_second": 2.843, "step": 13 }, { "epoch": 2.9230769230769234, "eval_accuracy": 0.9345794392523364, "eval_loss": 0.13731282949447632, "eval_runtime": 4.9997, "eval_samples_per_second": 42.803, "eval_steps_per_second": 2.8, "step": 19 }, { "epoch": 3.076923076923077, "grad_norm": 55.80315399169922, "learning_rate": 4.62962962962963e-05, "loss": 0.1779, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.9719626168224299, "eval_loss": 0.06819602847099304, "eval_runtime": 5.1079, "eval_samples_per_second": 41.896, "eval_steps_per_second": 2.741, "step": 26 }, { "epoch": 4.615384615384615, "grad_norm": 5.013700008392334, "learning_rate": 4.166666666666667e-05, "loss": 0.1511, "step": 30 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.9766355140186916, "eval_loss": 0.03988885134458542, "eval_runtime": 5.0626, "eval_samples_per_second": 42.271, "eval_steps_per_second": 2.765, "step": 32 }, { "epoch": 6.0, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.011155444197356701, "eval_runtime": 5.129, "eval_samples_per_second": 41.724, "eval_steps_per_second": 2.73, "step": 39 }, { "epoch": 6.153846153846154, "grad_norm": 22.189523696899414, "learning_rate": 3.7037037037037037e-05, "loss": 0.0248, "step": 40 }, { "epoch": 6.923076923076923, "eval_accuracy": 0.9766355140186916, "eval_loss": 0.05554972589015961, "eval_runtime": 5.1142, "eval_samples_per_second": 41.844, "eval_steps_per_second": 2.737, "step": 45 }, { "epoch": 7.6923076923076925, "grad_norm": 49.896671295166016, "learning_rate": 3.240740740740741e-05, "loss": 0.057, "step": 50 }, { "epoch": 8.0, "eval_accuracy": 1.0, "eval_loss": 0.005110082216560841, "eval_runtime": 5.1693, "eval_samples_per_second": 41.398, "eval_steps_per_second": 2.708, "step": 52 }, { "epoch": 8.923076923076923, "eval_accuracy": 0.985981308411215, "eval_loss": 0.029344480484724045, "eval_runtime": 5.1265, "eval_samples_per_second": 41.744, "eval_steps_per_second": 2.731, "step": 58 }, { "epoch": 9.23076923076923, "grad_norm": 22.167015075683594, "learning_rate": 2.777777777777778e-05, "loss": 0.0361, "step": 60 }, { "epoch": 10.0, "eval_accuracy": 0.985981308411215, "eval_loss": 0.027151916176080704, "eval_runtime": 5.1474, "eval_samples_per_second": 41.574, "eval_steps_per_second": 2.72, "step": 65 }, { "epoch": 10.76923076923077, "grad_norm": 0.32469525933265686, "learning_rate": 2.314814814814815e-05, "loss": 0.011, "step": 70 }, { "epoch": 10.923076923076923, "eval_accuracy": 0.9906542056074766, "eval_loss": 0.01700090989470482, "eval_runtime": 5.0367, "eval_samples_per_second": 42.488, "eval_steps_per_second": 2.78, "step": 71 }, { "epoch": 12.0, "eval_accuracy": 0.985981308411215, "eval_loss": 0.07101369649171829, "eval_runtime": 4.9654, "eval_samples_per_second": 43.099, "eval_steps_per_second": 2.82, "step": 78 }, { "epoch": 12.307692307692308, "grad_norm": 0.06220458447933197, "learning_rate": 1.8518518518518518e-05, "loss": 0.0006, "step": 80 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.9813084112149533, "eval_loss": 0.07214021682739258, "eval_runtime": 4.9054, "eval_samples_per_second": 43.625, "eval_steps_per_second": 2.854, "step": 84 }, { "epoch": 13.846153846153847, "grad_norm": 0.0023754581343382597, "learning_rate": 1.388888888888889e-05, "loss": 0.0001, "step": 90 }, { "epoch": 14.0, "eval_accuracy": 0.9906542056074766, "eval_loss": 0.02770264819264412, "eval_runtime": 4.9176, "eval_samples_per_second": 43.517, "eval_steps_per_second": 2.847, "step": 91 }, { "epoch": 14.923076923076923, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.022401457652449608, "eval_runtime": 4.8851, "eval_samples_per_second": 43.806, "eval_steps_per_second": 2.866, "step": 97 }, { "epoch": 15.384615384615385, "grad_norm": 0.0014737015590071678, "learning_rate": 9.259259259259259e-06, "loss": 0.0001, "step": 100 }, { "epoch": 16.0, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.02016393281519413, "eval_runtime": 5.2559, "eval_samples_per_second": 40.716, "eval_steps_per_second": 2.664, "step": 104 }, { "epoch": 16.923076923076923, "grad_norm": 0.0013604339910671115, "learning_rate": 4.6296296296296296e-06, "loss": 0.0, "step": 110 }, { "epoch": 16.923076923076923, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.023425478488206863, "eval_runtime": 5.0917, "eval_samples_per_second": 42.029, "eval_steps_per_second": 2.75, "step": 110 }, { "epoch": 18.0, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.02447943389415741, "eval_runtime": 4.9857, "eval_samples_per_second": 42.923, "eval_steps_per_second": 2.808, "step": 117 }, { "epoch": 18.46153846153846, "grad_norm": 0.009677406400442123, "learning_rate": 0.0, "loss": 0.0, "step": 120 }, { "epoch": 18.46153846153846, "eval_accuracy": 0.9953271028037384, "eval_loss": 0.024529017508029938, "eval_runtime": 5.1559, "eval_samples_per_second": 41.506, "eval_steps_per_second": 2.715, "step": 120 }, { "epoch": 18.46153846153846, "step": 120, "total_flos": 6.035309694497341e+17, "train_loss": 0.09444785690047866, "train_runtime": 374.1698, "train_samples_per_second": 22.129, "train_steps_per_second": 0.321 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.035309694497341e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }