{ "best_metric": 0.26184663, "best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v0-20241218-000939/checkpoint-100", "epoch": 5.0, "eval_steps": 200, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.78614354, "epoch": 0.05, "grad_norm": 32.0172921873113, "learning_rate": 0.0, "loss": 1.30341828, "memory(GiB)": 54.56, "step": 1, "train_speed(iter/s)": 0.023493 }, { "acc": 0.80234951, "epoch": 0.25, "grad_norm": 37.62154000246541, "learning_rate": 1e-05, "loss": 1.23205733, "memory(GiB)": 74.75, "step": 5, "train_speed(iter/s)": 0.030873 }, { "acc": 0.83633699, "epoch": 0.5, "grad_norm": 74.22787353731742, "learning_rate": 9.93181333636191e-06, "loss": 0.93288889, "memory(GiB)": 74.75, "step": 10, "train_speed(iter/s)": 0.032122 }, { "acc": 0.84369278, "epoch": 0.75, "grad_norm": 5.440459167288553, "learning_rate": 9.729113299882324e-06, "loss": 0.84061708, "memory(GiB)": 74.75, "step": 15, "train_speed(iter/s)": 0.032554 }, { "acc": 0.86311598, "epoch": 1.0, "grad_norm": 7.873547600114794, "learning_rate": 9.397429019156841e-06, "loss": 0.72119961, "memory(GiB)": 74.75, "step": 20, "train_speed(iter/s)": 0.032776 }, { "acc": 0.885886, "epoch": 1.25, "grad_norm": 5.745656860997658, "learning_rate": 8.94580797672727e-06, "loss": 0.77134295, "memory(GiB)": 74.75, "step": 25, "train_speed(iter/s)": 0.032863 }, { "acc": 0.8865592, "epoch": 1.5, "grad_norm": 16.573461756273073, "learning_rate": 8.386569217342893e-06, "loss": 0.61206017, "memory(GiB)": 74.75, "step": 30, "train_speed(iter/s)": 0.032959 }, { "acc": 0.89316502, "epoch": 1.75, "grad_norm": 5.34427710104793, "learning_rate": 7.734967316533076e-06, "loss": 0.64310627, "memory(GiB)": 74.75, "step": 35, "train_speed(iter/s)": 0.033027 }, { "acc": 0.89337797, "epoch": 2.0, "grad_norm": 3.3194243420102083, "learning_rate": 7.008776275552522e-06, "loss": 0.58372121, "memory(GiB)": 74.75, "step": 40, "train_speed(iter/s)": 0.03308 }, { "acc": 0.91168728, "epoch": 2.25, "grad_norm": 1.99209188413019, "learning_rate": 6.2278046929604265e-06, "loss": 0.4987381, "memory(GiB)": 74.75, "step": 45, "train_speed(iter/s)": 0.033097 }, { "acc": 0.90439634, "epoch": 2.5, "grad_norm": 7.169335908534055, "learning_rate": 5.413355437688926e-06, "loss": 0.54837952, "memory(GiB)": 74.75, "step": 50, "train_speed(iter/s)": 0.033134 }, { "acc": 0.90367451, "epoch": 2.75, "grad_norm": 5.395325589957545, "learning_rate": 4.587644562311076e-06, "loss": 0.53503237, "memory(GiB)": 74.75, "step": 55, "train_speed(iter/s)": 0.033163 }, { "acc": 0.92254467, "epoch": 3.0, "grad_norm": 2.5695200478983646, "learning_rate": 3.773195307039575e-06, "loss": 0.40150399, "memory(GiB)": 74.75, "step": 60, "train_speed(iter/s)": 0.033186 }, { "acc": 0.9301857, "epoch": 3.25, "grad_norm": 3.1556048007324367, "learning_rate": 2.9922237244474807e-06, "loss": 0.38734879, "memory(GiB)": 74.75, "step": 65, "train_speed(iter/s)": 0.033189 }, { "acc": 0.91883011, "epoch": 3.5, "grad_norm": 17.287645910961686, "learning_rate": 2.266032683466928e-06, "loss": 0.44510117, "memory(GiB)": 74.75, "step": 70, "train_speed(iter/s)": 0.033209 }, { "acc": 0.93858633, "epoch": 3.75, "grad_norm": 5.109693472015538, "learning_rate": 1.6144307826571085e-06, "loss": 0.36709385, "memory(GiB)": 74.75, "step": 75, "train_speed(iter/s)": 0.033226 }, { "acc": 0.94115381, "epoch": 4.0, "grad_norm": 3.535417356581457, "learning_rate": 1.0551920232727309e-06, "loss": 0.34772708, "memory(GiB)": 74.75, "step": 80, "train_speed(iter/s)": 0.03324 }, { "acc": 0.94864588, "epoch": 4.25, "grad_norm": 2.6450151758477927, "learning_rate": 6.035709808431585e-07, "loss": 0.30039248, "memory(GiB)": 74.75, "step": 85, "train_speed(iter/s)": 0.033238 }, { "acc": 0.95026588, "epoch": 4.5, "grad_norm": 2.3681008746117964, "learning_rate": 2.7188670011767715e-07, "loss": 0.28691387, "memory(GiB)": 74.75, "step": 90, "train_speed(iter/s)": 0.033251 }, { "acc": 0.94560108, "epoch": 4.75, "grad_norm": 4.066631026473562, "learning_rate": 6.918666363808976e-08, "loss": 0.29291582, "memory(GiB)": 74.75, "step": 95, "train_speed(iter/s)": 0.033261 }, { "acc": 0.95544872, "epoch": 5.0, "grad_norm": 3.609028127209203, "learning_rate": 1e-09, "loss": 0.26392193, "memory(GiB)": 74.75, "step": 100, "train_speed(iter/s)": 0.033271 }, { "epoch": 5.0, "eval_acc": 0.8952164009111617, "eval_loss": 0.2618466317653656, "eval_runtime": 23.5294, "eval_samples_per_second": 1.488, "eval_steps_per_second": 0.213, "step": 100 } ], "logging_steps": 5, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 240537459228672.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }