{ "best_metric": 0.8655030800821355, "best_model_checkpoint": "AnimeCharacterClassifierMark1/checkpoint-258", "epoch": 16.0, "eval_steps": 500, "global_step": 276, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.58, "learning_rate": 6.944444444444445e-06, "loss": 5.0145, "step": 10 }, { "epoch": 0.99, "eval_accuracy": 0.009240246406570842, "eval_loss": 4.930349826812744, "eval_runtime": 9.5929, "eval_samples_per_second": 101.533, "eval_steps_per_second": 0.834, "step": 17 }, { "epoch": 1.16, "learning_rate": 1.388888888888889e-05, "loss": 4.932, "step": 20 }, { "epoch": 1.74, "learning_rate": 2.0833333333333336e-05, "loss": 4.8416, "step": 30 }, { "epoch": 1.97, "eval_accuracy": 0.028747433264887063, "eval_loss": 4.748697757720947, "eval_runtime": 8.8137, "eval_samples_per_second": 110.51, "eval_steps_per_second": 0.908, "step": 34 }, { "epoch": 2.32, "learning_rate": 2.777777777777778e-05, "loss": 4.6652, "step": 40 }, { "epoch": 2.9, "learning_rate": 3.472222222222222e-05, "loss": 4.4383, "step": 50 }, { "epoch": 2.96, "eval_accuracy": 0.11704312114989733, "eval_loss": 4.359685897827148, "eval_runtime": 10.311, "eval_samples_per_second": 94.462, "eval_steps_per_second": 0.776, "step": 51 }, { "epoch": 3.48, "learning_rate": 4.166666666666667e-05, "loss": 4.0762, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.32238193018480493, "eval_loss": 3.641871213912964, "eval_runtime": 9.02, "eval_samples_per_second": 107.982, "eval_steps_per_second": 0.887, "step": 69 }, { "epoch": 4.06, "learning_rate": 4.8611111111111115e-05, "loss": 3.6518, "step": 70 }, { "epoch": 4.64, "learning_rate": 4.937694704049845e-05, "loss": 3.108, "step": 80 }, { "epoch": 4.99, "eval_accuracy": 0.5246406570841889, "eval_loss": 2.857390880584717, "eval_runtime": 8.9304, "eval_samples_per_second": 109.065, "eval_steps_per_second": 0.896, "step": 86 }, { "epoch": 5.22, "learning_rate": 4.85981308411215e-05, "loss": 2.606, "step": 90 }, { "epoch": 5.8, "learning_rate": 4.781931464174455e-05, "loss": 2.1571, "step": 100 }, { "epoch": 5.97, "eval_accuracy": 0.6652977412731006, "eval_loss": 2.2128942012786865, "eval_runtime": 8.8437, "eval_samples_per_second": 110.136, "eval_steps_per_second": 0.905, "step": 103 }, { "epoch": 6.38, "learning_rate": 4.7040498442367604e-05, "loss": 1.7668, "step": 110 }, { "epoch": 6.96, "learning_rate": 4.6261682242990654e-05, "loss": 1.4685, "step": 120 }, { "epoch": 6.96, "eval_accuracy": 0.7494866529774127, "eval_loss": 1.7289572954177856, "eval_runtime": 9.9463, "eval_samples_per_second": 97.926, "eval_steps_per_second": 0.804, "step": 120 }, { "epoch": 7.54, "learning_rate": 4.548286604361371e-05, "loss": 1.1649, "step": 130 }, { "epoch": 8.0, "eval_accuracy": 0.797741273100616, "eval_loss": 1.3861801624298096, "eval_runtime": 8.8226, "eval_samples_per_second": 110.398, "eval_steps_per_second": 0.907, "step": 138 }, { "epoch": 8.12, "learning_rate": 4.470404984423676e-05, "loss": 0.9897, "step": 140 }, { "epoch": 8.7, "learning_rate": 4.392523364485982e-05, "loss": 0.7905, "step": 150 }, { "epoch": 8.99, "eval_accuracy": 0.8213552361396304, "eval_loss": 1.1588941812515259, "eval_runtime": 8.795, "eval_samples_per_second": 110.745, "eval_steps_per_second": 0.91, "step": 155 }, { "epoch": 9.28, "learning_rate": 4.314641744548287e-05, "loss": 0.6727, "step": 160 }, { "epoch": 9.86, "learning_rate": 4.236760124610592e-05, "loss": 0.5549, "step": 170 }, { "epoch": 9.97, "eval_accuracy": 0.8295687885010267, "eval_loss": 1.0262539386749268, "eval_runtime": 8.8584, "eval_samples_per_second": 109.953, "eval_steps_per_second": 0.903, "step": 172 }, { "epoch": 10.43, "learning_rate": 4.1588785046728974e-05, "loss": 0.4577, "step": 180 }, { "epoch": 10.96, "eval_accuracy": 0.8367556468172485, "eval_loss": 0.8994325995445251, "eval_runtime": 8.7654, "eval_samples_per_second": 111.119, "eval_steps_per_second": 0.913, "step": 189 }, { "epoch": 11.01, "learning_rate": 4.0809968847352024e-05, "loss": 0.3757, "step": 190 }, { "epoch": 11.59, "learning_rate": 4.003115264797508e-05, "loss": 0.2964, "step": 200 }, { "epoch": 12.0, "eval_accuracy": 0.8552361396303901, "eval_loss": 0.808638870716095, "eval_runtime": 11.2618, "eval_samples_per_second": 86.487, "eval_steps_per_second": 0.71, "step": 207 }, { "epoch": 12.17, "learning_rate": 3.925233644859813e-05, "loss": 0.2592, "step": 210 }, { "epoch": 12.75, "learning_rate": 3.847352024922119e-05, "loss": 0.194, "step": 220 }, { "epoch": 12.99, "eval_accuracy": 0.8583162217659137, "eval_loss": 0.744567334651947, "eval_runtime": 8.8124, "eval_samples_per_second": 110.526, "eval_steps_per_second": 0.908, "step": 224 }, { "epoch": 13.33, "learning_rate": 3.769470404984424e-05, "loss": 0.1626, "step": 230 }, { "epoch": 13.91, "learning_rate": 3.691588785046729e-05, "loss": 0.1358, "step": 240 }, { "epoch": 13.97, "eval_accuracy": 0.8572895277207392, "eval_loss": 0.7063936591148376, "eval_runtime": 8.8917, "eval_samples_per_second": 109.54, "eval_steps_per_second": 0.9, "step": 241 }, { "epoch": 14.49, "learning_rate": 3.6137071651090344e-05, "loss": 0.1116, "step": 250 }, { "epoch": 14.96, "eval_accuracy": 0.8655030800821355, "eval_loss": 0.67196124792099, "eval_runtime": 9.0077, "eval_samples_per_second": 108.129, "eval_steps_per_second": 0.888, "step": 258 }, { "epoch": 15.07, "learning_rate": 3.5358255451713394e-05, "loss": 0.0974, "step": 260 }, { "epoch": 15.65, "learning_rate": 3.457943925233645e-05, "loss": 0.0811, "step": 270 }, { "epoch": 16.0, "eval_accuracy": 0.864476386036961, "eval_loss": 0.6515084505081177, "eval_runtime": 9.3868, "eval_samples_per_second": 103.762, "eval_steps_per_second": 0.852, "step": 276 }, { "epoch": 16.0, "step": 276, "total_flos": 1.087746145977493e+19, "train_loss": 1.7795628476617993, "train_runtime": 3092.1212, "train_samples_per_second": 119.027, "train_steps_per_second": 0.231 } ], "logging_steps": 10, "max_steps": 714, "num_train_epochs": 42, "save_steps": 500, "total_flos": 1.087746145977493e+19, "trial_name": null, "trial_params": null }