{ "best_metric": 0.87021494370522, "best_model_checkpoint": "./outputs/finetuning/mnli_ChcE/checkpoint-20000", "epoch": 5.0, "global_step": 59960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 1.933288859239493e-05, "loss": 0.3102, "step": 2000 }, { "epoch": 0.17, "eval_acc": 0.8544524053224155, "eval_loss": 0.41351282596588135, "eval_runtime": 16.7417, "eval_samples_per_second": 583.571, "eval_steps_per_second": 18.278, "step": 2000 }, { "epoch": 0.33, "learning_rate": 1.866577718478986e-05, "loss": 0.3046, "step": 4000 }, { "epoch": 0.33, "eval_acc": 0.8644831115660184, "eval_loss": 0.40244850516319275, "eval_runtime": 16.6646, "eval_samples_per_second": 586.274, "eval_steps_per_second": 18.362, "step": 4000 }, { "epoch": 0.5, "learning_rate": 1.7998665777184793e-05, "loss": 0.3038, "step": 6000 }, { "epoch": 0.5, "eval_acc": 0.8668372569089048, "eval_loss": 0.39355912804603577, "eval_runtime": 16.6429, "eval_samples_per_second": 587.039, "eval_steps_per_second": 18.386, "step": 6000 }, { "epoch": 0.67, "learning_rate": 1.7331554369579722e-05, "loss": 0.3012, "step": 8000 }, { "epoch": 0.67, "eval_acc": 0.8625383828045036, "eval_loss": 0.40068650245666504, "eval_runtime": 16.6669, "eval_samples_per_second": 586.192, "eval_steps_per_second": 18.36, "step": 8000 }, { "epoch": 0.83, "learning_rate": 1.666444296197465e-05, "loss": 0.2979, "step": 10000 }, { "epoch": 0.83, "eval_acc": 0.8620266120777892, "eval_loss": 0.42349421977996826, "eval_runtime": 16.6784, "eval_samples_per_second": 585.787, "eval_steps_per_second": 18.347, "step": 10000 }, { "epoch": 1.0, "learning_rate": 1.599733155436958e-05, "loss": 0.2997, "step": 12000 }, { "epoch": 1.0, "eval_acc": 0.8643807574206755, "eval_loss": 0.403084933757782, "eval_runtime": 16.6652, "eval_samples_per_second": 586.251, "eval_steps_per_second": 18.362, "step": 12000 }, { "epoch": 1.17, "learning_rate": 1.533022014676451e-05, "loss": 0.2099, "step": 14000 }, { "epoch": 1.17, "eval_acc": 0.8632548618219038, "eval_loss": 0.43926700949668884, "eval_runtime": 16.6492, "eval_samples_per_second": 586.816, "eval_steps_per_second": 18.379, "step": 14000 }, { "epoch": 1.33, "learning_rate": 1.4663108739159441e-05, "loss": 0.2114, "step": 16000 }, { "epoch": 1.33, "eval_acc": 0.8628454452405322, "eval_loss": 0.46620267629623413, "eval_runtime": 16.6551, "eval_samples_per_second": 586.607, "eval_steps_per_second": 18.373, "step": 16000 }, { "epoch": 1.5, "learning_rate": 1.3995997331554372e-05, "loss": 0.2147, "step": 18000 }, { "epoch": 1.5, "eval_acc": 0.864790174002047, "eval_loss": 0.4331408143043518, "eval_runtime": 16.6453, "eval_samples_per_second": 586.954, "eval_steps_per_second": 18.384, "step": 18000 }, { "epoch": 1.67, "learning_rate": 1.33288859239493e-05, "loss": 0.2122, "step": 20000 }, { "epoch": 1.67, "eval_acc": 0.87021494370522, "eval_loss": 0.4166066646575928, "eval_runtime": 16.67, "eval_samples_per_second": 586.082, "eval_steps_per_second": 18.356, "step": 20000 }, { "epoch": 1.83, "learning_rate": 1.2661774516344229e-05, "loss": 0.2156, "step": 22000 }, { "epoch": 1.83, "eval_acc": 0.8632548618219038, "eval_loss": 0.4462782144546509, "eval_runtime": 16.6855, "eval_samples_per_second": 585.539, "eval_steps_per_second": 18.339, "step": 22000 }, { "epoch": 2.0, "learning_rate": 1.199466310873916e-05, "loss": 0.2117, "step": 24000 }, { "epoch": 2.0, "eval_acc": 0.8679631525076765, "eval_loss": 0.46366986632347107, "eval_runtime": 16.6496, "eval_samples_per_second": 586.801, "eval_steps_per_second": 18.379, "step": 24000 }, { "epoch": 2.17, "learning_rate": 1.132755170113409e-05, "loss": 0.1469, "step": 26000 }, { "epoch": 2.17, "eval_acc": 0.8680655066530194, "eval_loss": 0.5210850834846497, "eval_runtime": 16.6494, "eval_samples_per_second": 586.808, "eval_steps_per_second": 18.379, "step": 26000 }, { "epoch": 2.33, "learning_rate": 1.066044029352902e-05, "loss": 0.1526, "step": 28000 }, { "epoch": 2.33, "eval_acc": 0.8620266120777892, "eval_loss": 0.5206254720687866, "eval_runtime": 16.6442, "eval_samples_per_second": 586.99, "eval_steps_per_second": 18.385, "step": 28000 }, { "epoch": 2.5, "learning_rate": 9.99332888592395e-06, "loss": 0.1494, "step": 30000 }, { "epoch": 2.5, "eval_acc": 0.8664278403275333, "eval_loss": 0.5167897939682007, "eval_runtime": 16.6489, "eval_samples_per_second": 586.825, "eval_steps_per_second": 18.38, "step": 30000 }, { "epoch": 2.67, "learning_rate": 9.326217478318879e-06, "loss": 0.1519, "step": 32000 }, { "epoch": 2.67, "eval_acc": 0.8700102354145343, "eval_loss": 0.48301637172698975, "eval_runtime": 16.6933, "eval_samples_per_second": 585.265, "eval_steps_per_second": 18.331, "step": 32000 }, { "epoch": 2.84, "learning_rate": 8.65910607071381e-06, "loss": 0.152, "step": 34000 }, { "epoch": 2.84, "eval_acc": 0.8635619242579324, "eval_loss": 0.5464909672737122, "eval_runtime": 16.6533, "eval_samples_per_second": 586.671, "eval_steps_per_second": 18.375, "step": 34000 }, { "epoch": 3.0, "learning_rate": 7.99199466310874e-06, "loss": 0.1498, "step": 36000 }, { "epoch": 3.0, "eval_acc": 0.8679631525076765, "eval_loss": 0.5550450682640076, "eval_runtime": 16.6712, "eval_samples_per_second": 586.039, "eval_steps_per_second": 18.355, "step": 36000 }, { "epoch": 3.17, "learning_rate": 7.324883255503669e-06, "loss": 0.1131, "step": 38000 }, { "epoch": 3.17, "eval_acc": 0.8601842374616172, "eval_loss": 0.6764107346534729, "eval_runtime": 16.6576, "eval_samples_per_second": 586.52, "eval_steps_per_second": 18.37, "step": 38000 }, { "epoch": 3.34, "learning_rate": 6.6577718478985995e-06, "loss": 0.1135, "step": 40000 }, { "epoch": 3.34, "eval_acc": 0.865711361310133, "eval_loss": 0.6199905276298523, "eval_runtime": 16.6534, "eval_samples_per_second": 586.668, "eval_steps_per_second": 18.375, "step": 40000 }, { "epoch": 3.5, "learning_rate": 5.99066044029353e-06, "loss": 0.1175, "step": 42000 }, { "epoch": 3.5, "eval_acc": 0.8671443193449335, "eval_loss": 0.588898241519928, "eval_runtime": 16.664, "eval_samples_per_second": 586.293, "eval_steps_per_second": 18.363, "step": 42000 }, { "epoch": 3.67, "learning_rate": 5.32354903268846e-06, "loss": 0.1156, "step": 44000 }, { "epoch": 3.67, "eval_acc": 0.8663254861821904, "eval_loss": 0.6299933195114136, "eval_runtime": 16.6491, "eval_samples_per_second": 586.818, "eval_steps_per_second": 18.379, "step": 44000 }, { "epoch": 3.84, "learning_rate": 4.656437625083389e-06, "loss": 0.1104, "step": 46000 }, { "epoch": 3.84, "eval_acc": 0.8689866939611054, "eval_loss": 0.6044776439666748, "eval_runtime": 16.6309, "eval_samples_per_second": 587.46, "eval_steps_per_second": 18.399, "step": 46000 }, { "epoch": 4.0, "learning_rate": 3.989326217478319e-06, "loss": 0.1111, "step": 48000 }, { "epoch": 4.0, "eval_acc": 0.869396110542477, "eval_loss": 0.6412716507911682, "eval_runtime": 16.6717, "eval_samples_per_second": 586.023, "eval_steps_per_second": 18.354, "step": 48000 }, { "epoch": 4.17, "learning_rate": 3.3222148098732494e-06, "loss": 0.086, "step": 50000 }, { "epoch": 4.17, "eval_acc": 0.8658137154554759, "eval_loss": 0.7271037697792053, "eval_runtime": 16.6567, "eval_samples_per_second": 586.55, "eval_steps_per_second": 18.371, "step": 50000 }, { "epoch": 4.34, "learning_rate": 2.6551034022681787e-06, "loss": 0.0895, "step": 52000 }, { "epoch": 4.34, "eval_acc": 0.8682702149437053, "eval_loss": 0.7273563146591187, "eval_runtime": 16.678, "eval_samples_per_second": 585.801, "eval_steps_per_second": 18.348, "step": 52000 }, { "epoch": 4.5, "learning_rate": 1.987991994663109e-06, "loss": 0.0867, "step": 54000 }, { "epoch": 4.5, "eval_acc": 0.8658137154554759, "eval_loss": 0.7226472496986389, "eval_runtime": 16.6651, "eval_samples_per_second": 586.253, "eval_steps_per_second": 18.362, "step": 54000 }, { "epoch": 4.67, "learning_rate": 1.3208805870580388e-06, "loss": 0.0886, "step": 56000 }, { "epoch": 4.67, "eval_acc": 0.8690890481064483, "eval_loss": 0.7181665897369385, "eval_runtime": 16.6571, "eval_samples_per_second": 586.536, "eval_steps_per_second": 18.371, "step": 56000 }, { "epoch": 4.84, "learning_rate": 6.537691794529687e-07, "loss": 0.0849, "step": 58000 }, { "epoch": 4.84, "eval_acc": 0.8698055271238485, "eval_loss": 0.7094203233718872, "eval_runtime": 16.6693, "eval_samples_per_second": 586.108, "eval_steps_per_second": 18.357, "step": 58000 }, { "epoch": 5.0, "step": 59960, "total_flos": 1.2620695529804544e+17, "train_loss": 0.17320066710326415, "train_runtime": 11406.3483, "train_samples_per_second": 168.211, "train_steps_per_second": 5.257 } ], "max_steps": 59960, "num_train_epochs": 5, "total_flos": 1.2620695529804544e+17, "trial_name": null, "trial_params": null }