{ "best_metric": 1.945299744606018, "best_model_checkpoint": "nli-distilroberta-base-finetuned/checkpoint-336", "epoch": 3.0, "eval_steps": 500, "global_step": 336, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.044642857142857144, "grad_norm": 7.880863189697266, "learning_rate": 7.3529411764705884e-06, "loss": 2.6033, "step": 5 }, { "epoch": 0.08928571428571429, "grad_norm": 6.478671550750732, "learning_rate": 1.4705882352941177e-05, "loss": 2.6009, "step": 10 }, { "epoch": 0.13392857142857142, "grad_norm": 8.778610229492188, "learning_rate": 2.2058823529411766e-05, "loss": 2.5459, "step": 15 }, { "epoch": 0.17857142857142858, "grad_norm": 8.028711318969727, "learning_rate": 2.9411764705882354e-05, "loss": 2.5029, "step": 20 }, { "epoch": 0.22321428571428573, "grad_norm": 6.802711486816406, "learning_rate": 3.6764705882352945e-05, "loss": 2.4562, "step": 25 }, { "epoch": 0.26785714285714285, "grad_norm": 6.200512409210205, "learning_rate": 4.411764705882353e-05, "loss": 2.4949, "step": 30 }, { "epoch": 0.3125, "grad_norm": 9.700362205505371, "learning_rate": 4.983443708609272e-05, "loss": 2.4858, "step": 35 }, { "epoch": 0.35714285714285715, "grad_norm": 8.349287033081055, "learning_rate": 4.900662251655629e-05, "loss": 2.5112, "step": 40 }, { "epoch": 0.4017857142857143, "grad_norm": 8.527876853942871, "learning_rate": 4.8178807947019873e-05, "loss": 2.4937, "step": 45 }, { "epoch": 0.44642857142857145, "grad_norm": 8.475303649902344, "learning_rate": 4.735099337748345e-05, "loss": 2.4454, "step": 50 }, { "epoch": 0.49107142857142855, "grad_norm": 9.274892807006836, "learning_rate": 4.652317880794702e-05, "loss": 2.4847, "step": 55 }, { "epoch": 0.5357142857142857, "grad_norm": 10.273117065429688, "learning_rate": 4.56953642384106e-05, "loss": 2.4654, "step": 60 }, { "epoch": 0.5803571428571429, "grad_norm": 8.115155220031738, "learning_rate": 4.4867549668874174e-05, "loss": 2.3502, "step": 65 }, { "epoch": 0.625, "grad_norm": 6.344263076782227, "learning_rate": 4.403973509933775e-05, "loss": 2.2957, "step": 70 }, { "epoch": 0.6696428571428571, "grad_norm": 8.441248893737793, "learning_rate": 4.321192052980133e-05, "loss": 2.3153, "step": 75 }, { "epoch": 0.7142857142857143, "grad_norm": 9.295808792114258, "learning_rate": 4.23841059602649e-05, "loss": 2.4324, "step": 80 }, { "epoch": 0.7589285714285714, "grad_norm": 9.574031829833984, "learning_rate": 4.155629139072848e-05, "loss": 2.3097, "step": 85 }, { "epoch": 0.8035714285714286, "grad_norm": 12.203108787536621, "learning_rate": 4.0728476821192055e-05, "loss": 2.6227, "step": 90 }, { "epoch": 0.8482142857142857, "grad_norm": 8.894155502319336, "learning_rate": 3.990066225165563e-05, "loss": 2.4142, "step": 95 }, { "epoch": 0.8928571428571429, "grad_norm": 9.997325897216797, "learning_rate": 3.907284768211921e-05, "loss": 2.3777, "step": 100 }, { "epoch": 0.9375, "grad_norm": 14.863235473632812, "learning_rate": 3.824503311258278e-05, "loss": 2.3846, "step": 105 }, { "epoch": 0.9821428571428571, "grad_norm": 10.179811477661133, "learning_rate": 3.741721854304636e-05, "loss": 2.4329, "step": 110 }, { "epoch": 1.0, "eval_accuracy": 0.22869955156950672, "eval_f1_macro": 0.0925894973736031, "eval_f1_micro": 0.22869955156950672, "eval_f1_weighted": 0.1441249985494747, "eval_loss": 2.2802867889404297, "eval_precision_macro": 0.08478615978615979, "eval_precision_micro": 0.22869955156950672, "eval_precision_weighted": 0.1228868362276434, "eval_recall_macro": 0.14624033374033374, "eval_recall_micro": 0.22869955156950672, "eval_recall_weighted": 0.22869955156950672, "eval_runtime": 20.6162, "eval_samples_per_second": 10.817, "eval_steps_per_second": 0.679, "step": 112 }, { "epoch": 1.0267857142857142, "grad_norm": 10.954036712646484, "learning_rate": 3.6589403973509936e-05, "loss": 2.2923, "step": 115 }, { "epoch": 1.0714285714285714, "grad_norm": 10.56191349029541, "learning_rate": 3.576158940397351e-05, "loss": 2.1989, "step": 120 }, { "epoch": 1.1160714285714286, "grad_norm": 10.658546447753906, "learning_rate": 3.493377483443709e-05, "loss": 2.1796, "step": 125 }, { "epoch": 1.1607142857142858, "grad_norm": 10.462434768676758, "learning_rate": 3.410596026490066e-05, "loss": 2.1629, "step": 130 }, { "epoch": 1.2053571428571428, "grad_norm": 12.142037391662598, "learning_rate": 3.3278145695364236e-05, "loss": 2.1759, "step": 135 }, { "epoch": 1.25, "grad_norm": 21.685199737548828, "learning_rate": 3.2450331125827816e-05, "loss": 2.2638, "step": 140 }, { "epoch": 1.2946428571428572, "grad_norm": 11.347991943359375, "learning_rate": 3.162251655629139e-05, "loss": 2.2119, "step": 145 }, { "epoch": 1.3392857142857144, "grad_norm": 15.21415901184082, "learning_rate": 3.079470198675497e-05, "loss": 1.9364, "step": 150 }, { "epoch": 1.3839285714285714, "grad_norm": 14.830402374267578, "learning_rate": 2.9966887417218544e-05, "loss": 2.1774, "step": 155 }, { "epoch": 1.4285714285714286, "grad_norm": 9.813250541687012, "learning_rate": 2.913907284768212e-05, "loss": 1.9836, "step": 160 }, { "epoch": 1.4732142857142856, "grad_norm": 12.23851203918457, "learning_rate": 2.8311258278145697e-05, "loss": 1.9926, "step": 165 }, { "epoch": 1.5178571428571428, "grad_norm": 21.17872428894043, "learning_rate": 2.7483443708609274e-05, "loss": 2.0915, "step": 170 }, { "epoch": 1.5625, "grad_norm": 11.805004119873047, "learning_rate": 2.6655629139072848e-05, "loss": 2.0649, "step": 175 }, { "epoch": 1.6071428571428572, "grad_norm": 11.245916366577148, "learning_rate": 2.5827814569536424e-05, "loss": 2.0081, "step": 180 }, { "epoch": 1.6517857142857144, "grad_norm": 10.090818405151367, "learning_rate": 2.5e-05, "loss": 1.9429, "step": 185 }, { "epoch": 1.6964285714285714, "grad_norm": 12.75944995880127, "learning_rate": 2.4172185430463578e-05, "loss": 1.9855, "step": 190 }, { "epoch": 1.7410714285714286, "grad_norm": 16.755380630493164, "learning_rate": 2.3344370860927155e-05, "loss": 1.9488, "step": 195 }, { "epoch": 1.7857142857142856, "grad_norm": 9.849225997924805, "learning_rate": 2.2516556291390732e-05, "loss": 1.8947, "step": 200 }, { "epoch": 1.8303571428571428, "grad_norm": 12.145004272460938, "learning_rate": 2.1688741721854305e-05, "loss": 2.1183, "step": 205 }, { "epoch": 1.875, "grad_norm": 16.307331085205078, "learning_rate": 2.0860927152317882e-05, "loss": 1.7943, "step": 210 }, { "epoch": 1.9196428571428572, "grad_norm": 23.1722412109375, "learning_rate": 2.003311258278146e-05, "loss": 2.3577, "step": 215 }, { "epoch": 1.9642857142857144, "grad_norm": 17.713895797729492, "learning_rate": 1.9205298013245036e-05, "loss": 2.0445, "step": 220 }, { "epoch": 2.0, "eval_accuracy": 0.336322869955157, "eval_f1_macro": 0.218286653320018, "eval_f1_micro": 0.336322869955157, "eval_f1_weighted": 0.2710922586805886, "eval_loss": 2.0475845336914062, "eval_precision_macro": 0.2992208878078443, "eval_precision_micro": 0.336322869955157, "eval_precision_weighted": 0.3176332441198887, "eval_recall_macro": 0.2567881249055338, "eval_recall_micro": 0.336322869955157, "eval_recall_weighted": 0.336322869955157, "eval_runtime": 20.8253, "eval_samples_per_second": 10.708, "eval_steps_per_second": 0.672, "step": 224 }, { "epoch": 2.0089285714285716, "grad_norm": 28.49532699584961, "learning_rate": 1.837748344370861e-05, "loss": 1.8895, "step": 225 }, { "epoch": 2.0535714285714284, "grad_norm": 11.639945030212402, "learning_rate": 1.7549668874172186e-05, "loss": 1.6994, "step": 230 }, { "epoch": 2.0982142857142856, "grad_norm": 14.041617393493652, "learning_rate": 1.6721854304635763e-05, "loss": 1.9426, "step": 235 }, { "epoch": 2.142857142857143, "grad_norm": 10.518083572387695, "learning_rate": 1.589403973509934e-05, "loss": 1.6471, "step": 240 }, { "epoch": 2.1875, "grad_norm": 14.844023704528809, "learning_rate": 1.5066225165562913e-05, "loss": 1.8054, "step": 245 }, { "epoch": 2.232142857142857, "grad_norm": 11.304939270019531, "learning_rate": 1.4238410596026492e-05, "loss": 1.8372, "step": 250 }, { "epoch": 2.2767857142857144, "grad_norm": 9.891529083251953, "learning_rate": 1.3410596026490067e-05, "loss": 1.7693, "step": 255 }, { "epoch": 2.3214285714285716, "grad_norm": 15.98595905303955, "learning_rate": 1.2582781456953644e-05, "loss": 1.5496, "step": 260 }, { "epoch": 2.3660714285714284, "grad_norm": 20.194751739501953, "learning_rate": 1.1754966887417219e-05, "loss": 1.7032, "step": 265 }, { "epoch": 2.4107142857142856, "grad_norm": 11.00220012664795, "learning_rate": 1.0927152317880796e-05, "loss": 1.578, "step": 270 }, { "epoch": 2.455357142857143, "grad_norm": 25.064462661743164, "learning_rate": 1.0099337748344372e-05, "loss": 1.5681, "step": 275 }, { "epoch": 2.5, "grad_norm": 19.558372497558594, "learning_rate": 9.271523178807948e-06, "loss": 1.5657, "step": 280 }, { "epoch": 2.544642857142857, "grad_norm": 18.167156219482422, "learning_rate": 8.443708609271524e-06, "loss": 1.5478, "step": 285 }, { "epoch": 2.5892857142857144, "grad_norm": 10.529936790466309, "learning_rate": 7.6158940397350995e-06, "loss": 1.5239, "step": 290 }, { "epoch": 2.633928571428571, "grad_norm": 19.37651824951172, "learning_rate": 6.7880794701986755e-06, "loss": 1.4103, "step": 295 }, { "epoch": 2.678571428571429, "grad_norm": 22.9803466796875, "learning_rate": 5.9602649006622515e-06, "loss": 1.688, "step": 300 }, { "epoch": 2.7232142857142856, "grad_norm": 19.83251190185547, "learning_rate": 5.1324503311258275e-06, "loss": 1.788, "step": 305 }, { "epoch": 2.767857142857143, "grad_norm": 8.999404907226562, "learning_rate": 4.304635761589404e-06, "loss": 1.4493, "step": 310 }, { "epoch": 2.8125, "grad_norm": 12.178604125976562, "learning_rate": 3.47682119205298e-06, "loss": 1.5678, "step": 315 }, { "epoch": 2.857142857142857, "grad_norm": 21.65730094909668, "learning_rate": 2.6490066225165563e-06, "loss": 1.6601, "step": 320 }, { "epoch": 2.9017857142857144, "grad_norm": 13.858233451843262, "learning_rate": 1.8211920529801325e-06, "loss": 1.7391, "step": 325 }, { "epoch": 2.946428571428571, "grad_norm": 15.862234115600586, "learning_rate": 9.933774834437087e-07, "loss": 1.7676, "step": 330 }, { "epoch": 2.991071428571429, "grad_norm": 20.09609603881836, "learning_rate": 1.6556291390728477e-07, "loss": 1.6468, "step": 335 }, { "epoch": 3.0, "eval_accuracy": 0.40358744394618834, "eval_f1_macro": 0.30714691820978623, "eval_f1_micro": 0.40358744394618834, "eval_f1_weighted": 0.34876029798237923, "eval_loss": 1.945299744606018, "eval_precision_macro": 0.46399317884851915, "eval_precision_micro": 0.40358744394618834, "eval_precision_weighted": 0.4327219217629977, "eval_recall_macro": 0.3275659862704397, "eval_recall_micro": 0.40358744394618834, "eval_recall_weighted": 0.40358744394618834, "eval_runtime": 20.5195, "eval_samples_per_second": 10.868, "eval_steps_per_second": 0.682, "step": 336 } ], "logging_steps": 5, "max_steps": 336, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 88538704328448.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }