|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.936, |
|
"global_step": 496, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 2.1204, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.1463, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.8788, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.6339, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 1.4717, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4e-05, |
|
"loss": 1.2297, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.008, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.9327354260089685e-05, |
|
"loss": 0.8316, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_Macro F1": 0.7014511124514329, |
|
"eval_Macro Precision": 0.6819163828588641, |
|
"eval_Macro Recall": 0.7429670943209584, |
|
"eval_Micro F1": 0.743, |
|
"eval_Micro Precision": 0.743, |
|
"eval_Micro Recall": 0.743, |
|
"eval_Weighted F1": 0.702015038120142, |
|
"eval_Weighted Precision": 0.6827499598363416, |
|
"eval_Weighted Recall": 0.743, |
|
"eval_accuracy": 0.743, |
|
"eval_loss": 0.7518972158432007, |
|
"eval_runtime": 447.9624, |
|
"eval_samples_per_second": 4.465, |
|
"eval_steps_per_second": 0.141, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.8430493273542606e-05, |
|
"loss": 0.751, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.7533632286995514e-05, |
|
"loss": 0.6157, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.6636771300448435e-05, |
|
"loss": 0.534, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.573991031390134e-05, |
|
"loss": 0.4818, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.4843049327354265e-05, |
|
"loss": 0.4119, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.394618834080718e-05, |
|
"loss": 0.3738, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.3049327354260094e-05, |
|
"loss": 0.3613, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.215246636771301e-05, |
|
"loss": 0.3561, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_Macro F1": 0.9399641065415552, |
|
"eval_Macro Precision": 0.9480023804705043, |
|
"eval_Macro Recall": 0.9394177169921687, |
|
"eval_Micro F1": 0.9395, |
|
"eval_Micro Precision": 0.9395, |
|
"eval_Micro Recall": 0.9395, |
|
"eval_Weighted F1": 0.9400723727432211, |
|
"eval_Weighted Precision": 0.9482020576131688, |
|
"eval_Weighted Recall": 0.9395, |
|
"eval_accuracy": 0.9395, |
|
"eval_loss": 0.23021972179412842, |
|
"eval_runtime": 447.7687, |
|
"eval_samples_per_second": 4.467, |
|
"eval_steps_per_second": 0.141, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.125560538116592e-05, |
|
"loss": 0.343, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.035874439461884e-05, |
|
"loss": 0.2871, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.9461883408071745e-05, |
|
"loss": 0.2976, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.8565022421524667e-05, |
|
"loss": 0.2644, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.766816143497758e-05, |
|
"loss": 0.2489, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.6771300448430496e-05, |
|
"loss": 0.2646, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.587443946188341e-05, |
|
"loss": 0.2206, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.4977578475336325e-05, |
|
"loss": 0.2222, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_Macro F1": 0.9560784374426261, |
|
"eval_Macro Precision": 0.960026790280936, |
|
"eval_Macro Recall": 0.9551284727306868, |
|
"eval_Micro F1": 0.956, |
|
"eval_Micro Precision": 0.956, |
|
"eval_Micro Recall": 0.956, |
|
"eval_Weighted F1": 0.9564097977894885, |
|
"eval_Weighted Precision": 0.9597888158665016, |
|
"eval_Weighted Recall": 0.956, |
|
"eval_accuracy": 0.956, |
|
"eval_loss": 0.1349564790725708, |
|
"eval_runtime": 446.9116, |
|
"eval_samples_per_second": 4.475, |
|
"eval_steps_per_second": 0.141, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.408071748878924e-05, |
|
"loss": 0.2258, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.3183856502242154e-05, |
|
"loss": 0.207, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.228699551569507e-05, |
|
"loss": 0.1826, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.139013452914798e-05, |
|
"loss": 0.1929, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.0493273542600898e-05, |
|
"loss": 0.2159, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.9596412556053816e-05, |
|
"loss": 0.1813, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.8699551569506727e-05, |
|
"loss": 0.1723, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.7802690582959645e-05, |
|
"loss": 0.1705, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_Macro F1": 0.9725373484087296, |
|
"eval_Macro Precision": 0.9740041726370726, |
|
"eval_Macro Recall": 0.9721459242124089, |
|
"eval_Micro F1": 0.9725, |
|
"eval_Micro Precision": 0.9725, |
|
"eval_Micro Recall": 0.9725, |
|
"eval_Weighted F1": 0.9727108492700939, |
|
"eval_Weighted Precision": 0.9739894365164001, |
|
"eval_Weighted Recall": 0.9725, |
|
"eval_accuracy": 0.9725, |
|
"eval_loss": 0.0872766375541687, |
|
"eval_runtime": 435.148, |
|
"eval_samples_per_second": 4.596, |
|
"eval_steps_per_second": 0.145, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.6905829596412556e-05, |
|
"loss": 0.1824, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.600896860986547e-05, |
|
"loss": 0.1877, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.511210762331839e-05, |
|
"loss": 0.2047, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 2.4215246636771303e-05, |
|
"loss": 0.1814, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 2.3318385650224218e-05, |
|
"loss": 0.1396, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.2421524663677132e-05, |
|
"loss": 0.1233, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.1524663677130047e-05, |
|
"loss": 0.1612, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.062780269058296e-05, |
|
"loss": 0.1541, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_Macro F1": 0.9823741759080236, |
|
"eval_Macro Precision": 0.9829774434613384, |
|
"eval_Macro Recall": 0.9821795541998369, |
|
"eval_Micro F1": 0.9825, |
|
"eval_Micro Precision": 0.9825, |
|
"eval_Micro Recall": 0.9825, |
|
"eval_Weighted F1": 0.982525654398398, |
|
"eval_Weighted Precision": 0.9829534667560904, |
|
"eval_Weighted Recall": 0.9825, |
|
"eval_accuracy": 0.9825, |
|
"eval_loss": 0.06422679126262665, |
|
"eval_runtime": 424.7541, |
|
"eval_samples_per_second": 4.709, |
|
"eval_steps_per_second": 0.148, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 1.9730941704035873e-05, |
|
"loss": 0.137, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1.883408071748879e-05, |
|
"loss": 0.1577, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 1.7937219730941705e-05, |
|
"loss": 0.1378, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 1.704035874439462e-05, |
|
"loss": 0.1795, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 1.6143497757847534e-05, |
|
"loss": 0.1447, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.5246636771300449e-05, |
|
"loss": 0.1235, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 1.4349775784753363e-05, |
|
"loss": 0.1253, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_Macro F1": 0.991431216491566, |
|
"eval_Macro Precision": 0.9916198483282233, |
|
"eval_Macro Recall": 0.9913450460193864, |
|
"eval_Micro F1": 0.9915, |
|
"eval_Micro Precision": 0.9915, |
|
"eval_Micro Recall": 0.9915, |
|
"eval_Weighted F1": 0.9915159731866887, |
|
"eval_Weighted Precision": 0.9916339514381117, |
|
"eval_Weighted Recall": 0.9915, |
|
"eval_accuracy": 0.9915, |
|
"eval_loss": 0.033043112605810165, |
|
"eval_runtime": 424.2354, |
|
"eval_samples_per_second": 4.714, |
|
"eval_steps_per_second": 0.149, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 1.3452914798206278e-05, |
|
"loss": 0.1134, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 1.2556053811659194e-05, |
|
"loss": 0.1393, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.1659192825112109e-05, |
|
"loss": 0.0912, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.0762331838565023e-05, |
|
"loss": 0.1131, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 9.865470852017936e-06, |
|
"loss": 0.1255, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 8.968609865470853e-06, |
|
"loss": 0.1418, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 8.071748878923767e-06, |
|
"loss": 0.1399, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 7.174887892376682e-06, |
|
"loss": 0.1196, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_Macro F1": 0.9820075650260679, |
|
"eval_Macro Precision": 0.983170950573056, |
|
"eval_Macro Recall": 0.9817144393341324, |
|
"eval_Micro F1": 0.982, |
|
"eval_Micro Precision": 0.982, |
|
"eval_Micro Recall": 0.982, |
|
"eval_Weighted F1": 0.9821570480740702, |
|
"eval_Weighted Precision": 0.9831749197494307, |
|
"eval_Weighted Recall": 0.982, |
|
"eval_accuracy": 0.982, |
|
"eval_loss": 0.05244705080986023, |
|
"eval_runtime": 424.1947, |
|
"eval_samples_per_second": 4.715, |
|
"eval_steps_per_second": 0.149, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 6.278026905829597e-06, |
|
"loss": 0.1201, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 5.381165919282512e-06, |
|
"loss": 0.1111, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 4.484304932735426e-06, |
|
"loss": 0.1021, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 3.587443946188341e-06, |
|
"loss": 0.1158, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 2.690582959641256e-06, |
|
"loss": 0.1321, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 1.7937219730941704e-06, |
|
"loss": 0.1429, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 8.968609865470852e-07, |
|
"loss": 0.1103, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.0, |
|
"loss": 0.0896, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"eval_Macro F1": 0.9863352307981634, |
|
"eval_Macro Precision": 0.9869554360352077, |
|
"eval_Macro Recall": 0.986134153009335, |
|
"eval_Micro F1": 0.9865, |
|
"eval_Micro Precision": 0.9865, |
|
"eval_Micro Recall": 0.9865, |
|
"eval_Weighted F1": 0.9865091518400995, |
|
"eval_Weighted Precision": 0.9869273604184196, |
|
"eval_Weighted Recall": 0.9865, |
|
"eval_accuracy": 0.9865, |
|
"eval_loss": 0.04359065368771553, |
|
"eval_runtime": 424.3089, |
|
"eval_samples_per_second": 4.714, |
|
"eval_steps_per_second": 0.148, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"step": 496, |
|
"total_flos": 4.920648490788323e+18, |
|
"train_loss": 0.35526800215724974, |
|
"train_runtime": 45263.0947, |
|
"train_samples_per_second": 1.414, |
|
"train_steps_per_second": 0.011 |
|
} |
|
], |
|
"max_steps": 496, |
|
"num_train_epochs": 8, |
|
"total_flos": 4.920648490788323e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|