|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 2188, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.997714808043876e-05, |
|
"loss": 6.9866, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9017367458866546e-05, |
|
"loss": 3.9852, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5429333333333334, |
|
"eval_f1": 0.6712696586114307, |
|
"eval_loss": 1.4847679138183594, |
|
"eval_precision": 0.5241090146750524, |
|
"eval_recall": 0.9333333333333333, |
|
"eval_runtime": 305.136, |
|
"eval_samples_per_second": 12.29, |
|
"eval_steps_per_second": 1.537, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8034734917733096e-05, |
|
"loss": 2.6079, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.5717333333333333, |
|
"eval_f1": 0.6556603773584906, |
|
"eval_loss": 0.8047523498535156, |
|
"eval_precision": 0.5482251703119397, |
|
"eval_recall": 0.8154666666666667, |
|
"eval_runtime": 305.1856, |
|
"eval_samples_per_second": 12.288, |
|
"eval_steps_per_second": 1.537, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.705210237659964e-05, |
|
"loss": 1.0141, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.5896, |
|
"eval_f1": 0.6892792247122956, |
|
"eval_loss": 0.7188379764556885, |
|
"eval_precision": 0.5545808966861598, |
|
"eval_recall": 0.9104, |
|
"eval_runtime": 305.1209, |
|
"eval_samples_per_second": 12.29, |
|
"eval_steps_per_second": 1.537, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.606946983546618e-05, |
|
"loss": 0.8023, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.49733333333333335, |
|
"eval_f1": 0.11874707807386631, |
|
"eval_loss": 2.954312801361084, |
|
"eval_precision": 0.4810606060606061, |
|
"eval_recall": 0.06773333333333334, |
|
"eval_runtime": 305.0816, |
|
"eval_samples_per_second": 12.292, |
|
"eval_steps_per_second": 1.537, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5086837294332726e-05, |
|
"loss": 2.4517, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6850666666666667, |
|
"eval_f1": 0.7327449649241911, |
|
"eval_loss": 0.6679351925849915, |
|
"eval_precision": 0.6363993710691824, |
|
"eval_recall": 0.8634666666666667, |
|
"eval_runtime": 304.8754, |
|
"eval_samples_per_second": 12.3, |
|
"eval_steps_per_second": 1.538, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.410420475319927e-05, |
|
"loss": 1.6579, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.8109333333333333, |
|
"eval_f1": 0.7899259259259258, |
|
"eval_loss": 0.7148910164833069, |
|
"eval_precision": 0.8886666666666667, |
|
"eval_recall": 0.7109333333333333, |
|
"eval_runtime": 305.6713, |
|
"eval_samples_per_second": 12.268, |
|
"eval_steps_per_second": 1.534, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.312157221206581e-05, |
|
"loss": 1.3412, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4938666666666667, |
|
"eval_f1": 0.6546579330422125, |
|
"eval_loss": 3.249486207962036, |
|
"eval_precision": 0.4968240817453742, |
|
"eval_recall": 0.9594666666666667, |
|
"eval_runtime": 304.7083, |
|
"eval_samples_per_second": 12.307, |
|
"eval_steps_per_second": 1.539, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.213893967093236e-05, |
|
"loss": 1.6235, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.8013333333333333, |
|
"eval_f1": 0.797719250610915, |
|
"eval_loss": 0.4983011782169342, |
|
"eval_precision": 0.8125, |
|
"eval_recall": 0.7834666666666666, |
|
"eval_runtime": 304.8657, |
|
"eval_samples_per_second": 12.3, |
|
"eval_steps_per_second": 1.538, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1156307129798905e-05, |
|
"loss": 0.7822, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.856, |
|
"eval_f1": 0.8429319371727749, |
|
"eval_loss": 0.8706910014152527, |
|
"eval_precision": 0.927063339731286, |
|
"eval_recall": 0.7728, |
|
"eval_runtime": 305.1415, |
|
"eval_samples_per_second": 12.289, |
|
"eval_steps_per_second": 1.537, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.017367458866545e-05, |
|
"loss": 1.0922, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.8034666666666667, |
|
"eval_f1": 0.7738570113531757, |
|
"eval_loss": 0.667508602142334, |
|
"eval_precision": 0.9111271676300579, |
|
"eval_recall": 0.6725333333333333, |
|
"eval_runtime": 305.5394, |
|
"eval_samples_per_second": 12.273, |
|
"eval_steps_per_second": 1.535, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.919104204753199e-05, |
|
"loss": 0.5747, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.88, |
|
"eval_f1": 0.8822605965463108, |
|
"eval_loss": 0.4237947165966034, |
|
"eval_precision": 0.8659476117103235, |
|
"eval_recall": 0.8992, |
|
"eval_runtime": 305.7145, |
|
"eval_samples_per_second": 12.266, |
|
"eval_steps_per_second": 1.534, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.820840950639854e-05, |
|
"loss": 0.5248, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.8424, |
|
"eval_f1": 0.8570737605804112, |
|
"eval_loss": 1.081692099571228, |
|
"eval_precision": 0.784070796460177, |
|
"eval_recall": 0.9450666666666667, |
|
"eval_runtime": 305.0927, |
|
"eval_samples_per_second": 12.291, |
|
"eval_steps_per_second": 1.537, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7225776965265085e-05, |
|
"loss": 0.6488, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.8365333333333334, |
|
"eval_f1": 0.8115585613280049, |
|
"eval_loss": 0.5578557252883911, |
|
"eval_precision": 0.9579100145137881, |
|
"eval_recall": 0.704, |
|
"eval_runtime": 305.0779, |
|
"eval_samples_per_second": 12.292, |
|
"eval_steps_per_second": 1.537, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.624314442413163e-05, |
|
"loss": 0.9679, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.6829333333333333, |
|
"eval_f1": 0.5987175160310497, |
|
"eval_loss": 0.9434438943862915, |
|
"eval_precision": 0.8152573529411765, |
|
"eval_recall": 0.4730666666666667, |
|
"eval_runtime": 304.9674, |
|
"eval_samples_per_second": 12.296, |
|
"eval_steps_per_second": 1.538, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.526051188299818e-05, |
|
"loss": 0.4584, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.8458666666666667, |
|
"eval_f1": 0.8547008547008548, |
|
"eval_loss": 0.44859734177589417, |
|
"eval_precision": 0.8083689966714218, |
|
"eval_recall": 0.9066666666666666, |
|
"eval_runtime": 304.9696, |
|
"eval_samples_per_second": 12.296, |
|
"eval_steps_per_second": 1.538, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4277879341864715e-05, |
|
"loss": 0.418, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.8901333333333333, |
|
"eval_f1": 0.892931392931393, |
|
"eval_loss": 0.5646004676818848, |
|
"eval_precision": 0.8707551951343132, |
|
"eval_recall": 0.9162666666666667, |
|
"eval_runtime": 304.779, |
|
"eval_samples_per_second": 12.304, |
|
"eval_steps_per_second": 1.539, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.329524680073126e-05, |
|
"loss": 0.5809, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.8773333333333333, |
|
"eval_f1": 0.8847117794486216, |
|
"eval_loss": 0.4200115501880646, |
|
"eval_precision": 0.83451536643026, |
|
"eval_recall": 0.9413333333333334, |
|
"eval_runtime": 304.7969, |
|
"eval_samples_per_second": 12.303, |
|
"eval_steps_per_second": 1.539, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.231261425959781e-05, |
|
"loss": 0.4702, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.7117333333333333, |
|
"eval_f1": 0.7615265828369733, |
|
"eval_loss": 0.5946761965751648, |
|
"eval_precision": 0.6493604213694507, |
|
"eval_recall": 0.9205333333333333, |
|
"eval_runtime": 304.8632, |
|
"eval_samples_per_second": 12.301, |
|
"eval_steps_per_second": 1.538, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.132998171846435e-05, |
|
"loss": 0.3684, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.8965333333333333, |
|
"eval_f1": 0.8914381645215446, |
|
"eval_loss": 0.30674976110458374, |
|
"eval_precision": 0.9376103590347263, |
|
"eval_recall": 0.8496, |
|
"eval_runtime": 304.8468, |
|
"eval_samples_per_second": 12.301, |
|
"eval_steps_per_second": 1.538, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.03473491773309e-05, |
|
"loss": 0.5431, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.8762666666666666, |
|
"eval_f1": 0.8831234256926952, |
|
"eval_loss": 0.8089134693145752, |
|
"eval_precision": 0.8367541766109785, |
|
"eval_recall": 0.9349333333333333, |
|
"eval_runtime": 304.976, |
|
"eval_samples_per_second": 12.296, |
|
"eval_steps_per_second": 1.538, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9364716636197442e-05, |
|
"loss": 0.4287, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.8466666666666667, |
|
"eval_f1": 0.822366388631449, |
|
"eval_loss": 0.49679452180862427, |
|
"eval_precision": 0.9772393538913363, |
|
"eval_recall": 0.7098666666666666, |
|
"eval_runtime": 306.5019, |
|
"eval_samples_per_second": 12.235, |
|
"eval_steps_per_second": 1.53, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.838208409506399e-05, |
|
"loss": 0.4942, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.9157333333333333, |
|
"eval_f1": 0.9138025095471904, |
|
"eval_loss": 0.23615875840187073, |
|
"eval_precision": 0.9352317141261864, |
|
"eval_recall": 0.8933333333333333, |
|
"eval_runtime": 305.2275, |
|
"eval_samples_per_second": 12.286, |
|
"eval_steps_per_second": 1.537, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.739945155393053e-05, |
|
"loss": 0.3997, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.8874666666666666, |
|
"eval_f1": 0.8867418142780461, |
|
"eval_loss": 0.6193059086799622, |
|
"eval_precision": 0.8924905456509995, |
|
"eval_recall": 0.8810666666666667, |
|
"eval_runtime": 304.7962, |
|
"eval_samples_per_second": 12.303, |
|
"eval_steps_per_second": 1.539, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.641681901279708e-05, |
|
"loss": 0.3862, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.8712, |
|
"eval_f1": 0.8817625458996328, |
|
"eval_loss": 0.6541677713394165, |
|
"eval_precision": 0.8149321266968326, |
|
"eval_recall": 0.9605333333333334, |
|
"eval_runtime": 305.7101, |
|
"eval_samples_per_second": 12.267, |
|
"eval_steps_per_second": 1.534, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5434186471663625e-05, |
|
"loss": 0.4199, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.8778666666666667, |
|
"eval_f1": 0.8674768518518517, |
|
"eval_loss": 0.5205650329589844, |
|
"eval_precision": 0.948134092346616, |
|
"eval_recall": 0.7994666666666667, |
|
"eval_runtime": 306.2355, |
|
"eval_samples_per_second": 12.245, |
|
"eval_steps_per_second": 1.532, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4451553930530165e-05, |
|
"loss": 0.4922, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.8664, |
|
"eval_f1": 0.8587538765153652, |
|
"eval_loss": 0.3552885353565216, |
|
"eval_precision": 0.9108851674641149, |
|
"eval_recall": 0.8122666666666667, |
|
"eval_runtime": 305.1994, |
|
"eval_samples_per_second": 12.287, |
|
"eval_steps_per_second": 1.537, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.346892138939671e-05, |
|
"loss": 0.3303, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.9082666666666667, |
|
"eval_f1": 0.9038569032979319, |
|
"eval_loss": 0.38709646463394165, |
|
"eval_precision": 0.9495008807985907, |
|
"eval_recall": 0.8624, |
|
"eval_runtime": 305.1281, |
|
"eval_samples_per_second": 12.29, |
|
"eval_steps_per_second": 1.537, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2486288848263255e-05, |
|
"loss": 0.2829, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.9104, |
|
"eval_f1": 0.9114391143911439, |
|
"eval_loss": 0.3015682101249695, |
|
"eval_precision": 0.900990099009901, |
|
"eval_recall": 0.9221333333333334, |
|
"eval_runtime": 305.3174, |
|
"eval_samples_per_second": 12.282, |
|
"eval_steps_per_second": 1.536, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1503656307129798e-05, |
|
"loss": 0.5477, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.8509333333333333, |
|
"eval_f1": 0.8479738917595866, |
|
"eval_loss": 0.5632778406143188, |
|
"eval_precision": 0.865149833518313, |
|
"eval_recall": 0.8314666666666667, |
|
"eval_runtime": 305.058, |
|
"eval_samples_per_second": 12.293, |
|
"eval_steps_per_second": 1.537, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0521023765996345e-05, |
|
"loss": 0.4431, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.9082666666666667, |
|
"eval_f1": 0.9050248481501934, |
|
"eval_loss": 0.42895591259002686, |
|
"eval_precision": 0.9381797366914711, |
|
"eval_recall": 0.8741333333333333, |
|
"eval_runtime": 305.0347, |
|
"eval_samples_per_second": 12.294, |
|
"eval_steps_per_second": 1.538, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.953839122486289e-05, |
|
"loss": 0.3335, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.9069333333333334, |
|
"eval_f1": 0.90008588605783, |
|
"eval_loss": 0.32379406690597534, |
|
"eval_precision": 0.9715698393077874, |
|
"eval_recall": 0.8384, |
|
"eval_runtime": 306.6476, |
|
"eval_samples_per_second": 12.229, |
|
"eval_steps_per_second": 1.529, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8555758683729435e-05, |
|
"loss": 0.3255, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.9152, |
|
"eval_f1": 0.9159619450317126, |
|
"eval_loss": 0.3898433446884155, |
|
"eval_precision": 0.9078051335777895, |
|
"eval_recall": 0.9242666666666667, |
|
"eval_runtime": 305.4671, |
|
"eval_samples_per_second": 12.276, |
|
"eval_steps_per_second": 1.535, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7573126142595978e-05, |
|
"loss": 0.3103, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.9189333333333334, |
|
"eval_f1": 0.9140271493212669, |
|
"eval_loss": 0.25054827332496643, |
|
"eval_precision": 0.9729078868151716, |
|
"eval_recall": 0.8618666666666667, |
|
"eval_runtime": 304.9946, |
|
"eval_samples_per_second": 12.295, |
|
"eval_steps_per_second": 1.538, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.659049360146252e-05, |
|
"loss": 0.2878, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9208, |
|
"eval_f1": 0.9207788743664976, |
|
"eval_loss": 0.29952123761177063, |
|
"eval_precision": 0.9210245464247598, |
|
"eval_recall": 0.9205333333333333, |
|
"eval_runtime": 304.6679, |
|
"eval_samples_per_second": 12.308, |
|
"eval_steps_per_second": 1.539, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5607861060329068e-05, |
|
"loss": 0.3426, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.9248, |
|
"eval_f1": 0.9255936675461741, |
|
"eval_loss": 0.3102300763130188, |
|
"eval_precision": 0.9159268929503916, |
|
"eval_recall": 0.9354666666666667, |
|
"eval_runtime": 305.1368, |
|
"eval_samples_per_second": 12.29, |
|
"eval_steps_per_second": 1.537, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4625228519195613e-05, |
|
"loss": 0.2799, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.9226666666666666, |
|
"eval_f1": 0.9236440231700894, |
|
"eval_loss": 0.32980650663375854, |
|
"eval_precision": 0.9121164846593863, |
|
"eval_recall": 0.9354666666666667, |
|
"eval_runtime": 305.2653, |
|
"eval_samples_per_second": 12.284, |
|
"eval_steps_per_second": 1.536, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3642595978062158e-05, |
|
"loss": 0.4076, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.9277333333333333, |
|
"eval_f1": 0.9256515775034294, |
|
"eval_loss": 0.29825451970100403, |
|
"eval_precision": 0.9531073446327684, |
|
"eval_recall": 0.8997333333333334, |
|
"eval_runtime": 304.9271, |
|
"eval_samples_per_second": 12.298, |
|
"eval_steps_per_second": 1.538, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2659963436928701e-05, |
|
"loss": 0.2303, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.9168, |
|
"eval_f1": 0.9147540983606557, |
|
"eval_loss": 0.27176761627197266, |
|
"eval_precision": 0.9378151260504202, |
|
"eval_recall": 0.8928, |
|
"eval_runtime": 305.6751, |
|
"eval_samples_per_second": 12.268, |
|
"eval_steps_per_second": 1.534, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1677330895795248e-05, |
|
"loss": 0.2793, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.9229333333333334, |
|
"eval_f1": 0.9229949373834265, |
|
"eval_loss": 0.34073972702026367, |
|
"eval_precision": 0.9222577209797657, |
|
"eval_recall": 0.9237333333333333, |
|
"eval_runtime": 305.4396, |
|
"eval_samples_per_second": 12.277, |
|
"eval_steps_per_second": 1.535, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0694698354661791e-05, |
|
"loss": 0.226, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9290666666666667, |
|
"eval_f1": 0.9279132791327913, |
|
"eval_loss": 0.34017863869667053, |
|
"eval_precision": 0.9432506887052342, |
|
"eval_recall": 0.9130666666666667, |
|
"eval_runtime": 304.4994, |
|
"eval_samples_per_second": 12.315, |
|
"eval_steps_per_second": 1.54, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.712065813528338e-06, |
|
"loss": 0.5583, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.92, |
|
"eval_f1": 0.9220779220779222, |
|
"eval_loss": 0.26789960265159607, |
|
"eval_precision": 0.8987341772151899, |
|
"eval_recall": 0.9466666666666667, |
|
"eval_runtime": 304.5833, |
|
"eval_samples_per_second": 12.312, |
|
"eval_steps_per_second": 1.54, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.729433272394881e-06, |
|
"loss": 0.2609, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.9293333333333333, |
|
"eval_f1": 0.928436402916554, |
|
"eval_loss": 0.3069748878479004, |
|
"eval_precision": 0.9403719912472648, |
|
"eval_recall": 0.9168, |
|
"eval_runtime": 305.3926, |
|
"eval_samples_per_second": 12.279, |
|
"eval_steps_per_second": 1.536, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.746800731261426e-06, |
|
"loss": 0.3392, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.9309333333333333, |
|
"eval_f1": 0.928748280605227, |
|
"eval_loss": 0.24878720939159393, |
|
"eval_precision": 0.9590909090909091, |
|
"eval_recall": 0.9002666666666667, |
|
"eval_runtime": 305.0861, |
|
"eval_samples_per_second": 12.292, |
|
"eval_steps_per_second": 1.537, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.764168190127972e-06, |
|
"loss": 0.3378, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9272, |
|
"eval_f1": 0.9248554913294799, |
|
"eval_loss": 0.2940199077129364, |
|
"eval_precision": 0.9556313993174061, |
|
"eval_recall": 0.896, |
|
"eval_runtime": 304.8472, |
|
"eval_samples_per_second": 12.301, |
|
"eval_steps_per_second": 1.538, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.781535648994515e-06, |
|
"loss": 0.2271, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.9349333333333333, |
|
"eval_f1": 0.934654525977504, |
|
"eval_loss": 0.23414117097854614, |
|
"eval_precision": 0.9386767079074771, |
|
"eval_recall": 0.9306666666666666, |
|
"eval_runtime": 304.8432, |
|
"eval_samples_per_second": 12.301, |
|
"eval_steps_per_second": 1.538, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.798903107861061e-06, |
|
"loss": 0.4284, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9325333333333333, |
|
"eval_f1": 0.9310438811665304, |
|
"eval_loss": 0.30585160851478577, |
|
"eval_precision": 0.9520624303232998, |
|
"eval_recall": 0.9109333333333334, |
|
"eval_runtime": 304.8244, |
|
"eval_samples_per_second": 12.302, |
|
"eval_steps_per_second": 1.539, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.816270566727605e-06, |
|
"loss": 0.2887, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9341333333333334, |
|
"eval_f1": 0.9327158812312721, |
|
"eval_loss": 0.27199041843414307, |
|
"eval_precision": 0.9532293986636972, |
|
"eval_recall": 0.9130666666666667, |
|
"eval_runtime": 305.2967, |
|
"eval_samples_per_second": 12.283, |
|
"eval_steps_per_second": 1.536, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.83363802559415e-06, |
|
"loss": 0.2287, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9362666666666667, |
|
"eval_f1": 0.9355273806312382, |
|
"eval_loss": 0.3032676577568054, |
|
"eval_precision": 0.9465065502183406, |
|
"eval_recall": 0.9248, |
|
"eval_runtime": 305.4791, |
|
"eval_samples_per_second": 12.276, |
|
"eval_steps_per_second": 1.535, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.851005484460695e-06, |
|
"loss": 0.3379, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.9362666666666667, |
|
"eval_f1": 0.9354577369700243, |
|
"eval_loss": 0.25949496030807495, |
|
"eval_precision": 0.9474835886214442, |
|
"eval_recall": 0.9237333333333333, |
|
"eval_runtime": 305.1987, |
|
"eval_samples_per_second": 12.287, |
|
"eval_steps_per_second": 1.537, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.683729433272396e-07, |
|
"loss": 0.2405, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9354666666666667, |
|
"eval_f1": 0.9345238095238096, |
|
"eval_loss": 0.2737696170806885, |
|
"eval_precision": 0.9483800109829764, |
|
"eval_recall": 0.9210666666666667, |
|
"eval_runtime": 304.727, |
|
"eval_samples_per_second": 12.306, |
|
"eval_steps_per_second": 1.539, |
|
"step": 2150 |
|
} |
|
], |
|
"max_steps": 2188, |
|
"num_train_epochs": 1, |
|
"total_flos": 6.497491746816e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|