{
  "best_metric": 0.22676756978034973,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.29027576197387517,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001451378809869376,
      "grad_norm": 0.11311399191617966,
      "learning_rate": 1.013e-05,
      "loss": 0.2249,
      "step": 1
    },
    {
      "epoch": 0.001451378809869376,
      "eval_loss": 0.27678582072257996,
      "eval_runtime": 142.3159,
      "eval_samples_per_second": 2.045,
      "eval_steps_per_second": 0.513,
      "step": 1
    },
    {
      "epoch": 0.002902757619738752,
      "grad_norm": 0.11226661503314972,
      "learning_rate": 2.026e-05,
      "loss": 0.4195,
      "step": 2
    },
    {
      "epoch": 0.0043541364296081275,
      "grad_norm": 0.1760484129190445,
      "learning_rate": 3.039e-05,
      "loss": 0.3752,
      "step": 3
    },
    {
      "epoch": 0.005805515239477504,
      "grad_norm": 0.13543817400932312,
      "learning_rate": 4.052e-05,
      "loss": 0.3455,
      "step": 4
    },
    {
      "epoch": 0.00725689404934688,
      "grad_norm": 0.11544519662857056,
      "learning_rate": 5.065e-05,
      "loss": 0.1971,
      "step": 5
    },
    {
      "epoch": 0.008708272859216255,
      "grad_norm": 0.13015075027942657,
      "learning_rate": 6.078e-05,
      "loss": 0.2833,
      "step": 6
    },
    {
      "epoch": 0.010159651669085631,
      "grad_norm": 0.14773070812225342,
      "learning_rate": 7.091e-05,
      "loss": 0.4397,
      "step": 7
    },
    {
      "epoch": 0.011611030478955007,
      "grad_norm": 0.15062645077705383,
      "learning_rate": 8.104e-05,
      "loss": 0.4152,
      "step": 8
    },
    {
      "epoch": 0.013062409288824383,
      "grad_norm": 0.12542325258255005,
      "learning_rate": 9.117e-05,
      "loss": 0.3613,
      "step": 9
    },
    {
      "epoch": 0.01451378809869376,
      "grad_norm": 0.13777385652065277,
      "learning_rate": 0.0001013,
      "loss": 0.7267,
      "step": 10
    },
    {
      "epoch": 0.015965166908563134,
      "grad_norm": 0.16426308453083038,
      "learning_rate": 0.00010076684210526316,
      "loss": 0.5661,
      "step": 11
    },
    {
      "epoch": 0.01741654571843251,
      "grad_norm": 0.12953603267669678,
      "learning_rate": 0.0001002336842105263,
      "loss": 0.3289,
      "step": 12
    },
    {
      "epoch": 0.018867924528301886,
      "grad_norm": 0.1950962245464325,
      "learning_rate": 9.970052631578946e-05,
      "loss": 0.6131,
      "step": 13
    },
    {
      "epoch": 0.020319303338171262,
      "grad_norm": 0.17205172777175903,
      "learning_rate": 9.916736842105263e-05,
      "loss": 0.4953,
      "step": 14
    },
    {
      "epoch": 0.02177068214804064,
      "grad_norm": 0.13275088369846344,
      "learning_rate": 9.863421052631579e-05,
      "loss": 0.2192,
      "step": 15
    },
    {
      "epoch": 0.023222060957910014,
      "grad_norm": 0.13935284316539764,
      "learning_rate": 9.810105263157895e-05,
      "loss": 0.2338,
      "step": 16
    },
    {
      "epoch": 0.02467343976777939,
      "grad_norm": 0.1427544802427292,
      "learning_rate": 9.756789473684211e-05,
      "loss": 0.3142,
      "step": 17
    },
    {
      "epoch": 0.026124818577648767,
      "grad_norm": 0.13596214354038239,
      "learning_rate": 9.703473684210525e-05,
      "loss": 0.255,
      "step": 18
    },
    {
      "epoch": 0.027576197387518143,
      "grad_norm": 0.16519062221050262,
      "learning_rate": 9.650157894736842e-05,
      "loss": 0.5583,
      "step": 19
    },
    {
      "epoch": 0.02902757619738752,
      "grad_norm": 0.11557265371084213,
      "learning_rate": 9.596842105263158e-05,
      "loss": 0.1951,
      "step": 20
    },
    {
      "epoch": 0.030478955007256895,
      "grad_norm": 0.09901455044746399,
      "learning_rate": 9.543526315789474e-05,
      "loss": 0.1046,
      "step": 21
    },
    {
      "epoch": 0.03193033381712627,
      "grad_norm": 0.15276901423931122,
      "learning_rate": 9.49021052631579e-05,
      "loss": 0.445,
      "step": 22
    },
    {
      "epoch": 0.033381712626995644,
      "grad_norm": 0.15448720753192902,
      "learning_rate": 9.436894736842105e-05,
      "loss": 0.4333,
      "step": 23
    },
    {
      "epoch": 0.03483309143686502,
      "grad_norm": 0.18187403678894043,
      "learning_rate": 9.38357894736842e-05,
      "loss": 0.3444,
      "step": 24
    },
    {
      "epoch": 0.036284470246734396,
      "grad_norm": 0.13476215302944183,
      "learning_rate": 9.330263157894737e-05,
      "loss": 0.3449,
      "step": 25
    },
    {
      "epoch": 0.03773584905660377,
      "grad_norm": 0.15487666428089142,
      "learning_rate": 9.276947368421051e-05,
      "loss": 0.4944,
      "step": 26
    },
    {
      "epoch": 0.03918722786647315,
      "grad_norm": 0.17659035325050354,
      "learning_rate": 9.223631578947369e-05,
      "loss": 0.5834,
      "step": 27
    },
    {
      "epoch": 0.040638606676342524,
      "grad_norm": 0.1136946901679039,
      "learning_rate": 9.170315789473684e-05,
      "loss": 0.2565,
      "step": 28
    },
    {
      "epoch": 0.0420899854862119,
      "grad_norm": 0.17775152623653412,
      "learning_rate": 9.117e-05,
      "loss": 0.4925,
      "step": 29
    },
    {
      "epoch": 0.04354136429608128,
      "grad_norm": 0.2578783333301544,
      "learning_rate": 9.063684210526316e-05,
      "loss": 0.3918,
      "step": 30
    },
    {
      "epoch": 0.04499274310595065,
      "grad_norm": 0.1302911639213562,
      "learning_rate": 9.010368421052632e-05,
      "loss": 0.3904,
      "step": 31
    },
    {
      "epoch": 0.04644412191582003,
      "grad_norm": 0.12905248999595642,
      "learning_rate": 8.957052631578946e-05,
      "loss": 0.28,
      "step": 32
    },
    {
      "epoch": 0.047895500725689405,
      "grad_norm": 0.16814574599266052,
      "learning_rate": 8.903736842105263e-05,
      "loss": 0.4776,
      "step": 33
    },
    {
      "epoch": 0.04934687953555878,
      "grad_norm": 0.1260037124156952,
      "learning_rate": 8.850421052631579e-05,
      "loss": 0.2609,
      "step": 34
    },
    {
      "epoch": 0.05079825834542816,
      "grad_norm": 0.10647979378700256,
      "learning_rate": 8.797105263157895e-05,
      "loss": 0.2253,
      "step": 35
    },
    {
      "epoch": 0.05224963715529753,
      "grad_norm": 0.10167553275823593,
      "learning_rate": 8.743789473684211e-05,
      "loss": 0.2064,
      "step": 36
    },
    {
      "epoch": 0.05370101596516691,
      "grad_norm": 0.13301363587379456,
      "learning_rate": 8.690473684210526e-05,
      "loss": 0.2473,
      "step": 37
    },
    {
      "epoch": 0.055152394775036286,
      "grad_norm": 0.11595452576875687,
      "learning_rate": 8.637157894736842e-05,
      "loss": 0.2187,
      "step": 38
    },
    {
      "epoch": 0.05660377358490566,
      "grad_norm": 0.15230633318424225,
      "learning_rate": 8.583842105263158e-05,
      "loss": 0.3451,
      "step": 39
    },
    {
      "epoch": 0.05805515239477504,
      "grad_norm": 0.15629370510578156,
      "learning_rate": 8.530526315789472e-05,
      "loss": 0.6737,
      "step": 40
    },
    {
      "epoch": 0.059506531204644414,
      "grad_norm": 0.14271894097328186,
      "learning_rate": 8.47721052631579e-05,
      "loss": 0.216,
      "step": 41
    },
    {
      "epoch": 0.06095791001451379,
      "grad_norm": 0.23410384356975555,
      "learning_rate": 8.423894736842105e-05,
      "loss": 0.3089,
      "step": 42
    },
    {
      "epoch": 0.062409288824383166,
      "grad_norm": 0.13687372207641602,
      "learning_rate": 8.37057894736842e-05,
      "loss": 0.212,
      "step": 43
    },
    {
      "epoch": 0.06386066763425254,
      "grad_norm": 0.1781831830739975,
      "learning_rate": 8.317263157894737e-05,
      "loss": 0.6556,
      "step": 44
    },
    {
      "epoch": 0.06531204644412192,
      "grad_norm": 0.25682398676872253,
      "learning_rate": 8.263947368421053e-05,
      "loss": 0.5198,
      "step": 45
    },
    {
      "epoch": 0.06676342525399129,
      "grad_norm": 0.2759973406791687,
      "learning_rate": 8.210631578947368e-05,
      "loss": 0.9177,
      "step": 46
    },
    {
      "epoch": 0.06821480406386067,
      "grad_norm": 0.2668778598308563,
      "learning_rate": 8.157315789473684e-05,
      "loss": 0.8107,
      "step": 47
    },
    {
      "epoch": 0.06966618287373004,
      "grad_norm": 0.2767494022846222,
      "learning_rate": 8.104e-05,
      "loss": 0.606,
      "step": 48
    },
    {
      "epoch": 0.07111756168359942,
      "grad_norm": 0.5263920426368713,
      "learning_rate": 8.050684210526316e-05,
      "loss": 0.9517,
      "step": 49
    },
    {
      "epoch": 0.07256894049346879,
      "grad_norm": 0.4693450927734375,
      "learning_rate": 7.997368421052632e-05,
      "loss": 1.1133,
      "step": 50
    },
    {
      "epoch": 0.07256894049346879,
      "eval_loss": 0.2412186712026596,
      "eval_runtime": 145.5936,
      "eval_samples_per_second": 1.999,
      "eval_steps_per_second": 0.501,
      "step": 50
    },
    {
      "epoch": 0.07402031930333818,
      "grad_norm": 0.1490558385848999,
      "learning_rate": 7.944052631578947e-05,
      "loss": 0.5739,
      "step": 51
    },
    {
      "epoch": 0.07547169811320754,
      "grad_norm": 0.19806860387325287,
      "learning_rate": 7.890736842105263e-05,
      "loss": 0.3819,
      "step": 52
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 0.12554095685482025,
      "learning_rate": 7.837421052631579e-05,
      "loss": 0.3486,
      "step": 53
    },
    {
      "epoch": 0.0783744557329463,
      "grad_norm": 0.17904381453990936,
      "learning_rate": 7.784105263157893e-05,
      "loss": 0.6337,
      "step": 54
    },
    {
      "epoch": 0.07982583454281568,
      "grad_norm": 0.16063204407691956,
      "learning_rate": 7.730789473684211e-05,
      "loss": 0.4335,
      "step": 55
    },
    {
      "epoch": 0.08127721335268505,
      "grad_norm": 0.17032606899738312,
      "learning_rate": 7.677473684210526e-05,
      "loss": 0.5176,
      "step": 56
    },
    {
      "epoch": 0.08272859216255443,
      "grad_norm": 0.36131739616394043,
      "learning_rate": 7.624157894736842e-05,
      "loss": 0.3518,
      "step": 57
    },
    {
      "epoch": 0.0841799709724238,
      "grad_norm": 0.19793149828910828,
      "learning_rate": 7.570842105263158e-05,
      "loss": 0.8414,
      "step": 58
    },
    {
      "epoch": 0.08563134978229318,
      "grad_norm": 0.13127806782722473,
      "learning_rate": 7.517526315789474e-05,
      "loss": 0.3519,
      "step": 59
    },
    {
      "epoch": 0.08708272859216255,
      "grad_norm": 0.1507265418767929,
      "learning_rate": 7.464210526315789e-05,
      "loss": 0.8202,
      "step": 60
    },
    {
      "epoch": 0.08853410740203194,
      "grad_norm": 0.1529066562652588,
      "learning_rate": 7.410894736842106e-05,
      "loss": 0.5119,
      "step": 61
    },
    {
      "epoch": 0.0899854862119013,
      "grad_norm": 0.12405847012996674,
      "learning_rate": 7.35757894736842e-05,
      "loss": 0.5436,
      "step": 62
    },
    {
      "epoch": 0.09143686502177069,
      "grad_norm": 0.16444605588912964,
      "learning_rate": 7.304263157894737e-05,
      "loss": 0.4945,
      "step": 63
    },
    {
      "epoch": 0.09288824383164006,
      "grad_norm": 0.10274633765220642,
      "learning_rate": 7.250947368421053e-05,
      "loss": 0.2285,
      "step": 64
    },
    {
      "epoch": 0.09433962264150944,
      "grad_norm": 0.13879486918449402,
      "learning_rate": 7.197631578947368e-05,
      "loss": 0.3028,
      "step": 65
    },
    {
      "epoch": 0.09579100145137881,
      "grad_norm": 0.1328522115945816,
      "learning_rate": 7.144315789473684e-05,
      "loss": 0.4795,
      "step": 66
    },
    {
      "epoch": 0.09724238026124818,
      "grad_norm": 0.15546078979969025,
      "learning_rate": 7.091e-05,
      "loss": 0.5576,
      "step": 67
    },
    {
      "epoch": 0.09869375907111756,
      "grad_norm": 0.16497625410556793,
      "learning_rate": 7.037684210526316e-05,
      "loss": 0.271,
      "step": 68
    },
    {
      "epoch": 0.10014513788098693,
      "grad_norm": 0.1409892439842224,
      "learning_rate": 6.984368421052632e-05,
      "loss": 0.4059,
      "step": 69
    },
    {
      "epoch": 0.10159651669085631,
      "grad_norm": 0.173180490732193,
      "learning_rate": 6.931052631578947e-05,
      "loss": 0.5719,
      "step": 70
    },
    {
      "epoch": 0.10304789550072568,
      "grad_norm": 0.1634824275970459,
      "learning_rate": 6.877736842105263e-05,
      "loss": 0.3316,
      "step": 71
    },
    {
      "epoch": 0.10449927431059507,
      "grad_norm": 0.1520518958568573,
      "learning_rate": 6.824421052631579e-05,
      "loss": 0.749,
      "step": 72
    },
    {
      "epoch": 0.10595065312046444,
      "grad_norm": 0.11503203213214874,
      "learning_rate": 6.771105263157895e-05,
      "loss": 0.3243,
      "step": 73
    },
    {
      "epoch": 0.10740203193033382,
      "grad_norm": 0.09616807848215103,
      "learning_rate": 6.71778947368421e-05,
      "loss": 0.2127,
      "step": 74
    },
    {
      "epoch": 0.10885341074020319,
      "grad_norm": 0.12218757718801498,
      "learning_rate": 6.664473684210527e-05,
      "loss": 0.3151,
      "step": 75
    },
    {
      "epoch": 0.11030478955007257,
      "grad_norm": 0.11861466616392136,
      "learning_rate": 6.611157894736842e-05,
      "loss": 0.2481,
      "step": 76
    },
    {
      "epoch": 0.11175616835994194,
      "grad_norm": 0.08120733499526978,
      "learning_rate": 6.557842105263158e-05,
      "loss": 0.1281,
      "step": 77
    },
    {
      "epoch": 0.11320754716981132,
      "grad_norm": 0.11339303106069565,
      "learning_rate": 6.504526315789474e-05,
      "loss": 0.3048,
      "step": 78
    },
    {
      "epoch": 0.11465892597968069,
      "grad_norm": 0.12353406846523285,
      "learning_rate": 6.451210526315789e-05,
      "loss": 0.522,
      "step": 79
    },
    {
      "epoch": 0.11611030478955008,
      "grad_norm": 0.18549102544784546,
      "learning_rate": 6.397894736842105e-05,
      "loss": 1.1374,
      "step": 80
    },
    {
      "epoch": 0.11756168359941944,
      "grad_norm": 0.10565419495105743,
      "learning_rate": 6.344578947368421e-05,
      "loss": 0.2235,
      "step": 81
    },
    {
      "epoch": 0.11901306240928883,
      "grad_norm": 0.13751162588596344,
      "learning_rate": 6.291263157894737e-05,
      "loss": 0.4815,
      "step": 82
    },
    {
      "epoch": 0.1204644412191582,
      "grad_norm": 0.12583044171333313,
      "learning_rate": 6.237947368421053e-05,
      "loss": 0.4072,
      "step": 83
    },
    {
      "epoch": 0.12191582002902758,
      "grad_norm": 0.11933013796806335,
      "learning_rate": 6.184631578947368e-05,
      "loss": 0.4836,
      "step": 84
    },
    {
      "epoch": 0.12336719883889695,
      "grad_norm": 0.17227371037006378,
      "learning_rate": 6.131315789473684e-05,
      "loss": 0.6044,
      "step": 85
    },
    {
      "epoch": 0.12481857764876633,
      "grad_norm": 0.1414662003517151,
      "learning_rate": 6.078e-05,
      "loss": 0.5979,
      "step": 86
    },
    {
      "epoch": 0.1262699564586357,
      "grad_norm": 0.08421526849269867,
      "learning_rate": 6.024684210526315e-05,
      "loss": 0.1035,
      "step": 87
    },
    {
      "epoch": 0.12772133526850507,
      "grad_norm": 0.12226138263940811,
      "learning_rate": 5.9713684210526305e-05,
      "loss": 0.1514,
      "step": 88
    },
    {
      "epoch": 0.12917271407837447,
      "grad_norm": 0.08483120799064636,
      "learning_rate": 5.918052631578947e-05,
      "loss": 0.1681,
      "step": 89
    },
    {
      "epoch": 0.13062409288824384,
      "grad_norm": 0.16209205985069275,
      "learning_rate": 5.8647368421052634e-05,
      "loss": 1.0777,
      "step": 90
    },
    {
      "epoch": 0.1320754716981132,
      "grad_norm": 0.16335219144821167,
      "learning_rate": 5.811421052631579e-05,
      "loss": 0.8582,
      "step": 91
    },
    {
      "epoch": 0.13352685050798258,
      "grad_norm": 0.14614659547805786,
      "learning_rate": 5.758105263157894e-05,
      "loss": 0.8367,
      "step": 92
    },
    {
      "epoch": 0.13497822931785197,
      "grad_norm": 0.12466667592525482,
      "learning_rate": 5.70478947368421e-05,
      "loss": 0.2996,
      "step": 93
    },
    {
      "epoch": 0.13642960812772134,
      "grad_norm": 0.16494490206241608,
      "learning_rate": 5.6514736842105256e-05,
      "loss": 0.7263,
      "step": 94
    },
    {
      "epoch": 0.1378809869375907,
      "grad_norm": 0.1597548872232437,
      "learning_rate": 5.5981578947368424e-05,
      "loss": 0.5913,
      "step": 95
    },
    {
      "epoch": 0.13933236574746008,
      "grad_norm": 0.246367946267128,
      "learning_rate": 5.544842105263158e-05,
      "loss": 1.299,
      "step": 96
    },
    {
      "epoch": 0.14078374455732948,
      "grad_norm": 0.2906947731971741,
      "learning_rate": 5.491526315789474e-05,
      "loss": 1.0694,
      "step": 97
    },
    {
      "epoch": 0.14223512336719885,
      "grad_norm": 0.32492387294769287,
      "learning_rate": 5.438210526315789e-05,
      "loss": 0.6249,
      "step": 98
    },
    {
      "epoch": 0.14368650217706821,
      "grad_norm": 0.3696464002132416,
      "learning_rate": 5.384894736842105e-05,
      "loss": 1.3239,
      "step": 99
    },
    {
      "epoch": 0.14513788098693758,
      "grad_norm": 0.4203333854675293,
      "learning_rate": 5.331578947368421e-05,
      "loss": 0.976,
      "step": 100
    },
    {
      "epoch": 0.14513788098693758,
      "eval_loss": 0.2287154644727707,
      "eval_runtime": 144.369,
      "eval_samples_per_second": 2.016,
      "eval_steps_per_second": 0.506,
      "step": 100
    },
    {
      "epoch": 0.14658925979680695,
      "grad_norm": 0.14465273916721344,
      "learning_rate": 5.278263157894736e-05,
      "loss": 0.7933,
      "step": 101
    },
    {
      "epoch": 0.14804063860667635,
      "grad_norm": 0.1529458463191986,
      "learning_rate": 5.224947368421053e-05,
      "loss": 0.4874,
      "step": 102
    },
    {
      "epoch": 0.14949201741654572,
      "grad_norm": 0.09371291100978851,
      "learning_rate": 5.171631578947368e-05,
      "loss": 0.1754,
      "step": 103
    },
    {
      "epoch": 0.1509433962264151,
      "grad_norm": 0.11294244974851608,
      "learning_rate": 5.1183157894736844e-05,
      "loss": 0.4367,
      "step": 104
    },
    {
      "epoch": 0.15239477503628446,
      "grad_norm": 0.11110277473926544,
      "learning_rate": 5.065e-05,
      "loss": 0.3404,
      "step": 105
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 0.09716267883777618,
      "learning_rate": 5.011684210526315e-05,
      "loss": 0.3296,
      "step": 106
    },
    {
      "epoch": 0.15529753265602322,
      "grad_norm": 0.15396001935005188,
      "learning_rate": 4.958368421052631e-05,
      "loss": 0.772,
      "step": 107
    },
    {
      "epoch": 0.1567489114658926,
      "grad_norm": 0.17330443859100342,
      "learning_rate": 4.9050526315789473e-05,
      "loss": 0.4806,
      "step": 108
    },
    {
      "epoch": 0.15820029027576196,
      "grad_norm": 0.15418781340122223,
      "learning_rate": 4.851736842105263e-05,
      "loss": 0.6729,
      "step": 109
    },
    {
      "epoch": 0.15965166908563136,
      "grad_norm": 0.13719098269939423,
      "learning_rate": 4.798421052631579e-05,
      "loss": 0.2485,
      "step": 110
    },
    {
      "epoch": 0.16110304789550073,
      "grad_norm": 0.1265023797750473,
      "learning_rate": 4.745105263157895e-05,
      "loss": 0.3845,
      "step": 111
    },
    {
      "epoch": 0.1625544267053701,
      "grad_norm": 0.15158045291900635,
      "learning_rate": 4.69178947368421e-05,
      "loss": 0.4527,
      "step": 112
    },
    {
      "epoch": 0.16400580551523947,
      "grad_norm": 0.12712687253952026,
      "learning_rate": 4.638473684210526e-05,
      "loss": 0.3207,
      "step": 113
    },
    {
      "epoch": 0.16545718432510886,
      "grad_norm": 0.10735659301280975,
      "learning_rate": 4.585157894736842e-05,
      "loss": 0.2524,
      "step": 114
    },
    {
      "epoch": 0.16690856313497823,
      "grad_norm": 0.14663982391357422,
      "learning_rate": 4.531842105263158e-05,
      "loss": 0.7199,
      "step": 115
    },
    {
      "epoch": 0.1683599419448476,
      "grad_norm": 0.12890632450580597,
      "learning_rate": 4.478526315789473e-05,
      "loss": 0.5994,
      "step": 116
    },
    {
      "epoch": 0.16981132075471697,
      "grad_norm": 0.16796326637268066,
      "learning_rate": 4.425210526315789e-05,
      "loss": 0.6609,
      "step": 117
    },
    {
      "epoch": 0.17126269956458637,
      "grad_norm": 0.21263499557971954,
      "learning_rate": 4.3718947368421054e-05,
      "loss": 0.6373,
      "step": 118
    },
    {
      "epoch": 0.17271407837445574,
      "grad_norm": 0.15164527297019958,
      "learning_rate": 4.318578947368421e-05,
      "loss": 0.3913,
      "step": 119
    },
    {
      "epoch": 0.1741654571843251,
      "grad_norm": 0.09173654019832611,
      "learning_rate": 4.265263157894736e-05,
      "loss": 0.22,
      "step": 120
    },
    {
      "epoch": 0.17561683599419448,
      "grad_norm": 0.10727760940790176,
      "learning_rate": 4.211947368421052e-05,
      "loss": 0.2657,
      "step": 121
    },
    {
      "epoch": 0.17706821480406387,
      "grad_norm": 0.1093788743019104,
      "learning_rate": 4.1586315789473684e-05,
      "loss": 0.1771,
      "step": 122
    },
    {
      "epoch": 0.17851959361393324,
      "grad_norm": 0.11123545467853546,
      "learning_rate": 4.105315789473684e-05,
      "loss": 0.3519,
      "step": 123
    },
    {
      "epoch": 0.1799709724238026,
      "grad_norm": 0.11694060266017914,
      "learning_rate": 4.052e-05,
      "loss": 0.2824,
      "step": 124
    },
    {
      "epoch": 0.18142235123367198,
      "grad_norm": 0.1088772639632225,
      "learning_rate": 3.998684210526316e-05,
      "loss": 0.3736,
      "step": 125
    },
    {
      "epoch": 0.18287373004354138,
      "grad_norm": 0.07843092083930969,
      "learning_rate": 3.945368421052631e-05,
      "loss": 0.1476,
      "step": 126
    },
    {
      "epoch": 0.18432510885341075,
      "grad_norm": 0.12299855053424835,
      "learning_rate": 3.892052631578947e-05,
      "loss": 0.3185,
      "step": 127
    },
    {
      "epoch": 0.18577648766328012,
      "grad_norm": 0.13632050156593323,
      "learning_rate": 3.838736842105263e-05,
      "loss": 0.3271,
      "step": 128
    },
    {
      "epoch": 0.18722786647314948,
      "grad_norm": 0.11568715423345566,
      "learning_rate": 3.785421052631579e-05,
      "loss": 0.2214,
      "step": 129
    },
    {
      "epoch": 0.18867924528301888,
      "grad_norm": 0.11555133759975433,
      "learning_rate": 3.732105263157894e-05,
      "loss": 0.2503,
      "step": 130
    },
    {
      "epoch": 0.19013062409288825,
      "grad_norm": 0.11684191972017288,
      "learning_rate": 3.67878947368421e-05,
      "loss": 0.1773,
      "step": 131
    },
    {
      "epoch": 0.19158200290275762,
      "grad_norm": 0.09825399518013,
      "learning_rate": 3.6254736842105264e-05,
      "loss": 0.2053,
      "step": 132
    },
    {
      "epoch": 0.193033381712627,
      "grad_norm": 0.13328059017658234,
      "learning_rate": 3.572157894736842e-05,
      "loss": 0.1957,
      "step": 133
    },
    {
      "epoch": 0.19448476052249636,
      "grad_norm": 0.1092911884188652,
      "learning_rate": 3.518842105263158e-05,
      "loss": 0.2233,
      "step": 134
    },
    {
      "epoch": 0.19593613933236576,
      "grad_norm": 0.10300347954034805,
      "learning_rate": 3.465526315789473e-05,
      "loss": 0.3399,
      "step": 135
    },
    {
      "epoch": 0.19738751814223512,
      "grad_norm": 0.1662655472755432,
      "learning_rate": 3.4122105263157894e-05,
      "loss": 0.2793,
      "step": 136
    },
    {
      "epoch": 0.1988388969521045,
      "grad_norm": 0.08663914352655411,
      "learning_rate": 3.358894736842105e-05,
      "loss": 0.1391,
      "step": 137
    },
    {
      "epoch": 0.20029027576197386,
      "grad_norm": 0.15232573449611664,
      "learning_rate": 3.305578947368421e-05,
      "loss": 0.3642,
      "step": 138
    },
    {
      "epoch": 0.20174165457184326,
      "grad_norm": 0.10668061673641205,
      "learning_rate": 3.252263157894737e-05,
      "loss": 0.1986,
      "step": 139
    },
    {
      "epoch": 0.20319303338171263,
      "grad_norm": 0.09178131073713303,
      "learning_rate": 3.198947368421052e-05,
      "loss": 0.2896,
      "step": 140
    },
    {
      "epoch": 0.204644412191582,
      "grad_norm": 0.1355268359184265,
      "learning_rate": 3.1456315789473684e-05,
      "loss": 0.8356,
      "step": 141
    },
    {
      "epoch": 0.20609579100145137,
      "grad_norm": 0.13758248090744019,
      "learning_rate": 3.092315789473684e-05,
      "loss": 0.6022,
      "step": 142
    },
    {
      "epoch": 0.20754716981132076,
      "grad_norm": 0.12314596027135849,
      "learning_rate": 3.039e-05,
      "loss": 0.4143,
      "step": 143
    },
    {
      "epoch": 0.20899854862119013,
      "grad_norm": 0.11046963185071945,
      "learning_rate": 2.9856842105263153e-05,
      "loss": 0.2393,
      "step": 144
    },
    {
      "epoch": 0.2104499274310595,
      "grad_norm": 0.16623206436634064,
      "learning_rate": 2.9323684210526317e-05,
      "loss": 0.3518,
      "step": 145
    },
    {
      "epoch": 0.21190130624092887,
      "grad_norm": 0.19511793553829193,
      "learning_rate": 2.879052631578947e-05,
      "loss": 0.5,
      "step": 146
    },
    {
      "epoch": 0.21335268505079827,
      "grad_norm": 0.2845495939254761,
      "learning_rate": 2.8257368421052628e-05,
      "loss": 0.7584,
      "step": 147
    },
    {
      "epoch": 0.21480406386066764,
      "grad_norm": 0.27343127131462097,
      "learning_rate": 2.772421052631579e-05,
      "loss": 0.87,
      "step": 148
    },
    {
      "epoch": 0.216255442670537,
      "grad_norm": 0.41281190514564514,
      "learning_rate": 2.7191052631578946e-05,
      "loss": 0.8707,
      "step": 149
    },
    {
      "epoch": 0.21770682148040638,
      "grad_norm": 0.3892386257648468,
      "learning_rate": 2.6657894736842104e-05,
      "loss": 0.5039,
      "step": 150
    },
    {
      "epoch": 0.21770682148040638,
      "eval_loss": 0.22993537783622742,
      "eval_runtime": 142.9262,
      "eval_samples_per_second": 2.036,
      "eval_steps_per_second": 0.511,
      "step": 150
    },
    {
      "epoch": 0.21915820029027577,
      "grad_norm": 0.15316051244735718,
      "learning_rate": 2.6124736842105265e-05,
      "loss": 1.0054,
      "step": 151
    },
    {
      "epoch": 0.22060957910014514,
      "grad_norm": 0.18049275875091553,
      "learning_rate": 2.5591578947368422e-05,
      "loss": 0.8339,
      "step": 152
    },
    {
      "epoch": 0.2220609579100145,
      "grad_norm": 0.1573140174150467,
      "learning_rate": 2.5058421052631576e-05,
      "loss": 0.5323,
      "step": 153
    },
    {
      "epoch": 0.22351233671988388,
      "grad_norm": 0.1617121547460556,
      "learning_rate": 2.4525263157894737e-05,
      "loss": 1.0705,
      "step": 154
    },
    {
      "epoch": 0.22496371552975328,
      "grad_norm": 0.12206801027059555,
      "learning_rate": 2.3992105263157894e-05,
      "loss": 0.5119,
      "step": 155
    },
    {
      "epoch": 0.22641509433962265,
      "grad_norm": 0.9858077764511108,
      "learning_rate": 2.345894736842105e-05,
      "loss": 1.0604,
      "step": 156
    },
    {
      "epoch": 0.22786647314949202,
      "grad_norm": 0.15416570007801056,
      "learning_rate": 2.292578947368421e-05,
      "loss": 0.2413,
      "step": 157
    },
    {
      "epoch": 0.22931785195936139,
      "grad_norm": 0.18875420093536377,
      "learning_rate": 2.2392631578947366e-05,
      "loss": 0.7481,
      "step": 158
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 0.12112567573785782,
      "learning_rate": 2.1859473684210527e-05,
      "loss": 0.32,
      "step": 159
    },
    {
      "epoch": 0.23222060957910015,
      "grad_norm": 0.14630571007728577,
      "learning_rate": 2.132631578947368e-05,
      "loss": 0.3848,
      "step": 160
    },
    {
      "epoch": 0.23367198838896952,
      "grad_norm": 0.10878642648458481,
      "learning_rate": 2.0793157894736842e-05,
      "loss": 0.4093,
      "step": 161
    },
    {
      "epoch": 0.2351233671988389,
      "grad_norm": 0.14565850794315338,
      "learning_rate": 2.026e-05,
      "loss": 0.4859,
      "step": 162
    },
    {
      "epoch": 0.2365747460087083,
      "grad_norm": 0.14800970256328583,
      "learning_rate": 1.9726842105263157e-05,
      "loss": 0.3829,
      "step": 163
    },
    {
      "epoch": 0.23802612481857766,
      "grad_norm": 0.14318357408046722,
      "learning_rate": 1.9193684210526314e-05,
      "loss": 0.8232,
      "step": 164
    },
    {
      "epoch": 0.23947750362844702,
      "grad_norm": 0.11647699773311615,
      "learning_rate": 1.866052631578947e-05,
      "loss": 0.3973,
      "step": 165
    },
    {
      "epoch": 0.2409288824383164,
      "grad_norm": 0.17588454484939575,
      "learning_rate": 1.8127368421052632e-05,
      "loss": 1.0372,
      "step": 166
    },
    {
      "epoch": 0.24238026124818576,
      "grad_norm": 0.22197209298610687,
      "learning_rate": 1.759421052631579e-05,
      "loss": 0.6101,
      "step": 167
    },
    {
      "epoch": 0.24383164005805516,
      "grad_norm": 0.11662589013576508,
      "learning_rate": 1.7061052631578947e-05,
      "loss": 0.3538,
      "step": 168
    },
    {
      "epoch": 0.24528301886792453,
      "grad_norm": 0.12414685636758804,
      "learning_rate": 1.6527894736842104e-05,
      "loss": 0.5084,
      "step": 169
    },
    {
      "epoch": 0.2467343976777939,
      "grad_norm": 0.10851942747831345,
      "learning_rate": 1.599473684210526e-05,
      "loss": 0.3311,
      "step": 170
    },
    {
      "epoch": 0.24818577648766327,
      "grad_norm": 0.1293657124042511,
      "learning_rate": 1.546157894736842e-05,
      "loss": 0.3142,
      "step": 171
    },
    {
      "epoch": 0.24963715529753266,
      "grad_norm": 0.19298462569713593,
      "learning_rate": 1.4928421052631576e-05,
      "loss": 0.4494,
      "step": 172
    },
    {
      "epoch": 0.251088534107402,
      "grad_norm": 0.12975549697875977,
      "learning_rate": 1.4395263157894735e-05,
      "loss": 0.2316,
      "step": 173
    },
    {
      "epoch": 0.2525399129172714,
      "grad_norm": 0.09491987526416779,
      "learning_rate": 1.3862105263157895e-05,
      "loss": 0.3277,
      "step": 174
    },
    {
      "epoch": 0.2539912917271408,
      "grad_norm": 0.21053697168827057,
      "learning_rate": 1.3328947368421052e-05,
      "loss": 0.3236,
      "step": 175
    },
    {
      "epoch": 0.25544267053701014,
      "grad_norm": 0.12965311110019684,
      "learning_rate": 1.2795789473684211e-05,
      "loss": 0.9922,
      "step": 176
    },
    {
      "epoch": 0.25689404934687954,
      "grad_norm": 0.10223641246557236,
      "learning_rate": 1.2262631578947368e-05,
      "loss": 0.3105,
      "step": 177
    },
    {
      "epoch": 0.25834542815674894,
      "grad_norm": 0.15014851093292236,
      "learning_rate": 1.1729473684210526e-05,
      "loss": 0.5113,
      "step": 178
    },
    {
      "epoch": 0.2597968069666183,
      "grad_norm": 0.1316860169172287,
      "learning_rate": 1.1196315789473683e-05,
      "loss": 0.5018,
      "step": 179
    },
    {
      "epoch": 0.2612481857764877,
      "grad_norm": 0.11262688785791397,
      "learning_rate": 1.066315789473684e-05,
      "loss": 0.2846,
      "step": 180
    },
    {
      "epoch": 0.262699564586357,
      "grad_norm": 0.14847137033939362,
      "learning_rate": 1.013e-05,
      "loss": 0.3721,
      "step": 181
    },
    {
      "epoch": 0.2641509433962264,
      "grad_norm": 0.11905442178249359,
      "learning_rate": 9.596842105263157e-06,
      "loss": 0.5352,
      "step": 182
    },
    {
      "epoch": 0.2656023222060958,
      "grad_norm": 0.12718041241168976,
      "learning_rate": 9.063684210526316e-06,
      "loss": 0.298,
      "step": 183
    },
    {
      "epoch": 0.26705370101596515,
      "grad_norm": 0.09555162489414215,
      "learning_rate": 8.530526315789473e-06,
      "loss": 0.2167,
      "step": 184
    },
    {
      "epoch": 0.26850507982583455,
      "grad_norm": 0.09585528820753098,
      "learning_rate": 7.99736842105263e-06,
      "loss": 0.6557,
      "step": 185
    },
    {
      "epoch": 0.26995645863570394,
      "grad_norm": 0.09632135927677155,
      "learning_rate": 7.464210526315788e-06,
      "loss": 0.2777,
      "step": 186
    },
    {
      "epoch": 0.2714078374455733,
      "grad_norm": 0.10153202712535858,
      "learning_rate": 6.931052631578947e-06,
      "loss": 0.237,
      "step": 187
    },
    {
      "epoch": 0.2728592162554427,
      "grad_norm": 0.1199401244521141,
      "learning_rate": 6.3978947368421055e-06,
      "loss": 0.2484,
      "step": 188
    },
    {
      "epoch": 0.274310595065312,
      "grad_norm": 0.15911225974559784,
      "learning_rate": 5.864736842105263e-06,
      "loss": 0.2827,
      "step": 189
    },
    {
      "epoch": 0.2757619738751814,
      "grad_norm": 0.12618254125118256,
      "learning_rate": 5.33157894736842e-06,
      "loss": 0.2016,
      "step": 190
    },
    {
      "epoch": 0.2772133526850508,
      "grad_norm": 0.1026952788233757,
      "learning_rate": 4.7984210526315785e-06,
      "loss": 0.282,
      "step": 191
    },
    {
      "epoch": 0.27866473149492016,
      "grad_norm": 0.10733366012573242,
      "learning_rate": 4.265263157894737e-06,
      "loss": 0.1684,
      "step": 192
    },
    {
      "epoch": 0.28011611030478956,
      "grad_norm": 0.12453214824199677,
      "learning_rate": 3.732105263157894e-06,
      "loss": 0.4221,
      "step": 193
    },
    {
      "epoch": 0.28156748911465895,
      "grad_norm": 0.19456051290035248,
      "learning_rate": 3.1989473684210527e-06,
      "loss": 0.4257,
      "step": 194
    },
    {
      "epoch": 0.2830188679245283,
      "grad_norm": 0.18270336091518402,
      "learning_rate": 2.66578947368421e-06,
      "loss": 0.8292,
      "step": 195
    },
    {
      "epoch": 0.2844702467343977,
      "grad_norm": 0.1802217960357666,
      "learning_rate": 2.1326315789473684e-06,
      "loss": 0.8952,
      "step": 196
    },
    {
      "epoch": 0.28592162554426703,
      "grad_norm": 0.23863941431045532,
      "learning_rate": 1.5994736842105264e-06,
      "loss": 0.6211,
      "step": 197
    },
    {
      "epoch": 0.28737300435413643,
      "grad_norm": 0.282530277967453,
      "learning_rate": 1.0663157894736842e-06,
      "loss": 0.6815,
      "step": 198
    },
    {
      "epoch": 0.2888243831640058,
      "grad_norm": 0.46267229318618774,
      "learning_rate": 5.331578947368421e-07,
      "loss": 0.6762,
      "step": 199
    },
    {
      "epoch": 0.29027576197387517,
      "grad_norm": 0.4375733733177185,
      "learning_rate": 0.0,
      "loss": 1.0879,
      "step": 200
    },
    {
      "epoch": 0.29027576197387517,
      "eval_loss": 0.22676756978034973,
      "eval_runtime": 144.3205,
      "eval_samples_per_second": 2.016,
      "eval_steps_per_second": 0.506,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4053840999350272e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}