|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 23.44766505636071, |
|
"eval_steps": 100, |
|
"global_step": 900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2576489533011272, |
|
"grad_norm": 5.974159240722656, |
|
"learning_rate": 1.0526315789473685e-06, |
|
"loss": 8.6521, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5152979066022544, |
|
"grad_norm": 3.2873778343200684, |
|
"learning_rate": 2.105263157894737e-06, |
|
"loss": 8.3497, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7729468599033816, |
|
"grad_norm": 2.522794485092163, |
|
"learning_rate": 3.157894736842105e-06, |
|
"loss": 7.9896, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.0418679549114331, |
|
"grad_norm": 1.5867993831634521, |
|
"learning_rate": 4.210526315789474e-06, |
|
"loss": 8.1084, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.2995169082125604, |
|
"grad_norm": 1.300277829170227, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 7.3393, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.5571658615136876, |
|
"grad_norm": 1.1117545366287231, |
|
"learning_rate": 6.31578947368421e-06, |
|
"loss": 7.1266, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.8148148148148149, |
|
"grad_norm": 0.9784226417541504, |
|
"learning_rate": 7.368421052631579e-06, |
|
"loss": 6.9662, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.0837359098228663, |
|
"grad_norm": 0.9447280764579773, |
|
"learning_rate": 8.421052631578948e-06, |
|
"loss": 7.2558, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.3413848631239937, |
|
"grad_norm": 1.1698005199432373, |
|
"learning_rate": 9.473684210526315e-06, |
|
"loss": 6.6472, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.5990338164251208, |
|
"grad_norm": 1.544875144958496, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 6.392, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.5990338164251208, |
|
"eval_loss": 6.279848575592041, |
|
"eval_runtime": 8.7699, |
|
"eval_samples_per_second": 183.012, |
|
"eval_steps_per_second": 5.815, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.8566827697262482, |
|
"grad_norm": 1.4024465084075928, |
|
"learning_rate": 1.1578947368421053e-05, |
|
"loss": 6.1704, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.1256038647342996, |
|
"grad_norm": 1.2708696126937866, |
|
"learning_rate": 1.263157894736842e-05, |
|
"loss": 6.369, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.3832528180354267, |
|
"grad_norm": 0.7357354760169983, |
|
"learning_rate": 1.3684210526315791e-05, |
|
"loss": 5.8538, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.640901771336554, |
|
"grad_norm": 0.5254281163215637, |
|
"learning_rate": 1.4736842105263159e-05, |
|
"loss": 5.7556, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.898550724637681, |
|
"grad_norm": 0.8618260025978088, |
|
"learning_rate": 1.578947368421053e-05, |
|
"loss": 5.6495, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.1674718196457325, |
|
"grad_norm": 1.0980093479156494, |
|
"learning_rate": 1.6842105263157896e-05, |
|
"loss": 5.9024, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.42512077294686, |
|
"grad_norm": 1.3449251651763916, |
|
"learning_rate": 1.7894736842105264e-05, |
|
"loss": 5.4768, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.6827697262479875, |
|
"grad_norm": 1.2047125101089478, |
|
"learning_rate": 1.894736842105263e-05, |
|
"loss": 5.3661, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.940418679549114, |
|
"grad_norm": 1.6401121616363525, |
|
"learning_rate": 2e-05, |
|
"loss": 5.2836, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.209339774557166, |
|
"grad_norm": 1.8774281740188599, |
|
"learning_rate": 1.999831241633323e-05, |
|
"loss": 5.5386, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.209339774557166, |
|
"eval_loss": 5.1901116371154785, |
|
"eval_runtime": 8.0613, |
|
"eval_samples_per_second": 199.099, |
|
"eval_steps_per_second": 6.326, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.466988727858293, |
|
"grad_norm": 2.0122838020324707, |
|
"learning_rate": 1.9993250234920638e-05, |
|
"loss": 5.145, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.72463768115942, |
|
"grad_norm": 1.5243710279464722, |
|
"learning_rate": 1.9984815164333163e-05, |
|
"loss": 5.0726, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.982286634460547, |
|
"grad_norm": 1.613467812538147, |
|
"learning_rate": 1.9973010051548274e-05, |
|
"loss": 5.1139, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.251207729468599, |
|
"grad_norm": 1.169236183166504, |
|
"learning_rate": 1.9957838880989076e-05, |
|
"loss": 5.1586, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.508856682769727, |
|
"grad_norm": 1.2285501956939697, |
|
"learning_rate": 1.9939306773179498e-05, |
|
"loss": 4.9011, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.766505636070853, |
|
"grad_norm": 1.4358283281326294, |
|
"learning_rate": 1.9917419983016025e-05, |
|
"loss": 4.8479, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.035426731078905, |
|
"grad_norm": 1.3988443613052368, |
|
"learning_rate": 1.989218589765658e-05, |
|
"loss": 5.097, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.293075684380033, |
|
"grad_norm": 1.1683136224746704, |
|
"learning_rate": 1.9863613034027224e-05, |
|
"loss": 4.7351, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.550724637681159, |
|
"grad_norm": 1.2952402830123901, |
|
"learning_rate": 1.9831711035947552e-05, |
|
"loss": 4.6976, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 7.808373590982287, |
|
"grad_norm": 1.4266207218170166, |
|
"learning_rate": 1.979649067087574e-05, |
|
"loss": 4.6391, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.808373590982287, |
|
"eval_loss": 4.603694438934326, |
|
"eval_runtime": 8.3521, |
|
"eval_samples_per_second": 192.167, |
|
"eval_steps_per_second": 6.106, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.077294685990339, |
|
"grad_norm": 2.173689126968384, |
|
"learning_rate": 1.9757963826274357e-05, |
|
"loss": 4.8403, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 8.334943639291465, |
|
"grad_norm": 1.874500036239624, |
|
"learning_rate": 1.971614350559814e-05, |
|
"loss": 4.4785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 8.592592592592592, |
|
"grad_norm": 1.9841059446334839, |
|
"learning_rate": 1.967104382390511e-05, |
|
"loss": 4.4265, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 8.85024154589372, |
|
"grad_norm": 2.523819923400879, |
|
"learning_rate": 1.9622680003092503e-05, |
|
"loss": 4.3497, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 9.119162640901772, |
|
"grad_norm": 2.2171430587768555, |
|
"learning_rate": 1.9571068366759143e-05, |
|
"loss": 4.5573, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 9.376811594202898, |
|
"grad_norm": 2.516089916229248, |
|
"learning_rate": 1.951622633469592e-05, |
|
"loss": 4.2197, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 9.634460547504025, |
|
"grad_norm": 3.2976479530334473, |
|
"learning_rate": 1.9458172417006347e-05, |
|
"loss": 4.1526, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 9.892109500805153, |
|
"grad_norm": 2.310131311416626, |
|
"learning_rate": 1.9396926207859085e-05, |
|
"loss": 4.1294, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 10.161030595813205, |
|
"grad_norm": 2.445627450942993, |
|
"learning_rate": 1.933250837887457e-05, |
|
"loss": 4.3354, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 10.418679549114332, |
|
"grad_norm": 2.188768148422241, |
|
"learning_rate": 1.9264940672148018e-05, |
|
"loss": 4.0545, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.418679549114332, |
|
"eval_loss": 4.0533294677734375, |
|
"eval_runtime": 8.3508, |
|
"eval_samples_per_second": 192.197, |
|
"eval_steps_per_second": 6.107, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.676328502415458, |
|
"grad_norm": 3.4046390056610107, |
|
"learning_rate": 1.9194245892911077e-05, |
|
"loss": 4.0121, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 10.933977455716587, |
|
"grad_norm": 2.701680898666382, |
|
"learning_rate": 1.9120447901834708e-05, |
|
"loss": 3.9847, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 11.202898550724637, |
|
"grad_norm": 2.2474334239959717, |
|
"learning_rate": 1.9043571606975776e-05, |
|
"loss": 4.1875, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 11.460547504025765, |
|
"grad_norm": 2.2355682849884033, |
|
"learning_rate": 1.8963642955370203e-05, |
|
"loss": 3.9325, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 11.718196457326892, |
|
"grad_norm": 2.977954626083374, |
|
"learning_rate": 1.888068892427538e-05, |
|
"loss": 3.8992, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 11.97584541062802, |
|
"grad_norm": 2.2921981811523438, |
|
"learning_rate": 1.879473751206489e-05, |
|
"loss": 3.858, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 12.24476650563607, |
|
"grad_norm": 1.8335373401641846, |
|
"learning_rate": 1.8705817728778626e-05, |
|
"loss": 4.0718, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 12.502415458937199, |
|
"grad_norm": 2.2911853790283203, |
|
"learning_rate": 1.8613959586331364e-05, |
|
"loss": 3.8305, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 12.760064412238325, |
|
"grad_norm": 2.170738697052002, |
|
"learning_rate": 1.851919408838327e-05, |
|
"loss": 3.7923, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 13.028985507246377, |
|
"grad_norm": 1.8763988018035889, |
|
"learning_rate": 1.842155321987566e-05, |
|
"loss": 4.014, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.028985507246377, |
|
"eval_loss": 3.779900550842285, |
|
"eval_runtime": 8.1613, |
|
"eval_samples_per_second": 196.661, |
|
"eval_steps_per_second": 6.249, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.286634460547504, |
|
"grad_norm": 2.2329862117767334, |
|
"learning_rate": 1.8321069936235503e-05, |
|
"loss": 3.753, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 13.544283413848632, |
|
"grad_norm": 2.3311359882354736, |
|
"learning_rate": 1.821777815225245e-05, |
|
"loss": 3.7099, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 13.801932367149758, |
|
"grad_norm": 2.5093774795532227, |
|
"learning_rate": 1.8111712730632024e-05, |
|
"loss": 3.6944, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 14.07085346215781, |
|
"grad_norm": 2.2526538372039795, |
|
"learning_rate": 1.800290947022884e-05, |
|
"loss": 3.9225, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 14.328502415458937, |
|
"grad_norm": 2.5449490547180176, |
|
"learning_rate": 1.789140509396394e-05, |
|
"loss": 3.6411, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 14.586151368760065, |
|
"grad_norm": 2.722266435623169, |
|
"learning_rate": 1.777723723643014e-05, |
|
"loss": 3.6571, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 14.843800322061192, |
|
"grad_norm": 2.3767805099487305, |
|
"learning_rate": 1.766044443118978e-05, |
|
"loss": 3.6036, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 15.112721417069244, |
|
"grad_norm": 2.4693193435668945, |
|
"learning_rate": 1.7541066097768965e-05, |
|
"loss": 3.8201, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 15.37037037037037, |
|
"grad_norm": 2.3329803943634033, |
|
"learning_rate": 1.7419142528352815e-05, |
|
"loss": 3.5643, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 15.628019323671497, |
|
"grad_norm": 2.396742343902588, |
|
"learning_rate": 1.729471487418621e-05, |
|
"loss": 3.5476, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.628019323671497, |
|
"eval_loss": 3.5582447052001953, |
|
"eval_runtime": 8.3607, |
|
"eval_samples_per_second": 191.969, |
|
"eval_steps_per_second": 6.1, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.885668276972625, |
|
"grad_norm": 2.3643219470977783, |
|
"learning_rate": 1.7167825131684516e-05, |
|
"loss": 3.5246, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 16.154589371980677, |
|
"grad_norm": 3.809258460998535, |
|
"learning_rate": 1.7038516128259118e-05, |
|
"loss": 3.7499, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 16.412238325281802, |
|
"grad_norm": 2.6895406246185303, |
|
"learning_rate": 1.6906831507862446e-05, |
|
"loss": 3.499, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 16.66988727858293, |
|
"grad_norm": 2.7662012577056885, |
|
"learning_rate": 1.6772815716257414e-05, |
|
"loss": 3.4918, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 16.92753623188406, |
|
"grad_norm": 2.217256784439087, |
|
"learning_rate": 1.6636513986016215e-05, |
|
"loss": 3.4541, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 17.19645732689211, |
|
"grad_norm": 3.010216474533081, |
|
"learning_rate": 1.64979723212536e-05, |
|
"loss": 3.6718, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 17.454106280193237, |
|
"grad_norm": 3.430988073348999, |
|
"learning_rate": 1.6357237482099682e-05, |
|
"loss": 3.4517, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 17.711755233494365, |
|
"grad_norm": 2.378178119659424, |
|
"learning_rate": 1.621435696891765e-05, |
|
"loss": 3.4259, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 17.96940418679549, |
|
"grad_norm": 2.877561569213867, |
|
"learning_rate": 1.606937900627157e-05, |
|
"loss": 3.4104, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 18.238325281803544, |
|
"grad_norm": 2.5511293411254883, |
|
"learning_rate": 1.5922352526649803e-05, |
|
"loss": 3.6157, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.238325281803544, |
|
"eval_loss": 3.421673059463501, |
|
"eval_runtime": 8.3529, |
|
"eval_samples_per_second": 192.149, |
|
"eval_steps_per_second": 6.106, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.49597423510467, |
|
"grad_norm": 2.8884575366973877, |
|
"learning_rate": 1.5773327153949465e-05, |
|
"loss": 3.4023, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 18.753623188405797, |
|
"grad_norm": 2.9396398067474365, |
|
"learning_rate": 1.5622353186727542e-05, |
|
"loss": 3.3875, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.022544283413847, |
|
"grad_norm": 3.390272855758667, |
|
"learning_rate": 1.5469481581224274e-05, |
|
"loss": 3.596, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 19.280193236714975, |
|
"grad_norm": 2.9179751873016357, |
|
"learning_rate": 1.531476393416456e-05, |
|
"loss": 3.3614, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 19.537842190016104, |
|
"grad_norm": 4.342045307159424, |
|
"learning_rate": 1.5158252465343242e-05, |
|
"loss": 3.3597, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 19.79549114331723, |
|
"grad_norm": 2.955822706222534, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 3.3584, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 20.064412238325282, |
|
"grad_norm": 2.6151533126831055, |
|
"learning_rate": 1.4840059950989992e-05, |
|
"loss": 3.5532, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 20.32206119162641, |
|
"grad_norm": 3.1554718017578125, |
|
"learning_rate": 1.467848630075608e-05, |
|
"loss": 3.3281, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 20.579710144927535, |
|
"grad_norm": 2.572737455368042, |
|
"learning_rate": 1.4515333583108896e-05, |
|
"loss": 3.3162, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 20.837359098228664, |
|
"grad_norm": 3.3176584243774414, |
|
"learning_rate": 1.4350656864820733e-05, |
|
"loss": 3.3161, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 20.837359098228664, |
|
"eval_loss": 3.3334078788757324, |
|
"eval_runtime": 8.7701, |
|
"eval_samples_per_second": 183.009, |
|
"eval_steps_per_second": 5.815, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 21.106280193236714, |
|
"grad_norm": 3.9218039512634277, |
|
"learning_rate": 1.4184511727039612e-05, |
|
"loss": 3.5253, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 21.363929146537842, |
|
"grad_norm": 3.392868995666504, |
|
"learning_rate": 1.4016954246529697e-05, |
|
"loss": 3.2947, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 21.62157809983897, |
|
"grad_norm": 3.4435510635375977, |
|
"learning_rate": 1.3848040976744459e-05, |
|
"loss": 3.3021, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 21.879227053140095, |
|
"grad_norm": 3.319772243499756, |
|
"learning_rate": 1.3677828928738934e-05, |
|
"loss": 3.3022, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 22.14814814814815, |
|
"grad_norm": 3.0634384155273438, |
|
"learning_rate": 1.3506375551927546e-05, |
|
"loss": 3.4676, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 22.405797101449274, |
|
"grad_norm": 4.093255996704102, |
|
"learning_rate": 1.3333738714693958e-05, |
|
"loss": 3.2705, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 22.663446054750402, |
|
"grad_norm": 3.1328248977661133, |
|
"learning_rate": 1.3159976684859528e-05, |
|
"loss": 3.2759, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 22.92109500805153, |
|
"grad_norm": 3.3607850074768066, |
|
"learning_rate": 1.2985148110016947e-05, |
|
"loss": 3.26, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 23.19001610305958, |
|
"grad_norm": 4.250182628631592, |
|
"learning_rate": 1.2809311997735697e-05, |
|
"loss": 3.4547, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 23.44766505636071, |
|
"grad_norm": 3.2897439002990723, |
|
"learning_rate": 1.2632527695645993e-05, |
|
"loss": 3.2482, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 23.44766505636071, |
|
"eval_loss": 3.2658839225769043, |
|
"eval_runtime": 8.3079, |
|
"eval_samples_per_second": 193.189, |
|
"eval_steps_per_second": 6.139, |
|
"step": 900 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.7940579890639667e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|