{
  "best_metric": 0.21579746901988983,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.16359918200409,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008179959100204499,
      "grad_norm": 0.4496954381465912,
      "learning_rate": 1.013e-05,
      "loss": 0.8848,
      "step": 1
    },
    {
      "epoch": 0.0008179959100204499,
      "eval_loss": 0.5526403784751892,
      "eval_runtime": 164.1948,
      "eval_samples_per_second": 3.137,
      "eval_steps_per_second": 0.786,
      "step": 1
    },
    {
      "epoch": 0.0016359918200408998,
      "grad_norm": 0.38289597630500793,
      "learning_rate": 2.026e-05,
      "loss": 0.6338,
      "step": 2
    },
    {
      "epoch": 0.00245398773006135,
      "grad_norm": 0.678950309753418,
      "learning_rate": 3.039e-05,
      "loss": 1.371,
      "step": 3
    },
    {
      "epoch": 0.0032719836400817996,
      "grad_norm": 0.5606029629707336,
      "learning_rate": 4.052e-05,
      "loss": 1.0274,
      "step": 4
    },
    {
      "epoch": 0.00408997955010225,
      "grad_norm": 0.5244286060333252,
      "learning_rate": 5.065e-05,
      "loss": 0.7953,
      "step": 5
    },
    {
      "epoch": 0.0049079754601227,
      "grad_norm": 0.49360060691833496,
      "learning_rate": 6.078e-05,
      "loss": 0.8689,
      "step": 6
    },
    {
      "epoch": 0.0057259713701431495,
      "grad_norm": 1.8924438953399658,
      "learning_rate": 7.091e-05,
      "loss": 1.2078,
      "step": 7
    },
    {
      "epoch": 0.006543967280163599,
      "grad_norm": 0.60125732421875,
      "learning_rate": 8.104e-05,
      "loss": 0.951,
      "step": 8
    },
    {
      "epoch": 0.007361963190184049,
      "grad_norm": 0.5913432836532593,
      "learning_rate": 9.117e-05,
      "loss": 0.9561,
      "step": 9
    },
    {
      "epoch": 0.0081799591002045,
      "grad_norm": 0.6411980390548706,
      "learning_rate": 0.0001013,
      "loss": 0.9956,
      "step": 10
    },
    {
      "epoch": 0.00899795501022495,
      "grad_norm": 0.6919670701026917,
      "learning_rate": 0.00010076684210526316,
      "loss": 0.8319,
      "step": 11
    },
    {
      "epoch": 0.0098159509202454,
      "grad_norm": 0.6590487360954285,
      "learning_rate": 0.0001002336842105263,
      "loss": 0.7345,
      "step": 12
    },
    {
      "epoch": 0.01063394683026585,
      "grad_norm": 0.82487952709198,
      "learning_rate": 9.970052631578946e-05,
      "loss": 0.8608,
      "step": 13
    },
    {
      "epoch": 0.011451942740286299,
      "grad_norm": 0.7508109211921692,
      "learning_rate": 9.916736842105263e-05,
      "loss": 0.9751,
      "step": 14
    },
    {
      "epoch": 0.012269938650306749,
      "grad_norm": 0.7014967203140259,
      "learning_rate": 9.863421052631579e-05,
      "loss": 0.9172,
      "step": 15
    },
    {
      "epoch": 0.013087934560327199,
      "grad_norm": 0.6674103140830994,
      "learning_rate": 9.810105263157895e-05,
      "loss": 0.5077,
      "step": 16
    },
    {
      "epoch": 0.013905930470347648,
      "grad_norm": 0.8871914744377136,
      "learning_rate": 9.756789473684211e-05,
      "loss": 1.1221,
      "step": 17
    },
    {
      "epoch": 0.014723926380368098,
      "grad_norm": 0.8045795559883118,
      "learning_rate": 9.703473684210525e-05,
      "loss": 0.9631,
      "step": 18
    },
    {
      "epoch": 0.015541922290388548,
      "grad_norm": 0.8533051013946533,
      "learning_rate": 9.650157894736842e-05,
      "loss": 0.9807,
      "step": 19
    },
    {
      "epoch": 0.016359918200409,
      "grad_norm": 0.5677836537361145,
      "learning_rate": 9.596842105263158e-05,
      "loss": 0.4352,
      "step": 20
    },
    {
      "epoch": 0.01717791411042945,
      "grad_norm": 1.4102709293365479,
      "learning_rate": 9.543526315789474e-05,
      "loss": 1.2272,
      "step": 21
    },
    {
      "epoch": 0.0179959100204499,
      "grad_norm": 1.0736172199249268,
      "learning_rate": 9.49021052631579e-05,
      "loss": 1.1928,
      "step": 22
    },
    {
      "epoch": 0.01881390593047035,
      "grad_norm": 1.2938551902770996,
      "learning_rate": 9.436894736842105e-05,
      "loss": 0.9907,
      "step": 23
    },
    {
      "epoch": 0.0196319018404908,
      "grad_norm": 0.9903382658958435,
      "learning_rate": 9.38357894736842e-05,
      "loss": 0.8166,
      "step": 24
    },
    {
      "epoch": 0.02044989775051125,
      "grad_norm": 0.8400692939758301,
      "learning_rate": 9.330263157894737e-05,
      "loss": 0.9552,
      "step": 25
    },
    {
      "epoch": 0.0212678936605317,
      "grad_norm": 0.6418869495391846,
      "learning_rate": 9.276947368421051e-05,
      "loss": 0.4748,
      "step": 26
    },
    {
      "epoch": 0.022085889570552148,
      "grad_norm": 0.7171022295951843,
      "learning_rate": 9.223631578947369e-05,
      "loss": 0.6061,
      "step": 27
    },
    {
      "epoch": 0.022903885480572598,
      "grad_norm": 0.9005898237228394,
      "learning_rate": 9.170315789473684e-05,
      "loss": 0.7294,
      "step": 28
    },
    {
      "epoch": 0.023721881390593048,
      "grad_norm": 0.853922426700592,
      "learning_rate": 9.117e-05,
      "loss": 0.8019,
      "step": 29
    },
    {
      "epoch": 0.024539877300613498,
      "grad_norm": 0.6506239175796509,
      "learning_rate": 9.063684210526316e-05,
      "loss": 0.5485,
      "step": 30
    },
    {
      "epoch": 0.025357873210633947,
      "grad_norm": 0.8468363285064697,
      "learning_rate": 9.010368421052632e-05,
      "loss": 0.8216,
      "step": 31
    },
    {
      "epoch": 0.026175869120654397,
      "grad_norm": 1.2255985736846924,
      "learning_rate": 8.957052631578946e-05,
      "loss": 0.499,
      "step": 32
    },
    {
      "epoch": 0.026993865030674847,
      "grad_norm": 0.7245408296585083,
      "learning_rate": 8.903736842105263e-05,
      "loss": 0.4545,
      "step": 33
    },
    {
      "epoch": 0.027811860940695297,
      "grad_norm": 0.5149531960487366,
      "learning_rate": 8.850421052631579e-05,
      "loss": 0.3527,
      "step": 34
    },
    {
      "epoch": 0.028629856850715747,
      "grad_norm": 0.7418886423110962,
      "learning_rate": 8.797105263157895e-05,
      "loss": 0.6099,
      "step": 35
    },
    {
      "epoch": 0.029447852760736196,
      "grad_norm": 0.3611685037612915,
      "learning_rate": 8.743789473684211e-05,
      "loss": 0.152,
      "step": 36
    },
    {
      "epoch": 0.030265848670756646,
      "grad_norm": 0.2011786550283432,
      "learning_rate": 8.690473684210526e-05,
      "loss": 0.0166,
      "step": 37
    },
    {
      "epoch": 0.031083844580777096,
      "grad_norm": 0.09635209292173386,
      "learning_rate": 8.637157894736842e-05,
      "loss": 0.0076,
      "step": 38
    },
    {
      "epoch": 0.03190184049079755,
      "grad_norm": 0.08170394599437714,
      "learning_rate": 8.583842105263158e-05,
      "loss": 0.0055,
      "step": 39
    },
    {
      "epoch": 0.032719836400818,
      "grad_norm": 0.10278013348579407,
      "learning_rate": 8.530526315789472e-05,
      "loss": 0.0038,
      "step": 40
    },
    {
      "epoch": 0.03353783231083845,
      "grad_norm": 0.22206488251686096,
      "learning_rate": 8.47721052631579e-05,
      "loss": 0.0034,
      "step": 41
    },
    {
      "epoch": 0.0343558282208589,
      "grad_norm": 0.06311889737844467,
      "learning_rate": 8.423894736842105e-05,
      "loss": 0.0029,
      "step": 42
    },
    {
      "epoch": 0.03517382413087935,
      "grad_norm": 0.12246158719062805,
      "learning_rate": 8.37057894736842e-05,
      "loss": 0.0043,
      "step": 43
    },
    {
      "epoch": 0.0359918200408998,
      "grad_norm": 0.08296216279268265,
      "learning_rate": 8.317263157894737e-05,
      "loss": 0.0021,
      "step": 44
    },
    {
      "epoch": 0.03680981595092025,
      "grad_norm": 0.053368937224149704,
      "learning_rate": 8.263947368421053e-05,
      "loss": 0.001,
      "step": 45
    },
    {
      "epoch": 0.0376278118609407,
      "grad_norm": 0.0612197071313858,
      "learning_rate": 8.210631578947368e-05,
      "loss": 0.001,
      "step": 46
    },
    {
      "epoch": 0.03844580777096115,
      "grad_norm": 0.03648490086197853,
      "learning_rate": 8.157315789473684e-05,
      "loss": 0.001,
      "step": 47
    },
    {
      "epoch": 0.0392638036809816,
      "grad_norm": 0.020957158878445625,
      "learning_rate": 8.104e-05,
      "loss": 0.0004,
      "step": 48
    },
    {
      "epoch": 0.04008179959100205,
      "grad_norm": 0.31076347827911377,
      "learning_rate": 8.050684210526316e-05,
      "loss": 0.0053,
      "step": 49
    },
    {
      "epoch": 0.0408997955010225,
      "grad_norm": 0.009556309320032597,
      "learning_rate": 7.997368421052632e-05,
      "loss": 0.0002,
      "step": 50
    },
    {
      "epoch": 0.0408997955010225,
      "eval_loss": 0.31697726249694824,
      "eval_runtime": 164.5433,
      "eval_samples_per_second": 3.13,
      "eval_steps_per_second": 0.784,
      "step": 50
    },
    {
      "epoch": 0.04171779141104295,
      "grad_norm": 1.0068371295928955,
      "learning_rate": 7.944052631578947e-05,
      "loss": 0.9885,
      "step": 51
    },
    {
      "epoch": 0.0425357873210634,
      "grad_norm": 0.724956214427948,
      "learning_rate": 7.890736842105263e-05,
      "loss": 0.8083,
      "step": 52
    },
    {
      "epoch": 0.043353783231083846,
      "grad_norm": 0.5116149187088013,
      "learning_rate": 7.837421052631579e-05,
      "loss": 0.7497,
      "step": 53
    },
    {
      "epoch": 0.044171779141104296,
      "grad_norm": 0.5890445709228516,
      "learning_rate": 7.784105263157893e-05,
      "loss": 1.0557,
      "step": 54
    },
    {
      "epoch": 0.044989775051124746,
      "grad_norm": 0.6366136074066162,
      "learning_rate": 7.730789473684211e-05,
      "loss": 1.0897,
      "step": 55
    },
    {
      "epoch": 0.045807770961145196,
      "grad_norm": 0.503930926322937,
      "learning_rate": 7.677473684210526e-05,
      "loss": 0.909,
      "step": 56
    },
    {
      "epoch": 0.046625766871165646,
      "grad_norm": 0.3874259293079376,
      "learning_rate": 7.624157894736842e-05,
      "loss": 0.6214,
      "step": 57
    },
    {
      "epoch": 0.047443762781186095,
      "grad_norm": 0.46610915660858154,
      "learning_rate": 7.570842105263158e-05,
      "loss": 0.5465,
      "step": 58
    },
    {
      "epoch": 0.048261758691206545,
      "grad_norm": 0.38099950551986694,
      "learning_rate": 7.517526315789474e-05,
      "loss": 0.3863,
      "step": 59
    },
    {
      "epoch": 0.049079754601226995,
      "grad_norm": 0.7457447648048401,
      "learning_rate": 7.464210526315789e-05,
      "loss": 0.4988,
      "step": 60
    },
    {
      "epoch": 0.049897750511247445,
      "grad_norm": 0.48318201303482056,
      "learning_rate": 7.410894736842106e-05,
      "loss": 0.6316,
      "step": 61
    },
    {
      "epoch": 0.050715746421267895,
      "grad_norm": 0.41538336873054504,
      "learning_rate": 7.35757894736842e-05,
      "loss": 0.3852,
      "step": 62
    },
    {
      "epoch": 0.051533742331288344,
      "grad_norm": 0.40224334597587585,
      "learning_rate": 7.304263157894737e-05,
      "loss": 0.4715,
      "step": 63
    },
    {
      "epoch": 0.052351738241308794,
      "grad_norm": 0.9810552597045898,
      "learning_rate": 7.250947368421053e-05,
      "loss": 0.6195,
      "step": 64
    },
    {
      "epoch": 0.053169734151329244,
      "grad_norm": 0.930385172367096,
      "learning_rate": 7.197631578947368e-05,
      "loss": 0.6228,
      "step": 65
    },
    {
      "epoch": 0.053987730061349694,
      "grad_norm": 0.5943039655685425,
      "learning_rate": 7.144315789473684e-05,
      "loss": 0.7253,
      "step": 66
    },
    {
      "epoch": 0.054805725971370144,
      "grad_norm": 0.9984376430511475,
      "learning_rate": 7.091e-05,
      "loss": 0.9143,
      "step": 67
    },
    {
      "epoch": 0.05562372188139059,
      "grad_norm": 0.7382859587669373,
      "learning_rate": 7.037684210526316e-05,
      "loss": 0.9344,
      "step": 68
    },
    {
      "epoch": 0.05644171779141104,
      "grad_norm": 0.5910463333129883,
      "learning_rate": 6.984368421052632e-05,
      "loss": 0.5557,
      "step": 69
    },
    {
      "epoch": 0.05725971370143149,
      "grad_norm": 0.5349307060241699,
      "learning_rate": 6.931052631578947e-05,
      "loss": 0.6016,
      "step": 70
    },
    {
      "epoch": 0.05807770961145194,
      "grad_norm": 0.5327895283699036,
      "learning_rate": 6.877736842105263e-05,
      "loss": 0.5221,
      "step": 71
    },
    {
      "epoch": 0.05889570552147239,
      "grad_norm": 0.6906758546829224,
      "learning_rate": 6.824421052631579e-05,
      "loss": 0.8609,
      "step": 72
    },
    {
      "epoch": 0.05971370143149284,
      "grad_norm": 2.961278200149536,
      "learning_rate": 6.771105263157895e-05,
      "loss": 0.707,
      "step": 73
    },
    {
      "epoch": 0.06053169734151329,
      "grad_norm": 0.5922215580940247,
      "learning_rate": 6.71778947368421e-05,
      "loss": 0.6751,
      "step": 74
    },
    {
      "epoch": 0.06134969325153374,
      "grad_norm": 0.7490167617797852,
      "learning_rate": 6.664473684210527e-05,
      "loss": 0.7576,
      "step": 75
    },
    {
      "epoch": 0.06216768916155419,
      "grad_norm": 0.7223999500274658,
      "learning_rate": 6.611157894736842e-05,
      "loss": 0.6025,
      "step": 76
    },
    {
      "epoch": 0.06298568507157465,
      "grad_norm": 0.6571590304374695,
      "learning_rate": 6.557842105263158e-05,
      "loss": 0.7033,
      "step": 77
    },
    {
      "epoch": 0.0638036809815951,
      "grad_norm": 0.5444533824920654,
      "learning_rate": 6.504526315789474e-05,
      "loss": 0.5138,
      "step": 78
    },
    {
      "epoch": 0.06462167689161555,
      "grad_norm": 0.5655301213264465,
      "learning_rate": 6.451210526315789e-05,
      "loss": 0.4987,
      "step": 79
    },
    {
      "epoch": 0.065439672801636,
      "grad_norm": 0.632288932800293,
      "learning_rate": 6.397894736842105e-05,
      "loss": 0.4189,
      "step": 80
    },
    {
      "epoch": 0.06625766871165645,
      "grad_norm": 0.7677326202392578,
      "learning_rate": 6.344578947368421e-05,
      "loss": 0.3,
      "step": 81
    },
    {
      "epoch": 0.0670756646216769,
      "grad_norm": 0.4775019884109497,
      "learning_rate": 6.291263157894737e-05,
      "loss": 0.3159,
      "step": 82
    },
    {
      "epoch": 0.06789366053169735,
      "grad_norm": 0.41117969155311584,
      "learning_rate": 6.237947368421053e-05,
      "loss": 0.317,
      "step": 83
    },
    {
      "epoch": 0.0687116564417178,
      "grad_norm": 0.1944044977426529,
      "learning_rate": 6.184631578947368e-05,
      "loss": 0.074,
      "step": 84
    },
    {
      "epoch": 0.06952965235173825,
      "grad_norm": 0.267262727022171,
      "learning_rate": 6.131315789473684e-05,
      "loss": 0.1004,
      "step": 85
    },
    {
      "epoch": 0.0703476482617587,
      "grad_norm": 0.4343488812446594,
      "learning_rate": 6.078e-05,
      "loss": 0.1652,
      "step": 86
    },
    {
      "epoch": 0.07116564417177915,
      "grad_norm": 0.016880128532648087,
      "learning_rate": 6.024684210526315e-05,
      "loss": 0.0009,
      "step": 87
    },
    {
      "epoch": 0.0719836400817996,
      "grad_norm": 0.22920526564121246,
      "learning_rate": 5.9713684210526305e-05,
      "loss": 0.0066,
      "step": 88
    },
    {
      "epoch": 0.07280163599182005,
      "grad_norm": 0.1432953178882599,
      "learning_rate": 5.918052631578947e-05,
      "loss": 0.002,
      "step": 89
    },
    {
      "epoch": 0.0736196319018405,
      "grad_norm": 0.015287825837731361,
      "learning_rate": 5.8647368421052634e-05,
      "loss": 0.0007,
      "step": 90
    },
    {
      "epoch": 0.07443762781186095,
      "grad_norm": 0.15962694585323334,
      "learning_rate": 5.811421052631579e-05,
      "loss": 0.0084,
      "step": 91
    },
    {
      "epoch": 0.0752556237218814,
      "grad_norm": 0.0835452675819397,
      "learning_rate": 5.758105263157894e-05,
      "loss": 0.0026,
      "step": 92
    },
    {
      "epoch": 0.07607361963190185,
      "grad_norm": 0.3013435900211334,
      "learning_rate": 5.70478947368421e-05,
      "loss": 0.0046,
      "step": 93
    },
    {
      "epoch": 0.0768916155419223,
      "grad_norm": 0.007186358794569969,
      "learning_rate": 5.6514736842105256e-05,
      "loss": 0.0004,
      "step": 94
    },
    {
      "epoch": 0.07770961145194274,
      "grad_norm": 0.0493503175675869,
      "learning_rate": 5.5981578947368424e-05,
      "loss": 0.0019,
      "step": 95
    },
    {
      "epoch": 0.0785276073619632,
      "grad_norm": 0.09706468880176544,
      "learning_rate": 5.544842105263158e-05,
      "loss": 0.0017,
      "step": 96
    },
    {
      "epoch": 0.07934560327198364,
      "grad_norm": 0.0098764318972826,
      "learning_rate": 5.491526315789474e-05,
      "loss": 0.0004,
      "step": 97
    },
    {
      "epoch": 0.0801635991820041,
      "grad_norm": 0.16427290439605713,
      "learning_rate": 5.438210526315789e-05,
      "loss": 0.0069,
      "step": 98
    },
    {
      "epoch": 0.08098159509202454,
      "grad_norm": 0.02355344407260418,
      "learning_rate": 5.384894736842105e-05,
      "loss": 0.0006,
      "step": 99
    },
    {
      "epoch": 0.081799591002045,
      "grad_norm": 0.022215725854039192,
      "learning_rate": 5.331578947368421e-05,
      "loss": 0.0007,
      "step": 100
    },
    {
      "epoch": 0.081799591002045,
      "eval_loss": 0.2767239809036255,
      "eval_runtime": 164.6277,
      "eval_samples_per_second": 3.128,
      "eval_steps_per_second": 0.784,
      "step": 100
    },
    {
      "epoch": 0.08261758691206544,
      "grad_norm": 2.208616256713867,
      "learning_rate": 5.278263157894736e-05,
      "loss": 0.8554,
      "step": 101
    },
    {
      "epoch": 0.0834355828220859,
      "grad_norm": 0.6053175330162048,
      "learning_rate": 5.224947368421053e-05,
      "loss": 0.8102,
      "step": 102
    },
    {
      "epoch": 0.08425357873210634,
      "grad_norm": 0.443279892206192,
      "learning_rate": 5.171631578947368e-05,
      "loss": 0.6613,
      "step": 103
    },
    {
      "epoch": 0.0850715746421268,
      "grad_norm": 0.5657853484153748,
      "learning_rate": 5.1183157894736844e-05,
      "loss": 0.904,
      "step": 104
    },
    {
      "epoch": 0.08588957055214724,
      "grad_norm": 0.5122092366218567,
      "learning_rate": 5.065e-05,
      "loss": 0.7585,
      "step": 105
    },
    {
      "epoch": 0.08670756646216769,
      "grad_norm": 0.4323250353336334,
      "learning_rate": 5.011684210526315e-05,
      "loss": 0.6443,
      "step": 106
    },
    {
      "epoch": 0.08752556237218814,
      "grad_norm": 0.4903777539730072,
      "learning_rate": 4.958368421052631e-05,
      "loss": 0.8381,
      "step": 107
    },
    {
      "epoch": 0.08834355828220859,
      "grad_norm": 0.49713271856307983,
      "learning_rate": 4.9050526315789473e-05,
      "loss": 0.6974,
      "step": 108
    },
    {
      "epoch": 0.08916155419222904,
      "grad_norm": 0.5440729260444641,
      "learning_rate": 4.851736842105263e-05,
      "loss": 0.7326,
      "step": 109
    },
    {
      "epoch": 0.08997955010224949,
      "grad_norm": 0.5971400737762451,
      "learning_rate": 4.798421052631579e-05,
      "loss": 0.8564,
      "step": 110
    },
    {
      "epoch": 0.09079754601226994,
      "grad_norm": 0.4081849157810211,
      "learning_rate": 4.745105263157895e-05,
      "loss": 0.5119,
      "step": 111
    },
    {
      "epoch": 0.09161554192229039,
      "grad_norm": 0.42180880904197693,
      "learning_rate": 4.69178947368421e-05,
      "loss": 0.479,
      "step": 112
    },
    {
      "epoch": 0.09243353783231084,
      "grad_norm": 0.48688748478889465,
      "learning_rate": 4.638473684210526e-05,
      "loss": 0.5893,
      "step": 113
    },
    {
      "epoch": 0.09325153374233129,
      "grad_norm": 0.3098672926425934,
      "learning_rate": 4.585157894736842e-05,
      "loss": 0.3024,
      "step": 114
    },
    {
      "epoch": 0.09406952965235174,
      "grad_norm": 0.3973608911037445,
      "learning_rate": 4.531842105263158e-05,
      "loss": 0.4749,
      "step": 115
    },
    {
      "epoch": 0.09488752556237219,
      "grad_norm": 0.45003950595855713,
      "learning_rate": 4.478526315789473e-05,
      "loss": 0.5641,
      "step": 116
    },
    {
      "epoch": 0.09570552147239264,
      "grad_norm": 0.4103372395038605,
      "learning_rate": 4.425210526315789e-05,
      "loss": 0.3926,
      "step": 117
    },
    {
      "epoch": 0.09652351738241309,
      "grad_norm": 0.5700619220733643,
      "learning_rate": 4.3718947368421054e-05,
      "loss": 0.7772,
      "step": 118
    },
    {
      "epoch": 0.09734151329243354,
      "grad_norm": 0.5302966833114624,
      "learning_rate": 4.318578947368421e-05,
      "loss": 0.5547,
      "step": 119
    },
    {
      "epoch": 0.09815950920245399,
      "grad_norm": 0.5432780981063843,
      "learning_rate": 4.265263157894736e-05,
      "loss": 0.7294,
      "step": 120
    },
    {
      "epoch": 0.09897750511247444,
      "grad_norm": 0.5698531866073608,
      "learning_rate": 4.211947368421052e-05,
      "loss": 0.6319,
      "step": 121
    },
    {
      "epoch": 0.09979550102249489,
      "grad_norm": 0.4369616210460663,
      "learning_rate": 4.1586315789473684e-05,
      "loss": 0.4439,
      "step": 122
    },
    {
      "epoch": 0.10061349693251534,
      "grad_norm": 0.5331543684005737,
      "learning_rate": 4.105315789473684e-05,
      "loss": 0.5258,
      "step": 123
    },
    {
      "epoch": 0.10143149284253579,
      "grad_norm": 0.5208017230033875,
      "learning_rate": 4.052e-05,
      "loss": 0.5332,
      "step": 124
    },
    {
      "epoch": 0.10224948875255624,
      "grad_norm": 0.6325660943984985,
      "learning_rate": 3.998684210526316e-05,
      "loss": 0.7877,
      "step": 125
    },
    {
      "epoch": 0.10306748466257669,
      "grad_norm": 0.8065600991249084,
      "learning_rate": 3.945368421052631e-05,
      "loss": 0.8469,
      "step": 126
    },
    {
      "epoch": 0.10388548057259714,
      "grad_norm": 0.5705050230026245,
      "learning_rate": 3.892052631578947e-05,
      "loss": 0.5351,
      "step": 127
    },
    {
      "epoch": 0.10470347648261759,
      "grad_norm": 0.60113525390625,
      "learning_rate": 3.838736842105263e-05,
      "loss": 0.5405,
      "step": 128
    },
    {
      "epoch": 0.10552147239263804,
      "grad_norm": 0.5440464019775391,
      "learning_rate": 3.785421052631579e-05,
      "loss": 0.5673,
      "step": 129
    },
    {
      "epoch": 0.10633946830265849,
      "grad_norm": 0.6333761215209961,
      "learning_rate": 3.732105263157894e-05,
      "loss": 0.7063,
      "step": 130
    },
    {
      "epoch": 0.10715746421267894,
      "grad_norm": 0.3166744112968445,
      "learning_rate": 3.67878947368421e-05,
      "loss": 0.2672,
      "step": 131
    },
    {
      "epoch": 0.10797546012269939,
      "grad_norm": 0.47239211201667786,
      "learning_rate": 3.6254736842105264e-05,
      "loss": 0.2944,
      "step": 132
    },
    {
      "epoch": 0.10879345603271984,
      "grad_norm": 0.6278908252716064,
      "learning_rate": 3.572157894736842e-05,
      "loss": 0.7175,
      "step": 133
    },
    {
      "epoch": 0.10961145194274029,
      "grad_norm": 6.725371837615967,
      "learning_rate": 3.518842105263158e-05,
      "loss": 0.5899,
      "step": 134
    },
    {
      "epoch": 0.11042944785276074,
      "grad_norm": 0.3569982945919037,
      "learning_rate": 3.465526315789473e-05,
      "loss": 0.2153,
      "step": 135
    },
    {
      "epoch": 0.11124744376278119,
      "grad_norm": 0.8054752945899963,
      "learning_rate": 3.4122105263157894e-05,
      "loss": 0.6044,
      "step": 136
    },
    {
      "epoch": 0.11206543967280164,
      "grad_norm": 0.37957316637039185,
      "learning_rate": 3.358894736842105e-05,
      "loss": 0.2575,
      "step": 137
    },
    {
      "epoch": 0.11288343558282209,
      "grad_norm": 0.33962732553482056,
      "learning_rate": 3.305578947368421e-05,
      "loss": 0.1472,
      "step": 138
    },
    {
      "epoch": 0.11370143149284254,
      "grad_norm": 0.1770872324705124,
      "learning_rate": 3.252263157894737e-05,
      "loss": 0.008,
      "step": 139
    },
    {
      "epoch": 0.11451942740286299,
      "grad_norm": 0.09861530363559723,
      "learning_rate": 3.198947368421052e-05,
      "loss": 0.0033,
      "step": 140
    },
    {
      "epoch": 0.11533742331288344,
      "grad_norm": 0.1032843068242073,
      "learning_rate": 3.1456315789473684e-05,
      "loss": 0.0027,
      "step": 141
    },
    {
      "epoch": 0.11615541922290389,
      "grad_norm": 0.11989390105009079,
      "learning_rate": 3.092315789473684e-05,
      "loss": 0.0018,
      "step": 142
    },
    {
      "epoch": 0.11697341513292434,
      "grad_norm": 0.01104150153696537,
      "learning_rate": 3.039e-05,
      "loss": 0.0004,
      "step": 143
    },
    {
      "epoch": 0.11779141104294479,
      "grad_norm": 0.01930282451212406,
      "learning_rate": 2.9856842105263153e-05,
      "loss": 0.0005,
      "step": 144
    },
    {
      "epoch": 0.11860940695296524,
      "grad_norm": 0.15893378853797913,
      "learning_rate": 2.9323684210526317e-05,
      "loss": 0.0031,
      "step": 145
    },
    {
      "epoch": 0.11942740286298568,
      "grad_norm": 0.03184283524751663,
      "learning_rate": 2.879052631578947e-05,
      "loss": 0.0009,
      "step": 146
    },
    {
      "epoch": 0.12024539877300613,
      "grad_norm": 0.06102335825562477,
      "learning_rate": 2.8257368421052628e-05,
      "loss": 0.0013,
      "step": 147
    },
    {
      "epoch": 0.12106339468302658,
      "grad_norm": 0.0802997276186943,
      "learning_rate": 2.772421052631579e-05,
      "loss": 0.0015,
      "step": 148
    },
    {
      "epoch": 0.12188139059304703,
      "grad_norm": 0.02622400037944317,
      "learning_rate": 2.7191052631578946e-05,
      "loss": 0.0005,
      "step": 149
    },
    {
      "epoch": 0.12269938650306748,
      "grad_norm": 0.019685884937644005,
      "learning_rate": 2.6657894736842104e-05,
      "loss": 0.0006,
      "step": 150
    },
    {
      "epoch": 0.12269938650306748,
      "eval_loss": 0.22924812138080597,
      "eval_runtime": 164.6274,
      "eval_samples_per_second": 3.128,
      "eval_steps_per_second": 0.784,
      "step": 150
    },
    {
      "epoch": 0.12351738241308793,
      "grad_norm": 0.42591241002082825,
      "learning_rate": 2.6124736842105265e-05,
      "loss": 0.847,
      "step": 151
    },
    {
      "epoch": 0.12433537832310838,
      "grad_norm": 0.37749016284942627,
      "learning_rate": 2.5591578947368422e-05,
      "loss": 0.643,
      "step": 152
    },
    {
      "epoch": 0.12515337423312883,
      "grad_norm": 7.373504638671875,
      "learning_rate": 2.5058421052631576e-05,
      "loss": 1.1166,
      "step": 153
    },
    {
      "epoch": 0.1259713701431493,
      "grad_norm": 0.4218708276748657,
      "learning_rate": 2.4525263157894737e-05,
      "loss": 0.5926,
      "step": 154
    },
    {
      "epoch": 0.12678936605316973,
      "grad_norm": 0.415558397769928,
      "learning_rate": 2.3992105263157894e-05,
      "loss": 0.539,
      "step": 155
    },
    {
      "epoch": 0.1276073619631902,
      "grad_norm": 0.367865651845932,
      "learning_rate": 2.345894736842105e-05,
      "loss": 0.4419,
      "step": 156
    },
    {
      "epoch": 0.12842535787321063,
      "grad_norm": 0.47910165786743164,
      "learning_rate": 2.292578947368421e-05,
      "loss": 0.72,
      "step": 157
    },
    {
      "epoch": 0.1292433537832311,
      "grad_norm": 0.547726035118103,
      "learning_rate": 2.2392631578947366e-05,
      "loss": 0.7997,
      "step": 158
    },
    {
      "epoch": 0.13006134969325153,
      "grad_norm": 0.49771469831466675,
      "learning_rate": 2.1859473684210527e-05,
      "loss": 0.7018,
      "step": 159
    },
    {
      "epoch": 0.130879345603272,
      "grad_norm": 0.6116258502006531,
      "learning_rate": 2.132631578947368e-05,
      "loss": 0.907,
      "step": 160
    },
    {
      "epoch": 0.13169734151329243,
      "grad_norm": 0.5866420865058899,
      "learning_rate": 2.0793157894736842e-05,
      "loss": 0.6216,
      "step": 161
    },
    {
      "epoch": 0.1325153374233129,
      "grad_norm": 0.6702650785446167,
      "learning_rate": 2.026e-05,
      "loss": 0.6452,
      "step": 162
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 0.5496453046798706,
      "learning_rate": 1.9726842105263157e-05,
      "loss": 0.6332,
      "step": 163
    },
    {
      "epoch": 0.1341513292433538,
      "grad_norm": 0.37603631615638733,
      "learning_rate": 1.9193684210526314e-05,
      "loss": 0.3876,
      "step": 164
    },
    {
      "epoch": 0.13496932515337423,
      "grad_norm": 0.4126095473766327,
      "learning_rate": 1.866052631578947e-05,
      "loss": 0.4168,
      "step": 165
    },
    {
      "epoch": 0.1357873210633947,
      "grad_norm": 0.6646726131439209,
      "learning_rate": 1.8127368421052632e-05,
      "loss": 0.8142,
      "step": 166
    },
    {
      "epoch": 0.13660531697341513,
      "grad_norm": 0.3889711797237396,
      "learning_rate": 1.759421052631579e-05,
      "loss": 0.4465,
      "step": 167
    },
    {
      "epoch": 0.1374233128834356,
      "grad_norm": 0.44813233613967896,
      "learning_rate": 1.7061052631578947e-05,
      "loss": 0.4387,
      "step": 168
    },
    {
      "epoch": 0.13824130879345603,
      "grad_norm": 0.4522460699081421,
      "learning_rate": 1.6527894736842104e-05,
      "loss": 0.4947,
      "step": 169
    },
    {
      "epoch": 0.1390593047034765,
      "grad_norm": 0.5248112082481384,
      "learning_rate": 1.599473684210526e-05,
      "loss": 0.5836,
      "step": 170
    },
    {
      "epoch": 0.13987730061349693,
      "grad_norm": 0.6201620697975159,
      "learning_rate": 1.546157894736842e-05,
      "loss": 0.8913,
      "step": 171
    },
    {
      "epoch": 0.1406952965235174,
      "grad_norm": 0.565233051776886,
      "learning_rate": 1.4928421052631576e-05,
      "loss": 0.5188,
      "step": 172
    },
    {
      "epoch": 0.14151329243353783,
      "grad_norm": 0.4735608994960785,
      "learning_rate": 1.4395263157894735e-05,
      "loss": 0.3935,
      "step": 173
    },
    {
      "epoch": 0.1423312883435583,
      "grad_norm": 0.6751230955123901,
      "learning_rate": 1.3862105263157895e-05,
      "loss": 0.7061,
      "step": 174
    },
    {
      "epoch": 0.14314928425357873,
      "grad_norm": 0.5896326303482056,
      "learning_rate": 1.3328947368421052e-05,
      "loss": 0.6753,
      "step": 175
    },
    {
      "epoch": 0.1439672801635992,
      "grad_norm": 0.46921586990356445,
      "learning_rate": 1.2795789473684211e-05,
      "loss": 0.4023,
      "step": 176
    },
    {
      "epoch": 0.14478527607361963,
      "grad_norm": 0.8884191513061523,
      "learning_rate": 1.2262631578947368e-05,
      "loss": 0.5684,
      "step": 177
    },
    {
      "epoch": 0.1456032719836401,
      "grad_norm": 0.34956350922584534,
      "learning_rate": 1.1729473684210526e-05,
      "loss": 0.1983,
      "step": 178
    },
    {
      "epoch": 0.14642126789366053,
      "grad_norm": 0.6150217056274414,
      "learning_rate": 1.1196315789473683e-05,
      "loss": 0.6407,
      "step": 179
    },
    {
      "epoch": 0.147239263803681,
      "grad_norm": 0.41867491602897644,
      "learning_rate": 1.066315789473684e-05,
      "loss": 0.3322,
      "step": 180
    },
    {
      "epoch": 0.14805725971370143,
      "grad_norm": 0.6983818411827087,
      "learning_rate": 1.013e-05,
      "loss": 0.7427,
      "step": 181
    },
    {
      "epoch": 0.1488752556237219,
      "grad_norm": 0.7040908336639404,
      "learning_rate": 9.596842105263157e-06,
      "loss": 0.6962,
      "step": 182
    },
    {
      "epoch": 0.14969325153374233,
      "grad_norm": 0.6211279630661011,
      "learning_rate": 9.063684210526316e-06,
      "loss": 0.5084,
      "step": 183
    },
    {
      "epoch": 0.1505112474437628,
      "grad_norm": 0.4377440810203552,
      "learning_rate": 8.530526315789473e-06,
      "loss": 0.226,
      "step": 184
    },
    {
      "epoch": 0.15132924335378323,
      "grad_norm": 0.1414121389389038,
      "learning_rate": 7.99736842105263e-06,
      "loss": 0.052,
      "step": 185
    },
    {
      "epoch": 0.1521472392638037,
      "grad_norm": 0.06823990494012833,
      "learning_rate": 7.464210526315788e-06,
      "loss": 0.0031,
      "step": 186
    },
    {
      "epoch": 0.15296523517382413,
      "grad_norm": 0.013092203065752983,
      "learning_rate": 6.931052631578947e-06,
      "loss": 0.0006,
      "step": 187
    },
    {
      "epoch": 0.1537832310838446,
      "grad_norm": 0.19664902985095978,
      "learning_rate": 6.3978947368421055e-06,
      "loss": 0.0032,
      "step": 188
    },
    {
      "epoch": 0.15460122699386503,
      "grad_norm": 0.016594722867012024,
      "learning_rate": 5.864736842105263e-06,
      "loss": 0.0006,
      "step": 189
    },
    {
      "epoch": 0.1554192229038855,
      "grad_norm": 0.009124848060309887,
      "learning_rate": 5.33157894736842e-06,
      "loss": 0.0005,
      "step": 190
    },
    {
      "epoch": 0.15623721881390593,
      "grad_norm": 0.10051782429218292,
      "learning_rate": 4.7984210526315785e-06,
      "loss": 0.0042,
      "step": 191
    },
    {
      "epoch": 0.1570552147239264,
      "grad_norm": 0.0568082332611084,
      "learning_rate": 4.265263157894737e-06,
      "loss": 0.0029,
      "step": 192
    },
    {
      "epoch": 0.15787321063394683,
      "grad_norm": 0.09655116498470306,
      "learning_rate": 3.732105263157894e-06,
      "loss": 0.0036,
      "step": 193
    },
    {
      "epoch": 0.1586912065439673,
      "grad_norm": 0.17335928976535797,
      "learning_rate": 3.1989473684210527e-06,
      "loss": 0.007,
      "step": 194
    },
    {
      "epoch": 0.15950920245398773,
      "grad_norm": 0.008331399410963058,
      "learning_rate": 2.66578947368421e-06,
      "loss": 0.0005,
      "step": 195
    },
    {
      "epoch": 0.1603271983640082,
      "grad_norm": 0.1623714417219162,
      "learning_rate": 2.1326315789473684e-06,
      "loss": 0.0043,
      "step": 196
    },
    {
      "epoch": 0.16114519427402862,
      "grad_norm": 0.05319036543369293,
      "learning_rate": 1.5994736842105264e-06,
      "loss": 0.0018,
      "step": 197
    },
    {
      "epoch": 0.1619631901840491,
      "grad_norm": 0.006065657362341881,
      "learning_rate": 1.0663157894736842e-06,
      "loss": 0.0004,
      "step": 198
    },
    {
      "epoch": 0.16278118609406952,
      "grad_norm": 0.010582847520709038,
      "learning_rate": 5.331578947368421e-07,
      "loss": 0.0005,
      "step": 199
    },
    {
      "epoch": 0.16359918200409,
      "grad_norm": 0.026805628091096878,
      "learning_rate": 0.0,
      "loss": 0.001,
      "step": 200
    },
    {
      "epoch": 0.16359918200409,
      "eval_loss": 0.21579746901988983,
      "eval_runtime": 165.9203,
      "eval_samples_per_second": 3.104,
      "eval_steps_per_second": 0.777,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.484049111154688e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
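
A minimal sketch of how a state file like this might be inspected offline, assuming it is saved under the standard Hugging Face Trainer name `trainer_state.json` (the path is an assumption, not taken from the file itself):

```python
import json

# Load the trainer state; the filename is an assumption based on the
# usual Hugging Face Trainer checkpoint layout (checkpoint-*/trainer_state.json).
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes per-step training logs and periodic evaluation logs;
# tell them apart by which loss key each entry carries.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"best eval_loss {state['best_metric']:.4f} "
      f"from {state['best_model_checkpoint']}")
for e in eval_logs:
    print(f"step {e['step']:>3}: eval_loss {e['eval_loss']:.4f}")
```

Run against the state above, this would list the five evaluations (steps 1, 50, 100, 150, 200) and show eval_loss improving monotonically from 0.5526 to 0.2158, consistent with the early-stopping patience counter remaining at 0.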
|