|
{ |
|
"best_metric": 1.5823931694030762, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.11293054771315642, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000564652738565782, |
|
"grad_norm": 0.5402987599372864, |
|
"learning_rate": 1.013e-05, |
|
"loss": 1.4633, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.000564652738565782, |
|
"eval_loss": 2.135667562484741, |
|
"eval_runtime": 33.7351, |
|
"eval_samples_per_second": 22.113, |
|
"eval_steps_per_second": 5.543, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001129305477131564, |
|
"grad_norm": 0.36689239740371704, |
|
"learning_rate": 2.026e-05, |
|
"loss": 1.8992, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0016939582156973462, |
|
"grad_norm": 0.3763646185398102, |
|
"learning_rate": 3.039e-05, |
|
"loss": 0.8535, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.002258610954263128, |
|
"grad_norm": 0.34807297587394714, |
|
"learning_rate": 4.052e-05, |
|
"loss": 1.7749, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00282326369282891, |
|
"grad_norm": 0.5206761360168457, |
|
"learning_rate": 5.065e-05, |
|
"loss": 1.8843, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0033879164313946925, |
|
"grad_norm": 0.3446935713291168, |
|
"learning_rate": 6.078e-05, |
|
"loss": 2.4299, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.003952569169960474, |
|
"grad_norm": 0.353129506111145, |
|
"learning_rate": 7.091e-05, |
|
"loss": 1.9346, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.004517221908526256, |
|
"grad_norm": 0.2856239378452301, |
|
"learning_rate": 8.104e-05, |
|
"loss": 1.7326, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.005081874647092038, |
|
"grad_norm": 0.4531644880771637, |
|
"learning_rate": 9.117e-05, |
|
"loss": 1.7827, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00564652738565782, |
|
"grad_norm": 0.47059357166290283, |
|
"learning_rate": 0.0001013, |
|
"loss": 2.4921, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006211180124223602, |
|
"grad_norm": 0.4820435345172882, |
|
"learning_rate": 0.00010076684210526316, |
|
"loss": 1.8598, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.006775832862789385, |
|
"grad_norm": 0.37354958057403564, |
|
"learning_rate": 0.0001002336842105263, |
|
"loss": 1.7825, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.007340485601355167, |
|
"grad_norm": 0.3483826220035553, |
|
"learning_rate": 9.970052631578946e-05, |
|
"loss": 2.0361, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.007905138339920948, |
|
"grad_norm": 0.45971325039863586, |
|
"learning_rate": 9.916736842105263e-05, |
|
"loss": 1.9319, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00846979107848673, |
|
"grad_norm": 0.9301943182945251, |
|
"learning_rate": 9.863421052631579e-05, |
|
"loss": 1.6979, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.009034443817052512, |
|
"grad_norm": 0.32556048035621643, |
|
"learning_rate": 9.810105263157895e-05, |
|
"loss": 2.2257, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.009599096555618294, |
|
"grad_norm": 0.46048063039779663, |
|
"learning_rate": 9.756789473684211e-05, |
|
"loss": 1.7091, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.010163749294184076, |
|
"grad_norm": 0.3488789498806, |
|
"learning_rate": 9.703473684210525e-05, |
|
"loss": 2.019, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.010728402032749858, |
|
"grad_norm": 0.5194416642189026, |
|
"learning_rate": 9.650157894736842e-05, |
|
"loss": 1.9075, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01129305477131564, |
|
"grad_norm": 0.46956583857536316, |
|
"learning_rate": 9.596842105263158e-05, |
|
"loss": 1.8816, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011857707509881422, |
|
"grad_norm": 0.4933469891548157, |
|
"learning_rate": 9.543526315789474e-05, |
|
"loss": 1.729, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.012422360248447204, |
|
"grad_norm": 0.3997938930988312, |
|
"learning_rate": 9.49021052631579e-05, |
|
"loss": 1.6959, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.012987012987012988, |
|
"grad_norm": 0.5408119559288025, |
|
"learning_rate": 9.436894736842105e-05, |
|
"loss": 2.0254, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01355166572557877, |
|
"grad_norm": 0.41266539692878723, |
|
"learning_rate": 9.38357894736842e-05, |
|
"loss": 1.9554, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.014116318464144552, |
|
"grad_norm": 0.5079236030578613, |
|
"learning_rate": 9.330263157894737e-05, |
|
"loss": 2.0457, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.014680971202710334, |
|
"grad_norm": 0.5814744830131531, |
|
"learning_rate": 9.276947368421051e-05, |
|
"loss": 2.0399, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.015245623941276116, |
|
"grad_norm": 0.6485530138015747, |
|
"learning_rate": 9.223631578947369e-05, |
|
"loss": 1.4858, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.015810276679841896, |
|
"grad_norm": 0.5585585832595825, |
|
"learning_rate": 9.170315789473684e-05, |
|
"loss": 1.9193, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01637492941840768, |
|
"grad_norm": 0.6046280860900879, |
|
"learning_rate": 9.117e-05, |
|
"loss": 2.3537, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.01693958215697346, |
|
"grad_norm": 0.5263966917991638, |
|
"learning_rate": 9.063684210526316e-05, |
|
"loss": 1.5335, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.017504234895539244, |
|
"grad_norm": 0.8364748358726501, |
|
"learning_rate": 9.010368421052632e-05, |
|
"loss": 1.7004, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.018068887634105024, |
|
"grad_norm": 0.5951048135757446, |
|
"learning_rate": 8.957052631578946e-05, |
|
"loss": 2.4489, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.018633540372670808, |
|
"grad_norm": 0.5281526446342468, |
|
"learning_rate": 8.903736842105263e-05, |
|
"loss": 1.1862, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.019198193111236588, |
|
"grad_norm": 0.8356189727783203, |
|
"learning_rate": 8.850421052631579e-05, |
|
"loss": 1.5012, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.019762845849802372, |
|
"grad_norm": 0.6029136180877686, |
|
"learning_rate": 8.797105263157895e-05, |
|
"loss": 1.4578, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.020327498588368152, |
|
"grad_norm": 0.6786731481552124, |
|
"learning_rate": 8.743789473684211e-05, |
|
"loss": 1.547, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.020892151326933936, |
|
"grad_norm": 0.877357542514801, |
|
"learning_rate": 8.690473684210526e-05, |
|
"loss": 1.7824, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.021456804065499716, |
|
"grad_norm": 1.265729546546936, |
|
"learning_rate": 8.637157894736842e-05, |
|
"loss": 2.1181, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0220214568040655, |
|
"grad_norm": 1.4670414924621582, |
|
"learning_rate": 8.583842105263158e-05, |
|
"loss": 1.3309, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02258610954263128, |
|
"grad_norm": 1.3509232997894287, |
|
"learning_rate": 8.530526315789472e-05, |
|
"loss": 2.5148, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.023150762281197064, |
|
"grad_norm": 1.4195291996002197, |
|
"learning_rate": 8.47721052631579e-05, |
|
"loss": 2.0859, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.023715415019762844, |
|
"grad_norm": 1.6967036724090576, |
|
"learning_rate": 8.423894736842105e-05, |
|
"loss": 1.6782, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.024280067758328628, |
|
"grad_norm": 1.9785702228546143, |
|
"learning_rate": 8.37057894736842e-05, |
|
"loss": 1.8815, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.024844720496894408, |
|
"grad_norm": 2.209860324859619, |
|
"learning_rate": 8.317263157894737e-05, |
|
"loss": 1.5545, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.025409373235460192, |
|
"grad_norm": 2.2609925270080566, |
|
"learning_rate": 8.263947368421053e-05, |
|
"loss": 1.3009, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.025974025974025976, |
|
"grad_norm": 3.240757703781128, |
|
"learning_rate": 8.210631578947368e-05, |
|
"loss": 2.1366, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.026538678712591756, |
|
"grad_norm": 3.1858623027801514, |
|
"learning_rate": 8.157315789473684e-05, |
|
"loss": 1.119, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.02710333145115754, |
|
"grad_norm": 3.827558994293213, |
|
"learning_rate": 8.104e-05, |
|
"loss": 1.5671, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02766798418972332, |
|
"grad_norm": 4.33996057510376, |
|
"learning_rate": 8.050684210526316e-05, |
|
"loss": 1.3891, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.028232636928289104, |
|
"grad_norm": 5.596718788146973, |
|
"learning_rate": 7.997368421052632e-05, |
|
"loss": 1.572, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.028232636928289104, |
|
"eval_loss": 1.7320042848587036, |
|
"eval_runtime": 33.7715, |
|
"eval_samples_per_second": 22.09, |
|
"eval_steps_per_second": 5.537, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.028797289666854884, |
|
"grad_norm": 0.710443913936615, |
|
"learning_rate": 7.944052631578947e-05, |
|
"loss": 2.2345, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.029361942405420668, |
|
"grad_norm": 0.6612917184829712, |
|
"learning_rate": 7.890736842105263e-05, |
|
"loss": 2.0899, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.029926595143986448, |
|
"grad_norm": 0.7144291996955872, |
|
"learning_rate": 7.837421052631579e-05, |
|
"loss": 0.9424, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.030491247882552232, |
|
"grad_norm": 0.6780564188957214, |
|
"learning_rate": 7.784105263157893e-05, |
|
"loss": 2.0563, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.031055900621118012, |
|
"grad_norm": 0.7346010208129883, |
|
"learning_rate": 7.730789473684211e-05, |
|
"loss": 1.8291, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03162055335968379, |
|
"grad_norm": 0.4565852880477905, |
|
"learning_rate": 7.677473684210526e-05, |
|
"loss": 1.2565, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.032185206098249576, |
|
"grad_norm": 0.5821838974952698, |
|
"learning_rate": 7.624157894736842e-05, |
|
"loss": 1.6761, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.03274985883681536, |
|
"grad_norm": 1.3336795568466187, |
|
"learning_rate": 7.570842105263158e-05, |
|
"loss": 1.9984, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.033314511575381144, |
|
"grad_norm": 0.7882184982299805, |
|
"learning_rate": 7.517526315789474e-05, |
|
"loss": 0.7926, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.03387916431394692, |
|
"grad_norm": 0.4720506966114044, |
|
"learning_rate": 7.464210526315789e-05, |
|
"loss": 1.5786, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.034443817052512704, |
|
"grad_norm": 0.4207099974155426, |
|
"learning_rate": 7.410894736842106e-05, |
|
"loss": 1.1193, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.03500846979107849, |
|
"grad_norm": 0.5080116391181946, |
|
"learning_rate": 7.35757894736842e-05, |
|
"loss": 1.6818, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03557312252964427, |
|
"grad_norm": 0.5660998225212097, |
|
"learning_rate": 7.304263157894737e-05, |
|
"loss": 1.8537, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.03613777526821005, |
|
"grad_norm": 0.4343119263648987, |
|
"learning_rate": 7.250947368421053e-05, |
|
"loss": 1.3433, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03670242800677583, |
|
"grad_norm": 0.561114490032196, |
|
"learning_rate": 7.197631578947368e-05, |
|
"loss": 1.6089, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.037267080745341616, |
|
"grad_norm": 0.41408398747444153, |
|
"learning_rate": 7.144315789473684e-05, |
|
"loss": 2.1082, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0378317334839074, |
|
"grad_norm": 0.40642455220222473, |
|
"learning_rate": 7.091e-05, |
|
"loss": 1.952, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.038396386222473176, |
|
"grad_norm": 0.49146607518196106, |
|
"learning_rate": 7.037684210526316e-05, |
|
"loss": 1.929, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03896103896103896, |
|
"grad_norm": 0.43934860825538635, |
|
"learning_rate": 6.984368421052632e-05, |
|
"loss": 1.8151, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.039525691699604744, |
|
"grad_norm": 0.39021074771881104, |
|
"learning_rate": 6.931052631578947e-05, |
|
"loss": 1.7151, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04009034443817053, |
|
"grad_norm": 0.5213768482208252, |
|
"learning_rate": 6.877736842105263e-05, |
|
"loss": 2.1129, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.040654997176736304, |
|
"grad_norm": 0.42245039343833923, |
|
"learning_rate": 6.824421052631579e-05, |
|
"loss": 2.1972, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04121964991530209, |
|
"grad_norm": 0.4925639033317566, |
|
"learning_rate": 6.771105263157895e-05, |
|
"loss": 2.3312, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.04178430265386787, |
|
"grad_norm": 0.41615986824035645, |
|
"learning_rate": 6.71778947368421e-05, |
|
"loss": 1.9326, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.042348955392433656, |
|
"grad_norm": 0.4477844834327698, |
|
"learning_rate": 6.664473684210527e-05, |
|
"loss": 1.9983, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04291360813099943, |
|
"grad_norm": 0.45667552947998047, |
|
"learning_rate": 6.611157894736842e-05, |
|
"loss": 1.7302, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.043478260869565216, |
|
"grad_norm": 0.5657249093055725, |
|
"learning_rate": 6.557842105263158e-05, |
|
"loss": 1.3501, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.044042913608131, |
|
"grad_norm": 0.48479339480400085, |
|
"learning_rate": 6.504526315789474e-05, |
|
"loss": 2.1838, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.044607566346696784, |
|
"grad_norm": 0.5657017230987549, |
|
"learning_rate": 6.451210526315789e-05, |
|
"loss": 1.5316, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.04517221908526256, |
|
"grad_norm": 0.6498041152954102, |
|
"learning_rate": 6.397894736842105e-05, |
|
"loss": 1.7707, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.045736871823828344, |
|
"grad_norm": 0.5205366611480713, |
|
"learning_rate": 6.344578947368421e-05, |
|
"loss": 1.5243, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.04630152456239413, |
|
"grad_norm": 0.6816235780715942, |
|
"learning_rate": 6.291263157894737e-05, |
|
"loss": 1.9903, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.04686617730095991, |
|
"grad_norm": 0.5851727724075317, |
|
"learning_rate": 6.237947368421053e-05, |
|
"loss": 1.086, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.04743083003952569, |
|
"grad_norm": 0.5946785807609558, |
|
"learning_rate": 6.184631578947368e-05, |
|
"loss": 1.6933, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04799548277809147, |
|
"grad_norm": 0.9167661666870117, |
|
"learning_rate": 6.131315789473684e-05, |
|
"loss": 1.3954, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.048560135516657256, |
|
"grad_norm": 0.6855114698410034, |
|
"learning_rate": 6.078e-05, |
|
"loss": 1.6794, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04912478825522304, |
|
"grad_norm": 0.9877620935440063, |
|
"learning_rate": 6.024684210526315e-05, |
|
"loss": 1.5267, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.049689440993788817, |
|
"grad_norm": 1.2663705348968506, |
|
"learning_rate": 5.9713684210526305e-05, |
|
"loss": 1.2142, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0502540937323546, |
|
"grad_norm": 0.8730637431144714, |
|
"learning_rate": 5.918052631578947e-05, |
|
"loss": 1.9942, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.050818746470920384, |
|
"grad_norm": 1.022507905960083, |
|
"learning_rate": 5.8647368421052634e-05, |
|
"loss": 1.7698, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05138339920948617, |
|
"grad_norm": 1.0070384740829468, |
|
"learning_rate": 5.811421052631579e-05, |
|
"loss": 2.1813, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.05194805194805195, |
|
"grad_norm": 1.0603997707366943, |
|
"learning_rate": 5.758105263157894e-05, |
|
"loss": 1.5043, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.05251270468661773, |
|
"grad_norm": 1.5354747772216797, |
|
"learning_rate": 5.70478947368421e-05, |
|
"loss": 1.4871, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.05307735742518351, |
|
"grad_norm": 1.4360660314559937, |
|
"learning_rate": 5.6514736842105256e-05, |
|
"loss": 1.1194, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.053642010163749296, |
|
"grad_norm": 2.7259883880615234, |
|
"learning_rate": 5.5981578947368424e-05, |
|
"loss": 1.3859, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.05420666290231508, |
|
"grad_norm": 2.0067079067230225, |
|
"learning_rate": 5.544842105263158e-05, |
|
"loss": 1.1774, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.054771315640880856, |
|
"grad_norm": 1.9540057182312012, |
|
"learning_rate": 5.491526315789474e-05, |
|
"loss": 1.838, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.05533596837944664, |
|
"grad_norm": 3.2931625843048096, |
|
"learning_rate": 5.438210526315789e-05, |
|
"loss": 1.8081, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.055900621118012424, |
|
"grad_norm": 4.112388610839844, |
|
"learning_rate": 5.384894736842105e-05, |
|
"loss": 1.0519, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.05646527385657821, |
|
"grad_norm": 4.771429061889648, |
|
"learning_rate": 5.331578947368421e-05, |
|
"loss": 2.072, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05646527385657821, |
|
"eval_loss": 1.61591374874115, |
|
"eval_runtime": 33.6813, |
|
"eval_samples_per_second": 22.149, |
|
"eval_steps_per_second": 5.552, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.057029926595143984, |
|
"grad_norm": 0.26157695055007935, |
|
"learning_rate": 5.278263157894736e-05, |
|
"loss": 1.3786, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.05759457933370977, |
|
"grad_norm": 0.31872278451919556, |
|
"learning_rate": 5.224947368421053e-05, |
|
"loss": 1.7082, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.05815923207227555, |
|
"grad_norm": 0.40259334444999695, |
|
"learning_rate": 5.171631578947368e-05, |
|
"loss": 1.7991, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.058723884810841336, |
|
"grad_norm": 0.5885865688323975, |
|
"learning_rate": 5.1183157894736844e-05, |
|
"loss": 1.5969, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05928853754940711, |
|
"grad_norm": 0.41334208846092224, |
|
"learning_rate": 5.065e-05, |
|
"loss": 1.8226, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.059853190287972896, |
|
"grad_norm": 0.5378217101097107, |
|
"learning_rate": 5.011684210526315e-05, |
|
"loss": 1.6466, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.06041784302653868, |
|
"grad_norm": 0.5085050463676453, |
|
"learning_rate": 4.958368421052631e-05, |
|
"loss": 1.9653, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.060982495765104464, |
|
"grad_norm": 0.49602341651916504, |
|
"learning_rate": 4.9050526315789473e-05, |
|
"loss": 1.409, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.06154714850367024, |
|
"grad_norm": 0.5390165448188782, |
|
"learning_rate": 4.851736842105263e-05, |
|
"loss": 1.3066, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.062111801242236024, |
|
"grad_norm": 0.5828697085380554, |
|
"learning_rate": 4.798421052631579e-05, |
|
"loss": 1.8615, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06267645398080181, |
|
"grad_norm": 0.63508141040802, |
|
"learning_rate": 4.745105263157895e-05, |
|
"loss": 2.0458, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.06324110671936758, |
|
"grad_norm": 0.5427883863449097, |
|
"learning_rate": 4.69178947368421e-05, |
|
"loss": 1.7312, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.06380575945793338, |
|
"grad_norm": 0.5502323508262634, |
|
"learning_rate": 4.638473684210526e-05, |
|
"loss": 2.3985, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.06437041219649915, |
|
"grad_norm": 0.4255567491054535, |
|
"learning_rate": 4.585157894736842e-05, |
|
"loss": 2.0969, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.06493506493506493, |
|
"grad_norm": 0.6290948390960693, |
|
"learning_rate": 4.531842105263158e-05, |
|
"loss": 1.712, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.06549971767363072, |
|
"grad_norm": 0.6249313950538635, |
|
"learning_rate": 4.478526315789473e-05, |
|
"loss": 1.1716, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0660643704121965, |
|
"grad_norm": 0.4874264597892761, |
|
"learning_rate": 4.425210526315789e-05, |
|
"loss": 1.614, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.06662902315076229, |
|
"grad_norm": 0.5078814625740051, |
|
"learning_rate": 4.3718947368421054e-05, |
|
"loss": 2.0362, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.06719367588932806, |
|
"grad_norm": 0.6948334574699402, |
|
"learning_rate": 4.318578947368421e-05, |
|
"loss": 1.6969, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.06775832862789384, |
|
"grad_norm": 0.46054255962371826, |
|
"learning_rate": 4.265263157894736e-05, |
|
"loss": 1.9061, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06832298136645963, |
|
"grad_norm": 0.4674294590950012, |
|
"learning_rate": 4.211947368421052e-05, |
|
"loss": 1.4067, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.06888763410502541, |
|
"grad_norm": 0.49073663353919983, |
|
"learning_rate": 4.1586315789473684e-05, |
|
"loss": 2.3371, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.06945228684359118, |
|
"grad_norm": 0.49056804180145264, |
|
"learning_rate": 4.105315789473684e-05, |
|
"loss": 2.1801, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.07001693958215698, |
|
"grad_norm": 0.608372688293457, |
|
"learning_rate": 4.052e-05, |
|
"loss": 2.0307, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.07058159232072275, |
|
"grad_norm": 0.46089833974838257, |
|
"learning_rate": 3.998684210526316e-05, |
|
"loss": 1.9543, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07114624505928854, |
|
"grad_norm": 0.4646291136741638, |
|
"learning_rate": 3.945368421052631e-05, |
|
"loss": 1.6708, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.07171089779785432, |
|
"grad_norm": 0.545235276222229, |
|
"learning_rate": 3.892052631578947e-05, |
|
"loss": 2.0372, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0722755505364201, |
|
"grad_norm": 0.7160872220993042, |
|
"learning_rate": 3.838736842105263e-05, |
|
"loss": 2.0102, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.07284020327498589, |
|
"grad_norm": 0.5753077864646912, |
|
"learning_rate": 3.785421052631579e-05, |
|
"loss": 1.7705, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.07340485601355166, |
|
"grad_norm": 0.6134698390960693, |
|
"learning_rate": 3.732105263157894e-05, |
|
"loss": 1.5995, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07396950875211744, |
|
"grad_norm": 0.5752911567687988, |
|
"learning_rate": 3.67878947368421e-05, |
|
"loss": 1.4896, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.07453416149068323, |
|
"grad_norm": 0.7670300602912903, |
|
"learning_rate": 3.6254736842105264e-05, |
|
"loss": 1.784, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.07509881422924901, |
|
"grad_norm": 0.5910040140151978, |
|
"learning_rate": 3.572157894736842e-05, |
|
"loss": 1.3716, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0756634669678148, |
|
"grad_norm": 0.5962346196174622, |
|
"learning_rate": 3.518842105263158e-05, |
|
"loss": 1.6385, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.07622811970638058, |
|
"grad_norm": 1.3207964897155762, |
|
"learning_rate": 3.465526315789473e-05, |
|
"loss": 1.2742, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.07679277244494635, |
|
"grad_norm": 1.1188924312591553, |
|
"learning_rate": 3.4122105263157894e-05, |
|
"loss": 1.9659, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.07735742518351214, |
|
"grad_norm": 0.6829027533531189, |
|
"learning_rate": 3.358894736842105e-05, |
|
"loss": 1.1681, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.07792207792207792, |
|
"grad_norm": 0.8137729167938232, |
|
"learning_rate": 3.305578947368421e-05, |
|
"loss": 1.4063, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.07848673066064371, |
|
"grad_norm": 0.711093008518219, |
|
"learning_rate": 3.252263157894737e-05, |
|
"loss": 1.4551, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.07905138339920949, |
|
"grad_norm": 1.0196043252944946, |
|
"learning_rate": 3.198947368421052e-05, |
|
"loss": 1.8365, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07961603613777526, |
|
"grad_norm": 0.8647313714027405, |
|
"learning_rate": 3.1456315789473684e-05, |
|
"loss": 1.253, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.08018068887634106, |
|
"grad_norm": 0.863608717918396, |
|
"learning_rate": 3.092315789473684e-05, |
|
"loss": 1.2664, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.08074534161490683, |
|
"grad_norm": 1.0043619871139526, |
|
"learning_rate": 3.039e-05, |
|
"loss": 1.5368, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.08130999435347261, |
|
"grad_norm": 1.3296562433242798, |
|
"learning_rate": 2.9856842105263153e-05, |
|
"loss": 0.8377, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.0818746470920384, |
|
"grad_norm": 1.5807889699935913, |
|
"learning_rate": 2.9323684210526317e-05, |
|
"loss": 1.4706, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.08243929983060418, |
|
"grad_norm": 1.4404282569885254, |
|
"learning_rate": 2.879052631578947e-05, |
|
"loss": 1.6419, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.08300395256916997, |
|
"grad_norm": 1.8414446115493774, |
|
"learning_rate": 2.8257368421052628e-05, |
|
"loss": 1.2406, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.08356860530773574, |
|
"grad_norm": 6.275533199310303, |
|
"learning_rate": 2.772421052631579e-05, |
|
"loss": 2.1809, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.08413325804630152, |
|
"grad_norm": 3.404439687728882, |
|
"learning_rate": 2.7191052631578946e-05, |
|
"loss": 1.8676, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.08469791078486731, |
|
"grad_norm": 4.620980262756348, |
|
"learning_rate": 2.6657894736842104e-05, |
|
"loss": 2.8781, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08469791078486731, |
|
"eval_loss": 1.586492657661438, |
|
"eval_runtime": 33.7145, |
|
"eval_samples_per_second": 22.127, |
|
"eval_steps_per_second": 5.547, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08526256352343309, |
|
"grad_norm": 0.3272818326950073, |
|
"learning_rate": 2.6124736842105265e-05, |
|
"loss": 1.4871, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.08582721626199886, |
|
"grad_norm": 0.32894277572631836, |
|
"learning_rate": 2.5591578947368422e-05, |
|
"loss": 1.7465, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.08639186900056466, |
|
"grad_norm": 0.34903210401535034, |
|
"learning_rate": 2.5058421052631576e-05, |
|
"loss": 2.1659, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.08695652173913043, |
|
"grad_norm": 0.309485524892807, |
|
"learning_rate": 2.4525263157894737e-05, |
|
"loss": 0.9477, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.08752117447769622, |
|
"grad_norm": 0.3330695927143097, |
|
"learning_rate": 2.3992105263157894e-05, |
|
"loss": 1.5259, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.088085827216262, |
|
"grad_norm": 0.3170928657054901, |
|
"learning_rate": 2.345894736842105e-05, |
|
"loss": 2.1571, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.08865047995482778, |
|
"grad_norm": 0.44350671768188477, |
|
"learning_rate": 2.292578947368421e-05, |
|
"loss": 1.7132, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.08921513269339357, |
|
"grad_norm": 0.3251064419746399, |
|
"learning_rate": 2.2392631578947366e-05, |
|
"loss": 1.7776, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.08977978543195934, |
|
"grad_norm": 0.4512995183467865, |
|
"learning_rate": 2.1859473684210527e-05, |
|
"loss": 1.6642, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.09034443817052512, |
|
"grad_norm": 0.9298767447471619, |
|
"learning_rate": 2.132631578947368e-05, |
|
"loss": 1.8871, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09090909090909091, |
|
"grad_norm": 0.39221644401550293, |
|
"learning_rate": 2.0793157894736842e-05, |
|
"loss": 1.6345, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.09147374364765669, |
|
"grad_norm": 0.4242939054965973, |
|
"learning_rate": 2.026e-05, |
|
"loss": 1.5644, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.09203839638622248, |
|
"grad_norm": 0.39541876316070557, |
|
"learning_rate": 1.9726842105263157e-05, |
|
"loss": 2.0449, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.09260304912478826, |
|
"grad_norm": 0.49802130460739136, |
|
"learning_rate": 1.9193684210526314e-05, |
|
"loss": 1.7162, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.09316770186335403, |
|
"grad_norm": 0.38496336340904236, |
|
"learning_rate": 1.866052631578947e-05, |
|
"loss": 2.2872, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.09373235460191982, |
|
"grad_norm": 0.5159594416618347, |
|
"learning_rate": 1.8127368421052632e-05, |
|
"loss": 1.8203, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.0942970073404856, |
|
"grad_norm": 0.7461097836494446, |
|
"learning_rate": 1.759421052631579e-05, |
|
"loss": 2.0484, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.09486166007905138, |
|
"grad_norm": 0.4127659797668457, |
|
"learning_rate": 1.7061052631578947e-05, |
|
"loss": 1.8052, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.09542631281761717, |
|
"grad_norm": 0.4347990155220032, |
|
"learning_rate": 1.6527894736842104e-05, |
|
"loss": 1.7951, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.09599096555618294, |
|
"grad_norm": 0.4800684154033661, |
|
"learning_rate": 1.599473684210526e-05, |
|
"loss": 1.5261, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09655561829474874, |
|
"grad_norm": 0.3702009320259094, |
|
"learning_rate": 1.546157894736842e-05, |
|
"loss": 2.0826, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.09712027103331451, |
|
"grad_norm": 0.6166195273399353, |
|
"learning_rate": 1.4928421052631576e-05, |
|
"loss": 1.437, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.09768492377188029, |
|
"grad_norm": 0.5393872857093811, |
|
"learning_rate": 1.4395263157894735e-05, |
|
"loss": 2.0329, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.09824957651044608, |
|
"grad_norm": 0.3947698771953583, |
|
"learning_rate": 1.3862105263157895e-05, |
|
"loss": 1.9888, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.09881422924901186, |
|
"grad_norm": 0.5108495354652405, |
|
"learning_rate": 1.3328947368421052e-05, |
|
"loss": 1.6187, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09937888198757763, |
|
"grad_norm": 0.42177772521972656, |
|
"learning_rate": 1.2795789473684211e-05, |
|
"loss": 1.752, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.09994353472614342, |
|
"grad_norm": 0.4926985800266266, |
|
"learning_rate": 1.2262631578947368e-05, |
|
"loss": 2.1683, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.1005081874647092, |
|
"grad_norm": 0.46454355120658875, |
|
"learning_rate": 1.1729473684210526e-05, |
|
"loss": 1.4214, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.10107284020327499, |
|
"grad_norm": 0.4612448811531067, |
|
"learning_rate": 1.1196315789473683e-05, |
|
"loss": 1.7107, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.10163749294184077, |
|
"grad_norm": 0.5232647657394409, |
|
"learning_rate": 1.066315789473684e-05, |
|
"loss": 2.0586, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10220214568040654, |
|
"grad_norm": 0.5285339951515198, |
|
"learning_rate": 1.013e-05, |
|
"loss": 1.5791, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.10276679841897234, |
|
"grad_norm": 0.46414557099342346, |
|
"learning_rate": 9.596842105263157e-06, |
|
"loss": 1.7391, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.10333145115753811, |
|
"grad_norm": 0.5057702660560608, |
|
"learning_rate": 9.063684210526316e-06, |
|
"loss": 1.7127, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1038961038961039, |
|
"grad_norm": 0.5308021903038025, |
|
"learning_rate": 8.530526315789473e-06, |
|
"loss": 1.691, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.10446075663466968, |
|
"grad_norm": 0.6663089394569397, |
|
"learning_rate": 7.99736842105263e-06, |
|
"loss": 1.403, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.10502540937323546, |
|
"grad_norm": 0.601261556148529, |
|
"learning_rate": 7.464210526315788e-06, |
|
"loss": 1.5466, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.10559006211180125, |
|
"grad_norm": 0.563347578048706, |
|
"learning_rate": 6.931052631578947e-06, |
|
"loss": 1.389, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.10615471485036702, |
|
"grad_norm": 0.6577058434486389, |
|
"learning_rate": 6.3978947368421055e-06, |
|
"loss": 2.0848, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1067193675889328, |
|
"grad_norm": 1.0290292501449585, |
|
"learning_rate": 5.864736842105263e-06, |
|
"loss": 1.715, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.10728402032749859, |
|
"grad_norm": 1.501333475112915, |
|
"learning_rate": 5.33157894736842e-06, |
|
"loss": 1.8911, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10784867306606437, |
|
"grad_norm": 0.7974535226821899, |
|
"learning_rate": 4.7984210526315785e-06, |
|
"loss": 1.6194, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.10841332580463016, |
|
"grad_norm": 0.6893588304519653, |
|
"learning_rate": 4.265263157894737e-06, |
|
"loss": 2.0794, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.10897797854319594, |
|
"grad_norm": 0.7431008815765381, |
|
"learning_rate": 3.732105263157894e-06, |
|
"loss": 1.2137, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.10954263128176171, |
|
"grad_norm": 0.8880150318145752, |
|
"learning_rate": 3.1989473684210527e-06, |
|
"loss": 1.3514, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1101072840203275, |
|
"grad_norm": 1.2837520837783813, |
|
"learning_rate": 2.66578947368421e-06, |
|
"loss": 1.6779, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.11067193675889328, |
|
"grad_norm": 1.7506012916564941, |
|
"learning_rate": 2.1326315789473684e-06, |
|
"loss": 0.9495, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.11123658949745906, |
|
"grad_norm": 1.3716044425964355, |
|
"learning_rate": 1.5994736842105264e-06, |
|
"loss": 1.1218, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.11180124223602485, |
|
"grad_norm": 1.528428077697754, |
|
"learning_rate": 1.0663157894736842e-06, |
|
"loss": 0.9545, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.11236589497459062, |
|
"grad_norm": 1.5911756753921509, |
|
"learning_rate": 5.331578947368421e-07, |
|
"loss": 1.27, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.11293054771315642, |
|
"grad_norm": 2.935549259185791, |
|
"learning_rate": 0.0, |
|
"loss": 2.7936, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11293054771315642, |
|
"eval_loss": 1.5823931694030762, |
|
"eval_runtime": 33.5466, |
|
"eval_samples_per_second": 22.238, |
|
"eval_steps_per_second": 5.574, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.675201553104896e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|