|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 552, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005434782608695652, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 10.5124, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010869565217391304, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 10.5941, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.016304347826086956, |
|
"grad_norm": 8.096252368220718, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"loss": 10.475, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.021739130434782608, |
|
"grad_norm": 8.39383943803796, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"loss": 10.4029, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02717391304347826, |
|
"grad_norm": 8.479649232958007, |
|
"learning_rate": 3.529411764705883e-06, |
|
"loss": 10.606, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03260869565217391, |
|
"grad_norm": 8.388175109430223, |
|
"learning_rate": 4.705882352941177e-06, |
|
"loss": 10.4024, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03804347826086957, |
|
"grad_norm": 8.445899787393927, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 10.4772, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.043478260869565216, |
|
"grad_norm": 8.405772228388786, |
|
"learning_rate": 7.058823529411766e-06, |
|
"loss": 10.4004, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04891304347826087, |
|
"grad_norm": 8.44764590867685, |
|
"learning_rate": 8.23529411764706e-06, |
|
"loss": 10.1775, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05434782608695652, |
|
"grad_norm": 8.23897507323131, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 10.2434, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.059782608695652176, |
|
"grad_norm": 8.118852150518913, |
|
"learning_rate": 1.0588235294117648e-05, |
|
"loss": 9.7644, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06521739130434782, |
|
"grad_norm": 8.570315139494753, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 9.7751, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07065217391304347, |
|
"grad_norm": 8.622402474140065, |
|
"learning_rate": 1.2941176470588238e-05, |
|
"loss": 9.2685, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07608695652173914, |
|
"grad_norm": 8.736670863686008, |
|
"learning_rate": 1.4117647058823532e-05, |
|
"loss": 8.897, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08152173913043478, |
|
"grad_norm": 9.172468108894085, |
|
"learning_rate": 1.5294117647058822e-05, |
|
"loss": 8.7101, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08695652173913043, |
|
"grad_norm": 10.228378996373296, |
|
"learning_rate": 1.647058823529412e-05, |
|
"loss": 8.3074, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09239130434782608, |
|
"grad_norm": 10.657372840257251, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 7.8589, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09782608695652174, |
|
"grad_norm": 10.887433964524527, |
|
"learning_rate": 1.8823529411764708e-05, |
|
"loss": 7.4742, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.10326086956521739, |
|
"grad_norm": 11.682285639818433, |
|
"learning_rate": 2e-05, |
|
"loss": 6.8416, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.10869565217391304, |
|
"grad_norm": 11.901377724871265, |
|
"learning_rate": 1.999982759060109e-05, |
|
"loss": 6.2183, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11413043478260869, |
|
"grad_norm": 11.383373292219964, |
|
"learning_rate": 1.9999310368349344e-05, |
|
"loss": 5.4371, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.11956521739130435, |
|
"grad_norm": 9.311596334088138, |
|
"learning_rate": 1.999844835107957e-05, |
|
"loss": 4.7164, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 8.688635937406437, |
|
"learning_rate": 1.9997241568515742e-05, |
|
"loss": 4.456, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.13043478260869565, |
|
"grad_norm": 7.4122077747748305, |
|
"learning_rate": 1.9995690062269985e-05, |
|
"loss": 3.8875, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1358695652173913, |
|
"grad_norm": 6.888182537563505, |
|
"learning_rate": 1.9993793885841157e-05, |
|
"loss": 3.5685, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.14130434782608695, |
|
"grad_norm": 6.988607551936095, |
|
"learning_rate": 1.9991553104612982e-05, |
|
"loss": 3.4123, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.14673913043478262, |
|
"grad_norm": 7.211548625105269, |
|
"learning_rate": 1.998896779585181e-05, |
|
"loss": 3.0838, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15217391304347827, |
|
"grad_norm": 7.767483170773942, |
|
"learning_rate": 1.998603804870395e-05, |
|
"loss": 2.831, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.15760869565217392, |
|
"grad_norm": 7.950559222260086, |
|
"learning_rate": 1.9982763964192586e-05, |
|
"loss": 2.6297, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.16304347826086957, |
|
"grad_norm": 8.23795631455961, |
|
"learning_rate": 1.9979145655214306e-05, |
|
"loss": 2.2795, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16847826086956522, |
|
"grad_norm": 8.57956169127235, |
|
"learning_rate": 1.9975183246535212e-05, |
|
"loss": 2.0509, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.17391304347826086, |
|
"grad_norm": 8.071070816084118, |
|
"learning_rate": 1.99708768747866e-05, |
|
"loss": 1.8279, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1793478260869565, |
|
"grad_norm": 7.042152882720071, |
|
"learning_rate": 1.9966226688460258e-05, |
|
"loss": 1.3567, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.18478260869565216, |
|
"grad_norm": 4.814338676579685, |
|
"learning_rate": 1.996123284790336e-05, |
|
"loss": 0.9542, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.19021739130434784, |
|
"grad_norm": 2.9434658655739474, |
|
"learning_rate": 1.9955895525312913e-05, |
|
"loss": 0.8261, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1956521739130435, |
|
"grad_norm": 2.452806110360505, |
|
"learning_rate": 1.995021490472983e-05, |
|
"loss": 0.851, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.20108695652173914, |
|
"grad_norm": 1.6789979391543146, |
|
"learning_rate": 1.9944191182032588e-05, |
|
"loss": 0.8265, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.20652173913043478, |
|
"grad_norm": 2.0007370440742154, |
|
"learning_rate": 1.9937824564930474e-05, |
|
"loss": 0.8181, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.21195652173913043, |
|
"grad_norm": 2.493212508529885, |
|
"learning_rate": 1.9931115272956405e-05, |
|
"loss": 0.767, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.21739130434782608, |
|
"grad_norm": 1.9209687838841931, |
|
"learning_rate": 1.992406353745939e-05, |
|
"loss": 0.7196, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22282608695652173, |
|
"grad_norm": 1.8290330319103352, |
|
"learning_rate": 1.9916669601596515e-05, |
|
"loss": 0.7299, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.22826086956521738, |
|
"grad_norm": 1.7900648029089992, |
|
"learning_rate": 1.990893372032459e-05, |
|
"loss": 0.7229, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.23369565217391305, |
|
"grad_norm": 1.6749799534602232, |
|
"learning_rate": 1.990085616039135e-05, |
|
"loss": 0.7238, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2391304347826087, |
|
"grad_norm": 1.986613572625418, |
|
"learning_rate": 1.989243720032624e-05, |
|
"loss": 0.7332, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.24456521739130435, |
|
"grad_norm": 1.8912806129771145, |
|
"learning_rate": 1.9883677130430827e-05, |
|
"loss": 0.5864, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.7750105086017574, |
|
"learning_rate": 1.9874576252768793e-05, |
|
"loss": 0.6124, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2554347826086957, |
|
"grad_norm": 1.2955635391212061, |
|
"learning_rate": 1.9865134881155504e-05, |
|
"loss": 0.6884, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2608695652173913, |
|
"grad_norm": 1.273010141736733, |
|
"learning_rate": 1.98553533411472e-05, |
|
"loss": 0.6484, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.266304347826087, |
|
"grad_norm": 2.163538460282388, |
|
"learning_rate": 1.9845231970029774e-05, |
|
"loss": 0.7095, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2717391304347826, |
|
"grad_norm": 1.8775881503442995, |
|
"learning_rate": 1.983477111680712e-05, |
|
"loss": 0.604, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.27717391304347827, |
|
"grad_norm": 1.5484748822902972, |
|
"learning_rate": 1.9823971142189126e-05, |
|
"loss": 0.6862, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2826086956521739, |
|
"grad_norm": 1.0946391927116763, |
|
"learning_rate": 1.981283241857922e-05, |
|
"loss": 0.6276, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.28804347826086957, |
|
"grad_norm": 1.4879971843628843, |
|
"learning_rate": 1.9801355330061526e-05, |
|
"loss": 0.5763, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.29347826086956524, |
|
"grad_norm": 1.8993705185884953, |
|
"learning_rate": 1.978954027238763e-05, |
|
"loss": 0.5908, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.29891304347826086, |
|
"grad_norm": 1.6076663483914293, |
|
"learning_rate": 1.9777387652962933e-05, |
|
"loss": 0.5543, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.30434782608695654, |
|
"grad_norm": 1.1740894440396383, |
|
"learning_rate": 1.9764897890832597e-05, |
|
"loss": 0.5458, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.30978260869565216, |
|
"grad_norm": 1.9838553435397361, |
|
"learning_rate": 1.9752071416667102e-05, |
|
"loss": 0.5046, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.31521739130434784, |
|
"grad_norm": 1.0812842728047714, |
|
"learning_rate": 1.973890867274738e-05, |
|
"loss": 0.5609, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.32065217391304346, |
|
"grad_norm": 1.723223092822651, |
|
"learning_rate": 1.972541011294959e-05, |
|
"loss": 0.4724, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.32608695652173914, |
|
"grad_norm": 1.4887350192643218, |
|
"learning_rate": 1.9711576202729445e-05, |
|
"loss": 0.5168, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.33152173913043476, |
|
"grad_norm": 1.533986608527031, |
|
"learning_rate": 1.9697407419106178e-05, |
|
"loss": 0.5374, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.33695652173913043, |
|
"grad_norm": 1.283663400004928, |
|
"learning_rate": 1.9682904250646084e-05, |
|
"loss": 0.622, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.3423913043478261, |
|
"grad_norm": 1.511070122779534, |
|
"learning_rate": 1.9668067197445662e-05, |
|
"loss": 0.572, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.34782608695652173, |
|
"grad_norm": 1.843030359662425, |
|
"learning_rate": 1.9652896771114416e-05, |
|
"loss": 0.5449, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3532608695652174, |
|
"grad_norm": 2.2753033401712752, |
|
"learning_rate": 1.9637393494757146e-05, |
|
"loss": 0.6883, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.358695652173913, |
|
"grad_norm": 1.1407510209951979, |
|
"learning_rate": 1.962155790295597e-05, |
|
"loss": 0.4357, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3641304347826087, |
|
"grad_norm": 1.351954153650573, |
|
"learning_rate": 1.9605390541751864e-05, |
|
"loss": 0.5109, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3695652173913043, |
|
"grad_norm": 1.2344312626302043, |
|
"learning_rate": 1.9588891968625828e-05, |
|
"loss": 0.5133, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 3.528171261663953, |
|
"learning_rate": 1.9572062752479684e-05, |
|
"loss": 0.7135, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3804347826086957, |
|
"grad_norm": 1.0283054372439564, |
|
"learning_rate": 1.9554903473616432e-05, |
|
"loss": 0.4934, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3858695652173913, |
|
"grad_norm": 1.2480924815092371, |
|
"learning_rate": 1.953741472372027e-05, |
|
"loss": 0.3846, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.391304347826087, |
|
"grad_norm": 1.4701584460006578, |
|
"learning_rate": 1.951959710583616e-05, |
|
"loss": 0.5303, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3967391304347826, |
|
"grad_norm": 2.2396908880712774, |
|
"learning_rate": 1.950145123434907e-05, |
|
"loss": 0.4241, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.40217391304347827, |
|
"grad_norm": 1.7904621917947958, |
|
"learning_rate": 1.9482977734962753e-05, |
|
"loss": 0.6144, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.4076086956521739, |
|
"grad_norm": 1.650705831140192, |
|
"learning_rate": 1.94641772446782e-05, |
|
"loss": 0.592, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.41304347826086957, |
|
"grad_norm": 1.588255971243881, |
|
"learning_rate": 1.9445050411771648e-05, |
|
"loss": 0.5918, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.41847826086956524, |
|
"grad_norm": 1.4379861368277966, |
|
"learning_rate": 1.9425597895772257e-05, |
|
"loss": 0.604, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.42391304347826086, |
|
"grad_norm": 1.7783069990731366, |
|
"learning_rate": 1.9405820367439343e-05, |
|
"loss": 0.6351, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.42934782608695654, |
|
"grad_norm": 1.3451929958729711, |
|
"learning_rate": 1.9385718508739263e-05, |
|
"loss": 0.4487, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.43478260869565216, |
|
"grad_norm": 1.5631174238633363, |
|
"learning_rate": 1.9365293012821887e-05, |
|
"loss": 0.5412, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.44021739130434784, |
|
"grad_norm": 1.7641796531654723, |
|
"learning_rate": 1.934454458399671e-05, |
|
"loss": 0.4606, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.44565217391304346, |
|
"grad_norm": 2.007206796904478, |
|
"learning_rate": 1.9323473937708565e-05, |
|
"loss": 0.5409, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.45108695652173914, |
|
"grad_norm": 1.6060302211544533, |
|
"learning_rate": 1.9302081800512943e-05, |
|
"loss": 0.5194, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.45652173913043476, |
|
"grad_norm": 1.584139057778314, |
|
"learning_rate": 1.9280368910050943e-05, |
|
"loss": 0.4662, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.46195652173913043, |
|
"grad_norm": 1.8953323400594193, |
|
"learning_rate": 1.9258336015023847e-05, |
|
"loss": 0.4433, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.4673913043478261, |
|
"grad_norm": 1.6067605181621798, |
|
"learning_rate": 1.9235983875167296e-05, |
|
"loss": 0.4255, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.47282608695652173, |
|
"grad_norm": 1.4529302278758023, |
|
"learning_rate": 1.9213313261225083e-05, |
|
"loss": 0.4364, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4782608695652174, |
|
"grad_norm": 1.9965642456327142, |
|
"learning_rate": 1.9190324954922594e-05, |
|
"loss": 0.4199, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.483695652173913, |
|
"grad_norm": 1.9458245431232768, |
|
"learning_rate": 1.9167019748939847e-05, |
|
"loss": 0.4024, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.4891304347826087, |
|
"grad_norm": 2.000159805579825, |
|
"learning_rate": 1.914339844688415e-05, |
|
"loss": 0.4595, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4945652173913043, |
|
"grad_norm": 1.97378975953703, |
|
"learning_rate": 1.91194618632624e-05, |
|
"loss": 0.4917, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.3771983904411074, |
|
"learning_rate": 1.9095210823452997e-05, |
|
"loss": 0.3341, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5054347826086957, |
|
"grad_norm": 1.8123410249166505, |
|
"learning_rate": 1.9070646163677383e-05, |
|
"loss": 0.4285, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5108695652173914, |
|
"grad_norm": 1.7561172390607174, |
|
"learning_rate": 1.9045768730971198e-05, |
|
"loss": 0.3863, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5163043478260869, |
|
"grad_norm": 1.809060828661053, |
|
"learning_rate": 1.9020579383155087e-05, |
|
"loss": 0.3486, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5217391304347826, |
|
"grad_norm": 1.541206279317173, |
|
"learning_rate": 1.899507898880512e-05, |
|
"loss": 0.1713, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5271739130434783, |
|
"grad_norm": 2.0502484531232343, |
|
"learning_rate": 1.8969268427222823e-05, |
|
"loss": 0.2059, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.532608695652174, |
|
"grad_norm": 1.8524406597388374, |
|
"learning_rate": 1.8943148588404877e-05, |
|
"loss": 0.3856, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5380434782608695, |
|
"grad_norm": 3.385889154621842, |
|
"learning_rate": 1.8916720373012425e-05, |
|
"loss": 0.3027, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5434782608695652, |
|
"grad_norm": 1.2814547066301334, |
|
"learning_rate": 1.8889984692340015e-05, |
|
"loss": 0.1609, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5489130434782609, |
|
"grad_norm": 1.473493575445019, |
|
"learning_rate": 1.8862942468284174e-05, |
|
"loss": 0.1658, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5543478260869565, |
|
"grad_norm": 2.2017906861514125, |
|
"learning_rate": 1.883559463331162e-05, |
|
"loss": 0.2269, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5597826086956522, |
|
"grad_norm": 2.9266092953974345, |
|
"learning_rate": 1.880794213042711e-05, |
|
"loss": 0.2638, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5652173913043478, |
|
"grad_norm": 1.2470192969755443, |
|
"learning_rate": 1.8779985913140927e-05, |
|
"loss": 0.1826, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.5706521739130435, |
|
"grad_norm": 1.1329281006012806, |
|
"learning_rate": 1.875172694543599e-05, |
|
"loss": 0.0992, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5760869565217391, |
|
"grad_norm": 1.435458967360399, |
|
"learning_rate": 1.8723166201734626e-05, |
|
"loss": 0.1052, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.5815217391304348, |
|
"grad_norm": 2.4406380430615244, |
|
"learning_rate": 1.869430466686497e-05, |
|
"loss": 0.1999, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5869565217391305, |
|
"grad_norm": 1.0271614062096617, |
|
"learning_rate": 1.8665143336027e-05, |
|
"loss": 0.0855, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.592391304347826, |
|
"grad_norm": 1.3651592297249626, |
|
"learning_rate": 1.8635683214758213e-05, |
|
"loss": 0.0977, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5978260869565217, |
|
"grad_norm": 0.5945892482638718, |
|
"learning_rate": 1.8605925318898973e-05, |
|
"loss": 0.0337, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6032608695652174, |
|
"grad_norm": 1.194835217639101, |
|
"learning_rate": 1.8575870674557467e-05, |
|
"loss": 0.0722, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6086956521739131, |
|
"grad_norm": 1.762735939201958, |
|
"learning_rate": 1.8545520318074328e-05, |
|
"loss": 0.1228, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.6141304347826086, |
|
"grad_norm": 1.017829163872169, |
|
"learning_rate": 1.85148752959869e-05, |
|
"loss": 0.0344, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6195652173913043, |
|
"grad_norm": 1.052690658912748, |
|
"learning_rate": 1.8483936664993152e-05, |
|
"loss": 0.0377, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 1.7977784022224987, |
|
"learning_rate": 1.8452705491915232e-05, |
|
"loss": 0.141, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6304347826086957, |
|
"grad_norm": 1.8477093237099182, |
|
"learning_rate": 1.8421182853662704e-05, |
|
"loss": 0.0734, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.6358695652173914, |
|
"grad_norm": 0.6794730347498438, |
|
"learning_rate": 1.8389369837195387e-05, |
|
"loss": 0.0266, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6413043478260869, |
|
"grad_norm": 0.8818635589659883, |
|
"learning_rate": 1.835726753948589e-05, |
|
"loss": 0.0487, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.6467391304347826, |
|
"grad_norm": 1.0608887498751458, |
|
"learning_rate": 1.8324877067481782e-05, |
|
"loss": 0.0275, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.6521739130434783, |
|
"grad_norm": 1.3129587931586821, |
|
"learning_rate": 1.829219953806743e-05, |
|
"loss": 0.0642, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.657608695652174, |
|
"grad_norm": 1.8948301224723039, |
|
"learning_rate": 1.825923607802547e-05, |
|
"loss": 0.0785, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.6630434782608695, |
|
"grad_norm": 0.2518374968408712, |
|
"learning_rate": 1.8225987823997967e-05, |
|
"loss": 0.0111, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6684782608695652, |
|
"grad_norm": 0.25552971144651465, |
|
"learning_rate": 1.8192455922447227e-05, |
|
"loss": 0.0103, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.6739130434782609, |
|
"grad_norm": 0.7841302667217214, |
|
"learning_rate": 1.815864152961624e-05, |
|
"loss": 0.0122, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.6793478260869565, |
|
"grad_norm": 0.1515291563958561, |
|
"learning_rate": 1.812454581148884e-05, |
|
"loss": 0.0079, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6847826086956522, |
|
"grad_norm": 0.11584834326779594, |
|
"learning_rate": 1.8090169943749477e-05, |
|
"loss": 0.0055, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6902173913043478, |
|
"grad_norm": 0.1740566784478502, |
|
"learning_rate": 1.8055515111742688e-05, |
|
"loss": 0.0069, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.6956521739130435, |
|
"grad_norm": 1.5625062014274096, |
|
"learning_rate": 1.8020582510432234e-05, |
|
"loss": 0.0383, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7010869565217391, |
|
"grad_norm": 0.12273159750563628, |
|
"learning_rate": 1.798537334435986e-05, |
|
"loss": 0.0062, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7065217391304348, |
|
"grad_norm": 3.693193027378141, |
|
"learning_rate": 1.7949888827603813e-05, |
|
"loss": 0.1765, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7119565217391305, |
|
"grad_norm": 0.12477337459792677, |
|
"learning_rate": 1.791413018373692e-05, |
|
"loss": 0.0057, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.717391304347826, |
|
"grad_norm": 0.8357268279739778, |
|
"learning_rate": 1.7878098645784447e-05, |
|
"loss": 0.0163, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7228260869565217, |
|
"grad_norm": 3.8264656288549985, |
|
"learning_rate": 1.7841795456181556e-05, |
|
"loss": 0.1727, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.7282608695652174, |
|
"grad_norm": 0.6387227523871831, |
|
"learning_rate": 1.780522186673046e-05, |
|
"loss": 0.0076, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7336956521739131, |
|
"grad_norm": 0.09079528876022976, |
|
"learning_rate": 1.776837913855728e-05, |
|
"loss": 0.0038, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7391304347826086, |
|
"grad_norm": 1.9001901725953279, |
|
"learning_rate": 1.7731268542068536e-05, |
|
"loss": 0.0208, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.7445652173913043, |
|
"grad_norm": 0.21704170005212517, |
|
"learning_rate": 1.7693891356907357e-05, |
|
"loss": 0.007, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.7213653784073487, |
|
"learning_rate": 1.7656248871909346e-05, |
|
"loss": 0.0137, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.7554347826086957, |
|
"grad_norm": 0.40110602562720454, |
|
"learning_rate": 1.7618342385058147e-05, |
|
"loss": 0.0099, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.7608695652173914, |
|
"grad_norm": 2.026407827233553, |
|
"learning_rate": 1.758017320344068e-05, |
|
"loss": 0.0415, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7663043478260869, |
|
"grad_norm": 1.1169723105563958, |
|
"learning_rate": 1.754174264320208e-05, |
|
"loss": 0.0232, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.7717391304347826, |
|
"grad_norm": 0.1746366846193237, |
|
"learning_rate": 1.7503052029500308e-05, |
|
"loss": 0.0052, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.7771739130434783, |
|
"grad_norm": 2.3203125623649874, |
|
"learning_rate": 1.7464102696460447e-05, |
|
"loss": 0.2205, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.782608695652174, |
|
"grad_norm": 3.9663829407278315, |
|
"learning_rate": 1.7424895987128723e-05, |
|
"loss": 0.223, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.7880434782608695, |
|
"grad_norm": 2.9570619026185883, |
|
"learning_rate": 1.738543325342617e-05, |
|
"loss": 0.0697, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.7934782608695652, |
|
"grad_norm": 0.07057319843123724, |
|
"learning_rate": 1.7345715856102024e-05, |
|
"loss": 0.0031, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.7989130434782609, |
|
"grad_norm": 0.11320521777018241, |
|
"learning_rate": 1.7305745164686816e-05, |
|
"loss": 0.0042, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8043478260869565, |
|
"grad_norm": 1.3124572295306176, |
|
"learning_rate": 1.7265522557445115e-05, |
|
"loss": 0.021, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.8097826086956522, |
|
"grad_norm": 0.42701665371399616, |
|
"learning_rate": 1.7225049421328024e-05, |
|
"loss": 0.0091, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.8152173913043478, |
|
"grad_norm": 0.6276112813031721, |
|
"learning_rate": 1.7184327151925366e-05, |
|
"loss": 0.0094, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8206521739130435, |
|
"grad_norm": 1.5664524264393311, |
|
"learning_rate": 1.7143357153417533e-05, |
|
"loss": 0.0256, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.8260869565217391, |
|
"grad_norm": 0.41431375770399115, |
|
"learning_rate": 1.710214083852709e-05, |
|
"loss": 0.0117, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.8315217391304348, |
|
"grad_norm": 0.3493269925986, |
|
"learning_rate": 1.7060679628470054e-05, |
|
"loss": 0.0084, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8369565217391305, |
|
"grad_norm": 0.3211404898250956, |
|
"learning_rate": 1.7018974952906885e-05, |
|
"loss": 0.0084, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.842391304347826, |
|
"grad_norm": 0.21231254558257762, |
|
"learning_rate": 1.697702824989319e-05, |
|
"loss": 0.0065, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.8478260869565217, |
|
"grad_norm": 1.457137599474762, |
|
"learning_rate": 1.693484096583014e-05, |
|
"loss": 0.0226, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.8532608695652174, |
|
"grad_norm": 0.19497147073015395, |
|
"learning_rate": 1.6892414555414594e-05, |
|
"loss": 0.0048, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.8586956521739131, |
|
"grad_norm": 1.8062131040571878, |
|
"learning_rate": 1.6849750481588936e-05, |
|
"loss": 0.0277, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.8641304347826086, |
|
"grad_norm": 1.3188356922598312, |
|
"learning_rate": 1.680685021549063e-05, |
|
"loss": 0.0207, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.8695652173913043, |
|
"grad_norm": 0.26492812790936593, |
|
"learning_rate": 1.6763715236401493e-05, |
|
"loss": 0.0059, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 0.3017199408994534, |
|
"learning_rate": 1.672034703169669e-05, |
|
"loss": 0.0076, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.8804347826086957, |
|
"grad_norm": 0.1252817764595737, |
|
"learning_rate": 1.667674709679344e-05, |
|
"loss": 0.0041, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.8858695652173914, |
|
"grad_norm": 1.1529370223873083, |
|
"learning_rate": 1.663291693509946e-05, |
|
"loss": 0.019, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.8913043478260869, |
|
"grad_norm": 0.12063163996672908, |
|
"learning_rate": 1.658885805796111e-05, |
|
"loss": 0.0031, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.8967391304347826, |
|
"grad_norm": 0.11125376158368971, |
|
"learning_rate": 1.6544571984611306e-05, |
|
"loss": 0.0034, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9021739130434783, |
|
"grad_norm": 0.19945453640512878, |
|
"learning_rate": 1.6500060242117096e-05, |
|
"loss": 0.0051, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.907608695652174, |
|
"grad_norm": 0.07254620014242376, |
|
"learning_rate": 1.6455324365327035e-05, |
|
"loss": 0.0026, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.9130434782608695, |
|
"grad_norm": 1.3895686723936829, |
|
"learning_rate": 1.6410365896818253e-05, |
|
"loss": 0.0234, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.9184782608695652, |
|
"grad_norm": 0.7517916115731629, |
|
"learning_rate": 1.636518638684325e-05, |
|
"loss": 0.0057, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.9239130434782609, |
|
"grad_norm": 0.11708397875230993, |
|
"learning_rate": 1.6319787393276463e-05, |
|
"loss": 0.0036, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9293478260869565, |
|
"grad_norm": 0.027987175186703777, |
|
"learning_rate": 1.6274170481560527e-05, |
|
"loss": 0.0015, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9347826086956522, |
|
"grad_norm": 0.17986790848065237, |
|
"learning_rate": 1.6228337224652307e-05, |
|
"loss": 0.0059, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.9402173913043478, |
|
"grad_norm": 0.03867873116439446, |
|
"learning_rate": 1.6182289202968663e-05, |
|
"loss": 0.0017, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.9456521739130435, |
|
"grad_norm": 0.057278523890185604, |
|
"learning_rate": 1.613602800433194e-05, |
|
"loss": 0.0024, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.9510869565217391, |
|
"grad_norm": 2.728399164781685, |
|
"learning_rate": 1.6089555223915226e-05, |
|
"loss": 0.1588, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9565217391304348, |
|
"grad_norm": 0.3768997196852311, |
|
"learning_rate": 1.6042872464187352e-05, |
|
"loss": 0.0054, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.9619565217391305, |
|
"grad_norm": 4.011589996542784, |
|
"learning_rate": 1.5995981334857625e-05, |
|
"loss": 0.0702, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.967391304347826, |
|
"grad_norm": 0.49004409324214177, |
|
"learning_rate": 1.5948883452820326e-05, |
|
"loss": 0.01, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.9728260869565217, |
|
"grad_norm": 0.048813631073329034, |
|
"learning_rate": 1.590158044209897e-05, |
|
"loss": 0.002, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.9782608695652174, |
|
"grad_norm": 0.09547901003362863, |
|
"learning_rate": 1.5854073933790277e-05, |
|
"loss": 0.0024, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9836956521739131, |
|
"grad_norm": 2.3086350812363565, |
|
"learning_rate": 1.580636556600796e-05, |
|
"loss": 0.0277, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.9891304347826086, |
|
"grad_norm": 2.752485470216331, |
|
"learning_rate": 1.575845698382622e-05, |
|
"loss": 0.0671, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.9945652173913043, |
|
"grad_norm": 0.08760080184190135, |
|
"learning_rate": 1.5710349839223034e-05, |
|
"loss": 0.0025, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.052319179757302624, |
|
"learning_rate": 1.566204579102317e-05, |
|
"loss": 0.0016, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.0054347826086956, |
|
"grad_norm": 0.20188982483949725, |
|
"learning_rate": 1.561354650484102e-05, |
|
"loss": 0.0054, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.0108695652173914, |
|
"grad_norm": 1.214861582615001, |
|
"learning_rate": 1.556485365302313e-05, |
|
"loss": 0.0095, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.016304347826087, |
|
"grad_norm": 1.1857810014141275, |
|
"learning_rate": 1.5515968914590568e-05, |
|
"loss": 0.0161, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0217391304347827, |
|
"grad_norm": 0.19290187635263223, |
|
"learning_rate": 1.546689397518101e-05, |
|
"loss": 0.004, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.0271739130434783, |
|
"grad_norm": 0.22326269659684472, |
|
"learning_rate": 1.5417630526990613e-05, |
|
"loss": 0.0044, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0326086956521738, |
|
"grad_norm": 0.0690691126927046, |
|
"learning_rate": 1.5368180268715678e-05, |
|
"loss": 0.0022, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0380434782608696, |
|
"grad_norm": 0.519784946142706, |
|
"learning_rate": 1.5318544905494063e-05, |
|
"loss": 0.0075, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.0434782608695652, |
|
"grad_norm": 0.1210215491547705, |
|
"learning_rate": 1.52687261488464e-05, |
|
"loss": 0.0032, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.048913043478261, |
|
"grad_norm": 0.1128182153705411, |
|
"learning_rate": 1.5218725716617062e-05, |
|
"loss": 0.0031, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.0543478260869565, |
|
"grad_norm": 0.0917279431010188, |
|
"learning_rate": 1.5168545332914942e-05, |
|
"loss": 0.0032, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.059782608695652, |
|
"grad_norm": 0.1599750281188914, |
|
"learning_rate": 1.5118186728054002e-05, |
|
"loss": 0.0034, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.065217391304348, |
|
"grad_norm": 3.0052317701428906, |
|
"learning_rate": 1.50676516384936e-05, |
|
"loss": 0.2052, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.0706521739130435, |
|
"grad_norm": 0.09347487309598097, |
|
"learning_rate": 1.5016941806778622e-05, |
|
"loss": 0.0024, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.0760869565217392, |
|
"grad_norm": 0.6368154943577347, |
|
"learning_rate": 1.496605898147938e-05, |
|
"loss": 0.0112, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.0815217391304348, |
|
"grad_norm": 0.08805765943523453, |
|
"learning_rate": 1.4915004917131345e-05, |
|
"loss": 0.0025, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 0.05469514003374087, |
|
"learning_rate": 1.4863781374174625e-05, |
|
"loss": 0.002, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0923913043478262, |
|
"grad_norm": 0.10652940546536208, |
|
"learning_rate": 1.4812390118893273e-05, |
|
"loss": 0.0032, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.0978260869565217, |
|
"grad_norm": 4.207882558276106, |
|
"learning_rate": 1.4760832923354375e-05, |
|
"loss": 0.0583, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.1032608695652173, |
|
"grad_norm": 0.0699647885839302, |
|
"learning_rate": 1.4709111565346948e-05, |
|
"loss": 0.0026, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.108695652173913, |
|
"grad_norm": 0.30166623168218903, |
|
"learning_rate": 1.4657227828320637e-05, |
|
"loss": 0.006, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.1141304347826086, |
|
"grad_norm": 4.199370993333585, |
|
"learning_rate": 1.4605183501324231e-05, |
|
"loss": 0.0775, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.1195652173913044, |
|
"grad_norm": 0.32565218496952747, |
|
"learning_rate": 1.4552980378943953e-05, |
|
"loss": 0.0033, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 0.0809703234967001, |
|
"learning_rate": 1.4500620261241598e-05, |
|
"loss": 0.0026, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.1304347826086956, |
|
"grad_norm": 0.06883017026031267, |
|
"learning_rate": 1.4448104953692443e-05, |
|
"loss": 0.0019, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.1358695652173914, |
|
"grad_norm": 0.08112137716749798, |
|
"learning_rate": 1.4395436267123017e-05, |
|
"loss": 0.0025, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.141304347826087, |
|
"grad_norm": 0.0472362130550949, |
|
"learning_rate": 1.4342616017648632e-05, |
|
"loss": 0.0018, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1467391304347827, |
|
"grad_norm": 0.0884620238410297, |
|
"learning_rate": 1.4289646026610789e-05, |
|
"loss": 0.0021, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.1521739130434783, |
|
"grad_norm": 0.04795365977948435, |
|
"learning_rate": 1.423652812051434e-05, |
|
"loss": 0.0017, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.1576086956521738, |
|
"grad_norm": 0.02935797027689571, |
|
"learning_rate": 1.4183264130964545e-05, |
|
"loss": 0.0015, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.1630434782608696, |
|
"grad_norm": 0.0668820523334726, |
|
"learning_rate": 1.4129855894603885e-05, |
|
"loss": 0.0027, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.1684782608695652, |
|
"grad_norm": 0.7758685627388171, |
|
"learning_rate": 1.4076305253048748e-05, |
|
"loss": 0.0105, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.1739130434782608, |
|
"grad_norm": 0.7009141120346845, |
|
"learning_rate": 1.4022614052825918e-05, |
|
"loss": 0.01, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.1793478260869565, |
|
"grad_norm": 0.058294067779879076, |
|
"learning_rate": 1.3968784145308907e-05, |
|
"loss": 0.002, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.184782608695652, |
|
"grad_norm": 0.09580668260043325, |
|
"learning_rate": 1.3914817386654112e-05, |
|
"loss": 0.0028, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.190217391304348, |
|
"grad_norm": 4.864872194559485, |
|
"learning_rate": 1.3860715637736817e-05, |
|
"loss": 0.1252, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.1956521739130435, |
|
"grad_norm": 0.15310828918627564, |
|
"learning_rate": 1.3806480764087027e-05, |
|
"loss": 0.003, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2010869565217392, |
|
"grad_norm": 0.3265801320494785, |
|
"learning_rate": 1.3752114635825138e-05, |
|
"loss": 0.005, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.2065217391304348, |
|
"grad_norm": 4.409339908706341, |
|
"learning_rate": 1.369761912759744e-05, |
|
"loss": 0.1368, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.2119565217391304, |
|
"grad_norm": 0.09658224964632216, |
|
"learning_rate": 1.3642996118511504e-05, |
|
"loss": 0.0027, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.2173913043478262, |
|
"grad_norm": 0.13386998342251066, |
|
"learning_rate": 1.358824749207136e-05, |
|
"loss": 0.0029, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.2228260869565217, |
|
"grad_norm": 0.058694075695156535, |
|
"learning_rate": 1.3533375136112563e-05, |
|
"loss": 0.0019, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.2282608695652173, |
|
"grad_norm": 0.1675736492580823, |
|
"learning_rate": 1.3478380942737097e-05, |
|
"loss": 0.0041, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.233695652173913, |
|
"grad_norm": 0.6605378406587118, |
|
"learning_rate": 1.3423266808248123e-05, |
|
"loss": 0.0064, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.2391304347826086, |
|
"grad_norm": 0.07582983219640445, |
|
"learning_rate": 1.3368034633084603e-05, |
|
"loss": 0.0021, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.2445652173913044, |
|
"grad_norm": 0.11839256459523798, |
|
"learning_rate": 1.331268632175576e-05, |
|
"loss": 0.0033, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.498989993420891, |
|
"learning_rate": 1.3257223782775412e-05, |
|
"loss": 0.0058, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.2554347826086958, |
|
"grad_norm": 0.0627689672183379, |
|
"learning_rate": 1.3201648928596164e-05, |
|
"loss": 0.0028, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.2608695652173914, |
|
"grad_norm": 0.44003082591712833, |
|
"learning_rate": 1.3145963675543451e-05, |
|
"loss": 0.0056, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.266304347826087, |
|
"grad_norm": 3.9655617256713556, |
|
"learning_rate": 1.3090169943749475e-05, |
|
"loss": 0.0738, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.2717391304347827, |
|
"grad_norm": 0.1490491896911272, |
|
"learning_rate": 1.3034269657086993e-05, |
|
"loss": 0.003, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.2771739130434783, |
|
"grad_norm": 0.255678738387853, |
|
"learning_rate": 1.2978264743102964e-05, |
|
"loss": 0.0036, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.2826086956521738, |
|
"grad_norm": 0.08658556472142168, |
|
"learning_rate": 1.2922157132952106e-05, |
|
"loss": 0.003, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.2880434782608696, |
|
"grad_norm": 0.056388528409829865, |
|
"learning_rate": 1.286594876133028e-05, |
|
"loss": 0.0016, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.2934782608695652, |
|
"grad_norm": 1.5398049755885386, |
|
"learning_rate": 1.2809641566407802e-05, |
|
"loss": 0.0378, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.2989130434782608, |
|
"grad_norm": 0.036566689298081184, |
|
"learning_rate": 1.27532374897626e-05, |
|
"loss": 0.0012, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.3043478260869565, |
|
"grad_norm": 0.04920293791313143, |
|
"learning_rate": 1.2696738476313261e-05, |
|
"loss": 0.0017, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.309782608695652, |
|
"grad_norm": 0.1402817359911882, |
|
"learning_rate": 1.2640146474251979e-05, |
|
"loss": 0.0036, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.315217391304348, |
|
"grad_norm": 0.06831135225959813, |
|
"learning_rate": 1.258346343497736e-05, |
|
"loss": 0.0025, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.3206521739130435, |
|
"grad_norm": 0.028285907167631727, |
|
"learning_rate": 1.2526691313027153e-05, |
|
"loss": 0.001, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3260869565217392, |
|
"grad_norm": 0.33707980146121225, |
|
"learning_rate": 1.2469832066010843e-05, |
|
"loss": 0.0074, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.3315217391304348, |
|
"grad_norm": 0.02312342530538864, |
|
"learning_rate": 1.2412887654542147e-05, |
|
"loss": 0.001, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.3369565217391304, |
|
"grad_norm": 0.026427047059385186, |
|
"learning_rate": 1.2355860042171421e-05, |
|
"loss": 0.0011, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.3423913043478262, |
|
"grad_norm": 2.9263468296261164, |
|
"learning_rate": 1.2298751195317935e-05, |
|
"loss": 0.1557, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.3478260869565217, |
|
"grad_norm": 0.020548021429656328, |
|
"learning_rate": 1.224156308320208e-05, |
|
"loss": 0.0009, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.3532608695652173, |
|
"grad_norm": 0.025684644607937637, |
|
"learning_rate": 1.2184297677777463e-05, |
|
"loss": 0.0011, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.358695652173913, |
|
"grad_norm": 0.4277199026740869, |
|
"learning_rate": 1.2126956953662914e-05, |
|
"loss": 0.0074, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3641304347826086, |
|
"grad_norm": 0.722362923284817, |
|
"learning_rate": 1.2069542888074386e-05, |
|
"loss": 0.0094, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.3695652173913042, |
|
"grad_norm": 0.05042192018129352, |
|
"learning_rate": 1.2012057460756786e-05, |
|
"loss": 0.0016, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 0.04160962471056512, |
|
"learning_rate": 1.1954502653915704e-05, |
|
"loss": 0.0014, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.3804347826086958, |
|
"grad_norm": 0.04523201782339563, |
|
"learning_rate": 1.1896880452149077e-05, |
|
"loss": 0.0016, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.3858695652173914, |
|
"grad_norm": 0.023639170674016628, |
|
"learning_rate": 1.1839192842378737e-05, |
|
"loss": 0.0009, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.391304347826087, |
|
"grad_norm": 0.04866250108659108, |
|
"learning_rate": 1.1781441813781911e-05, |
|
"loss": 0.0014, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.3967391304347827, |
|
"grad_norm": 0.027392748713626538, |
|
"learning_rate": 1.1723629357722622e-05, |
|
"loss": 0.001, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.4021739130434783, |
|
"grad_norm": 0.04956045333392312, |
|
"learning_rate": 1.1665757467683025e-05, |
|
"loss": 0.0013, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.4076086956521738, |
|
"grad_norm": 0.287445593085176, |
|
"learning_rate": 1.1607828139194683e-05, |
|
"loss": 0.0051, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.4130434782608696, |
|
"grad_norm": 0.13531127988753577, |
|
"learning_rate": 1.1549843369769733e-05, |
|
"loss": 0.0023, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.4184782608695652, |
|
"grad_norm": 0.16453092649100554, |
|
"learning_rate": 1.1491805158832028e-05, |
|
"loss": 0.0031, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.4239130434782608, |
|
"grad_norm": 1.4301870845043336, |
|
"learning_rate": 1.1433715507648173e-05, |
|
"loss": 0.0166, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.4293478260869565, |
|
"grad_norm": 0.06079450292325032, |
|
"learning_rate": 1.1375576419258543e-05, |
|
"loss": 0.0016, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.434782608695652, |
|
"grad_norm": 0.12935761070271598, |
|
"learning_rate": 1.1317389898408188e-05, |
|
"loss": 0.0022, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.440217391304348, |
|
"grad_norm": 0.06441466437879496, |
|
"learning_rate": 1.125915795147773e-05, |
|
"loss": 0.0017, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.4456521739130435, |
|
"grad_norm": 0.11938010559111087, |
|
"learning_rate": 1.1200882586414168e-05, |
|
"loss": 0.0021, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.4510869565217392, |
|
"grad_norm": 0.14576252527987352, |
|
"learning_rate": 1.114256581266162e-05, |
|
"loss": 0.0032, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.4565217391304348, |
|
"grad_norm": 0.8091624068148694, |
|
"learning_rate": 1.1084209641092083e-05, |
|
"loss": 0.0098, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.4619565217391304, |
|
"grad_norm": 0.07301592812987565, |
|
"learning_rate": 1.1025816083936036e-05, |
|
"loss": 0.0021, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.4673913043478262, |
|
"grad_norm": 0.019465384139083376, |
|
"learning_rate": 1.0967387154713104e-05, |
|
"loss": 0.0008, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.4728260869565217, |
|
"grad_norm": 0.02684807806576838, |
|
"learning_rate": 1.0908924868162605e-05, |
|
"loss": 0.0009, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.4782608695652173, |
|
"grad_norm": 2.0536809709774086, |
|
"learning_rate": 1.0850431240174066e-05, |
|
"loss": 0.2241, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.483695652173913, |
|
"grad_norm": 0.5395466577497267, |
|
"learning_rate": 1.0791908287717744e-05, |
|
"loss": 0.0097, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.4891304347826086, |
|
"grad_norm": 3.6218348045652107, |
|
"learning_rate": 1.073335802877504e-05, |
|
"loss": 0.0488, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.4945652173913042, |
|
"grad_norm": 0.0346000232826567, |
|
"learning_rate": 1.0674782482268953e-05, |
|
"loss": 0.0013, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.031039844572176237, |
|
"learning_rate": 1.0616183667994435e-05, |
|
"loss": 0.0011, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.5054347826086958, |
|
"grad_norm": 1.3869410436009917, |
|
"learning_rate": 1.0557563606548751e-05, |
|
"loss": 0.02, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.5108695652173914, |
|
"grad_norm": 0.31857812561228843, |
|
"learning_rate": 1.0498924319261816e-05, |
|
"loss": 0.0046, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.516304347826087, |
|
"grad_norm": 0.018901071551922013, |
|
"learning_rate": 1.0440267828126478e-05, |
|
"loss": 0.0007, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.5217391304347827, |
|
"grad_norm": 0.35747451319055523, |
|
"learning_rate": 1.0381596155728823e-05, |
|
"loss": 0.0077, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.5271739130434783, |
|
"grad_norm": 0.038504499041816166, |
|
"learning_rate": 1.0322911325178402e-05, |
|
"loss": 0.0012, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.5326086956521738, |
|
"grad_norm": 0.061533456725221265, |
|
"learning_rate": 1.0264215360038483e-05, |
|
"loss": 0.0018, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.5380434782608696, |
|
"grad_norm": 0.053405604412389306, |
|
"learning_rate": 1.0205510284256286e-05, |
|
"loss": 0.0014, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.5434782608695652, |
|
"grad_norm": 0.1699993644991474, |
|
"learning_rate": 1.0146798122093167e-05, |
|
"loss": 0.0029, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.5489130434782608, |
|
"grad_norm": 0.07043260478387495, |
|
"learning_rate": 1.0088080898054852e-05, |
|
"loss": 0.0013, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.5543478260869565, |
|
"grad_norm": 0.050883436804006456, |
|
"learning_rate": 1.00293606368216e-05, |
|
"loss": 0.0018, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.5597826086956523, |
|
"grad_norm": 0.2015858838482068, |
|
"learning_rate": 9.970639363178401e-06, |
|
"loss": 0.0034, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.5652173913043477, |
|
"grad_norm": 0.15696624949542315, |
|
"learning_rate": 9.91191910194515e-06, |
|
"loss": 0.0024, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.5706521739130435, |
|
"grad_norm": 0.016094697472839387, |
|
"learning_rate": 9.853201877906836e-06, |
|
"loss": 0.0007, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.5760869565217392, |
|
"grad_norm": 2.6447259699825225, |
|
"learning_rate": 9.79448971574372e-06, |
|
"loss": 0.0868, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.5815217391304348, |
|
"grad_norm": 0.034146999181789345, |
|
"learning_rate": 9.73578463996152e-06, |
|
"loss": 0.001, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.5869565217391304, |
|
"grad_norm": 2.3913058327100507, |
|
"learning_rate": 9.677088674821601e-06, |
|
"loss": 0.0933, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.5923913043478262, |
|
"grad_norm": 2.7206555164113113, |
|
"learning_rate": 9.618403844271179e-06, |
|
"loss": 0.0834, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.5978260869565217, |
|
"grad_norm": 2.04432325341852, |
|
"learning_rate": 9.559732171873524e-06, |
|
"loss": 0.0509, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.6032608695652173, |
|
"grad_norm": 3.408481044696874, |
|
"learning_rate": 9.50107568073819e-06, |
|
"loss": 0.1523, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.608695652173913, |
|
"grad_norm": 0.15857623535162915, |
|
"learning_rate": 9.442436393451252e-06, |
|
"loss": 0.0037, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.6141304347826086, |
|
"grad_norm": 0.48149742863897177, |
|
"learning_rate": 9.383816332005569e-06, |
|
"loss": 0.0066, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6195652173913042, |
|
"grad_norm": 0.43146507945514945, |
|
"learning_rate": 9.325217517731047e-06, |
|
"loss": 0.0063, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 3.7183270965419526, |
|
"learning_rate": 9.266641971224963e-06, |
|
"loss": 0.0717, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.6304347826086958, |
|
"grad_norm": 0.6284145395909966, |
|
"learning_rate": 9.208091712282261e-06, |
|
"loss": 0.0113, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6358695652173914, |
|
"grad_norm": 0.12204274733613643, |
|
"learning_rate": 9.149568759825937e-06, |
|
"loss": 0.003, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.641304347826087, |
|
"grad_norm": 1.1716856729713159, |
|
"learning_rate": 9.091075131837399e-06, |
|
"loss": 0.016, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.6467391304347827, |
|
"grad_norm": 2.3073801254975743, |
|
"learning_rate": 9.032612845286896e-06, |
|
"loss": 0.0625, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.6521739130434783, |
|
"grad_norm": 0.24584369141616186, |
|
"learning_rate": 8.974183916063967e-06, |
|
"loss": 0.0038, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.6576086956521738, |
|
"grad_norm": 0.896272637025756, |
|
"learning_rate": 8.915790358907924e-06, |
|
"loss": 0.0124, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.6630434782608696, |
|
"grad_norm": 3.8696382415332957, |
|
"learning_rate": 8.857434187338381e-06, |
|
"loss": 0.0462, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.6684782608695652, |
|
"grad_norm": 0.12503032249914797, |
|
"learning_rate": 8.799117413585836e-06, |
|
"loss": 0.0025, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.6739130434782608, |
|
"grad_norm": 0.45154839467695335, |
|
"learning_rate": 8.740842048522268e-06, |
|
"loss": 0.0061, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.6793478260869565, |
|
"grad_norm": 0.09419278918622512, |
|
"learning_rate": 8.682610101591813e-06, |
|
"loss": 0.002, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.6847826086956523, |
|
"grad_norm": 0.4958479599321362, |
|
"learning_rate": 8.624423580741462e-06, |
|
"loss": 0.0086, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.6902173913043477, |
|
"grad_norm": 0.11770008527271246, |
|
"learning_rate": 8.56628449235183e-06, |
|
"loss": 0.0025, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.6956521739130435, |
|
"grad_norm": 0.369565128723298, |
|
"learning_rate": 8.508194841167975e-06, |
|
"loss": 0.0059, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.7010869565217392, |
|
"grad_norm": 0.06235754588692365, |
|
"learning_rate": 8.450156630230267e-06, |
|
"loss": 0.0019, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.7065217391304348, |
|
"grad_norm": 0.02787223131850643, |
|
"learning_rate": 8.39217186080532e-06, |
|
"loss": 0.0012, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.7119565217391304, |
|
"grad_norm": 0.03719997929743275, |
|
"learning_rate": 8.334242532316977e-06, |
|
"loss": 0.0012, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.7173913043478262, |
|
"grad_norm": 0.42795195182267215, |
|
"learning_rate": 8.276370642277383e-06, |
|
"loss": 0.0048, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.7228260869565217, |
|
"grad_norm": 0.9372903840892463, |
|
"learning_rate": 8.21855818621809e-06, |
|
"loss": 0.0203, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.7282608695652173, |
|
"grad_norm": 0.13870817101483046, |
|
"learning_rate": 8.160807157621262e-06, |
|
"loss": 0.0025, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.733695652173913, |
|
"grad_norm": 0.2445880882562458, |
|
"learning_rate": 8.103119547850924e-06, |
|
"loss": 0.0037, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.7391304347826086, |
|
"grad_norm": 0.06926518467785787, |
|
"learning_rate": 8.045497346084297e-06, |
|
"loss": 0.002, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.7445652173913042, |
|
"grad_norm": 0.029704630377944685, |
|
"learning_rate": 7.98794253924322e-06, |
|
"loss": 0.0011, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.02657434909385738, |
|
"learning_rate": 7.930457111925616e-06, |
|
"loss": 0.0012, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.7554347826086958, |
|
"grad_norm": 0.087118861417369, |
|
"learning_rate": 7.873043046337086e-06, |
|
"loss": 0.002, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.7608695652173914, |
|
"grad_norm": 0.029028883768708425, |
|
"learning_rate": 7.815702322222539e-06, |
|
"loss": 0.0009, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.766304347826087, |
|
"grad_norm": 0.574091822654542, |
|
"learning_rate": 7.758436916797923e-06, |
|
"loss": 0.0092, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.7717391304347827, |
|
"grad_norm": 0.043721730276414336, |
|
"learning_rate": 7.701248804682069e-06, |
|
"loss": 0.0014, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.7771739130434783, |
|
"grad_norm": 2.4824141009923726, |
|
"learning_rate": 7.64413995782858e-06, |
|
"loss": 0.1501, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.7826086956521738, |
|
"grad_norm": 0.3656857182755404, |
|
"learning_rate": 7.5871123454578534e-06, |
|
"loss": 0.0055, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.7880434782608696, |
|
"grad_norm": 0.030565125424490584, |
|
"learning_rate": 7.530167933989161e-06, |
|
"loss": 0.001, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.7934782608695652, |
|
"grad_norm": 0.6771809217496879, |
|
"learning_rate": 7.47330868697285e-06, |
|
"loss": 0.01, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.7989130434782608, |
|
"grad_norm": 0.24573870561094346, |
|
"learning_rate": 7.4165365650226425e-06, |
|
"loss": 0.0049, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.8043478260869565, |
|
"grad_norm": 0.8696535124002203, |
|
"learning_rate": 7.3598535257480244e-06, |
|
"loss": 0.0126, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.8097826086956523, |
|
"grad_norm": 0.02189894312561321, |
|
"learning_rate": 7.30326152368674e-06, |
|
"loss": 0.0008, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.8152173913043477, |
|
"grad_norm": 0.031609375803459974, |
|
"learning_rate": 7.246762510237404e-06, |
|
"loss": 0.0011, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.8206521739130435, |
|
"grad_norm": 0.020342266321765227, |
|
"learning_rate": 7.1903584335922e-06, |
|
"loss": 0.0008, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.8260869565217392, |
|
"grad_norm": 0.09248271114619741, |
|
"learning_rate": 7.134051238669722e-06, |
|
"loss": 0.0018, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.8315217391304348, |
|
"grad_norm": 0.10061723518020388, |
|
"learning_rate": 7.077842867047897e-06, |
|
"loss": 0.0024, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.8369565217391304, |
|
"grad_norm": 0.21992324150498122, |
|
"learning_rate": 7.021735256897035e-06, |
|
"loss": 0.0027, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.8423913043478262, |
|
"grad_norm": 0.030816726743244916, |
|
"learning_rate": 6.965730342913011e-06, |
|
"loss": 0.0011, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.8478260869565217, |
|
"grad_norm": 0.01683095603625154, |
|
"learning_rate": 6.909830056250527e-06, |
|
"loss": 0.0008, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.8532608695652173, |
|
"grad_norm": 0.23379778261250125, |
|
"learning_rate": 6.8540363244565524e-06, |
|
"loss": 0.0043, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.858695652173913, |
|
"grad_norm": 0.03675133534148478, |
|
"learning_rate": 6.798351071403839e-06, |
|
"loss": 0.001, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.8641304347826086, |
|
"grad_norm": 0.1140408877999425, |
|
"learning_rate": 6.742776217224587e-06, |
|
"loss": 0.0027, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.8695652173913042, |
|
"grad_norm": 0.02850900102579577, |
|
"learning_rate": 6.687313678244243e-06, |
|
"loss": 0.0009, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 0.02532716939465366, |
|
"learning_rate": 6.6319653669154e-06, |
|
"loss": 0.001, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.8804347826086958, |
|
"grad_norm": 0.10582087034471738, |
|
"learning_rate": 6.576733191751879e-06, |
|
"loss": 0.0029, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.8858695652173914, |
|
"grad_norm": 2.4137374896779877, |
|
"learning_rate": 6.521619057262904e-06, |
|
"loss": 0.1004, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.891304347826087, |
|
"grad_norm": 2.0298394937535122, |
|
"learning_rate": 6.466624863887437e-06, |
|
"loss": 0.0361, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.8967391304347827, |
|
"grad_norm": 0.15424873092333466, |
|
"learning_rate": 6.411752507928643e-06, |
|
"loss": 0.0031, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.9021739130434783, |
|
"grad_norm": 0.7343430535593085, |
|
"learning_rate": 6.357003881488499e-06, |
|
"loss": 0.0086, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9076086956521738, |
|
"grad_norm": 0.0169679254906056, |
|
"learning_rate": 6.302380872402562e-06, |
|
"loss": 0.0007, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.9130434782608696, |
|
"grad_norm": 0.026108663412252976, |
|
"learning_rate": 6.247885364174866e-06, |
|
"loss": 0.001, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.9184782608695652, |
|
"grad_norm": 0.022679414032134804, |
|
"learning_rate": 6.193519235912972e-06, |
|
"loss": 0.0008, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.9239130434782608, |
|
"grad_norm": 0.02365404382322627, |
|
"learning_rate": 6.139284362263185e-06, |
|
"loss": 0.0008, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.9293478260869565, |
|
"grad_norm": 0.014446988115359962, |
|
"learning_rate": 6.085182613345893e-06, |
|
"loss": 0.0006, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.9347826086956523, |
|
"grad_norm": 0.016091425374232204, |
|
"learning_rate": 6.031215854691097e-06, |
|
"loss": 0.0007, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.9402173913043477, |
|
"grad_norm": 0.01553827774955186, |
|
"learning_rate": 5.977385947174084e-06, |
|
"loss": 0.0007, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.9456521739130435, |
|
"grad_norm": 0.17966133137766196, |
|
"learning_rate": 5.923694746951253e-06, |
|
"loss": 0.0028, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.9510869565217392, |
|
"grad_norm": 0.02477310360295687, |
|
"learning_rate": 5.8701441053961185e-06, |
|
"loss": 0.0009, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.9565217391304348, |
|
"grad_norm": 0.025478377542260965, |
|
"learning_rate": 5.816735869035458e-06, |
|
"loss": 0.0009, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.9619565217391304, |
|
"grad_norm": 0.01385737253155479, |
|
"learning_rate": 5.7634718794856626e-06, |
|
"loss": 0.0006, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.9673913043478262, |
|
"grad_norm": 0.2920694264321747, |
|
"learning_rate": 5.710353973389215e-06, |
|
"loss": 0.003, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.9728260869565217, |
|
"grad_norm": 0.0609584809389905, |
|
"learning_rate": 5.657383982351368e-06, |
|
"loss": 0.0014, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.9782608695652173, |
|
"grad_norm": 0.014022955163492444, |
|
"learning_rate": 5.604563732876989e-06, |
|
"loss": 0.0006, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.983695652173913, |
|
"grad_norm": 0.02973603833790608, |
|
"learning_rate": 5.55189504630756e-06, |
|
"loss": 0.0009, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.9891304347826086, |
|
"grad_norm": 0.07663989298851219, |
|
"learning_rate": 5.4993797387584056e-06, |
|
"loss": 0.0015, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.9945652173913042, |
|
"grad_norm": 3.723476839809668, |
|
"learning_rate": 5.447019621056049e-06, |
|
"loss": 0.1512, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.023508663828369594, |
|
"learning_rate": 5.394816498675772e-06, |
|
"loss": 0.0008, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.005434782608696, |
|
"grad_norm": 0.014915331253251566, |
|
"learning_rate": 5.342772171679364e-06, |
|
"loss": 0.0006, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.010869565217391, |
|
"grad_norm": 0.15045045132635565, |
|
"learning_rate": 5.290888434653056e-06, |
|
"loss": 0.0035, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.016304347826087, |
|
"grad_norm": 0.02078710490582649, |
|
"learning_rate": 5.239167076645626e-06, |
|
"loss": 0.0009, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.0217391304347827, |
|
"grad_norm": 0.08909809356955653, |
|
"learning_rate": 5.187609881106725e-06, |
|
"loss": 0.0021, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.027173913043478, |
|
"grad_norm": 0.019002236928891497, |
|
"learning_rate": 5.136218625825374e-06, |
|
"loss": 0.0006, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 2.032608695652174, |
|
"grad_norm": 0.04208850827532741, |
|
"learning_rate": 5.084995082868658e-06, |
|
"loss": 0.0009, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.0380434782608696, |
|
"grad_norm": 0.046840875573742065, |
|
"learning_rate": 5.033941018520625e-06, |
|
"loss": 0.0014, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.0434782608695654, |
|
"grad_norm": 0.1033934706809575, |
|
"learning_rate": 4.983058193221384e-06, |
|
"loss": 0.0019, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.0489130434782608, |
|
"grad_norm": 0.1705166302206335, |
|
"learning_rate": 4.932348361506402e-06, |
|
"loss": 0.0033, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.0543478260869565, |
|
"grad_norm": 0.028909235733879053, |
|
"learning_rate": 4.881813271946e-06, |
|
"loss": 0.0012, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.0597826086956523, |
|
"grad_norm": 0.3030377695298429, |
|
"learning_rate": 4.831454667085059e-06, |
|
"loss": 0.0039, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.0652173913043477, |
|
"grad_norm": 0.0477055277967709, |
|
"learning_rate": 4.781274283382941e-06, |
|
"loss": 0.001, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.0706521739130435, |
|
"grad_norm": 0.0199106085983902, |
|
"learning_rate": 4.7312738511536035e-06, |
|
"loss": 0.0008, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.0760869565217392, |
|
"grad_norm": 0.027962787198971308, |
|
"learning_rate": 4.681455094505938e-06, |
|
"loss": 0.001, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.0815217391304346, |
|
"grad_norm": 0.0382934899009715, |
|
"learning_rate": 4.631819731284323e-06, |
|
"loss": 0.0011, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.0869565217391304, |
|
"grad_norm": 0.013418670608056855, |
|
"learning_rate": 4.58236947300939e-06, |
|
"loss": 0.0006, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.092391304347826, |
|
"grad_norm": 0.04141147762016092, |
|
"learning_rate": 4.5331060248189924e-06, |
|
"loss": 0.0013, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.097826086956522, |
|
"grad_norm": 0.029823878767931914, |
|
"learning_rate": 4.4840310854094335e-06, |
|
"loss": 0.001, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.1032608695652173, |
|
"grad_norm": 0.2181034359186816, |
|
"learning_rate": 4.435146346976873e-06, |
|
"loss": 0.004, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.108695652173913, |
|
"grad_norm": 0.36490526428814946, |
|
"learning_rate": 4.386453495158983e-06, |
|
"loss": 0.0042, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.114130434782609, |
|
"grad_norm": 0.0743305865977075, |
|
"learning_rate": 4.33795420897683e-06, |
|
"loss": 0.0011, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.119565217391304, |
|
"grad_norm": 0.3000013681179252, |
|
"learning_rate": 4.289650160776967e-06, |
|
"loss": 0.0046, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.125, |
|
"grad_norm": 0.05973611485866258, |
|
"learning_rate": 4.241543016173778e-06, |
|
"loss": 0.0011, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.130434782608696, |
|
"grad_norm": 0.02140783876818863, |
|
"learning_rate": 4.19363443399204e-06, |
|
"loss": 0.0008, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.135869565217391, |
|
"grad_norm": 0.01680791379596923, |
|
"learning_rate": 4.1459260662097235e-06, |
|
"loss": 0.0007, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.141304347826087, |
|
"grad_norm": 0.5362708346340234, |
|
"learning_rate": 4.098419557901036e-06, |
|
"loss": 0.0077, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.1467391304347827, |
|
"grad_norm": 0.016360773071928784, |
|
"learning_rate": 4.051116547179677e-06, |
|
"loss": 0.0007, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.1521739130434785, |
|
"grad_norm": 0.28985199290673336, |
|
"learning_rate": 4.00401866514238e-06, |
|
"loss": 0.0044, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.157608695652174, |
|
"grad_norm": 0.01604718518106245, |
|
"learning_rate": 3.957127535812651e-06, |
|
"loss": 0.0007, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.1630434782608696, |
|
"grad_norm": 0.05241001721895836, |
|
"learning_rate": 3.910444776084777e-06, |
|
"loss": 0.0016, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.1684782608695654, |
|
"grad_norm": 0.02209678496389779, |
|
"learning_rate": 3.8639719956680624e-06, |
|
"loss": 0.0008, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 0.020559716878607803, |
|
"learning_rate": 3.817710797031338e-06, |
|
"loss": 0.0008, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.1793478260869565, |
|
"grad_norm": 0.014824391810911752, |
|
"learning_rate": 3.771662775347692e-06, |
|
"loss": 0.0006, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.1847826086956523, |
|
"grad_norm": 0.015796576868617806, |
|
"learning_rate": 3.7258295184394743e-06, |
|
"loss": 0.0007, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.1902173913043477, |
|
"grad_norm": 1.9188157660999832, |
|
"learning_rate": 3.680212606723542e-06, |
|
"loss": 0.0306, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.1956521739130435, |
|
"grad_norm": 0.06391438687127189, |
|
"learning_rate": 3.6348136131567537e-06, |
|
"loss": 0.0019, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.2010869565217392, |
|
"grad_norm": 0.17262747887734978, |
|
"learning_rate": 3.5896341031817517e-06, |
|
"loss": 0.0036, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2065217391304346, |
|
"grad_norm": 0.056665382264410494, |
|
"learning_rate": 3.5446756346729673e-06, |
|
"loss": 0.0012, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.2119565217391304, |
|
"grad_norm": 1.9642610912379441, |
|
"learning_rate": 3.4999397578829076e-06, |
|
"loss": 0.037, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.217391304347826, |
|
"grad_norm": 0.014116778100650137, |
|
"learning_rate": 3.4554280153886967e-06, |
|
"loss": 0.0006, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.2228260869565215, |
|
"grad_norm": 0.024488008150664965, |
|
"learning_rate": 3.4111419420388904e-06, |
|
"loss": 0.001, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.2282608695652173, |
|
"grad_norm": 0.5674032898921303, |
|
"learning_rate": 3.3670830649005437e-06, |
|
"loss": 0.0041, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.233695652173913, |
|
"grad_norm": 0.02286422293729417, |
|
"learning_rate": 3.323252903206562e-06, |
|
"loss": 0.0009, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.239130434782609, |
|
"grad_norm": 0.27168054236566974, |
|
"learning_rate": 3.279652968303313e-06, |
|
"loss": 0.0043, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.244565217391304, |
|
"grad_norm": 0.1593898805811067, |
|
"learning_rate": 3.236284763598512e-06, |
|
"loss": 0.0035, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.013081366094026997, |
|
"learning_rate": 3.1931497845093753e-06, |
|
"loss": 0.0006, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.255434782608696, |
|
"grad_norm": 0.012814297915516075, |
|
"learning_rate": 3.150249518411067e-06, |
|
"loss": 0.0006, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.260869565217391, |
|
"grad_norm": 0.07415100436276072, |
|
"learning_rate": 3.1075854445854093e-06, |
|
"loss": 0.0018, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.266304347826087, |
|
"grad_norm": 0.027114643295979856, |
|
"learning_rate": 3.0651590341698633e-06, |
|
"loss": 0.0009, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.2717391304347827, |
|
"grad_norm": 0.13722514020501544, |
|
"learning_rate": 3.0229717501068133e-06, |
|
"loss": 0.0023, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.2771739130434785, |
|
"grad_norm": 0.023053695918606187, |
|
"learning_rate": 2.981025047093118e-06, |
|
"loss": 0.0009, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.282608695652174, |
|
"grad_norm": 3.7468189613648253, |
|
"learning_rate": 2.9393203715299477e-06, |
|
"loss": 0.0598, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.2880434782608696, |
|
"grad_norm": 0.08634045866789929, |
|
"learning_rate": 2.8978591614729114e-06, |
|
"loss": 0.0015, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.2934782608695654, |
|
"grad_norm": 0.13994711242571936, |
|
"learning_rate": 2.856642846582469e-06, |
|
"loss": 0.0019, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.2989130434782608, |
|
"grad_norm": 0.0519996408733201, |
|
"learning_rate": 2.8156728480746386e-06, |
|
"loss": 0.0011, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.3043478260869565, |
|
"grad_norm": 0.01904905289611891, |
|
"learning_rate": 2.77495057867198e-06, |
|
"loss": 0.0007, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.3097826086956523, |
|
"grad_norm": 1.2476206988634295, |
|
"learning_rate": 2.7344774425548917e-06, |
|
"loss": 0.0339, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.3152173913043477, |
|
"grad_norm": 1.7884596495622582, |
|
"learning_rate": 2.694254835313187e-06, |
|
"loss": 0.1375, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.3206521739130435, |
|
"grad_norm": 0.31025512064642874, |
|
"learning_rate": 2.654284143897976e-06, |
|
"loss": 0.0034, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.3260869565217392, |
|
"grad_norm": 0.3488873501510679, |
|
"learning_rate": 2.6145667465738333e-06, |
|
"loss": 0.0039, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.3315217391304346, |
|
"grad_norm": 0.589409734181312, |
|
"learning_rate": 2.57510401287128e-06, |
|
"loss": 0.0044, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 2.3369565217391304, |
|
"grad_norm": 0.3987654975780055, |
|
"learning_rate": 2.535897303539554e-06, |
|
"loss": 0.0061, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.342391304347826, |
|
"grad_norm": 0.015719041310887562, |
|
"learning_rate": 2.4969479704996935e-06, |
|
"loss": 0.0006, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 2.3478260869565215, |
|
"grad_norm": 0.015180271606601303, |
|
"learning_rate": 2.4582573567979196e-06, |
|
"loss": 0.0006, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.3532608695652173, |
|
"grad_norm": 0.04482488635397311, |
|
"learning_rate": 2.4198267965593224e-06, |
|
"loss": 0.0011, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 2.358695652173913, |
|
"grad_norm": 0.28160845350626884, |
|
"learning_rate": 2.381657614941858e-06, |
|
"loss": 0.005, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.364130434782609, |
|
"grad_norm": 0.09873212459265543, |
|
"learning_rate": 2.3437511280906576e-06, |
|
"loss": 0.002, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.369565217391304, |
|
"grad_norm": 0.028522981368259783, |
|
"learning_rate": 2.306108643092647e-06, |
|
"loss": 0.0008, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.375, |
|
"grad_norm": 0.030887088059580514, |
|
"learning_rate": 2.268731457931467e-06, |
|
"loss": 0.001, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 2.380434782608696, |
|
"grad_norm": 0.2056153085824592, |
|
"learning_rate": 2.2316208614427226e-06, |
|
"loss": 0.003, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.385869565217391, |
|
"grad_norm": 0.03316498797260578, |
|
"learning_rate": 2.1947781332695406e-06, |
|
"loss": 0.001, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 2.391304347826087, |
|
"grad_norm": 0.020603866879399167, |
|
"learning_rate": 2.1582045438184464e-06, |
|
"loss": 0.0007, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.3967391304347827, |
|
"grad_norm": 0.022416446968247912, |
|
"learning_rate": 2.121901354215553e-06, |
|
"loss": 0.0008, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 2.4021739130434785, |
|
"grad_norm": 1.2759832400444016, |
|
"learning_rate": 2.085869816263081e-06, |
|
"loss": 0.0222, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.407608695652174, |
|
"grad_norm": 2.7040121657564558, |
|
"learning_rate": 2.050111172396192e-06, |
|
"loss": 0.0472, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 2.4130434782608696, |
|
"grad_norm": 0.10233992459998235, |
|
"learning_rate": 2.0146266556401405e-06, |
|
"loss": 0.0016, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.4184782608695654, |
|
"grad_norm": 0.244848209656816, |
|
"learning_rate": 1.97941748956777e-06, |
|
"loss": 0.004, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.4239130434782608, |
|
"grad_norm": 0.05688444318906805, |
|
"learning_rate": 1.944484888257312e-06, |
|
"loss": 0.0013, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 2.4293478260869565, |
|
"grad_norm": 0.5574195380686696, |
|
"learning_rate": 1.9098300562505266e-06, |
|
"loss": 0.0112, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 2.4347826086956523, |
|
"grad_norm": 0.0932057849593417, |
|
"learning_rate": 1.8754541885111631e-06, |
|
"loss": 0.0018, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 2.4402173913043477, |
|
"grad_norm": 0.10747253772821316, |
|
"learning_rate": 1.8413584703837618e-06, |
|
"loss": 0.0018, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 2.4456521739130435, |
|
"grad_norm": 0.39067007335009907, |
|
"learning_rate": 1.8075440775527754e-06, |
|
"loss": 0.0063, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.4510869565217392, |
|
"grad_norm": 0.028328534672816628, |
|
"learning_rate": 1.7740121760020324e-06, |
|
"loss": 0.001, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 2.4565217391304346, |
|
"grad_norm": 0.12079880404676811, |
|
"learning_rate": 1.740763921974531e-06, |
|
"loss": 0.0024, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 2.4619565217391304, |
|
"grad_norm": 0.10850662346060039, |
|
"learning_rate": 1.7078004619325728e-06, |
|
"loss": 0.0017, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 2.467391304347826, |
|
"grad_norm": 0.2673103325118139, |
|
"learning_rate": 1.6751229325182194e-06, |
|
"loss": 0.0067, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 2.4728260869565215, |
|
"grad_norm": 0.20052250560415452, |
|
"learning_rate": 1.6427324605141125e-06, |
|
"loss": 0.0037, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.4782608695652173, |
|
"grad_norm": 0.08452549445673675, |
|
"learning_rate": 1.610630162804615e-06, |
|
"loss": 0.0015, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 2.483695652173913, |
|
"grad_norm": 0.01638519542637996, |
|
"learning_rate": 1.578817146337297e-06, |
|
"loss": 0.0006, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 2.489130434782609, |
|
"grad_norm": 0.03107206330508472, |
|
"learning_rate": 1.5472945080847679e-06, |
|
"loss": 0.0008, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 2.494565217391304, |
|
"grad_norm": 0.03654411098415488, |
|
"learning_rate": 1.516063335006851e-06, |
|
"loss": 0.0009, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.07287899663917816, |
|
"learning_rate": 1.485124704013101e-06, |
|
"loss": 0.0017, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.505434782608696, |
|
"grad_norm": 0.9588849867572242, |
|
"learning_rate": 1.4544796819256724e-06, |
|
"loss": 0.0086, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 2.5108695652173916, |
|
"grad_norm": 0.02467713549047941, |
|
"learning_rate": 1.4241293254425337e-06, |
|
"loss": 0.0007, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 2.516304347826087, |
|
"grad_norm": 0.04748495142661645, |
|
"learning_rate": 1.3940746811010297e-06, |
|
"loss": 0.0011, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 2.5217391304347827, |
|
"grad_norm": 0.03054669361577949, |
|
"learning_rate": 1.3643167852417894e-06, |
|
"loss": 0.001, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 2.5271739130434785, |
|
"grad_norm": 0.027111109257002528, |
|
"learning_rate": 1.3348566639730032e-06, |
|
"loss": 0.0011, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.532608695652174, |
|
"grad_norm": 0.04377035701857717, |
|
"learning_rate": 1.3056953331350297e-06, |
|
"loss": 0.001, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 2.5380434782608696, |
|
"grad_norm": 0.08382313642398824, |
|
"learning_rate": 1.2768337982653744e-06, |
|
"loss": 0.0014, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 2.5434782608695654, |
|
"grad_norm": 0.030219514519134735, |
|
"learning_rate": 1.2482730545640133e-06, |
|
"loss": 0.0011, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 2.5489130434782608, |
|
"grad_norm": 0.42539314485494417, |
|
"learning_rate": 1.2200140868590759e-06, |
|
"loss": 0.0063, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 2.5543478260869565, |
|
"grad_norm": 0.025687483062924163, |
|
"learning_rate": 1.1920578695728903e-06, |
|
"loss": 0.0009, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.5597826086956523, |
|
"grad_norm": 0.027491319722765094, |
|
"learning_rate": 1.1644053666883803e-06, |
|
"loss": 0.0009, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 2.5652173913043477, |
|
"grad_norm": 0.12070804850917503, |
|
"learning_rate": 1.137057531715825e-06, |
|
"loss": 0.0023, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 2.5706521739130435, |
|
"grad_norm": 0.1648819505998384, |
|
"learning_rate": 1.1100153076599862e-06, |
|
"loss": 0.0025, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 2.5760869565217392, |
|
"grad_norm": 0.1168751069545925, |
|
"learning_rate": 1.0832796269875757e-06, |
|
"loss": 0.0023, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 2.5815217391304346, |
|
"grad_norm": 0.030968178239974237, |
|
"learning_rate": 1.0568514115951256e-06, |
|
"loss": 0.001, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.5869565217391304, |
|
"grad_norm": 1.2108714841296098, |
|
"learning_rate": 1.0307315727771806e-06, |
|
"loss": 0.0126, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 2.592391304347826, |
|
"grad_norm": 0.027899777268609836, |
|
"learning_rate": 1.0049210111948815e-06, |
|
"loss": 0.0009, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 2.5978260869565215, |
|
"grad_norm": 0.03180410299281123, |
|
"learning_rate": 9.794206168449127e-07, |
|
"loss": 0.0009, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 2.6032608695652173, |
|
"grad_norm": 0.033244233145600086, |
|
"learning_rate": 9.542312690288035e-07, |
|
"loss": 0.0009, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 2.608695652173913, |
|
"grad_norm": 0.03761724722059268, |
|
"learning_rate": 9.293538363226196e-07, |
|
"loss": 0.0013, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.6141304347826084, |
|
"grad_norm": 0.09136376989366057, |
|
"learning_rate": 9.04789176547004e-07, |
|
"loss": 0.0018, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 2.619565217391304, |
|
"grad_norm": 0.18059210345284965, |
|
"learning_rate": 8.80538136737602e-07, |
|
"loss": 0.0029, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 2.625, |
|
"grad_norm": 0.030807943380701246, |
|
"learning_rate": 8.566015531158534e-07, |
|
"loss": 0.0008, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 2.630434782608696, |
|
"grad_norm": 0.05710411212363332, |
|
"learning_rate": 8.329802510601559e-07, |
|
"loss": 0.0014, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.6358695652173916, |
|
"grad_norm": 0.061848371459409315, |
|
"learning_rate": 8.096750450774071e-07, |
|
"loss": 0.0016, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.641304347826087, |
|
"grad_norm": 1.0253370343843025, |
|
"learning_rate": 7.866867387749199e-07, |
|
"loss": 0.0166, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 2.6467391304347827, |
|
"grad_norm": 0.029136594892818037, |
|
"learning_rate": 7.640161248327061e-07, |
|
"loss": 0.001, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 2.6521739130434785, |
|
"grad_norm": 1.092489264260611, |
|
"learning_rate": 7.416639849761531e-07, |
|
"loss": 0.0248, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 2.657608695652174, |
|
"grad_norm": 2.2914238948250363, |
|
"learning_rate": 7.196310899490577e-07, |
|
"loss": 0.0723, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 2.6630434782608696, |
|
"grad_norm": 0.016249601644455224, |
|
"learning_rate": 6.979181994870587e-07, |
|
"loss": 0.0007, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.6684782608695654, |
|
"grad_norm": 0.021265124563151435, |
|
"learning_rate": 6.765260622914361e-07, |
|
"loss": 0.0007, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 2.6739130434782608, |
|
"grad_norm": 0.03831610583206101, |
|
"learning_rate": 6.554554160032899e-07, |
|
"loss": 0.001, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 2.6793478260869565, |
|
"grad_norm": 0.03101608692853337, |
|
"learning_rate": 6.347069871781164e-07, |
|
"loss": 0.0009, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 2.6847826086956523, |
|
"grad_norm": 0.01978989576112469, |
|
"learning_rate": 6.142814912607409e-07, |
|
"loss": 0.0008, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 2.6902173913043477, |
|
"grad_norm": 0.3852432741704962, |
|
"learning_rate": 5.941796325606574e-07, |
|
"loss": 0.007, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.6956521739130435, |
|
"grad_norm": 0.39628033120487305, |
|
"learning_rate": 5.744021042277437e-07, |
|
"loss": 0.0052, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 2.7010869565217392, |
|
"grad_norm": 0.09815745867450933, |
|
"learning_rate": 5.549495882283528e-07, |
|
"loss": 0.0019, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 2.7065217391304346, |
|
"grad_norm": 2.2778045886314655, |
|
"learning_rate": 5.358227553218031e-07, |
|
"loss": 0.0699, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 2.7119565217391304, |
|
"grad_norm": 0.027783255312989117, |
|
"learning_rate": 5.17022265037247e-07, |
|
"loss": 0.0009, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 2.717391304347826, |
|
"grad_norm": 0.04524039432637041, |
|
"learning_rate": 4.985487656509313e-07, |
|
"loss": 0.0013, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.7228260869565215, |
|
"grad_norm": 1.8660426088847626, |
|
"learning_rate": 4.804028941638405e-07, |
|
"loss": 0.0379, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.7282608695652173, |
|
"grad_norm": 0.05194490259797287, |
|
"learning_rate": 4.6258527627973446e-07, |
|
"loss": 0.0011, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 2.733695652173913, |
|
"grad_norm": 0.5524275731086881, |
|
"learning_rate": 4.450965263835694e-07, |
|
"loss": 0.0059, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 2.7391304347826084, |
|
"grad_norm": 0.09638176861935786, |
|
"learning_rate": 4.2793724752031807e-07, |
|
"loss": 0.0014, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 2.744565217391304, |
|
"grad_norm": 1.5902794253403654, |
|
"learning_rate": 4.111080313741711e-07, |
|
"loss": 0.0265, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.027472533837749617, |
|
"learning_rate": 3.9460945824813635e-07, |
|
"loss": 0.0007, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.755434782608696, |
|
"grad_norm": 0.1279143225656888, |
|
"learning_rate": 3.7844209704403055e-07, |
|
"loss": 0.0029, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 2.7608695652173916, |
|
"grad_norm": 0.026463459883835142, |
|
"learning_rate": 3.626065052428551e-07, |
|
"loss": 0.0008, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 2.766304347826087, |
|
"grad_norm": 0.27505638314757236, |
|
"learning_rate": 3.471032288855869e-07, |
|
"loss": 0.0041, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 2.7717391304347827, |
|
"grad_norm": 0.03755249242727417, |
|
"learning_rate": 3.3193280255433556e-07, |
|
"loss": 0.0011, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.7771739130434785, |
|
"grad_norm": 1.3351363822022542, |
|
"learning_rate": 3.170957493539195e-07, |
|
"loss": 0.0158, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 2.782608695652174, |
|
"grad_norm": 0.02416580008302714, |
|
"learning_rate": 3.0259258089382236e-07, |
|
"loss": 0.0009, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 2.7880434782608696, |
|
"grad_norm": 0.24305894735810873, |
|
"learning_rate": 2.88423797270555e-07, |
|
"loss": 0.0033, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 2.7934782608695654, |
|
"grad_norm": 0.0170002796253045, |
|
"learning_rate": 2.745898870504116e-07, |
|
"loss": 0.0006, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 2.7989130434782608, |
|
"grad_norm": 0.07161898082689806, |
|
"learning_rate": 2.6109132725262166e-07, |
|
"loss": 0.0017, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.8043478260869565, |
|
"grad_norm": 1.0122242308252756, |
|
"learning_rate": 2.479285833329015e-07, |
|
"loss": 0.0147, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 2.8097826086956523, |
|
"grad_norm": 0.32571610548502183, |
|
"learning_rate": 2.351021091674044e-07, |
|
"loss": 0.0056, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 2.8152173913043477, |
|
"grad_norm": 0.04130977709089724, |
|
"learning_rate": 2.226123470370689e-07, |
|
"loss": 0.0012, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 2.8206521739130435, |
|
"grad_norm": 1.7181531921107351, |
|
"learning_rate": 2.104597276123721e-07, |
|
"loss": 0.0401, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 2.8260869565217392, |
|
"grad_norm": 0.02705959014069028, |
|
"learning_rate": 1.9864466993847808e-07, |
|
"loss": 0.0009, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.8315217391304346, |
|
"grad_norm": 0.018768961604310398, |
|
"learning_rate": 1.8716758142078295e-07, |
|
"loss": 0.0007, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 2.8369565217391304, |
|
"grad_norm": 0.018345985510552047, |
|
"learning_rate": 1.7602885781087486e-07, |
|
"loss": 0.0008, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 2.842391304347826, |
|
"grad_norm": 0.02605179058078712, |
|
"learning_rate": 1.6522888319288166e-07, |
|
"loss": 0.0009, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 2.8478260869565215, |
|
"grad_norm": 0.03616601605859018, |
|
"learning_rate": 1.5476802997022812e-07, |
|
"loss": 0.001, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 2.8532608695652173, |
|
"grad_norm": 0.025004543897905514, |
|
"learning_rate": 1.4464665885279948e-07, |
|
"loss": 0.0008, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.858695652173913, |
|
"grad_norm": 2.0898366886384756, |
|
"learning_rate": 1.3486511884449827e-07, |
|
"loss": 0.0181, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.8641304347826084, |
|
"grad_norm": 1.4763355304020689, |
|
"learning_rate": 1.254237472312092e-07, |
|
"loss": 0.0246, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.869565217391304, |
|
"grad_norm": 0.012470194296572264, |
|
"learning_rate": 1.1632286956917427e-07, |
|
"loss": 0.0006, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.875, |
|
"grad_norm": 0.01829002489612097, |
|
"learning_rate": 1.075627996737627e-07, |
|
"loss": 0.0008, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 2.880434782608696, |
|
"grad_norm": 0.38603420185863435, |
|
"learning_rate": 9.914383960865081e-08, |
|
"loss": 0.0047, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.8858695652173916, |
|
"grad_norm": 0.19367430087338477, |
|
"learning_rate": 9.106627967540915e-08, |
|
"loss": 0.0024, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 2.891304347826087, |
|
"grad_norm": 0.4411181370450418, |
|
"learning_rate": 8.333039840348833e-08, |
|
"loss": 0.0042, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 2.8967391304347827, |
|
"grad_norm": 0.012209939294930026, |
|
"learning_rate": 7.593646254061448e-08, |
|
"loss": 0.0006, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.9021739130434785, |
|
"grad_norm": 0.330056623962809, |
|
"learning_rate": 6.888472704359661e-08, |
|
"loss": 0.006, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.907608695652174, |
|
"grad_norm": 0.034219507512194006, |
|
"learning_rate": 6.217543506952916e-08, |
|
"loss": 0.001, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.9130434782608696, |
|
"grad_norm": 0.018554429841025816, |
|
"learning_rate": 5.580881796741322e-08, |
|
"loss": 0.0007, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.9184782608695654, |
|
"grad_norm": 0.03648100584653491, |
|
"learning_rate": 4.978509527017283e-08, |
|
"loss": 0.0009, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.9239130434782608, |
|
"grad_norm": 0.024703372477637507, |
|
"learning_rate": 4.410447468709001e-08, |
|
"loss": 0.001, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.9293478260869565, |
|
"grad_norm": 0.04043268074636393, |
|
"learning_rate": 3.8767152096641504e-08, |
|
"loss": 0.001, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.9347826086956523, |
|
"grad_norm": 0.046268704953150705, |
|
"learning_rate": 3.377331153974206e-08, |
|
"loss": 0.0015, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.9402173913043477, |
|
"grad_norm": 0.06556550271817785, |
|
"learning_rate": 2.912312521340277e-08, |
|
"loss": 0.001, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.9456521739130435, |
|
"grad_norm": 0.11585425133268303, |
|
"learning_rate": 2.4816753464789177e-08, |
|
"loss": 0.0018, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.9510869565217392, |
|
"grad_norm": 0.07767672609852741, |
|
"learning_rate": 2.0854344785694593e-08, |
|
"loss": 0.0016, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.9565217391304346, |
|
"grad_norm": 0.4371414896745222, |
|
"learning_rate": 1.7236035807416397e-08, |
|
"loss": 0.0058, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.9619565217391304, |
|
"grad_norm": 0.05437143993551097, |
|
"learning_rate": 1.3961951296053156e-08, |
|
"loss": 0.0012, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.967391304347826, |
|
"grad_norm": 0.08967830928285274, |
|
"learning_rate": 1.1032204148191395e-08, |
|
"loss": 0.0015, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.9728260869565215, |
|
"grad_norm": 0.0632058201324481, |
|
"learning_rate": 8.446895387019815e-09, |
|
"loss": 0.0013, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.9782608695652173, |
|
"grad_norm": 0.5781101930402831, |
|
"learning_rate": 6.206114158845422e-09, |
|
"loss": 0.0104, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.983695652173913, |
|
"grad_norm": 0.03599473788037359, |
|
"learning_rate": 4.309937730015978e-09, |
|
"loss": 0.0009, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.9891304347826084, |
|
"grad_norm": 0.029545010274494552, |
|
"learning_rate": 2.758431484259916e-09, |
|
"loss": 0.001, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.994565217391304, |
|
"grad_norm": 0.04112363341260623, |
|
"learning_rate": 1.5516489204303598e-09, |
|
"loss": 0.001, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.28259585170188845, |
|
"learning_rate": 6.896316506554979e-10, |
|
"loss": 0.0056, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 552, |
|
"total_flos": 4395674998272.0, |
|
"train_loss": 0.49586228307948593, |
|
"train_runtime": 3133.0691, |
|
"train_samples_per_second": 2.813, |
|
"train_steps_per_second": 0.176 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 552, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50000, |
|
"total_flos": 4395674998272.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|