|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 15.873015873015873,
|
|
"eval_steps": 500,
|
|
"global_step": 1000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.015873015873015872,
|
|
"grad_norm": 0.4978693127632141,
|
|
"learning_rate": 5e-05,
|
|
"loss": 0.9589,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"grad_norm": 0.4809919595718384,
|
|
"learning_rate": 0.0001,
|
|
"loss": 0.9265,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.047619047619047616,
|
|
"grad_norm": 0.5111315250396729,
|
|
"learning_rate": 9.999975227016531e-05,
|
|
"loss": 0.9665,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"grad_norm": 0.670375406742096,
|
|
"learning_rate": 9.999900908311602e-05,
|
|
"loss": 0.9922,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.07936507936507936,
|
|
"grad_norm": 0.6541376113891602,
|
|
"learning_rate": 9.999777044621652e-05,
|
|
"loss": 0.7833,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"grad_norm": 0.8511355519294739,
|
|
"learning_rate": 9.999603637174071e-05,
|
|
"loss": 0.8339,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.1111111111111111,
|
|
"grad_norm": 0.933815598487854,
|
|
"learning_rate": 9.999380687687188e-05,
|
|
"loss": 0.7231,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"grad_norm": 1.2876204252243042,
|
|
"learning_rate": 9.999108198370249e-05,
|
|
"loss": 0.9078,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 1.2767138481140137,
|
|
"learning_rate": 9.998786171923407e-05,
|
|
"loss": 0.8635,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"grad_norm": 1.2097059488296509,
|
|
"learning_rate": 9.998414611537681e-05,
|
|
"loss": 0.8626,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.1746031746031746,
|
|
"grad_norm": 1.4272280931472778,
|
|
"learning_rate": 9.997993520894937e-05,
|
|
"loss": 0.9185,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"grad_norm": 0.8439553380012512,
|
|
"learning_rate": 9.997522904167844e-05,
|
|
"loss": 0.6789,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.20634920634920634,
|
|
"grad_norm": 0.8309609293937683,
|
|
"learning_rate": 9.997002766019832e-05,
|
|
"loss": 0.8529,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"grad_norm": 0.9561108350753784,
|
|
"learning_rate": 9.996433111605052e-05,
|
|
"loss": 0.8475,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.23809523809523808,
|
|
"grad_norm": 0.6515036821365356,
|
|
"learning_rate": 9.99581394656832e-05,
|
|
"loss": 0.8345,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"grad_norm": 0.45641326904296875,
|
|
"learning_rate": 9.995145277045061e-05,
|
|
"loss": 0.6654,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.2698412698412698,
|
|
"grad_norm": 0.2796855568885803,
|
|
"learning_rate": 9.994427109661253e-05,
|
|
"loss": 0.6643,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 0.2947935461997986,
|
|
"learning_rate": 9.993659451533353e-05,
|
|
"loss": 0.7327,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.30158730158730157,
|
|
"grad_norm": 0.3294975161552429,
|
|
"learning_rate": 9.992842310268233e-05,
|
|
"loss": 0.7466,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"grad_norm": 0.25266706943511963,
|
|
"learning_rate": 9.991975693963107e-05,
|
|
"loss": 0.6628,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 0.26995259523391724,
|
|
"learning_rate": 9.99105961120544e-05,
|
|
"loss": 0.7145,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"grad_norm": 0.29081106185913086,
|
|
"learning_rate": 9.990094071072877e-05,
|
|
"loss": 0.6947,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.36507936507936506,
|
|
"grad_norm": 0.277067095041275,
|
|
"learning_rate": 9.989079083133139e-05,
|
|
"loss": 0.7225,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"grad_norm": 0.27529773116111755,
|
|
"learning_rate": 9.988014657443941e-05,
|
|
"loss": 0.7122,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.3968253968253968,
|
|
"grad_norm": 0.30684614181518555,
|
|
"learning_rate": 9.986900804552878e-05,
|
|
"loss": 0.7015,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"grad_norm": 0.30238044261932373,
|
|
"learning_rate": 9.985737535497337e-05,
|
|
"loss": 0.5781,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.42857142857142855,
|
|
"grad_norm": 0.30560147762298584,
|
|
"learning_rate": 9.984524861804376e-05,
|
|
"loss": 0.5947,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"grad_norm": 0.280203253030777,
|
|
"learning_rate": 9.983262795490613e-05,
|
|
"loss": 0.7072,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.4603174603174603,
|
|
"grad_norm": 0.28849631547927856,
|
|
"learning_rate": 9.981951349062106e-05,
|
|
"loss": 0.7074,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"grad_norm": 0.2815149426460266,
|
|
"learning_rate": 9.980590535514233e-05,
|
|
"loss": 0.5274,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.49206349206349204,
|
|
"grad_norm": 0.26764699816703796,
|
|
"learning_rate": 9.979180368331558e-05,
|
|
"loss": 0.6645,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"grad_norm": 0.29958057403564453,
|
|
"learning_rate": 9.9777208614877e-05,
|
|
"loss": 0.7361,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.5238095238095238,
|
|
"grad_norm": 0.26811736822128296,
|
|
"learning_rate": 9.976212029445194e-05,
|
|
"loss": 0.6962,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"grad_norm": 0.2567647695541382,
|
|
"learning_rate": 9.97465388715535e-05,
|
|
"loss": 0.6077,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 0.25592276453971863,
|
|
"learning_rate": 9.9730464500581e-05,
|
|
"loss": 0.6288,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 0.24128927290439606,
|
|
"learning_rate": 9.971389734081848e-05,
|
|
"loss": 0.5665,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.5873015873015873,
|
|
"grad_norm": 0.2471931427717209,
|
|
"learning_rate": 9.969683755643317e-05,
|
|
"loss": 0.7,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"grad_norm": 0.24910229444503784,
|
|
"learning_rate": 9.967928531647374e-05,
|
|
"loss": 0.5286,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.6190476190476191,
|
|
"grad_norm": 0.29654461145401,
|
|
"learning_rate": 9.966124079486872e-05,
|
|
"loss": 0.6379,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"grad_norm": 0.23167571425437927,
|
|
"learning_rate": 9.96427041704248e-05,
|
|
"loss": 0.5028,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.6507936507936508,
|
|
"grad_norm": 0.3802570402622223,
|
|
"learning_rate": 9.962367562682496e-05,
|
|
"loss": 0.7501,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 0.2911546230316162,
|
|
"learning_rate": 9.960415535262671e-05,
|
|
"loss": 0.7529,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.6825396825396826,
|
|
"grad_norm": 0.27725136280059814,
|
|
"learning_rate": 9.958414354126022e-05,
|
|
"loss": 0.6338,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"grad_norm": 0.29778677225112915,
|
|
"learning_rate": 9.956364039102642e-05,
|
|
"loss": 0.6084,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.3038597106933594,
|
|
"learning_rate": 9.954264610509497e-05,
|
|
"loss": 0.7813,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"grad_norm": 0.24961970746517181,
|
|
"learning_rate": 9.952116089150232e-05,
|
|
"loss": 0.5784,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.746031746031746,
|
|
"grad_norm": 0.41124090552330017,
|
|
"learning_rate": 9.94991849631496e-05,
|
|
"loss": 0.8362,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"grad_norm": 0.2612743079662323,
|
|
"learning_rate": 9.947671853780054e-05,
|
|
"loss": 0.5879,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.7777777777777778,
|
|
"grad_norm": 0.3509594798088074,
|
|
"learning_rate": 9.94537618380793e-05,
|
|
"loss": 0.6429,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"grad_norm": 0.4222470223903656,
|
|
"learning_rate": 9.943031509146825e-05,
|
|
"loss": 0.8086,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.8095238095238095,
|
|
"grad_norm": 0.34031662344932556,
|
|
"learning_rate": 9.940637853030572e-05,
|
|
"loss": 0.7058,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"grad_norm": 0.25386595726013184,
|
|
"learning_rate": 9.938195239178374e-05,
|
|
"loss": 0.5537,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.8412698412698413,
|
|
"grad_norm": 0.27435001730918884,
|
|
"learning_rate": 9.935703691794565e-05,
|
|
"loss": 0.5793,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 0.360727995634079,
|
|
"learning_rate": 9.933163235568367e-05,
|
|
"loss": 0.6103,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.873015873015873,
|
|
"grad_norm": 0.29674389958381653,
|
|
"learning_rate": 9.930573895673657e-05,
|
|
"loss": 0.7375,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"grad_norm": 0.3319956958293915,
|
|
"learning_rate": 9.927935697768698e-05,
|
|
"loss": 0.5953,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.9047619047619048,
|
|
"grad_norm": 0.3237013518810272,
|
|
"learning_rate": 9.925248667995907e-05,
|
|
"loss": 0.6891,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"grad_norm": 0.2946189343929291,
|
|
"learning_rate": 9.922512832981584e-05,
|
|
"loss": 0.5815,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.9365079365079365,
|
|
"grad_norm": 0.31961193680763245,
|
|
"learning_rate": 9.919728219835643e-05,
|
|
"loss": 0.6767,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"grad_norm": 0.30548524856567383,
|
|
"learning_rate": 9.916894856151357e-05,
|
|
"loss": 0.6222,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.9682539682539683,
|
|
"grad_norm": 0.2908201515674591,
|
|
"learning_rate": 9.914012770005072e-05,
|
|
"loss": 0.6102,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"grad_norm": 0.3024301826953888,
|
|
"learning_rate": 9.91108198995594e-05,
|
|
"loss": 0.6281,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.37488242983818054,
|
|
"learning_rate": 9.908102545045625e-05,
|
|
"loss": 0.5405,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 1.0158730158730158,
|
|
"grad_norm": 0.3462425172328949,
|
|
"learning_rate": 9.905074464798024e-05,
|
|
"loss": 0.5831,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 1.0317460317460316,
|
|
"grad_norm": 0.32379499077796936,
|
|
"learning_rate": 9.901997779218967e-05,
|
|
"loss": 0.6897,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 1.0476190476190477,
|
|
"grad_norm": 0.3253431022167206,
|
|
"learning_rate": 9.898872518795932e-05,
|
|
"loss": 0.5935,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 1.0634920634920635,
|
|
"grad_norm": 0.31801578402519226,
|
|
"learning_rate": 9.895698714497724e-05,
|
|
"loss": 0.5721,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 1.0793650793650793,
|
|
"grad_norm": 0.29547229409217834,
|
|
"learning_rate": 9.892476397774186e-05,
|
|
"loss": 0.5041,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 1.0952380952380953,
|
|
"grad_norm": 0.30208516120910645,
|
|
"learning_rate": 9.889205600555877e-05,
|
|
"loss": 0.5027,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.37307029962539673,
|
|
"learning_rate": 9.885886355253758e-05,
|
|
"loss": 0.6963,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 1.126984126984127,
|
|
"grad_norm": 0.31057053804397583,
|
|
"learning_rate": 9.882518694758875e-05,
|
|
"loss": 0.4872,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 1.1428571428571428,
|
|
"grad_norm": 0.35556697845458984,
|
|
"learning_rate": 9.879102652442024e-05,
|
|
"loss": 0.6017,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 1.1587301587301586,
|
|
"grad_norm": 0.37607231736183167,
|
|
"learning_rate": 9.875638262153431e-05,
|
|
"loss": 0.6837,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 1.1746031746031746,
|
|
"grad_norm": 0.34590160846710205,
|
|
"learning_rate": 9.872125558222409e-05,
|
|
"loss": 0.5724,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 1.1904761904761905,
|
|
"grad_norm": 0.3449731469154358,
|
|
"learning_rate": 9.868564575457023e-05,
|
|
"loss": 0.6157,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 1.2063492063492063,
|
|
"grad_norm": 0.4771505892276764,
|
|
"learning_rate": 9.864955349143734e-05,
|
|
"loss": 0.5829,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 1.2222222222222223,
|
|
"grad_norm": 0.374600887298584,
|
|
"learning_rate": 9.861297915047069e-05,
|
|
"loss": 0.6213,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 1.2380952380952381,
|
|
"grad_norm": 0.40953242778778076,
|
|
"learning_rate": 9.857592309409247e-05,
|
|
"loss": 0.5805,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 1.253968253968254,
|
|
"grad_norm": 0.3891858756542206,
|
|
"learning_rate": 9.853838568949831e-05,
|
|
"loss": 0.5201,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 1.2698412698412698,
|
|
"grad_norm": 0.4599400758743286,
|
|
"learning_rate": 9.850036730865364e-05,
|
|
"loss": 0.6509,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.47590476274490356,
|
|
"learning_rate": 9.846186832828989e-05,
|
|
"loss": 0.5522,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 1.3015873015873016,
|
|
"grad_norm": 0.42077696323394775,
|
|
"learning_rate": 9.842288912990096e-05,
|
|
"loss": 0.6272,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 1.3174603174603174,
|
|
"grad_norm": 0.4116186201572418,
|
|
"learning_rate": 9.838343009973925e-05,
|
|
"loss": 0.5974,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 1.3333333333333333,
|
|
"grad_norm": 0.4247848689556122,
|
|
"learning_rate": 9.83434916288119e-05,
|
|
"loss": 0.4948,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 1.3492063492063493,
|
|
"grad_norm": 0.3873782455921173,
|
|
"learning_rate": 9.830307411287695e-05,
|
|
"loss": 0.496,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 1.3650793650793651,
|
|
"grad_norm": 0.4587806463241577,
|
|
"learning_rate": 9.82621779524394e-05,
|
|
"loss": 0.6617,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 1.380952380952381,
|
|
"grad_norm": 0.4379841089248657,
|
|
"learning_rate": 9.822080355274719e-05,
|
|
"loss": 0.5294,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 1.3968253968253967,
|
|
"grad_norm": 0.464910626411438,
|
|
"learning_rate": 9.817895132378725e-05,
|
|
"loss": 0.6855,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 1.4126984126984126,
|
|
"grad_norm": 0.4157741963863373,
|
|
"learning_rate": 9.813662168028144e-05,
|
|
"loss": 0.5563,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.4436641037464142,
|
|
"learning_rate": 9.809381504168234e-05,
|
|
"loss": 0.5291,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 1.4444444444444444,
|
|
"grad_norm": 0.4180889427661896,
|
|
"learning_rate": 9.805053183216923e-05,
|
|
"loss": 0.5158,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 1.4603174603174602,
|
|
"grad_norm": 0.4933961033821106,
|
|
"learning_rate": 9.800677248064382e-05,
|
|
"loss": 0.6885,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 1.4761904761904763,
|
|
"grad_norm": 0.4813699722290039,
|
|
"learning_rate": 9.796253742072596e-05,
|
|
"loss": 0.6305,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 1.492063492063492,
|
|
"grad_norm": 0.4272967278957367,
|
|
"learning_rate": 9.791782709074944e-05,
|
|
"loss": 0.5119,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 1.507936507936508,
|
|
"grad_norm": 0.4510858356952667,
|
|
"learning_rate": 9.787264193375753e-05,
|
|
"loss": 0.5693,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 1.5238095238095237,
|
|
"grad_norm": 0.5349589586257935,
|
|
"learning_rate": 9.782698239749873e-05,
|
|
"loss": 0.6708,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 1.5396825396825395,
|
|
"grad_norm": 0.5285341739654541,
|
|
"learning_rate": 9.778084893442218e-05,
|
|
"loss": 0.6712,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 1.5555555555555556,
|
|
"grad_norm": 0.4625120460987091,
|
|
"learning_rate": 9.77342420016733e-05,
|
|
"loss": 0.5257,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 0.4635828733444214,
|
|
"learning_rate": 9.768716206108921e-05,
|
|
"loss": 0.482,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 1.5873015873015874,
|
|
"grad_norm": 0.47050222754478455,
|
|
"learning_rate": 9.763960957919413e-05,
|
|
"loss": 0.4347,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 1.6031746031746033,
|
|
"grad_norm": 0.42742452025413513,
|
|
"learning_rate": 9.759158502719481e-05,
|
|
"loss": 0.4208,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 1.619047619047619,
|
|
"grad_norm": 0.48628243803977966,
|
|
"learning_rate": 9.754308888097583e-05,
|
|
"loss": 0.5814,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 1.6349206349206349,
|
|
"grad_norm": 0.4874871075153351,
|
|
"learning_rate": 9.749412162109485e-05,
|
|
"loss": 0.5278,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 1.6507936507936507,
|
|
"grad_norm": 0.5010098814964294,
|
|
"learning_rate": 9.744468373277797e-05,
|
|
"loss": 0.5341,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.4798610508441925,
|
|
"learning_rate": 9.739477570591473e-05,
|
|
"loss": 0.5088,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 1.6825396825396826,
|
|
"grad_norm": 0.5140134692192078,
|
|
"learning_rate": 9.734439803505345e-05,
|
|
"loss": 0.5922,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 1.6984126984126984,
|
|
"grad_norm": 0.49391329288482666,
|
|
"learning_rate": 9.729355121939621e-05,
|
|
"loss": 0.5445,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 1.7142857142857144,
|
|
"grad_norm": 0.5012408494949341,
|
|
"learning_rate": 9.724223576279395e-05,
|
|
"loss": 0.5175,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 1.7301587301587302,
|
|
"grad_norm": 0.5038516521453857,
|
|
"learning_rate": 9.719045217374143e-05,
|
|
"loss": 0.4399,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 1.746031746031746,
|
|
"grad_norm": 0.49503833055496216,
|
|
"learning_rate": 9.713820096537225e-05,
|
|
"loss": 0.483,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 1.7619047619047619,
|
|
"grad_norm": 0.5000967979431152,
|
|
"learning_rate": 9.708548265545375e-05,
|
|
"loss": 0.6131,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 1.7777777777777777,
|
|
"grad_norm": 0.504001796245575,
|
|
"learning_rate": 9.703229776638185e-05,
|
|
"loss": 0.5121,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 1.7936507936507935,
|
|
"grad_norm": 0.5135077238082886,
|
|
"learning_rate": 9.697864682517592e-05,
|
|
"loss": 0.4606,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 1.8095238095238095,
|
|
"grad_norm": 0.5064616799354553,
|
|
"learning_rate": 9.692453036347351e-05,
|
|
"loss": 0.4862,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 1.8253968253968254,
|
|
"grad_norm": 0.5660854578018188,
|
|
"learning_rate": 9.686994891752508e-05,
|
|
"loss": 0.5925,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 1.8412698412698414,
|
|
"grad_norm": 0.5516797304153442,
|
|
"learning_rate": 9.681490302818874e-05,
|
|
"loss": 0.5986,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 0.5815762281417847,
|
|
"learning_rate": 9.675939324092486e-05,
|
|
"loss": 0.6187,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 1.873015873015873,
|
|
"grad_norm": 0.5087511539459229,
|
|
"learning_rate": 9.670342010579065e-05,
|
|
"loss": 0.499,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 1.8888888888888888,
|
|
"grad_norm": 0.45885273814201355,
|
|
"learning_rate": 9.664698417743475e-05,
|
|
"loss": 0.4405,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 1.9047619047619047,
|
|
"grad_norm": 0.537526547908783,
|
|
"learning_rate": 9.659008601509168e-05,
|
|
"loss": 0.5208,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 1.9206349206349205,
|
|
"grad_norm": 0.4978830814361572,
|
|
"learning_rate": 9.653272618257631e-05,
|
|
"loss": 0.5475,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 1.9365079365079365,
|
|
"grad_norm": 0.5565654635429382,
|
|
"learning_rate": 9.647490524827834e-05,
|
|
"loss": 0.5459,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 1.9523809523809523,
|
|
"grad_norm": 0.5845757126808167,
|
|
"learning_rate": 9.641662378515659e-05,
|
|
"loss": 0.6169,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 1.9682539682539684,
|
|
"grad_norm": 0.5273924469947815,
|
|
"learning_rate": 9.635788237073334e-05,
|
|
"loss": 0.519,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 1.9841269841269842,
|
|
"grad_norm": 0.5515849590301514,
|
|
"learning_rate": 9.629868158708861e-05,
|
|
"loss": 0.52,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.7463253736495972,
|
|
"learning_rate": 9.623902202085444e-05,
|
|
"loss": 0.5024,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 2.015873015873016,
|
|
"grad_norm": 0.5206636190414429,
|
|
"learning_rate": 9.617890426320899e-05,
|
|
"loss": 0.4819,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 2.0317460317460316,
|
|
"grad_norm": 0.4978935122489929,
|
|
"learning_rate": 9.611832890987076e-05,
|
|
"loss": 0.4031,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 2.0476190476190474,
|
|
"grad_norm": 0.5565934181213379,
|
|
"learning_rate": 9.605729656109265e-05,
|
|
"loss": 0.5879,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 2.0634920634920633,
|
|
"grad_norm": 0.5003566741943359,
|
|
"learning_rate": 9.599580782165598e-05,
|
|
"loss": 0.3628,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 2.0793650793650795,
|
|
"grad_norm": 0.4868488609790802,
|
|
"learning_rate": 9.593386330086458e-05,
|
|
"loss": 0.3807,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 2.0952380952380953,
|
|
"grad_norm": 0.5097118616104126,
|
|
"learning_rate": 9.587146361253868e-05,
|
|
"loss": 0.4166,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 2.111111111111111,
|
|
"grad_norm": 0.5274227857589722,
|
|
"learning_rate": 9.580860937500884e-05,
|
|
"loss": 0.385,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 2.126984126984127,
|
|
"grad_norm": 0.5781636238098145,
|
|
"learning_rate": 9.57453012111099e-05,
|
|
"loss": 0.3981,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.6308386921882629,
|
|
"learning_rate": 9.568153974817464e-05,
|
|
"loss": 0.4357,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 2.1587301587301586,
|
|
"grad_norm": 0.6387614011764526,
|
|
"learning_rate": 9.561732561802778e-05,
|
|
"loss": 0.4168,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 2.1746031746031744,
|
|
"grad_norm": 0.6377487182617188,
|
|
"learning_rate": 9.555265945697953e-05,
|
|
"loss": 0.3831,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 2.1904761904761907,
|
|
"grad_norm": 0.7271438241004944,
|
|
"learning_rate": 9.548754190581939e-05,
|
|
"loss": 0.3844,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 2.2063492063492065,
|
|
"grad_norm": 0.8928720951080322,
|
|
"learning_rate": 9.542197360980978e-05,
|
|
"loss": 0.5863,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.8302777409553528,
|
|
"learning_rate": 9.53559552186796e-05,
|
|
"loss": 0.4477,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 2.238095238095238,
|
|
"grad_norm": 0.7997470498085022,
|
|
"learning_rate": 9.528948738661784e-05,
|
|
"loss": 0.3644,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 2.253968253968254,
|
|
"grad_norm": 0.8765047192573547,
|
|
"learning_rate": 9.522257077226717e-05,
|
|
"loss": 0.3806,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 2.2698412698412698,
|
|
"grad_norm": 0.7953476309776306,
|
|
"learning_rate": 9.51552060387172e-05,
|
|
"loss": 0.3829,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 0.8067965507507324,
|
|
"learning_rate": 9.508739385349812e-05,
|
|
"loss": 0.4414,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 2.3015873015873014,
|
|
"grad_norm": 0.7154417037963867,
|
|
"learning_rate": 9.501913488857399e-05,
|
|
"loss": 0.3377,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 2.317460317460317,
|
|
"grad_norm": 0.8233152627944946,
|
|
"learning_rate": 9.49504298203361e-05,
|
|
"loss": 0.4463,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 2.3333333333333335,
|
|
"grad_norm": 0.8649589419364929,
|
|
"learning_rate": 9.488127932959625e-05,
|
|
"loss": 0.3966,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 2.3492063492063493,
|
|
"grad_norm": 0.834513247013092,
|
|
"learning_rate": 9.481168410158003e-05,
|
|
"loss": 0.5009,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 2.365079365079365,
|
|
"grad_norm": 0.7996335625648499,
|
|
"learning_rate": 9.474164482592002e-05,
|
|
"loss": 0.4546,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 2.380952380952381,
|
|
"grad_norm": 0.9039611220359802,
|
|
"learning_rate": 9.467116219664894e-05,
|
|
"loss": 0.4492,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 2.3968253968253967,
|
|
"grad_norm": 0.8271594643592834,
|
|
"learning_rate": 9.460023691219277e-05,
|
|
"loss": 0.3569,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 2.4126984126984126,
|
|
"grad_norm": 0.9009270071983337,
|
|
"learning_rate": 9.45288696753639e-05,
|
|
"loss": 0.4727,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 2.4285714285714284,
|
|
"grad_norm": 0.7487375736236572,
|
|
"learning_rate": 9.445706119335407e-05,
|
|
"loss": 0.3298,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 2.4444444444444446,
|
|
"grad_norm": 0.8869822025299072,
|
|
"learning_rate": 9.438481217772744e-05,
|
|
"loss": 0.4476,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 2.4603174603174605,
|
|
"grad_norm": 0.8800178170204163,
|
|
"learning_rate": 9.431212334441343e-05,
|
|
"loss": 0.4377,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 2.4761904761904763,
|
|
"grad_norm": 0.8610995411872864,
|
|
"learning_rate": 9.423899541369978e-05,
|
|
"loss": 0.409,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 2.492063492063492,
|
|
"grad_norm": 0.8344472050666809,
|
|
"learning_rate": 9.41654291102253e-05,
|
|
"loss": 0.427,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 2.507936507936508,
|
|
"grad_norm": 0.9956201314926147,
|
|
"learning_rate": 9.409142516297269e-05,
|
|
"loss": 0.5661,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 2.5238095238095237,
|
|
"grad_norm": 0.8969646692276001,
|
|
"learning_rate": 9.401698430526142e-05,
|
|
"loss": 0.4215,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 2.5396825396825395,
|
|
"grad_norm": 0.918438732624054,
|
|
"learning_rate": 9.394210727474028e-05,
|
|
"loss": 0.4774,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 2.5555555555555554,
|
|
"grad_norm": 0.8604788780212402,
|
|
"learning_rate": 9.386679481338033e-05,
|
|
"loss": 0.3978,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.7847458124160767,
|
|
"learning_rate": 9.379104766746722e-05,
|
|
"loss": 0.3602,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 2.5873015873015874,
|
|
"grad_norm": 0.8306839466094971,
|
|
"learning_rate": 9.371486658759416e-05,
|
|
"loss": 0.466,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 2.6031746031746033,
|
|
"grad_norm": 0.8458616137504578,
|
|
"learning_rate": 9.363825232865413e-05,
|
|
"loss": 0.4077,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 2.619047619047619,
|
|
"grad_norm": 0.933336615562439,
|
|
"learning_rate": 9.356120564983266e-05,
|
|
"loss": 0.4652,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 2.634920634920635,
|
|
"grad_norm": 0.9182778596878052,
|
|
"learning_rate": 9.348372731460023e-05,
|
|
"loss": 0.3775,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 2.6507936507936507,
|
|
"grad_norm": 0.9331458806991577,
|
|
"learning_rate": 9.340581809070459e-05,
|
|
"loss": 0.4362,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 2.6666666666666665,
|
|
"grad_norm": 0.8755380511283875,
|
|
"learning_rate": 9.332747875016332e-05,
|
|
"loss": 0.363,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 2.682539682539683,
|
|
"grad_norm": 0.8975720405578613,
|
|
"learning_rate": 9.324871006925613e-05,
|
|
"loss": 0.4007,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 2.6984126984126986,
|
|
"grad_norm": 1.1305972337722778,
|
|
"learning_rate": 9.316951282851707e-05,
|
|
"loss": 0.5013,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 2.7142857142857144,
|
|
"grad_norm": 0.8970773220062256,
|
|
"learning_rate": 9.308988781272694e-05,
|
|
"loss": 0.4052,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 2.7301587301587302,
|
|
"grad_norm": 1.0294140577316284,
|
|
"learning_rate": 9.300983581090541e-05,
|
|
"loss": 0.4707,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 2.746031746031746,
|
|
"grad_norm": 0.9334731698036194,
|
|
"learning_rate": 9.292935761630326e-05,
|
|
"loss": 0.3639,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 2.761904761904762,
|
|
"grad_norm": 0.9174486398696899,
|
|
"learning_rate": 9.284845402639446e-05,
|
|
"loss": 0.3959,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.9317827224731445,
|
|
"learning_rate": 9.276712584286833e-05,
|
|
"loss": 0.3916,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 2.7936507936507935,
|
|
"grad_norm": 0.9498136639595032,
|
|
"learning_rate": 9.26853738716216e-05,
|
|
"loss": 0.4551,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 2.8095238095238093,
|
|
"grad_norm": 0.8333742022514343,
|
|
"learning_rate": 9.260319892275034e-05,
|
|
"loss": 0.3518,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 2.825396825396825,
|
|
"grad_norm": 0.8575045466423035,
|
|
"learning_rate": 9.2520601810542e-05,
|
|
"loss": 0.3623,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 2.8412698412698414,
|
|
"grad_norm": 1.100193977355957,
|
|
"learning_rate": 9.243758335346735e-05,
|
|
"loss": 0.5737,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.9462725520133972,
|
|
"learning_rate": 9.235414437417234e-05,
|
|
"loss": 0.4491,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 2.873015873015873,
|
|
"grad_norm": 0.8208152651786804,
|
|
"learning_rate": 9.227028569946996e-05,
|
|
"loss": 0.3799,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 2.888888888888889,
|
|
"grad_norm": 0.8733758330345154,
|
|
"learning_rate": 9.2186008160332e-05,
|
|
"loss": 0.4313,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 2.9047619047619047,
|
|
"grad_norm": 0.8397769927978516,
|
|
"learning_rate": 9.210131259188095e-05,
|
|
"loss": 0.3718,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 2.9206349206349205,
|
|
"grad_norm": 1.0263302326202393,
|
|
"learning_rate": 9.201619983338153e-05,
|
|
"loss": 0.5163,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 2.9365079365079367,
|
|
"grad_norm": 0.7651734948158264,
|
|
"learning_rate": 9.193067072823251e-05,
|
|
"loss": 0.3483,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 2.9523809523809526,
|
|
"grad_norm": 0.92905592918396,
|
|
"learning_rate": 9.18447261239584e-05,
|
|
"loss": 0.5041,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 2.9682539682539684,
|
|
"grad_norm": 0.8523809909820557,
|
|
"learning_rate": 9.175836687220084e-05,
|
|
"loss": 0.381,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 2.984126984126984,
|
|
"grad_norm": 0.8607370257377625,
|
|
"learning_rate": 9.167159382871039e-05,
|
|
"loss": 0.3953,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 1.321708083152771,
|
|
"learning_rate": 9.15844078533379e-05,
|
|
"loss": 0.4583,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 3.015873015873016,
|
|
"grad_norm": 0.7019425630569458,
|
|
"learning_rate": 9.149680981002609e-05,
|
|
"loss": 0.2773,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 3.0317460317460316,
|
|
"grad_norm": 0.6896389126777649,
|
|
"learning_rate": 9.140880056680088e-05,
|
|
"loss": 0.2746,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 3.0476190476190474,
|
|
"grad_norm": 0.779511570930481,
|
|
"learning_rate": 9.13203809957629e-05,
|
|
"loss": 0.3052,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 3.0634920634920633,
|
|
"grad_norm": 0.8268155455589294,
|
|
"learning_rate": 9.123155197307876e-05,
|
|
"loss": 0.3045,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 3.0793650793650795,
|
|
"grad_norm": 0.7496017813682556,
|
|
"learning_rate": 9.114231437897244e-05,
|
|
"loss": 0.2231,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 3.0952380952380953,
|
|
"grad_norm": 0.8415669798851013,
|
|
"learning_rate": 9.105266909771653e-05,
|
|
"loss": 0.2298,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 3.111111111111111,
|
|
"grad_norm": 1.05263090133667,
|
|
"learning_rate": 9.096261701762342e-05,
|
|
"loss": 0.2488,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 3.126984126984127,
|
|
"grad_norm": 1.238415241241455,
|
|
"learning_rate": 9.087215903103662e-05,
|
|
"loss": 0.2806,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 3.142857142857143,
|
|
"grad_norm": 1.1588196754455566,
|
|
"learning_rate": 9.078129603432181e-05,
|
|
"loss": 0.245,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 3.1587301587301586,
|
|
"grad_norm": 1.584652304649353,
|
|
"learning_rate": 9.069002892785797e-05,
|
|
"loss": 0.295,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 3.1746031746031744,
|
|
"grad_norm": 1.3894325494766235,
|
|
"learning_rate": 9.059835861602853e-05,
|
|
"loss": 0.2349,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 3.1904761904761907,
|
|
"grad_norm": 1.66408109664917,
|
|
"learning_rate": 9.050628600721234e-05,
|
|
"loss": 0.2627,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 3.2063492063492065,
|
|
"grad_norm": 1.2087987661361694,
|
|
"learning_rate": 9.041381201377468e-05,
|
|
"loss": 0.2159,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 3.2222222222222223,
|
|
"grad_norm": 1.369932770729065,
|
|
"learning_rate": 9.032093755205822e-05,
|
|
"loss": 0.2341,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 3.238095238095238,
|
|
"grad_norm": 1.6366993188858032,
|
|
"learning_rate": 9.0227663542374e-05,
|
|
"loss": 0.2893,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 3.253968253968254,
|
|
"grad_norm": 1.529963731765747,
|
|
"learning_rate": 9.013399090899217e-05,
|
|
"loss": 0.2395,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 3.2698412698412698,
|
|
"grad_norm": 1.7285979986190796,
|
|
"learning_rate": 9.003992058013302e-05,
|
|
"loss": 0.3451,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 3.2857142857142856,
|
|
"grad_norm": 1.3240851163864136,
|
|
"learning_rate": 8.99454534879576e-05,
|
|
"loss": 0.2469,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 3.3015873015873014,
|
|
"grad_norm": 1.3964006900787354,
|
|
"learning_rate": 8.985059056855858e-05,
|
|
"loss": 0.2456,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 3.317460317460317,
|
|
"grad_norm": 1.405621886253357,
|
|
"learning_rate": 8.975533276195102e-05,
|
|
"loss": 0.2347,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 1.3338896036148071,
|
|
"learning_rate": 8.965968101206291e-05,
|
|
"loss": 0.2988,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 3.3492063492063493,
|
|
"grad_norm": 1.329379677772522,
|
|
"learning_rate": 8.956363626672595e-05,
|
|
"loss": 0.2651,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 3.365079365079365,
|
|
"grad_norm": 1.3324720859527588,
|
|
"learning_rate": 8.94671994776661e-05,
|
|
"loss": 0.2527,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 3.380952380952381,
|
|
"grad_norm": 1.2702524662017822,
|
|
"learning_rate": 8.937037160049416e-05,
|
|
"loss": 0.2763,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 3.3968253968253967,
|
|
"grad_norm": 1.270229458808899,
|
|
"learning_rate": 8.927315359469626e-05,
|
|
"loss": 0.236,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 3.4126984126984126,
|
|
"grad_norm": 1.3164818286895752,
|
|
"learning_rate": 8.917554642362443e-05,
|
|
"loss": 0.2476,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 3.4285714285714284,
|
|
"grad_norm": 1.2434004545211792,
|
|
"learning_rate": 8.907755105448704e-05,
|
|
"loss": 0.2387,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 3.4444444444444446,
|
|
"grad_norm": 1.0932611227035522,
|
|
"learning_rate": 8.89791684583391e-05,
|
|
"loss": 0.2195,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 3.4603174603174605,
|
|
"grad_norm": 1.334930181503296,
|
|
"learning_rate": 8.888039961007282e-05,
|
|
"loss": 0.2725,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 3.4761904761904763,
|
|
"grad_norm": 1.1716219186782837,
|
|
"learning_rate": 8.87812454884078e-05,
|
|
"loss": 0.2515,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 3.492063492063492,
|
|
"grad_norm": 1.1771153211593628,
|
|
"learning_rate": 8.868170707588142e-05,
|
|
"loss": 0.2286,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 3.507936507936508,
|
|
"grad_norm": 1.2309902906417847,
|
|
"learning_rate": 8.858178535883905e-05,
|
|
"loss": 0.2365,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 3.5238095238095237,
|
|
"grad_norm": 0.9976351261138916,
|
|
"learning_rate": 8.848148132742431e-05,
|
|
"loss": 0.22,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 3.5396825396825395,
|
|
"grad_norm": 1.1791083812713623,
|
|
"learning_rate": 8.838079597556925e-05,
|
|
"loss": 0.2683,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 3.5555555555555554,
|
|
"grad_norm": 1.1750749349594116,
|
|
"learning_rate": 8.827973030098448e-05,
|
|
"loss": 0.2396,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 1.054264783859253,
|
|
"learning_rate": 8.81782853051493e-05,
|
|
"loss": 0.2396,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 3.5873015873015874,
|
|
"grad_norm": 1.1976933479309082,
|
|
"learning_rate": 8.807646199330187e-05,
|
|
"loss": 0.2393,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 3.6031746031746033,
|
|
"grad_norm": 1.4662325382232666,
|
|
"learning_rate": 8.797426137442897e-05,
|
|
"loss": 0.3188,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 3.619047619047619,
|
|
"grad_norm": 1.5771795511245728,
|
|
"learning_rate": 8.787168446125638e-05,
|
|
"loss": 0.3204,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 3.634920634920635,
|
|
"grad_norm": 1.3994357585906982,
|
|
"learning_rate": 8.776873227023852e-05,
|
|
"loss": 0.3045,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 3.6507936507936507,
|
|
"grad_norm": 1.9753646850585938,
|
|
"learning_rate": 8.766540582154859e-05,
|
|
"loss": 0.2306,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 3.6666666666666665,
|
|
"grad_norm": 1.4474598169326782,
|
|
"learning_rate": 8.756170613906833e-05,
|
|
"loss": 0.2581,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 3.682539682539683,
|
|
"grad_norm": 1.1273548603057861,
|
|
"learning_rate": 8.745763425037797e-05,
|
|
"loss": 0.2213,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 3.6984126984126986,
|
|
"grad_norm": 1.0989768505096436,
|
|
"learning_rate": 8.735319118674596e-05,
|
|
"loss": 0.2063,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 3.7142857142857144,
|
|
"grad_norm": 1.243393063545227,
|
|
"learning_rate": 8.724837798311882e-05,
|
|
"loss": 0.2539,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 3.7301587301587302,
|
|
"grad_norm": 1.1233344078063965,
|
|
"learning_rate": 8.714319567811088e-05,
|
|
"loss": 0.2225,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 3.746031746031746,
|
|
"grad_norm": 1.2728500366210938,
|
|
"learning_rate": 8.703764531399392e-05,
|
|
"loss": 0.246,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 3.761904761904762,
|
|
"grad_norm": 1.2673249244689941,
|
|
"learning_rate": 8.69317279366869e-05,
|
|
"loss": 0.2881,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 3.7777777777777777,
|
|
"grad_norm": 1.4421532154083252,
|
|
"learning_rate": 8.682544459574562e-05,
|
|
"loss": 0.3309,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 3.7936507936507935,
|
|
"grad_norm": 1.217529296875,
|
|
"learning_rate": 8.671879634435224e-05,
|
|
"loss": 0.2815,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 3.8095238095238093,
|
|
"grad_norm": 1.1456962823867798,
|
|
"learning_rate": 8.661178423930491e-05,
|
|
"loss": 0.2557,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 3.825396825396825,
|
|
"grad_norm": 1.0717531442642212,
|
|
"learning_rate": 8.650440934100728e-05,
|
|
"loss": 0.2471,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 3.8412698412698414,
|
|
"grad_norm": 1.217034935951233,
|
|
"learning_rate": 8.6396672713458e-05,
|
|
"loss": 0.2883,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 1.237244725227356,
|
|
"learning_rate": 8.628857542424009e-05,
|
|
"loss": 0.2953,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 3.873015873015873,
|
|
"grad_norm": 1.2947179079055786,
|
|
"learning_rate": 8.618011854451056e-05,
|
|
"loss": 0.3134,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 1.2005493640899658,
|
|
"learning_rate": 8.607130314898956e-05,
|
|
"loss": 0.2655,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 3.9047619047619047,
|
|
"grad_norm": 1.387406826019287,
|
|
"learning_rate": 8.596213031594991e-05,
|
|
"loss": 0.3133,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 3.9206349206349205,
|
|
"grad_norm": 1.297012209892273,
|
|
"learning_rate": 8.585260112720631e-05,
|
|
"loss": 0.2747,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 3.9365079365079367,
|
|
"grad_norm": 1.12217378616333,
|
|
"learning_rate": 8.57427166681047e-05,
|
|
"loss": 0.2444,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 3.9523809523809526,
|
|
"grad_norm": 1.2482068538665771,
|
|
"learning_rate": 8.56324780275114e-05,
|
|
"loss": 0.2887,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 3.9682539682539684,
|
|
"grad_norm": 1.2814184427261353,
|
|
"learning_rate": 8.552188629780244e-05,
|
|
"loss": 0.284,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 3.984126984126984,
|
|
"grad_norm": 1.1486774682998657,
|
|
"learning_rate": 8.541094257485265e-05,
|
|
"loss": 0.2636,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 1.6360046863555908,
|
|
"learning_rate": 8.529964795802485e-05,
|
|
"loss": 0.2305,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 4.015873015873016,
|
|
"grad_norm": 0.7815824151039124,
|
|
"learning_rate": 8.518800355015892e-05,
|
|
"loss": 0.1427,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 4.031746031746032,
|
|
"grad_norm": 0.9590736031532288,
|
|
"learning_rate": 8.507601045756085e-05,
|
|
"loss": 0.1609,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 4.0476190476190474,
|
|
"grad_norm": 0.9721108078956604,
|
|
"learning_rate": 8.49636697899919e-05,
|
|
"loss": 0.1429,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 4.063492063492063,
|
|
"grad_norm": 1.0513888597488403,
|
|
"learning_rate": 8.485098266065744e-05,
|
|
"loss": 0.1344,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 4.079365079365079,
|
|
"grad_norm": 1.1911511421203613,
|
|
"learning_rate": 8.473795018619604e-05,
|
|
"loss": 0.135,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 4.095238095238095,
|
|
"grad_norm": 1.052157998085022,
|
|
"learning_rate": 8.462457348666835e-05,
|
|
"loss": 0.1146,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 4.111111111111111,
|
|
"grad_norm": 1.4159713983535767,
|
|
"learning_rate": 8.4510853685546e-05,
|
|
"loss": 0.1359,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 4.1269841269841265,
|
|
"grad_norm": 1.6234732866287231,
|
|
"learning_rate": 8.439679190970052e-05,
|
|
"loss": 0.1634,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 4.142857142857143,
|
|
"grad_norm": 1.2149155139923096,
|
|
"learning_rate": 8.428238928939207e-05,
|
|
"loss": 0.1051,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 4.158730158730159,
|
|
"grad_norm": 1.527443528175354,
|
|
"learning_rate": 8.416764695825834e-05,
|
|
"loss": 0.1519,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 4.174603174603175,
|
|
"grad_norm": 1.3665393590927124,
|
|
"learning_rate": 8.405256605330331e-05,
|
|
"loss": 0.1366,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 4.190476190476191,
|
|
"grad_norm": 1.2650479078292847,
|
|
"learning_rate": 8.39371477148859e-05,
|
|
"loss": 0.1314,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 4.2063492063492065,
|
|
"grad_norm": 0.9967718124389648,
|
|
"learning_rate": 8.382139308670875e-05,
|
|
"loss": 0.1173,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 4.222222222222222,
|
|
"grad_norm": 1.1094558238983154,
|
|
"learning_rate": 8.370530331580686e-05,
|
|
"loss": 0.1126,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 4.238095238095238,
|
|
"grad_norm": 1.0152033567428589,
|
|
"learning_rate": 8.35888795525362e-05,
|
|
"loss": 0.089,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 4.253968253968254,
|
|
"grad_norm": 1.2841627597808838,
|
|
"learning_rate": 8.347212295056239e-05,
|
|
"loss": 0.1292,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 4.26984126984127,
|
|
"grad_norm": 1.416364073753357,
|
|
"learning_rate": 8.335503466684915e-05,
|
|
"loss": 0.1444,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 1.2542331218719482,
|
|
"learning_rate": 8.323761586164695e-05,
|
|
"loss": 0.1313,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 4.301587301587301,
|
|
"grad_norm": 1.3430452346801758,
|
|
"learning_rate": 8.311986769848141e-05,
|
|
"loss": 0.1405,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 4.317460317460317,
|
|
"grad_norm": 1.3169519901275635,
|
|
"learning_rate": 8.300179134414188e-05,
|
|
"loss": 0.1429,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 4.333333333333333,
|
|
"grad_norm": 1.2539156675338745,
|
|
"learning_rate": 8.288338796866976e-05,
|
|
"loss": 0.1382,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 4.349206349206349,
|
|
"grad_norm": 1.365218997001648,
|
|
"learning_rate": 8.276465874534702e-05,
|
|
"loss": 0.1236,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 4.365079365079365,
|
|
"grad_norm": 1.4856258630752563,
|
|
"learning_rate": 8.264560485068446e-05,
|
|
"loss": 0.1516,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 4.380952380952381,
|
|
"grad_norm": 1.139467477798462,
|
|
"learning_rate": 8.252622746441021e-05,
|
|
"loss": 0.1187,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 4.396825396825397,
|
|
"grad_norm": 1.1698997020721436,
|
|
"learning_rate": 8.240652776945781e-05,
|
|
"loss": 0.133,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 4.412698412698413,
|
|
"grad_norm": 1.284920334815979,
|
|
"learning_rate": 8.228650695195472e-05,
|
|
"loss": 0.1564,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 4.428571428571429,
|
|
"grad_norm": 1.2975406646728516,
|
|
"learning_rate": 8.216616620121043e-05,
|
|
"loss": 0.1476,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 1.28453528881073,
|
|
"learning_rate": 8.204550670970469e-05,
|
|
"loss": 0.1444,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 4.4603174603174605,
|
|
"grad_norm": 1.2703144550323486,
|
|
"learning_rate": 8.192452967307576e-05,
|
|
"loss": 0.1627,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 4.476190476190476,
|
|
"grad_norm": 1.2940740585327148,
|
|
"learning_rate": 8.180323629010848e-05,
|
|
"loss": 0.1384,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 4.492063492063492,
|
|
"grad_norm": 1.2578924894332886,
|
|
"learning_rate": 8.168162776272244e-05,
|
|
"loss": 0.1301,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 4.507936507936508,
|
|
"grad_norm": 1.2214442491531372,
|
|
"learning_rate": 8.155970529596006e-05,
|
|
"loss": 0.139,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 4.523809523809524,
|
|
"grad_norm": 1.436343789100647,
|
|
"learning_rate": 8.143747009797464e-05,
|
|
"loss": 0.1522,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 4.5396825396825395,
|
|
"grad_norm": 1.179060459136963,
|
|
"learning_rate": 8.131492338001839e-05,
|
|
"loss": 0.1236,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 4.555555555555555,
|
|
"grad_norm": 1.3683005571365356,
|
|
"learning_rate": 8.119206635643045e-05,
|
|
"loss": 0.1489,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 4.571428571428571,
|
|
"grad_norm": 1.2832778692245483,
|
|
"learning_rate": 8.106890024462481e-05,
|
|
"loss": 0.1388,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 4.587301587301587,
|
|
"grad_norm": 1.0831190347671509,
|
|
"learning_rate": 8.094542626507828e-05,
|
|
"loss": 0.1219,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 4.603174603174603,
|
|
"grad_norm": 1.212108850479126,
|
|
"learning_rate": 8.082164564131845e-05,
|
|
"loss": 0.1331,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 4.619047619047619,
|
|
"grad_norm": 1.157487154006958,
|
|
"learning_rate": 8.069755959991142e-05,
|
|
"loss": 0.1306,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 4.634920634920634,
|
|
"grad_norm": 1.194389820098877,
|
|
"learning_rate": 8.057316937044977e-05,
|
|
"loss": 0.1361,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 4.650793650793651,
|
|
"grad_norm": 1.2109564542770386,
|
|
"learning_rate": 8.044847618554034e-05,
|
|
"loss": 0.138,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 4.666666666666667,
|
|
"grad_norm": 1.0707926750183105,
|
|
"learning_rate": 8.032348128079203e-05,
|
|
"loss": 0.1078,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 4.682539682539683,
|
|
"grad_norm": 1.179071307182312,
|
|
"learning_rate": 8.019818589480352e-05,
|
|
"loss": 0.1397,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 4.698412698412699,
|
|
"grad_norm": 1.3288228511810303,
|
|
"learning_rate": 8.0072591269151e-05,
|
|
"loss": 0.1613,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 4.714285714285714,
|
|
"grad_norm": 1.24984872341156,
|
|
"learning_rate": 7.994669864837594e-05,
|
|
"loss": 0.1457,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 4.73015873015873,
|
|
"grad_norm": 1.2009999752044678,
|
|
"learning_rate": 7.982050927997264e-05,
|
|
"loss": 0.1257,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 4.746031746031746,
|
|
"grad_norm": 1.207233190536499,
|
|
"learning_rate": 7.969402441437594e-05,
|
|
"loss": 0.1567,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 4.761904761904762,
|
|
"grad_norm": 1.1672086715698242,
|
|
"learning_rate": 7.956724530494887e-05,
|
|
"loss": 0.1274,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 4.777777777777778,
|
|
"grad_norm": 1.506867527961731,
|
|
"learning_rate": 7.944017320797013e-05,
|
|
"loss": 0.139,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 4.7936507936507935,
|
|
"grad_norm": 1.4278178215026855,
|
|
"learning_rate": 7.931280938262169e-05,
|
|
"loss": 0.1357,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 4.809523809523809,
|
|
"grad_norm": 1.599716067314148,
|
|
"learning_rate": 7.918515509097634e-05,
|
|
"loss": 0.1704,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 4.825396825396825,
|
|
"grad_norm": 1.3049015998840332,
|
|
"learning_rate": 7.905721159798513e-05,
|
|
"loss": 0.1379,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 4.841269841269841,
|
|
"grad_norm": 1.3524868488311768,
|
|
"learning_rate": 7.89289801714649e-05,
|
|
"loss": 0.1545,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 4.857142857142857,
|
|
"grad_norm": 1.2142527103424072,
|
|
"learning_rate": 7.880046208208563e-05,
|
|
"loss": 0.1453,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 4.8730158730158735,
|
|
"grad_norm": 1.1084891557693481,
|
|
"learning_rate": 7.867165860335792e-05,
|
|
"loss": 0.1427,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 4.888888888888889,
|
|
"grad_norm": 1.266802191734314,
|
|
"learning_rate": 7.854257101162037e-05,
|
|
"loss": 0.1396,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 4.904761904761905,
|
|
"grad_norm": 1.1826775074005127,
|
|
"learning_rate": 7.841320058602688e-05,
|
|
"loss": 0.1514,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 4.920634920634921,
|
|
"grad_norm": 1.4232659339904785,
|
|
"learning_rate": 7.828354860853399e-05,
|
|
"loss": 0.1472,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 4.936507936507937,
|
|
"grad_norm": 1.1436830759048462,
|
|
"learning_rate": 7.815361636388827e-05,
|
|
"loss": 0.1249,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 4.9523809523809526,
|
|
"grad_norm": 1.3001309633255005,
|
|
"learning_rate": 7.802340513961342e-05,
|
|
"loss": 0.1663,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 4.968253968253968,
|
|
"grad_norm": 1.4213690757751465,
|
|
"learning_rate": 7.789291622599767e-05,
|
|
"loss": 0.1538,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 4.984126984126984,
|
|
"grad_norm": 1.5043220520019531,
|
|
"learning_rate": 7.776215091608085e-05,
|
|
"loss": 0.151,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 1.6825261116027832,
|
|
"learning_rate": 7.763111050564178e-05,
|
|
"loss": 0.1485,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 5.015873015873016,
|
|
"grad_norm": 0.8601322174072266,
|
|
"learning_rate": 7.749979629318516e-05,
|
|
"loss": 0.0703,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 5.031746031746032,
|
|
"grad_norm": 0.7637147903442383,
|
|
"learning_rate": 7.736820957992895e-05,
|
|
"loss": 0.0633,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 5.0476190476190474,
|
|
"grad_norm": 0.8896054625511169,
|
|
"learning_rate": 7.723635166979133e-05,
|
|
"loss": 0.0652,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 5.063492063492063,
|
|
"grad_norm": 0.9216472506523132,
|
|
"learning_rate": 7.710422386937784e-05,
|
|
"loss": 0.0585,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 5.079365079365079,
|
|
"grad_norm": 0.7166661024093628,
|
|
"learning_rate": 7.697182748796841e-05,
|
|
"loss": 0.0531,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 5.095238095238095,
|
|
"grad_norm": 0.9962891936302185,
|
|
"learning_rate": 7.683916383750436e-05,
|
|
"loss": 0.072,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 5.111111111111111,
|
|
"grad_norm": 0.7969011068344116,
|
|
"learning_rate": 7.670623423257548e-05,
|
|
"loss": 0.0554,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 5.1269841269841265,
|
|
"grad_norm": 0.8427059650421143,
|
|
"learning_rate": 7.657303999040693e-05,
|
|
"loss": 0.0534,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 5.142857142857143,
|
|
"grad_norm": 0.9813700914382935,
|
|
"learning_rate": 7.64395824308462e-05,
|
|
"loss": 0.0696,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 5.158730158730159,
|
|
"grad_norm": 0.8625731468200684,
|
|
"learning_rate": 7.630586287635008e-05,
|
|
"loss": 0.0562,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 5.174603174603175,
|
|
"grad_norm": 0.9820646047592163,
|
|
"learning_rate": 7.617188265197148e-05,
|
|
"loss": 0.063,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 5.190476190476191,
|
|
"grad_norm": 1.0742745399475098,
|
|
"learning_rate": 7.603764308534636e-05,
|
|
"loss": 0.0689,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 5.2063492063492065,
|
|
"grad_norm": 0.9532903432846069,
|
|
"learning_rate": 7.590314550668054e-05,
|
|
"loss": 0.0667,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 5.222222222222222,
|
|
"grad_norm": 0.8958349227905273,
|
|
"learning_rate": 7.576839124873653e-05,
|
|
"loss": 0.0538,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 5.238095238095238,
|
|
"grad_norm": 0.9804512858390808,
|
|
"learning_rate": 7.563338164682036e-05,
|
|
"loss": 0.0689,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 5.253968253968254,
|
|
"grad_norm": 1.0487632751464844,
|
|
"learning_rate": 7.549811803876825e-05,
|
|
"loss": 0.0671,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 5.26984126984127,
|
|
"grad_norm": 0.9195834994316101,
|
|
"learning_rate": 7.536260176493348e-05,
|
|
"loss": 0.0669,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 5.285714285714286,
|
|
"grad_norm": 0.9964186549186707,
|
|
"learning_rate": 7.5226834168173e-05,
|
|
"loss": 0.0688,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 5.301587301587301,
|
|
"grad_norm": 0.8904131054878235,
|
|
"learning_rate": 7.509081659383417e-05,
|
|
"loss": 0.0636,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 5.317460317460317,
|
|
"grad_norm": 0.965900182723999,
|
|
"learning_rate": 7.495455038974146e-05,
|
|
"loss": 0.0769,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 5.333333333333333,
|
|
"grad_norm": 0.9203529357910156,
|
|
"learning_rate": 7.481803690618303e-05,
|
|
"loss": 0.0554,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 5.349206349206349,
|
|
"grad_norm": 1.2281473875045776,
|
|
"learning_rate": 7.46812774958974e-05,
|
|
"loss": 0.0735,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 5.365079365079365,
|
|
"grad_norm": 1.0948208570480347,
|
|
"learning_rate": 7.454427351405999e-05,
|
|
"loss": 0.0705,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 5.380952380952381,
|
|
"grad_norm": 1.0401225090026855,
|
|
"learning_rate": 7.440702631826977e-05,
|
|
"loss": 0.07,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 5.396825396825397,
|
|
"grad_norm": 0.9042516350746155,
|
|
"learning_rate": 7.426953726853574e-05,
|
|
"loss": 0.0628,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 5.412698412698413,
|
|
"grad_norm": 0.9594908356666565,
|
|
"learning_rate": 7.413180772726348e-05,
|
|
"loss": 0.0606,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 5.428571428571429,
|
|
"grad_norm": 1.0593825578689575,
|
|
"learning_rate": 7.399383905924165e-05,
|
|
"loss": 0.0652,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 5.444444444444445,
|
|
"grad_norm": 1.0469237565994263,
|
|
"learning_rate": 7.385563263162847e-05,
|
|
"loss": 0.0636,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 5.4603174603174605,
|
|
"grad_norm": 0.9159653782844543,
|
|
"learning_rate": 7.371718981393815e-05,
|
|
"loss": 0.0566,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 5.476190476190476,
|
|
"grad_norm": 0.9596768021583557,
|
|
"learning_rate": 7.357851197802735e-05,
|
|
"loss": 0.0659,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 5.492063492063492,
|
|
"grad_norm": 0.8929640054702759,
|
|
"learning_rate": 7.343960049808156e-05,
|
|
"loss": 0.0586,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 5.507936507936508,
|
|
"grad_norm": 0.859683632850647,
|
|
"learning_rate": 7.330045675060149e-05,
|
|
"loss": 0.0522,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 5.523809523809524,
|
|
"grad_norm": 1.026452898979187,
|
|
"learning_rate": 7.316108211438945e-05,
|
|
"loss": 0.0679,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 5.5396825396825395,
|
|
"grad_norm": 0.9891062378883362,
|
|
"learning_rate": 7.302147797053569e-05,
|
|
"loss": 0.072,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 0.9392737150192261,
|
|
"learning_rate": 7.288164570240463e-05,
|
|
"loss": 0.062,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 5.571428571428571,
|
|
"grad_norm": 1.1346358060836792,
|
|
"learning_rate": 7.274158669562126e-05,
|
|
"loss": 0.0666,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 5.587301587301587,
|
|
"grad_norm": 0.8670554757118225,
|
|
"learning_rate": 7.26013023380574e-05,
|
|
"loss": 0.0572,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 5.603174603174603,
|
|
"grad_norm": 1.020330786705017,
|
|
"learning_rate": 7.246079401981784e-05,
|
|
"loss": 0.0617,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 5.619047619047619,
|
|
"grad_norm": 1.0136491060256958,
|
|
"learning_rate": 7.232006313322667e-05,
|
|
"loss": 0.0853,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 5.634920634920634,
|
|
"grad_norm": 1.010423183441162,
|
|
"learning_rate": 7.217911107281352e-05,
|
|
"loss": 0.0705,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 5.650793650793651,
|
|
"grad_norm": 0.9768037796020508,
|
|
"learning_rate": 7.203793923529956e-05,
|
|
"loss": 0.0853,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 5.666666666666667,
|
|
"grad_norm": 0.9990655183792114,
|
|
"learning_rate": 7.189654901958385e-05,
|
|
"loss": 0.0715,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 5.682539682539683,
|
|
"grad_norm": 1.0247498750686646,
|
|
"learning_rate": 7.175494182672939e-05,
|
|
"loss": 0.0712,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 5.698412698412699,
|
|
"grad_norm": 1.0099873542785645,
|
|
"learning_rate": 7.161311905994922e-05,
|
|
"loss": 0.0712,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 1.0355095863342285,
|
|
"learning_rate": 7.147108212459257e-05,
|
|
"loss": 0.0722,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 5.73015873015873,
|
|
"grad_norm": 1.1409605741500854,
|
|
"learning_rate": 7.13288324281309e-05,
|
|
"loss": 0.0688,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 5.746031746031746,
|
|
"grad_norm": 1.1082065105438232,
|
|
"learning_rate": 7.118637138014396e-05,
|
|
"loss": 0.0781,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 5.761904761904762,
|
|
"grad_norm": 1.1074239015579224,
|
|
"learning_rate": 7.104370039230583e-05,
|
|
"loss": 0.0705,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 5.777777777777778,
|
|
"grad_norm": 0.9265062212944031,
|
|
"learning_rate": 7.090082087837091e-05,
|
|
"loss": 0.0593,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 5.7936507936507935,
|
|
"grad_norm": 0.911005437374115,
|
|
"learning_rate": 7.075773425415994e-05,
|
|
"loss": 0.0678,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 5.809523809523809,
|
|
"grad_norm": 1.0349949598312378,
|
|
"learning_rate": 7.061444193754596e-05,
|
|
"loss": 0.078,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 5.825396825396825,
|
|
"grad_norm": 1.0515737533569336,
|
|
"learning_rate": 7.047094534844023e-05,
|
|
"loss": 0.0666,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 5.841269841269841,
|
|
"grad_norm": 1.179187297821045,
|
|
"learning_rate": 7.032724590877821e-05,
|
|
"loss": 0.0774,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 5.857142857142857,
|
|
"grad_norm": 1.1190379858016968,
|
|
"learning_rate": 7.018334504250545e-05,
|
|
"loss": 0.0891,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 5.8730158730158735,
|
|
"grad_norm": 0.9958922863006592,
|
|
"learning_rate": 7.003924417556343e-05,
|
|
"loss": 0.0711,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 5.888888888888889,
|
|
"grad_norm": 1.053802728652954,
|
|
"learning_rate": 6.989494473587554e-05,
|
|
"loss": 0.0759,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 5.904761904761905,
|
|
"grad_norm": 0.9447202682495117,
|
|
"learning_rate": 6.975044815333282e-05,
|
|
"loss": 0.0713,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 5.920634920634921,
|
|
"grad_norm": 0.9191451668739319,
|
|
"learning_rate": 6.960575585977984e-05,
|
|
"loss": 0.0655,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 5.936507936507937,
|
|
"grad_norm": 1.1037213802337646,
|
|
"learning_rate": 6.946086928900054e-05,
|
|
"loss": 0.0831,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 0.9468006491661072,
|
|
"learning_rate": 6.931578987670396e-05,
|
|
"loss": 0.059,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 5.968253968253968,
|
|
"grad_norm": 1.1110552549362183,
|
|
"learning_rate": 6.917051906051006e-05,
|
|
"loss": 0.0709,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 5.984126984126984,
|
|
"grad_norm": 1.1933718919754028,
|
|
"learning_rate": 6.902505827993541e-05,
|
|
"loss": 0.1004,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 1.4565590620040894,
|
|
"learning_rate": 6.887940897637908e-05,
|
|
"loss": 0.0915,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 6.015873015873016,
|
|
"grad_norm": 0.6238571405410767,
|
|
"learning_rate": 6.873357259310815e-05,
|
|
"loss": 0.0431,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 6.031746031746032,
|
|
"grad_norm": 0.4840649366378784,
|
|
"learning_rate": 6.858755057524354e-05,
|
|
"loss": 0.0358,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 6.0476190476190474,
|
|
"grad_norm": 0.48597481846809387,
|
|
"learning_rate": 6.844134436974567e-05,
|
|
"loss": 0.0222,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 6.063492063492063,
|
|
"grad_norm": 0.6410611867904663,
|
|
"learning_rate": 6.829495542540013e-05,
|
|
"loss": 0.0404,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 6.079365079365079,
|
|
"grad_norm": 0.5220045447349548,
|
|
"learning_rate": 6.814838519280324e-05,
|
|
"loss": 0.0303,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 6.095238095238095,
|
|
"grad_norm": 0.6196178793907166,
|
|
"learning_rate": 6.80016351243478e-05,
|
|
"loss": 0.0391,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 6.111111111111111,
|
|
"grad_norm": 0.64337158203125,
|
|
"learning_rate": 6.785470667420862e-05,
|
|
"loss": 0.0338,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 6.1269841269841265,
|
|
"grad_norm": 0.8072399497032166,
|
|
"learning_rate": 6.77076012983281e-05,
|
|
"loss": 0.0413,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 6.142857142857143,
|
|
"grad_norm": 0.6252787709236145,
|
|
"learning_rate": 6.75603204544019e-05,
|
|
"loss": 0.0332,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 6.158730158730159,
|
|
"grad_norm": 0.7571528553962708,
|
|
"learning_rate": 6.741286560186437e-05,
|
|
"loss": 0.0375,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 6.174603174603175,
|
|
"grad_norm": 0.5972614884376526,
|
|
"learning_rate": 6.726523820187413e-05,
|
|
"loss": 0.0333,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 6.190476190476191,
|
|
"grad_norm": 0.6365858316421509,
|
|
"learning_rate": 6.711743971729967e-05,
|
|
"loss": 0.0264,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 6.2063492063492065,
|
|
"grad_norm": 0.7397788763046265,
|
|
"learning_rate": 6.696947161270476e-05,
|
|
"loss": 0.0319,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 6.222222222222222,
|
|
"grad_norm": 0.6979987025260925,
|
|
"learning_rate": 6.682133535433393e-05,
|
|
"loss": 0.0415,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 6.238095238095238,
|
|
"grad_norm": 0.6048802733421326,
|
|
"learning_rate": 6.667303241009803e-05,
|
|
"loss": 0.031,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 6.253968253968254,
|
|
"grad_norm": 0.7918148040771484,
|
|
"learning_rate": 6.652456424955963e-05,
|
|
"loss": 0.0342,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 6.26984126984127,
|
|
"grad_norm": 0.5297304391860962,
|
|
"learning_rate": 6.637593234391843e-05,
|
|
"loss": 0.0283,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 6.285714285714286,
|
|
"grad_norm": 0.6882847547531128,
|
|
"learning_rate": 6.622713816599673e-05,
|
|
"loss": 0.0327,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 6.301587301587301,
|
|
"grad_norm": 0.5969606637954712,
|
|
"learning_rate": 6.60781831902248e-05,
|
|
"loss": 0.0344,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 6.317460317460317,
|
|
"grad_norm": 0.5623995065689087,
|
|
"learning_rate": 6.592906889262632e-05,
|
|
"loss": 0.0292,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 6.333333333333333,
|
|
"grad_norm": 0.7312327027320862,
|
|
"learning_rate": 6.577979675080369e-05,
|
|
"loss": 0.0358,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 6.349206349206349,
|
|
"grad_norm": 0.5290599465370178,
|
|
"learning_rate": 6.563036824392344e-05,
|
|
"loss": 0.0265,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 6.365079365079365,
|
|
"grad_norm": 0.604269802570343,
|
|
"learning_rate": 6.548078485270152e-05,
|
|
"loss": 0.0311,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 6.380952380952381,
|
|
"grad_norm": 0.6508985161781311,
|
|
"learning_rate": 6.533104805938873e-05,
|
|
"loss": 0.0325,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 6.396825396825397,
|
|
"grad_norm": 0.7835598587989807,
|
|
"learning_rate": 6.518115934775585e-05,
|
|
"loss": 0.0311,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 6.412698412698413,
|
|
"grad_norm": 0.6879574656486511,
|
|
"learning_rate": 6.503112020307916e-05,
|
|
"loss": 0.039,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 6.428571428571429,
|
|
"grad_norm": 0.8170531392097473,
|
|
"learning_rate": 6.488093211212555e-05,
|
|
"loss": 0.0476,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 6.444444444444445,
|
|
"grad_norm": 0.635261058807373,
|
|
"learning_rate": 6.473059656313782e-05,
|
|
"loss": 0.0315,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 6.4603174603174605,
|
|
"grad_norm": 0.6152068972587585,
|
|
"learning_rate": 6.458011504582005e-05,
|
|
"loss": 0.0303,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 6.476190476190476,
|
|
"grad_norm": 0.6500536799430847,
|
|
"learning_rate": 6.442948905132266e-05,
|
|
"loss": 0.0227,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 6.492063492063492,
|
|
"grad_norm": 0.792615532875061,
|
|
"learning_rate": 6.427872007222777e-05,
|
|
"loss": 0.0254,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 6.507936507936508,
|
|
"grad_norm": 0.7331106066703796,
|
|
"learning_rate": 6.412780960253436e-05,
|
|
"loss": 0.0307,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 6.523809523809524,
|
|
"grad_norm": 0.7086438536643982,
|
|
"learning_rate": 6.397675913764347e-05,
|
|
"loss": 0.0275,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 6.5396825396825395,
|
|
"grad_norm": 0.8358487486839294,
|
|
"learning_rate": 6.382557017434332e-05,
|
|
"loss": 0.0466,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 6.555555555555555,
|
|
"grad_norm": 0.6510606408119202,
|
|
"learning_rate": 6.367424421079463e-05,
|
|
"loss": 0.037,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 6.571428571428571,
|
|
"grad_norm": 0.8983582854270935,
|
|
"learning_rate": 6.352278274651561e-05,
|
|
"loss": 0.0379,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 6.587301587301587,
|
|
"grad_norm": 0.7613969445228577,
|
|
"learning_rate": 6.337118728236721e-05,
|
|
"loss": 0.0358,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 6.603174603174603,
|
|
"grad_norm": 0.8371831774711609,
|
|
"learning_rate": 6.321945932053822e-05,
|
|
"loss": 0.046,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 6.619047619047619,
|
|
"grad_norm": 0.7133164405822754,
|
|
"learning_rate": 6.306760036453035e-05,
|
|
"loss": 0.0276,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 6.634920634920634,
|
|
"grad_norm": 0.6740472316741943,
|
|
"learning_rate": 6.291561191914333e-05,
|
|
"loss": 0.0383,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 6.650793650793651,
|
|
"grad_norm": 0.6885079741477966,
|
|
"learning_rate": 6.276349549046007e-05,
|
|
"loss": 0.0368,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.8201141953468323,
|
|
"learning_rate": 6.261125258583171e-05,
|
|
"loss": 0.0487,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 6.682539682539683,
|
|
"grad_norm": 0.6679426431655884,
|
|
"learning_rate": 6.245888471386263e-05,
|
|
"loss": 0.0318,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 6.698412698412699,
|
|
"grad_norm": 0.8221629858016968,
|
|
"learning_rate": 6.230639338439549e-05,
|
|
"loss": 0.0392,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 6.714285714285714,
|
|
"grad_norm": 0.7618691921234131,
|
|
"learning_rate": 6.215378010849641e-05,
|
|
"loss": 0.0373,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 6.73015873015873,
|
|
"grad_norm": 0.7761756181716919,
|
|
"learning_rate": 6.200104639843985e-05,
|
|
"loss": 0.0366,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 6.746031746031746,
|
|
"grad_norm": 0.8383869528770447,
|
|
"learning_rate": 6.184819376769364e-05,
|
|
"loss": 0.0375,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 6.761904761904762,
|
|
"grad_norm": 0.78884357213974,
|
|
"learning_rate": 6.169522373090412e-05,
|
|
"loss": 0.0487,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 6.777777777777778,
|
|
"grad_norm": 0.7803629040718079,
|
|
"learning_rate": 6.154213780388092e-05,
|
|
"loss": 0.0373,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 6.7936507936507935,
|
|
"grad_norm": 0.5684940218925476,
|
|
"learning_rate": 6.138893750358212e-05,
|
|
"loss": 0.0297,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 6.809523809523809,
|
|
"grad_norm": 0.7369560599327087,
|
|
"learning_rate": 6.123562434809912e-05,
|
|
"loss": 0.0372,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 6.825396825396825,
|
|
"grad_norm": 0.47202688455581665,
|
|
"learning_rate": 6.108219985664161e-05,
|
|
"loss": 0.0243,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 6.841269841269841,
|
|
"grad_norm": 0.6708411574363708,
|
|
"learning_rate": 6.0928665549522554e-05,
|
|
"loss": 0.0348,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 6.857142857142857,
|
|
"grad_norm": 0.8175257444381714,
|
|
"learning_rate": 6.0775022948143115e-05,
|
|
"loss": 0.05,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 6.8730158730158735,
|
|
"grad_norm": 0.7456179261207581,
|
|
"learning_rate": 6.06212735749775e-05,
|
|
"loss": 0.0356,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 6.888888888888889,
|
|
"grad_norm": 0.615135908126831,
|
|
"learning_rate": 6.046741895355802e-05,
|
|
"loss": 0.0292,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 6.904761904761905,
|
|
"grad_norm": 0.6926703453063965,
|
|
"learning_rate": 6.031346060845986e-05,
|
|
"loss": 0.035,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 6.920634920634921,
|
|
"grad_norm": 0.9521751403808594,
|
|
"learning_rate": 6.015940006528602e-05,
|
|
"loss": 0.0478,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 6.936507936507937,
|
|
"grad_norm": 0.6635673642158508,
|
|
"learning_rate": 6.0005238850652234e-05,
|
|
"loss": 0.0405,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 6.9523809523809526,
|
|
"grad_norm": 0.6299306154251099,
|
|
"learning_rate": 5.9850978492171794e-05,
|
|
"loss": 0.0328,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 6.968253968253968,
|
|
"grad_norm": 0.7513844966888428,
|
|
"learning_rate": 5.96966205184404e-05,
|
|
"loss": 0.0335,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 6.984126984126984,
|
|
"grad_norm": 0.9874755144119263,
|
|
"learning_rate": 5.954216645902109e-05,
|
|
"loss": 0.0416,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.8250815272331238,
|
|
"learning_rate": 5.9387617844429e-05,
|
|
"loss": 0.0368,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 7.015873015873016,
|
|
"grad_norm": 0.4338611364364624,
|
|
"learning_rate": 5.923297620611623e-05,
|
|
"loss": 0.0189,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 7.031746031746032,
|
|
"grad_norm": 0.5719791054725647,
|
|
"learning_rate": 5.907824307645669e-05,
|
|
"loss": 0.0169,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 7.0476190476190474,
|
|
"grad_norm": 0.38255706429481506,
|
|
"learning_rate": 5.892341998873089e-05,
|
|
"loss": 0.0186,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 7.063492063492063,
|
|
"grad_norm": 0.3592822253704071,
|
|
"learning_rate": 5.876850847711073e-05,
|
|
"loss": 0.0166,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 7.079365079365079,
|
|
"grad_norm": 0.6182012557983398,
|
|
"learning_rate": 5.861351007664434e-05,
|
|
"loss": 0.0236,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 7.095238095238095,
|
|
"grad_norm": 0.5176107883453369,
|
|
"learning_rate": 5.845842632324088e-05,
|
|
"loss": 0.0253,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 7.111111111111111,
|
|
"grad_norm": 0.4049137830734253,
|
|
"learning_rate": 5.83032587536552e-05,
|
|
"loss": 0.0221,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 7.1269841269841265,
|
|
"grad_norm": 0.4034527540206909,
|
|
"learning_rate": 5.814800890547278e-05,
|
|
"loss": 0.0182,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 7.142857142857143,
|
|
"grad_norm": 0.4478590488433838,
|
|
"learning_rate": 5.799267831709442e-05,
|
|
"loss": 0.0208,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 7.158730158730159,
|
|
"grad_norm": 0.4524051547050476,
|
|
"learning_rate": 5.78372685277209e-05,
|
|
"loss": 0.0147,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 7.174603174603175,
|
|
"grad_norm": 0.4985044300556183,
|
|
"learning_rate": 5.7681781077337905e-05,
|
|
"loss": 0.0198,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 7.190476190476191,
|
|
"grad_norm": 0.4616793692111969,
|
|
"learning_rate": 5.752621750670068e-05,
|
|
"loss": 0.0171,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 7.2063492063492065,
|
|
"grad_norm": 0.4235040247440338,
|
|
"learning_rate": 5.737057935731868e-05,
|
|
"loss": 0.0159,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 7.222222222222222,
|
|
"grad_norm": 0.42039763927459717,
|
|
"learning_rate": 5.721486817144044e-05,
|
|
"loss": 0.0168,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 7.238095238095238,
|
|
"grad_norm": 0.40982750058174133,
|
|
"learning_rate": 5.705908549203823e-05,
|
|
"loss": 0.0153,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 7.253968253968254,
|
|
"grad_norm": 0.44600027799606323,
|
|
"learning_rate": 5.690323286279274e-05,
|
|
"loss": 0.0167,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 7.26984126984127,
|
|
"grad_norm": 0.5298761129379272,
|
|
"learning_rate": 5.674731182807781e-05,
|
|
"loss": 0.0158,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 7.285714285714286,
|
|
"grad_norm": 0.3657887279987335,
|
|
"learning_rate": 5.659132393294514e-05,
|
|
"loss": 0.0188,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 7.301587301587301,
|
|
"grad_norm": 0.4426786005496979,
|
|
"learning_rate": 5.643527072310891e-05,
|
|
"loss": 0.0197,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 7.317460317460317,
|
|
"grad_norm": 0.5749462842941284,
|
|
"learning_rate": 5.627915374493061e-05,
|
|
"loss": 0.0181,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 7.333333333333333,
|
|
"grad_norm": 0.5059666633605957,
|
|
"learning_rate": 5.612297454540352e-05,
|
|
"loss": 0.0206,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 7.349206349206349,
|
|
"grad_norm": 0.5599040389060974,
|
|
"learning_rate": 5.596673467213756e-05,
|
|
"loss": 0.0148,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 7.365079365079365,
|
|
"grad_norm": 0.5010665059089661,
|
|
"learning_rate": 5.581043567334383e-05,
|
|
"loss": 0.0186,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 7.380952380952381,
|
|
"grad_norm": 0.49025240540504456,
|
|
"learning_rate": 5.5654079097819345e-05,
|
|
"loss": 0.0237,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 7.396825396825397,
|
|
"grad_norm": 0.4369467794895172,
|
|
"learning_rate": 5.5497666494931654e-05,
|
|
"loss": 0.017,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 7.412698412698413,
|
|
"grad_norm": 0.4754543602466583,
|
|
"learning_rate": 5.5341199414603493e-05,
|
|
"loss": 0.0202,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 7.428571428571429,
|
|
"grad_norm": 0.4779890179634094,
|
|
"learning_rate": 5.518467940729739e-05,
|
|
"loss": 0.0221,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 7.444444444444445,
|
|
"grad_norm": 0.5082346796989441,
|
|
"learning_rate": 5.502810802400039e-05,
|
|
"loss": 0.0191,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 7.4603174603174605,
|
|
"grad_norm": 0.4045872688293457,
|
|
"learning_rate": 5.487148681620862e-05,
|
|
"loss": 0.0181,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 7.476190476190476,
|
|
"grad_norm": 0.306020587682724,
|
|
"learning_rate": 5.4714817335911894e-05,
|
|
"loss": 0.011,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 7.492063492063492,
|
|
"grad_norm": 0.4682234823703766,
|
|
"learning_rate": 5.455810113557839e-05,
|
|
"loss": 0.0126,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 7.507936507936508,
|
|
"grad_norm": 0.46444806456565857,
|
|
"learning_rate": 5.440133976813926e-05,
|
|
"loss": 0.0205,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 7.523809523809524,
|
|
"grad_norm": 1.0911283493041992,
|
|
"learning_rate": 5.4244534786973214e-05,
|
|
"loss": 0.0209,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 7.5396825396825395,
|
|
"grad_norm": 0.4805389642715454,
|
|
"learning_rate": 5.40876877458911e-05,
|
|
"loss": 0.0186,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 7.555555555555555,
|
|
"grad_norm": 0.5102893114089966,
|
|
"learning_rate": 5.3930800199120616e-05,
|
|
"loss": 0.02,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 7.571428571428571,
|
|
"grad_norm": 0.44652751088142395,
|
|
"learning_rate": 5.377387370129079e-05,
|
|
"loss": 0.0176,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 7.587301587301587,
|
|
"grad_norm": 0.5319653153419495,
|
|
"learning_rate": 5.361690980741663e-05,
|
|
"loss": 0.0276,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 7.603174603174603,
|
|
"grad_norm": 0.42663267254829407,
|
|
"learning_rate": 5.345991007288371e-05,
|
|
"loss": 0.0165,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 7.619047619047619,
|
|
"grad_norm": 0.5141676068305969,
|
|
"learning_rate": 5.330287605343279e-05,
|
|
"loss": 0.0206,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 7.634920634920634,
|
|
"grad_norm": 0.37202200293540955,
|
|
"learning_rate": 5.314580930514431e-05,
|
|
"loss": 0.014,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 7.650793650793651,
|
|
"grad_norm": 0.5131287574768066,
|
|
"learning_rate": 5.298871138442307e-05,
|
|
"loss": 0.018,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 7.666666666666667,
|
|
"grad_norm": 0.5241144895553589,
|
|
"learning_rate": 5.283158384798275e-05,
|
|
"loss": 0.0174,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 7.682539682539683,
|
|
"grad_norm": 0.4443790316581726,
|
|
"learning_rate": 5.267442825283048e-05,
|
|
"loss": 0.0194,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 7.698412698412699,
|
|
"grad_norm": 0.46092358231544495,
|
|
"learning_rate": 5.2517246156251455e-05,
|
|
"loss": 0.0138,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 7.714285714285714,
|
|
"grad_norm": 0.5907039046287537,
|
|
"learning_rate": 5.236003911579345e-05,
|
|
"loss": 0.028,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 7.73015873015873,
|
|
"grad_norm": 0.5472407341003418,
|
|
"learning_rate": 5.220280868925145e-05,
|
|
"loss": 0.0201,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 7.746031746031746,
|
|
"grad_norm": 0.522294282913208,
|
|
"learning_rate": 5.204555643465215e-05,
|
|
"loss": 0.021,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 7.761904761904762,
|
|
"grad_norm": 0.5975657105445862,
|
|
"learning_rate": 5.1888283910238555e-05,
|
|
"loss": 0.0198,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 7.777777777777778,
|
|
"grad_norm": 0.6385313868522644,
|
|
"learning_rate": 5.173099267445451e-05,
|
|
"loss": 0.0222,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 7.7936507936507935,
|
|
"grad_norm": 0.5334087014198303,
|
|
"learning_rate": 5.157368428592933e-05,
|
|
"loss": 0.0183,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 7.809523809523809,
|
|
"grad_norm": 0.6203488111495972,
|
|
"learning_rate": 5.1416360303462206e-05,
|
|
"loss": 0.0329,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 7.825396825396825,
|
|
"grad_norm": 0.5505366325378418,
|
|
"learning_rate": 5.125902228600693e-05,
|
|
"loss": 0.0169,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 7.841269841269841,
|
|
"grad_norm": 0.4648919999599457,
|
|
"learning_rate": 5.110167179265636e-05,
|
|
"loss": 0.0182,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 7.857142857142857,
|
|
"grad_norm": 0.3623007833957672,
|
|
"learning_rate": 5.094431038262693e-05,
|
|
"loss": 0.0155,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 7.8730158730158735,
|
|
"grad_norm": 0.4798755347728729,
|
|
"learning_rate": 5.078693961524329e-05,
|
|
"loss": 0.02,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 7.888888888888889,
|
|
"grad_norm": 0.5778583288192749,
|
|
"learning_rate": 5.062956104992285e-05,
|
|
"loss": 0.0318,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 7.904761904761905,
|
|
"grad_norm": 0.37309491634368896,
|
|
"learning_rate": 5.0472176246160184e-05,
|
|
"loss": 0.0116,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 7.920634920634921,
|
|
"grad_norm": 0.6432266235351562,
|
|
"learning_rate": 5.031478676351179e-05,
|
|
"loss": 0.0188,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 7.936507936507937,
|
|
"grad_norm": 0.43156516551971436,
|
|
"learning_rate": 5.01573941615805e-05,
|
|
"loss": 0.0179,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 7.9523809523809526,
|
|
"grad_norm": 0.553710401058197,
|
|
"learning_rate": 5e-05,
|
|
"loss": 0.0192,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 7.968253968253968,
|
|
"grad_norm": 0.39197760820388794,
|
|
"learning_rate": 4.984260583841953e-05,
|
|
"loss": 0.0177,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 7.984126984126984,
|
|
"grad_norm": 0.5970882773399353,
|
|
"learning_rate": 4.9685213236488216e-05,
|
|
"loss": 0.025,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"grad_norm": 0.44673952460289,
|
|
"learning_rate": 4.9527823753839834e-05,
|
|
"loss": 0.0121,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 8.015873015873016,
|
|
"grad_norm": 0.3288459777832031,
|
|
"learning_rate": 4.937043895007717e-05,
|
|
"loss": 0.0167,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 8.031746031746032,
|
|
"grad_norm": 0.410833477973938,
|
|
"learning_rate": 4.9213060384756716e-05,
|
|
"loss": 0.0147,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 8.047619047619047,
|
|
"grad_norm": 0.34271591901779175,
|
|
"learning_rate": 4.9055689617373084e-05,
|
|
"loss": 0.0108,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 8.063492063492063,
|
|
"grad_norm": 0.22280845046043396,
|
|
"learning_rate": 4.8898328207343666e-05,
|
|
"loss": 0.0076,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 8.079365079365079,
|
|
"grad_norm": 0.404482364654541,
|
|
"learning_rate": 4.874097771399308e-05,
|
|
"loss": 0.0124,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 8.095238095238095,
|
|
"grad_norm": 0.3690173327922821,
|
|
"learning_rate": 4.858363969653781e-05,
|
|
"loss": 0.0167,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 8.11111111111111,
|
|
"grad_norm": 0.31355366110801697,
|
|
"learning_rate": 4.8426315714070684e-05,
|
|
"loss": 0.0143,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 8.126984126984127,
|
|
"grad_norm": 0.24391916394233704,
|
|
"learning_rate": 4.8269007325545506e-05,
|
|
"loss": 0.0111,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 8.142857142857142,
|
|
"grad_norm": 0.39755526185035706,
|
|
"learning_rate": 4.8111716089761456e-05,
|
|
"loss": 0.0145,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 8.158730158730158,
|
|
"grad_norm": 0.27595722675323486,
|
|
"learning_rate": 4.7954443565347865e-05,
|
|
"loss": 0.01,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 8.174603174603174,
|
|
"grad_norm": 0.304116815328598,
|
|
"learning_rate": 4.779719131074857e-05,
|
|
"loss": 0.0105,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 8.19047619047619,
|
|
"grad_norm": 0.2722436487674713,
|
|
"learning_rate": 4.7639960884206576e-05,
|
|
"loss": 0.0089,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 8.206349206349206,
|
|
"grad_norm": 0.2728959321975708,
|
|
"learning_rate": 4.7482753843748564e-05,
|
|
"loss": 0.0108,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 8.222222222222221,
|
|
"grad_norm": 0.2411596029996872,
|
|
"learning_rate": 4.7325571747169545e-05,
|
|
"loss": 0.0085,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 8.238095238095237,
|
|
"grad_norm": 0.23578131198883057,
|
|
"learning_rate": 4.716841615201726e-05,
|
|
"loss": 0.008,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 8.253968253968253,
|
|
"grad_norm": 0.3611275255680084,
|
|
"learning_rate": 4.7011288615576934e-05,
|
|
"loss": 0.0141,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 8.26984126984127,
|
|
"grad_norm": 0.3158744275569916,
|
|
"learning_rate": 4.6854190694855694e-05,
|
|
"loss": 0.0115,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 8.285714285714286,
|
|
"grad_norm": 0.40253180265426636,
|
|
"learning_rate": 4.6697123946567227e-05,
|
|
"loss": 0.013,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 8.301587301587302,
|
|
"grad_norm": 0.290996789932251,
|
|
"learning_rate": 4.65400899271163e-05,
|
|
"loss": 0.0103,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 8.317460317460318,
|
|
"grad_norm": 0.37486013770103455,
|
|
"learning_rate": 4.63830901925834e-05,
|
|
"loss": 0.0155,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 8.333333333333334,
|
|
"grad_norm": 0.42451635003089905,
|
|
"learning_rate": 4.6226126298709224e-05,
|
|
"loss": 0.0175,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 8.34920634920635,
|
|
"grad_norm": 0.4372078776359558,
|
|
"learning_rate": 4.60691998008794e-05,
|
|
"loss": 0.0203,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 8.365079365079366,
|
|
"grad_norm": 0.3044324517250061,
|
|
"learning_rate": 4.5912312254108905e-05,
|
|
"loss": 0.0139,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 8.380952380952381,
|
|
"grad_norm": 0.39817896485328674,
|
|
"learning_rate": 4.575546521302681e-05,
|
|
"loss": 0.0135,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 8.396825396825397,
|
|
"grad_norm": 0.3401551842689514,
|
|
"learning_rate": 4.5598660231860746e-05,
|
|
"loss": 0.0107,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 8.412698412698413,
|
|
"grad_norm": 0.3589102625846863,
|
|
"learning_rate": 4.544189886442162e-05,
|
|
"loss": 0.0131,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 8.428571428571429,
|
|
"grad_norm": 0.4164977967739105,
|
|
"learning_rate": 4.528518266408811e-05,
|
|
"loss": 0.015,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 8.444444444444445,
|
|
"grad_norm": 0.5136562585830688,
|
|
"learning_rate": 4.5128513183791386e-05,
|
|
"loss": 0.016,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 8.46031746031746,
|
|
"grad_norm": 0.36152708530426025,
|
|
"learning_rate": 4.49718919759996e-05,
|
|
"loss": 0.015,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 8.476190476190476,
|
|
"grad_norm": 0.2721676230430603,
|
|
"learning_rate": 4.481532059270262e-05,
|
|
"loss": 0.0083,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 8.492063492063492,
|
|
"grad_norm": 0.2820744216442108,
|
|
"learning_rate": 4.465880058539652e-05,
|
|
"loss": 0.01,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 8.507936507936508,
|
|
"grad_norm": 0.3638380467891693,
|
|
"learning_rate": 4.450233350506836e-05,
|
|
"loss": 0.0101,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 8.523809523809524,
|
|
"grad_norm": 0.3278939723968506,
|
|
"learning_rate": 4.4345920902180647e-05,
|
|
"loss": 0.0104,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 8.53968253968254,
|
|
"grad_norm": 0.3926644027233124,
|
|
"learning_rate": 4.418956432665618e-05,
|
|
"loss": 0.0125,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 8.555555555555555,
|
|
"grad_norm": 0.3797055780887604,
|
|
"learning_rate": 4.403326532786245e-05,
|
|
"loss": 0.0111,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 8.571428571428571,
|
|
"grad_norm": 0.26904818415641785,
|
|
"learning_rate": 4.387702545459649e-05,
|
|
"loss": 0.009,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 8.587301587301587,
|
|
"grad_norm": 0.32789549231529236,
|
|
"learning_rate": 4.3720846255069406e-05,
|
|
"loss": 0.0075,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 8.603174603174603,
|
|
"grad_norm": 0.19732752442359924,
|
|
"learning_rate": 4.356472927689109e-05,
|
|
"loss": 0.008,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 8.619047619047619,
|
|
"grad_norm": 0.23964589834213257,
|
|
"learning_rate": 4.3408676067054866e-05,
|
|
"loss": 0.0102,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 8.634920634920634,
|
|
"grad_norm": 0.4041917026042938,
|
|
"learning_rate": 4.32526881719222e-05,
|
|
"loss": 0.0188,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 8.65079365079365,
|
|
"grad_norm": 0.4420047998428345,
|
|
"learning_rate": 4.3096767137207256e-05,
|
|
"loss": 0.0138,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 8.666666666666666,
|
|
"grad_norm": 0.43801549077033997,
|
|
"learning_rate": 4.2940914507961775e-05,
|
|
"loss": 0.012,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 8.682539682539682,
|
|
"grad_norm": 0.24375741183757782,
|
|
"learning_rate": 4.278513182855956e-05,
|
|
"loss": 0.0078,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 8.698412698412698,
|
|
"grad_norm": 0.48987898230552673,
|
|
"learning_rate": 4.262942064268134e-05,
|
|
"loss": 0.0184,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 8.714285714285714,
|
|
"grad_norm": 0.38676026463508606,
|
|
"learning_rate": 4.247378249329933e-05,
|
|
"loss": 0.0122,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 8.73015873015873,
|
|
"grad_norm": 0.20567281544208527,
|
|
"learning_rate": 4.23182189226621e-05,
|
|
"loss": 0.0076,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 8.746031746031747,
|
|
"grad_norm": 0.28698331117630005,
|
|
"learning_rate": 4.21627314722791e-05,
|
|
"loss": 0.0084,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 8.761904761904763,
|
|
"grad_norm": 0.3160061836242676,
|
|
"learning_rate": 4.20073216829056e-05,
|
|
"loss": 0.0111,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 8.777777777777779,
|
|
"grad_norm": 0.2930062711238861,
|
|
"learning_rate": 4.185199109452721e-05,
|
|
"loss": 0.0107,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 8.793650793650794,
|
|
"grad_norm": 0.3634200692176819,
|
|
"learning_rate": 4.169674124634481e-05,
|
|
"loss": 0.0101,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 8.80952380952381,
|
|
"grad_norm": 0.37438124418258667,
|
|
"learning_rate": 4.1541573676759126e-05,
|
|
"loss": 0.014,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 8.825396825396826,
|
|
"grad_norm": 0.3476526141166687,
|
|
"learning_rate": 4.138648992335566e-05,
|
|
"loss": 0.0129,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 8.841269841269842,
|
|
"grad_norm": 0.18964612483978271,
|
|
"learning_rate": 4.12314915228893e-05,
|
|
"loss": 0.0062,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 8.857142857142858,
|
|
"grad_norm": 0.35653162002563477,
|
|
"learning_rate": 4.107658001126913e-05,
|
|
"loss": 0.0131,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 8.873015873015873,
|
|
"grad_norm": 0.38258370757102966,
|
|
"learning_rate": 4.092175692354333e-05,
|
|
"loss": 0.0119,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 8.88888888888889,
|
|
"grad_norm": 0.2177157700061798,
|
|
"learning_rate": 4.0767023793883785e-05,
|
|
"loss": 0.0062,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 8.904761904761905,
|
|
"grad_norm": 0.3157006502151489,
|
|
"learning_rate": 4.0612382155571026e-05,
|
|
"loss": 0.0116,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 8.920634920634921,
|
|
"grad_norm": 0.5421932935714722,
|
|
"learning_rate": 4.045783354097893e-05,
|
|
"loss": 0.0251,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 8.936507936507937,
|
|
"grad_norm": 0.4682704210281372,
|
|
"learning_rate": 4.0303379481559623e-05,
|
|
"loss": 0.0193,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 8.952380952380953,
|
|
"grad_norm": 0.36263760924339294,
|
|
"learning_rate": 4.0149021507828224e-05,
|
|
"loss": 0.0155,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 8.968253968253968,
|
|
"grad_norm": 0.3147249221801758,
|
|
"learning_rate": 3.9994761149347784e-05,
|
|
"loss": 0.0114,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 8.984126984126984,
|
|
"grad_norm": 0.41839832067489624,
|
|
"learning_rate": 3.984059993471399e-05,
|
|
"loss": 0.0154,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"grad_norm": 0.37561434507369995,
|
|
"learning_rate": 3.968653939154017e-05,
|
|
"loss": 0.0103,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 9.015873015873016,
|
|
"grad_norm": 0.31883716583251953,
|
|
"learning_rate": 3.9532581046442e-05,
|
|
"loss": 0.0082,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 9.031746031746032,
|
|
"grad_norm": 0.23053289949893951,
|
|
"learning_rate": 3.937872642502252e-05,
|
|
"loss": 0.0073,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 9.047619047619047,
|
|
"grad_norm": 0.25523173809051514,
|
|
"learning_rate": 3.9224977051856904e-05,
|
|
"loss": 0.008,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 9.063492063492063,
|
|
"grad_norm": 0.20138682425022125,
|
|
"learning_rate": 3.907133445047747e-05,
|
|
"loss": 0.007,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 9.079365079365079,
|
|
"grad_norm": 0.2522388696670532,
|
|
"learning_rate": 3.8917800143358404e-05,
|
|
"loss": 0.0064,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 9.095238095238095,
|
|
"grad_norm": 0.32254767417907715,
|
|
"learning_rate": 3.8764375651900906e-05,
|
|
"loss": 0.0121,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 9.11111111111111,
|
|
"grad_norm": 0.2257680743932724,
|
|
"learning_rate": 3.861106249641789e-05,
|
|
"loss": 0.0069,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 9.126984126984127,
|
|
"grad_norm": 0.20319634675979614,
|
|
"learning_rate": 3.84578621961191e-05,
|
|
"loss": 0.0083,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 9.142857142857142,
|
|
"grad_norm": 0.21617092192173004,
|
|
"learning_rate": 3.830477626909589e-05,
|
|
"loss": 0.0081,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 9.158730158730158,
|
|
"grad_norm": 0.3438735902309418,
|
|
"learning_rate": 3.8151806232306374e-05,
|
|
"loss": 0.0113,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 9.174603174603174,
|
|
"grad_norm": 0.29311296343803406,
|
|
"learning_rate": 3.7998953601560175e-05,
|
|
"loss": 0.0097,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 9.19047619047619,
|
|
"grad_norm": 0.16206145286560059,
|
|
"learning_rate": 3.784621989150361e-05,
|
|
"loss": 0.0059,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 9.206349206349206,
|
|
"grad_norm": 0.22121606767177582,
|
|
"learning_rate": 3.769360661560453e-05,
|
|
"loss": 0.0084,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 9.222222222222221,
|
|
"grad_norm": 0.25994566082954407,
|
|
"learning_rate": 3.75411152861374e-05,
|
|
"loss": 0.0104,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 9.238095238095237,
|
|
"grad_norm": 0.18151433765888214,
|
|
"learning_rate": 3.73887474141683e-05,
|
|
"loss": 0.0056,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 9.253968253968253,
|
|
"grad_norm": 0.18867704272270203,
|
|
"learning_rate": 3.723650450953994e-05,
|
|
"loss": 0.006,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 9.26984126984127,
|
|
"grad_norm": 0.3016846776008606,
|
|
"learning_rate": 3.708438808085668e-05,
|
|
"loss": 0.0136,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 9.285714285714286,
|
|
"grad_norm": 0.41189849376678467,
|
|
"learning_rate": 3.693239963546967e-05,
|
|
"loss": 0.0168,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 9.301587301587302,
|
|
"grad_norm": 0.2735559940338135,
|
|
"learning_rate": 3.6780540679461784e-05,
|
|
"loss": 0.0097,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 9.317460317460318,
|
|
"grad_norm": 0.23788434267044067,
|
|
"learning_rate": 3.662881271763279e-05,
|
|
"loss": 0.0068,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 9.333333333333334,
|
|
"grad_norm": 0.14663733541965485,
|
|
"learning_rate": 3.64772172534844e-05,
|
|
"loss": 0.0046,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 9.34920634920635,
|
|
"grad_norm": 0.3166827857494354,
|
|
"learning_rate": 3.63257557892054e-05,
|
|
"loss": 0.0082,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 9.365079365079366,
|
|
"grad_norm": 0.24929101765155792,
|
|
"learning_rate": 3.6174429825656685e-05,
|
|
"loss": 0.0104,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 9.380952380952381,
|
|
"grad_norm": 0.27766042947769165,
|
|
"learning_rate": 3.602324086235655e-05,
|
|
"loss": 0.0079,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 9.396825396825397,
|
|
"grad_norm": 0.25808480381965637,
|
|
"learning_rate": 3.587219039746564e-05,
|
|
"loss": 0.0076,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 9.412698412698413,
|
|
"grad_norm": 0.2501043677330017,
|
|
"learning_rate": 3.572127992777223e-05,
|
|
"loss": 0.0103,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 9.428571428571429,
|
|
"grad_norm": 0.2836500108242035,
|
|
"learning_rate": 3.557051094867735e-05,
|
|
"loss": 0.0082,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 9.444444444444445,
|
|
"grad_norm": 0.3479957580566406,
|
|
"learning_rate": 3.541988495417997e-05,
|
|
"loss": 0.0126,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 9.46031746031746,
|
|
"grad_norm": 0.2896635830402374,
|
|
"learning_rate": 3.5269403436862175e-05,
|
|
"loss": 0.0072,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 9.476190476190476,
|
|
"grad_norm": 0.2840765416622162,
|
|
"learning_rate": 3.511906788787447e-05,
|
|
"loss": 0.0101,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 9.492063492063492,
|
|
"grad_norm": 0.3210354745388031,
|
|
"learning_rate": 3.496887979692084e-05,
|
|
"loss": 0.0085,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 9.507936507936508,
|
|
"grad_norm": 0.27587252855300903,
|
|
"learning_rate": 3.481884065224415e-05,
|
|
"loss": 0.0087,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 9.523809523809524,
|
|
"grad_norm": 0.3219284117221832,
|
|
"learning_rate": 3.466895194061128e-05,
|
|
"loss": 0.009,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 9.53968253968254,
|
|
"grad_norm": 0.17630243301391602,
|
|
"learning_rate": 3.451921514729848e-05,
|
|
"loss": 0.0059,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 9.555555555555555,
|
|
"grad_norm": 0.25327348709106445,
|
|
"learning_rate": 3.436963175607656e-05,
|
|
"loss": 0.0081,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 9.571428571428571,
|
|
"grad_norm": 0.3768535554409027,
|
|
"learning_rate": 3.422020324919632e-05,
|
|
"loss": 0.0113,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 9.587301587301587,
|
|
"grad_norm": 0.1651473492383957,
|
|
"learning_rate": 3.4070931107373675e-05,
|
|
"loss": 0.0049,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 9.603174603174603,
|
|
"grad_norm": 0.23368506133556366,
|
|
"learning_rate": 3.39218168097752e-05,
|
|
"loss": 0.008,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 9.619047619047619,
|
|
"grad_norm": 0.1572844684123993,
|
|
"learning_rate": 3.377286183400328e-05,
|
|
"loss": 0.0048,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 9.634920634920634,
|
|
"grad_norm": 0.2425893396139145,
|
|
"learning_rate": 3.362406765608158e-05,
|
|
"loss": 0.0084,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 9.65079365079365,
|
|
"grad_norm": 0.280091255903244,
|
|
"learning_rate": 3.3475435750440356e-05,
|
|
"loss": 0.0114,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 9.666666666666666,
|
|
"grad_norm": 0.34356409311294556,
|
|
"learning_rate": 3.332696758990197e-05,
|
|
"loss": 0.0101,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 9.682539682539682,
|
|
"grad_norm": 0.26575177907943726,
|
|
"learning_rate": 3.3178664645666066e-05,
|
|
"loss": 0.0076,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 9.698412698412698,
|
|
"grad_norm": 0.38795173168182373,
|
|
"learning_rate": 3.303052838729525e-05,
|
|
"loss": 0.0141,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 9.714285714285714,
|
|
"grad_norm": 0.17991788685321808,
|
|
"learning_rate": 3.2882560282700336e-05,
|
|
"loss": 0.0071,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 9.73015873015873,
|
|
"grad_norm": 0.26826414465904236,
|
|
"learning_rate": 3.273476179812588e-05,
|
|
"loss": 0.0084,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 9.746031746031747,
|
|
"grad_norm": 0.4353213906288147,
|
|
"learning_rate": 3.258713439813566e-05,
|
|
"loss": 0.0138,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 9.761904761904763,
|
|
"grad_norm": 0.27039167284965515,
|
|
"learning_rate": 3.243967954559811e-05,
|
|
"loss": 0.0075,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 9.777777777777779,
|
|
"grad_norm": 0.1729506552219391,
|
|
"learning_rate": 3.229239870167191e-05,
|
|
"loss": 0.0066,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 9.793650793650794,
|
|
"grad_norm": 0.31375908851623535,
|
|
"learning_rate": 3.2145293325791395e-05,
|
|
"loss": 0.0091,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 9.80952380952381,
|
|
"grad_norm": 0.2373589277267456,
|
|
"learning_rate": 3.199836487565222e-05,
|
|
"loss": 0.0077,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 9.825396825396826,
|
|
"grad_norm": 0.3218036890029907,
|
|
"learning_rate": 3.1851614807196774e-05,
|
|
"loss": 0.0142,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 9.841269841269842,
|
|
"grad_norm": 0.2621251940727234,
|
|
"learning_rate": 3.170504457459989e-05,
|
|
"loss": 0.0085,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 9.857142857142858,
|
|
"grad_norm": 0.2235831618309021,
|
|
"learning_rate": 3.155865563025433e-05,
|
|
"loss": 0.0085,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 9.873015873015873,
|
|
"grad_norm": 0.3102441728115082,
|
|
"learning_rate": 3.1412449424756474e-05,
|
|
"loss": 0.0091,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 9.88888888888889,
|
|
"grad_norm": 0.3454819321632385,
|
|
"learning_rate": 3.1266427406891856e-05,
|
|
"loss": 0.0078,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 9.904761904761905,
|
|
"grad_norm": 0.1699669510126114,
|
|
"learning_rate": 3.112059102362093e-05,
|
|
"loss": 0.005,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 9.920634920634921,
|
|
"grad_norm": 0.21184861660003662,
|
|
"learning_rate": 3.0974941720064585e-05,
|
|
"loss": 0.0059,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 9.936507936507937,
|
|
"grad_norm": 0.21373149752616882,
|
|
"learning_rate": 3.082948093948997e-05,
|
|
"loss": 0.0067,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 9.952380952380953,
|
|
"grad_norm": 0.17170457541942596,
|
|
"learning_rate": 3.0684210123296055e-05,
|
|
"loss": 0.0061,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 9.968253968253968,
|
|
"grad_norm": 0.33514630794525146,
|
|
"learning_rate": 3.053913071099947e-05,
|
|
"loss": 0.0136,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 9.984126984126984,
|
|
"grad_norm": 0.34444811940193176,
|
|
"learning_rate": 3.0394244140220163e-05,
|
|
"loss": 0.0129,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"grad_norm": 0.2810363173484802,
|
|
"learning_rate": 3.0249551846667207e-05,
|
|
"loss": 0.0072,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 10.015873015873016,
|
|
"grad_norm": 0.16898448765277863,
|
|
"learning_rate": 3.010505526412447e-05,
|
|
"loss": 0.0057,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 10.031746031746032,
|
|
"grad_norm": 0.27064862847328186,
|
|
"learning_rate": 2.996075582443658e-05,
|
|
"loss": 0.0081,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 10.047619047619047,
|
|
"grad_norm": 0.11674167960882187,
|
|
"learning_rate": 2.981665495749457e-05,
|
|
"loss": 0.0044,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 10.063492063492063,
|
|
"grad_norm": 0.18693989515304565,
|
|
"learning_rate": 2.9672754091221805e-05,
|
|
"loss": 0.0071,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 10.079365079365079,
|
|
"grad_norm": 0.19624684751033783,
|
|
"learning_rate": 2.9529054651559772e-05,
|
|
"loss": 0.0065,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 10.095238095238095,
|
|
"grad_norm": 0.13836269080638885,
|
|
"learning_rate": 2.938555806245406e-05,
|
|
"loss": 0.0045,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 10.11111111111111,
|
|
"grad_norm": 0.2417069971561432,
|
|
"learning_rate": 2.9242265745840063e-05,
|
|
"loss": 0.0091,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 10.126984126984127,
|
|
"grad_norm": 0.18066619336605072,
|
|
"learning_rate": 2.9099179121629117e-05,
|
|
"loss": 0.006,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 10.142857142857142,
|
|
"grad_norm": 0.2307615429162979,
|
|
"learning_rate": 2.895629960769417e-05,
|
|
"loss": 0.0078,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 10.158730158730158,
|
|
"grad_norm": 0.1858942061662674,
|
|
"learning_rate": 2.881362861985606e-05,
|
|
"loss": 0.007,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 10.174603174603174,
|
|
"grad_norm": 0.20081129670143127,
|
|
"learning_rate": 2.867116757186911e-05,
|
|
"loss": 0.0073,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 10.19047619047619,
|
|
"grad_norm": 0.2889654338359833,
|
|
"learning_rate": 2.8528917875407433e-05,
|
|
"loss": 0.0088,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 10.206349206349206,
|
|
"grad_norm": 0.22024375200271606,
|
|
"learning_rate": 2.838688094005078e-05,
|
|
"loss": 0.0061,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 10.222222222222221,
|
|
"grad_norm": 0.2205890566110611,
|
|
"learning_rate": 2.8245058173270622e-05,
|
|
"loss": 0.0072,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 10.238095238095237,
|
|
"grad_norm": 0.21441209316253662,
|
|
"learning_rate": 2.8103450980416136e-05,
|
|
"loss": 0.0054,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 10.253968253968253,
|
|
"grad_norm": 0.18930909037590027,
|
|
"learning_rate": 2.796206076470044e-05,
|
|
"loss": 0.0066,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 10.26984126984127,
|
|
"grad_norm": 0.16868965327739716,
|
|
"learning_rate": 2.7820888927186483e-05,
|
|
"loss": 0.0048,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 10.285714285714286,
|
|
"grad_norm": 0.3065090775489807,
|
|
"learning_rate": 2.7679936866773315e-05,
|
|
"loss": 0.0088,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 10.301587301587302,
|
|
"grad_norm": 0.21105839312076569,
|
|
"learning_rate": 2.753920598018217e-05,
|
|
"loss": 0.0057,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 10.317460317460318,
|
|
"grad_norm": 0.07848194986581802,
|
|
"learning_rate": 2.739869766194263e-05,
|
|
"loss": 0.0031,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 10.333333333333334,
|
|
"grad_norm": 0.23540142178535461,
|
|
"learning_rate": 2.7258413304378734e-05,
|
|
"loss": 0.0078,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 10.34920634920635,
|
|
"grad_norm": 0.2934277057647705,
|
|
"learning_rate": 2.7118354297595396e-05,
|
|
"loss": 0.0065,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 10.365079365079366,
|
|
"grad_norm": 0.2042340338230133,
|
|
"learning_rate": 2.6978522029464325e-05,
|
|
"loss": 0.005,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 10.380952380952381,
|
|
"grad_norm": 0.2258983999490738,
|
|
"learning_rate": 2.683891788561055e-05,
|
|
"loss": 0.0074,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 10.396825396825397,
|
|
"grad_norm": 0.18975599110126495,
|
|
"learning_rate": 2.669954324939852e-05,
|
|
"loss": 0.0071,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 10.412698412698413,
|
|
"grad_norm": 0.16135640442371368,
|
|
"learning_rate": 2.6560399501918465e-05,
|
|
"loss": 0.0058,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 10.428571428571429,
|
|
"grad_norm": 0.30178365111351013,
|
|
"learning_rate": 2.6421488021972673e-05,
|
|
"loss": 0.0086,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 10.444444444444445,
|
|
"grad_norm": 0.3351801037788391,
|
|
"learning_rate": 2.6282810186061862e-05,
|
|
"loss": 0.0132,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 10.46031746031746,
|
|
"grad_norm": 0.25116395950317383,
|
|
"learning_rate": 2.6144367368371535e-05,
|
|
"loss": 0.0081,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 10.476190476190476,
|
|
"grad_norm": 0.2531328797340393,
|
|
"learning_rate": 2.600616094075835e-05,
|
|
"loss": 0.0082,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 10.492063492063492,
|
|
"grad_norm": 0.22533273696899414,
|
|
"learning_rate": 2.5868192272736514e-05,
|
|
"loss": 0.0065,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 10.507936507936508,
|
|
"grad_norm": 0.18789933621883392,
|
|
"learning_rate": 2.5730462731464273e-05,
|
|
"loss": 0.0048,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 10.523809523809524,
|
|
"grad_norm": 0.2593654990196228,
|
|
"learning_rate": 2.5592973681730236e-05,
|
|
"loss": 0.008,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 10.53968253968254,
|
|
"grad_norm": 0.2563331425189972,
|
|
"learning_rate": 2.5455726485940012e-05,
|
|
"loss": 0.0099,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 10.555555555555555,
|
|
"grad_norm": 0.2012241631746292,
|
|
"learning_rate": 2.5318722504102604e-05,
|
|
"loss": 0.0051,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 10.571428571428571,
|
|
"grad_norm": 0.3327932059764862,
|
|
"learning_rate": 2.5181963093816962e-05,
|
|
"loss": 0.0077,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 10.587301587301587,
|
|
"grad_norm": 0.2965086102485657,
|
|
"learning_rate": 2.504544961025853e-05,
|
|
"loss": 0.0089,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 10.603174603174603,
|
|
"grad_norm": 0.2296365350484848,
|
|
"learning_rate": 2.4909183406165836e-05,
|
|
"loss": 0.0068,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 10.619047619047619,
|
|
"grad_norm": 0.3457624018192291,
|
|
"learning_rate": 2.4773165831827018e-05,
|
|
"loss": 0.0083,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 10.634920634920634,
|
|
"grad_norm": 0.20112329721450806,
|
|
"learning_rate": 2.4637398235066527e-05,
|
|
"loss": 0.0061,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 10.65079365079365,
|
|
"grad_norm": 0.19829870760440826,
|
|
"learning_rate": 2.450188196123177e-05,
|
|
"loss": 0.0063,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 10.666666666666666,
|
|
"grad_norm": 0.17704661190509796,
|
|
"learning_rate": 2.4366618353179644e-05,
|
|
"loss": 0.0045,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 10.682539682539682,
|
|
"grad_norm": 0.27905184030532837,
|
|
"learning_rate": 2.423160875126348e-05,
|
|
"loss": 0.009,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 10.698412698412698,
|
|
"grad_norm": 0.18189361691474915,
|
|
"learning_rate": 2.4096854493319477e-05,
|
|
"loss": 0.0069,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 10.714285714285714,
|
|
"grad_norm": 0.2877546548843384,
|
|
"learning_rate": 2.3962356914653657e-05,
|
|
"loss": 0.0064,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 10.73015873015873,
|
|
"grad_norm": 0.27436089515686035,
|
|
"learning_rate": 2.3828117348028528e-05,
|
|
"loss": 0.009,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 10.746031746031747,
|
|
"grad_norm": 0.11570344120264053,
|
|
"learning_rate": 2.3694137123649946e-05,
|
|
"loss": 0.0038,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 10.761904761904763,
|
|
"grad_norm": 0.29015523195266724,
|
|
"learning_rate": 2.3560417569153796e-05,
|
|
"loss": 0.0079,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 10.777777777777779,
|
|
"grad_norm": 0.23264740407466888,
|
|
"learning_rate": 2.342696000959309e-05,
|
|
"loss": 0.0087,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 10.793650793650794,
|
|
"grad_norm": 0.23853233456611633,
|
|
"learning_rate": 2.3293765767424537e-05,
|
|
"loss": 0.0068,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 10.80952380952381,
|
|
"grad_norm": 0.11449386179447174,
|
|
"learning_rate": 2.3160836162495653e-05,
|
|
"loss": 0.0033,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 10.825396825396826,
|
|
"grad_norm": 0.15624088048934937,
|
|
"learning_rate": 2.3028172512031604e-05,
|
|
"loss": 0.005,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 10.841269841269842,
|
|
"grad_norm": 0.17482654750347137,
|
|
"learning_rate": 2.289577613062218e-05,
|
|
"loss": 0.0053,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 10.857142857142858,
|
|
"grad_norm": 0.1657302975654602,
|
|
"learning_rate": 2.2763648330208688e-05,
|
|
"loss": 0.0044,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 10.873015873015873,
|
|
"grad_norm": 0.3183576762676239,
|
|
"learning_rate": 2.2631790420071064e-05,
|
|
"loss": 0.0087,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 10.88888888888889,
|
|
"grad_norm": 0.2113347351551056,
|
|
"learning_rate": 2.2500203706814856e-05,
|
|
"loss": 0.0057,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 10.904761904761905,
|
|
"grad_norm": 0.20787814259529114,
|
|
"learning_rate": 2.2368889494358235e-05,
|
|
"loss": 0.0066,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 10.920634920634921,
|
|
"grad_norm": 0.19461645185947418,
|
|
"learning_rate": 2.2237849083919142e-05,
|
|
"loss": 0.0056,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 10.936507936507937,
|
|
"grad_norm": 0.3162117302417755,
|
|
"learning_rate": 2.2107083774002364e-05,
|
|
"loss": 0.0102,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 10.952380952380953,
|
|
"grad_norm": 0.1498049944639206,
|
|
"learning_rate": 2.1976594860386597e-05,
|
|
"loss": 0.0054,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 10.968253968253968,
|
|
"grad_norm": 0.25862017273902893,
|
|
"learning_rate": 2.1846383636111743e-05,
|
|
"loss": 0.0063,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 10.984126984126984,
|
|
"grad_norm": 0.2787252962589264,
|
|
"learning_rate": 2.1716451391466008e-05,
|
|
"loss": 0.004,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"grad_norm": 0.5165538787841797,
|
|
"learning_rate": 2.1586799413973135e-05,
|
|
"loss": 0.0117,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 11.015873015873016,
|
|
"grad_norm": 0.16975046694278717,
|
|
"learning_rate": 2.1457428988379635e-05,
|
|
"loss": 0.0053,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 11.031746031746032,
|
|
"grad_norm": 0.09435385465621948,
|
|
"learning_rate": 2.1328341396642093e-05,
|
|
"loss": 0.0032,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 11.047619047619047,
|
|
"grad_norm": 0.0928262248635292,
|
|
"learning_rate": 2.1199537917914386e-05,
|
|
"loss": 0.0031,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 11.063492063492063,
|
|
"grad_norm": 0.1879938691854477,
|
|
"learning_rate": 2.107101982853511e-05,
|
|
"loss": 0.0052,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 11.079365079365079,
|
|
"grad_norm": 0.13509397208690643,
|
|
"learning_rate": 2.0942788402014867e-05,
|
|
"loss": 0.005,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 11.095238095238095,
|
|
"grad_norm": 0.10293649882078171,
|
|
"learning_rate": 2.0814844909023663e-05,
|
|
"loss": 0.0038,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 11.11111111111111,
|
|
"grad_norm": 0.26907050609588623,
|
|
"learning_rate": 2.068719061737831e-05,
|
|
"loss": 0.0086,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 11.126984126984127,
|
|
"grad_norm": 0.1459931880235672,
|
|
"learning_rate": 2.0559826792029884e-05,
|
|
"loss": 0.0045,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 11.142857142857142,
|
|
"grad_norm": 0.10803816467523575,
|
|
"learning_rate": 2.0432754695051136e-05,
|
|
"loss": 0.0034,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 11.158730158730158,
|
|
"grad_norm": 0.07795245200395584,
|
|
"learning_rate": 2.0305975585624058e-05,
|
|
"loss": 0.0031,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 11.174603174603174,
|
|
"grad_norm": 0.14636225998401642,
|
|
"learning_rate": 2.0179490720027372e-05,
|
|
"loss": 0.0055,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 11.19047619047619,
|
|
"grad_norm": 0.0945882797241211,
|
|
"learning_rate": 2.005330135162408e-05,
|
|
"loss": 0.0036,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 11.206349206349206,
|
|
"grad_norm": 0.16662253439426422,
|
|
"learning_rate": 1.992740873084899e-05,
|
|
"loss": 0.0042,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 11.222222222222221,
|
|
"grad_norm": 0.2733784019947052,
|
|
"learning_rate": 1.9801814105196497e-05,
|
|
"loss": 0.0066,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 11.238095238095237,
|
|
"grad_norm": 0.27156999707221985,
|
|
"learning_rate": 1.9676518719207977e-05,
|
|
"loss": 0.0069,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 11.253968253968253,
|
|
"grad_norm": 0.23552264273166656,
|
|
"learning_rate": 1.9551523814459665e-05,
|
|
"loss": 0.0071,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 11.26984126984127,
|
|
"grad_norm": 0.09834027290344238,
|
|
"learning_rate": 1.9426830629550242e-05,
|
|
"loss": 0.0035,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 11.285714285714286,
|
|
"grad_norm": 0.1471029371023178,
|
|
"learning_rate": 1.9302440400088606e-05,
|
|
"loss": 0.0055,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 11.301587301587302,
|
|
"grad_norm": 0.20986461639404297,
|
|
"learning_rate": 1.917835435868155e-05,
|
|
"loss": 0.0063,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 11.317460317460318,
|
|
"grad_norm": 0.29454532265663147,
|
|
"learning_rate": 1.9054573734921714e-05,
|
|
"loss": 0.0098,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 11.333333333333334,
|
|
"grad_norm": 0.1742410510778427,
|
|
"learning_rate": 1.8931099755375203e-05,
|
|
"loss": 0.0044,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 11.34920634920635,
|
|
"grad_norm": 0.13173726201057434,
|
|
"learning_rate": 1.880793364356956e-05,
|
|
"loss": 0.0055,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 11.365079365079366,
|
|
"grad_norm": 0.20177853107452393,
|
|
"learning_rate": 1.8685076619981608e-05,
|
|
"loss": 0.006,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 11.380952380952381,
|
|
"grad_norm": 0.1103038340806961,
|
|
"learning_rate": 1.8562529902025372e-05,
|
|
"loss": 0.0037,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 11.396825396825397,
|
|
"grad_norm": 0.22189675271511078,
|
|
"learning_rate": 1.844029470403993e-05,
|
|
"loss": 0.0066,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 11.412698412698413,
|
|
"grad_norm": 0.21314705908298492,
|
|
"learning_rate": 1.8318372237277565e-05,
|
|
"loss": 0.0065,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 11.428571428571429,
|
|
"grad_norm": 0.1456424593925476,
|
|
"learning_rate": 1.8196763709891524e-05,
|
|
"loss": 0.0049,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 11.444444444444445,
|
|
"grad_norm": 0.1834188550710678,
|
|
"learning_rate": 1.8075470326924243e-05,
|
|
"loss": 0.0067,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 11.46031746031746,
|
|
"grad_norm": 0.2855736017227173,
|
|
"learning_rate": 1.795449329029531e-05,
|
|
"loss": 0.009,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 11.476190476190476,
|
|
"grad_norm": 0.15806177258491516,
|
|
"learning_rate": 1.7833833798789595e-05,
|
|
"loss": 0.0044,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 11.492063492063492,
|
|
"grad_norm": 0.16890814900398254,
|
|
"learning_rate": 1.7713493048045294e-05,
|
|
"loss": 0.0056,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 11.507936507936508,
|
|
"grad_norm": 0.24409544467926025,
|
|
"learning_rate": 1.7593472230542202e-05,
|
|
"loss": 0.0069,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 11.523809523809524,
|
|
"grad_norm": 0.2861270010471344,
|
|
"learning_rate": 1.747377253558982e-05,
|
|
"loss": 0.0078,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 11.53968253968254,
|
|
"grad_norm": 0.17466863989830017,
|
|
"learning_rate": 1.7354395149315534e-05,
|
|
"loss": 0.0044,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 11.555555555555555,
|
|
"grad_norm": 0.2202078104019165,
|
|
"learning_rate": 1.7235341254653005e-05,
|
|
"loss": 0.0071,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 11.571428571428571,
|
|
"grad_norm": 0.25968992710113525,
|
|
"learning_rate": 1.711661203133026e-05,
|
|
"loss": 0.0052,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 11.587301587301587,
|
|
"grad_norm": 0.10932864248752594,
|
|
"learning_rate": 1.6998208655858137e-05,
|
|
"loss": 0.0033,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 11.603174603174603,
|
|
"grad_norm": 0.1846671849489212,
|
|
"learning_rate": 1.6880132301518598e-05,
|
|
"loss": 0.0049,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 11.619047619047619,
|
|
"grad_norm": 0.18320026993751526,
|
|
"learning_rate": 1.6762384138353078e-05,
|
|
"loss": 0.0048,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 11.634920634920634,
|
|
"grad_norm": 0.18667708337306976,
|
|
"learning_rate": 1.6644965333150847e-05,
|
|
"loss": 0.0041,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 11.65079365079365,
|
|
"grad_norm": 0.29703792929649353,
|
|
"learning_rate": 1.6527877049437622e-05,
|
|
"loss": 0.0098,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 11.666666666666666,
|
|
"grad_norm": 0.1451849490404129,
|
|
"learning_rate": 1.6411120447463807e-05,
|
|
"loss": 0.0034,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 11.682539682539682,
|
|
"grad_norm": 0.28783440589904785,
|
|
"learning_rate": 1.6294696684193154e-05,
|
|
"loss": 0.009,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 11.698412698412698,
|
|
"grad_norm": 0.22581429779529572,
|
|
"learning_rate": 1.617860691329126e-05,
|
|
"loss": 0.0044,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 11.714285714285714,
|
|
"grad_norm": 0.20482461154460907,
|
|
"learning_rate": 1.6062852285114123e-05,
|
|
"loss": 0.007,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 11.73015873015873,
|
|
"grad_norm": 0.10219337791204453,
|
|
"learning_rate": 1.5947433946696693e-05,
|
|
"loss": 0.0031,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 11.746031746031747,
|
|
"grad_norm": 0.2273254692554474,
|
|
"learning_rate": 1.583235304174167e-05,
|
|
"loss": 0.0069,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 11.761904761904763,
|
|
"grad_norm": 0.3083495497703552,
|
|
"learning_rate": 1.5717610710607948e-05,
|
|
"loss": 0.0116,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 11.777777777777779,
|
|
"grad_norm": 0.2324836254119873,
|
|
"learning_rate": 1.5603208090299498e-05,
|
|
"loss": 0.0065,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 11.793650793650794,
|
|
"grad_norm": 0.14565986394882202,
|
|
"learning_rate": 1.5489146314454002e-05,
|
|
"loss": 0.0041,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 11.80952380952381,
|
|
"grad_norm": 0.18284986913204193,
|
|
"learning_rate": 1.537542651333167e-05,
|
|
"loss": 0.0043,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 11.825396825396826,
|
|
"grad_norm": 0.21167722344398499,
|
|
"learning_rate": 1.5262049813803958e-05,
|
|
"loss": 0.0066,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 11.841269841269842,
|
|
"grad_norm": 0.16525444388389587,
|
|
"learning_rate": 1.5149017339342574e-05,
|
|
"loss": 0.0047,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 11.857142857142858,
|
|
"grad_norm": 0.17935959994792938,
|
|
"learning_rate": 1.503633021000812e-05,
|
|
"loss": 0.0053,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 11.873015873015873,
|
|
"grad_norm": 0.2582390010356903,
|
|
"learning_rate": 1.4923989542439159e-05,
|
|
"loss": 0.0052,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 11.88888888888889,
|
|
"grad_norm": 0.06719334423542023,
|
|
"learning_rate": 1.4811996449841098e-05,
|
|
"loss": 0.0025,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 11.904761904761905,
|
|
"grad_norm": 0.19448348879814148,
|
|
"learning_rate": 1.4700352041975163e-05,
|
|
"loss": 0.0059,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 11.920634920634921,
|
|
"grad_norm": 0.30000415444374084,
|
|
"learning_rate": 1.458905742514734e-05,
|
|
"loss": 0.0089,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 11.936507936507937,
|
|
"grad_norm": 0.19624555110931396,
|
|
"learning_rate": 1.447811370219757e-05,
|
|
"loss": 0.0067,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 11.952380952380953,
|
|
"grad_norm": 0.16108612716197968,
|
|
"learning_rate": 1.4367521972488612e-05,
|
|
"loss": 0.0036,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 11.968253968253968,
|
|
"grad_norm": 0.10793477296829224,
|
|
"learning_rate": 1.4257283331895315e-05,
|
|
"loss": 0.0032,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 11.984126984126984,
|
|
"grad_norm": 0.19331948459148407,
|
|
"learning_rate": 1.4147398872793693e-05,
|
|
"loss": 0.0054,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"grad_norm": 0.3868754208087921,
|
|
"learning_rate": 1.4037869684050115e-05,
|
|
"loss": 0.0066,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 12.015873015873016,
|
|
"grad_norm": 0.1854810267686844,
|
|
"learning_rate": 1.3928696851010443e-05,
|
|
"loss": 0.0052,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 12.031746031746032,
|
|
"grad_norm": 0.1465175747871399,
|
|
"learning_rate": 1.3819881455489458e-05,
|
|
"loss": 0.0064,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 12.047619047619047,
|
|
"grad_norm": 0.09918566048145294,
|
|
"learning_rate": 1.3711424575759912e-05,
|
|
"loss": 0.0033,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 12.063492063492063,
|
|
"grad_norm": 0.1635628491640091,
|
|
"learning_rate": 1.3603327286542023e-05,
|
|
"loss": 0.0044,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 12.079365079365079,
|
|
"grad_norm": 0.1613842099905014,
|
|
"learning_rate": 1.3495590658992718e-05,
|
|
"loss": 0.0048,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 12.095238095238095,
|
|
"grad_norm": 0.13634873926639557,
|
|
"learning_rate": 1.33882157606951e-05,
|
|
"loss": 0.0034,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 12.11111111111111,
|
|
"grad_norm": 0.15302757918834686,
|
|
"learning_rate": 1.3281203655647756e-05,
|
|
"loss": 0.0047,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 12.126984126984127,
|
|
"grad_norm": 0.10601391643285751,
|
|
"learning_rate": 1.317455540425439e-05,
|
|
"loss": 0.0031,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 12.142857142857142,
|
|
"grad_norm": 0.16901229321956635,
|
|
"learning_rate": 1.3068272063313102e-05,
|
|
"loss": 0.004,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 12.158730158730158,
|
|
"grad_norm": 0.11270225793123245,
|
|
"learning_rate": 1.2962354686006084e-05,
|
|
"loss": 0.0036,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 12.174603174603174,
|
|
"grad_norm": 0.17881913483142853,
|
|
"learning_rate": 1.2856804321889115e-05,
|
|
"loss": 0.0061,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 12.19047619047619,
|
|
"grad_norm": 0.27680760622024536,
|
|
"learning_rate": 1.2751622016881182e-05,
|
|
"loss": 0.0087,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 12.206349206349206,
|
|
"grad_norm": 0.14763417840003967,
|
|
"learning_rate": 1.2646808813254042e-05,
|
|
"loss": 0.0039,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 12.222222222222221,
|
|
"grad_norm": 0.21186058223247528,
|
|
"learning_rate": 1.2542365749622049e-05,
|
|
"loss": 0.0065,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 12.238095238095237,
|
|
"grad_norm": 0.13028453290462494,
|
|
"learning_rate": 1.2438293860931682e-05,
|
|
"loss": 0.0037,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 12.253968253968253,
|
|
"grad_norm": 0.1220482587814331,
|
|
"learning_rate": 1.2334594178451425e-05,
|
|
"loss": 0.0034,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 12.26984126984127,
|
|
"grad_norm": 0.10451938956975937,
|
|
"learning_rate": 1.2231267729761487e-05,
|
|
"loss": 0.0034,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 12.285714285714286,
|
|
"grad_norm": 0.06596413254737854,
|
|
"learning_rate": 1.2128315538743646e-05,
|
|
"loss": 0.0025,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 12.301587301587302,
|
|
"grad_norm": 0.18053588271141052,
|
|
"learning_rate": 1.2025738625571026e-05,
|
|
"loss": 0.0043,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 12.317460317460318,
|
|
"grad_norm": 0.2295704185962677,
|
|
"learning_rate": 1.1923538006698154e-05,
|
|
"loss": 0.0076,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 12.333333333333334,
|
|
"grad_norm": 0.21795432269573212,
|
|
"learning_rate": 1.1821714694850689e-05,
|
|
"loss": 0.0062,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 12.34920634920635,
|
|
"grad_norm": 0.110650934278965,
|
|
"learning_rate": 1.172026969901553e-05,
|
|
"loss": 0.0033,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 12.365079365079366,
|
|
"grad_norm": 0.14939086139202118,
|
|
"learning_rate": 1.161920402443077e-05,
|
|
"loss": 0.0053,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 12.380952380952381,
|
|
"grad_norm": 0.14100809395313263,
|
|
"learning_rate": 1.1518518672575701e-05,
|
|
"loss": 0.0047,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 12.396825396825397,
|
|
"grad_norm": 0.1589258462190628,
|
|
"learning_rate": 1.1418214641160958e-05,
|
|
"loss": 0.0041,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 12.412698412698413,
|
|
"grad_norm": 0.22199559211730957,
|
|
"learning_rate": 1.1318292924118584e-05,
|
|
"loss": 0.0048,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 12.428571428571429,
|
|
"grad_norm": 0.1654834747314453,
|
|
"learning_rate": 1.1218754511592217e-05,
|
|
"loss": 0.0052,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 12.444444444444445,
|
|
"grad_norm": 0.18298682570457458,
|
|
"learning_rate": 1.1119600389927182e-05,
|
|
"loss": 0.0053,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 12.46031746031746,
|
|
"grad_norm": 0.13524076342582703,
|
|
"learning_rate": 1.1020831541660915e-05,
|
|
"loss": 0.0038,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 12.476190476190476,
|
|
"grad_norm": 0.17973224818706512,
|
|
"learning_rate": 1.092244894551298e-05,
|
|
"loss": 0.0047,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 12.492063492063492,
|
|
"grad_norm": 0.06217047572135925,
|
|
"learning_rate": 1.0824453576375576e-05,
|
|
"loss": 0.0026,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 12.507936507936508,
|
|
"grad_norm": 0.17186515033245087,
|
|
"learning_rate": 1.0726846405303754e-05,
|
|
"loss": 0.0043,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 12.523809523809524,
|
|
"grad_norm": 0.22013287246227264,
|
|
"learning_rate": 1.062962839950587e-05,
|
|
"loss": 0.0057,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 12.53968253968254,
|
|
"grad_norm": 0.1783435344696045,
|
|
"learning_rate": 1.0532800522333897e-05,
|
|
"loss": 0.0057,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 12.555555555555555,
|
|
"grad_norm": 0.21852487325668335,
|
|
"learning_rate": 1.0436363733274057e-05,
|
|
"loss": 0.0053,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 12.571428571428571,
|
|
"grad_norm": 0.22835583984851837,
|
|
"learning_rate": 1.0340318987937097e-05,
|
|
"loss": 0.0056,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 12.587301587301587,
|
|
"grad_norm": 0.14611005783081055,
|
|
"learning_rate": 1.0244667238048988e-05,
|
|
"loss": 0.004,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 12.603174603174603,
|
|
"grad_norm": 0.13122573494911194,
|
|
"learning_rate": 1.014940943144142e-05,
|
|
"loss": 0.0034,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 12.619047619047619,
|
|
"grad_norm": 0.1692192703485489,
|
|
"learning_rate": 1.0054546512042424e-05,
|
|
"loss": 0.0036,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 12.634920634920634,
|
|
"grad_norm": 0.10081874579191208,
|
|
"learning_rate": 9.960079419866985e-06,
|
|
"loss": 0.0028,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 12.65079365079365,
|
|
"grad_norm": 0.16554361581802368,
|
|
"learning_rate": 9.866009091007833e-06,
|
|
"loss": 0.004,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 12.666666666666666,
|
|
"grad_norm": 0.11980407685041428,
|
|
"learning_rate": 9.772336457626014e-06,
|
|
"loss": 0.0033,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 12.682539682539682,
|
|
"grad_norm": 0.19346101582050323,
|
|
"learning_rate": 9.679062447941778e-06,
|
|
"loss": 0.0054,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 12.698412698412698,
|
|
"grad_norm": 0.21870972216129303,
|
|
"learning_rate": 9.586187986225325e-06,
|
|
"loss": 0.0056,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 12.714285714285714,
|
|
"grad_norm": 0.18945957720279694,
|
|
"learning_rate": 9.493713992787672e-06,
|
|
"loss": 0.0056,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 12.73015873015873,
|
|
"grad_norm": 0.25288915634155273,
|
|
"learning_rate": 9.401641383971477e-06,
|
|
"loss": 0.0067,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 12.746031746031747,
|
|
"grad_norm": 0.15972785651683807,
|
|
"learning_rate": 9.309971072142038e-06,
|
|
"loss": 0.0041,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 12.761904761904763,
|
|
"grad_norm": 0.2357502579689026,
|
|
"learning_rate": 9.218703965678204e-06,
|
|
"loss": 0.0059,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 12.777777777777779,
|
|
"grad_norm": 0.23380345106124878,
|
|
"learning_rate": 9.127840968963381e-06,
|
|
"loss": 0.0072,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 12.793650793650794,
|
|
"grad_norm": 0.13809677958488464,
|
|
"learning_rate": 9.03738298237658e-06,
|
|
"loss": 0.0046,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 12.80952380952381,
|
|
"grad_norm": 0.26843348145484924,
|
|
"learning_rate": 8.94733090228349e-06,
|
|
"loss": 0.007,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 12.825396825396826,
|
|
"grad_norm": 0.30479297041893005,
|
|
"learning_rate": 8.857685621027568e-06,
|
|
"loss": 0.0072,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 12.841269841269842,
|
|
"grad_norm": 0.09838364273309708,
|
|
"learning_rate": 8.768448026921245e-06,
|
|
"loss": 0.0032,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 12.857142857142858,
|
|
"grad_norm": 0.13536061346530914,
|
|
"learning_rate": 8.67961900423711e-06,
|
|
"loss": 0.0031,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 12.873015873015873,
|
|
"grad_norm": 0.12725569307804108,
|
|
"learning_rate": 8.591199433199126e-06,
|
|
"loss": 0.0034,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 12.88888888888889,
|
|
"grad_norm": 0.1910911351442337,
|
|
"learning_rate": 8.503190189973914e-06,
|
|
"loss": 0.0048,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 12.904761904761905,
|
|
"grad_norm": 0.08065954595804214,
|
|
"learning_rate": 8.415592146662104e-06,
|
|
"loss": 0.0027,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 12.920634920634921,
|
|
"grad_norm": 0.20949719846248627,
|
|
"learning_rate": 8.328406171289621e-06,
|
|
"loss": 0.0056,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 12.936507936507937,
|
|
"grad_norm": 0.11893566697835922,
|
|
"learning_rate": 8.24163312779917e-06,
|
|
"loss": 0.0036,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 12.952380952380953,
|
|
"grad_norm": 0.28728553652763367,
|
|
"learning_rate": 8.155273876041614e-06,
|
|
"loss": 0.0098,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 12.968253968253968,
|
|
"grad_norm": 0.2053646296262741,
|
|
"learning_rate": 8.069329271767484e-06,
|
|
"loss": 0.0057,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 12.984126984126984,
|
|
"grad_norm": 0.186600461602211,
|
|
"learning_rate": 7.983800166618482e-06,
|
|
"loss": 0.0044,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"grad_norm": 0.18637099862098694,
|
|
"learning_rate": 7.898687408119065e-06,
|
|
"loss": 0.0034,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 13.015873015873016,
|
|
"grad_norm": 0.23288948833942413,
|
|
"learning_rate": 7.813991839667995e-06,
|
|
"loss": 0.006,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 13.031746031746032,
|
|
"grad_norm": 0.11603759229183197,
|
|
"learning_rate": 7.72971430053005e-06,
|
|
"loss": 0.0032,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 13.047619047619047,
|
|
"grad_norm": 0.11359909176826477,
|
|
"learning_rate": 7.645855625827658e-06,
|
|
"loss": 0.0036,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 13.063492063492063,
|
|
"grad_norm": 0.1750001609325409,
|
|
"learning_rate": 7.56241664653266e-06,
|
|
"loss": 0.0047,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 13.079365079365079,
|
|
"grad_norm": 0.08407314866781235,
|
|
"learning_rate": 7.4793981894580034e-06,
|
|
"loss": 0.003,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 13.095238095238095,
|
|
"grad_norm": 0.15450453758239746,
|
|
"learning_rate": 7.396801077249676e-06,
|
|
"loss": 0.004,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 13.11111111111111,
|
|
"grad_norm": 0.1506980061531067,
|
|
"learning_rate": 7.3146261283784104e-06,
|
|
"loss": 0.004,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 13.126984126984127,
|
|
"grad_norm": 0.0932818278670311,
|
|
"learning_rate": 7.2328741571316696e-06,
|
|
"loss": 0.0028,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 13.142857142857142,
|
|
"grad_norm": 0.1964637041091919,
|
|
"learning_rate": 7.1515459736055505e-06,
|
|
"loss": 0.0047,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 13.158730158730158,
|
|
"grad_norm": 0.11378604173660278,
|
|
"learning_rate": 7.070642383696763e-06,
|
|
"loss": 0.0036,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 13.174603174603174,
|
|
"grad_norm": 0.07380079478025436,
|
|
"learning_rate": 6.990164189094589e-06,
|
|
"loss": 0.0024,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 13.19047619047619,
|
|
"grad_norm": 0.11589548736810684,
|
|
"learning_rate": 6.910112187273066e-06,
|
|
"loss": 0.0036,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 13.206349206349206,
|
|
"grad_norm": 0.2268502563238144,
|
|
"learning_rate": 6.830487171482935e-06,
|
|
"loss": 0.0065,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 13.222222222222221,
|
|
"grad_norm": 0.1941031664609909,
|
|
"learning_rate": 6.751289930743882e-06,
|
|
"loss": 0.0043,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 13.238095238095237,
|
|
"grad_norm": 0.14726468920707703,
|
|
"learning_rate": 6.6725212498366885e-06,
|
|
"loss": 0.0044,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 13.253968253968253,
|
|
"grad_norm": 0.09331656992435455,
|
|
"learning_rate": 6.594181909295427e-06,
|
|
"loss": 0.003,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 13.26984126984127,
|
|
"grad_norm": 0.1862584948539734,
|
|
"learning_rate": 6.516272685399788e-06,
|
|
"loss": 0.005,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 13.285714285714286,
|
|
"grad_norm": 0.14406166970729828,
|
|
"learning_rate": 6.438794350167337e-06,
|
|
"loss": 0.0039,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 13.301587301587302,
|
|
"grad_norm": 0.09058280289173126,
|
|
"learning_rate": 6.36174767134588e-06,
|
|
"loss": 0.0033,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 13.317460317460318,
|
|
"grad_norm": 0.1405523121356964,
|
|
"learning_rate": 6.285133412405858e-06,
|
|
"loss": 0.0035,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 13.333333333333334,
|
|
"grad_norm": 0.14805886149406433,
|
|
"learning_rate": 6.20895233253278e-06,
|
|
"loss": 0.0046,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 13.34920634920635,
|
|
"grad_norm": 0.2134266048669815,
|
|
"learning_rate": 6.133205186619695e-06,
|
|
"loss": 0.0064,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 13.365079365079366,
|
|
"grad_norm": 0.09715571254491806,
|
|
"learning_rate": 6.057892725259717e-06,
|
|
"loss": 0.003,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 13.380952380952381,
|
|
"grad_norm": 0.16215340793132782,
|
|
"learning_rate": 5.983015694738597e-06,
|
|
"loss": 0.0052,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 13.396825396825397,
|
|
"grad_norm": 0.12831249833106995,
|
|
"learning_rate": 5.908574837027309e-06,
|
|
"loss": 0.0035,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 13.412698412698413,
|
|
"grad_norm": 0.23066161572933197,
|
|
"learning_rate": 5.83457088977471e-06,
|
|
"loss": 0.006,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 13.428571428571429,
|
|
"grad_norm": 0.16153094172477722,
|
|
"learning_rate": 5.761004586300234e-06,
|
|
"loss": 0.0032,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 13.444444444444445,
|
|
"grad_norm": 0.1263124942779541,
|
|
"learning_rate": 5.687876655586583e-06,
|
|
"loss": 0.0033,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 13.46031746031746,
|
|
"grad_norm": 0.22407254576683044,
|
|
"learning_rate": 5.615187822272583e-06,
|
|
"loss": 0.0069,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 13.476190476190476,
|
|
"grad_norm": 0.2908068001270294,
|
|
"learning_rate": 5.542938806645931e-06,
|
|
"loss": 0.0087,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 13.492063492063492,
|
|
"grad_norm": 0.18590912222862244,
|
|
"learning_rate": 5.4711303246361144e-06,
|
|
"loss": 0.0048,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 13.507936507936508,
|
|
"grad_norm": 0.17562605440616608,
|
|
"learning_rate": 5.399763087807236e-06,
|
|
"loss": 0.0044,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 13.523809523809524,
|
|
"grad_norm": 0.07766014337539673,
|
|
"learning_rate": 5.328837803351083e-06,
|
|
"loss": 0.0025,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 13.53968253968254,
|
|
"grad_norm": 0.16575992107391357,
|
|
"learning_rate": 5.258355174079993e-06,
|
|
"loss": 0.0045,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 13.555555555555555,
|
|
"grad_norm": 0.1963498741388321,
|
|
"learning_rate": 5.188315898419971e-06,
|
|
"loss": 0.0061,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 13.571428571428571,
|
|
"grad_norm": 0.2301764041185379,
|
|
"learning_rate": 5.118720670403748e-06,
|
|
"loss": 0.0051,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 13.587301587301587,
|
|
"grad_norm": 0.16544826328754425,
|
|
"learning_rate": 5.04957017966391e-06,
|
|
"loss": 0.0033,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 13.603174603174603,
|
|
"grad_norm": 0.17942006886005402,
|
|
"learning_rate": 4.980865111426003e-06,
|
|
"loss": 0.004,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 13.619047619047619,
|
|
"grad_norm": 0.14243295788764954,
|
|
"learning_rate": 4.912606146501886e-06,
|
|
"loss": 0.0035,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 13.634920634920634,
|
|
"grad_norm": 0.14227573573589325,
|
|
"learning_rate": 4.844793961282812e-06,
|
|
"loss": 0.0034,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 13.65079365079365,
|
|
"grad_norm": 0.14716386795043945,
|
|
"learning_rate": 4.777429227732844e-06,
|
|
"loss": 0.0033,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 13.666666666666666,
|
|
"grad_norm": 0.2278168946504593,
|
|
"learning_rate": 4.710512613382151e-06,
|
|
"loss": 0.006,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 13.682539682539682,
|
|
"grad_norm": 0.2408359944820404,
|
|
"learning_rate": 4.644044781320422e-06,
|
|
"loss": 0.0072,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 13.698412698412698,
|
|
"grad_norm": 0.23890067636966705,
|
|
"learning_rate": 4.578026390190232e-06,
|
|
"loss": 0.0051,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 13.714285714285714,
|
|
"grad_norm": 0.2770053446292877,
|
|
"learning_rate": 4.5124580941806165e-06,
|
|
"loss": 0.0078,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 13.73015873015873,
|
|
"grad_norm": 0.16485559940338135,
|
|
"learning_rate": 4.447340543020473e-06,
|
|
"loss": 0.0033,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 13.746031746031747,
|
|
"grad_norm": 0.1674467772245407,
|
|
"learning_rate": 4.382674381972224e-06,
|
|
"loss": 0.0041,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 13.761904761904763,
|
|
"grad_norm": 0.09436249732971191,
|
|
"learning_rate": 4.318460251825357e-06,
|
|
"loss": 0.0031,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 13.777777777777779,
|
|
"grad_norm": 0.0673573687672615,
|
|
"learning_rate": 4.254698788890127e-06,
|
|
"loss": 0.0025,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 13.793650793650794,
|
|
"grad_norm": 0.18255870044231415,
|
|
"learning_rate": 4.191390624991159e-06,
|
|
"loss": 0.0047,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 13.80952380952381,
|
|
"grad_norm": 0.13948306441307068,
|
|
"learning_rate": 4.12853638746134e-06,
|
|
"loss": 0.0032,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 13.825396825396826,
|
|
"grad_norm": 0.24183286726474762,
|
|
"learning_rate": 4.0661366991354365e-06,
|
|
"loss": 0.0063,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 13.841269841269842,
|
|
"grad_norm": 0.10236512869596481,
|
|
"learning_rate": 4.004192178344029e-06,
|
|
"loss": 0.003,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 13.857142857142858,
|
|
"grad_norm": 0.10468772053718567,
|
|
"learning_rate": 3.942703438907358e-06,
|
|
"loss": 0.003,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 13.873015873015873,
|
|
"grad_norm": 0.1839323341846466,
|
|
"learning_rate": 3.881671090129247e-06,
|
|
"loss": 0.0047,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 13.88888888888889,
|
|
"grad_norm": 0.245498925447464,
|
|
"learning_rate": 3.821095736791008e-06,
|
|
"loss": 0.0063,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 13.904761904761905,
|
|
"grad_norm": 0.08903949707746506,
|
|
"learning_rate": 3.7609779791455744e-06,
|
|
"loss": 0.0027,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 13.920634920634921,
|
|
"grad_norm": 0.10096840560436249,
|
|
"learning_rate": 3.7013184129113976e-06,
|
|
"loss": 0.0025,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 13.936507936507937,
|
|
"grad_norm": 0.16196174919605255,
|
|
"learning_rate": 3.6421176292666783e-06,
|
|
"loss": 0.0049,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 13.952380952380953,
|
|
"grad_norm": 0.2010921686887741,
|
|
"learning_rate": 3.58337621484342e-06,
|
|
"loss": 0.0047,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 13.968253968253968,
|
|
"grad_norm": 0.20379731059074402,
|
|
"learning_rate": 3.525094751721669e-06,
|
|
"loss": 0.0049,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 13.984126984126984,
|
|
"grad_norm": 0.13787353038787842,
|
|
"learning_rate": 3.4672738174236884e-06,
|
|
"loss": 0.0038,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"grad_norm": 0.21144546568393707,
|
|
"learning_rate": 3.4099139849083307e-06,
|
|
"loss": 0.0058,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 14.015873015873016,
|
|
"grad_norm": 0.16598111391067505,
|
|
"learning_rate": 3.353015822565253e-06,
|
|
"loss": 0.0046,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 14.031746031746032,
|
|
"grad_norm": 0.11316211521625519,
|
|
"learning_rate": 3.296579894209345e-06,
|
|
"loss": 0.0033,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 14.047619047619047,
|
|
"grad_norm": 0.1642863005399704,
|
|
"learning_rate": 3.2406067590751433e-06,
|
|
"loss": 0.0042,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 14.063492063492063,
|
|
"grad_norm": 0.06231338158249855,
|
|
"learning_rate": 3.1850969718112745e-06,
|
|
"loss": 0.0023,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 14.079365079365079,
|
|
"grad_norm": 0.07541368156671524,
|
|
"learning_rate": 3.1300510824749273e-06,
|
|
"loss": 0.0027,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 14.095238095238095,
|
|
"grad_norm": 0.10008185356855392,
|
|
"learning_rate": 3.0754696365265068e-06,
|
|
"loss": 0.0028,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 14.11111111111111,
|
|
"grad_norm": 0.10464094579219818,
|
|
"learning_rate": 3.0213531748240764e-06,
|
|
"loss": 0.0031,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 14.126984126984127,
|
|
"grad_norm": 0.09949090331792831,
|
|
"learning_rate": 2.9677022336181413e-06,
|
|
"loss": 0.003,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 14.142857142857142,
|
|
"grad_norm": 0.08555309474468231,
|
|
"learning_rate": 2.914517344546258e-06,
|
|
"loss": 0.0028,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 14.158730158730158,
|
|
"grad_norm": 0.10682200640439987,
|
|
"learning_rate": 2.8617990346277657e-06,
|
|
"loss": 0.0032,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 14.174603174603174,
|
|
"grad_norm": 0.09806779026985168,
|
|
"learning_rate": 2.8095478262585907e-06,
|
|
"loss": 0.0035,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 14.19047619047619,
|
|
"grad_norm": 0.13682028651237488,
|
|
"learning_rate": 2.7577642372060673e-06,
|
|
"loss": 0.003,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 14.206349206349206,
|
|
"grad_norm": 0.1651875525712967,
|
|
"learning_rate": 2.7064487806037985e-06,
|
|
"loss": 0.0043,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 14.222222222222221,
|
|
"grad_norm": 0.14128713309764862,
|
|
"learning_rate": 2.6556019649465525e-06,
|
|
"loss": 0.0032,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 14.238095238095237,
|
|
"grad_norm": 0.18472391366958618,
|
|
"learning_rate": 2.6052242940852787e-06,
|
|
"loss": 0.0055,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 14.253968253968253,
|
|
"grad_norm": 0.12015866488218307,
|
|
"learning_rate": 2.5553162672220465e-06,
|
|
"loss": 0.0027,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 14.26984126984127,
|
|
"grad_norm": 0.20532798767089844,
|
|
"learning_rate": 2.5058783789051467e-06,
|
|
"loss": 0.006,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 14.285714285714286,
|
|
"grad_norm": 0.11659039556980133,
|
|
"learning_rate": 2.45691111902418e-06,
|
|
"loss": 0.0033,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 14.301587301587302,
|
|
"grad_norm": 0.22555606067180634,
|
|
"learning_rate": 2.4084149728051952e-06,
|
|
"loss": 0.0057,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 14.317460317460318,
|
|
"grad_norm": 0.10364361107349396,
|
|
"learning_rate": 2.360390420805869e-06,
|
|
"loss": 0.003,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 14.333333333333334,
|
|
"grad_norm": 0.15920886397361755,
|
|
"learning_rate": 2.3128379389108e-06,
|
|
"loss": 0.0039,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 14.34920634920635,
|
|
"grad_norm": 0.10381603240966797,
|
|
"learning_rate": 2.2657579983267064e-06,
|
|
"loss": 0.0028,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 14.365079365079366,
|
|
"grad_norm": 0.17512689530849457,
|
|
"learning_rate": 2.219151065577829e-06,
|
|
"loss": 0.0046,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 14.380952380952381,
|
|
"grad_norm": 0.22503690421581268,
|
|
"learning_rate": 2.1730176025012816e-06,
|
|
"loss": 0.0063,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 14.396825396825397,
|
|
"grad_norm": 0.17018793523311615,
|
|
"learning_rate": 2.1273580662424796e-06,
|
|
"loss": 0.0048,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 14.412698412698413,
|
|
"grad_norm": 0.22725212574005127,
|
|
"learning_rate": 2.082172909250568e-06,
|
|
"loss": 0.0069,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 14.428571428571429,
|
|
"grad_norm": 0.12136708199977875,
|
|
"learning_rate": 2.0374625792740464e-06,
|
|
"loss": 0.003,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 14.444444444444445,
|
|
"grad_norm": 0.06128573417663574,
|
|
"learning_rate": 1.993227519356189e-06,
|
|
"loss": 0.0022,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 14.46031746031746,
|
|
"grad_norm": 0.0800539031624794,
|
|
"learning_rate": 1.9494681678307703e-06,
|
|
"loss": 0.0025,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 14.476190476190476,
|
|
"grad_norm": 0.2250363528728485,
|
|
"learning_rate": 1.906184958317664e-06,
|
|
"loss": 0.0056,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 14.492063492063492,
|
|
"grad_norm": 0.06574003398418427,
|
|
"learning_rate": 1.8633783197185783e-06,
|
|
"loss": 0.0025,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 14.507936507936508,
|
|
"grad_norm": 0.1733701229095459,
|
|
"learning_rate": 1.8210486762127499e-06,
|
|
"loss": 0.0052,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 14.523809523809524,
|
|
"grad_norm": 0.14052851498126984,
|
|
"learning_rate": 1.7791964472528232e-06,
|
|
"loss": 0.0035,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 14.53968253968254,
|
|
"grad_norm": 0.20883136987686157,
|
|
"learning_rate": 1.737822047560611e-06,
|
|
"loss": 0.006,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 14.555555555555555,
|
|
"grad_norm": 0.18126244843006134,
|
|
"learning_rate": 1.696925887123052e-06,
|
|
"loss": 0.0043,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 14.571428571428571,
|
|
"grad_norm": 0.19093488156795502,
|
|
"learning_rate": 1.656508371188109e-06,
|
|
"loss": 0.0045,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 14.587301587301587,
|
|
"grad_norm": 0.16476662456989288,
|
|
"learning_rate": 1.6165699002607671e-06,
|
|
"loss": 0.0037,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 14.603174603174603,
|
|
"grad_norm": 0.12128468602895737,
|
|
"learning_rate": 1.5771108700990412e-06,
|
|
"loss": 0.0034,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 14.619047619047619,
|
|
"grad_norm": 0.07109358161687851,
|
|
"learning_rate": 1.538131671710108e-06,
|
|
"loss": 0.0027,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 14.634920634920634,
|
|
"grad_norm": 0.12868039309978485,
|
|
"learning_rate": 1.4996326913463754e-06,
|
|
"loss": 0.0042,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 14.65079365079365,
|
|
"grad_norm": 0.10166194289922714,
|
|
"learning_rate": 1.461614310501691e-06,
|
|
"loss": 0.0027,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 14.666666666666666,
|
|
"grad_norm": 0.1676546037197113,
|
|
"learning_rate": 1.4240769059075342e-06,
|
|
"loss": 0.0045,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 14.682539682539682,
|
|
"grad_norm": 0.16010187566280365,
|
|
"learning_rate": 1.387020849529319e-06,
|
|
"loss": 0.0048,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 14.698412698412698,
|
|
"grad_norm": 0.22581593692302704,
|
|
"learning_rate": 1.3504465085626638e-06,
|
|
"loss": 0.0076,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 14.714285714285714,
|
|
"grad_norm": 0.1580013781785965,
|
|
"learning_rate": 1.3143542454297885e-06,
|
|
"loss": 0.004,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 14.73015873015873,
|
|
"grad_norm": 0.2010050266981125,
|
|
"learning_rate": 1.2787444177759068e-06,
|
|
"loss": 0.0058,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 14.746031746031747,
|
|
"grad_norm": 0.2182077020406723,
|
|
"learning_rate": 1.243617378465689e-06,
|
|
"loss": 0.0053,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 14.761904761904763,
|
|
"grad_norm": 0.23424509167671204,
|
|
"learning_rate": 1.208973475579761e-06,
|
|
"loss": 0.0055,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 14.777777777777779,
|
|
"grad_norm": 0.1593056619167328,
|
|
"learning_rate": 1.1748130524112666e-06,
|
|
"loss": 0.0038,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 14.793650793650794,
|
|
"grad_norm": 0.1183331161737442,
|
|
"learning_rate": 1.1411364474624264e-06,
|
|
"loss": 0.0035,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 14.80952380952381,
|
|
"grad_norm": 0.1267019659280777,
|
|
"learning_rate": 1.1079439944412406e-06,
|
|
"loss": 0.0037,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 14.825396825396826,
|
|
"grad_norm": 0.1250416487455368,
|
|
"learning_rate": 1.075236022258147e-06,
|
|
"loss": 0.0033,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 14.841269841269842,
|
|
"grad_norm": 0.19605623185634613,
|
|
"learning_rate": 1.0430128550227625e-06,
|
|
"loss": 0.0034,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 14.857142857142858,
|
|
"grad_norm": 0.18377277255058289,
|
|
"learning_rate": 1.0112748120406856e-06,
|
|
"loss": 0.007,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 14.873015873015873,
|
|
"grad_norm": 0.1912008672952652,
|
|
"learning_rate": 9.800222078103271e-07,
|
|
"loss": 0.0042,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 14.88888888888889,
|
|
"grad_norm": 0.1927856057882309,
|
|
"learning_rate": 9.492553520197733e-07,
|
|
"loss": 0.0055,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 14.904761904761905,
|
|
"grad_norm": 0.103274405002594,
|
|
"learning_rate": 9.189745495437608e-07,
|
|
"loss": 0.0034,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 14.920634920634921,
|
|
"grad_norm": 0.1846938282251358,
|
|
"learning_rate": 8.891801004406119e-07,
|
|
"loss": 0.0047,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 14.936507936507937,
|
|
"grad_norm": 0.12870021164417267,
|
|
"learning_rate": 8.59872299949288e-07,
|
|
"loss": 0.0028,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 14.952380952380953,
|
|
"grad_norm": 0.09814100712537766,
|
|
"learning_rate": 8.31051438486441e-07,
|
|
"loss": 0.0027,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 14.968253968253968,
|
|
"grad_norm": 0.12259647250175476,
|
|
"learning_rate": 8.027178016435765e-07,
|
|
"loss": 0.003,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 14.984126984126984,
|
|
"grad_norm": 0.2572350800037384,
|
|
"learning_rate": 7.748716701841685e-07,
|
|
"loss": 0.006,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"grad_norm": 0.26040682196617126,
|
|
"learning_rate": 7.475133200409212e-07,
|
|
"loss": 0.0048,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 15.015873015873016,
|
|
"grad_norm": 0.1166323646903038,
|
|
"learning_rate": 7.206430223130278e-07,
|
|
"loss": 0.0028,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 15.031746031746032,
|
|
"grad_norm": 0.11518598347902298,
|
|
"learning_rate": 6.9426104326345e-07,
|
|
"loss": 0.0031,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 15.047619047619047,
|
|
"grad_norm": 0.18673783540725708,
|
|
"learning_rate": 6.683676443163311e-07,
|
|
"loss": 0.0048,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 15.063492063492063,
|
|
"grad_norm": 0.1127839982509613,
|
|
"learning_rate": 6.429630820543598e-07,
|
|
"loss": 0.0031,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 15.079365079365079,
|
|
"grad_norm": 0.18263711035251617,
|
|
"learning_rate": 6.180476082162656e-07,
|
|
"loss": 0.004,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 15.095238095238095,
|
|
"grad_norm": 0.1486678421497345,
|
|
"learning_rate": 5.936214696942887e-07,
|
|
"loss": 0.0037,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 15.11111111111111,
|
|
"grad_norm": 0.2178022712469101,
|
|
"learning_rate": 5.696849085317646e-07,
|
|
"loss": 0.0057,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 15.126984126984127,
|
|
"grad_norm": 0.12073294818401337,
|
|
"learning_rate": 5.462381619207091e-07,
|
|
"loss": 0.0031,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 15.142857142857142,
|
|
"grad_norm": 0.12311496585607529,
|
|
"learning_rate": 5.232814621994598e-07,
|
|
"loss": 0.0036,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 15.158730158730158,
|
|
"grad_norm": 0.16713330149650574,
|
|
"learning_rate": 5.008150368503994e-07,
|
|
"loss": 0.0038,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 15.174603174603174,
|
|
"grad_norm": 0.1170608177781105,
|
|
"learning_rate": 4.788391084976862e-07,
|
|
"loss": 0.0033,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 15.19047619047619,
|
|
"grad_norm": 0.06233490630984306,
|
|
"learning_rate": 4.573538949050327e-07,
|
|
"loss": 0.0023,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 15.206349206349206,
|
|
"grad_norm": 0.13149504363536835,
|
|
"learning_rate": 4.363596089735911e-07,
|
|
"loss": 0.0031,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 15.222222222222221,
|
|
"grad_norm": 0.16984321177005768,
|
|
"learning_rate": 4.1585645873978284e-07,
|
|
"loss": 0.0046,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 15.238095238095237,
|
|
"grad_norm": 0.14544299244880676,
|
|
"learning_rate": 3.958446473733002e-07,
|
|
"loss": 0.0033,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 15.253968253968253,
|
|
"grad_norm": 0.17623476684093475,
|
|
"learning_rate": 3.7632437317505207e-07,
|
|
"loss": 0.0046,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 15.26984126984127,
|
|
"grad_norm": 0.12246549874544144,
|
|
"learning_rate": 3.572958295752049e-07,
|
|
"loss": 0.0034,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 15.285714285714286,
|
|
"grad_norm": 0.14989396929740906,
|
|
"learning_rate": 3.387592051312782e-07,
|
|
"loss": 0.0036,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 15.301587301587302,
|
|
"grad_norm": 0.19900646805763245,
|
|
"learning_rate": 3.207146835262742e-07,
|
|
"loss": 0.0057,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 15.317460317460318,
|
|
"grad_norm": 0.1741442084312439,
|
|
"learning_rate": 3.0316244356683454e-07,
|
|
"loss": 0.0047,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 15.333333333333334,
|
|
"grad_norm": 0.15245862305164337,
|
|
"learning_rate": 2.8610265918151414e-07,
|
|
"loss": 0.0046,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 15.34920634920635,
|
|
"grad_norm": 0.19708728790283203,
|
|
"learning_rate": 2.695354994190047e-07,
|
|
"loss": 0.0058,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 15.365079365079366,
|
|
"grad_norm": 0.13684900104999542,
|
|
"learning_rate": 2.534611284465083e-07,
|
|
"loss": 0.0037,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 15.380952380952381,
|
|
"grad_norm": 0.18838024139404297,
|
|
"learning_rate": 2.3787970554806084e-07,
|
|
"loss": 0.0043,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 15.396825396825397,
|
|
"grad_norm": 0.18869999051094055,
|
|
"learning_rate": 2.2279138512300567e-07,
|
|
"loss": 0.0056,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 15.412698412698413,
|
|
"grad_norm": 0.14952099323272705,
|
|
"learning_rate": 2.0819631668442253e-07,
|
|
"loss": 0.0038,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 15.428571428571429,
|
|
"grad_norm": 0.20797456800937653,
|
|
"learning_rate": 1.940946448576675e-07,
|
|
"loss": 0.0056,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 15.444444444444445,
|
|
"grad_norm": 0.17077018320560455,
|
|
"learning_rate": 1.8048650937893542e-07,
|
|
"loss": 0.0049,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 15.46031746031746,
|
|
"grad_norm": 0.16229721903800964,
|
|
"learning_rate": 1.6737204509387206e-07,
|
|
"loss": 0.0038,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 15.476190476190476,
|
|
"grad_norm": 0.06878882646560669,
|
|
"learning_rate": 1.5475138195623629e-07,
|
|
"loss": 0.0024,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 15.492063492063492,
|
|
"grad_norm": 0.1002248004078865,
|
|
"learning_rate": 1.4262464502663443e-07,
|
|
"loss": 0.0028,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 15.507936507936508,
|
|
"grad_norm": 0.1598724126815796,
|
|
"learning_rate": 1.309919544712268e-07,
|
|
"loss": 0.0051,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 15.523809523809524,
|
|
"grad_norm": 0.20375491678714752,
|
|
"learning_rate": 1.1985342556060652e-07,
|
|
"loss": 0.0048,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 15.53968253968254,
|
|
"grad_norm": 0.15220007300376892,
|
|
"learning_rate": 1.0920916866861142e-07,
|
|
"loss": 0.0037,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 15.555555555555555,
|
|
"grad_norm": 0.13166747987270355,
|
|
"learning_rate": 9.905928927123609e-08,
|
|
"loss": 0.0041,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 15.571428571428571,
|
|
"grad_norm": 0.16521938145160675,
|
|
"learning_rate": 8.940388794559939e-08,
|
|
"loss": 0.0043,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 15.587301587301587,
|
|
"grad_norm": 0.22669538855552673,
|
|
"learning_rate": 8.02430603689397e-08,
|
|
"loss": 0.006,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 15.603174603174603,
|
|
"grad_norm": 0.09708595275878906,
|
|
"learning_rate": 7.157689731767669e-08,
|
|
"loss": 0.0025,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 15.619047619047619,
|
|
"grad_norm": 0.2131219506263733,
|
|
"learning_rate": 6.340548466648443e-08,
|
|
"loss": 0.0051,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 15.634920634920634,
|
|
"grad_norm": 0.1999976485967636,
|
|
"learning_rate": 5.572890338748082e-08,
|
|
"loss": 0.0046,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 15.65079365079365,
|
|
"grad_norm": 0.10222487151622772,
|
|
"learning_rate": 4.8547229549383844e-08,
|
|
"loss": 0.0037,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 15.666666666666666,
|
|
"grad_norm": 0.25009259581565857,
|
|
"learning_rate": 4.186053431680104e-08,
|
|
"loss": 0.0068,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 15.682539682539682,
|
|
"grad_norm": 0.06356369704008102,
|
|
"learning_rate": 3.566888394948009e-08,
|
|
"loss": 0.0022,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 15.698412698412698,
|
|
"grad_norm": 0.13318653404712677,
|
|
"learning_rate": 2.997233980168157e-08,
|
|
"loss": 0.0038,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 15.714285714285714,
|
|
"grad_norm": 0.05918239429593086,
|
|
"learning_rate": 2.4770958321568283e-08,
|
|
"loss": 0.0022,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 15.73015873015873,
|
|
"grad_norm": 0.1082151010632515,
|
|
"learning_rate": 2.0064791050633526e-08,
|
|
"loss": 0.0031,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 15.746031746031747,
|
|
"grad_norm": 0.22153517603874207,
|
|
"learning_rate": 1.5853884623195925e-08,
|
|
"loss": 0.0049,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 15.761904761904763,
|
|
"grad_norm": 0.09333167225122452,
|
|
"learning_rate": 1.2138280765944254e-08,
|
|
"loss": 0.0028,
|
|
"step": 993
|
|
},
{
"epoch": 15.777777777777779,
"grad_norm": 0.14806319773197174,
"learning_rate": 8.918016297515541e-09,
"loss": 0.0031,
"step": 994
},
{
"epoch": 15.793650793650794,
"grad_norm": 0.15807633101940155,
"learning_rate": 6.193123128134248e-09,
"loss": 0.0041,
"step": 995
},
{
"epoch": 15.80952380952381,
"grad_norm": 0.1491064578294754,
"learning_rate": 3.963628259290308e-09,
"loss": 0.0039,
"step": 996
},
{
"epoch": 15.825396825396826,
"grad_norm": 0.1288636475801468,
"learning_rate": 2.229553783478222e-09,
"loss": 0.0035,
"step": 997
},
{
"epoch": 15.841269841269842,
"grad_norm": 0.17619061470031738,
"learning_rate": 9.90916883986115e-10,
"loss": 0.0056,
"step": 998
},
{
"epoch": 15.857142857142858,
"grad_norm": 0.1407734900712967,
"learning_rate": 2.477298346958978e-10,
"loss": 0.0038,
"step": 999
},
{
"epoch": 15.873015873015873,
"grad_norm": 0.172795370221138,
"learning_rate": 0.0,
"loss": 0.0048,
"step": 1000
},
{
"epoch": 15.873015873015873,
"step": 1000,
"total_flos": 1.610662192030679e+17,
"train_loss": 0.14257763476669788,
"train_runtime": 58489.782,
"train_samples_per_second": 0.274,
"train_steps_per_second": 0.017
}
],
"logging_steps": 1,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.610662192030679e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}