llama-7b-lexical-substitution / trainer_state.json
Pierluigi Cassotti
upload model
e690c95
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.995713673381912,
"eval_steps": 500,
"global_step": 11660,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 0.00019914236706689538,
"loss": 2.573,
"step": 50
},
{
"epoch": 0.09,
"learning_rate": 0.00019828473413379075,
"loss": 1.8291,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 0.00019742710120068612,
"loss": 1.7257,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 0.00019656946826758148,
"loss": 1.6338,
"step": 200
},
{
"epoch": 0.21,
"learning_rate": 0.00019571183533447685,
"loss": 1.6359,
"step": 250
},
{
"epoch": 0.26,
"learning_rate": 0.00019485420240137222,
"loss": 1.5935,
"step": 300
},
{
"epoch": 0.3,
"learning_rate": 0.0001939965694682676,
"loss": 1.5494,
"step": 350
},
{
"epoch": 0.34,
"learning_rate": 0.00019313893653516296,
"loss": 1.559,
"step": 400
},
{
"epoch": 0.39,
"learning_rate": 0.00019228130360205833,
"loss": 1.5403,
"step": 450
},
{
"epoch": 0.43,
"learning_rate": 0.0001914236706689537,
"loss": 1.528,
"step": 500
},
{
"epoch": 0.47,
"learning_rate": 0.00019056603773584906,
"loss": 1.4899,
"step": 550
},
{
"epoch": 0.51,
"learning_rate": 0.00018970840480274443,
"loss": 1.4545,
"step": 600
},
{
"epoch": 0.56,
"learning_rate": 0.0001888507718696398,
"loss": 1.4658,
"step": 650
},
{
"epoch": 0.6,
"learning_rate": 0.00018799313893653517,
"loss": 1.4666,
"step": 700
},
{
"epoch": 0.64,
"learning_rate": 0.00018713550600343054,
"loss": 1.4603,
"step": 750
},
{
"epoch": 0.69,
"learning_rate": 0.0001862778730703259,
"loss": 1.4256,
"step": 800
},
{
"epoch": 0.73,
"learning_rate": 0.00018542024013722128,
"loss": 1.4422,
"step": 850
},
{
"epoch": 0.77,
"learning_rate": 0.00018456260720411664,
"loss": 1.4255,
"step": 900
},
{
"epoch": 0.81,
"learning_rate": 0.000183704974271012,
"loss": 1.3872,
"step": 950
},
{
"epoch": 0.86,
"learning_rate": 0.00018284734133790738,
"loss": 1.3886,
"step": 1000
},
{
"epoch": 0.9,
"learning_rate": 0.00018198970840480275,
"loss": 1.4147,
"step": 1050
},
{
"epoch": 0.94,
"learning_rate": 0.00018113207547169812,
"loss": 1.405,
"step": 1100
},
{
"epoch": 0.99,
"learning_rate": 0.0001802744425385935,
"loss": 1.4027,
"step": 1150
},
{
"epoch": 1.03,
"learning_rate": 0.00017941680960548886,
"loss": 1.1787,
"step": 1200
},
{
"epoch": 1.07,
"learning_rate": 0.00017855917667238422,
"loss": 1.058,
"step": 1250
},
{
"epoch": 1.11,
"learning_rate": 0.00017770154373927962,
"loss": 1.0577,
"step": 1300
},
{
"epoch": 1.16,
"learning_rate": 0.00017684391080617496,
"loss": 1.0675,
"step": 1350
},
{
"epoch": 1.2,
"learning_rate": 0.00017598627787307033,
"loss": 1.0459,
"step": 1400
},
{
"epoch": 1.24,
"learning_rate": 0.0001751286449399657,
"loss": 1.072,
"step": 1450
},
{
"epoch": 1.29,
"learning_rate": 0.00017427101200686107,
"loss": 1.0654,
"step": 1500
},
{
"epoch": 1.33,
"learning_rate": 0.00017341337907375644,
"loss": 1.0594,
"step": 1550
},
{
"epoch": 1.37,
"learning_rate": 0.00017255574614065183,
"loss": 1.0594,
"step": 1600
},
{
"epoch": 1.41,
"learning_rate": 0.00017169811320754717,
"loss": 1.0851,
"step": 1650
},
{
"epoch": 1.46,
"learning_rate": 0.00017084048027444254,
"loss": 1.066,
"step": 1700
},
{
"epoch": 1.5,
"learning_rate": 0.0001699828473413379,
"loss": 1.0591,
"step": 1750
},
{
"epoch": 1.54,
"learning_rate": 0.00016912521440823328,
"loss": 1.0692,
"step": 1800
},
{
"epoch": 1.59,
"learning_rate": 0.00016826758147512865,
"loss": 1.0772,
"step": 1850
},
{
"epoch": 1.63,
"learning_rate": 0.00016740994854202404,
"loss": 1.075,
"step": 1900
},
{
"epoch": 1.67,
"learning_rate": 0.00016655231560891938,
"loss": 1.0607,
"step": 1950
},
{
"epoch": 1.71,
"learning_rate": 0.00016569468267581475,
"loss": 1.0682,
"step": 2000
},
{
"epoch": 1.76,
"learning_rate": 0.00016483704974271012,
"loss": 1.0616,
"step": 2050
},
{
"epoch": 1.8,
"learning_rate": 0.0001639794168096055,
"loss": 1.0655,
"step": 2100
},
{
"epoch": 1.84,
"learning_rate": 0.00016312178387650086,
"loss": 1.0555,
"step": 2150
},
{
"epoch": 1.89,
"learning_rate": 0.00016226415094339625,
"loss": 1.0761,
"step": 2200
},
{
"epoch": 1.93,
"learning_rate": 0.0001614065180102916,
"loss": 1.0776,
"step": 2250
},
{
"epoch": 1.97,
"learning_rate": 0.00016054888507718696,
"loss": 1.0574,
"step": 2300
},
{
"epoch": 2.01,
"learning_rate": 0.00015969125214408233,
"loss": 0.9443,
"step": 2350
},
{
"epoch": 2.06,
"learning_rate": 0.0001588336192109777,
"loss": 0.7435,
"step": 2400
},
{
"epoch": 2.1,
"learning_rate": 0.00015797598627787307,
"loss": 0.7457,
"step": 2450
},
{
"epoch": 2.14,
"learning_rate": 0.00015711835334476847,
"loss": 0.7553,
"step": 2500
},
{
"epoch": 2.19,
"learning_rate": 0.0001562607204116638,
"loss": 0.7452,
"step": 2550
},
{
"epoch": 2.23,
"learning_rate": 0.00015540308747855918,
"loss": 0.7736,
"step": 2600
},
{
"epoch": 2.27,
"learning_rate": 0.00015454545454545454,
"loss": 0.7649,
"step": 2650
},
{
"epoch": 2.31,
"learning_rate": 0.0001536878216123499,
"loss": 0.7557,
"step": 2700
},
{
"epoch": 2.36,
"learning_rate": 0.0001528301886792453,
"loss": 0.7556,
"step": 2750
},
{
"epoch": 2.4,
"learning_rate": 0.00015197255574614068,
"loss": 0.7751,
"step": 2800
},
{
"epoch": 2.44,
"learning_rate": 0.00015111492281303602,
"loss": 0.7613,
"step": 2850
},
{
"epoch": 2.49,
"learning_rate": 0.0001502572898799314,
"loss": 0.7751,
"step": 2900
},
{
"epoch": 2.53,
"learning_rate": 0.00014939965694682676,
"loss": 0.774,
"step": 2950
},
{
"epoch": 2.57,
"learning_rate": 0.00014854202401372212,
"loss": 0.7803,
"step": 3000
},
{
"epoch": 2.61,
"learning_rate": 0.00014768439108061752,
"loss": 0.7691,
"step": 3050
},
{
"epoch": 2.66,
"learning_rate": 0.0001468267581475129,
"loss": 0.7684,
"step": 3100
},
{
"epoch": 2.7,
"learning_rate": 0.00014596912521440823,
"loss": 0.7811,
"step": 3150
},
{
"epoch": 2.74,
"learning_rate": 0.0001451114922813036,
"loss": 0.7631,
"step": 3200
},
{
"epoch": 2.79,
"learning_rate": 0.00014425385934819897,
"loss": 0.7782,
"step": 3250
},
{
"epoch": 2.83,
"learning_rate": 0.00014339622641509434,
"loss": 0.7686,
"step": 3300
},
{
"epoch": 2.87,
"learning_rate": 0.00014253859348198973,
"loss": 0.7919,
"step": 3350
},
{
"epoch": 2.91,
"learning_rate": 0.0001416809605488851,
"loss": 0.7768,
"step": 3400
},
{
"epoch": 2.96,
"learning_rate": 0.00014082332761578044,
"loss": 0.7837,
"step": 3450
},
{
"epoch": 3.0,
"learning_rate": 0.0001399656946826758,
"loss": 0.7739,
"step": 3500
},
{
"epoch": 3.04,
"learning_rate": 0.00013910806174957118,
"loss": 0.4864,
"step": 3550
},
{
"epoch": 3.09,
"learning_rate": 0.00013825042881646655,
"loss": 0.4876,
"step": 3600
},
{
"epoch": 3.13,
"learning_rate": 0.00013739279588336194,
"loss": 0.4899,
"step": 3650
},
{
"epoch": 3.17,
"learning_rate": 0.0001365351629502573,
"loss": 0.4833,
"step": 3700
},
{
"epoch": 3.21,
"learning_rate": 0.00013567753001715265,
"loss": 0.5126,
"step": 3750
},
{
"epoch": 3.26,
"learning_rate": 0.00013481989708404802,
"loss": 0.4891,
"step": 3800
},
{
"epoch": 3.3,
"learning_rate": 0.0001339622641509434,
"loss": 0.5,
"step": 3850
},
{
"epoch": 3.34,
"learning_rate": 0.00013310463121783879,
"loss": 0.5078,
"step": 3900
},
{
"epoch": 3.39,
"learning_rate": 0.00013224699828473415,
"loss": 0.4931,
"step": 3950
},
{
"epoch": 3.43,
"learning_rate": 0.00013138936535162952,
"loss": 0.5062,
"step": 4000
},
{
"epoch": 3.47,
"learning_rate": 0.00013053173241852486,
"loss": 0.509,
"step": 4050
},
{
"epoch": 3.51,
"learning_rate": 0.00012967409948542023,
"loss": 0.5097,
"step": 4100
},
{
"epoch": 3.56,
"learning_rate": 0.0001288164665523156,
"loss": 0.5225,
"step": 4150
},
{
"epoch": 3.6,
"learning_rate": 0.000127958833619211,
"loss": 0.5133,
"step": 4200
},
{
"epoch": 3.64,
"learning_rate": 0.00012710120068610637,
"loss": 0.5183,
"step": 4250
},
{
"epoch": 3.69,
"learning_rate": 0.00012624356775300173,
"loss": 0.5259,
"step": 4300
},
{
"epoch": 3.73,
"learning_rate": 0.00012538593481989708,
"loss": 0.5172,
"step": 4350
},
{
"epoch": 3.77,
"learning_rate": 0.00012452830188679244,
"loss": 0.5135,
"step": 4400
},
{
"epoch": 3.81,
"learning_rate": 0.0001236706689536878,
"loss": 0.5218,
"step": 4450
},
{
"epoch": 3.86,
"learning_rate": 0.0001228130360205832,
"loss": 0.5252,
"step": 4500
},
{
"epoch": 3.9,
"learning_rate": 0.00012195540308747856,
"loss": 0.5139,
"step": 4550
},
{
"epoch": 3.94,
"learning_rate": 0.00012109777015437393,
"loss": 0.5267,
"step": 4600
},
{
"epoch": 3.99,
"learning_rate": 0.0001202401372212693,
"loss": 0.5193,
"step": 4650
},
{
"epoch": 4.03,
"learning_rate": 0.00011938250428816467,
"loss": 0.3797,
"step": 4700
},
{
"epoch": 4.07,
"learning_rate": 0.00011852487135506002,
"loss": 0.3031,
"step": 4750
},
{
"epoch": 4.11,
"learning_rate": 0.00011766723842195542,
"loss": 0.2867,
"step": 4800
},
{
"epoch": 4.16,
"learning_rate": 0.00011680960548885078,
"loss": 0.2986,
"step": 4850
},
{
"epoch": 4.2,
"learning_rate": 0.00011595197255574614,
"loss": 0.3021,
"step": 4900
},
{
"epoch": 4.24,
"learning_rate": 0.00011509433962264151,
"loss": 0.2983,
"step": 4950
},
{
"epoch": 4.29,
"learning_rate": 0.00011423670668953688,
"loss": 0.3014,
"step": 5000
},
{
"epoch": 4.33,
"learning_rate": 0.00011337907375643224,
"loss": 0.2966,
"step": 5050
},
{
"epoch": 4.37,
"learning_rate": 0.00011252144082332763,
"loss": 0.3014,
"step": 5100
},
{
"epoch": 4.41,
"learning_rate": 0.00011166380789022299,
"loss": 0.3105,
"step": 5150
},
{
"epoch": 4.46,
"learning_rate": 0.00011080617495711836,
"loss": 0.3115,
"step": 5200
},
{
"epoch": 4.5,
"learning_rate": 0.00010994854202401372,
"loss": 0.3139,
"step": 5250
},
{
"epoch": 4.54,
"learning_rate": 0.00010909090909090909,
"loss": 0.3089,
"step": 5300
},
{
"epoch": 4.59,
"learning_rate": 0.00010823327615780447,
"loss": 0.3069,
"step": 5350
},
{
"epoch": 4.63,
"learning_rate": 0.00010737564322469984,
"loss": 0.3181,
"step": 5400
},
{
"epoch": 4.67,
"learning_rate": 0.0001065180102915952,
"loss": 0.3129,
"step": 5450
},
{
"epoch": 4.71,
"learning_rate": 0.00010566037735849057,
"loss": 0.3199,
"step": 5500
},
{
"epoch": 4.76,
"learning_rate": 0.00010480274442538594,
"loss": 0.3079,
"step": 5550
},
{
"epoch": 4.8,
"learning_rate": 0.0001039451114922813,
"loss": 0.32,
"step": 5600
},
{
"epoch": 4.84,
"learning_rate": 0.00010308747855917669,
"loss": 0.3157,
"step": 5650
},
{
"epoch": 4.89,
"learning_rate": 0.00010222984562607205,
"loss": 0.3181,
"step": 5700
},
{
"epoch": 4.93,
"learning_rate": 0.00010137221269296742,
"loss": 0.3217,
"step": 5750
},
{
"epoch": 4.97,
"learning_rate": 0.00010051457975986278,
"loss": 0.3132,
"step": 5800
},
{
"epoch": 5.02,
"learning_rate": 9.965694682675816e-05,
"loss": 0.2703,
"step": 5850
},
{
"epoch": 5.06,
"learning_rate": 9.879931389365353e-05,
"loss": 0.1618,
"step": 5900
},
{
"epoch": 5.1,
"learning_rate": 9.794168096054888e-05,
"loss": 0.1767,
"step": 5950
},
{
"epoch": 5.14,
"learning_rate": 9.708404802744427e-05,
"loss": 0.1647,
"step": 6000
},
{
"epoch": 5.19,
"learning_rate": 9.622641509433963e-05,
"loss": 0.1725,
"step": 6050
},
{
"epoch": 5.23,
"learning_rate": 9.536878216123499e-05,
"loss": 0.1732,
"step": 6100
},
{
"epoch": 5.27,
"learning_rate": 9.451114922813037e-05,
"loss": 0.168,
"step": 6150
},
{
"epoch": 5.32,
"learning_rate": 9.365351629502574e-05,
"loss": 0.1769,
"step": 6200
},
{
"epoch": 5.36,
"learning_rate": 9.27958833619211e-05,
"loss": 0.1718,
"step": 6250
},
{
"epoch": 5.4,
"learning_rate": 9.193825042881648e-05,
"loss": 0.1734,
"step": 6300
},
{
"epoch": 5.44,
"learning_rate": 9.108061749571185e-05,
"loss": 0.1815,
"step": 6350
},
{
"epoch": 5.49,
"learning_rate": 9.02229845626072e-05,
"loss": 0.1761,
"step": 6400
},
{
"epoch": 5.53,
"learning_rate": 8.936535162950258e-05,
"loss": 0.1783,
"step": 6450
},
{
"epoch": 5.57,
"learning_rate": 8.850771869639795e-05,
"loss": 0.1764,
"step": 6500
},
{
"epoch": 5.62,
"learning_rate": 8.765008576329331e-05,
"loss": 0.1763,
"step": 6550
},
{
"epoch": 5.66,
"learning_rate": 8.679245283018869e-05,
"loss": 0.1684,
"step": 6600
},
{
"epoch": 5.7,
"learning_rate": 8.593481989708406e-05,
"loss": 0.174,
"step": 6650
},
{
"epoch": 5.74,
"learning_rate": 8.507718696397941e-05,
"loss": 0.1724,
"step": 6700
},
{
"epoch": 5.79,
"learning_rate": 8.42195540308748e-05,
"loss": 0.177,
"step": 6750
},
{
"epoch": 5.83,
"learning_rate": 8.336192109777016e-05,
"loss": 0.1753,
"step": 6800
},
{
"epoch": 5.87,
"learning_rate": 8.250428816466552e-05,
"loss": 0.1765,
"step": 6850
},
{
"epoch": 5.92,
"learning_rate": 8.16466552315609e-05,
"loss": 0.1748,
"step": 6900
},
{
"epoch": 5.96,
"learning_rate": 8.078902229845627e-05,
"loss": 0.1781,
"step": 6950
},
{
"epoch": 6.0,
"learning_rate": 7.993138936535164e-05,
"loss": 0.1838,
"step": 7000
},
{
"epoch": 6.04,
"learning_rate": 7.9073756432247e-05,
"loss": 0.0912,
"step": 7050
},
{
"epoch": 6.09,
"learning_rate": 7.821612349914237e-05,
"loss": 0.0874,
"step": 7100
},
{
"epoch": 6.13,
"learning_rate": 7.735849056603774e-05,
"loss": 0.0879,
"step": 7150
},
{
"epoch": 6.17,
"learning_rate": 7.650085763293311e-05,
"loss": 0.0922,
"step": 7200
},
{
"epoch": 6.22,
"learning_rate": 7.564322469982848e-05,
"loss": 0.0979,
"step": 7250
},
{
"epoch": 6.26,
"learning_rate": 7.478559176672385e-05,
"loss": 0.0853,
"step": 7300
},
{
"epoch": 6.3,
"learning_rate": 7.392795883361922e-05,
"loss": 0.0866,
"step": 7350
},
{
"epoch": 6.34,
"learning_rate": 7.307032590051459e-05,
"loss": 0.0876,
"step": 7400
},
{
"epoch": 6.39,
"learning_rate": 7.221269296740995e-05,
"loss": 0.0895,
"step": 7450
},
{
"epoch": 6.43,
"learning_rate": 7.135506003430532e-05,
"loss": 0.0925,
"step": 7500
},
{
"epoch": 6.47,
"learning_rate": 7.049742710120069e-05,
"loss": 0.0915,
"step": 7550
},
{
"epoch": 6.52,
"learning_rate": 6.963979416809606e-05,
"loss": 0.0844,
"step": 7600
},
{
"epoch": 6.56,
"learning_rate": 6.878216123499143e-05,
"loss": 0.0949,
"step": 7650
},
{
"epoch": 6.6,
"learning_rate": 6.79245283018868e-05,
"loss": 0.0893,
"step": 7700
},
{
"epoch": 6.64,
"learning_rate": 6.706689536878217e-05,
"loss": 0.0921,
"step": 7750
},
{
"epoch": 6.69,
"learning_rate": 6.620926243567753e-05,
"loss": 0.0879,
"step": 7800
},
{
"epoch": 6.73,
"learning_rate": 6.53516295025729e-05,
"loss": 0.0886,
"step": 7850
},
{
"epoch": 6.77,
"learning_rate": 6.449399656946827e-05,
"loss": 0.0912,
"step": 7900
},
{
"epoch": 6.82,
"learning_rate": 6.363636363636364e-05,
"loss": 0.0898,
"step": 7950
},
{
"epoch": 6.86,
"learning_rate": 6.277873070325901e-05,
"loss": 0.0936,
"step": 8000
},
{
"epoch": 6.9,
"learning_rate": 6.192109777015438e-05,
"loss": 0.0903,
"step": 8050
},
{
"epoch": 6.94,
"learning_rate": 6.106346483704975e-05,
"loss": 0.0876,
"step": 8100
},
{
"epoch": 6.99,
"learning_rate": 6.020583190394511e-05,
"loss": 0.0887,
"step": 8150
},
{
"epoch": 7.03,
"learning_rate": 5.9348198970840484e-05,
"loss": 0.0559,
"step": 8200
},
{
"epoch": 7.07,
"learning_rate": 5.849056603773585e-05,
"loss": 0.042,
"step": 8250
},
{
"epoch": 7.12,
"learning_rate": 5.763293310463123e-05,
"loss": 0.0443,
"step": 8300
},
{
"epoch": 7.16,
"learning_rate": 5.677530017152659e-05,
"loss": 0.0419,
"step": 8350
},
{
"epoch": 7.2,
"learning_rate": 5.591766723842196e-05,
"loss": 0.0406,
"step": 8400
},
{
"epoch": 7.24,
"learning_rate": 5.506003430531733e-05,
"loss": 0.043,
"step": 8450
},
{
"epoch": 7.29,
"learning_rate": 5.4202401372212695e-05,
"loss": 0.042,
"step": 8500
},
{
"epoch": 7.33,
"learning_rate": 5.3344768439108064e-05,
"loss": 0.0404,
"step": 8550
},
{
"epoch": 7.37,
"learning_rate": 5.248713550600344e-05,
"loss": 0.0415,
"step": 8600
},
{
"epoch": 7.42,
"learning_rate": 5.16295025728988e-05,
"loss": 0.0443,
"step": 8650
},
{
"epoch": 7.46,
"learning_rate": 5.077186963979417e-05,
"loss": 0.0435,
"step": 8700
},
{
"epoch": 7.5,
"learning_rate": 4.991423670668954e-05,
"loss": 0.0393,
"step": 8750
},
{
"epoch": 7.54,
"learning_rate": 4.9056603773584906e-05,
"loss": 0.0424,
"step": 8800
},
{
"epoch": 7.59,
"learning_rate": 4.819897084048028e-05,
"loss": 0.0411,
"step": 8850
},
{
"epoch": 7.63,
"learning_rate": 4.7341337907375644e-05,
"loss": 0.0409,
"step": 8900
},
{
"epoch": 7.67,
"learning_rate": 4.648370497427101e-05,
"loss": 0.0431,
"step": 8950
},
{
"epoch": 7.72,
"learning_rate": 4.562607204116639e-05,
"loss": 0.0411,
"step": 9000
},
{
"epoch": 7.76,
"learning_rate": 4.476843910806175e-05,
"loss": 0.0378,
"step": 9050
},
{
"epoch": 7.8,
"learning_rate": 4.391080617495712e-05,
"loss": 0.0402,
"step": 9100
},
{
"epoch": 7.84,
"learning_rate": 4.305317324185249e-05,
"loss": 0.0396,
"step": 9150
},
{
"epoch": 7.89,
"learning_rate": 4.219554030874786e-05,
"loss": 0.0402,
"step": 9200
},
{
"epoch": 7.93,
"learning_rate": 4.1337907375643224e-05,
"loss": 0.0419,
"step": 9250
},
{
"epoch": 7.97,
"learning_rate": 4.04802744425386e-05,
"loss": 0.0403,
"step": 9300
},
{
"epoch": 8.02,
"learning_rate": 3.962264150943397e-05,
"loss": 0.0313,
"step": 9350
},
{
"epoch": 8.06,
"learning_rate": 3.876500857632933e-05,
"loss": 0.0147,
"step": 9400
},
{
"epoch": 8.1,
"learning_rate": 3.7907375643224705e-05,
"loss": 0.0174,
"step": 9450
},
{
"epoch": 8.14,
"learning_rate": 3.704974271012007e-05,
"loss": 0.0159,
"step": 9500
},
{
"epoch": 8.19,
"learning_rate": 3.6192109777015435e-05,
"loss": 0.0151,
"step": 9550
},
{
"epoch": 8.23,
"learning_rate": 3.533447684391081e-05,
"loss": 0.0165,
"step": 9600
},
{
"epoch": 8.27,
"learning_rate": 3.447684391080618e-05,
"loss": 0.0165,
"step": 9650
},
{
"epoch": 8.32,
"learning_rate": 3.361921097770154e-05,
"loss": 0.0154,
"step": 9700
},
{
"epoch": 8.36,
"learning_rate": 3.2761578044596916e-05,
"loss": 0.0158,
"step": 9750
},
{
"epoch": 8.4,
"learning_rate": 3.1903945111492285e-05,
"loss": 0.0162,
"step": 9800
},
{
"epoch": 8.44,
"learning_rate": 3.1046312178387653e-05,
"loss": 0.0142,
"step": 9850
},
{
"epoch": 8.49,
"learning_rate": 3.018867924528302e-05,
"loss": 0.0172,
"step": 9900
},
{
"epoch": 8.53,
"learning_rate": 2.933104631217839e-05,
"loss": 0.0178,
"step": 9950
},
{
"epoch": 8.57,
"learning_rate": 2.847341337907376e-05,
"loss": 0.0146,
"step": 10000
},
{
"epoch": 8.62,
"learning_rate": 2.7615780445969124e-05,
"loss": 0.0144,
"step": 10050
},
{
"epoch": 8.66,
"learning_rate": 2.6758147512864496e-05,
"loss": 0.0141,
"step": 10100
},
{
"epoch": 8.7,
"learning_rate": 2.5900514579759865e-05,
"loss": 0.0143,
"step": 10150
},
{
"epoch": 8.74,
"learning_rate": 2.504288164665523e-05,
"loss": 0.0164,
"step": 10200
},
{
"epoch": 8.79,
"learning_rate": 2.4185248713550602e-05,
"loss": 0.017,
"step": 10250
},
{
"epoch": 8.83,
"learning_rate": 2.332761578044597e-05,
"loss": 0.0156,
"step": 10300
},
{
"epoch": 8.87,
"learning_rate": 2.246998284734134e-05,
"loss": 0.0139,
"step": 10350
},
{
"epoch": 8.92,
"learning_rate": 2.1612349914236708e-05,
"loss": 0.0147,
"step": 10400
},
{
"epoch": 8.96,
"learning_rate": 2.0754716981132076e-05,
"loss": 0.0151,
"step": 10450
},
{
"epoch": 9.0,
"learning_rate": 1.9897084048027445e-05,
"loss": 0.015,
"step": 10500
},
{
"epoch": 9.04,
"learning_rate": 1.9039451114922813e-05,
"loss": 0.006,
"step": 10550
},
{
"epoch": 9.09,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.0064,
"step": 10600
},
{
"epoch": 9.13,
"learning_rate": 1.732418524871355e-05,
"loss": 0.007,
"step": 10650
},
{
"epoch": 9.17,
"learning_rate": 1.6466552315608923e-05,
"loss": 0.0062,
"step": 10700
},
{
"epoch": 9.22,
"learning_rate": 1.5608919382504288e-05,
"loss": 0.0055,
"step": 10750
},
{
"epoch": 9.26,
"learning_rate": 1.4751286449399656e-05,
"loss": 0.0058,
"step": 10800
},
{
"epoch": 9.3,
"learning_rate": 1.3893653516295027e-05,
"loss": 0.0062,
"step": 10850
},
{
"epoch": 9.34,
"learning_rate": 1.3036020583190395e-05,
"loss": 0.0057,
"step": 10900
},
{
"epoch": 9.39,
"learning_rate": 1.2178387650085764e-05,
"loss": 0.0064,
"step": 10950
},
{
"epoch": 9.43,
"learning_rate": 1.1320754716981132e-05,
"loss": 0.0062,
"step": 11000
},
{
"epoch": 9.47,
"learning_rate": 1.0463121783876503e-05,
"loss": 0.006,
"step": 11050
},
{
"epoch": 9.52,
"learning_rate": 9.60548885077187e-06,
"loss": 0.0064,
"step": 11100
},
{
"epoch": 9.56,
"learning_rate": 8.747855917667238e-06,
"loss": 0.0061,
"step": 11150
},
{
"epoch": 9.6,
"learning_rate": 7.890222984562608e-06,
"loss": 0.0056,
"step": 11200
},
{
"epoch": 9.64,
"learning_rate": 7.032590051457977e-06,
"loss": 0.0056,
"step": 11250
},
{
"epoch": 9.69,
"learning_rate": 6.1749571183533456e-06,
"loss": 0.0059,
"step": 11300
},
{
"epoch": 9.73,
"learning_rate": 5.317324185248713e-06,
"loss": 0.0062,
"step": 11350
},
{
"epoch": 9.77,
"learning_rate": 4.459691252144083e-06,
"loss": 0.0056,
"step": 11400
},
{
"epoch": 9.82,
"learning_rate": 3.6020583190394517e-06,
"loss": 0.0061,
"step": 11450
},
{
"epoch": 9.86,
"learning_rate": 2.74442538593482e-06,
"loss": 0.0061,
"step": 11500
},
{
"epoch": 9.9,
"learning_rate": 1.8867924528301887e-06,
"loss": 0.0056,
"step": 11550
},
{
"epoch": 9.94,
"learning_rate": 1.0291595197255577e-06,
"loss": 0.0058,
"step": 11600
},
{
"epoch": 9.99,
"learning_rate": 1.7152658662092624e-07,
"loss": 0.0064,
"step": 11650
}
],
"logging_steps": 50,
"max_steps": 11660,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.652879536528646e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}