nep-spell-htf / trainer_state.json
duraad's picture
After 10 epochs
ee86c86 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.0,
"eval_steps": 20000,
"global_step": 101152,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 4.980149277433699e-05,
"loss": 0.0094,
"step": 1000
},
{
"epoch": 0.16,
"learning_rate": 4.9404478323010964e-05,
"loss": 0.0102,
"step": 2000
},
{
"epoch": 0.24,
"learning_rate": 4.900746387168493e-05,
"loss": 0.0098,
"step": 3000
},
{
"epoch": 0.32,
"learning_rate": 4.86104494203589e-05,
"loss": 0.0101,
"step": 4000
},
{
"epoch": 0.4,
"learning_rate": 4.821343496903287e-05,
"loss": 0.0098,
"step": 5000
},
{
"epoch": 0.47,
"learning_rate": 4.781642051770684e-05,
"loss": 0.0097,
"step": 6000
},
{
"epoch": 0.55,
"learning_rate": 4.741940606638082e-05,
"loss": 0.0099,
"step": 7000
},
{
"epoch": 0.63,
"learning_rate": 4.702239161505479e-05,
"loss": 0.01,
"step": 8000
},
{
"epoch": 0.71,
"learning_rate": 4.6625377163728765e-05,
"loss": 0.01,
"step": 9000
},
{
"epoch": 0.79,
"learning_rate": 4.6228362712402734e-05,
"loss": 0.0119,
"step": 10000
},
{
"epoch": 0.87,
"learning_rate": 4.5831348261076704e-05,
"loss": 0.0095,
"step": 11000
},
{
"epoch": 0.95,
"learning_rate": 4.543433380975068e-05,
"loss": 0.0097,
"step": 12000
},
{
"epoch": 1.03,
"learning_rate": 4.503731935842465e-05,
"loss": 0.0095,
"step": 13000
},
{
"epoch": 1.11,
"learning_rate": 4.464030490709862e-05,
"loss": 0.0089,
"step": 14000
},
{
"epoch": 1.19,
"learning_rate": 4.424329045577259e-05,
"loss": 0.0088,
"step": 15000
},
{
"epoch": 1.27,
"learning_rate": 4.3846276004446566e-05,
"loss": 0.0088,
"step": 16000
},
{
"epoch": 1.34,
"learning_rate": 4.3449261553120535e-05,
"loss": 0.0085,
"step": 17000
},
{
"epoch": 1.42,
"learning_rate": 4.3052247101794505e-05,
"loss": 0.0087,
"step": 18000
},
{
"epoch": 1.5,
"learning_rate": 4.265523265046848e-05,
"loss": 0.009,
"step": 19000
},
{
"epoch": 1.58,
"learning_rate": 4.225821819914245e-05,
"loss": 0.0087,
"step": 20000
},
{
"epoch": 1.58,
"eval_accuracy": 0.5290210343191523,
"eval_exact_match": 0.5290210343191523,
"eval_f1": 0.531319521324266,
"eval_loss": 0.005351942032575607,
"eval_precision": 0.5358216036691444,
"eval_recall": 0.5290210343191523,
"eval_runtime": 356.516,
"eval_samples_per_second": 17.736,
"eval_steps_per_second": 8.869,
"step": 20000
},
{
"epoch": 1.66,
"learning_rate": 4.186120374781643e-05,
"loss": 0.0082,
"step": 21000
},
{
"epoch": 1.74,
"learning_rate": 4.14641892964904e-05,
"loss": 0.0084,
"step": 22000
},
{
"epoch": 1.82,
"learning_rate": 4.1067174845164366e-05,
"loss": 0.0082,
"step": 23000
},
{
"epoch": 1.9,
"learning_rate": 4.0670160393838336e-05,
"loss": 0.0084,
"step": 24000
},
{
"epoch": 1.98,
"learning_rate": 4.0273145942512306e-05,
"loss": 0.0084,
"step": 25000
},
{
"epoch": 2.06,
"learning_rate": 3.987613149118628e-05,
"loss": 0.008,
"step": 26000
},
{
"epoch": 2.14,
"learning_rate": 3.947911703986025e-05,
"loss": 0.0075,
"step": 27000
},
{
"epoch": 2.21,
"learning_rate": 3.908210258853423e-05,
"loss": 0.0078,
"step": 28000
},
{
"epoch": 2.29,
"learning_rate": 3.86850881372082e-05,
"loss": 0.008,
"step": 29000
},
{
"epoch": 2.37,
"learning_rate": 3.828807368588217e-05,
"loss": 0.0075,
"step": 30000
},
{
"epoch": 2.45,
"learning_rate": 3.7891059234556144e-05,
"loss": 0.0077,
"step": 31000
},
{
"epoch": 2.53,
"learning_rate": 3.749404478323011e-05,
"loss": 0.0074,
"step": 32000
},
{
"epoch": 2.61,
"learning_rate": 3.709703033190408e-05,
"loss": 0.0078,
"step": 33000
},
{
"epoch": 2.69,
"learning_rate": 3.670001588057805e-05,
"loss": 0.008,
"step": 34000
},
{
"epoch": 2.77,
"learning_rate": 3.630300142925203e-05,
"loss": 0.0073,
"step": 35000
},
{
"epoch": 2.85,
"learning_rate": 3.5905986977926e-05,
"loss": 0.0076,
"step": 36000
},
{
"epoch": 2.93,
"learning_rate": 3.550897252659997e-05,
"loss": 0.0076,
"step": 37000
},
{
"epoch": 3.01,
"learning_rate": 3.5111958075273945e-05,
"loss": 0.0078,
"step": 38000
},
{
"epoch": 3.08,
"learning_rate": 3.4714943623947914e-05,
"loss": 0.007,
"step": 39000
},
{
"epoch": 3.16,
"learning_rate": 3.431792917262189e-05,
"loss": 0.0065,
"step": 40000
},
{
"epoch": 3.16,
"eval_accuracy": 0.5473667562865728,
"eval_exact_match": 0.5473667562865728,
"eval_f1": 0.5494543729242448,
"eval_loss": 0.005287344101816416,
"eval_precision": 0.5535347145342401,
"eval_recall": 0.5473667562865728,
"eval_runtime": 354.3494,
"eval_samples_per_second": 17.844,
"eval_steps_per_second": 8.923,
"step": 40000
},
{
"epoch": 3.24,
"learning_rate": 3.3920914721295854e-05,
"loss": 0.007,
"step": 41000
},
{
"epoch": 3.32,
"learning_rate": 3.352390026996983e-05,
"loss": 0.0071,
"step": 42000
},
{
"epoch": 3.4,
"learning_rate": 3.31268858186438e-05,
"loss": 0.0074,
"step": 43000
},
{
"epoch": 3.48,
"learning_rate": 3.272987136731777e-05,
"loss": 0.0068,
"step": 44000
},
{
"epoch": 3.56,
"learning_rate": 3.2332856915991746e-05,
"loss": 0.0067,
"step": 45000
},
{
"epoch": 3.64,
"learning_rate": 3.1935842464665715e-05,
"loss": 0.0071,
"step": 46000
},
{
"epoch": 3.72,
"learning_rate": 3.153882801333969e-05,
"loss": 0.0067,
"step": 47000
},
{
"epoch": 3.8,
"learning_rate": 3.114181356201366e-05,
"loss": 0.0071,
"step": 48000
},
{
"epoch": 3.88,
"learning_rate": 3.074479911068763e-05,
"loss": 0.0073,
"step": 49000
},
{
"epoch": 3.95,
"learning_rate": 3.03477846593616e-05,
"loss": 0.0067,
"step": 50000
},
{
"epoch": 4.03,
"learning_rate": 2.9950770208035574e-05,
"loss": 0.0067,
"step": 51000
},
{
"epoch": 4.11,
"learning_rate": 2.9553755756709543e-05,
"loss": 0.0064,
"step": 52000
},
{
"epoch": 4.19,
"learning_rate": 2.9156741305383516e-05,
"loss": 0.0062,
"step": 53000
},
{
"epoch": 4.27,
"learning_rate": 2.875972685405749e-05,
"loss": 0.0066,
"step": 54000
},
{
"epoch": 4.35,
"learning_rate": 2.8362712402731462e-05,
"loss": 0.0062,
"step": 55000
},
{
"epoch": 4.43,
"learning_rate": 2.7965697951405435e-05,
"loss": 0.0065,
"step": 56000
},
{
"epoch": 4.51,
"learning_rate": 2.7568683500079405e-05,
"loss": 0.0066,
"step": 57000
},
{
"epoch": 4.59,
"learning_rate": 2.7171669048753375e-05,
"loss": 0.0063,
"step": 58000
},
{
"epoch": 4.67,
"learning_rate": 2.6774654597427344e-05,
"loss": 0.0064,
"step": 59000
},
{
"epoch": 4.75,
"learning_rate": 2.6377640146101317e-05,
"loss": 0.006,
"step": 60000
},
{
"epoch": 4.75,
"eval_accuracy": 0.5701407559702673,
"eval_exact_match": 0.5701407559702673,
"eval_f1": 0.5724919605672413,
"eval_loss": 0.0050529008731245995,
"eval_precision": 0.5770994780958406,
"eval_recall": 0.5701407559702673,
"eval_runtime": 354.5246,
"eval_samples_per_second": 17.835,
"eval_steps_per_second": 8.919,
"step": 60000
},
{
"epoch": 4.82,
"learning_rate": 2.598062569477529e-05,
"loss": 0.0065,
"step": 61000
},
{
"epoch": 4.9,
"learning_rate": 2.5583611243449263e-05,
"loss": 0.0063,
"step": 62000
},
{
"epoch": 4.98,
"learning_rate": 2.5186596792123236e-05,
"loss": 0.0068,
"step": 63000
},
{
"epoch": 5.06,
"learning_rate": 2.4789582340797206e-05,
"loss": 0.006,
"step": 64000
},
{
"epoch": 5.14,
"learning_rate": 2.4392567889471176e-05,
"loss": 0.006,
"step": 65000
},
{
"epoch": 5.22,
"learning_rate": 2.399555343814515e-05,
"loss": 0.0058,
"step": 66000
},
{
"epoch": 5.3,
"learning_rate": 2.359853898681912e-05,
"loss": 0.0056,
"step": 67000
},
{
"epoch": 5.38,
"learning_rate": 2.3201524535493095e-05,
"loss": 0.0057,
"step": 68000
},
{
"epoch": 5.46,
"learning_rate": 2.2804510084167068e-05,
"loss": 0.0059,
"step": 69000
},
{
"epoch": 5.54,
"learning_rate": 2.2407495632841037e-05,
"loss": 0.0057,
"step": 70000
},
{
"epoch": 5.62,
"learning_rate": 2.2010481181515007e-05,
"loss": 0.0058,
"step": 71000
},
{
"epoch": 5.69,
"learning_rate": 2.161346673018898e-05,
"loss": 0.0059,
"step": 72000
},
{
"epoch": 5.77,
"learning_rate": 2.1216452278862953e-05,
"loss": 0.0059,
"step": 73000
},
{
"epoch": 5.85,
"learning_rate": 2.0819437827536923e-05,
"loss": 0.006,
"step": 74000
},
{
"epoch": 5.93,
"learning_rate": 2.0422423376210896e-05,
"loss": 0.0056,
"step": 75000
},
{
"epoch": 6.01,
"learning_rate": 2.002540892488487e-05,
"loss": 0.0058,
"step": 76000
},
{
"epoch": 6.09,
"learning_rate": 1.9628394473558838e-05,
"loss": 0.0054,
"step": 77000
},
{
"epoch": 6.17,
"learning_rate": 1.9231380022232808e-05,
"loss": 0.0061,
"step": 78000
},
{
"epoch": 6.25,
"learning_rate": 1.883436557090678e-05,
"loss": 0.0057,
"step": 79000
},
{
"epoch": 6.33,
"learning_rate": 1.8437351119580754e-05,
"loss": 0.0053,
"step": 80000
},
{
"epoch": 6.33,
"eval_accuracy": 0.5827929780167642,
"eval_exact_match": 0.5827929780167642,
"eval_f1": 0.5851441826137382,
"eval_loss": 0.004922935273498297,
"eval_precision": 0.5897517001423375,
"eval_recall": 0.5827929780167642,
"eval_runtime": 354.2336,
"eval_samples_per_second": 17.85,
"eval_steps_per_second": 8.926,
"step": 80000
},
{
"epoch": 6.41,
"learning_rate": 1.8040336668254727e-05,
"loss": 0.0054,
"step": 81000
},
{
"epoch": 6.49,
"learning_rate": 1.76433222169287e-05,
"loss": 0.0054,
"step": 82000
},
{
"epoch": 6.56,
"learning_rate": 1.7246307765602666e-05,
"loss": 0.0056,
"step": 83000
},
{
"epoch": 6.64,
"learning_rate": 1.684929331427664e-05,
"loss": 0.0055,
"step": 84000
},
{
"epoch": 6.72,
"learning_rate": 1.6452278862950612e-05,
"loss": 0.0051,
"step": 85000
},
{
"epoch": 6.8,
"learning_rate": 1.6055264411624585e-05,
"loss": 0.0057,
"step": 86000
},
{
"epoch": 6.88,
"learning_rate": 1.5658249960298555e-05,
"loss": 0.0053,
"step": 87000
},
{
"epoch": 6.96,
"learning_rate": 1.5261235508972528e-05,
"loss": 0.0053,
"step": 88000
},
{
"epoch": 7.04,
"learning_rate": 1.4864221057646499e-05,
"loss": 0.0049,
"step": 89000
},
{
"epoch": 7.12,
"learning_rate": 1.4467206606320472e-05,
"loss": 0.0054,
"step": 90000
},
{
"epoch": 7.2,
"learning_rate": 1.4070192154994443e-05,
"loss": 0.0052,
"step": 91000
},
{
"epoch": 7.28,
"learning_rate": 1.3673177703668413e-05,
"loss": 0.0052,
"step": 92000
},
{
"epoch": 7.36,
"learning_rate": 1.3276163252342386e-05,
"loss": 0.0053,
"step": 93000
},
{
"epoch": 7.43,
"learning_rate": 1.2879148801016357e-05,
"loss": 0.005,
"step": 94000
},
{
"epoch": 7.51,
"learning_rate": 1.2482134349690329e-05,
"loss": 0.0054,
"step": 95000
},
{
"epoch": 7.59,
"learning_rate": 1.2085119898364302e-05,
"loss": 0.0052,
"step": 96000
},
{
"epoch": 7.67,
"learning_rate": 1.1688105447038273e-05,
"loss": 0.0051,
"step": 97000
},
{
"epoch": 7.75,
"learning_rate": 1.1291090995712244e-05,
"loss": 0.005,
"step": 98000
},
{
"epoch": 7.83,
"learning_rate": 1.0894076544386216e-05,
"loss": 0.005,
"step": 99000
},
{
"epoch": 7.91,
"learning_rate": 1.0497062093060187e-05,
"loss": 0.005,
"step": 100000
},
{
"epoch": 7.91,
"eval_accuracy": 0.5921239917760557,
"eval_exact_match": 0.5921239917760557,
"eval_f1": 0.594369761189309,
"eval_loss": 0.004835059866309166,
"eval_precision": 0.5987664083504666,
"eval_recall": 0.5921239917760557,
"eval_runtime": 354.4854,
"eval_samples_per_second": 17.837,
"eval_steps_per_second": 8.92,
"step": 100000
},
{
"epoch": 7.99,
"learning_rate": 1.010004764173416e-05,
"loss": 0.005,
"step": 101000
}
],
"logging_steps": 1000,
"max_steps": 126440,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.0696817746378752e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}