noniewiem's picture
End of training
9f80eab
raw
history blame
10.5 kB
{
"best_metric": 0.1436834264468159,
"best_model_checkpoint": "cola-pixel-handwritten-mean-vatrpp-256-64-4-5e-5-15000-42/checkpoint-1600",
"epoch": 78.77611940298507,
"global_step": 2600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.03,
"learning_rate": 2.5e-05,
"loss": 0.6426,
"step": 100
},
{
"epoch": 3.03,
"eval_loss": 0.6255418658256531,
"eval_matthews_correlation": 0.0,
"eval_runtime": 8.2752,
"eval_samples_per_second": 126.039,
"eval_steps_per_second": 15.83,
"step": 100
},
{
"epoch": 6.06,
"learning_rate": 5e-05,
"loss": 0.6176,
"step": 200
},
{
"epoch": 6.06,
"eval_loss": 0.630817711353302,
"eval_matthews_correlation": 0.0,
"eval_runtime": 8.1785,
"eval_samples_per_second": 127.53,
"eval_steps_per_second": 16.018,
"step": 200
},
{
"epoch": 9.09,
"learning_rate": 4.9662162162162164e-05,
"loss": 0.6183,
"step": 300
},
{
"epoch": 9.09,
"eval_loss": 0.6186701059341431,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8187,
"eval_samples_per_second": 133.398,
"eval_steps_per_second": 16.755,
"step": 300
},
{
"epoch": 12.12,
"learning_rate": 4.9324324324324325e-05,
"loss": 0.6162,
"step": 400
},
{
"epoch": 12.12,
"eval_loss": 0.6158396005630493,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.9658,
"eval_samples_per_second": 130.935,
"eval_steps_per_second": 16.445,
"step": 400
},
{
"epoch": 15.15,
"learning_rate": 4.8986486486486486e-05,
"loss": 0.614,
"step": 500
},
{
"epoch": 15.15,
"eval_loss": 0.6250273585319519,
"eval_matthews_correlation": -0.02929206145132745,
"eval_runtime": 7.9393,
"eval_samples_per_second": 131.373,
"eval_steps_per_second": 16.5,
"step": 500
},
{
"epoch": 18.18,
"learning_rate": 4.8648648648648654e-05,
"loss": 0.6096,
"step": 600
},
{
"epoch": 18.18,
"eval_loss": 0.618541419506073,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.9167,
"eval_samples_per_second": 131.746,
"eval_steps_per_second": 16.547,
"step": 600
},
{
"epoch": 21.21,
"learning_rate": 4.8310810810810816e-05,
"loss": 0.6055,
"step": 700
},
{
"epoch": 21.21,
"eval_loss": 0.6223562359809875,
"eval_matthews_correlation": 0.017470726784935973,
"eval_runtime": 7.909,
"eval_samples_per_second": 131.875,
"eval_steps_per_second": 16.563,
"step": 700
},
{
"epoch": 24.24,
"learning_rate": 4.7979729729729736e-05,
"loss": 0.6001,
"step": 800
},
{
"epoch": 24.24,
"eval_loss": 0.6550844311714172,
"eval_matthews_correlation": 0.13008617806157513,
"eval_runtime": 8.0719,
"eval_samples_per_second": 129.214,
"eval_steps_per_second": 16.229,
"step": 800
},
{
"epoch": 27.27,
"learning_rate": 4.764527027027027e-05,
"loss": 0.5909,
"step": 900
},
{
"epoch": 27.27,
"eval_loss": 0.6533612012863159,
"eval_matthews_correlation": 0.056561954805504064,
"eval_runtime": 8.0431,
"eval_samples_per_second": 129.676,
"eval_steps_per_second": 16.287,
"step": 900
},
{
"epoch": 30.3,
"learning_rate": 4.730743243243244e-05,
"loss": 0.5726,
"step": 1000
},
{
"epoch": 30.3,
"eval_loss": 0.6678630709648132,
"eval_matthews_correlation": 0.10285544103286985,
"eval_runtime": 7.9909,
"eval_samples_per_second": 130.524,
"eval_steps_per_second": 16.394,
"step": 1000
},
{
"epoch": 33.33,
"learning_rate": 4.697297297297297e-05,
"loss": 0.5524,
"step": 1100
},
{
"epoch": 33.33,
"eval_loss": 0.6901304721832275,
"eval_matthews_correlation": 0.06313184843830015,
"eval_runtime": 8.3103,
"eval_samples_per_second": 125.507,
"eval_steps_per_second": 15.764,
"step": 1100
},
{
"epoch": 36.36,
"learning_rate": 4.663513513513514e-05,
"loss": 0.5167,
"step": 1200
},
{
"epoch": 36.36,
"eval_loss": 0.7026833891868591,
"eval_matthews_correlation": 0.09479027897712053,
"eval_runtime": 8.0473,
"eval_samples_per_second": 129.608,
"eval_steps_per_second": 16.279,
"step": 1200
},
{
"epoch": 39.39,
"learning_rate": 4.6297297297297295e-05,
"loss": 0.4779,
"step": 1300
},
{
"epoch": 39.39,
"eval_loss": 0.7578131556510925,
"eval_matthews_correlation": 0.10121883963858187,
"eval_runtime": 8.2334,
"eval_samples_per_second": 126.679,
"eval_steps_per_second": 15.911,
"step": 1300
},
{
"epoch": 42.42,
"learning_rate": 4.5959459459459463e-05,
"loss": 0.4271,
"step": 1400
},
{
"epoch": 42.42,
"eval_loss": 0.8021395802497864,
"eval_matthews_correlation": 0.11075235134282446,
"eval_runtime": 8.0267,
"eval_samples_per_second": 129.941,
"eval_steps_per_second": 16.32,
"step": 1400
},
{
"epoch": 45.45,
"learning_rate": 4.5621621621621625e-05,
"loss": 0.3888,
"step": 1500
},
{
"epoch": 45.45,
"eval_loss": 0.8813392519950867,
"eval_matthews_correlation": 0.10250522978751038,
"eval_runtime": 7.9207,
"eval_samples_per_second": 131.68,
"eval_steps_per_second": 16.539,
"step": 1500
},
{
"epoch": 48.48,
"learning_rate": 4.5283783783783786e-05,
"loss": 0.3428,
"step": 1600
},
{
"epoch": 48.48,
"eval_loss": 0.9361783862113953,
"eval_matthews_correlation": 0.1436834264468159,
"eval_runtime": 7.8976,
"eval_samples_per_second": 132.066,
"eval_steps_per_second": 16.587,
"step": 1600
},
{
"epoch": 51.51,
"learning_rate": 4.494594594594595e-05,
"loss": 0.2977,
"step": 1700
},
{
"epoch": 51.51,
"eval_loss": 1.078627347946167,
"eval_matthews_correlation": 0.11181658143300324,
"eval_runtime": 7.9966,
"eval_samples_per_second": 130.43,
"eval_steps_per_second": 16.382,
"step": 1700
},
{
"epoch": 54.54,
"learning_rate": 4.460810810810811e-05,
"loss": 0.2642,
"step": 1800
},
{
"epoch": 54.54,
"eval_loss": 1.0609544515609741,
"eval_matthews_correlation": 0.09013603883941315,
"eval_runtime": 7.8862,
"eval_samples_per_second": 132.257,
"eval_steps_per_second": 16.611,
"step": 1800
},
{
"epoch": 57.57,
"learning_rate": 4.427027027027027e-05,
"loss": 0.2272,
"step": 1900
},
{
"epoch": 57.57,
"eval_loss": 1.183494210243225,
"eval_matthews_correlation": 0.11545854045964393,
"eval_runtime": 7.9453,
"eval_samples_per_second": 131.273,
"eval_steps_per_second": 16.488,
"step": 1900
},
{
"epoch": 60.6,
"learning_rate": 4.393243243243244e-05,
"loss": 0.1915,
"step": 2000
},
{
"epoch": 60.6,
"eval_loss": 1.2531063556671143,
"eval_matthews_correlation": 0.12244528836896967,
"eval_runtime": 8.0199,
"eval_samples_per_second": 130.051,
"eval_steps_per_second": 16.334,
"step": 2000
},
{
"epoch": 63.63,
"learning_rate": 4.359459459459459e-05,
"loss": 0.1691,
"step": 2100
},
{
"epoch": 63.63,
"eval_loss": 1.3903430700302124,
"eval_matthews_correlation": 0.07541181195571064,
"eval_runtime": 7.9376,
"eval_samples_per_second": 131.4,
"eval_steps_per_second": 16.504,
"step": 2100
},
{
"epoch": 66.66,
"learning_rate": 4.325675675675676e-05,
"loss": 0.1491,
"step": 2200
},
{
"epoch": 66.66,
"eval_loss": 1.4947072267532349,
"eval_matthews_correlation": 0.06742580984707468,
"eval_runtime": 7.9502,
"eval_samples_per_second": 131.191,
"eval_steps_per_second": 16.477,
"step": 2200
},
{
"epoch": 69.69,
"learning_rate": 4.291891891891892e-05,
"loss": 0.1339,
"step": 2300
},
{
"epoch": 69.69,
"eval_loss": 1.5433533191680908,
"eval_matthews_correlation": 0.0736455413240434,
"eval_runtime": 8.2493,
"eval_samples_per_second": 126.434,
"eval_steps_per_second": 15.88,
"step": 2300
},
{
"epoch": 72.72,
"learning_rate": 4.258108108108108e-05,
"loss": 0.1164,
"step": 2400
},
{
"epoch": 72.72,
"eval_loss": 1.5793086290359497,
"eval_matthews_correlation": 0.11645872984022461,
"eval_runtime": 8.2848,
"eval_samples_per_second": 125.893,
"eval_steps_per_second": 15.812,
"step": 2400
},
{
"epoch": 75.75,
"learning_rate": 4.2243243243243244e-05,
"loss": 0.1078,
"step": 2500
},
{
"epoch": 75.75,
"eval_loss": 1.5938163995742798,
"eval_matthews_correlation": 0.09946100084630931,
"eval_runtime": 8.3263,
"eval_samples_per_second": 125.266,
"eval_steps_per_second": 15.733,
"step": 2500
},
{
"epoch": 78.78,
"learning_rate": 4.1905405405405406e-05,
"loss": 0.0974,
"step": 2600
},
{
"epoch": 78.78,
"eval_loss": 1.700919508934021,
"eval_matthews_correlation": 0.07568068132313144,
"eval_runtime": 8.4149,
"eval_samples_per_second": 123.947,
"eval_steps_per_second": 15.568,
"step": 2600
},
{
"epoch": 78.78,
"step": 2600,
"total_flos": 1.7353653717551284e+19,
"train_loss": 0.40566940747774566,
"train_runtime": 7554.577,
"train_samples_per_second": 508.301,
"train_steps_per_second": 1.986
}
],
"max_steps": 15000,
"num_train_epochs": 455,
"total_flos": 1.7353653717551284e+19,
"trial_name": null,
"trial_params": null
}