noniewiem's picture
End of training
5497460
raw
history blame
4.71 kB
{
"best_metric": 0.00286100001416597,
"best_model_checkpoint": "cola-pixel-handwritten-mean-vatrpp-256-64-4-2e-5-15000-42/checkpoint-100",
"epoch": 33.32835820895522,
"global_step": 1100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.03,
"learning_rate": 9.800000000000001e-06,
"loss": 0.655,
"step": 100
},
{
"epoch": 3.03,
"eval_loss": 0.6341390013694763,
"eval_matthews_correlation": 0.00286100001416597,
"eval_runtime": 7.8213,
"eval_samples_per_second": 133.354,
"eval_steps_per_second": 16.749,
"step": 100
},
{
"epoch": 6.06,
"learning_rate": 1.98e-05,
"loss": 0.6174,
"step": 200
},
{
"epoch": 6.06,
"eval_loss": 0.6281591057777405,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.9503,
"eval_samples_per_second": 131.19,
"eval_steps_per_second": 16.477,
"step": 200
},
{
"epoch": 9.09,
"learning_rate": 1.9867567567567568e-05,
"loss": 0.6196,
"step": 300
},
{
"epoch": 9.09,
"eval_loss": 0.6198328137397766,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8212,
"eval_samples_per_second": 133.356,
"eval_steps_per_second": 16.749,
"step": 300
},
{
"epoch": 12.12,
"learning_rate": 1.9733783783783785e-05,
"loss": 0.6158,
"step": 400
},
{
"epoch": 12.12,
"eval_loss": 0.6199322938919067,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8954,
"eval_samples_per_second": 132.102,
"eval_steps_per_second": 16.592,
"step": 400
},
{
"epoch": 15.15,
"learning_rate": 1.9600000000000002e-05,
"loss": 0.6175,
"step": 500
},
{
"epoch": 15.15,
"eval_loss": 0.6180645823478699,
"eval_matthews_correlation": 0.0,
"eval_runtime": 8.2397,
"eval_samples_per_second": 126.583,
"eval_steps_per_second": 15.899,
"step": 500
},
{
"epoch": 18.18,
"learning_rate": 1.9464864864864867e-05,
"loss": 0.6152,
"step": 600
},
{
"epoch": 18.18,
"eval_loss": 0.619079053401947,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8304,
"eval_samples_per_second": 133.198,
"eval_steps_per_second": 16.73,
"step": 600
},
{
"epoch": 21.21,
"learning_rate": 1.932972972972973e-05,
"loss": 0.617,
"step": 700
},
{
"epoch": 21.21,
"eval_loss": 0.6184842586517334,
"eval_matthews_correlation": 0.0,
"eval_runtime": 8.2213,
"eval_samples_per_second": 126.865,
"eval_steps_per_second": 15.934,
"step": 700
},
{
"epoch": 24.24,
"learning_rate": 1.9194594594594596e-05,
"loss": 0.6191,
"step": 800
},
{
"epoch": 24.24,
"eval_loss": 0.6185177564620972,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.887,
"eval_samples_per_second": 132.243,
"eval_steps_per_second": 16.61,
"step": 800
},
{
"epoch": 27.27,
"learning_rate": 1.905945945945946e-05,
"loss": 0.6162,
"step": 900
},
{
"epoch": 27.27,
"eval_loss": 0.6183082461357117,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8268,
"eval_samples_per_second": 133.26,
"eval_steps_per_second": 16.737,
"step": 900
},
{
"epoch": 30.3,
"learning_rate": 1.8924324324324325e-05,
"loss": 0.6166,
"step": 1000
},
{
"epoch": 30.3,
"eval_loss": 0.6183302402496338,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.795,
"eval_samples_per_second": 133.803,
"eval_steps_per_second": 16.806,
"step": 1000
},
{
"epoch": 33.33,
"learning_rate": 1.878918918918919e-05,
"loss": 0.6177,
"step": 1100
},
{
"epoch": 33.33,
"eval_loss": 0.6182125210762024,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8695,
"eval_samples_per_second": 132.537,
"eval_steps_per_second": 16.647,
"step": 1100
},
{
"epoch": 33.33,
"step": 1100,
"total_flos": 7.341930418964005e+18,
"train_loss": 0.6206500174782493,
"train_runtime": 3140.0413,
"train_samples_per_second": 1222.914,
"train_steps_per_second": 4.777
}
],
"max_steps": 15000,
"num_train_epochs": 455,
"total_flos": 7.341930418964005e+18,
"trial_name": null,
"trial_params": null
}