{
  "best_metric": 0.1436834264468159,
  "best_model_checkpoint": "cola-pixel-handwritten-mean-vatrpp-256-64-4-5e-5-15000-42/checkpoint-1600",
  "epoch": 78.77611940298507,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.03,
      "learning_rate": 2.5e-05,
      "loss": 0.6426,
      "step": 100
    },
    {
      "epoch": 3.03,
      "eval_loss": 0.6255418658256531,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 8.2752,
      "eval_samples_per_second": 126.039,
      "eval_steps_per_second": 15.83,
      "step": 100
    },
    {
      "epoch": 6.06,
      "learning_rate": 5e-05,
      "loss": 0.6176,
      "step": 200
    },
    {
      "epoch": 6.06,
      "eval_loss": 0.630817711353302,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 8.1785,
      "eval_samples_per_second": 127.53,
      "eval_steps_per_second": 16.018,
      "step": 200
    },
    {
      "epoch": 9.09,
      "learning_rate": 4.9662162162162164e-05,
      "loss": 0.6183,
      "step": 300
    },
    {
      "epoch": 9.09,
      "eval_loss": 0.6186701059341431,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 7.8187,
      "eval_samples_per_second": 133.398,
      "eval_steps_per_second": 16.755,
      "step": 300
    },
    {
      "epoch": 12.12,
      "learning_rate": 4.9324324324324325e-05,
      "loss": 0.6162,
      "step": 400
    },
    {
      "epoch": 12.12,
      "eval_loss": 0.6158396005630493,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 7.9658,
      "eval_samples_per_second": 130.935,
      "eval_steps_per_second": 16.445,
      "step": 400
    },
    {
      "epoch": 15.15,
      "learning_rate": 4.8986486486486486e-05,
      "loss": 0.614,
      "step": 500
    },
    {
      "epoch": 15.15,
      "eval_loss": 0.6250273585319519,
      "eval_matthews_correlation": -0.02929206145132745,
      "eval_runtime": 7.9393,
      "eval_samples_per_second": 131.373,
      "eval_steps_per_second": 16.5,
      "step": 500
    },
    {
      "epoch": 18.18,
      "learning_rate": 4.8648648648648654e-05,
      "loss": 0.6096,
      "step": 600
    },
    {
      "epoch": 18.18,
      "eval_loss": 0.618541419506073,
      "eval_matthews_correlation": 0.0,
      "eval_runtime": 7.9167,
      "eval_samples_per_second": 131.746,
      "eval_steps_per_second": 16.547,
      "step": 600
    },
    {
      "epoch": 21.21,
      "learning_rate": 4.8310810810810816e-05,
      "loss": 0.6055,
      "step": 700
    },
    {
      "epoch": 21.21,
      "eval_loss": 0.6223562359809875,
      "eval_matthews_correlation": 0.017470726784935973,
      "eval_runtime": 7.909,
      "eval_samples_per_second": 131.875,
      "eval_steps_per_second": 16.563,
      "step": 700
    },
    {
      "epoch": 24.24,
      "learning_rate": 4.7979729729729736e-05,
      "loss": 0.6001,
      "step": 800
    },
    {
      "epoch": 24.24,
      "eval_loss": 0.6550844311714172,
      "eval_matthews_correlation": 0.13008617806157513,
      "eval_runtime": 8.0719,
      "eval_samples_per_second": 129.214,
      "eval_steps_per_second": 16.229,
      "step": 800
    },
    {
      "epoch": 27.27,
      "learning_rate": 4.764527027027027e-05,
      "loss": 0.5909,
      "step": 900
    },
    {
      "epoch": 27.27,
      "eval_loss": 0.6533612012863159,
      "eval_matthews_correlation": 0.056561954805504064,
      "eval_runtime": 8.0431,
      "eval_samples_per_second": 129.676,
      "eval_steps_per_second": 16.287,
      "step": 900
    },
    {
      "epoch": 30.3,
      "learning_rate": 4.730743243243244e-05,
      "loss": 0.5726,
      "step": 1000
    },
    {
      "epoch": 30.3,
      "eval_loss": 0.6678630709648132,
      "eval_matthews_correlation": 0.10285544103286985,
      "eval_runtime": 7.9909,
      "eval_samples_per_second": 130.524,
      "eval_steps_per_second": 16.394,
      "step": 1000
    },
    {
      "epoch": 33.33,
      "learning_rate": 4.697297297297297e-05,
      "loss": 0.5524,
      "step": 1100
    },
    {
      "epoch": 33.33,
      "eval_loss": 0.6901304721832275,
      "eval_matthews_correlation": 0.06313184843830015,
      "eval_runtime": 8.3103,
      "eval_samples_per_second": 125.507,
      "eval_steps_per_second": 15.764,
      "step": 1100
    },
    {
      "epoch": 36.36,
      "learning_rate": 4.663513513513514e-05,
      "loss": 0.5167,
      "step": 1200
    },
    {
      "epoch": 36.36,
      "eval_loss": 0.7026833891868591,
      "eval_matthews_correlation": 0.09479027897712053,
      "eval_runtime": 8.0473,
      "eval_samples_per_second": 129.608,
      "eval_steps_per_second": 16.279,
      "step": 1200
    },
    {
      "epoch": 39.39,
      "learning_rate": 4.6297297297297295e-05,
      "loss": 0.4779,
      "step": 1300
    },
    {
      "epoch": 39.39,
      "eval_loss": 0.7578131556510925,
      "eval_matthews_correlation": 0.10121883963858187,
      "eval_runtime": 8.2334,
      "eval_samples_per_second": 126.679,
      "eval_steps_per_second": 15.911,
      "step": 1300
    },
    {
      "epoch": 42.42,
      "learning_rate": 4.5959459459459463e-05,
      "loss": 0.4271,
      "step": 1400
    },
    {
      "epoch": 42.42,
      "eval_loss": 0.8021395802497864,
      "eval_matthews_correlation": 0.11075235134282446,
      "eval_runtime": 8.0267,
      "eval_samples_per_second": 129.941,
      "eval_steps_per_second": 16.32,
      "step": 1400
    },
    {
      "epoch": 45.45,
      "learning_rate": 4.5621621621621625e-05,
      "loss": 0.3888,
      "step": 1500
    },
    {
      "epoch": 45.45,
      "eval_loss": 0.8813392519950867,
      "eval_matthews_correlation": 0.10250522978751038,
      "eval_runtime": 7.9207,
      "eval_samples_per_second": 131.68,
      "eval_steps_per_second": 16.539,
      "step": 1500
    },
    {
      "epoch": 48.48,
      "learning_rate": 4.5283783783783786e-05,
      "loss": 0.3428,
      "step": 1600
    },
    {
      "epoch": 48.48,
      "eval_loss": 0.9361783862113953,
      "eval_matthews_correlation": 0.1436834264468159,
      "eval_runtime": 7.8976,
      "eval_samples_per_second": 132.066,
      "eval_steps_per_second": 16.587,
      "step": 1600
    },
    {
      "epoch": 51.51,
      "learning_rate": 4.494594594594595e-05,
      "loss": 0.2977,
      "step": 1700
    },
    {
      "epoch": 51.51,
      "eval_loss": 1.078627347946167,
      "eval_matthews_correlation": 0.11181658143300324,
      "eval_runtime": 7.9966,
      "eval_samples_per_second": 130.43,
      "eval_steps_per_second": 16.382,
      "step": 1700
    },
    {
      "epoch": 54.54,
      "learning_rate": 4.460810810810811e-05,
      "loss": 0.2642,
      "step": 1800
    },
    {
      "epoch": 54.54,
      "eval_loss": 1.0609544515609741,
      "eval_matthews_correlation": 0.09013603883941315,
      "eval_runtime": 7.8862,
      "eval_samples_per_second": 132.257,
      "eval_steps_per_second": 16.611,
      "step": 1800
    },
    {
      "epoch": 57.57,
      "learning_rate": 4.427027027027027e-05,
      "loss": 0.2272,
      "step": 1900
    },
    {
      "epoch": 57.57,
      "eval_loss": 1.183494210243225,
      "eval_matthews_correlation": 0.11545854045964393,
      "eval_runtime": 7.9453,
      "eval_samples_per_second": 131.273,
      "eval_steps_per_second": 16.488,
      "step": 1900
    },
    {
      "epoch": 60.6,
      "learning_rate": 4.393243243243244e-05,
      "loss": 0.1915,
      "step": 2000
    },
    {
      "epoch": 60.6,
      "eval_loss": 1.2531063556671143,
      "eval_matthews_correlation": 0.12244528836896967,
      "eval_runtime": 8.0199,
      "eval_samples_per_second": 130.051,
      "eval_steps_per_second": 16.334,
      "step": 2000
    },
    {
      "epoch": 63.63,
      "learning_rate": 4.359459459459459e-05,
      "loss": 0.1691,
      "step": 2100
    },
    {
      "epoch": 63.63,
      "eval_loss": 1.3903430700302124,
      "eval_matthews_correlation": 0.07541181195571064,
      "eval_runtime": 7.9376,
      "eval_samples_per_second": 131.4,
      "eval_steps_per_second": 16.504,
      "step": 2100
    },
    {
      "epoch": 66.66,
      "learning_rate": 4.325675675675676e-05,
      "loss": 0.1491,
      "step": 2200
    },
    {
      "epoch": 66.66,
      "eval_loss": 1.4947072267532349,
      "eval_matthews_correlation": 0.06742580984707468,
      "eval_runtime": 7.9502,
      "eval_samples_per_second": 131.191,
      "eval_steps_per_second": 16.477,
      "step": 2200
    },
    {
      "epoch": 69.69,
      "learning_rate": 4.291891891891892e-05,
      "loss": 0.1339,
      "step": 2300
    },
    {
      "epoch": 69.69,
      "eval_loss": 1.5433533191680908,
      "eval_matthews_correlation": 0.0736455413240434,
      "eval_runtime": 8.2493,
      "eval_samples_per_second": 126.434,
      "eval_steps_per_second": 15.88,
      "step": 2300
    },
    {
      "epoch": 72.72,
      "learning_rate": 4.258108108108108e-05,
      "loss": 0.1164,
      "step": 2400
    },
    {
      "epoch": 72.72,
      "eval_loss": 1.5793086290359497,
      "eval_matthews_correlation": 0.11645872984022461,
      "eval_runtime": 8.2848,
      "eval_samples_per_second": 125.893,
      "eval_steps_per_second": 15.812,
      "step": 2400
    },
    {
      "epoch": 75.75,
      "learning_rate": 4.2243243243243244e-05,
      "loss": 0.1078,
      "step": 2500
    },
    {
      "epoch": 75.75,
      "eval_loss": 1.5938163995742798,
      "eval_matthews_correlation": 0.09946100084630931,
      "eval_runtime": 8.3263,
      "eval_samples_per_second": 125.266,
      "eval_steps_per_second": 15.733,
      "step": 2500
    },
    {
      "epoch": 78.78,
      "learning_rate": 4.1905405405405406e-05,
      "loss": 0.0974,
      "step": 2600
    },
    {
      "epoch": 78.78,
      "eval_loss": 1.700919508934021,
      "eval_matthews_correlation": 0.07568068132313144,
      "eval_runtime": 8.4149,
      "eval_samples_per_second": 123.947,
      "eval_steps_per_second": 15.568,
      "step": 2600
    },
    {
      "epoch": 78.78,
      "step": 2600,
      "total_flos": 1.7353653717551284e+19,
      "train_loss": 0.40566940747774566,
      "train_runtime": 7554.577,
      "train_samples_per_second": 508.301,
      "train_steps_per_second": 1.986
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 455,
  "total_flos": 1.7353653717551284e+19,
  "trial_name": null,
  "trial_params": null
}