invalid-coder's picture
Upload 9 files
45a3dcc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 72.99270072992701,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.82,
"learning_rate": 4.875e-05,
"loss": 10.1798,
"step": 250
},
{
"epoch": 1.82,
"eval_test_accuracy": 0.0,
"eval_test_loss": 3.4965455532073975,
"eval_test_runtime": 7.5045,
"eval_test_samples_per_second": 162.702,
"eval_test_steps_per_second": 2.665,
"step": 250
},
{
"epoch": 3.65,
"learning_rate": 4.75e-05,
"loss": 4.0133,
"step": 500
},
{
"epoch": 3.65,
"eval_test_accuracy": 0.085995085995086,
"eval_test_loss": 2.1827144622802734,
"eval_test_runtime": 11.227,
"eval_test_samples_per_second": 108.755,
"eval_test_steps_per_second": 1.781,
"step": 500
},
{
"epoch": 5.47,
"learning_rate": 4.6250000000000006e-05,
"loss": 2.1831,
"step": 750
},
{
"epoch": 5.47,
"eval_test_accuracy": 0.28992628992628994,
"eval_test_loss": 1.1154128313064575,
"eval_test_runtime": 4.9154,
"eval_test_samples_per_second": 248.402,
"eval_test_steps_per_second": 4.069,
"step": 750
},
{
"epoch": 7.3,
"learning_rate": 4.5e-05,
"loss": 1.178,
"step": 1000
},
{
"epoch": 7.3,
"eval_test_accuracy": 0.4430794430794431,
"eval_test_loss": 0.7581946849822998,
"eval_test_runtime": 5.2157,
"eval_test_samples_per_second": 234.1,
"eval_test_steps_per_second": 3.835,
"step": 1000
},
{
"epoch": 9.12,
"learning_rate": 4.375e-05,
"loss": 0.8603,
"step": 1250
},
{
"epoch": 9.12,
"eval_test_accuracy": 0.506961506961507,
"eval_test_loss": 0.6857301592826843,
"eval_test_runtime": 4.1605,
"eval_test_samples_per_second": 293.474,
"eval_test_steps_per_second": 4.807,
"step": 1250
},
{
"epoch": 10.95,
"learning_rate": 4.25e-05,
"loss": 0.7179,
"step": 1500
},
{
"epoch": 10.95,
"eval_test_accuracy": 0.5356265356265356,
"eval_test_loss": 0.629558801651001,
"eval_test_runtime": 4.1543,
"eval_test_samples_per_second": 293.909,
"eval_test_steps_per_second": 4.814,
"step": 1500
},
{
"epoch": 12.77,
"learning_rate": 4.125e-05,
"loss": 0.6347,
"step": 1750
},
{
"epoch": 12.77,
"eval_test_accuracy": 0.556920556920557,
"eval_test_loss": 0.6828880310058594,
"eval_test_runtime": 4.1527,
"eval_test_samples_per_second": 294.026,
"eval_test_steps_per_second": 4.816,
"step": 1750
},
{
"epoch": 14.6,
"learning_rate": 4e-05,
"loss": 0.5714,
"step": 2000
},
{
"epoch": 14.6,
"eval_test_accuracy": 0.5683865683865684,
"eval_test_loss": 0.6402557492256165,
"eval_test_runtime": 4.1126,
"eval_test_samples_per_second": 296.89,
"eval_test_steps_per_second": 4.863,
"step": 2000
},
{
"epoch": 16.42,
"learning_rate": 3.875e-05,
"loss": 0.535,
"step": 2250
},
{
"epoch": 16.42,
"eval_test_accuracy": 0.5823095823095823,
"eval_test_loss": 0.6427932381629944,
"eval_test_runtime": 4.1425,
"eval_test_samples_per_second": 294.751,
"eval_test_steps_per_second": 4.828,
"step": 2250
},
{
"epoch": 18.25,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4864,
"step": 2500
},
{
"epoch": 18.25,
"eval_test_accuracy": 0.5749385749385749,
"eval_test_loss": 0.6692995429039001,
"eval_test_runtime": 4.1218,
"eval_test_samples_per_second": 296.233,
"eval_test_steps_per_second": 4.852,
"step": 2500
},
{
"epoch": 20.07,
"learning_rate": 3.625e-05,
"loss": 0.4523,
"step": 2750
},
{
"epoch": 20.07,
"eval_test_accuracy": 0.588042588042588,
"eval_test_loss": 0.6854296326637268,
"eval_test_runtime": 4.1256,
"eval_test_samples_per_second": 295.954,
"eval_test_steps_per_second": 4.848,
"step": 2750
},
{
"epoch": 21.9,
"learning_rate": 3.5e-05,
"loss": 0.4267,
"step": 3000
},
{
"epoch": 21.9,
"eval_test_accuracy": 0.5847665847665847,
"eval_test_loss": 0.6832742691040039,
"eval_test_runtime": 4.114,
"eval_test_samples_per_second": 296.79,
"eval_test_steps_per_second": 4.861,
"step": 3000
},
{
"epoch": 23.72,
"learning_rate": 3.375000000000001e-05,
"loss": 0.4017,
"step": 3250
},
{
"epoch": 23.72,
"eval_test_accuracy": 0.5864045864045864,
"eval_test_loss": 0.7026733756065369,
"eval_test_runtime": 4.162,
"eval_test_samples_per_second": 293.366,
"eval_test_steps_per_second": 4.805,
"step": 3250
},
{
"epoch": 25.55,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3737,
"step": 3500
},
{
"epoch": 25.55,
"eval_test_accuracy": 0.5823095823095823,
"eval_test_loss": 0.7358095645904541,
"eval_test_runtime": 4.1139,
"eval_test_samples_per_second": 296.797,
"eval_test_steps_per_second": 4.862,
"step": 3500
},
{
"epoch": 27.37,
"learning_rate": 3.125e-05,
"loss": 0.3567,
"step": 3750
},
{
"epoch": 27.37,
"eval_test_accuracy": 0.583947583947584,
"eval_test_loss": 0.7573221921920776,
"eval_test_runtime": 4.1462,
"eval_test_samples_per_second": 294.489,
"eval_test_steps_per_second": 4.824,
"step": 3750
},
{
"epoch": 29.2,
"learning_rate": 3e-05,
"loss": 0.3329,
"step": 4000
},
{
"epoch": 29.2,
"eval_test_accuracy": 0.5831285831285832,
"eval_test_loss": 0.7671645283699036,
"eval_test_runtime": 4.1876,
"eval_test_samples_per_second": 291.577,
"eval_test_steps_per_second": 4.776,
"step": 4000
},
{
"epoch": 31.02,
"learning_rate": 2.8749999999999997e-05,
"loss": 0.3178,
"step": 4250
},
{
"epoch": 31.02,
"eval_test_accuracy": 0.5937755937755937,
"eval_test_loss": 0.8280954360961914,
"eval_test_runtime": 4.1401,
"eval_test_samples_per_second": 294.919,
"eval_test_steps_per_second": 4.831,
"step": 4250
},
{
"epoch": 32.85,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3031,
"step": 4500
},
{
"epoch": 32.85,
"eval_test_accuracy": 0.5954135954135954,
"eval_test_loss": 0.8298905491828918,
"eval_test_runtime": 4.1172,
"eval_test_samples_per_second": 296.557,
"eval_test_steps_per_second": 4.858,
"step": 4500
},
{
"epoch": 34.67,
"learning_rate": 2.625e-05,
"loss": 0.2942,
"step": 4750
},
{
"epoch": 34.67,
"eval_test_accuracy": 0.592956592956593,
"eval_test_loss": 0.8406508564949036,
"eval_test_runtime": 4.1428,
"eval_test_samples_per_second": 294.726,
"eval_test_steps_per_second": 4.828,
"step": 4750
},
{
"epoch": 36.5,
"learning_rate": 2.5e-05,
"loss": 0.2794,
"step": 5000
},
{
"epoch": 36.5,
"eval_test_accuracy": 0.6003276003276004,
"eval_test_loss": 0.8442530035972595,
"eval_test_runtime": 4.3235,
"eval_test_samples_per_second": 282.409,
"eval_test_steps_per_second": 4.626,
"step": 5000
},
{
"epoch": 38.32,
"learning_rate": 2.375e-05,
"loss": 0.2733,
"step": 5250
},
{
"epoch": 38.32,
"eval_test_accuracy": 0.6052416052416052,
"eval_test_loss": 0.8638033270835876,
"eval_test_runtime": 4.1266,
"eval_test_samples_per_second": 295.887,
"eval_test_steps_per_second": 4.847,
"step": 5250
},
{
"epoch": 40.15,
"learning_rate": 2.25e-05,
"loss": 0.2631,
"step": 5500
},
{
"epoch": 40.15,
"eval_test_accuracy": 0.5888615888615889,
"eval_test_loss": 0.890779435634613,
"eval_test_runtime": 4.1284,
"eval_test_samples_per_second": 295.759,
"eval_test_steps_per_second": 4.845,
"step": 5500
},
{
"epoch": 41.97,
"learning_rate": 2.125e-05,
"loss": 0.2574,
"step": 5750
},
{
"epoch": 41.97,
"eval_test_accuracy": 0.588042588042588,
"eval_test_loss": 0.9194920063018799,
"eval_test_runtime": 4.2329,
"eval_test_samples_per_second": 288.451,
"eval_test_steps_per_second": 4.725,
"step": 5750
},
{
"epoch": 43.8,
"learning_rate": 2e-05,
"loss": 0.2445,
"step": 6000
},
{
"epoch": 43.8,
"eval_test_accuracy": 0.5913185913185913,
"eval_test_loss": 0.9236257672309875,
"eval_test_runtime": 4.1684,
"eval_test_samples_per_second": 292.916,
"eval_test_steps_per_second": 4.798,
"step": 6000
},
{
"epoch": 45.62,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.2417,
"step": 6250
},
{
"epoch": 45.62,
"eval_test_accuracy": 0.5913185913185913,
"eval_test_loss": 0.9303093552589417,
"eval_test_runtime": 4.1896,
"eval_test_samples_per_second": 291.435,
"eval_test_steps_per_second": 4.774,
"step": 6250
},
{
"epoch": 47.45,
"learning_rate": 1.75e-05,
"loss": 0.2316,
"step": 6500
},
{
"epoch": 47.45,
"eval_test_accuracy": 0.6060606060606061,
"eval_test_loss": 0.9456475377082825,
"eval_test_runtime": 4.1609,
"eval_test_samples_per_second": 293.446,
"eval_test_steps_per_second": 4.807,
"step": 6500
},
{
"epoch": 49.27,
"learning_rate": 1.6250000000000002e-05,
"loss": 0.227,
"step": 6750
},
{
"epoch": 49.27,
"eval_test_accuracy": 0.5978705978705978,
"eval_test_loss": 0.9745798110961914,
"eval_test_runtime": 4.1394,
"eval_test_samples_per_second": 294.971,
"eval_test_steps_per_second": 4.832,
"step": 6750
},
{
"epoch": 51.09,
"learning_rate": 1.5e-05,
"loss": 0.2241,
"step": 7000
},
{
"epoch": 51.09,
"eval_test_accuracy": 0.6052416052416052,
"eval_test_loss": 0.938654899597168,
"eval_test_runtime": 4.1652,
"eval_test_samples_per_second": 293.143,
"eval_test_steps_per_second": 4.802,
"step": 7000
},
{
"epoch": 52.92,
"learning_rate": 1.3750000000000002e-05,
"loss": 0.2174,
"step": 7250
},
{
"epoch": 52.92,
"eval_test_accuracy": 0.5986895986895987,
"eval_test_loss": 0.9762380719184875,
"eval_test_runtime": 4.2021,
"eval_test_samples_per_second": 290.57,
"eval_test_steps_per_second": 4.76,
"step": 7250
},
{
"epoch": 54.74,
"learning_rate": 1.25e-05,
"loss": 0.212,
"step": 7500
},
{
"epoch": 54.74,
"eval_test_accuracy": 0.601965601965602,
"eval_test_loss": 0.9834132194519043,
"eval_test_runtime": 4.1906,
"eval_test_samples_per_second": 291.369,
"eval_test_steps_per_second": 4.773,
"step": 7500
},
{
"epoch": 56.57,
"learning_rate": 1.125e-05,
"loss": 0.206,
"step": 7750
},
{
"epoch": 56.57,
"eval_test_accuracy": 0.5995085995085995,
"eval_test_loss": 0.9860948920249939,
"eval_test_runtime": 4.1715,
"eval_test_samples_per_second": 292.702,
"eval_test_steps_per_second": 4.794,
"step": 7750
},
{
"epoch": 58.39,
"learning_rate": 1e-05,
"loss": 0.2057,
"step": 8000
},
{
"epoch": 58.39,
"eval_test_accuracy": 0.5962325962325963,
"eval_test_loss": 1.0094884634017944,
"eval_test_runtime": 4.2216,
"eval_test_samples_per_second": 289.23,
"eval_test_steps_per_second": 4.738,
"step": 8000
},
{
"epoch": 60.22,
"learning_rate": 8.75e-06,
"loss": 0.2023,
"step": 8250
},
{
"epoch": 60.22,
"eval_test_accuracy": 0.597051597051597,
"eval_test_loss": 1.000124216079712,
"eval_test_runtime": 4.1702,
"eval_test_samples_per_second": 292.793,
"eval_test_steps_per_second": 4.796,
"step": 8250
},
{
"epoch": 62.04,
"learning_rate": 7.5e-06,
"loss": 0.1994,
"step": 8500
},
{
"epoch": 62.04,
"eval_test_accuracy": 0.5995085995085995,
"eval_test_loss": 1.0179657936096191,
"eval_test_runtime": 4.1982,
"eval_test_samples_per_second": 290.842,
"eval_test_steps_per_second": 4.764,
"step": 8500
},
{
"epoch": 63.87,
"learning_rate": 6.25e-06,
"loss": 0.1967,
"step": 8750
},
{
"epoch": 63.87,
"eval_test_accuracy": 0.6044226044226044,
"eval_test_loss": 1.0143113136291504,
"eval_test_runtime": 4.1544,
"eval_test_samples_per_second": 293.907,
"eval_test_steps_per_second": 4.814,
"step": 8750
},
{
"epoch": 65.69,
"learning_rate": 5e-06,
"loss": 0.1915,
"step": 9000
},
{
"epoch": 65.69,
"eval_test_accuracy": 0.6011466011466011,
"eval_test_loss": 1.0377224683761597,
"eval_test_runtime": 4.1791,
"eval_test_samples_per_second": 292.165,
"eval_test_steps_per_second": 4.786,
"step": 9000
},
{
"epoch": 67.52,
"learning_rate": 3.75e-06,
"loss": 0.1934,
"step": 9250
},
{
"epoch": 67.52,
"eval_test_accuracy": 0.601965601965602,
"eval_test_loss": 1.02960205078125,
"eval_test_runtime": 4.2049,
"eval_test_samples_per_second": 290.372,
"eval_test_steps_per_second": 4.756,
"step": 9250
},
{
"epoch": 69.34,
"learning_rate": 2.5e-06,
"loss": 0.1932,
"step": 9500
},
{
"epoch": 69.34,
"eval_test_accuracy": 0.601965601965602,
"eval_test_loss": 1.0294890403747559,
"eval_test_runtime": 4.1796,
"eval_test_samples_per_second": 292.135,
"eval_test_steps_per_second": 4.785,
"step": 9500
},
{
"epoch": 71.17,
"learning_rate": 1.25e-06,
"loss": 0.1898,
"step": 9750
},
{
"epoch": 71.17,
"eval_test_accuracy": 0.6011466011466011,
"eval_test_loss": 1.0313055515289307,
"eval_test_runtime": 4.1318,
"eval_test_samples_per_second": 295.51,
"eval_test_steps_per_second": 4.84,
"step": 9750
},
{
"epoch": 72.99,
"learning_rate": 0.0,
"loss": 0.1916,
"step": 10000
},
{
"epoch": 72.99,
"eval_test_accuracy": 0.6011466011466011,
"eval_test_loss": 1.0304898023605347,
"eval_test_runtime": 4.1756,
"eval_test_samples_per_second": 292.415,
"eval_test_steps_per_second": 4.79,
"step": 10000
}
],
"max_steps": 10000,
"num_train_epochs": 73,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}