GPT2_no_function_42 / trainer_state.json
xiulinyang's picture
Upload folder using huggingface_hub
92f136e verified
{
"best_metric": 4.7215962409973145,
"best_model_checkpoint": "models/GPT2_no_function_42/checkpoint-47020",
"epoch": 10.0,
"global_step": 47020,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21,
"learning_rate": 1e-05,
"loss": 8.0483,
"step": 1000
},
{
"epoch": 0.43,
"learning_rate": 2e-05,
"loss": 7.0834,
"step": 2000
},
{
"epoch": 0.64,
"learning_rate": 3e-05,
"loss": 6.6765,
"step": 3000
},
{
"epoch": 0.85,
"learning_rate": 4e-05,
"loss": 6.3706,
"step": 4000
},
{
"epoch": 1.0,
"eval_accuracy": 0.220326962593879,
"eval_loss": 6.010717391967773,
"eval_runtime": 1.4501,
"eval_samples_per_second": 597.198,
"eval_steps_per_second": 4.827,
"step": 4702
},
{
"epoch": 1.06,
"learning_rate": 5e-05,
"loss": 6.1229,
"step": 5000
},
{
"epoch": 1.28,
"learning_rate": 6e-05,
"loss": 5.9171,
"step": 6000
},
{
"epoch": 1.49,
"learning_rate": 7e-05,
"loss": 5.7528,
"step": 7000
},
{
"epoch": 1.7,
"learning_rate": 8e-05,
"loss": 5.6201,
"step": 8000
},
{
"epoch": 1.91,
"learning_rate": 9e-05,
"loss": 5.5145,
"step": 9000
},
{
"epoch": 2.0,
"eval_accuracy": 0.253105053554218,
"eval_loss": 5.362144470214844,
"eval_runtime": 1.4369,
"eval_samples_per_second": 602.691,
"eval_steps_per_second": 4.872,
"step": 9404
},
{
"epoch": 2.13,
"learning_rate": 0.0001,
"loss": 5.4006,
"step": 10000
},
{
"epoch": 2.34,
"learning_rate": 9.889000000000001e-05,
"loss": 5.3208,
"step": 11000
},
{
"epoch": 2.55,
"learning_rate": 9.77788888888889e-05,
"loss": 5.2626,
"step": 12000
},
{
"epoch": 2.76,
"learning_rate": 9.66688888888889e-05,
"loss": 5.2121,
"step": 13000
},
{
"epoch": 2.98,
"learning_rate": 9.555777777777778e-05,
"loss": 5.1658,
"step": 14000
},
{
"epoch": 3.0,
"eval_accuracy": 0.2755269044025386,
"eval_loss": 5.0851593017578125,
"eval_runtime": 1.4343,
"eval_samples_per_second": 603.766,
"eval_steps_per_second": 4.88,
"step": 14106
},
{
"epoch": 3.19,
"learning_rate": 9.444777777777778e-05,
"loss": 5.0681,
"step": 15000
},
{
"epoch": 3.4,
"learning_rate": 9.333666666666667e-05,
"loss": 5.0459,
"step": 16000
},
{
"epoch": 3.62,
"learning_rate": 9.222666666666668e-05,
"loss": 5.0245,
"step": 17000
},
{
"epoch": 3.83,
"learning_rate": 9.111555555555556e-05,
"loss": 5.0051,
"step": 18000
},
{
"epoch": 4.0,
"eval_accuracy": 0.28648324271244385,
"eval_loss": 4.952297210693359,
"eval_runtime": 1.4391,
"eval_samples_per_second": 601.751,
"eval_steps_per_second": 4.864,
"step": 18808
},
{
"epoch": 4.04,
"learning_rate": 9.000555555555557e-05,
"loss": 4.9688,
"step": 19000
},
{
"epoch": 4.25,
"learning_rate": 8.889444444444444e-05,
"loss": 4.893,
"step": 20000
},
{
"epoch": 4.47,
"learning_rate": 8.778444444444445e-05,
"loss": 4.8916,
"step": 21000
},
{
"epoch": 4.68,
"learning_rate": 8.667333333333334e-05,
"loss": 4.8863,
"step": 22000
},
{
"epoch": 4.89,
"learning_rate": 8.556333333333334e-05,
"loss": 4.8788,
"step": 23000
},
{
"epoch": 5.0,
"eval_accuracy": 0.29304795330144934,
"eval_loss": 4.870294094085693,
"eval_runtime": 1.4302,
"eval_samples_per_second": 605.492,
"eval_steps_per_second": 4.894,
"step": 23510
},
{
"epoch": 5.1,
"learning_rate": 8.445222222222222e-05,
"loss": 4.8231,
"step": 24000
},
{
"epoch": 5.32,
"learning_rate": 8.334222222222222e-05,
"loss": 4.7841,
"step": 25000
},
{
"epoch": 5.53,
"learning_rate": 8.223111111111111e-05,
"loss": 4.7904,
"step": 26000
},
{
"epoch": 5.74,
"learning_rate": 8.112111111111111e-05,
"loss": 4.7869,
"step": 27000
},
{
"epoch": 5.95,
"learning_rate": 8.001e-05,
"loss": 4.7861,
"step": 28000
},
{
"epoch": 6.0,
"eval_accuracy": 0.29845792947936933,
"eval_loss": 4.819924354553223,
"eval_runtime": 1.4319,
"eval_samples_per_second": 604.778,
"eval_steps_per_second": 4.889,
"step": 28212
},
{
"epoch": 6.17,
"learning_rate": 7.890000000000001e-05,
"loss": 4.7083,
"step": 29000
},
{
"epoch": 6.38,
"learning_rate": 7.77888888888889e-05,
"loss": 4.7059,
"step": 30000
},
{
"epoch": 6.59,
"learning_rate": 7.667888888888889e-05,
"loss": 4.7111,
"step": 31000
},
{
"epoch": 6.81,
"learning_rate": 7.556777777777779e-05,
"loss": 4.7142,
"step": 32000
},
{
"epoch": 7.0,
"eval_accuracy": 0.30186757833099964,
"eval_loss": 4.782296657562256,
"eval_runtime": 1.4327,
"eval_samples_per_second": 604.45,
"eval_steps_per_second": 4.886,
"step": 32914
},
{
"epoch": 7.02,
"learning_rate": 7.445777777777778e-05,
"loss": 4.7075,
"step": 33000
},
{
"epoch": 7.23,
"learning_rate": 7.334666666666668e-05,
"loss": 4.6238,
"step": 34000
},
{
"epoch": 7.44,
"learning_rate": 7.223666666666667e-05,
"loss": 4.6427,
"step": 35000
},
{
"epoch": 7.66,
"learning_rate": 7.112555555555556e-05,
"loss": 4.6487,
"step": 36000
},
{
"epoch": 7.87,
"learning_rate": 7.001555555555555e-05,
"loss": 4.6556,
"step": 37000
},
{
"epoch": 8.0,
"eval_accuracy": 0.30428615591642266,
"eval_loss": 4.755917072296143,
"eval_runtime": 1.4333,
"eval_samples_per_second": 604.206,
"eval_steps_per_second": 4.884,
"step": 37616
},
{
"epoch": 8.08,
"learning_rate": 6.890444444444445e-05,
"loss": 4.6189,
"step": 38000
},
{
"epoch": 8.29,
"learning_rate": 6.779444444444444e-05,
"loss": 4.5731,
"step": 39000
},
{
"epoch": 8.51,
"learning_rate": 6.668333333333333e-05,
"loss": 4.5902,
"step": 40000
},
{
"epoch": 8.72,
"learning_rate": 6.557333333333332e-05,
"loss": 4.5992,
"step": 41000
},
{
"epoch": 8.93,
"learning_rate": 6.446222222222223e-05,
"loss": 4.6029,
"step": 42000
},
{
"epoch": 9.0,
"eval_accuracy": 0.3071047989671037,
"eval_loss": 4.737186431884766,
"eval_runtime": 1.4325,
"eval_samples_per_second": 604.527,
"eval_steps_per_second": 4.886,
"step": 42318
},
{
"epoch": 9.15,
"learning_rate": 6.335222222222222e-05,
"loss": 4.5398,
"step": 43000
},
{
"epoch": 9.36,
"learning_rate": 6.224111111111112e-05,
"loss": 4.528,
"step": 44000
},
{
"epoch": 9.57,
"learning_rate": 6.113111111111111e-05,
"loss": 4.5451,
"step": 45000
},
{
"epoch": 9.78,
"learning_rate": 6.002e-05,
"loss": 4.5521,
"step": 46000
},
{
"epoch": 10.0,
"learning_rate": 5.891e-05,
"loss": 4.5609,
"step": 47000
},
{
"epoch": 10.0,
"eval_accuracy": 0.30925060464439635,
"eval_loss": 4.7215962409973145,
"eval_runtime": 1.4366,
"eval_samples_per_second": 602.829,
"eval_steps_per_second": 4.873,
"step": 47020
}
],
"max_steps": 100000,
"num_train_epochs": 22,
"total_flos": 3.930942652416e+17,
"trial_name": null,
"trial_params": null
}