results / trainer_state.json
paulac9's picture
Training in progress, epoch 15
6ac7657
{
"best_metric": 0.6615384615384615,
"best_model_checkpoint": "./results/checkpoint-43",
"epoch": 33.333333333333336,
"eval_steps": 500,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5076923076923077,
"eval_loss": 1.161078929901123,
"eval_runtime": 0.2665,
"eval_samples_per_second": 243.858,
"eval_steps_per_second": 11.255,
"step": 1
},
{
"epoch": 2.0,
"eval_accuracy": 0.5076923076923077,
"eval_loss": 1.1464765071868896,
"eval_runtime": 0.3457,
"eval_samples_per_second": 188.041,
"eval_steps_per_second": 8.679,
"step": 3
},
{
"epoch": 3.0,
"eval_accuracy": 0.5076923076923077,
"eval_loss": 1.1057497262954712,
"eval_runtime": 0.3211,
"eval_samples_per_second": 202.425,
"eval_steps_per_second": 9.343,
"step": 5
},
{
"epoch": 4.0,
"eval_accuracy": 0.5076923076923077,
"eval_loss": 1.113421082496643,
"eval_runtime": 0.2679,
"eval_samples_per_second": 242.627,
"eval_steps_per_second": 11.198,
"step": 6
},
{
"epoch": 5.0,
"eval_accuracy": 0.5076923076923077,
"eval_loss": 1.0864346027374268,
"eval_runtime": 0.3081,
"eval_samples_per_second": 210.961,
"eval_steps_per_second": 9.737,
"step": 7
},
{
"epoch": 6.0,
"eval_accuracy": 0.5384615384615384,
"eval_loss": 0.9837771058082581,
"eval_runtime": 0.3805,
"eval_samples_per_second": 170.821,
"eval_steps_per_second": 7.884,
"step": 9
},
{
"epoch": 6.67,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.5404,
"step": 10
},
{
"epoch": 7.0,
"eval_accuracy": 0.5538461538461539,
"eval_loss": 0.9654892683029175,
"eval_runtime": 0.2642,
"eval_samples_per_second": 246.029,
"eval_steps_per_second": 11.355,
"step": 11
},
{
"epoch": 8.0,
"eval_accuracy": 0.5692307692307692,
"eval_loss": 0.9630248546600342,
"eval_runtime": 0.2668,
"eval_samples_per_second": 243.599,
"eval_steps_per_second": 11.243,
"step": 12
},
{
"epoch": 9.0,
"eval_accuracy": 0.5538461538461539,
"eval_loss": 0.9631487131118774,
"eval_runtime": 0.4451,
"eval_samples_per_second": 146.049,
"eval_steps_per_second": 6.741,
"step": 13
},
{
"epoch": 10.0,
"eval_accuracy": 0.5384615384615384,
"eval_loss": 1.0176990032196045,
"eval_runtime": 0.373,
"eval_samples_per_second": 174.264,
"eval_steps_per_second": 8.043,
"step": 15
},
{
"epoch": 11.0,
"eval_accuracy": 0.5538461538461539,
"eval_loss": 1.0124413967132568,
"eval_runtime": 0.2736,
"eval_samples_per_second": 237.589,
"eval_steps_per_second": 10.966,
"step": 17
},
{
"epoch": 12.0,
"eval_accuracy": 0.5692307692307692,
"eval_loss": 0.9905163049697876,
"eval_runtime": 0.3001,
"eval_samples_per_second": 216.589,
"eval_steps_per_second": 9.996,
"step": 18
},
{
"epoch": 13.0,
"eval_accuracy": 0.6153846153846154,
"eval_loss": 0.9472815990447998,
"eval_runtime": 0.2929,
"eval_samples_per_second": 221.921,
"eval_steps_per_second": 10.243,
"step": 19
},
{
"epoch": 13.33,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.5207,
"step": 20
},
{
"epoch": 14.0,
"eval_accuracy": 0.6,
"eval_loss": 0.9549360871315002,
"eval_runtime": 0.2676,
"eval_samples_per_second": 242.87,
"eval_steps_per_second": 11.209,
"step": 21
},
{
"epoch": 15.0,
"eval_accuracy": 0.5846153846153846,
"eval_loss": 0.934834897518158,
"eval_runtime": 0.2803,
"eval_samples_per_second": 231.892,
"eval_steps_per_second": 10.703,
"step": 23
},
{
"epoch": 16.0,
"eval_accuracy": 0.5846153846153846,
"eval_loss": 0.9018541574478149,
"eval_runtime": 0.3596,
"eval_samples_per_second": 180.772,
"eval_steps_per_second": 8.343,
"step": 24
},
{
"epoch": 17.0,
"eval_accuracy": 0.5846153846153846,
"eval_loss": 0.8686960339546204,
"eval_runtime": 0.3401,
"eval_samples_per_second": 191.114,
"eval_steps_per_second": 8.821,
"step": 25
},
{
"epoch": 18.0,
"eval_accuracy": 0.5846153846153846,
"eval_loss": 0.846206784248352,
"eval_runtime": 0.2696,
"eval_samples_per_second": 241.07,
"eval_steps_per_second": 11.126,
"step": 27
},
{
"epoch": 19.0,
"eval_accuracy": 0.6153846153846154,
"eval_loss": 0.84183669090271,
"eval_runtime": 0.2747,
"eval_samples_per_second": 236.626,
"eval_steps_per_second": 10.921,
"step": 29
},
{
"epoch": 20.0,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.5146,
"step": 30
},
{
"epoch": 20.0,
"eval_accuracy": 0.6,
"eval_loss": 0.8419151306152344,
"eval_runtime": 0.2979,
"eval_samples_per_second": 218.221,
"eval_steps_per_second": 10.072,
"step": 30
},
{
"epoch": 21.0,
"eval_accuracy": 0.5692307692307692,
"eval_loss": 0.8435397148132324,
"eval_runtime": 0.3514,
"eval_samples_per_second": 184.977,
"eval_steps_per_second": 8.537,
"step": 31
},
{
"epoch": 22.0,
"eval_accuracy": 0.5538461538461539,
"eval_loss": 0.8414743542671204,
"eval_runtime": 0.4837,
"eval_samples_per_second": 134.389,
"eval_steps_per_second": 6.203,
"step": 33
},
{
"epoch": 23.0,
"eval_accuracy": 0.6153846153846154,
"eval_loss": 0.8293341994285583,
"eval_runtime": 0.4826,
"eval_samples_per_second": 134.685,
"eval_steps_per_second": 6.216,
"step": 35
},
{
"epoch": 24.0,
"eval_accuracy": 0.6,
"eval_loss": 0.8253650665283203,
"eval_runtime": 0.2849,
"eval_samples_per_second": 228.165,
"eval_steps_per_second": 10.531,
"step": 36
},
{
"epoch": 25.0,
"eval_accuracy": 0.6153846153846154,
"eval_loss": 0.8218726515769958,
"eval_runtime": 0.2622,
"eval_samples_per_second": 247.867,
"eval_steps_per_second": 11.44,
"step": 37
},
{
"epoch": 26.0,
"eval_accuracy": 0.6461538461538462,
"eval_loss": 0.8194869160652161,
"eval_runtime": 0.389,
"eval_samples_per_second": 167.11,
"eval_steps_per_second": 7.713,
"step": 39
},
{
"epoch": 26.67,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.4352,
"step": 40
},
{
"epoch": 27.0,
"eval_accuracy": 0.6461538461538462,
"eval_loss": 0.8192200660705566,
"eval_runtime": 0.2763,
"eval_samples_per_second": 235.224,
"eval_steps_per_second": 10.857,
"step": 41
},
{
"epoch": 28.0,
"eval_accuracy": 0.6307692307692307,
"eval_loss": 0.8197530508041382,
"eval_runtime": 0.2574,
"eval_samples_per_second": 252.551,
"eval_steps_per_second": 11.656,
"step": 42
},
{
"epoch": 29.0,
"eval_accuracy": 0.6615384615384615,
"eval_loss": 0.8229538202285767,
"eval_runtime": 0.2901,
"eval_samples_per_second": 224.029,
"eval_steps_per_second": 10.34,
"step": 43
},
{
"epoch": 30.0,
"eval_accuracy": 0.6461538461538462,
"eval_loss": 0.8263610005378723,
"eval_runtime": 0.2703,
"eval_samples_per_second": 240.492,
"eval_steps_per_second": 11.1,
"step": 45
},
{
"epoch": 31.0,
"eval_accuracy": 0.6461538461538462,
"eval_loss": 0.8268065452575684,
"eval_runtime": 0.425,
"eval_samples_per_second": 152.94,
"eval_steps_per_second": 7.059,
"step": 47
},
{
"epoch": 32.0,
"eval_accuracy": 0.6461538461538462,
"eval_loss": 0.8266280293464661,
"eval_runtime": 0.2626,
"eval_samples_per_second": 247.53,
"eval_steps_per_second": 11.424,
"step": 48
},
{
"epoch": 33.0,
"eval_accuracy": 0.6461538461538462,
"eval_loss": 0.8262693881988525,
"eval_runtime": 0.2849,
"eval_samples_per_second": 228.131,
"eval_steps_per_second": 10.529,
"step": 49
},
{
"epoch": 33.33,
"learning_rate": 0.0,
"loss": 0.4724,
"step": 50
},
{
"epoch": 33.33,
"eval_accuracy": 0.6461538461538462,
"eval_loss": 0.8263109922409058,
"eval_runtime": 0.3713,
"eval_samples_per_second": 175.053,
"eval_steps_per_second": 8.079,
"step": 50
},
{
"epoch": 33.33,
"step": 50,
"total_flos": 5.411451203278848e+16,
"train_loss": 0.4966608715057373,
"train_runtime": 129.3451,
"train_samples_per_second": 25.127,
"train_steps_per_second": 0.387
}
],
"logging_steps": 10,
"max_steps": 50,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 5.411451203278848e+16,
"trial_name": null,
"trial_params": null
}