superglue-boolq / trainer_state.json
ShengdingHu's picture
Training in progress, step 200
2b88c4e
raw
history blame
9.42 kB
{
"best_metric": 80.30581039755351,
"best_model_checkpoint": "outputs/bitfit/t5-base/superglue-boolq/checkpoint-2600",
"epoch": 20.0,
"global_step": 5900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.68,
"eval_accuracy": 74.6788990825688,
"eval_average_metrics": 74.6788990825688,
"eval_loss": 0.21743574738502502,
"eval_runtime": 16.9831,
"eval_samples_per_second": 96.272,
"step": 200
},
{
"epoch": 1.36,
"eval_accuracy": 75.71865443425077,
"eval_average_metrics": 75.71865443425077,
"eval_loss": 0.21506452560424805,
"eval_runtime": 14.6109,
"eval_samples_per_second": 111.903,
"step": 400
},
{
"epoch": 1.69,
"learning_rate": 0.00027457627118644066,
"loss": 0.374,
"step": 500
},
{
"epoch": 2.03,
"eval_accuracy": 77.18654434250764,
"eval_average_metrics": 77.18654434250764,
"eval_loss": 0.20255930721759796,
"eval_runtime": 17.1645,
"eval_samples_per_second": 95.254,
"step": 600
},
{
"epoch": 2.71,
"eval_accuracy": 76.5137614678899,
"eval_average_metrics": 76.5137614678899,
"eval_loss": 0.220754012465477,
"eval_runtime": 17.1929,
"eval_samples_per_second": 95.097,
"step": 800
},
{
"epoch": 3.39,
"learning_rate": 0.00024915254237288135,
"loss": 0.2114,
"step": 1000
},
{
"epoch": 3.39,
"eval_accuracy": 78.1651376146789,
"eval_average_metrics": 78.1651376146789,
"eval_loss": 0.19822736084461212,
"eval_runtime": 14.5865,
"eval_samples_per_second": 112.09,
"step": 1000
},
{
"epoch": 4.07,
"eval_accuracy": 78.71559633027523,
"eval_average_metrics": 78.71559633027523,
"eval_loss": 0.19769148528575897,
"eval_runtime": 17.1866,
"eval_samples_per_second": 95.132,
"step": 1200
},
{
"epoch": 4.75,
"eval_accuracy": 78.2262996941896,
"eval_average_metrics": 78.2262996941896,
"eval_loss": 0.1974276602268219,
"eval_runtime": 17.1722,
"eval_samples_per_second": 95.212,
"step": 1400
},
{
"epoch": 5.08,
"learning_rate": 0.000223728813559322,
"loss": 0.1997,
"step": 1500
},
{
"epoch": 5.42,
"eval_accuracy": 78.71559633027523,
"eval_average_metrics": 78.71559633027523,
"eval_loss": 0.19954562187194824,
"eval_runtime": 14.5679,
"eval_samples_per_second": 112.233,
"step": 1600
},
{
"epoch": 6.1,
"eval_accuracy": 78.2262996941896,
"eval_average_metrics": 78.2262996941896,
"eval_loss": 0.20299072563648224,
"eval_runtime": 17.2624,
"eval_samples_per_second": 94.714,
"step": 1800
},
{
"epoch": 6.78,
"learning_rate": 0.0001983050847457627,
"loss": 0.1934,
"step": 2000
},
{
"epoch": 6.78,
"eval_accuracy": 79.63302752293579,
"eval_average_metrics": 79.63302752293579,
"eval_loss": 0.19578830897808075,
"eval_runtime": 17.2808,
"eval_samples_per_second": 94.614,
"step": 2000
},
{
"epoch": 7.46,
"eval_accuracy": 79.26605504587157,
"eval_average_metrics": 79.26605504587157,
"eval_loss": 0.1937599629163742,
"eval_runtime": 15.3794,
"eval_samples_per_second": 106.311,
"step": 2200
},
{
"epoch": 8.14,
"eval_accuracy": 77.92048929663609,
"eval_average_metrics": 77.92048929663609,
"eval_loss": 0.2019716054201126,
"eval_runtime": 17.5057,
"eval_samples_per_second": 93.398,
"step": 2400
},
{
"epoch": 8.47,
"learning_rate": 0.0001728813559322034,
"loss": 0.1907,
"step": 2500
},
{
"epoch": 8.81,
"eval_accuracy": 80.30581039755351,
"eval_average_metrics": 80.30581039755351,
"eval_loss": 0.19154316186904907,
"eval_runtime": 17.3782,
"eval_samples_per_second": 94.083,
"step": 2600
},
{
"epoch": 9.49,
"eval_accuracy": 79.93883792048929,
"eval_average_metrics": 79.93883792048929,
"eval_loss": 0.19677455723285675,
"eval_runtime": 15.9661,
"eval_samples_per_second": 102.404,
"step": 2800
},
{
"epoch": 10.17,
"learning_rate": 0.00014745762711864405,
"loss": 0.183,
"step": 3000
},
{
"epoch": 10.17,
"eval_accuracy": 79.44954128440367,
"eval_average_metrics": 79.44954128440367,
"eval_loss": 0.19117017090320587,
"eval_runtime": 17.2749,
"eval_samples_per_second": 94.646,
"step": 3000
},
{
"epoch": 10.85,
"eval_accuracy": 78.10397553516819,
"eval_average_metrics": 78.10397553516819,
"eval_loss": 0.20411182940006256,
"eval_runtime": 17.1181,
"eval_samples_per_second": 95.513,
"step": 3200
},
{
"epoch": 11.53,
"eval_accuracy": 79.02140672782875,
"eval_average_metrics": 79.02140672782875,
"eval_loss": 0.194900244474411,
"eval_runtime": 17.1955,
"eval_samples_per_second": 95.083,
"step": 3400
},
{
"epoch": 11.86,
"learning_rate": 0.00012203389830508474,
"loss": 0.181,
"step": 3500
},
{
"epoch": 12.2,
"eval_accuracy": 79.38837920489297,
"eval_average_metrics": 79.38837920489297,
"eval_loss": 0.20091596245765686,
"eval_runtime": 16.9905,
"eval_samples_per_second": 96.23,
"step": 3600
},
{
"epoch": 12.88,
"eval_accuracy": 79.81651376146789,
"eval_average_metrics": 79.81651376146789,
"eval_loss": 0.18894420564174652,
"eval_runtime": 17.2706,
"eval_samples_per_second": 94.669,
"step": 3800
},
{
"epoch": 13.56,
"learning_rate": 9.661016949152541e-05,
"loss": 0.1786,
"step": 4000
},
{
"epoch": 13.56,
"eval_accuracy": 78.77675840978593,
"eval_average_metrics": 78.77675840978593,
"eval_loss": 0.20160046219825745,
"eval_runtime": 17.0941,
"eval_samples_per_second": 95.647,
"step": 4000
},
{
"epoch": 14.24,
"eval_accuracy": 79.57186544342507,
"eval_average_metrics": 79.57186544342507,
"eval_loss": 0.19864365458488464,
"eval_runtime": 17.0536,
"eval_samples_per_second": 95.874,
"step": 4200
},
{
"epoch": 14.92,
"eval_accuracy": 79.51070336391437,
"eval_average_metrics": 79.51070336391437,
"eval_loss": 0.19150203466415405,
"eval_runtime": 17.2063,
"eval_samples_per_second": 95.024,
"step": 4400
},
{
"epoch": 15.25,
"learning_rate": 7.11864406779661e-05,
"loss": 0.1769,
"step": 4500
},
{
"epoch": 15.59,
"eval_accuracy": 78.77675840978593,
"eval_average_metrics": 78.77675840978593,
"eval_loss": 0.19904659688472748,
"eval_runtime": 17.1806,
"eval_samples_per_second": 95.165,
"step": 4600
},
{
"epoch": 16.27,
"eval_accuracy": 79.20489296636084,
"eval_average_metrics": 79.20489296636084,
"eval_loss": 0.19741013646125793,
"eval_runtime": 17.2538,
"eval_samples_per_second": 94.762,
"step": 4800
},
{
"epoch": 16.95,
"learning_rate": 4.576271186440678e-05,
"loss": 0.1741,
"step": 5000
},
{
"epoch": 16.95,
"eval_accuracy": 79.51070336391437,
"eval_average_metrics": 79.51070336391437,
"eval_loss": 0.19429509341716766,
"eval_runtime": 17.4899,
"eval_samples_per_second": 93.483,
"step": 5000
},
{
"epoch": 17.63,
"eval_accuracy": 78.89908256880734,
"eval_average_metrics": 78.89908256880734,
"eval_loss": 0.20053960382938385,
"eval_runtime": 17.2615,
"eval_samples_per_second": 94.719,
"step": 5200
},
{
"epoch": 18.31,
"eval_accuracy": 79.32721712538226,
"eval_average_metrics": 79.32721712538226,
"eval_loss": 0.1975349634885788,
"eval_runtime": 17.2849,
"eval_samples_per_second": 94.591,
"step": 5400
},
{
"epoch": 18.64,
"learning_rate": 2.0338983050847455e-05,
"loss": 0.1717,
"step": 5500
},
{
"epoch": 18.98,
"eval_accuracy": 78.77675840978593,
"eval_average_metrics": 78.77675840978593,
"eval_loss": 0.20098499953746796,
"eval_runtime": 17.3626,
"eval_samples_per_second": 94.168,
"step": 5600
},
{
"epoch": 19.66,
"eval_accuracy": 79.20489296636084,
"eval_average_metrics": 79.20489296636084,
"eval_loss": 0.19755637645721436,
"eval_runtime": 17.3825,
"eval_samples_per_second": 94.06,
"step": 5800
},
{
"epoch": 20.0,
"step": 5900,
"total_flos": 5.74047486286578e+16,
"train_loss": 0.20100380073159427,
"train_runtime": 3071.6757,
"train_samples_per_second": 61.38,
"train_steps_per_second": 1.921
}
],
"max_steps": 5900,
"num_train_epochs": 20,
"total_flos": 5.74047486286578e+16,
"trial_name": null,
"trial_params": null
}