bert_base_lda_50_v1_mrpc / trainer_state.json
gokulsrinivasagan's picture
End of training
f0c3c62 verified
raw
history blame
8.38 kB
{
"best_metric": 0.6226160526275635,
"best_model_checkpoint": "bert_base_lda_50_v1_mrpc/checkpoint-150",
"epoch": 15.0,
"eval_steps": 500,
"global_step": 225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 22.977022171020508,
"learning_rate": 0.00098,
"loss": 1.9018,
"step": 15
},
{
"epoch": 1.0,
"eval_accuracy": 0.3161764705882353,
"eval_combined_score": 0.15808823529411764,
"eval_f1": 0.0,
"eval_loss": 1.746949315071106,
"eval_runtime": 0.2773,
"eval_samples_per_second": 1471.484,
"eval_steps_per_second": 7.213,
"step": 15
},
{
"epoch": 2.0,
"grad_norm": 5.6883745193481445,
"learning_rate": 0.00096,
"loss": 0.7758,
"step": 30
},
{
"epoch": 2.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.713627815246582,
"eval_runtime": 0.2735,
"eval_samples_per_second": 1491.747,
"eval_steps_per_second": 7.312,
"step": 30
},
{
"epoch": 3.0,
"grad_norm": 2.137789249420166,
"learning_rate": 0.00094,
"loss": 0.6464,
"step": 45
},
{
"epoch": 3.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6401654481887817,
"eval_runtime": 0.3133,
"eval_samples_per_second": 1302.407,
"eval_steps_per_second": 6.384,
"step": 45
},
{
"epoch": 4.0,
"grad_norm": 0.9102729558944702,
"learning_rate": 0.00092,
"loss": 0.6412,
"step": 60
},
{
"epoch": 4.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6260914206504822,
"eval_runtime": 0.2816,
"eval_samples_per_second": 1449.033,
"eval_steps_per_second": 7.103,
"step": 60
},
{
"epoch": 5.0,
"grad_norm": 0.8048895001411438,
"learning_rate": 0.0009000000000000001,
"loss": 0.6318,
"step": 75
},
{
"epoch": 5.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6250861883163452,
"eval_runtime": 0.2775,
"eval_samples_per_second": 1470.21,
"eval_steps_per_second": 7.207,
"step": 75
},
{
"epoch": 6.0,
"grad_norm": 0.975186288356781,
"learning_rate": 0.00088,
"loss": 0.6352,
"step": 90
},
{
"epoch": 6.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6246122717857361,
"eval_runtime": 0.2709,
"eval_samples_per_second": 1506.067,
"eval_steps_per_second": 7.383,
"step": 90
},
{
"epoch": 7.0,
"grad_norm": 0.4961640536785126,
"learning_rate": 0.00086,
"loss": 0.635,
"step": 105
},
{
"epoch": 7.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6256270408630371,
"eval_runtime": 0.2693,
"eval_samples_per_second": 1515.101,
"eval_steps_per_second": 7.427,
"step": 105
},
{
"epoch": 8.0,
"grad_norm": 0.3314763903617859,
"learning_rate": 0.00084,
"loss": 0.634,
"step": 120
},
{
"epoch": 8.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6243536472320557,
"eval_runtime": 0.2744,
"eval_samples_per_second": 1487.06,
"eval_steps_per_second": 7.29,
"step": 120
},
{
"epoch": 9.0,
"grad_norm": 0.5980406403541565,
"learning_rate": 0.00082,
"loss": 0.6297,
"step": 135
},
{
"epoch": 9.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.634880542755127,
"eval_runtime": 0.2751,
"eval_samples_per_second": 1482.984,
"eval_steps_per_second": 7.27,
"step": 135
},
{
"epoch": 10.0,
"grad_norm": 0.4592650532722473,
"learning_rate": 0.0008,
"loss": 0.6357,
"step": 150
},
{
"epoch": 10.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6226160526275635,
"eval_runtime": 0.2684,
"eval_samples_per_second": 1520.307,
"eval_steps_per_second": 7.452,
"step": 150
},
{
"epoch": 11.0,
"grad_norm": 0.7528880834579468,
"learning_rate": 0.0007800000000000001,
"loss": 0.6341,
"step": 165
},
{
"epoch": 11.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6255601048469543,
"eval_runtime": 0.2722,
"eval_samples_per_second": 1499.113,
"eval_steps_per_second": 7.349,
"step": 165
},
{
"epoch": 12.0,
"grad_norm": 0.380775511264801,
"learning_rate": 0.00076,
"loss": 0.6333,
"step": 180
},
{
"epoch": 12.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6249569058418274,
"eval_runtime": 0.2722,
"eval_samples_per_second": 1498.681,
"eval_steps_per_second": 7.346,
"step": 180
},
{
"epoch": 13.0,
"grad_norm": 0.6090389490127563,
"learning_rate": 0.00074,
"loss": 0.6309,
"step": 195
},
{
"epoch": 13.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6240330338478088,
"eval_runtime": 0.2747,
"eval_samples_per_second": 1485.187,
"eval_steps_per_second": 7.28,
"step": 195
},
{
"epoch": 14.0,
"grad_norm": 1.031883716583252,
"learning_rate": 0.0007199999999999999,
"loss": 0.6343,
"step": 210
},
{
"epoch": 14.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6258903741836548,
"eval_runtime": 0.2733,
"eval_samples_per_second": 1492.679,
"eval_steps_per_second": 7.317,
"step": 210
},
{
"epoch": 15.0,
"grad_norm": 0.6238628029823303,
"learning_rate": 0.0007,
"loss": 0.6328,
"step": 225
},
{
"epoch": 15.0,
"eval_accuracy": 0.6838235294117647,
"eval_combined_score": 0.7480253018237863,
"eval_f1": 0.8122270742358079,
"eval_loss": 0.6254164576530457,
"eval_runtime": 0.2698,
"eval_samples_per_second": 1512.061,
"eval_steps_per_second": 7.412,
"step": 225
},
{
"epoch": 15.0,
"step": 225,
"total_flos": 7238185132953600.0,
"train_loss": 0.7287869644165039,
"train_runtime": 125.4342,
"train_samples_per_second": 1462.121,
"train_steps_per_second": 5.979
}
],
"logging_steps": 1,
"max_steps": 750,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7238185132953600.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}