|
{ |
|
"best_metric": 0.8304761904761905, |
|
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_final//finetune/sst2/checkpoint-2000", |
|
"epoch": 9.47867298578199, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.8110235929489136, |
|
"eval_f1": 0.8102766798418972, |
|
"eval_loss": 0.4257064461708069, |
|
"eval_mcc": 0.6220413857242502, |
|
"eval_runtime": 0.9922, |
|
"eval_samples_per_second": 511.972, |
|
"eval_steps_per_second": 64.5, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.407582938388626e-05, |
|
"loss": 0.3763, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.8070865869522095, |
|
"eval_f1": 0.8243727598566307, |
|
"eval_loss": 0.5127468705177307, |
|
"eval_mcc": 0.6277527819251606, |
|
"eval_runtime": 0.9929, |
|
"eval_samples_per_second": 511.623, |
|
"eval_steps_per_second": 64.456, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.815165876777251e-05, |
|
"loss": 0.1968, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy": 0.8129921555519104, |
|
"eval_f1": 0.8080808080808081, |
|
"eval_loss": 0.5491798520088196, |
|
"eval_mcc": 0.6265026705560554, |
|
"eval_runtime": 0.9941, |
|
"eval_samples_per_second": 511.013, |
|
"eval_steps_per_second": 64.38, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.222748815165877e-05, |
|
"loss": 0.1208, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.8188976645469666, |
|
"eval_f1": 0.8270676691729323, |
|
"eval_loss": 0.6646919846534729, |
|
"eval_mcc": 0.6413015929266274, |
|
"eval_runtime": 0.9945, |
|
"eval_samples_per_second": 510.824, |
|
"eval_steps_per_second": 64.356, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.6303317535545023e-05, |
|
"loss": 0.0765, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_accuracy": 0.8248031735420227, |
|
"eval_f1": 0.8304761904761905, |
|
"eval_loss": 0.687165379524231, |
|
"eval_mcc": 0.6515283866658971, |
|
"eval_runtime": 0.9913, |
|
"eval_samples_per_second": 512.447, |
|
"eval_steps_per_second": 64.56, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_accuracy": 0.8031495809555054, |
|
"eval_f1": 0.7975708502024291, |
|
"eval_loss": 0.790716826915741, |
|
"eval_mcc": 0.6068978167568908, |
|
"eval_runtime": 0.9943, |
|
"eval_samples_per_second": 510.929, |
|
"eval_steps_per_second": 64.369, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 2.037914691943128e-05, |
|
"loss": 0.0477, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_accuracy": 0.8248031735420227, |
|
"eval_f1": 0.8271844660194174, |
|
"eval_loss": 0.8302326202392578, |
|
"eval_mcc": 0.6500578442769696, |
|
"eval_runtime": 0.991, |
|
"eval_samples_per_second": 512.589, |
|
"eval_steps_per_second": 64.578, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.4454976303317535e-05, |
|
"loss": 0.0294, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_accuracy": 0.8129921555519104, |
|
"eval_f1": 0.8118811881188119, |
|
"eval_loss": 0.9771924614906311, |
|
"eval_mcc": 0.6259775087526785, |
|
"eval_runtime": 0.9936, |
|
"eval_samples_per_second": 511.275, |
|
"eval_steps_per_second": 64.413, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 8.530805687203793e-06, |
|
"loss": 0.0172, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"eval_accuracy": 0.8188976645469666, |
|
"eval_f1": 0.8210116731517509, |
|
"eval_loss": 1.0711852312088013, |
|
"eval_mcc": 0.6381511040271763, |
|
"eval_runtime": 0.9924, |
|
"eval_samples_per_second": 511.908, |
|
"eval_steps_per_second": 64.492, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.6066350710900472e-06, |
|
"loss": 0.0113, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"eval_accuracy": 0.8149606585502625, |
|
"eval_f1": 0.815686274509804, |
|
"eval_loss": 1.136107087135315, |
|
"eval_mcc": 0.6300165875296968, |
|
"eval_runtime": 0.9923, |
|
"eval_samples_per_second": 511.934, |
|
"eval_steps_per_second": 64.496, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"step": 4000, |
|
"total_flos": 3.997142324210074e+16, |
|
"train_loss": 0.10950899147987365, |
|
"train_runtime": 2086.3883, |
|
"train_samples_per_second": 242.179, |
|
"train_steps_per_second": 2.023 |
|
} |
|
], |
|
"max_steps": 4220, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.997142324210074e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|