|
{
  "best_metric": 0.05933361500501633,
  "best_model_checkpoint": "runs/roberta-base-500000-samples-512-max-len-64-train-batch-size-8-test-batch-size-3-epochs-1e-05-lr-0.1-warmup-ratio/checkpoint-12000",
  "epoch": 2.1331058020477816,
  "eval_steps": 1500,
  "global_step": 15000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "eval_accuracy": 0.4971,
      "eval_f1": 0.6639222657346396,
      "eval_loss": 0.6927798390388489,
      "eval_precision": 0.4973866025833584,
      "eval_recall": 0.9981112361356695,
      "eval_runtime": 372.4207,
      "eval_samples_per_second": 134.257,
      "eval_steps_per_second": 16.782,
      "step": 0
    },
    {
      "epoch": 0.07110352673492605,
      "grad_norm": 11.443495750427246,
      "learning_rate": 2.369668246445498e-06,
      "loss": 0.4665,
      "step": 500
    },
    {
      "epoch": 0.1422070534698521,
      "grad_norm": 5.395324230194092,
      "learning_rate": 4.739336492890996e-06,
      "loss": 0.1226,
      "step": 1000
    },
    {
      "epoch": 0.21331058020477817,
      "grad_norm": 3.713897705078125,
      "learning_rate": 7.1090047393364935e-06,
      "loss": 0.1013,
      "step": 1500
    },
    {
      "epoch": 0.21331058020477817,
      "eval_accuracy": 0.96946,
      "eval_f1": 0.9694067677759302,
      "eval_loss": 0.08184666186571121,
      "eval_precision": 0.9665987454552719,
      "eval_recall": 0.9722311525478219,
      "eval_runtime": 369.1947,
      "eval_samples_per_second": 135.43,
      "eval_steps_per_second": 16.929,
      "step": 1500
    },
    {
      "epoch": 0.2844141069397042,
      "grad_norm": 22.85436248779297,
      "learning_rate": 9.478672985781992e-06,
      "loss": 0.0911,
      "step": 2000
    },
    {
      "epoch": 0.35551763367463024,
      "grad_norm": 2.183819532394409,
      "learning_rate": 9.794585484040872e-06,
      "loss": 0.0904,
      "step": 2500
    },
    {
      "epoch": 0.42662116040955633,
      "grad_norm": 3.5085792541503906,
      "learning_rate": 9.53123354050353e-06,
      "loss": 0.0853,
      "step": 3000
    },
    {
      "epoch": 0.42662116040955633,
      "eval_accuracy": 0.97452,
      "eval_f1": 0.97444332998997,
      "eval_loss": 0.06914982199668884,
      "eval_precision": 0.9728430665705359,
      "eval_recall": 0.9760488667416815,
      "eval_runtime": 369.1227,
      "eval_samples_per_second": 135.456,
      "eval_steps_per_second": 16.932,
      "step": 3000
    },
    {
      "epoch": 0.49772468714448237,
      "grad_norm": 6.5320234298706055,
      "learning_rate": 9.267881596966186e-06,
      "loss": 0.0799,
      "step": 3500
    },
    {
      "epoch": 0.5688282138794084,
      "grad_norm": 1.3349543809890747,
      "learning_rate": 9.004529653428843e-06,
      "loss": 0.0751,
      "step": 4000
    },
    {
      "epoch": 0.6399317406143344,
      "grad_norm": 1.4865925312042236,
      "learning_rate": 8.7411777098915e-06,
      "loss": 0.0742,
      "step": 4500
    },
    {
      "epoch": 0.6399317406143344,
      "eval_accuracy": 0.97572,
      "eval_f1": 0.9756136756257282,
      "eval_loss": 0.06528624147176743,
      "eval_precision": 0.9753393846895333,
      "eval_recall": 0.9758881208808873,
      "eval_runtime": 369.1379,
      "eval_samples_per_second": 135.451,
      "eval_steps_per_second": 16.931,
      "step": 4500
    },
    {
      "epoch": 0.7110352673492605,
      "grad_norm": 1.9182387590408325,
      "learning_rate": 8.477825766354156e-06,
      "loss": 0.0761,
      "step": 5000
    },
    {
      "epoch": 0.7821387940841866,
      "grad_norm": 4.340898036956787,
      "learning_rate": 8.214473822816812e-06,
      "loss": 0.0742,
      "step": 5500
    },
    {
      "epoch": 0.8532423208191127,
      "grad_norm": 4.503939151763916,
      "learning_rate": 7.95112187927947e-06,
      "loss": 0.0722,
      "step": 6000
    },
    {
      "epoch": 0.8532423208191127,
      "eval_accuracy": 0.97488,
      "eval_f1": 0.9744964262508122,
      "eval_loss": 0.06521258503198624,
      "eval_precision": 0.9848957478246594,
      "eval_recall": 0.9643144189037133,
      "eval_runtime": 369.1106,
      "eval_samples_per_second": 135.461,
      "eval_steps_per_second": 16.933,
      "step": 6000
    },
    {
      "epoch": 0.9243458475540387,
      "grad_norm": 5.767714977264404,
      "learning_rate": 7.687769935742126e-06,
      "loss": 0.0664,
      "step": 6500
    },
    {
      "epoch": 0.9954493742889647,
      "grad_norm": 1.0236719846725464,
      "learning_rate": 7.424417992204783e-06,
      "loss": 0.0687,
      "step": 7000
    },
    {
      "epoch": 1.0665529010238908,
      "grad_norm": 5.575131416320801,
      "learning_rate": 7.1610660486674395e-06,
      "loss": 0.0594,
      "step": 7500
    },
    {
      "epoch": 1.0665529010238908,
      "eval_accuracy": 0.9734,
      "eval_f1": 0.9729013854930725,
      "eval_loss": 0.07639238238334656,
      "eval_precision": 0.9867333443544387,
      "eval_recall": 0.9594518566146921,
      "eval_runtime": 369.1258,
      "eval_samples_per_second": 135.455,
      "eval_steps_per_second": 16.932,
      "step": 7500
    },
    {
      "epoch": 1.1376564277588168,
      "grad_norm": 1.4480912685394287,
      "learning_rate": 6.8977141051300965e-06,
      "loss": 0.0613,
      "step": 8000
    },
    {
      "epoch": 1.2087599544937428,
      "grad_norm": 14.01652717590332,
      "learning_rate": 6.6343621615927535e-06,
      "loss": 0.0626,
      "step": 8500
    },
    {
      "epoch": 1.2798634812286689,
      "grad_norm": 2.645029067993164,
      "learning_rate": 6.3710102180554104e-06,
      "loss": 0.0595,
      "step": 9000
    },
    {
      "epoch": 1.2798634812286689,
      "eval_accuracy": 0.97674,
      "eval_f1": 0.9764427069618586,
      "eval_loss": 0.0677267462015152,
      "eval_precision": 0.9843986113947315,
      "eval_recall": 0.968614370679955,
      "eval_runtime": 369.1238,
      "eval_samples_per_second": 135.456,
      "eval_steps_per_second": 16.932,
      "step": 9000
    },
    {
      "epoch": 1.350967007963595,
      "grad_norm": 4.5592122077941895,
      "learning_rate": 6.1076582745180666e-06,
      "loss": 0.0618,
      "step": 9500
    },
    {
      "epoch": 1.4220705346985212,
      "grad_norm": 5.417106628417969,
      "learning_rate": 5.8443063309807235e-06,
      "loss": 0.058,
      "step": 10000
    },
    {
      "epoch": 1.493174061433447,
      "grad_norm": 1.136661171913147,
      "learning_rate": 5.5809543874433805e-06,
      "loss": 0.0542,
      "step": 10500
    },
    {
      "epoch": 1.493174061433447,
      "eval_accuracy": 0.97848,
      "eval_f1": 0.9783422567529487,
      "eval_loss": 0.06500901281833649,
      "eval_precision": 0.9800387127994193,
      "eval_recall": 0.9766516637196592,
      "eval_runtime": 369.2146,
      "eval_samples_per_second": 135.423,
      "eval_steps_per_second": 16.928,
      "step": 10500
    },
    {
      "epoch": 1.5642775881683733,
      "grad_norm": 2.5331344604492188,
      "learning_rate": 5.317602443906037e-06,
      "loss": 0.0623,
      "step": 11000
    },
    {
      "epoch": 1.635381114903299,
      "grad_norm": 2.5099124908447266,
      "learning_rate": 5.054250500368693e-06,
      "loss": 0.0617,
      "step": 11500
    },
    {
      "epoch": 1.7064846416382253,
      "grad_norm": 0.18802767992019653,
      "learning_rate": 4.79089855683135e-06,
      "loss": 0.0571,
      "step": 12000
    },
    {
      "epoch": 1.7064846416382253,
      "eval_accuracy": 0.97944,
      "eval_f1": 0.9793847511330366,
      "eval_loss": 0.05933361500501633,
      "eval_precision": 0.9774637739172204,
      "eval_recall": 0.9813132936826877,
      "eval_runtime": 369.1545,
      "eval_samples_per_second": 135.445,
      "eval_steps_per_second": 16.931,
      "step": 12000
    },
    {
      "epoch": 1.7775881683731511,
      "grad_norm": 0.17306402325630188,
      "learning_rate": 4.527546613294007e-06,
      "loss": 0.0575,
      "step": 12500
    },
    {
      "epoch": 1.8486916951080774,
      "grad_norm": 2.0170910358428955,
      "learning_rate": 4.264194669756664e-06,
      "loss": 0.0573,
      "step": 13000
    },
    {
      "epoch": 1.9197952218430034,
      "grad_norm": 1.0754927396774292,
      "learning_rate": 4.00084272621932e-06,
      "loss": 0.0562,
      "step": 13500
    },
    {
      "epoch": 1.9197952218430034,
      "eval_accuracy": 0.9793,
      "eval_f1": 0.9792272955343703,
      "eval_loss": 0.05992409214377403,
      "eval_precision": 0.9781083356721864,
      "eval_recall": 0.9803488185179232,
      "eval_runtime": 369.2584,
      "eval_samples_per_second": 135.407,
      "eval_steps_per_second": 16.926,
      "step": 13500
    },
    {
      "epoch": 1.9908987485779295,
      "grad_norm": 0.5176452398300171,
      "learning_rate": 3.7374907826819767e-06,
      "loss": 0.0553,
      "step": 14000
    },
    {
      "epoch": 2.0620022753128557,
      "grad_norm": 3.9174857139587402,
      "learning_rate": 3.474138839144633e-06,
      "loss": 0.0506,
      "step": 14500
    },
    {
      "epoch": 2.1331058020477816,
      "grad_norm": 2.6643998622894287,
      "learning_rate": 3.21078689560729e-06,
      "loss": 0.0463,
      "step": 15000
    },
    {
      "epoch": 2.1331058020477816,
      "eval_accuracy": 0.97976,
      "eval_f1": 0.9796746334605343,
      "eval_loss": 0.05948900803923607,
      "eval_precision": 0.9792419497309885,
      "eval_recall": 0.9801076997267321,
      "eval_runtime": 369.2026,
      "eval_samples_per_second": 135.427,
      "eval_steps_per_second": 16.928,
      "step": 15000
    }
  ],
  "logging_steps": 500,
  "max_steps": 21096,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5256135448428544e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}