|
{ |
|
"best_metric": 0.8015293708724366, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3404", |
|
"epoch": 9.988249118683902, |
|
"eval_steps": 500, |
|
"global_step": 4250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9988249118683902, |
|
"eval_accuracy": 0.9733340656624604, |
|
"eval_f1": 0.7536426810132258, |
|
"eval_loss": 0.07383445650339127, |
|
"eval_precision": 0.7233218588640276, |
|
"eval_recall": 0.786616752456715, |
|
"eval_runtime": 14.6851, |
|
"eval_samples_per_second": 463.736, |
|
"eval_steps_per_second": 58.018, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1750881316098707, |
|
"grad_norm": 1.1428656578063965, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 0.0996, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9743364411550661, |
|
"eval_f1": 0.7698492462311558, |
|
"eval_loss": 0.07865303754806519, |
|
"eval_precision": 0.7363811151463363, |
|
"eval_recall": 0.8065044454843239, |
|
"eval_runtime": 14.4143, |
|
"eval_samples_per_second": 472.448, |
|
"eval_steps_per_second": 59.108, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.3501762632197414, |
|
"grad_norm": 1.476723074913025, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 0.0458, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.99882491186839, |
|
"eval_accuracy": 0.9759155258351985, |
|
"eval_f1": 0.7928563303378454, |
|
"eval_loss": 0.07876282930374146, |
|
"eval_precision": 0.7715297764002657, |
|
"eval_recall": 0.8153954141319607, |
|
"eval_runtime": 14.6794, |
|
"eval_samples_per_second": 463.917, |
|
"eval_steps_per_second": 58.041, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 3.525264394829612, |
|
"grad_norm": 0.4094911515712738, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.0279, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9757026926826589, |
|
"eval_f1": 0.7929102344196683, |
|
"eval_loss": 0.09221930056810379, |
|
"eval_precision": 0.775441735629613, |
|
"eval_recall": 0.8111839026672906, |
|
"eval_runtime": 14.825, |
|
"eval_samples_per_second": 459.361, |
|
"eval_steps_per_second": 57.471, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 4.700352526439483, |
|
"grad_norm": 0.49960169196128845, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.0169, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.9988249118683905, |
|
"eval_accuracy": 0.974439424938553, |
|
"eval_f1": 0.7863421230561191, |
|
"eval_loss": 0.09940312057733536, |
|
"eval_precision": 0.7584782608695653, |
|
"eval_recall": 0.816331305568554, |
|
"eval_runtime": 14.3671, |
|
"eval_samples_per_second": 473.999, |
|
"eval_steps_per_second": 59.302, |
|
"step": 2127 |
|
}, |
|
{ |
|
"epoch": 5.875440658049354, |
|
"grad_norm": 0.6507154703140259, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.0114, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9764579070948961, |
|
"eval_f1": 0.7909059593523942, |
|
"eval_loss": 0.10803968459367752, |
|
"eval_precision": 0.7765501691093574, |
|
"eval_recall": 0.8058025269068788, |
|
"eval_runtime": 14.5335, |
|
"eval_samples_per_second": 468.572, |
|
"eval_steps_per_second": 58.623, |
|
"step": 2553 |
|
}, |
|
{ |
|
"epoch": 6.9988249118683905, |
|
"eval_accuracy": 0.975997912861988, |
|
"eval_f1": 0.7943099690260411, |
|
"eval_loss": 0.11656877398490906, |
|
"eval_precision": 0.7792032410533424, |
|
"eval_recall": 0.810014038371549, |
|
"eval_runtime": 14.6842, |
|
"eval_samples_per_second": 463.764, |
|
"eval_steps_per_second": 58.022, |
|
"step": 2978 |
|
}, |
|
{ |
|
"epoch": 7.050528789659224, |
|
"grad_norm": 0.15811629593372345, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.0079, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9767668584453568, |
|
"eval_f1": 0.8015293708724366, |
|
"eval_loss": 0.1294233798980713, |
|
"eval_precision": 0.7938948817994033, |
|
"eval_recall": 0.8093121197941039, |
|
"eval_runtime": 14.774, |
|
"eval_samples_per_second": 460.946, |
|
"eval_steps_per_second": 57.669, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 8.225616921269095, |
|
"grad_norm": 0.11404519528150558, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.0053, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.99882491186839, |
|
"eval_accuracy": 0.9766158155629093, |
|
"eval_f1": 0.7988929889298894, |
|
"eval_loss": 0.13398884236812592, |
|
"eval_precision": 0.787630741246021, |
|
"eval_recall": 0.8104819840898456, |
|
"eval_runtime": 14.4437, |
|
"eval_samples_per_second": 471.487, |
|
"eval_steps_per_second": 58.988, |
|
"step": 3829 |
|
}, |
|
{ |
|
"epoch": 9.400705052878966, |
|
"grad_norm": 0.5636719465255737, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.0038, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.988249118683902, |
|
"eval_accuracy": 0.9766776058330014, |
|
"eval_f1": 0.7988459319099828, |
|
"eval_loss": 0.13674204051494598, |
|
"eval_precision": 0.7882031427920747, |
|
"eval_recall": 0.8097800655124006, |
|
"eval_runtime": 14.8828, |
|
"eval_samples_per_second": 457.575, |
|
"eval_steps_per_second": 57.247, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 9.988249118683902, |
|
"step": 4250, |
|
"total_flos": 1.2649124434987926e+16, |
|
"train_loss": 0.02590363489880281, |
|
"train_runtime": 1203.9865, |
|
"train_samples_per_second": 226.157, |
|
"train_steps_per_second": 3.53 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2649124434987926e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|