|
{ |
|
"best_metric": 0.02586853690445423, |
|
"best_model_checkpoint": "autotrain-kno3k-fiasf/checkpoint-678", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 678, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 10.68883228302002, |
|
"learning_rate": 3.0882352941176475e-05, |
|
"loss": 0.8263, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 58.380653381347656, |
|
"learning_rate": 4.852459016393443e-05, |
|
"loss": 0.2512, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.1297217607498169, |
|
"learning_rate": 4.491803278688525e-05, |
|
"loss": 0.1081, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.23067115247249603, |
|
"learning_rate": 4.122950819672131e-05, |
|
"loss": 0.1776, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0700414851307869, |
|
"learning_rate": 3.754098360655738e-05, |
|
"loss": 0.0775, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9844530816213215, |
|
"eval_f1_macro": 0.9790876623736272, |
|
"eval_f1_micro": 0.9844530816213215, |
|
"eval_f1_weighted": 0.9845629146174618, |
|
"eval_loss": 0.05487915500998497, |
|
"eval_precision_macro": 0.9728622841711356, |
|
"eval_precision_micro": 0.9844530816213215, |
|
"eval_precision_weighted": 0.9849390553498074, |
|
"eval_recall_macro": 0.9856721822982112, |
|
"eval_recall_micro": 0.9844530816213215, |
|
"eval_recall_weighted": 0.9844530816213215, |
|
"eval_runtime": 10.4593, |
|
"eval_samples_per_second": 172.191, |
|
"eval_steps_per_second": 10.804, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.03147374466061592, |
|
"learning_rate": 3.39344262295082e-05, |
|
"loss": 0.1134, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.15661084651947021, |
|
"learning_rate": 3.0245901639344264e-05, |
|
"loss": 0.069, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.04975114390254021, |
|
"learning_rate": 2.6557377049180327e-05, |
|
"loss": 0.0821, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.02913733199238777, |
|
"learning_rate": 2.2868852459016393e-05, |
|
"loss": 0.0636, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.03793076425790787, |
|
"learning_rate": 1.918032786885246e-05, |
|
"loss": 0.135, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9922265408106608, |
|
"eval_f1_macro": 0.9885732001605064, |
|
"eval_f1_micro": 0.9922265408106608, |
|
"eval_f1_weighted": 0.9922495320181636, |
|
"eval_loss": 0.0289547611027956, |
|
"eval_precision_macro": 0.9857016924464328, |
|
"eval_precision_micro": 0.9922265408106608, |
|
"eval_precision_weighted": 0.9923116897185307, |
|
"eval_recall_macro": 0.991509527641691, |
|
"eval_recall_micro": 0.9922265408106608, |
|
"eval_recall_weighted": 0.9922265408106608, |
|
"eval_runtime": 10.5141, |
|
"eval_samples_per_second": 171.294, |
|
"eval_steps_per_second": 10.747, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 23.56169891357422, |
|
"learning_rate": 1.5491803278688525e-05, |
|
"loss": 0.0508, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.0823952853679657, |
|
"learning_rate": 1.1803278688524591e-05, |
|
"loss": 0.0443, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.025019163265824318, |
|
"learning_rate": 8.114754098360657e-06, |
|
"loss": 0.1144, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.830748975276947, |
|
"learning_rate": 4.426229508196722e-06, |
|
"loss": 0.0624, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 1.670173168182373, |
|
"learning_rate": 7.377049180327869e-07, |
|
"loss": 0.131, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9938922820655192, |
|
"eval_f1_macro": 0.990657441189161, |
|
"eval_f1_micro": 0.9938922820655192, |
|
"eval_f1_weighted": 0.9939035669908675, |
|
"eval_loss": 0.02586853690445423, |
|
"eval_precision_macro": 0.9889770605232528, |
|
"eval_precision_micro": 0.9938922820655192, |
|
"eval_precision_weighted": 0.9939283176272281, |
|
"eval_recall_macro": 0.9923605914714783, |
|
"eval_recall_micro": 0.9938922820655192, |
|
"eval_recall_weighted": 0.9938922820655192, |
|
"eval_runtime": 10.4887, |
|
"eval_samples_per_second": 171.709, |
|
"eval_steps_per_second": 10.774, |
|
"step": 678 |
|
} |
|
], |
|
"logging_steps": 45, |
|
"max_steps": 678, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.3599465039988531e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|