|
{ |
|
"best_metric": 0.02599843218922615, |
|
"best_model_checkpoint": "./results4/checkpoint-2400", |
|
"epoch": 4.4036697247706424, |
|
"eval_steps": 200, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 6.81126594543457, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 1.8154, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"eval_accuracy": 0.12834224598930483, |
|
"eval_f1": 0.053512789620671505, |
|
"eval_loss": 1.8153961896896362, |
|
"eval_precision": 0.03873999770058728, |
|
"eval_recall": 0.12834224598930483, |
|
"eval_runtime": 8.8051, |
|
"eval_samples_per_second": 106.189, |
|
"eval_steps_per_second": 26.576, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 9.484223365783691, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 1.7996, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"eval_accuracy": 0.20962566844919786, |
|
"eval_f1": 0.1699240887979582, |
|
"eval_loss": 1.776768684387207, |
|
"eval_precision": 0.2712506960219069, |
|
"eval_recall": 0.20962566844919786, |
|
"eval_runtime": 8.617, |
|
"eval_samples_per_second": 108.506, |
|
"eval_steps_per_second": 27.155, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 8.83604621887207, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.7653, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"eval_accuracy": 0.35508021390374334, |
|
"eval_f1": 0.34776363638784535, |
|
"eval_loss": 1.7193909883499146, |
|
"eval_precision": 0.5237977574987844, |
|
"eval_recall": 0.35508021390374334, |
|
"eval_runtime": 8.8621, |
|
"eval_samples_per_second": 105.506, |
|
"eval_steps_per_second": 26.405, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"grad_norm": 12.071432113647461, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 1.7051, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"eval_accuracy": 0.5401069518716578, |
|
"eval_f1": 0.5429174295461449, |
|
"eval_loss": 1.6069858074188232, |
|
"eval_precision": 0.6148066517946612, |
|
"eval_recall": 0.5401069518716578, |
|
"eval_runtime": 8.6363, |
|
"eval_samples_per_second": 108.263, |
|
"eval_steps_per_second": 27.095, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 8.39781379699707, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.561, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"eval_accuracy": 0.6181818181818182, |
|
"eval_f1": 0.599751774424811, |
|
"eval_loss": 1.3523486852645874, |
|
"eval_precision": 0.6869881919032953, |
|
"eval_recall": 0.6181818181818182, |
|
"eval_runtime": 8.7123, |
|
"eval_samples_per_second": 107.32, |
|
"eval_steps_per_second": 26.859, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 7.149374008178711, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.2159, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"eval_accuracy": 0.7561497326203208, |
|
"eval_f1": 0.7396853774240922, |
|
"eval_loss": 0.8623968958854675, |
|
"eval_precision": 0.7976461918397214, |
|
"eval_recall": 0.7561497326203208, |
|
"eval_runtime": 8.4876, |
|
"eval_samples_per_second": 110.161, |
|
"eval_steps_per_second": 27.57, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"grad_norm": 12.584464073181152, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.7501, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"eval_accuracy": 0.8909090909090909, |
|
"eval_f1": 0.8893217599642673, |
|
"eval_loss": 0.43212181329727173, |
|
"eval_precision": 0.8944228004598542, |
|
"eval_recall": 0.8909090909090909, |
|
"eval_runtime": 8.5383, |
|
"eval_samples_per_second": 109.507, |
|
"eval_steps_per_second": 27.406, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"grad_norm": 6.3582305908203125, |
|
"learning_rate": 4.99209709753674e-06, |
|
"loss": 0.4346, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"eval_accuracy": 0.9401069518716577, |
|
"eval_f1": 0.939586410891439, |
|
"eval_loss": 0.20562343299388885, |
|
"eval_precision": 0.942115798236324, |
|
"eval_recall": 0.9401069518716577, |
|
"eval_runtime": 8.5478, |
|
"eval_samples_per_second": 109.384, |
|
"eval_steps_per_second": 27.375, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.302752293577982, |
|
"grad_norm": 1.9859445095062256, |
|
"learning_rate": 4.929173350101025e-06, |
|
"loss": 0.1985, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.302752293577982, |
|
"eval_accuracy": 0.9796791443850268, |
|
"eval_f1": 0.9795557753030716, |
|
"eval_loss": 0.07811883836984634, |
|
"eval_precision": 0.9796698126299838, |
|
"eval_recall": 0.9796791443850268, |
|
"eval_runtime": 8.5151, |
|
"eval_samples_per_second": 109.804, |
|
"eval_steps_per_second": 27.48, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.669724770642202, |
|
"grad_norm": 3.169071912765503, |
|
"learning_rate": 4.804914636820517e-06, |
|
"loss": 0.1066, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.669724770642202, |
|
"eval_accuracy": 0.9828877005347594, |
|
"eval_f1": 0.9828341396664676, |
|
"eval_loss": 0.05222497880458832, |
|
"eval_precision": 0.9829524348459922, |
|
"eval_recall": 0.9828877005347594, |
|
"eval_runtime": 8.3944, |
|
"eval_samples_per_second": 111.384, |
|
"eval_steps_per_second": 27.876, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.036697247706422, |
|
"grad_norm": 0.22888700664043427, |
|
"learning_rate": 4.622458405228411e-06, |
|
"loss": 0.096, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.036697247706422, |
|
"eval_accuracy": 0.986096256684492, |
|
"eval_f1": 0.9860397886588865, |
|
"eval_loss": 0.037959374487400055, |
|
"eval_precision": 0.9862011528885352, |
|
"eval_recall": 0.986096256684492, |
|
"eval_runtime": 8.8571, |
|
"eval_samples_per_second": 105.565, |
|
"eval_steps_per_second": 26.42, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.4036697247706424, |
|
"grad_norm": 0.051910221576690674, |
|
"learning_rate": 4.386411550395576e-06, |
|
"loss": 0.0686, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.4036697247706424, |
|
"eval_accuracy": 0.9925133689839573, |
|
"eval_f1": 0.9925048378298892, |
|
"eval_loss": 0.02599843218922615, |
|
"eval_precision": 0.9925302733753678, |
|
"eval_recall": 0.9925133689839573, |
|
"eval_runtime": 8.5262, |
|
"eval_samples_per_second": 109.661, |
|
"eval_steps_per_second": 27.445, |
|
"step": 2400 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 5450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 396630381488796.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|