|
{ |
|
"best_metric": 0.55, |
|
"best_model_checkpoint": "test\\checkpoint-1500", |
|
"epoch": 1000.0, |
|
"eval_steps": 100, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.264135479927063, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.6603, |
|
"eval_samples_per_second": 0.429, |
|
"eval_steps_per_second": 0.215, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.3265936374664307, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.6509, |
|
"eval_samples_per_second": 0.43, |
|
"eval_steps_per_second": 0.215, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.4988499879837036, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.6202, |
|
"eval_samples_per_second": 0.433, |
|
"eval_steps_per_second": 0.216, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.6139302253723145, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.7737, |
|
"eval_samples_per_second": 0.419, |
|
"eval_steps_per_second": 0.209, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"grad_norm": 0.14490167796611786, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.3046, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.6872639656066895, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.6336, |
|
"eval_samples_per_second": 0.432, |
|
"eval_steps_per_second": 0.216, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.7239123582839966, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.5649, |
|
"eval_samples_per_second": 0.438, |
|
"eval_steps_per_second": 0.219, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.7806979417800903, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.4489, |
|
"eval_samples_per_second": 0.45, |
|
"eval_steps_per_second": 0.225, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.8008891344070435, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.4619, |
|
"eval_samples_per_second": 0.448, |
|
"eval_steps_per_second": 0.224, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.8712844848632812, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.4231, |
|
"eval_samples_per_second": 0.452, |
|
"eval_steps_per_second": 0.226, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"grad_norm": 0.03944549337029457, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0081, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.9119956493377686, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.9883, |
|
"eval_samples_per_second": 0.401, |
|
"eval_steps_per_second": 0.2, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 550.0, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_f1": 0.55, |
|
"eval_loss": 1.947400450706482, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.9564, |
|
"eval_samples_per_second": 0.404, |
|
"eval_steps_per_second": 0.202, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 600.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.939751386642456, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.9517, |
|
"eval_samples_per_second": 0.404, |
|
"eval_steps_per_second": 0.202, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 650.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.9685324430465698, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.9434, |
|
"eval_samples_per_second": 0.405, |
|
"eval_steps_per_second": 0.202, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 700.0, |
|
"eval_accuracy": 0.6, |
|
"eval_f1": 0.5365853658536585, |
|
"eval_loss": 1.9726978540420532, |
|
"eval_precision": 0.5238095238095238, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 5.0891, |
|
"eval_samples_per_second": 0.393, |
|
"eval_steps_per_second": 0.196, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 750.0, |
|
"grad_norm": 0.0346556194126606, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0044, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 750.0, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_f1": 0.55, |
|
"eval_loss": 1.9932883977890015, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 5.166, |
|
"eval_samples_per_second": 0.387, |
|
"eval_steps_per_second": 0.194, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 800.0, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_f1": 0.55, |
|
"eval_loss": 2.012096643447876, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 5.144, |
|
"eval_samples_per_second": 0.389, |
|
"eval_steps_per_second": 0.194, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 850.0, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_f1": 0.55, |
|
"eval_loss": 2.0032660961151123, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 5.175, |
|
"eval_samples_per_second": 0.386, |
|
"eval_steps_per_second": 0.193, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 900.0, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_f1": 0.55, |
|
"eval_loss": 2.017909288406372, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.7769, |
|
"eval_samples_per_second": 0.419, |
|
"eval_steps_per_second": 0.209, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 950.0, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_f1": 0.55, |
|
"eval_loss": 2.016812324523926, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.6461, |
|
"eval_samples_per_second": 0.43, |
|
"eval_steps_per_second": 0.215, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1000.0, |
|
"grad_norm": 0.030378883704543114, |
|
"learning_rate": 0.0, |
|
"loss": 0.0034, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1000.0, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_f1": 0.55, |
|
"eval_loss": 2.0183417797088623, |
|
"eval_precision": 0.55, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 4.7632, |
|
"eval_samples_per_second": 0.42, |
|
"eval_steps_per_second": 0.21, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1000, |
|
"save_steps": 500, |
|
"total_flos": 796315622400000.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|