|
{ |
|
"best_metric": 0.052040886133909225, |
|
"best_model_checkpoint": "ckpt/checkpoint-200", |
|
"epoch": 2.824858757062147, |
|
"eval_steps": 200, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2824858757062147, |
|
"grad_norm": 0.08298324048519135, |
|
"learning_rate": 0.00029152542372881354, |
|
"loss": 0.1901, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5649717514124294, |
|
"grad_norm": 0.11693409830331802, |
|
"learning_rate": 0.0002830508474576271, |
|
"loss": 0.0624, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5649717514124294, |
|
"eval_accuracy": 0.9860834990059643, |
|
"eval_f1": 0.9891354958870091, |
|
"eval_loss": 0.052040886133909225, |
|
"eval_precision": 0.9869908626297041, |
|
"eval_recall": 0.9912894695909161, |
|
"eval_runtime": 12.1769, |
|
"eval_samples_per_second": 826.154, |
|
"eval_steps_per_second": 6.488, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 0.10722041875123978, |
|
"learning_rate": 0.00027457627118644066, |
|
"loss": 0.0562, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1299435028248588, |
|
"grad_norm": 0.08397164940834045, |
|
"learning_rate": 0.0002661016949152542, |
|
"loss": 0.0524, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1299435028248588, |
|
"eval_accuracy": 0.9874751491053678, |
|
"eval_f1": 0.9902219462983083, |
|
"eval_loss": 0.04475295916199684, |
|
"eval_precision": 0.9880749574105622, |
|
"eval_recall": 0.9923782858920517, |
|
"eval_runtime": 11.6899, |
|
"eval_samples_per_second": 860.571, |
|
"eval_steps_per_second": 6.758, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.4124293785310735, |
|
"grad_norm": 0.16527259349822998, |
|
"learning_rate": 0.0002576271186440678, |
|
"loss": 0.0492, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 0.07748957723379135, |
|
"learning_rate": 0.00024915254237288135, |
|
"loss": 0.0518, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"eval_accuracy": 0.9872763419483102, |
|
"eval_f1": 0.9900482040118178, |
|
"eval_loss": 0.04457252100110054, |
|
"eval_precision": 0.9897404010570496, |
|
"eval_recall": 0.9903561984756571, |
|
"eval_runtime": 12.8001, |
|
"eval_samples_per_second": 785.934, |
|
"eval_steps_per_second": 6.172, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9774011299435028, |
|
"grad_norm": 0.07877205312252045, |
|
"learning_rate": 0.0002406779661016949, |
|
"loss": 0.0489, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.2598870056497176, |
|
"grad_norm": 0.09426571428775787, |
|
"learning_rate": 0.00023220338983050845, |
|
"loss": 0.0477, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.2598870056497176, |
|
"eval_accuracy": 0.9879721669980119, |
|
"eval_f1": 0.9906339499961297, |
|
"eval_loss": 0.04242447018623352, |
|
"eval_precision": 0.9859784283513097, |
|
"eval_recall": 0.9953336444237051, |
|
"eval_runtime": 13.2576, |
|
"eval_samples_per_second": 758.811, |
|
"eval_steps_per_second": 5.959, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 0.09674050658941269, |
|
"learning_rate": 0.000223728813559322, |
|
"loss": 0.0463, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.824858757062147, |
|
"grad_norm": 0.08737868070602417, |
|
"learning_rate": 0.0002152542372881356, |
|
"loss": 0.0465, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.824858757062147, |
|
"eval_accuracy": 0.9887673956262425, |
|
"eval_f1": 0.9912369135323769, |
|
"eval_loss": 0.04115341976284981, |
|
"eval_precision": 0.9884008660686668, |
|
"eval_recall": 0.9940892829366931, |
|
"eval_runtime": 11.8282, |
|
"eval_samples_per_second": 850.507, |
|
"eval_steps_per_second": 6.679, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3540, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8484875417960448.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|