|
{ |
|
"best_metric": 6.333486080169678, |
|
"best_model_checkpoint": "./output/checkpoint-150", |
|
"epoch": 0.9036144578313253, |
|
"eval_steps": 150, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.060240963855421686, |
|
"grad_norm": 52.598548889160156, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.4587, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12048192771084337, |
|
"grad_norm": 1.3145289421081543, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1245, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18072289156626506, |
|
"grad_norm": 47.875640869140625, |
|
"learning_rate": 3.75e-05, |
|
"loss": 0.0925, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24096385542168675, |
|
"grad_norm": 25.39703369140625, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1134, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.30120481927710846, |
|
"grad_norm": 22.82965660095215, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1348, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3614457831325301, |
|
"grad_norm": 0.446526437997818, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.5273, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42168674698795183, |
|
"grad_norm": 50.92262649536133, |
|
"learning_rate": 8.75e-05, |
|
"loss": 0.2283, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4819277108433735, |
|
"grad_norm": 65.35910034179688, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3627, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5421686746987951, |
|
"grad_norm": 8.19798755645752, |
|
"learning_rate": 0.00011250000000000001, |
|
"loss": 0.2596, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6024096385542169, |
|
"grad_norm": 141.95716857910156, |
|
"learning_rate": 0.000125, |
|
"loss": 0.6644, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6626506024096386, |
|
"grad_norm": 736.8707885742188, |
|
"learning_rate": 0.00012499871543489787, |
|
"loss": 0.9693, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7228915662650602, |
|
"grad_norm": 249.3212432861328, |
|
"learning_rate": 0.00012499486179239495, |
|
"loss": 1.4457, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7831325301204819, |
|
"grad_norm": 30.746965408325195, |
|
"learning_rate": 0.00012498843923089938, |
|
"loss": 1.4675, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8433734939759037, |
|
"grad_norm": 41.87055587768555, |
|
"learning_rate": 0.0001249794480144175, |
|
"loss": 3.2459, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9036144578313253, |
|
"grad_norm": 64.25176239013672, |
|
"learning_rate": 0.000124967888512543, |
|
"loss": 5.6311, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9036144578313253, |
|
"eval_loss": 6.333486080169678, |
|
"eval_runtime": 3.9268, |
|
"eval_samples_per_second": 9.422, |
|
"eval_steps_per_second": 9.422, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 31, |
|
"save_steps": 150, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 481056683261952.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|