|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.373134328358209, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018656716417910446, |
|
"grad_norm": 2.403158187866211, |
|
"learning_rate": 4.9998282347929784e-05, |
|
"loss": 3.3875, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03731343283582089, |
|
"grad_norm": 2.301710367202759, |
|
"learning_rate": 4.99931296277454e-05, |
|
"loss": 2.9015, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.055970149253731345, |
|
"grad_norm": 1.271048665046692, |
|
"learning_rate": 4.998454254749331e-05, |
|
"loss": 2.6229, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 1.069893717765808, |
|
"learning_rate": 4.997252228714279e-05, |
|
"loss": 2.3704, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09328358208955224, |
|
"grad_norm": 0.9044906497001648, |
|
"learning_rate": 4.9957070498423854e-05, |
|
"loss": 2.3782, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11194029850746269, |
|
"grad_norm": 0.9635376334190369, |
|
"learning_rate": 4.993818930460026e-05, |
|
"loss": 2.3576, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13059701492537312, |
|
"grad_norm": 0.8513979315757751, |
|
"learning_rate": 4.9915881300177725e-05, |
|
"loss": 2.4603, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 0.845267117023468, |
|
"learning_rate": 4.9890149550547454e-05, |
|
"loss": 2.2033, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16791044776119404, |
|
"grad_norm": 0.6632418036460876, |
|
"learning_rate": 4.98609975915649e-05, |
|
"loss": 2.1851, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1865671641791045, |
|
"grad_norm": 0.6857479810714722, |
|
"learning_rate": 4.982842942906386e-05, |
|
"loss": 2.3592, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.20522388059701493, |
|
"grad_norm": 0.7204287648200989, |
|
"learning_rate": 4.979244953830608e-05, |
|
"loss": 2.1323, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 0.6864420175552368, |
|
"learning_rate": 4.9753062863366276e-05, |
|
"loss": 2.2138, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.24253731343283583, |
|
"grad_norm": 0.7536088228225708, |
|
"learning_rate": 4.971027481645274e-05, |
|
"loss": 2.2584, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.26119402985074625, |
|
"grad_norm": 0.9708526134490967, |
|
"learning_rate": 4.966409127716367e-05, |
|
"loss": 2.2669, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2798507462686567, |
|
"grad_norm": 0.7516190409660339, |
|
"learning_rate": 4.96145185916792e-05, |
|
"loss": 2.2133, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 0.7864778637886047, |
|
"learning_rate": 4.95615635718894e-05, |
|
"loss": 2.1683, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.31716417910447764, |
|
"grad_norm": 0.7846741080284119, |
|
"learning_rate": 4.950523349445824e-05, |
|
"loss": 2.1274, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3358208955223881, |
|
"grad_norm": 0.816838800907135, |
|
"learning_rate": 4.944553609982363e-05, |
|
"loss": 2.2033, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.35447761194029853, |
|
"grad_norm": 0.7661916017532349, |
|
"learning_rate": 4.938247959113386e-05, |
|
"loss": 2.1492, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 0.8964986205101013, |
|
"learning_rate": 4.931607263312032e-05, |
|
"loss": 2.0862, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0457337537390182e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|