{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.43605359317905,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1218026796589525,
      "grad_norm": 0.567721962928772,
      "learning_rate": 5e-06,
      "loss": 1.6252,
      "step": 50
    },
    {
      "epoch": 0.243605359317905,
      "grad_norm": 0.7323570251464844,
      "learning_rate": 1e-05,
      "loss": 1.582,
      "step": 100
    },
    {
      "epoch": 0.3654080389768575,
      "grad_norm": 0.7963395118713379,
      "learning_rate": 1.5e-05,
      "loss": 1.4706,
      "step": 150
    },
    {
      "epoch": 0.48721071863581,
      "grad_norm": 0.6532277464866638,
      "learning_rate": 2e-05,
      "loss": 1.2949,
      "step": 200
    },
    {
      "epoch": 0.6090133982947625,
      "grad_norm": 0.7197393774986267,
      "learning_rate": 2.5e-05,
      "loss": 1.1536,
      "step": 250
    },
    {
      "epoch": 0.730816077953715,
      "grad_norm": 0.7984452247619629,
      "learning_rate": 3e-05,
      "loss": 1.0621,
      "step": 300
    },
    {
      "epoch": 0.8526187576126675,
      "grad_norm": 1.125217318534851,
      "learning_rate": 3.5e-05,
      "loss": 1.0322,
      "step": 350
    },
    {
      "epoch": 0.97442143727162,
      "grad_norm": 0.9447218179702759,
      "learning_rate": 4e-05,
      "loss": 1.0086,
      "step": 400
    },
    {
      "epoch": 1.0962241169305724,
      "grad_norm": 0.9581252932548523,
      "learning_rate": 4.5e-05,
      "loss": 0.9828,
      "step": 450
    },
    {
      "epoch": 1.218026796589525,
      "grad_norm": 1.0622270107269287,
      "learning_rate": 5e-05,
      "loss": 0.9694,
      "step": 500
    },
    {
      "epoch": 1.3398294762484775,
      "grad_norm": 1.1124155521392822,
      "learning_rate": 4.877641290737884e-05,
      "loss": 0.9658,
      "step": 550
    },
    {
      "epoch": 1.46163215590743,
      "grad_norm": 1.2832890748977661,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.9635,
      "step": 600
    },
    {
      "epoch": 1.5834348355663823,
      "grad_norm": 1.0713014602661133,
      "learning_rate": 3.969463130731183e-05,
      "loss": 0.9382,
      "step": 650
    },
    {
      "epoch": 1.705237515225335,
      "grad_norm": 1.0779744386672974,
      "learning_rate": 3.272542485937369e-05,
      "loss": 0.9457,
      "step": 700
    },
    {
      "epoch": 1.8270401948842876,
      "grad_norm": 1.0238535404205322,
      "learning_rate": 2.5e-05,
      "loss": 0.9462,
      "step": 750
    },
    {
      "epoch": 1.94884287454324,
      "grad_norm": 1.1264749765396118,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 0.9315,
      "step": 800
    },
    {
      "epoch": 2.0706455542021924,
      "grad_norm": 1.1411375999450684,
      "learning_rate": 1.0305368692688174e-05,
      "loss": 0.9232,
      "step": 850
    },
    {
      "epoch": 2.192448233861145,
      "grad_norm": 1.1316472291946411,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 0.9232,
      "step": 900
    },
    {
      "epoch": 2.3142509135200973,
      "grad_norm": 1.1166857481002808,
      "learning_rate": 1.2235870926211619e-06,
      "loss": 0.9241,
      "step": 950
    },
    {
      "epoch": 2.43605359317905,
      "grad_norm": 1.2450848817825317,
      "learning_rate": 0.0,
      "loss": 0.9213,
      "step": 1000
    }
  ],
  "logging_steps": 50,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.241676072477491e+16,
  "train_batch_size": 12,
  "trial_name": null,
  "trial_params": null
}