|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.975951903807615, |
|
"eval_steps": 500, |
|
"global_step": 124, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 96.1263392680429, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 1.184, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 109.03932941785152, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 5.0239, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 57.030962020859526, |
|
"learning_rate": 0.00019983983492623833, |
|
"loss": 4.0648, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 22.869725355526718, |
|
"learning_rate": 0.0001980438647961327, |
|
"loss": 2.8094, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 6.985773379251052, |
|
"learning_rate": 0.00019428774454610843, |
|
"loss": 2.2094, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 5.841783328387945, |
|
"learning_rate": 0.00018864656872260985, |
|
"loss": 1.3499, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.122244488977956, |
|
"grad_norm": 4.328203171108087, |
|
"learning_rate": 0.0001812331190023886, |
|
"loss": 1.2527, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.282565130260521, |
|
"grad_norm": 1.8271946123422904, |
|
"learning_rate": 0.00017219560939545246, |
|
"loss": 0.9903, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.4428857715430863, |
|
"grad_norm": 1.3376797882778457, |
|
"learning_rate": 0.00016171472306414554, |
|
"loss": 0.8082, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.6032064128256514, |
|
"grad_norm": 0.8625176200253305, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.7515, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.7635270541082164, |
|
"grad_norm": 0.9297452179429324, |
|
"learning_rate": 0.00013728564777803088, |
|
"loss": 0.6321, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.9238476953907817, |
|
"grad_norm": 1.1609492766676697, |
|
"learning_rate": 0.0001238258591423165, |
|
"loss": 0.5802, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.0841683366733466, |
|
"grad_norm": 0.6088051647121085, |
|
"learning_rate": 0.00010988973003642499, |
|
"loss": 0.4983, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.244488977955912, |
|
"grad_norm": 0.45623167145983035, |
|
"learning_rate": 9.57558796803852e-05, |
|
"loss": 0.4119, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.404809619238477, |
|
"grad_norm": 0.43383089288281307, |
|
"learning_rate": 8.170688025276134e-05, |
|
"loss": 0.3915, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.565130260521042, |
|
"grad_norm": 0.44731640463982364, |
|
"learning_rate": 6.802360754287547e-05, |
|
"loss": 0.3917, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.7254509018036073, |
|
"grad_norm": 0.4205909891049952, |
|
"learning_rate": 5.497962551823266e-05, |
|
"loss": 0.3886, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.8857715430861726, |
|
"grad_norm": 0.4168469096572953, |
|
"learning_rate": 4.283571707415214e-05, |
|
"loss": 0.3689, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.0460921843687374, |
|
"grad_norm": 0.6798047385077177, |
|
"learning_rate": 3.1834670310046734e-05, |
|
"loss": 0.3212, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 3.2064128256513027, |
|
"grad_norm": 0.3963204162500924, |
|
"learning_rate": 2.2196424568156073e-05, |
|
"loss": 0.1699, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.3667334669338675, |
|
"grad_norm": 0.38329008287670924, |
|
"learning_rate": 1.4113673277957395e-05, |
|
"loss": 0.1585, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.527054108216433, |
|
"grad_norm": 0.34785356511529864, |
|
"learning_rate": 7.74801151675314e-06, |
|
"loss": 0.1499, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.687374749498998, |
|
"grad_norm": 0.3409235398527789, |
|
"learning_rate": 3.226705306650113e-06, |
|
"loss": 0.1438, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 3.847695390781563, |
|
"grad_norm": 0.365385237116056, |
|
"learning_rate": 6.401472380297091e-07, |
|
"loss": 0.1525, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.975951903807615, |
|
"step": 124, |
|
"total_flos": 135489736671232.0, |
|
"train_loss": 1.0211431388893435, |
|
"train_runtime": 8462.237, |
|
"train_samples_per_second": 3.773, |
|
"train_steps_per_second": 0.015 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 124, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 135489736671232.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|