|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9890909090909092, |
|
"eval_steps": 5, |
|
"global_step": 136, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07272727272727272, |
|
"eval_loss": 0.8767483830451965, |
|
"eval_runtime": 30.8146, |
|
"eval_samples_per_second": 15.285, |
|
"eval_steps_per_second": 3.829, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.14545454545454545, |
|
"grad_norm": 3.115849256515503, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 3.2126, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14545454545454545, |
|
"eval_loss": 0.779396116733551, |
|
"eval_runtime": 27.2217, |
|
"eval_samples_per_second": 17.302, |
|
"eval_steps_per_second": 4.335, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.21818181818181817, |
|
"eval_loss": 0.7435232996940613, |
|
"eval_runtime": 27.2836, |
|
"eval_samples_per_second": 17.263, |
|
"eval_steps_per_second": 4.325, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2909090909090909, |
|
"grad_norm": 2.84679913520813, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 2.9019, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2909090909090909, |
|
"eval_loss": 0.6930269598960876, |
|
"eval_runtime": 27.2747, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 4.326, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"eval_loss": 0.6732496619224548, |
|
"eval_runtime": 27.2843, |
|
"eval_samples_per_second": 17.263, |
|
"eval_steps_per_second": 4.325, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.43636363636363634, |
|
"grad_norm": 2.5042567253112793, |
|
"learning_rate": 9.998342337571565e-05, |
|
"loss": 2.9314, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.43636363636363634, |
|
"eval_loss": 0.6518906950950623, |
|
"eval_runtime": 27.2739, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 4.326, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.509090909090909, |
|
"eval_loss": 0.6448661088943481, |
|
"eval_runtime": 27.279, |
|
"eval_samples_per_second": 17.266, |
|
"eval_steps_per_second": 4.326, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5818181818181818, |
|
"grad_norm": 2.148580312728882, |
|
"learning_rate": 9.940439480455386e-05, |
|
"loss": 2.3866, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5818181818181818, |
|
"eval_loss": 0.6359825730323792, |
|
"eval_runtime": 27.2872, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 4.324, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6545454545454545, |
|
"eval_loss": 0.6275559663772583, |
|
"eval_runtime": 27.2773, |
|
"eval_samples_per_second": 17.267, |
|
"eval_steps_per_second": 4.326, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 3.116907835006714, |
|
"learning_rate": 9.800749368358009e-05, |
|
"loss": 2.7002, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"eval_loss": 0.6227777004241943, |
|
"eval_runtime": 27.2702, |
|
"eval_samples_per_second": 17.272, |
|
"eval_steps_per_second": 4.327, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.6195926070213318, |
|
"eval_runtime": 27.263, |
|
"eval_samples_per_second": 17.276, |
|
"eval_steps_per_second": 4.328, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 4.004441738128662, |
|
"learning_rate": 9.581584522435024e-05, |
|
"loss": 2.5389, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8727272727272727, |
|
"eval_loss": 0.6146515011787415, |
|
"eval_runtime": 27.269, |
|
"eval_samples_per_second": 17.272, |
|
"eval_steps_per_second": 4.327, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9454545454545454, |
|
"eval_loss": 0.610045850276947, |
|
"eval_runtime": 27.3064, |
|
"eval_samples_per_second": 17.249, |
|
"eval_steps_per_second": 4.321, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.029090909090909, |
|
"grad_norm": 2.843431234359741, |
|
"learning_rate": 9.286573140381662e-05, |
|
"loss": 2.4095, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.029090909090909, |
|
"eval_loss": 0.6076003909111023, |
|
"eval_runtime": 27.2492, |
|
"eval_samples_per_second": 17.285, |
|
"eval_steps_per_second": 4.33, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1018181818181818, |
|
"eval_loss": 0.6160494089126587, |
|
"eval_runtime": 27.2598, |
|
"eval_samples_per_second": 17.278, |
|
"eval_steps_per_second": 4.329, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1745454545454546, |
|
"grad_norm": 4.093331336975098, |
|
"learning_rate": 8.920599032883554e-05, |
|
"loss": 2.0692, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1745454545454546, |
|
"eval_loss": 0.6218172311782837, |
|
"eval_runtime": 27.2772, |
|
"eval_samples_per_second": 17.267, |
|
"eval_steps_per_second": 4.326, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2472727272727273, |
|
"eval_loss": 0.6184111833572388, |
|
"eval_runtime": 27.3075, |
|
"eval_samples_per_second": 17.248, |
|
"eval_steps_per_second": 4.321, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 2.453794240951538, |
|
"learning_rate": 8.489720773831717e-05, |
|
"loss": 1.8616, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 0.6163813471794128, |
|
"eval_runtime": 27.2607, |
|
"eval_samples_per_second": 17.278, |
|
"eval_steps_per_second": 4.329, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3927272727272726, |
|
"eval_loss": 0.6139249801635742, |
|
"eval_runtime": 27.2587, |
|
"eval_samples_per_second": 17.279, |
|
"eval_steps_per_second": 4.329, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.4654545454545453, |
|
"grad_norm": 3.4409964084625244, |
|
"learning_rate": 8.001071402741842e-05, |
|
"loss": 2.1618, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4654545454545453, |
|
"eval_loss": 0.6118062734603882, |
|
"eval_runtime": 27.245, |
|
"eval_samples_per_second": 17.288, |
|
"eval_steps_per_second": 4.331, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.538181818181818, |
|
"eval_loss": 0.610755205154419, |
|
"eval_runtime": 27.2505, |
|
"eval_samples_per_second": 17.284, |
|
"eval_steps_per_second": 4.33, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.6109090909090908, |
|
"grad_norm": 3.2959372997283936, |
|
"learning_rate": 7.462740339769324e-05, |
|
"loss": 2.0259, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.6109090909090908, |
|
"eval_loss": 0.6130332350730896, |
|
"eval_runtime": 27.2607, |
|
"eval_samples_per_second": 17.278, |
|
"eval_steps_per_second": 4.329, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.6836363636363636, |
|
"eval_loss": 0.6104254722595215, |
|
"eval_runtime": 27.2481, |
|
"eval_samples_per_second": 17.286, |
|
"eval_steps_per_second": 4.331, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.7563636363636363, |
|
"grad_norm": 2.7095448970794678, |
|
"learning_rate": 6.883639468175927e-05, |
|
"loss": 1.8146, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.7563636363636363, |
|
"eval_loss": 0.6095255613327026, |
|
"eval_runtime": 27.2419, |
|
"eval_samples_per_second": 17.29, |
|
"eval_steps_per_second": 4.332, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.829090909090909, |
|
"eval_loss": 0.6098220944404602, |
|
"eval_runtime": 27.2413, |
|
"eval_samples_per_second": 17.29, |
|
"eval_steps_per_second": 4.332, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.9018181818181819, |
|
"grad_norm": 3.7072970867156982, |
|
"learning_rate": 6.273355601206144e-05, |
|
"loss": 1.6977, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9018181818181819, |
|
"eval_loss": 0.6083605289459229, |
|
"eval_runtime": 27.262, |
|
"eval_samples_per_second": 17.277, |
|
"eval_steps_per_second": 4.328, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9745454545454546, |
|
"eval_loss": 0.6045976281166077, |
|
"eval_runtime": 27.2423, |
|
"eval_samples_per_second": 17.289, |
|
"eval_steps_per_second": 4.332, |
|
"step": 135 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 272, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.413948656205824e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|