|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 151, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019945941475610623, |
|
"loss": 0.6627, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019784350367254322, |
|
"loss": 0.3933, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019516973750305532, |
|
"loss": 0.2526, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001914670242183795, |
|
"loss": 0.2494, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018677539646179707, |
|
"loss": 0.1195, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018114557872800905, |
|
"loss": 0.1413, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017463843894486937, |
|
"loss": 0.1202, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00016732433038731242, |
|
"loss": 0.1129, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001592823310385073, |
|
"loss": 0.1156, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00015059938862204127, |
|
"loss": 0.0889, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00014136938054879283, |
|
"loss": 0.0921, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001316920989420703, |
|
"loss": 0.0577, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00012167217171462566, |
|
"loss": 0.0885, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00011141793136253986, |
|
"loss": 0.0795, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00010104024370624644, |
|
"loss": 0.0613, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.065130924199998e-05, |
|
"loss": 0.0661, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.036345006322359e-05, |
|
"loss": 0.0333, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.028789546718326e-05, |
|
"loss": 0.0512, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.053357937665237e-05, |
|
"loss": 0.0553, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.1205962578487155e-05, |
|
"loss": 0.0409, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.240589251272342e-05, |
|
"loss": 0.0592, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.422851293981676e-05, |
|
"loss": 0.0536, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.6762235274383772e-05, |
|
"loss": 0.0275, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.008778270707944e-05, |
|
"loss": 0.0466, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4277317449282834e-05, |
|
"loss": 0.0328, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.393660536564408e-06, |
|
"loss": 0.0299, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.489612626189245e-06, |
|
"loss": 0.0258, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.607383131993424e-06, |
|
"loss": 0.0374, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.781338686584927e-07, |
|
"loss": 0.037, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.164213936770576e-08, |
|
"loss": 0.0316, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 151, |
|
"total_flos": 6843505735041024.0, |
|
"train_loss": 0.1083572210361626, |
|
"train_runtime": 702.143, |
|
"train_samples_per_second": 3.439, |
|
"train_steps_per_second": 0.215 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 151, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 6843505735041024.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|