{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.96,
  "eval_steps": 500,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 0.0004987337087656614,
      "loss": 1.7079,
      "step": 10
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0004949476630105669,
      "loss": 1.5881,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0004886802166271364,
      "loss": 1.537,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0004799948609147061,
      "loss": 1.6139,
      "step": 40
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.000468979581391722,
      "loss": 1.5905,
      "step": 50
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0004557459664734141,
      "loss": 1.5532,
      "step": 60
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0004404280770443398,
      "loss": 1.5624,
      "step": 70
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00042318108837739987,
      "loss": 1.566,
      "step": 80
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0004041797181571358,
      "loss": 1.6084,
      "step": 90
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00038361645653195025,
      "loss": 1.5778,
      "step": 100
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0003616996161254065,
      "loss": 1.5952,
      "step": 110
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0003386512217606339,
      "loss": 1.526,
      "step": 120
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00031470476127563017,
      "loss": 1.5188,
      "step": 130
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002901028202144401,
      "loss": 1.5547,
      "step": 140
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00026509462435557154,
      "loss": 1.4073,
      "step": 150
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002399335149726463,
      "loss": 1.543,
      "step": 160
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0002148743824037269,
      "loss": 1.4798,
      "step": 170
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00019017108392811063,
      "loss": 1.479,
      "step": 180
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016607387210834887,
      "loss": 1.5438,
      "step": 190
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00014282685964923643,
      "loss": 1.5179,
      "step": 200
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.00012066554645558578,
      "loss": 1.4537,
      "step": 210
    },
    {
      "epoch": 0.7,
      "learning_rate": 9.981443394050524e-05,
      "loss": 1.5523,
      "step": 220
    },
    {
      "epoch": 0.74,
      "learning_rate": 8.048475075202727e-05,
      "loss": 1.5372,
      "step": 230
    },
    {
      "epoch": 0.77,
      "learning_rate": 6.28723129572247e-05,
      "loss": 1.5376,
      "step": 240
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.715554036085673e-05,
      "loss": 1.5009,
      "step": 250
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.3493649053890325e-05,
      "loss": 1.5572,
      "step": 260
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.2025038501977485e-05,
      "loss": 1.5053,
      "step": 270
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.286588951321363e-05,
      "loss": 1.5444,
      "step": 280
    },
    {
      "epoch": 0.93,
      "learning_rate": 6.108987288226536e-06,
      "loss": 1.5053,
      "step": 290
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.822781475486507e-06,
      "loss": 1.4731,
      "step": 300
    }
  ],
  "logging_steps": 10,
  "max_steps": 312,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 1.0028445508042752e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|