{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.960960960960961,
  "eval_steps": 500,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03203203203203203,
      "grad_norm": 0.5500465035438538,
      "learning_rate": 0.0002,
      "loss": 1.9326,
      "step": 1
    },
    {
      "epoch": 0.06406406406406406,
      "grad_norm": 0.3067496418952942,
      "learning_rate": 0.0004,
      "loss": 2.0123,
      "step": 2
    },
    {
      "epoch": 0.0960960960960961,
      "grad_norm": 0.24163766205310822,
      "learning_rate": 0.0003988275914308719,
      "loss": 1.6857,
      "step": 3
    },
    {
      "epoch": 0.12812812812812813,
      "grad_norm": 0.4033714830875397,
      "learning_rate": 0.00039532411114201737,
      "loss": 1.568,
      "step": 4
    },
    {
      "epoch": 0.16016016016016016,
      "grad_norm": 0.40937018394470215,
      "learning_rate": 0.00038953063423656053,
      "loss": 1.2066,
      "step": 5
    },
    {
      "epoch": 0.1921921921921922,
      "grad_norm": 0.303056001663208,
      "learning_rate": 0.00038151508393419143,
      "loss": 1.2382,
      "step": 6
    },
    {
      "epoch": 0.22422422422422422,
      "grad_norm": 0.286432147026062,
      "learning_rate": 0.00037137143523351785,
      "loss": 0.9406,
      "step": 7
    },
    {
      "epoch": 0.25625625625625625,
      "grad_norm": 0.354612797498703,
      "learning_rate": 0.00035921861314112876,
      "loss": 1.032,
      "step": 8
    },
    {
      "epoch": 0.2882882882882883,
      "grad_norm": 0.3172045052051544,
      "learning_rate": 0.0003451990983846262,
      "loss": 1.1604,
      "step": 9
    },
    {
      "epoch": 0.3203203203203203,
      "grad_norm": 0.35591357946395874,
      "learning_rate": 0.00032947725695636553,
      "loss": 1.2686,
      "step": 10
    },
    {
      "epoch": 0.35235235235235235,
      "grad_norm": 0.45947980880737305,
      "learning_rate": 0.0003122374130724765,
      "loss": 1.4562,
      "step": 11
    },
    {
      "epoch": 0.3843843843843844,
      "grad_norm": 0.381909042596817,
      "learning_rate": 0.00029368168813995806,
      "loss": 1.3231,
      "step": 12
    },
    {
      "epoch": 0.4164164164164164,
      "grad_norm": 0.47724688053131104,
      "learning_rate": 0.0002740276310679829,
      "loss": 1.276,
      "step": 13
    },
    {
      "epoch": 0.44844844844844844,
      "grad_norm": 0.3836033344268799,
      "learning_rate": 0.00025350566770584423,
      "loss": 1.5523,
      "step": 14
    },
    {
      "epoch": 0.4804804804804805,
      "grad_norm": 0.3899593651294708,
      "learning_rate": 0.000232356399310553,
      "loss": 1.2293,
      "step": 15
    },
    {
      "epoch": 0.5125125125125125,
      "grad_norm": 0.3470306992530823,
      "learning_rate": 0.00021082778171708353,
      "loss": 1.2625,
      "step": 16
    },
    {
      "epoch": 0.5445445445445446,
      "grad_norm": 0.36498191952705383,
      "learning_rate": 0.00018917221828291652,
      "loss": 1.2286,
      "step": 17
    },
    {
      "epoch": 0.5765765765765766,
      "grad_norm": 0.3170139193534851,
      "learning_rate": 0.00016764360068944706,
      "loss": 1.2458,
      "step": 18
    },
    {
      "epoch": 0.6086086086086087,
      "grad_norm": 0.3011997938156128,
      "learning_rate": 0.00014649433229415587,
      "loss": 1.1672,
      "step": 19
    },
    {
      "epoch": 0.6406406406406406,
      "grad_norm": 0.29357728362083435,
      "learning_rate": 0.00012597236893201712,
      "loss": 1.1728,
      "step": 20
    },
    {
      "epoch": 0.6726726726726727,
      "grad_norm": 0.30459967255592346,
      "learning_rate": 0.000106318311860042,
      "loss": 1.1528,
      "step": 21
    },
    {
      "epoch": 0.7047047047047047,
      "grad_norm": 0.23712120950222015,
      "learning_rate": 8.776258692752355e-05,
      "loss": 0.7205,
      "step": 22
    },
    {
      "epoch": 0.7367367367367368,
      "grad_norm": 0.2815784215927124,
      "learning_rate": 7.052274304363449e-05,
      "loss": 0.7541,
      "step": 23
    },
    {
      "epoch": 0.7687687687687688,
      "grad_norm": 0.3061799705028534,
      "learning_rate": 5.4800901615373876e-05,
      "loss": 1.4001,
      "step": 24
    },
    {
      "epoch": 0.8008008008008008,
      "grad_norm": 0.32817238569259644,
      "learning_rate": 4.078138685887125e-05,
      "loss": 1.1528,
      "step": 25
    },
    {
      "epoch": 0.8328328328328328,
      "grad_norm": 0.37422603368759155,
      "learning_rate": 2.862856476648219e-05,
      "loss": 1.3155,
      "step": 26
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.3299436867237091,
      "learning_rate": 1.8484916065808623e-05,
      "loss": 1.1114,
      "step": 27
    },
    {
      "epoch": 0.8968968968968969,
      "grad_norm": 0.32912251353263855,
      "learning_rate": 1.0469365763439531e-05,
      "loss": 1.0327,
      "step": 28
    },
    {
      "epoch": 0.928928928928929,
      "grad_norm": 0.30801478028297424,
      "learning_rate": 4.6758888579826685e-06,
      "loss": 0.85,
      "step": 29
    },
    {
      "epoch": 0.960960960960961,
      "grad_norm": 0.3802436292171478,
      "learning_rate": 1.1724085691280805e-06,
      "loss": 1.2445,
      "step": 30
    }
  ],
  "logging_steps": 1,
  "max_steps": 31,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 30,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.01879310700544e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}