|
{ |
|
"best_metric": 1.3310532569885254, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-250", |
|
"epoch": 0.002108094661882697, |
|
"eval_steps": 50, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.432378647530788e-06, |
|
"eval_loss": 2.016960620880127, |
|
"eval_runtime": 3409.8944, |
|
"eval_samples_per_second": 14.644, |
|
"eval_steps_per_second": 3.661, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 8.432378647530789e-05, |
|
"grad_norm": 0.7229357957839966, |
|
"learning_rate": 4.2000000000000004e-05, |
|
"loss": 1.8129, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00016864757295061578, |
|
"grad_norm": 0.6948916912078857, |
|
"learning_rate": 8.400000000000001e-05, |
|
"loss": 1.5689, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00025297135942592365, |
|
"grad_norm": 0.9268532991409302, |
|
"learning_rate": 0.000126, |
|
"loss": 1.2979, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00033729514590123157, |
|
"grad_norm": 1.0942802429199219, |
|
"learning_rate": 0.00016800000000000002, |
|
"loss": 1.3864, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00042161893237653943, |
|
"grad_norm": 4.664062023162842, |
|
"learning_rate": 0.00021, |
|
"loss": 1.9757, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00042161893237653943, |
|
"eval_loss": 1.524364948272705, |
|
"eval_runtime": 3404.7905, |
|
"eval_samples_per_second": 14.666, |
|
"eval_steps_per_second": 3.667, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0005059427188518473, |
|
"grad_norm": 0.6090065240859985, |
|
"learning_rate": 0.00020974422527728155, |
|
"loss": 1.4874, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0005902665053271552, |
|
"grad_norm": 0.6349875926971436, |
|
"learning_rate": 0.0002089781472178649, |
|
"loss": 1.241, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0006745902918024631, |
|
"grad_norm": 0.7745322585105896, |
|
"learning_rate": 0.0002077054980770496, |
|
"loss": 1.0923, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.000758914078277771, |
|
"grad_norm": 1.1790937185287476, |
|
"learning_rate": 0.00020593247807352348, |
|
"loss": 1.1274, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0008432378647530789, |
|
"grad_norm": 3.3187856674194336, |
|
"learning_rate": 0.00020366772518252038, |
|
"loss": 2.0574, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0008432378647530789, |
|
"eval_loss": 1.4387811422348022, |
|
"eval_runtime": 3420.3676, |
|
"eval_samples_per_second": 14.599, |
|
"eval_steps_per_second": 3.65, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0009275616512283868, |
|
"grad_norm": 0.5923383235931396, |
|
"learning_rate": 0.0002009222730524731, |
|
"loss": 1.5078, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0010118854377036946, |
|
"grad_norm": 0.6275829672813416, |
|
"learning_rate": 0.00019770949725018733, |
|
"loss": 1.3687, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0010962092241790025, |
|
"grad_norm": 0.9197534918785095, |
|
"learning_rate": 0.00019404505009642473, |
|
"loss": 1.1862, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0011805330106543104, |
|
"grad_norm": 1.0203487873077393, |
|
"learning_rate": 0.0001899467844093695, |
|
"loss": 1.1169, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0012648567971296184, |
|
"grad_norm": 2.69136118888855, |
|
"learning_rate": 0.00018543466652749268, |
|
"loss": 1.9026, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0012648567971296184, |
|
"eval_loss": 1.4064786434173584, |
|
"eval_runtime": 3415.4512, |
|
"eval_samples_per_second": 14.62, |
|
"eval_steps_per_second": 3.655, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0013491805836049263, |
|
"grad_norm": 0.7159033417701721, |
|
"learning_rate": 0.00018053067903555837, |
|
"loss": 1.4857, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.001433504370080234, |
|
"grad_norm": 0.6960952877998352, |
|
"learning_rate": 0.00017525871366768012, |
|
"loss": 1.0184, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.001517828156555542, |
|
"grad_norm": 0.6692061424255371, |
|
"learning_rate": 0.00016964445490919413, |
|
"loss": 1.163, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0016021519430308498, |
|
"grad_norm": 1.0128861665725708, |
|
"learning_rate": 0.00016371525486442843, |
|
"loss": 1.2, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0016864757295061577, |
|
"grad_norm": 3.461775302886963, |
|
"learning_rate": 0.0001575, |
|
"loss": 1.9163, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0016864757295061577, |
|
"eval_loss": 1.361072063446045, |
|
"eval_runtime": 3401.7609, |
|
"eval_samples_per_second": 14.679, |
|
"eval_steps_per_second": 3.67, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0017707995159814657, |
|
"grad_norm": 0.7128476500511169, |
|
"learning_rate": 0.00015102897041285315, |
|
"loss": 1.3027, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0018551233024567736, |
|
"grad_norm": 0.6080098748207092, |
|
"learning_rate": 0.00014433369230867077, |
|
"loss": 1.2597, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0019394470889320815, |
|
"grad_norm": 0.7591213583946228, |
|
"learning_rate": 0.0001374467844093695, |
|
"loss": 1.0402, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.002023770875407389, |
|
"grad_norm": 0.8502488136291504, |
|
"learning_rate": 0.0001304017990379651, |
|
"loss": 1.0837, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.002108094661882697, |
|
"grad_norm": 3.2781741619110107, |
|
"learning_rate": 0.0001232330586550277, |
|
"loss": 1.553, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.002108094661882697, |
|
"eval_loss": 1.3310532569885254, |
|
"eval_runtime": 3412.7075, |
|
"eval_samples_per_second": 14.631, |
|
"eval_steps_per_second": 3.658, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.483158242852864e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|