|
{
  "best_metric": 0.8269708752632141,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.0023223409196470044,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 4.644681839294008e-05,
      "grad_norm": 3.092576503753662,
      "learning_rate": 1.018e-05,
      "loss": 0.997,
      "step": 1
    },
    {
      "epoch": 4.644681839294008e-05,
      "eval_loss": 1.5409296751022339,
      "eval_runtime": 162.8209,
      "eval_samples_per_second": 55.681,
      "eval_steps_per_second": 13.923,
      "step": 1
    },
    {
      "epoch": 9.289363678588016e-05,
      "grad_norm": 21.47955322265625,
      "learning_rate": 2.036e-05,
      "loss": 1.9273,
      "step": 2
    },
    {
      "epoch": 0.00013934045517882026,
      "grad_norm": 9.852048873901367,
      "learning_rate": 3.0539999999999996e-05,
      "loss": 1.6379,
      "step": 3
    },
    {
      "epoch": 0.00018578727357176033,
      "grad_norm": 17.509506225585938,
      "learning_rate": 4.072e-05,
      "loss": 1.4906,
      "step": 4
    },
    {
      "epoch": 0.00023223409196470042,
      "grad_norm": 14.546015739440918,
      "learning_rate": 5.09e-05,
      "loss": 1.4437,
      "step": 5
    },
    {
      "epoch": 0.0002786809103576405,
      "grad_norm": 5.5904154777526855,
      "learning_rate": 6.107999999999999e-05,
      "loss": 1.6272,
      "step": 6
    },
    {
      "epoch": 0.0003251277287505806,
      "grad_norm": 4.112157821655273,
      "learning_rate": 7.125999999999999e-05,
      "loss": 1.4437,
      "step": 7
    },
    {
      "epoch": 0.00037157454714352065,
      "grad_norm": 4.384551048278809,
      "learning_rate": 8.144e-05,
      "loss": 1.3646,
      "step": 8
    },
    {
      "epoch": 0.0004180213655364608,
      "grad_norm": 8.227641105651855,
      "learning_rate": 9.162e-05,
      "loss": 1.1622,
      "step": 9
    },
    {
      "epoch": 0.00046446818392940084,
      "grad_norm": 5.1519856452941895,
      "learning_rate": 0.0001018,
      "loss": 1.4487,
      "step": 10
    },
    {
      "epoch": 0.0005109150023223409,
      "grad_norm": 9.391843795776367,
      "learning_rate": 0.00010126421052631578,
      "loss": 1.2691,
      "step": 11
    },
    {
      "epoch": 0.000557361820715281,
      "grad_norm": 5.215498924255371,
      "learning_rate": 0.00010072842105263156,
      "loss": 0.9428,
      "step": 12
    },
    {
      "epoch": 0.000603808639108221,
      "grad_norm": 2.952871084213257,
      "learning_rate": 0.00010019263157894736,
      "loss": 1.0689,
      "step": 13
    },
    {
      "epoch": 0.0006502554575011612,
      "grad_norm": 2.8391802310943604,
      "learning_rate": 9.965684210526316e-05,
      "loss": 0.9291,
      "step": 14
    },
    {
      "epoch": 0.0006967022758941013,
      "grad_norm": 2.461745023727417,
      "learning_rate": 9.912105263157895e-05,
      "loss": 0.8318,
      "step": 15
    },
    {
      "epoch": 0.0007431490942870413,
      "grad_norm": 2.3736178874969482,
      "learning_rate": 9.858526315789473e-05,
      "loss": 0.5747,
      "step": 16
    },
    {
      "epoch": 0.0007895959126799814,
      "grad_norm": 3.788600444793701,
      "learning_rate": 9.804947368421052e-05,
      "loss": 0.9676,
      "step": 17
    },
    {
      "epoch": 0.0008360427310729215,
      "grad_norm": 3.9328246116638184,
      "learning_rate": 9.75136842105263e-05,
      "loss": 1.0193,
      "step": 18
    },
    {
      "epoch": 0.0008824895494658616,
      "grad_norm": 2.693528652191162,
      "learning_rate": 9.69778947368421e-05,
      "loss": 0.7466,
      "step": 19
    },
    {
      "epoch": 0.0009289363678588017,
      "grad_norm": 3.4937636852264404,
      "learning_rate": 9.644210526315789e-05,
      "loss": 0.8632,
      "step": 20
    },
    {
      "epoch": 0.0009753831862517418,
      "grad_norm": 2.1122593879699707,
      "learning_rate": 9.590631578947369e-05,
      "loss": 0.7247,
      "step": 21
    },
    {
      "epoch": 0.0010218300046446818,
      "grad_norm": 3.186523675918579,
      "learning_rate": 9.537052631578947e-05,
      "loss": 0.6886,
      "step": 22
    },
    {
      "epoch": 0.001068276823037622,
      "grad_norm": 2.758934497833252,
      "learning_rate": 9.483473684210526e-05,
      "loss": 0.6994,
      "step": 23
    },
    {
      "epoch": 0.001114723641430562,
      "grad_norm": 2.430767774581909,
      "learning_rate": 9.429894736842104e-05,
      "loss": 0.6421,
      "step": 24
    },
    {
      "epoch": 0.0011611704598235022,
      "grad_norm": 3.4728782176971436,
      "learning_rate": 9.376315789473684e-05,
      "loss": 0.8879,
      "step": 25
    },
    {
      "epoch": 0.001207617278216442,
      "grad_norm": 3.511162519454956,
      "learning_rate": 9.322736842105262e-05,
      "loss": 0.9696,
      "step": 26
    },
    {
      "epoch": 0.0012540640966093822,
      "grad_norm": 3.0230774879455566,
      "learning_rate": 9.269157894736842e-05,
      "loss": 0.9589,
      "step": 27
    },
    {
      "epoch": 0.0013005109150023223,
      "grad_norm": 2.1474082469940186,
      "learning_rate": 9.215578947368421e-05,
      "loss": 0.7707,
      "step": 28
    },
    {
      "epoch": 0.0013469577333952625,
      "grad_norm": 2.3117761611938477,
      "learning_rate": 9.162e-05,
      "loss": 0.7724,
      "step": 29
    },
    {
      "epoch": 0.0013934045517882026,
      "grad_norm": 1.9512385129928589,
      "learning_rate": 9.108421052631578e-05,
      "loss": 0.603,
      "step": 30
    },
    {
      "epoch": 0.0014398513701811425,
      "grad_norm": 6.22908878326416,
      "learning_rate": 9.054842105263158e-05,
      "loss": 1.0569,
      "step": 31
    },
    {
      "epoch": 0.0014862981885740826,
      "grad_norm": 2.2944276332855225,
      "learning_rate": 9.001263157894736e-05,
      "loss": 0.6613,
      "step": 32
    },
    {
      "epoch": 0.0015327450069670227,
      "grad_norm": 2.312437057495117,
      "learning_rate": 8.947684210526315e-05,
      "loss": 0.8758,
      "step": 33
    },
    {
      "epoch": 0.0015791918253599629,
      "grad_norm": 2.5488150119781494,
      "learning_rate": 8.894105263157895e-05,
      "loss": 0.635,
      "step": 34
    },
    {
      "epoch": 0.001625638643752903,
      "grad_norm": 3.658079147338867,
      "learning_rate": 8.840526315789473e-05,
      "loss": 1.0831,
      "step": 35
    },
    {
      "epoch": 0.001672085462145843,
      "grad_norm": 2.474161386489868,
      "learning_rate": 8.786947368421052e-05,
      "loss": 0.7918,
      "step": 36
    },
    {
      "epoch": 0.001718532280538783,
      "grad_norm": 2.5074217319488525,
      "learning_rate": 8.733368421052632e-05,
      "loss": 0.8514,
      "step": 37
    },
    {
      "epoch": 0.0017649790989317231,
      "grad_norm": 2.0377755165100098,
      "learning_rate": 8.67978947368421e-05,
      "loss": 0.6768,
      "step": 38
    },
    {
      "epoch": 0.0018114259173246632,
      "grad_norm": 2.2819857597351074,
      "learning_rate": 8.626210526315789e-05,
      "loss": 0.6528,
      "step": 39
    },
    {
      "epoch": 0.0018578727357176034,
      "grad_norm": 2.3973352909088135,
      "learning_rate": 8.572631578947367e-05,
      "loss": 0.8542,
      "step": 40
    },
    {
      "epoch": 0.0019043195541105435,
      "grad_norm": 2.628427267074585,
      "learning_rate": 8.519052631578947e-05,
      "loss": 0.8507,
      "step": 41
    },
    {
      "epoch": 0.0019507663725034836,
      "grad_norm": 3.116105556488037,
      "learning_rate": 8.465473684210527e-05,
      "loss": 0.9283,
      "step": 42
    },
    {
      "epoch": 0.0019972131908964235,
      "grad_norm": 3.5683062076568604,
      "learning_rate": 8.411894736842105e-05,
      "loss": 1.1937,
      "step": 43
    },
    {
      "epoch": 0.0020436600092893636,
      "grad_norm": 2.569610118865967,
      "learning_rate": 8.358315789473684e-05,
      "loss": 0.9237,
      "step": 44
    },
    {
      "epoch": 0.0020901068276823038,
      "grad_norm": 3.7425827980041504,
      "learning_rate": 8.304736842105262e-05,
      "loss": 1.3234,
      "step": 45
    },
    {
      "epoch": 0.002136553646075244,
      "grad_norm": 3.7030258178710938,
      "learning_rate": 8.251157894736841e-05,
      "loss": 0.7639,
      "step": 46
    },
    {
      "epoch": 0.002183000464468184,
      "grad_norm": 3.188816785812378,
      "learning_rate": 8.197578947368421e-05,
      "loss": 1.1931,
      "step": 47
    },
    {
      "epoch": 0.002229447282861124,
      "grad_norm": 5.445688724517822,
      "learning_rate": 8.144e-05,
      "loss": 0.8938,
      "step": 48
    },
    {
      "epoch": 0.0022758941012540643,
      "grad_norm": 2.3576200008392334,
      "learning_rate": 8.090421052631579e-05,
      "loss": 0.836,
      "step": 49
    },
    {
      "epoch": 0.0023223409196470044,
      "grad_norm": 22.260164260864258,
      "learning_rate": 8.036842105263158e-05,
      "loss": 1.0266,
      "step": 50
    },
    {
      "epoch": 0.0023223409196470044,
      "eval_loss": 0.8269708752632141,
      "eval_runtime": 162.7994,
      "eval_samples_per_second": 55.688,
      "eval_steps_per_second": 13.925,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3119728734240768.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|