|
{ |
|
"best_metric": 1.0833667516708374, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.32663726931242854, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006532745386248571, |
|
"eval_loss": 1.8997453451156616, |
|
"eval_runtime": 135.0082, |
|
"eval_samples_per_second": 19.095, |
|
"eval_steps_per_second": 4.777, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006532745386248571, |
|
"grad_norm": 0.5372188687324524, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0612, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.013065490772497142, |
|
"grad_norm": 0.3328971266746521, |
|
"learning_rate": 0.0001998582695676762, |
|
"loss": 1.022, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.019598236158745713, |
|
"grad_norm": 0.5935003161430359, |
|
"learning_rate": 0.00019943348002101371, |
|
"loss": 1.0128, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.026130981544994283, |
|
"grad_norm": 2.405089855194092, |
|
"learning_rate": 0.00019872683547213446, |
|
"loss": 1.2233, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03266372693124286, |
|
"grad_norm": 3.8762905597686768, |
|
"learning_rate": 0.00019774033898178667, |
|
"loss": 1.6253, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03266372693124286, |
|
"eval_loss": 1.367977261543274, |
|
"eval_runtime": 137.0715, |
|
"eval_samples_per_second": 18.808, |
|
"eval_steps_per_second": 4.706, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.039196472317491425, |
|
"grad_norm": 0.32745811343193054, |
|
"learning_rate": 0.0001964767868814516, |
|
"loss": 0.9681, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04572921770374, |
|
"grad_norm": 0.3707106113433838, |
|
"learning_rate": 0.00019493976084683813, |
|
"loss": 0.9395, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05226196308998857, |
|
"grad_norm": 0.5433753728866577, |
|
"learning_rate": 0.00019313361774523385, |
|
"loss": 1.0418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05879470847623714, |
|
"grad_norm": 0.9471728205680847, |
|
"learning_rate": 0.00019106347728549135, |
|
"loss": 1.1623, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06532745386248572, |
|
"grad_norm": 2.322746753692627, |
|
"learning_rate": 0.00018873520750565718, |
|
"loss": 1.533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06532745386248572, |
|
"eval_loss": 1.3632649183273315, |
|
"eval_runtime": 136.6492, |
|
"eval_samples_per_second": 18.866, |
|
"eval_steps_per_second": 4.72, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07186019924873428, |
|
"grad_norm": 0.3458673059940338, |
|
"learning_rate": 0.0001861554081393806, |
|
"loss": 0.9667, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07839294463498285, |
|
"grad_norm": 0.34533071517944336, |
|
"learning_rate": 0.0001833313919082515, |
|
"loss": 0.9634, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08492569002123142, |
|
"grad_norm": 0.5500729084014893, |
|
"learning_rate": 0.00018027116379309638, |
|
"loss": 1.0725, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09145843540748, |
|
"grad_norm": 0.853624701499939, |
|
"learning_rate": 0.00017698339834299061, |
|
"loss": 1.152, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09799118079372857, |
|
"grad_norm": 2.3200571537017822, |
|
"learning_rate": 0.00017347741508630672, |
|
"loss": 1.5809, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09799118079372857, |
|
"eval_loss": 1.2978923320770264, |
|
"eval_runtime": 136.7406, |
|
"eval_samples_per_second": 18.853, |
|
"eval_steps_per_second": 4.717, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10452392617997713, |
|
"grad_norm": 0.3230119049549103, |
|
"learning_rate": 0.0001697631521134985, |
|
"loss": 0.9422, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1110566715662257, |
|
"grad_norm": 0.34993496537208557, |
|
"learning_rate": 0.00016585113790650388, |
|
"loss": 0.9937, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11758941695247428, |
|
"grad_norm": 0.6289933323860168, |
|
"learning_rate": 0.0001617524614946192, |
|
"loss": 1.042, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12412216233872285, |
|
"grad_norm": 1.0999337434768677, |
|
"learning_rate": 0.0001574787410214407, |
|
"loss": 1.1156, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13065490772497143, |
|
"grad_norm": 2.4530375003814697, |
|
"learning_rate": 0.00015304209081197425, |
|
"loss": 1.503, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13065490772497143, |
|
"eval_loss": 1.2213399410247803, |
|
"eval_runtime": 136.9165, |
|
"eval_samples_per_second": 18.829, |
|
"eval_steps_per_second": 4.711, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13718765311122, |
|
"grad_norm": 0.23753122985363007, |
|
"learning_rate": 0.00014845508703326504, |
|
"loss": 0.9201, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14372039849746857, |
|
"grad_norm": 0.32499635219573975, |
|
"learning_rate": 0.00014373073204588556, |
|
"loss": 0.9755, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15025314388371713, |
|
"grad_norm": 0.38442739844322205, |
|
"learning_rate": 0.00013888241754733208, |
|
"loss": 1.0136, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1567858892699657, |
|
"grad_norm": 0.8352793455123901, |
|
"learning_rate": 0.00013392388661180303, |
|
"loss": 1.1551, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16331863465621427, |
|
"grad_norm": 2.5753586292266846, |
|
"learning_rate": 0.0001288691947339621, |
|
"loss": 1.5156, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16331863465621427, |
|
"eval_loss": 1.155815839767456, |
|
"eval_runtime": 137.0554, |
|
"eval_samples_per_second": 18.81, |
|
"eval_steps_per_second": 4.706, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16985138004246284, |
|
"grad_norm": 0.2267913520336151, |
|
"learning_rate": 0.0001237326699871115, |
|
"loss": 0.8876, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1763841254287114, |
|
"grad_norm": 0.3093341886997223, |
|
"learning_rate": 0.00011852887240871145, |
|
"loss": 0.9403, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18291687081496, |
|
"grad_norm": 0.43315809965133667, |
|
"learning_rate": 0.00011327255272837221, |
|
"loss": 0.9982, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18944961620120856, |
|
"grad_norm": 0.8610518574714661, |
|
"learning_rate": 0.00010797861055530831, |
|
"loss": 1.1578, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19598236158745713, |
|
"grad_norm": 3.0702617168426514, |
|
"learning_rate": 0.00010266205214377748, |
|
"loss": 1.3948, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19598236158745713, |
|
"eval_loss": 1.1323291063308716, |
|
"eval_runtime": 136.8684, |
|
"eval_samples_per_second": 18.836, |
|
"eval_steps_per_second": 4.713, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2025151069737057, |
|
"grad_norm": 0.35798534750938416, |
|
"learning_rate": 9.733794785622253e-05, |
|
"loss": 0.8783, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.20904785235995427, |
|
"grad_norm": 0.31939324736595154, |
|
"learning_rate": 9.202138944469168e-05, |
|
"loss": 0.9702, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21558059774620283, |
|
"grad_norm": 0.5139957666397095, |
|
"learning_rate": 8.672744727162781e-05, |
|
"loss": 1.0535, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2221133431324514, |
|
"grad_norm": 0.8899235725402832, |
|
"learning_rate": 8.147112759128859e-05, |
|
"loss": 1.1356, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2286460885187, |
|
"grad_norm": 2.6317663192749023, |
|
"learning_rate": 7.626733001288851e-05, |
|
"loss": 1.5253, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2286460885187, |
|
"eval_loss": 1.107937216758728, |
|
"eval_runtime": 136.976, |
|
"eval_samples_per_second": 18.821, |
|
"eval_steps_per_second": 4.709, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23517883390494856, |
|
"grad_norm": 0.21436120569705963, |
|
"learning_rate": 7.113080526603792e-05, |
|
"loss": 0.8752, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24171157929119713, |
|
"grad_norm": 0.33155834674835205, |
|
"learning_rate": 6.607611338819697e-05, |
|
"loss": 0.9239, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2482443246774457, |
|
"grad_norm": 0.44394493103027344, |
|
"learning_rate": 6.111758245266794e-05, |
|
"loss": 0.9837, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25477707006369427, |
|
"grad_norm": 0.7826946377754211, |
|
"learning_rate": 5.626926795411447e-05, |
|
"loss": 1.1915, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26130981544994286, |
|
"grad_norm": 2.0713536739349365, |
|
"learning_rate": 5.1544912966734994e-05, |
|
"loss": 1.431, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26130981544994286, |
|
"eval_loss": 1.0921169519424438, |
|
"eval_runtime": 136.7603, |
|
"eval_samples_per_second": 18.85, |
|
"eval_steps_per_second": 4.716, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2678425608361914, |
|
"grad_norm": 0.2013624608516693, |
|
"learning_rate": 4.695790918802576e-05, |
|
"loss": 0.898, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27437530622244, |
|
"grad_norm": 0.35071200132369995, |
|
"learning_rate": 4.252125897855932e-05, |
|
"loss": 0.9476, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28090805160868854, |
|
"grad_norm": 0.5039893388748169, |
|
"learning_rate": 3.824753850538082e-05, |
|
"loss": 1.0456, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28744079699493713, |
|
"grad_norm": 0.9260233640670776, |
|
"learning_rate": 3.414886209349615e-05, |
|
"loss": 1.1276, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29397354238118567, |
|
"grad_norm": 3.32053279876709, |
|
"learning_rate": 3.0236847886501542e-05, |
|
"loss": 1.501, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29397354238118567, |
|
"eval_loss": 1.085567831993103, |
|
"eval_runtime": 136.7334, |
|
"eval_samples_per_second": 18.854, |
|
"eval_steps_per_second": 4.717, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.30050628776743427, |
|
"grad_norm": 0.19558390974998474, |
|
"learning_rate": 2.6522584913693294e-05, |
|
"loss": 0.89, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.30703903315368286, |
|
"grad_norm": 0.3003573715686798, |
|
"learning_rate": 2.301660165700936e-05, |
|
"loss": 0.9225, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3135717785399314, |
|
"grad_norm": 0.584018886089325, |
|
"learning_rate": 1.9728836206903656e-05, |
|
"loss": 1.0287, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.32010452392618, |
|
"grad_norm": 0.8156113624572754, |
|
"learning_rate": 1.6668608091748495e-05, |
|
"loss": 1.0906, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32663726931242854, |
|
"grad_norm": 2.664822816848755, |
|
"learning_rate": 1.3844591860619383e-05, |
|
"loss": 1.5452, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32663726931242854, |
|
"eval_loss": 1.0833667516708374, |
|
"eval_runtime": 136.7852, |
|
"eval_samples_per_second": 18.847, |
|
"eval_steps_per_second": 4.715, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.356406596473979e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|