{ "best_metric": 1.36418879032135, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.847457627118644, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01694915254237288, "grad_norm": 1.4696528911590576, "learning_rate": 1e-05, "loss": 4.4767, "step": 1 }, { "epoch": 0.01694915254237288, "eval_loss": 1.5105643272399902, "eval_runtime": 13.1638, "eval_samples_per_second": 7.597, "eval_steps_per_second": 1.899, "step": 1 }, { "epoch": 0.03389830508474576, "grad_norm": 1.0327868461608887, "learning_rate": 2e-05, "loss": 5.5338, "step": 2 }, { "epoch": 0.05084745762711865, "grad_norm": 0.8375867009162903, "learning_rate": 3e-05, "loss": 6.7695, "step": 3 }, { "epoch": 0.06779661016949153, "grad_norm": 1.0288580656051636, "learning_rate": 4e-05, "loss": 6.0015, "step": 4 }, { "epoch": 0.0847457627118644, "grad_norm": 1.235273003578186, "learning_rate": 5e-05, "loss": 6.1554, "step": 5 }, { "epoch": 0.1016949152542373, "grad_norm": 1.609580636024475, "learning_rate": 6e-05, "loss": 6.4157, "step": 6 }, { "epoch": 0.11864406779661017, "grad_norm": 1.5870156288146973, "learning_rate": 7e-05, "loss": 6.3081, "step": 7 }, { "epoch": 0.13559322033898305, "grad_norm": 1.571000099182129, "learning_rate": 8e-05, "loss": 6.3219, "step": 8 }, { "epoch": 0.15254237288135594, "grad_norm": 1.3622900247573853, "learning_rate": 9e-05, "loss": 6.145, "step": 9 }, { "epoch": 0.1694915254237288, "grad_norm": 1.6992497444152832, "learning_rate": 0.0001, "loss": 5.4642, "step": 10 }, { "epoch": 0.1864406779661017, "grad_norm": 1.4771649837493896, "learning_rate": 9.999115304121457e-05, "loss": 5.3796, "step": 11 }, { "epoch": 0.2033898305084746, "grad_norm": 2.005577325820923, "learning_rate": 9.996461529560553e-05, "loss": 6.4416, "step": 12 }, { "epoch": 0.22033898305084745, "grad_norm": 2.1262521743774414, "learning_rate": 9.992039615430648e-05, "loss": 6.1819, "step": 13 }, { "epoch": 0.23728813559322035, "grad_norm": 2.5400421619415283, "learning_rate": 9.985851126551428e-05, "loss": 5.9766, "step": 14 }, { "epoch": 0.2542372881355932, "grad_norm": 2.724677562713623, "learning_rate": 9.977898252895134e-05, "loss": 4.8459, "step": 15 }, { "epoch": 0.2711864406779661, "grad_norm": 1.9490002393722534, "learning_rate": 9.968183808811586e-05, "loss": 4.5774, "step": 16 }, { "epoch": 0.288135593220339, "grad_norm": 1.7252360582351685, "learning_rate": 9.95671123203224e-05, "loss": 6.9357, "step": 17 }, { "epoch": 0.3050847457627119, "grad_norm": 1.5308066606521606, "learning_rate": 9.943484582453653e-05, "loss": 6.6148, "step": 18 }, { "epoch": 0.3220338983050847, "grad_norm": 1.300583839416504, "learning_rate": 9.928508540700774e-05, "loss": 7.4276, "step": 19 }, { "epoch": 0.3389830508474576, "grad_norm": 1.0917631387710571, "learning_rate": 9.911788406470569e-05, "loss": 5.9673, "step": 20 }, { "epoch": 0.3559322033898305, "grad_norm": 1.2227107286453247, "learning_rate": 9.893330096656574e-05, "loss": 5.8025, "step": 21 }, { "epoch": 0.3728813559322034, "grad_norm": 1.5362129211425781, "learning_rate": 9.873140143255036e-05, "loss": 6.5079, "step": 22 }, { "epoch": 0.3898305084745763, "grad_norm": 1.2382031679153442, "learning_rate": 9.85122569105338e-05, "loss": 5.7189, "step": 23 }, { "epoch": 0.4067796610169492, "grad_norm": 1.4853235483169556, "learning_rate": 9.827594495101823e-05, "loss": 6.06, "step": 24 }, { "epoch": 0.423728813559322, "grad_norm": 1.4762415885925293, 
"learning_rate": 9.802254917969032e-05, "loss": 5.8676, "step": 25 }, { "epoch": 0.4406779661016949, "grad_norm": 1.4179600477218628, "learning_rate": 9.775215926782788e-05, "loss": 5.3958, "step": 26 }, { "epoch": 0.4576271186440678, "grad_norm": 1.6792457103729248, "learning_rate": 9.746487090056713e-05, "loss": 5.2567, "step": 27 }, { "epoch": 0.4745762711864407, "grad_norm": 2.73538875579834, "learning_rate": 9.716078574304189e-05, "loss": 5.8031, "step": 28 }, { "epoch": 0.4915254237288136, "grad_norm": 1.1012799739837646, "learning_rate": 9.684001140440639e-05, "loss": 4.4475, "step": 29 }, { "epoch": 0.5084745762711864, "grad_norm": 0.7912802696228027, "learning_rate": 9.650266139975474e-05, "loss": 5.1582, "step": 30 }, { "epoch": 0.5254237288135594, "grad_norm": 0.8357610702514648, "learning_rate": 9.614885510995047e-05, "loss": 5.9797, "step": 31 }, { "epoch": 0.5423728813559322, "grad_norm": 0.8933336734771729, "learning_rate": 9.577871773938011e-05, "loss": 5.8404, "step": 32 }, { "epoch": 0.559322033898305, "grad_norm": 0.9425844550132751, "learning_rate": 9.539238027164619e-05, "loss": 6.3711, "step": 33 }, { "epoch": 0.576271186440678, "grad_norm": 0.8766078352928162, "learning_rate": 9.498997942321483e-05, "loss": 6.1781, "step": 34 }, { "epoch": 0.5932203389830508, "grad_norm": 1.1381494998931885, "learning_rate": 9.457165759503493e-05, "loss": 5.9204, "step": 35 }, { "epoch": 0.6101694915254238, "grad_norm": 0.9810196757316589, "learning_rate": 9.413756282214537e-05, "loss": 5.3107, "step": 36 }, { "epoch": 0.6271186440677966, "grad_norm": 0.9835663437843323, "learning_rate": 9.368784872128878e-05, "loss": 5.4947, "step": 37 }, { "epoch": 0.6440677966101694, "grad_norm": 1.1894506216049194, "learning_rate": 9.322267443654972e-05, "loss": 5.6231, "step": 38 }, { "epoch": 0.6610169491525424, "grad_norm": 1.3286501169204712, "learning_rate": 9.274220458303727e-05, "loss": 5.8462, "step": 39 }, { "epoch": 0.6779661016949152, "grad_norm": 1.3058652877807617, "learning_rate": 9.224660918863104e-05, "loss": 5.3328, "step": 40 }, { "epoch": 0.6949152542372882, "grad_norm": 1.677504301071167, "learning_rate": 9.173606363381219e-05, "loss": 5.7914, "step": 41 }, { "epoch": 0.711864406779661, "grad_norm": 2.292499303817749, "learning_rate": 9.121074858959997e-05, "loss": 5.8802, "step": 42 }, { "epoch": 0.7288135593220338, "grad_norm": 0.9735826849937439, "learning_rate": 9.067084995361623e-05, "loss": 4.7076, "step": 43 }, { "epoch": 0.7457627118644068, "grad_norm": 0.8391918540000916, "learning_rate": 9.011655878430019e-05, "loss": 4.8208, "step": 44 }, { "epoch": 0.7627118644067796, "grad_norm": 0.7652661800384521, "learning_rate": 8.954807123329704e-05, "loss": 6.4505, "step": 45 }, { "epoch": 0.7796610169491526, "grad_norm": 0.9724342226982117, "learning_rate": 8.896558847604414e-05, "loss": 5.5774, "step": 46 }, { "epoch": 0.7966101694915254, "grad_norm": 0.7980533838272095, "learning_rate": 8.836931664057935e-05, "loss": 5.8932, "step": 47 }, { "epoch": 0.8135593220338984, "grad_norm": 0.9073213338851929, "learning_rate": 8.775946673459681e-05, "loss": 5.1781, "step": 48 }, { "epoch": 0.8305084745762712, "grad_norm": 1.0768805742263794, "learning_rate": 8.713625457077585e-05, "loss": 5.388, "step": 49 }, { "epoch": 0.847457627118644, "grad_norm": 1.1347218751907349, "learning_rate": 8.649990069040961e-05, "loss": 5.9919, "step": 50 }, { "epoch": 0.847457627118644, "eval_loss": 1.36418879032135, "eval_runtime": 13.3724, "eval_samples_per_second": 7.478, 
"eval_steps_per_second": 1.87, "step": 50 } ], "logging_steps": 1, "max_steps": 177, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3781766506545152e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }