{ "best_metric": 0.6004818081855774, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 3.0697674418604652, "eval_steps": 25, "global_step": 33, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09302325581395349, "grad_norm": 1.616852879524231, "learning_rate": 5e-05, "loss": 2.0474, "step": 1 }, { "epoch": 0.09302325581395349, "eval_loss": 2.9822559356689453, "eval_runtime": 3.4381, "eval_samples_per_second": 14.543, "eval_steps_per_second": 3.781, "step": 1 }, { "epoch": 0.18604651162790697, "grad_norm": 2.1614630222320557, "learning_rate": 0.0001, "loss": 2.4994, "step": 2 }, { "epoch": 0.27906976744186046, "grad_norm": 1.7438641786575317, "learning_rate": 9.976911955263529e-05, "loss": 1.8839, "step": 3 }, { "epoch": 0.37209302325581395, "grad_norm": 1.934104561805725, "learning_rate": 9.907884735636226e-05, "loss": 1.7032, "step": 4 }, { "epoch": 0.46511627906976744, "grad_norm": 2.6693766117095947, "learning_rate": 9.793626653800219e-05, "loss": 1.7775, "step": 5 }, { "epoch": 0.5581395348837209, "grad_norm": 1.5809624195098877, "learning_rate": 9.635310152291039e-05, "loss": 0.8558, "step": 6 }, { "epoch": 0.6511627906976745, "grad_norm": 2.7533814907073975, "learning_rate": 9.43455977265062e-05, "loss": 0.9982, "step": 7 }, { "epoch": 0.7441860465116279, "grad_norm": 2.5589840412139893, "learning_rate": 9.193435485432745e-05, "loss": 0.7236, "step": 8 }, { "epoch": 0.8372093023255814, "grad_norm": 1.5994662046432495, "learning_rate": 8.914411552117559e-05, "loss": 0.7621, "step": 9 }, { "epoch": 0.9302325581395349, "grad_norm": 1.975174903869629, "learning_rate": 8.600351135840589e-05, "loss": 0.8413, "step": 10 }, { "epoch": 1.0232558139534884, "grad_norm": 1.1943004131317139, "learning_rate": 8.254476921464484e-05, "loss": 0.6243, "step": 11 }, { "epoch": 1.1162790697674418, "grad_norm": 0.5302379727363586, "learning_rate": 7.880338046471331e-05, "loss": 0.4453, "step": 12 }, { "epoch": 1.2093023255813953, "grad_norm": 0.6890483498573303, "learning_rate": 7.481773682009356e-05, "loss": 0.5305, "step": 13 }, { "epoch": 1.302325581395349, "grad_norm": 0.4094037115573883, "learning_rate": 7.062873637801692e-05, "loss": 0.4227, "step": 14 }, { "epoch": 1.3953488372093024, "grad_norm": 0.454261839389801, "learning_rate": 6.627936395164243e-05, "loss": 0.4896, "step": 15 }, { "epoch": 1.4883720930232558, "grad_norm": 0.5503323078155518, "learning_rate": 6.181424998770595e-05, "loss": 0.5499, "step": 16 }, { "epoch": 1.5813953488372094, "grad_norm": 0.331122487783432, "learning_rate": 5.727921259774208e-05, "loss": 0.3566, "step": 17 }, { "epoch": 1.6744186046511627, "grad_norm": 0.4989646375179291, "learning_rate": 5.2720787402257935e-05, "loss": 0.4685, "step": 18 }, { "epoch": 1.7674418604651163, "grad_norm": 0.30750206112861633, "learning_rate": 4.8185750012294065e-05, "loss": 0.4377, "step": 19 }, { "epoch": 1.8604651162790697, "grad_norm": 0.41947314143180847, "learning_rate": 4.372063604835758e-05, "loss": 0.47, "step": 20 }, { "epoch": 1.9534883720930232, "grad_norm": 0.5602025389671326, "learning_rate": 3.93712636219831e-05, "loss": 0.5443, "step": 21 }, { "epoch": 2.046511627906977, "grad_norm": 0.5113930702209473, "learning_rate": 3.518226317990646e-05, "loss": 0.4755, "step": 22 }, { "epoch": 2.13953488372093, "grad_norm": 0.27189022302627563, "learning_rate": 3.119661953528671e-05, "loss": 0.3877, "step": 23 }, { "epoch": 2.2325581395348837, "grad_norm": 0.15714497864246368, "learning_rate": 2.745523078535517e-05, "loss": 0.4046, "step": 24 }, { "epoch": 2.3255813953488373, "grad_norm": 0.2902805805206299, "learning_rate": 2.39964886415941e-05, "loss": 0.3778, "step": 25 }, { "epoch": 2.3255813953488373, "eval_loss": 0.6004818081855774, "eval_runtime": 3.5049, "eval_samples_per_second": 14.266, "eval_steps_per_second": 3.709, "step": 25 }, { "epoch": 2.4186046511627906, "grad_norm": 0.2399173527956009, "learning_rate": 2.0855884478824412e-05, "loss": 0.4546, "step": 26 }, { "epoch": 2.511627906976744, "grad_norm": 0.21842080354690552, "learning_rate": 1.806564514567258e-05, "loss": 0.4159, "step": 27 }, { "epoch": 2.604651162790698, "grad_norm": 0.09714235365390778, "learning_rate": 1.5654402273493805e-05, "loss": 0.3683, "step": 28 }, { "epoch": 2.697674418604651, "grad_norm": 0.1338994950056076, "learning_rate": 1.3646898477089626e-05, "loss": 0.405, "step": 29 }, { "epoch": 2.7906976744186047, "grad_norm": 0.18371757864952087, "learning_rate": 1.2063733461997805e-05, "loss": 0.4067, "step": 30 }, { "epoch": 2.883720930232558, "grad_norm": 0.12735600769519806, "learning_rate": 1.092115264363775e-05, "loss": 0.4753, "step": 31 }, { "epoch": 2.9767441860465116, "grad_norm": 0.1807902604341507, "learning_rate": 1.023088044736472e-05, "loss": 0.4981, "step": 32 }, { "epoch": 3.0697674418604652, "grad_norm": 0.3723318576812744, "learning_rate": 1e-05, "loss": 0.4004, "step": 33 } ], "logging_steps": 1, "max_steps": 33, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.953855388419359e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }