{ "best_metric": 0.8921794295310974, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.10178117048346055, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005089058524173028, "eval_loss": 1.7877963781356812, "eval_runtime": 34.8303, "eval_samples_per_second": 23.772, "eval_steps_per_second": 5.943, "step": 1 }, { "epoch": 0.005089058524173028, "grad_norm": 19.215145111083984, "learning_rate": 5.05e-06, "loss": 1.8396, "step": 10 }, { "epoch": 0.010178117048346057, "grad_norm": 18.71825408935547, "learning_rate": 1.01e-05, "loss": 1.8566, "step": 20 }, { "epoch": 0.015267175572519083, "grad_norm": 11.373724937438965, "learning_rate": 9.538888888888889e-06, "loss": 1.0249, "step": 30 }, { "epoch": 0.020356234096692113, "grad_norm": 9.73820972442627, "learning_rate": 8.977777777777778e-06, "loss": 0.8865, "step": 40 }, { "epoch": 0.02544529262086514, "grad_norm": 16.09589195251465, "learning_rate": 8.416666666666667e-06, "loss": 0.8376, "step": 50 }, { "epoch": 0.02544529262086514, "eval_loss": 1.0367200374603271, "eval_runtime": 34.9046, "eval_samples_per_second": 23.722, "eval_steps_per_second": 5.93, "step": 50 }, { "epoch": 0.030534351145038167, "grad_norm": 10.095135688781738, "learning_rate": 7.855555555555556e-06, "loss": 1.0159, "step": 60 }, { "epoch": 0.035623409669211195, "grad_norm": 14.570350646972656, "learning_rate": 7.294444444444444e-06, "loss": 1.5958, "step": 70 }, { "epoch": 0.04071246819338423, "grad_norm": 6.843364715576172, "learning_rate": 6.733333333333333e-06, "loss": 0.9956, "step": 80 }, { "epoch": 0.04580152671755725, "grad_norm": 7.70328426361084, "learning_rate": 6.172222222222223e-06, "loss": 0.7241, "step": 90 }, { "epoch": 0.05089058524173028, "grad_norm": 14.757134437561035, "learning_rate": 5.611111111111111e-06, "loss": 0.6845, "step": 100 }, { "epoch": 0.05089058524173028, "eval_loss": 0.9478112459182739, "eval_runtime": 35.0001, "eval_samples_per_second": 23.657, "eval_steps_per_second": 5.914, "step": 100 }, { "epoch": 0.05597964376590331, "grad_norm": 9.257173538208008, "learning_rate": 5.05e-06, "loss": 1.2995, "step": 110 }, { "epoch": 0.061068702290076333, "grad_norm": 12.88524341583252, "learning_rate": 4.488888888888889e-06, "loss": 1.1067, "step": 120 }, { "epoch": 0.06615776081424936, "grad_norm": 5.52728796005249, "learning_rate": 3.927777777777778e-06, "loss": 0.8998, "step": 130 }, { "epoch": 0.07124681933842239, "grad_norm": 11.348931312561035, "learning_rate": 3.3666666666666665e-06, "loss": 0.7852, "step": 140 }, { "epoch": 0.07633587786259542, "grad_norm": 9.074958801269531, "learning_rate": 2.8055555555555555e-06, "loss": 0.7446, "step": 150 }, { "epoch": 0.07633587786259542, "eval_loss": 0.9015980362892151, "eval_runtime": 35.055, "eval_samples_per_second": 23.62, "eval_steps_per_second": 5.905, "step": 150 }, { "epoch": 0.08142493638676845, "grad_norm": 9.362646102905273, "learning_rate": 2.2444444444444445e-06, "loss": 1.1438, "step": 160 }, { "epoch": 0.08651399491094147, "grad_norm": 14.217305183410645, "learning_rate": 1.6833333333333332e-06, "loss": 1.2251, "step": 170 }, { "epoch": 0.0916030534351145, "grad_norm": 8.306926727294922, "learning_rate": 1.1222222222222222e-06, "loss": 0.7782, "step": 180 }, { "epoch": 0.09669211195928754, "grad_norm": 8.390512466430664, "learning_rate": 5.611111111111111e-07, "loss": 0.8091, "step": 190 }, { "epoch": 0.10178117048346055, "grad_norm": 9.584101676940918, "learning_rate": 0.0, "loss": 0.6583, "step": 200 }, { "epoch": 0.10178117048346055, "eval_loss": 0.8921794295310974, "eval_runtime": 35.0768, "eval_samples_per_second": 23.605, "eval_steps_per_second": 5.901, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.33047358301143e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }