{ "best_metric": 0.29708796739578247, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.08255082034877721, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016510164069755444, "grad_norm": 3.0247745513916016, "learning_rate": 5e-05, "loss": 0.4272, "step": 1 }, { "epoch": 0.0016510164069755444, "eval_loss": 0.5801298022270203, "eval_runtime": 152.3421, "eval_samples_per_second": 26.788, "eval_steps_per_second": 3.354, "step": 1 }, { "epoch": 0.0033020328139510887, "grad_norm": 0.26526594161987305, "learning_rate": 0.0001, "loss": 0.5465, "step": 2 }, { "epoch": 0.004953049220926633, "grad_norm": 0.2640111446380615, "learning_rate": 9.989294616193017e-05, "loss": 0.5798, "step": 3 }, { "epoch": 0.006604065627902177, "grad_norm": 0.2363673597574234, "learning_rate": 9.957224306869053e-05, "loss": 0.5595, "step": 4 }, { "epoch": 0.008255082034877721, "grad_norm": 0.211054265499115, "learning_rate": 9.903926402016153e-05, "loss": 0.5095, "step": 5 }, { "epoch": 0.009906098441853266, "grad_norm": 0.18822599947452545, "learning_rate": 9.829629131445342e-05, "loss": 0.5189, "step": 6 }, { "epoch": 0.01155711484882881, "grad_norm": 0.20426109433174133, "learning_rate": 9.73465064747553e-05, "loss": 0.5167, "step": 7 }, { "epoch": 0.013208131255804355, "grad_norm": 0.2221483439207077, "learning_rate": 9.619397662556435e-05, "loss": 0.4754, "step": 8 }, { "epoch": 0.014859147662779898, "grad_norm": 0.18756835162639618, "learning_rate": 9.484363707663442e-05, "loss": 0.4448, "step": 9 }, { "epoch": 0.016510164069755442, "grad_norm": 0.16543976962566376, "learning_rate": 9.330127018922194e-05, "loss": 0.416, "step": 10 }, { "epoch": 0.018161180476730987, "grad_norm": 0.7194027900695801, "learning_rate": 9.157348061512727e-05, "loss": 0.2621, "step": 11 }, { "epoch": 0.019812196883706532, "grad_norm": 0.17668159306049347, "learning_rate": 8.966766701456177e-05, "loss": 0.1983, "step": 12 }, { "epoch": 0.021463213290682077, "grad_norm": 0.1292767971754074, "learning_rate": 8.759199037394887e-05, "loss": 0.2998, "step": 13 }, { "epoch": 0.02311422969765762, "grad_norm": 0.15346571803092957, "learning_rate": 8.535533905932738e-05, "loss": 0.3568, "step": 14 }, { "epoch": 0.024765246104633164, "grad_norm": 0.1705729067325592, "learning_rate": 8.296729075500344e-05, "loss": 0.3867, "step": 15 }, { "epoch": 0.02641626251160871, "grad_norm": 0.1557266265153885, "learning_rate": 8.043807145043604e-05, "loss": 0.4098, "step": 16 }, { "epoch": 0.028067278918584255, "grad_norm": 0.13209468126296997, "learning_rate": 7.777851165098012e-05, "loss": 0.4109, "step": 17 }, { "epoch": 0.029718295325559797, "grad_norm": 0.11559902131557465, "learning_rate": 7.500000000000001e-05, "loss": 0.3883, "step": 18 }, { "epoch": 0.03136931173253534, "grad_norm": 0.09431163221597672, "learning_rate": 7.211443451095007e-05, "loss": 0.3791, "step": 19 }, { "epoch": 0.033020328139510884, "grad_norm": 0.09266266971826553, "learning_rate": 6.91341716182545e-05, "loss": 0.3857, "step": 20 }, { "epoch": 0.03467134454648643, "grad_norm": 0.10236460715532303, "learning_rate": 6.607197326515808e-05, "loss": 0.3849, "step": 21 }, { "epoch": 0.036322360953461974, "grad_norm": 0.10628839582204819, "learning_rate": 6.294095225512603e-05, "loss": 0.3592, "step": 22 }, { "epoch": 0.03797337736043752, "grad_norm": 0.1099647805094719, "learning_rate": 5.9754516100806423e-05, "loss": 0.2481, "step": 23 }, { "epoch": 0.039624393767413064, "grad_norm": 0.10537803918123245, "learning_rate": 5.6526309611002594e-05, "loss": 0.1525, "step": 24 }, { "epoch": 0.041275410174388606, "grad_norm": 0.16766606271266937, "learning_rate": 5.327015646150716e-05, "loss": 0.2986, "step": 25 }, { "epoch": 0.041275410174388606, "eval_loss": 0.3243732452392578, "eval_runtime": 152.2878, "eval_samples_per_second": 26.798, "eval_steps_per_second": 3.355, "step": 25 }, { "epoch": 0.042926426581364155, "grad_norm": 0.10008738189935684, "learning_rate": 5e-05, "loss": 0.2265, "step": 26 }, { "epoch": 0.0445774429883397, "grad_norm": 0.11355409026145935, "learning_rate": 4.6729843538492847e-05, "loss": 0.3318, "step": 27 }, { "epoch": 0.04622845939531524, "grad_norm": 0.09922956675291061, "learning_rate": 4.347369038899744e-05, "loss": 0.3443, "step": 28 }, { "epoch": 0.04787947580229079, "grad_norm": 0.08054020255804062, "learning_rate": 4.0245483899193595e-05, "loss": 0.3249, "step": 29 }, { "epoch": 0.04953049220926633, "grad_norm": 0.08450499176979065, "learning_rate": 3.705904774487396e-05, "loss": 0.3489, "step": 30 }, { "epoch": 0.05118150861624187, "grad_norm": 0.09624389559030533, "learning_rate": 3.392802673484193e-05, "loss": 0.3536, "step": 31 }, { "epoch": 0.05283252502321742, "grad_norm": 0.10854283720254898, "learning_rate": 3.086582838174551e-05, "loss": 0.3573, "step": 32 }, { "epoch": 0.05448354143019296, "grad_norm": 0.09736617654561996, "learning_rate": 2.7885565489049946e-05, "loss": 0.3333, "step": 33 }, { "epoch": 0.05613455783716851, "grad_norm": 0.0933280810713768, "learning_rate": 2.500000000000001e-05, "loss": 0.329, "step": 34 }, { "epoch": 0.05778557424414405, "grad_norm": 0.10208204388618469, "learning_rate": 2.2221488349019903e-05, "loss": 0.2832, "step": 35 }, { "epoch": 0.05943659065111959, "grad_norm": 0.08625288307666779, "learning_rate": 1.9561928549563968e-05, "loss": 0.1621, "step": 36 }, { "epoch": 0.06108760705809514, "grad_norm": 0.08310309797525406, "learning_rate": 1.703270924499656e-05, "loss": 0.1118, "step": 37 }, { "epoch": 0.06273862346507068, "grad_norm": 0.08370844274759293, "learning_rate": 1.4644660940672627e-05, "loss": 0.2181, "step": 38 }, { "epoch": 0.06438963987204623, "grad_norm": 0.08878277242183685, "learning_rate": 1.2408009626051137e-05, "loss": 0.2772, "step": 39 }, { "epoch": 0.06604065627902177, "grad_norm": 0.09928658604621887, "learning_rate": 1.0332332985438248e-05, "loss": 0.3036, "step": 40 }, { "epoch": 0.06769167268599732, "grad_norm": 0.08427450805902481, "learning_rate": 8.426519384872733e-06, "loss": 0.3139, "step": 41 }, { "epoch": 0.06934268909297286, "grad_norm": 0.08152101188898087, "learning_rate": 6.698729810778065e-06, "loss": 0.3079, "step": 42 }, { "epoch": 0.0709937054999484, "grad_norm": 0.09090898931026459, "learning_rate": 5.156362923365588e-06, "loss": 0.3204, "step": 43 }, { "epoch": 0.07264472190692395, "grad_norm": 0.08596403151750565, "learning_rate": 3.8060233744356633e-06, "loss": 0.3383, "step": 44 }, { "epoch": 0.07429573831389949, "grad_norm": 0.07474559545516968, "learning_rate": 2.653493525244721e-06, "loss": 0.3143, "step": 45 }, { "epoch": 0.07594675472087505, "grad_norm": 0.07625479996204376, "learning_rate": 1.70370868554659e-06, "loss": 0.3256, "step": 46 }, { "epoch": 0.07759777112785059, "grad_norm": 0.08517799526453018, "learning_rate": 9.607359798384785e-07, "loss": 0.3103, "step": 47 }, { "epoch": 0.07924878753482613, "grad_norm": 0.08896566182374954, "learning_rate": 4.277569313094809e-07, "loss": 0.2409, "step": 48 }, { "epoch": 0.08089980394180167, "grad_norm": 0.07853224873542786, "learning_rate": 1.0705383806982606e-07, "loss": 0.1056, "step": 49 }, { "epoch": 0.08255082034877721, "grad_norm": 0.21440893411636353, "learning_rate": 0.0, "loss": 0.3215, "step": 50 }, { "epoch": 0.08255082034877721, "eval_loss": 0.29708796739578247, "eval_runtime": 152.8033, "eval_samples_per_second": 26.708, "eval_steps_per_second": 3.344, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.299421435609743e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }