{ "best_metric": 1.9741289615631104, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.003188318002837603, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.5941590014188015e-05, "eval_loss": 2.3955931663513184, "eval_runtime": 1553.8147, "eval_samples_per_second": 16.999, "eval_steps_per_second": 4.25, "step": 1 }, { "epoch": 0.00015941590014188016, "grad_norm": 0.6692927479743958, "learning_rate": 4.0600000000000004e-05, "loss": 1.9118, "step": 10 }, { "epoch": 0.0003188318002837603, "grad_norm": 0.9186792969703674, "learning_rate": 8.120000000000001e-05, "loss": 1.8931, "step": 20 }, { "epoch": 0.00047824770042564045, "grad_norm": 1.1474906206130981, "learning_rate": 0.00012179999999999999, "loss": 1.9426, "step": 30 }, { "epoch": 0.0006376636005675206, "grad_norm": 1.8628828525543213, "learning_rate": 0.00016240000000000002, "loss": 1.9296, "step": 40 }, { "epoch": 0.0007970795007094007, "grad_norm": 6.320926189422607, "learning_rate": 0.000203, "loss": 2.0846, "step": 50 }, { "epoch": 0.0007970795007094007, "eval_loss": 1.9741289615631104, "eval_runtime": 1551.6609, "eval_samples_per_second": 17.022, "eval_steps_per_second": 4.256, "step": 50 }, { "epoch": 0.0009564954008512809, "grad_norm": 0.5567274689674377, "learning_rate": 0.00020275275110137215, "loss": 1.8676, "step": 60 }, { "epoch": 0.001115911300993161, "grad_norm": 0.9768388867378235, "learning_rate": 0.00020201220897726938, "loss": 1.824, "step": 70 }, { "epoch": 0.0012753272011350413, "grad_norm": 0.8935255408287048, "learning_rate": 0.00020078198147448128, "loss": 1.8593, "step": 80 }, { "epoch": 0.0014347431012769214, "grad_norm": 1.3647874593734741, "learning_rate": 0.00019906806213773937, "loss": 2.0022, "step": 90 }, { "epoch": 0.0015941590014188014, "grad_norm": 9.45513916015625, "learning_rate": 0.0001968788010097697, "loss": 1.7834, "step": 100 }, { "epoch": 0.0015941590014188014, "eval_loss": 2.133173704147339, "eval_runtime": 1552.7201, "eval_samples_per_second": 17.011, "eval_steps_per_second": 4.253, "step": 100 }, { "epoch": 0.0017535749015606817, "grad_norm": 0.7368646264076233, "learning_rate": 0.00019422486395072398, "loss": 1.7586, "step": 110 }, { "epoch": 0.0019129908017025618, "grad_norm": 0.8807580471038818, "learning_rate": 0.0001911191806751811, "loss": 1.764, "step": 120 }, { "epoch": 0.002072406701844442, "grad_norm": 1.2745860815048218, "learning_rate": 0.00018757688175987723, "loss": 1.8485, "step": 130 }, { "epoch": 0.002231822601986322, "grad_norm": 1.4981567859649658, "learning_rate": 0.00018361522492905716, "loss": 1.9636, "step": 140 }, { "epoch": 0.0023912385021282023, "grad_norm": 4.255372047424316, "learning_rate": 0.00017925351097657625, "loss": 2.0425, "step": 150 }, { "epoch": 0.0023912385021282023, "eval_loss": 1.9914931058883667, "eval_runtime": 1554.4671, "eval_samples_per_second": 16.992, "eval_steps_per_second": 4.248, "step": 150 }, { "epoch": 0.0025506544022700826, "grad_norm": 0.7145971655845642, "learning_rate": 0.00017451298973437308, "loss": 1.7097, "step": 160 }, { "epoch": 0.0027100703024119624, "grad_norm": 0.8129323124885559, "learning_rate": 0.0001694167565454241, "loss": 1.853, "step": 170 }, { "epoch": 0.0028694862025538427, "grad_norm": 1.0721731185913086, "learning_rate": 0.0001639896397455543, "loss": 1.8659, "step": 180 }, { "epoch": 0.003028902102695723, "grad_norm": 1.490907907485962, "learning_rate": 0.0001582580797022808, "loss": 1.9599, "step": 190 }, { "epoch": 0.003188318002837603, "grad_norm": 5.143823623657227, "learning_rate": 0.00015225, "loss": 1.9821, "step": 200 }, { "epoch": 0.003188318002837603, "eval_loss": 2.058631658554077, "eval_runtime": 1553.3483, "eval_samples_per_second": 17.004, "eval_steps_per_second": 4.251, "step": 200 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.696910581891072e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }