{ "best_metric": 6.430952072143555, "best_model_checkpoint": "finetuned-bert__java-8m-methods__jemma-java-bert-tokenizer__XXXM/checkpoint-45000", "epoch": 2.626185404569461, "global_step": 45000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 0.0004998919902242132, "loss": 5.8968, "step": 5000 }, { "epoch": 0.29, "eval_loss": 6.462705612182617, "eval_runtime": 1805.2005, "eval_samples_per_second": 27.514, "eval_steps_per_second": 6.879, "step": 5000 }, { "epoch": 0.58, "learning_rate": 0.0004994534515508689, "loss": 5.9119, "step": 10000 }, { "epoch": 0.58, "eval_loss": 6.476224422454834, "eval_runtime": 1804.4274, "eval_samples_per_second": 27.526, "eval_steps_per_second": 6.882, "step": 10000 }, { "epoch": 0.88, "learning_rate": 0.0004986784236353977, "loss": 5.9089, "step": 15000 }, { "epoch": 0.88, "eval_loss": 6.472481727600098, "eval_runtime": 1804.1115, "eval_samples_per_second": 27.531, "eval_steps_per_second": 6.883, "step": 15000 }, { "epoch": 1.17, "learning_rate": 0.0004975676423415004, "loss": 5.9054, "step": 20000 }, { "epoch": 1.17, "eval_loss": 6.4574785232543945, "eval_runtime": 1804.4293, "eval_samples_per_second": 27.526, "eval_steps_per_second": 6.882, "step": 20000 }, { "epoch": 1.46, "learning_rate": 0.0004961227282936757, "loss": 5.9018, "step": 25000 }, { "epoch": 1.46, "eval_loss": 6.441328525543213, "eval_runtime": 1805.0751, "eval_samples_per_second": 27.516, "eval_steps_per_second": 6.879, "step": 25000 }, { "epoch": 1.75, "learning_rate": 0.0004943460202721656, "loss": 5.9057, "step": 30000 }, { "epoch": 1.75, "eval_loss": 6.43818998336792, "eval_runtime": 1791.0913, "eval_samples_per_second": 27.731, "eval_steps_per_second": 6.933, "step": 30000 }, { "epoch": 2.04, "learning_rate": 0.0004922392052032267, "loss": 5.9076, "step": 35000 }, { "epoch": 2.04, "eval_loss": 6.455899238586426, "eval_runtime": 1805.3008, "eval_samples_per_second": 27.513, "eval_steps_per_second": 6.879, "step": 35000 }, { "epoch": 2.33, "learning_rate": 0.0004898059684069413, "loss": 5.8996, "step": 40000 }, { "epoch": 2.33, "eval_loss": 6.460456848144531, "eval_runtime": 1805.5197, "eval_samples_per_second": 27.51, "eval_steps_per_second": 6.878, "step": 40000 }, { "epoch": 2.63, "learning_rate": 0.00048704920398382135, "loss": 5.9083, "step": 45000 }, { "epoch": 2.63, "eval_loss": 6.430952072143555, "eval_runtime": 1807.4508, "eval_samples_per_second": 27.48, "eval_steps_per_second": 6.87, "step": 45000 } ], "max_steps": 428375, "num_train_epochs": 25, "total_flos": 7.5543354473762e+17, "trial_name": null, "trial_params": null }