{ "best_metric": 0.04605380445718765, "best_model_checkpoint": "/data/wheld3/byt5-base-cstop_artificial/checkpoint-200", "epoch": 428.5019607843137, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 28.5, "learning_rate": 0.0009333333333333333, "loss": 0.2563, "step": 200 }, { "epoch": 28.5, "eval_exact_match": 0.03756708407871199, "eval_loss": 0.04605380445718765, "eval_runtime": 27.8305, "eval_samples_per_second": 20.086, "eval_steps_per_second": 2.515, "step": 200 }, { "epoch": 57.13, "learning_rate": 0.0008666666666666667, "loss": 0.0065, "step": 400 }, { "epoch": 57.13, "eval_exact_match": 0.03756708407871199, "eval_loss": 0.05634024366736412, "eval_runtime": 28.5438, "eval_samples_per_second": 19.584, "eval_steps_per_second": 2.452, "step": 400 }, { "epoch": 85.63, "learning_rate": 0.0008, "loss": 0.0021, "step": 600 }, { "epoch": 85.63, "eval_exact_match": 0.03577817531305903, "eval_loss": 0.05924277380108833, "eval_runtime": 28.509, "eval_samples_per_second": 19.608, "eval_steps_per_second": 2.455, "step": 600 }, { "epoch": 114.25, "learning_rate": 0.0007333333333333333, "loss": 0.0013, "step": 800 }, { "epoch": 114.25, "eval_exact_match": 0.03756708407871199, "eval_loss": 0.05689763277769089, "eval_runtime": 28.1931, "eval_samples_per_second": 19.828, "eval_steps_per_second": 2.483, "step": 800 }, { "epoch": 142.75, "learning_rate": 0.0006666666666666666, "loss": 0.0008, "step": 1000 }, { "epoch": 142.75, "eval_exact_match": 0.03577817531305903, "eval_loss": 0.06747602671384811, "eval_runtime": 28.4722, "eval_samples_per_second": 19.633, "eval_steps_per_second": 2.459, "step": 1000 }, { "epoch": 171.38, "learning_rate": 0.0006, "loss": 0.0007, "step": 1200 }, { "epoch": 171.38, "eval_exact_match": 0.03935599284436494, "eval_loss": 0.06273317337036133, "eval_runtime": 28.026, "eval_samples_per_second": 19.946, "eval_steps_per_second": 2.498, "step": 1200 }, { "epoch": 199.88, "learning_rate": 0.0005333333333333334, "loss": 0.0004, "step": 1400 }, { "epoch": 199.88, "eval_exact_match": 0.03577817531305903, "eval_loss": 0.06774432212114334, "eval_runtime": 29.2197, "eval_samples_per_second": 19.131, "eval_steps_per_second": 2.396, "step": 1400 }, { "epoch": 228.5, "learning_rate": 0.00046666666666666666, "loss": 0.0003, "step": 1600 }, { "epoch": 228.5, "eval_exact_match": 0.03756708407871199, "eval_loss": 0.06500059366226196, "eval_runtime": 27.5272, "eval_samples_per_second": 20.307, "eval_steps_per_second": 2.543, "step": 1600 }, { "epoch": 257.13, "learning_rate": 0.0004, "loss": 0.0002, "step": 1800 }, { "epoch": 257.13, "eval_exact_match": 0.03935599284436494, "eval_loss": 0.06925792992115021, "eval_runtime": 27.2422, "eval_samples_per_second": 20.52, "eval_steps_per_second": 2.57, "step": 1800 }, { "epoch": 285.63, "learning_rate": 0.0003333333333333333, "loss": 0.0002, "step": 2000 }, { "epoch": 285.63, "eval_exact_match": 0.03935599284436494, "eval_loss": 0.07205212116241455, "eval_runtime": 27.1898, "eval_samples_per_second": 20.559, "eval_steps_per_second": 2.574, "step": 2000 }, { "epoch": 314.25, "learning_rate": 0.0002666666666666667, "loss": 0.0002, "step": 2200 }, { "epoch": 314.25, "eval_exact_match": 0.03756708407871199, "eval_loss": 0.0713699460029602, "eval_runtime": 27.2794, "eval_samples_per_second": 20.492, "eval_steps_per_second": 2.566, "step": 2200 }, { "epoch": 342.75, "learning_rate": 0.0002, "loss": 0.0002, "step": 2400 }, { "epoch": 342.75, "eval_exact_match": 0.03935599284436494, "eval_loss": 0.07009705901145935, "eval_runtime": 27.286, "eval_samples_per_second": 20.487, "eval_steps_per_second": 2.565, "step": 2400 }, { "epoch": 371.38, "learning_rate": 0.00013333333333333334, "loss": 0.0002, "step": 2600 }, { "epoch": 371.38, "eval_exact_match": 0.03935599284436494, "eval_loss": 0.07500004023313522, "eval_runtime": 27.1876, "eval_samples_per_second": 20.561, "eval_steps_per_second": 2.575, "step": 2600 }, { "epoch": 399.88, "learning_rate": 6.666666666666667e-05, "loss": 0.0001, "step": 2800 }, { "epoch": 399.88, "eval_exact_match": 0.03935599284436494, "eval_loss": 0.07391420006752014, "eval_runtime": 27.4245, "eval_samples_per_second": 20.383, "eval_steps_per_second": 2.552, "step": 2800 }, { "epoch": 428.5, "learning_rate": 0.0, "loss": 0.0001, "step": 3000 }, { "epoch": 428.5, "eval_exact_match": 0.03935599284436494, "eval_loss": 0.07452824711799622, "eval_runtime": 28.2025, "eval_samples_per_second": 19.821, "eval_steps_per_second": 2.482, "step": 3000 }, { "epoch": 428.5, "step": 3000, "total_flos": 3.592904346944225e+17, "train_loss": 0.017975078212097286, "train_runtime": 47263.2401, "train_samples_per_second": 32.499, "train_steps_per_second": 0.063 } ], "max_steps": 3000, "num_train_epochs": 429, "total_flos": 3.592904346944225e+17, "trial_name": null, "trial_params": null }