{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 417130, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999988013329178e-05, "loss": 5.0345, "step": 1 }, { "epoch": 1.0, "learning_rate": 4.5e-05, "loss": 3.5452, "step": 41713 }, { "epoch": 1.0, "eval_accuracy": 0.4118760802529946, "eval_loss": 3.3738181591033936, "eval_runtime": 157.5504, "eval_samples_per_second": 129.368, "eval_steps_per_second": 16.173, "step": 41713 }, { "epoch": 2.0, "learning_rate": 4e-05, "loss": 3.4132, "step": 83426 }, { "epoch": 2.0, "eval_accuracy": 0.4179529258872079, "eval_loss": 3.326674461364746, "eval_runtime": 156.5806, "eval_samples_per_second": 130.169, "eval_steps_per_second": 16.273, "step": 83426 }, { "epoch": 3.0, "learning_rate": 3.5e-05, "loss": 3.3573, "step": 125139 }, { "epoch": 3.0, "eval_accuracy": 0.4213834655713664, "eval_loss": 3.3009300231933594, "eval_runtime": 156.2147, "eval_samples_per_second": 130.474, "eval_steps_per_second": 16.311, "step": 125139 }, { "epoch": 4.0, "learning_rate": 3e-05, "loss": 3.3196, "step": 166852 }, { "epoch": 4.0, "eval_accuracy": 0.42338731797471446, "eval_loss": 3.2852749824523926, "eval_runtime": 157.4446, "eval_samples_per_second": 129.455, "eval_steps_per_second": 16.183, "step": 166852 }, { "epoch": 5.0, "learning_rate": 2.5e-05, "loss": 3.2912, "step": 208565 }, { "epoch": 5.0, "eval_accuracy": 0.42540662325952266, "eval_loss": 3.2730937004089355, "eval_runtime": 155.9547, "eval_samples_per_second": 130.692, "eval_steps_per_second": 16.338, "step": 208565 }, { "epoch": 6.0, "learning_rate": 2e-05, "loss": 3.2688, "step": 250278 }, { "epoch": 6.0, "eval_accuracy": 0.42657331580976576, "eval_loss": 3.265730857849121, "eval_runtime": 157.36, "eval_samples_per_second": 129.525, "eval_steps_per_second": 16.192, "step": 250278 }, { "epoch": 7.0, "learning_rate": 1.5e-05, "loss": 3.2506, "step": 291991 }, { "epoch": 7.0, "eval_accuracy": 0.4276302929016416, "eval_loss": 3.25875186920166, "eval_runtime": 157.1763, "eval_samples_per_second": 129.676, "eval_steps_per_second": 16.211, "step": 291991 }, { "epoch": 8.0, "learning_rate": 1e-05, "loss": 3.2358, "step": 333704 }, { "epoch": 8.0, "eval_accuracy": 0.4284013916865043, "eval_loss": 3.2545275688171387, "eval_runtime": 155.6431, "eval_samples_per_second": 130.953, "eval_steps_per_second": 16.371, "step": 333704 }, { "epoch": 9.0, "learning_rate": 5e-06, "loss": 3.2237, "step": 375417 }, { "epoch": 9.0, "eval_accuracy": 0.4288819762999157, "eval_loss": 3.2520158290863037, "eval_runtime": 156.3518, "eval_samples_per_second": 130.36, "eval_steps_per_second": 16.297, "step": 375417 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 3.2144, "step": 417130 }, { "epoch": 10.0, "eval_accuracy": 0.42910256618275966, "eval_loss": 3.251399040222168, "eval_runtime": 154.5215, "eval_samples_per_second": 131.904, "eval_steps_per_second": 16.49, "step": 417130 }, { "epoch": 10.0, "step": 417130, "total_flos": 6.5394993954816e+17, "train_loss": 3.311987802313766, "train_runtime": 196254.5716, "train_samples_per_second": 51.01, "train_steps_per_second": 2.125 } ], "max_steps": 417130, "num_train_epochs": 10, "total_flos": 6.5394993954816e+17, "trial_name": null, "trial_params": null }