{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.9632e-05, "loss": 0.9899, "step": 50 }, { "epoch": 0.04, "learning_rate": 1.9232e-05, "loss": 0.9949, "step": 100 }, { "epoch": 0.06, "learning_rate": 1.8832000000000002e-05, "loss": 0.828, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.8432000000000002e-05, "loss": 0.8372, "step": 200 }, { "epoch": 0.1, "learning_rate": 1.8032e-05, "loss": 0.8409, "step": 250 }, { "epoch": 0.1, "eval_accuracy": 0.6220472440944882, "eval_loss": 0.8243474364280701, "eval_runtime": 435.8774, "eval_samples_per_second": 2.04, "eval_steps_per_second": 2.04, "step": 250 }, { "epoch": 0.12, "learning_rate": 1.7632000000000003e-05, "loss": 0.694, "step": 300 }, { "epoch": 0.14, "learning_rate": 1.7232000000000004e-05, "loss": 0.6918, "step": 350 }, { "epoch": 0.16, "learning_rate": 1.6832e-05, "loss": 0.6794, "step": 400 }, { "epoch": 0.18, "learning_rate": 1.6432e-05, "loss": 0.6338, "step": 450 }, { "epoch": 0.2, "learning_rate": 1.6032e-05, "loss": 0.6288, "step": 500 }, { "epoch": 0.2, "eval_accuracy": 0.671541057367829, "eval_loss": 0.7539446949958801, "eval_runtime": 434.4073, "eval_samples_per_second": 2.046, "eval_steps_per_second": 2.046, "step": 500 }, { "epoch": 0.22, "learning_rate": 1.5632000000000002e-05, "loss": 0.6622, "step": 550 }, { "epoch": 0.24, "learning_rate": 1.5232000000000003e-05, "loss": 0.773, "step": 600 }, { "epoch": 0.26, "learning_rate": 1.4832000000000001e-05, "loss": 0.6051, "step": 650 }, { "epoch": 0.28, "learning_rate": 1.4432000000000002e-05, "loss": 0.7805, "step": 700 }, { "epoch": 0.3, "learning_rate": 1.4032e-05, "loss": 0.5882, "step": 750 }, { "epoch": 0.3, "eval_accuracy": 0.7075365579302587, "eval_loss": 0.6791747808456421, "eval_runtime": 433.8268, "eval_samples_per_second": 2.049, "eval_steps_per_second": 2.049, "step": 750 }, { "epoch": 0.32, "learning_rate": 1.3632000000000001e-05, "loss": 0.5672, "step": 800 }, { "epoch": 0.34, "learning_rate": 1.3232e-05, "loss": 0.6807, "step": 850 }, { "epoch": 0.36, "learning_rate": 1.2832e-05, "loss": 0.6796, "step": 900 }, { "epoch": 0.38, "learning_rate": 1.2432000000000002e-05, "loss": 0.6922, "step": 950 }, { "epoch": 0.4, "learning_rate": 1.2032000000000001e-05, "loss": 0.7671, "step": 1000 }, { "epoch": 0.4, "eval_accuracy": 0.7334083239595051, "eval_loss": 0.6129724383354187, "eval_runtime": 433.287, "eval_samples_per_second": 2.052, "eval_steps_per_second": 2.052, "step": 1000 }, { "epoch": 0.42, "learning_rate": 1.1632000000000001e-05, "loss": 0.645, "step": 1050 }, { "epoch": 0.44, "learning_rate": 1.1232e-05, "loss": 0.5891, "step": 1100 }, { "epoch": 0.46, "learning_rate": 1.0832e-05, "loss": 0.6426, "step": 1150 }, { "epoch": 0.48, "learning_rate": 1.0432e-05, "loss": 0.567, "step": 1200 }, { "epoch": 0.5, "learning_rate": 1.0032000000000002e-05, "loss": 0.5782, "step": 1250 }, { "epoch": 0.5, "eval_accuracy": 0.7255343082114736, "eval_loss": 0.6114887595176697, "eval_runtime": 433.3273, "eval_samples_per_second": 2.052, "eval_steps_per_second": 2.052, "step": 1250 }, { "epoch": 0.52, "learning_rate": 9.632e-06, "loss": 0.5736, "step": 1300 }, { "epoch": 0.54, "learning_rate": 9.232e-06, "loss": 0.6849, "step": 1350 }, { "epoch": 0.56, "learning_rate": 8.832000000000001e-06, "loss": 0.5305, "step": 1400 }, { "epoch": 0.58, "learning_rate": 8.432e-06, "loss": 0.7265, "step": 1450 }, { "epoch": 0.6, "learning_rate": 8.032e-06, "loss": 0.5691, "step": 1500 }, { "epoch": 0.6, "eval_accuracy": 0.7412823397075365, "eval_loss": 0.5794617533683777, "eval_runtime": 433.4136, "eval_samples_per_second": 2.051, "eval_steps_per_second": 2.051, "step": 1500 }, { "epoch": 0.62, "learning_rate": 7.632e-06, "loss": 0.519, "step": 1550 }, { "epoch": 0.64, "learning_rate": 7.232e-06, "loss": 0.5378, "step": 1600 }, { "epoch": 0.66, "learning_rate": 6.832000000000001e-06, "loss": 0.5982, "step": 1650 }, { "epoch": 0.68, "learning_rate": 6.432e-06, "loss": 0.7027, "step": 1700 }, { "epoch": 0.7, "learning_rate": 6.032e-06, "loss": 0.6579, "step": 1750 }, { "epoch": 0.7, "eval_accuracy": 0.7469066366704162, "eval_loss": 0.5774183869361877, "eval_runtime": 433.4068, "eval_samples_per_second": 2.051, "eval_steps_per_second": 2.051, "step": 1750 }, { "epoch": 0.72, "learning_rate": 5.6320000000000005e-06, "loss": 0.5044, "step": 1800 }, { "epoch": 0.74, "learning_rate": 5.232e-06, "loss": 0.6482, "step": 1850 }, { "epoch": 0.76, "learning_rate": 4.8320000000000005e-06, "loss": 0.5406, "step": 1900 }, { "epoch": 0.78, "learning_rate": 4.432e-06, "loss": 0.5372, "step": 1950 }, { "epoch": 0.8, "learning_rate": 4.0320000000000005e-06, "loss": 0.6107, "step": 2000 }, { "epoch": 0.8, "eval_accuracy": 0.7401574803149606, "eval_loss": 0.5690832734107971, "eval_runtime": 433.3967, "eval_samples_per_second": 2.051, "eval_steps_per_second": 2.051, "step": 2000 }, { "epoch": 0.82, "learning_rate": 3.6320000000000005e-06, "loss": 0.4043, "step": 2050 }, { "epoch": 0.84, "learning_rate": 3.2400000000000003e-06, "loss": 0.5344, "step": 2100 }, { "epoch": 0.86, "learning_rate": 2.84e-06, "loss": 0.7056, "step": 2150 }, { "epoch": 0.88, "learning_rate": 2.4400000000000004e-06, "loss": 0.5719, "step": 2200 }, { "epoch": 0.9, "learning_rate": 2.04e-06, "loss": 0.6255, "step": 2250 }, { "epoch": 0.9, "eval_accuracy": 0.7435320584926884, "eval_loss": 0.570974588394165, "eval_runtime": 433.4106, "eval_samples_per_second": 2.051, "eval_steps_per_second": 2.051, "step": 2250 }, { "epoch": 0.92, "learning_rate": 1.6400000000000002e-06, "loss": 0.5958, "step": 2300 }, { "epoch": 0.94, "learning_rate": 1.2400000000000002e-06, "loss": 0.5984, "step": 2350 }, { "epoch": 0.96, "learning_rate": 8.480000000000001e-07, "loss": 0.6103, "step": 2400 }, { "epoch": 0.98, "learning_rate": 4.4800000000000004e-07, "loss": 0.612, "step": 2450 }, { "epoch": 1.0, "learning_rate": 4.8e-08, "loss": 0.7034, "step": 2500 }, { "epoch": 1.0, "eval_accuracy": 0.7435320584926884, "eval_loss": 0.5713425874710083, "eval_runtime": 434.3078, "eval_samples_per_second": 2.047, "eval_steps_per_second": 2.047, "step": 2500 }, { "epoch": 1.0, "step": 2500, "total_flos": 0.0, "train_loss": 0.6507886672973633, "train_runtime": 17457.236, "train_samples_per_second": 0.573, "train_steps_per_second": 0.143 } ], "max_steps": 2500, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null, "trial_params": null }